LeetCode Solutions
1044. Longest Duplicate Substring
Time: $O(n\log n)$ Space: $O(n)$
class Solution {
public:
string longestDupSubstring(string s) {
constexpr int kMod = 1'000'000'007;
const int n = s.length();
vector<int> pows(n, 1);
int bestStart = -1;
int l = 1;
int r = n;
for (int i = 1; i < n; ++i)
pows[i] = (pows[i - 1] * 26L) % kMod;
while (l < r) {
const int m = (l + r) / 2;
const int start = getStart(s, m, pows, kMod);
if (start == -1) {
r = m;
} else {
bestStart = start;
l = m + 1;
}
}
if (bestStart == -1)
return "";
if (getStart(s, l, pows, kMod) == -1)
return s.substr(bestStart, l - 1);
return s.substr(bestStart, l);
}
private:
// K := length of hashed substring
int getStart(const string& s, int k, const vector<int>& pows,
const int& kMod) {
unordered_map<int, vector<int>> hashedToStarts;
long long h = 0;
// Compute hash value of s[:k]
for (int i = 0; i < k; ++i)
h = ((h * 26) % kMod + val(s[i])) % kMod;
hashedToStarts[h].push_back(0);
// Compute rolling hash by Rabin Karp
for (int i = k; i < s.length(); ++i) {
const int startIndex = i - k + 1;
h = ((h - static_cast<long long>(pows[k - 1]) * val(s[i - k])) % kMod +
kMod) %
kMod;
h = (h * 26 + val(s[i])) % kMod;
if (hashedToStarts.count(h)) {
const string currSub = s.substr(startIndex, k);
for (const int start : hashedToStarts[h])
if (s.substr(start, k) == currSub)
return startIndex;
}
hashedToStarts[h].push_back(startIndex);
}
return -1;
}
int val(char c) {
return c - 'a';
}
};
class Solution {
public String longestDupSubstring(String s) {
final int kMod = 1_000_000_007;
final int n = s.length();
int[] pows = new int[n];
int bestStart = -1;
int l = 1;
int r = n;
pows[0] = 1;
for (int i = 1; i < n; ++i)
pows[i] = (int) ((pows[i - 1] * 26L) % (long) kMod);
while (l < r) {
final int m = (l + r) / 2;
final int start = getStart(s, m, pows, kMod);
if (start == -1) {
r = m;
} else {
bestStart = start;
l = m + 1;
}
}
if (bestStart == -1)
return "";
if (getStart(s, l, pows, kMod) == -1)
return s.substring(bestStart, bestStart + l - 1);
return s.substring(bestStart, bestStart + l);
}
// K := length of hashed substring
private int getStart(final String s, int k, int[] pows, int kMod) {
Map<Long, List<Integer>> hashedToStarts = new HashMap<>();
long h = 0;
// Compute hash value of s[:k]
for (int i = 0; i < k; ++i)
h = ((h * 26) % kMod + val(s.charAt(i))) % kMod;
hashedToStarts.put(h, new ArrayList<>());
hashedToStarts.get(h).add(0);
// Compute rolling hash by Rabin Karp
for (int i = k; i < s.length(); ++i) {
final int startIndex = i - k + 1;
h = ((h - (long) (pows[k - 1]) * val(s.charAt(i - k))) % kMod + kMod) % kMod;
h = (h * 26 + val(s.charAt(i))) % kMod;
if (hashedToStarts.containsKey(h)) {
final String currSub = s.substring(startIndex, startIndex + k);
for (final int start : hashedToStarts.get(h))
if (s.substring(start, start + k).equals(currSub))
return startIndex;
}
hashedToStarts.put(h, new ArrayList<>());
hashedToStarts.get(h).add(startIndex);
}
return -1;
}
private int val(char c) {
return c - 'a';
}
}
class Solution:
def longestDupSubstring(self, s: str) -> str:
kMod = 1_000_000_007
bestStart = -1
l = 1
r = len(s)
def val(c: str) -> int:
return ord(c) - ord('a')
# K := length of hashed substring
def getStart(k: int) -> Optional[int]:
maxPow = pow(26, k - 1, kMod)
hashedToStart = defaultdict(list)
h = 0
# Compute hash value of s[:k]
for i in range(k):
h = (h * 26 + val(s[i])) % kMod
hashedToStart[h].append(0)
# Compute rolling hash by Rabin Karp
for i in range(k, len(s)):
startIndex = i - k + 1
h = (h - maxPow * val(s[i - k])) % kMod
h = (h * 26 + val(s[i])) % kMod
if h in hashedToStart:
currSub = s[startIndex:startIndex + k]
for start in hashedToStart[h]:
if s[start:start + k] == currSub:
return startIndex
hashedToStart[h].append(startIndex)
while l < r:
m = (l + r) // 2
start: Optional[int] = getStart(m)
if start:
bestStart = start
l = m + 1
else:
r = m
if bestStart == -1:
return ''
if getStart(l):
return s[bestStart:bestStart + l]
return s[bestStart:bestStart + l - 1]