LeetCode Solutions

1044. Longest Duplicate Substring

Time: $O(n\log n)$

Space: $O(n)$

			

class Solution {
 public:
  string longestDupSubstring(string s) {
    constexpr int kMod = 1'000'000'007;
    const int n = s.length();
    vector<int> pows(n, 1);
    int bestStart = -1;
    int l = 1;
    int r = n;

    for (int i = 1; i < n; ++i)
      pows[i] = (pows[i - 1] * 26L) % kMod;

    while (l < r) {
      const int m = (l + r) / 2;
      const int start = getStart(s, m, pows, kMod);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows, kMod) == -1)
      return s.substr(bestStart, l - 1);
    return s.substr(bestStart, l);
  }

 private:
  // K := length of hashed substring
  int getStart(const string& s, int k, const vector<int>& pows,
               const int& kMod) {
    unordered_map<int, vector<int>> hashedToStarts;
    long long h = 0;

    // Compute hash value of s[:k]
    for (int i = 0; i < k; ++i)
      h = ((h * 26) % kMod + val(s[i])) % kMod;
    hashedToStarts[h].push_back(0);

    // Compute rolling hash by Rabin Karp
    for (int i = k; i < s.length(); ++i) {
      const int startIndex = i - k + 1;
      h = ((h - static_cast<long long>(pows[k - 1]) * val(s[i - k])) % kMod +
           kMod) %
          kMod;
      h = (h * 26 + val(s[i])) % kMod;
      if (hashedToStarts.count(h)) {
        const string currSub = s.substr(startIndex, k);
        for (const int start : hashedToStarts[h])
          if (s.substr(start, k) == currSub)
            return startIndex;
      }
      hashedToStarts[h].push_back(startIndex);
    }

    return -1;
  }

  int val(char c) {
    return c - 'a';
  }
};
			

class Solution {
  public String longestDupSubstring(String s) {
    final int kMod = 1_000_000_007;
    final int n = s.length();
    int[] pows = new int[n];
    int bestStart = -1;
    int l = 1;
    int r = n;

    pows[0] = 1;
    for (int i = 1; i < n; ++i)
      pows[i] = (int) ((pows[i - 1] * 26L) % (long) kMod);

    while (l < r) {
      final int m = (l + r) / 2;
      final int start = getStart(s, m, pows, kMod);
      if (start == -1) {
        r = m;
      } else {
        bestStart = start;
        l = m + 1;
      }
    }

    if (bestStart == -1)
      return "";
    if (getStart(s, l, pows, kMod) == -1)
      return s.substring(bestStart, bestStart + l - 1);
    return s.substring(bestStart, bestStart + l);
  }

  // K := length of hashed substring
  private int getStart(final String s, int k, int[] pows, int kMod) {
    Map<Long, List<Integer>> hashedToStarts = new HashMap<>();
    long h = 0;

    // Compute hash value of s[:k]
    for (int i = 0; i < k; ++i)
      h = ((h * 26) % kMod + val(s.charAt(i))) % kMod;
    hashedToStarts.put(h, new ArrayList<>());
    hashedToStarts.get(h).add(0);

    // Compute rolling hash by Rabin Karp
    for (int i = k; i < s.length(); ++i) {
      final int startIndex = i - k + 1;
      h = ((h - (long) (pows[k - 1]) * val(s.charAt(i - k))) % kMod + kMod) % kMod;
      h = (h * 26 + val(s.charAt(i))) % kMod;
      if (hashedToStarts.containsKey(h)) {
        final String currSub = s.substring(startIndex, startIndex + k);
        for (final int start : hashedToStarts.get(h))
          if (s.substring(start, start + k).equals(currSub))
            return startIndex;
      }
      hashedToStarts.put(h, new ArrayList<>());
      hashedToStarts.get(h).add(startIndex);
    }

    return -1;
  }

  private int val(char c) {
    return c - 'a';
  }
}
			

class Solution:
  def longestDupSubstring(self, s: str) -> str:
    kMod = 1_000_000_007
    bestStart = -1
    l = 1
    r = len(s)

    def val(c: str) -> int:
      return ord(c) - ord('a')

    # K := length of hashed substring
    def getStart(k: int) -> Optional[int]:
      maxPow = pow(26, k - 1, kMod)
      hashedToStart = defaultdict(list)
      h = 0

      # Compute hash value of s[:k]
      for i in range(k):
        h = (h * 26 + val(s[i])) % kMod
      hashedToStart[h].append(0)

      # Compute rolling hash by Rabin Karp
      for i in range(k, len(s)):
        startIndex = i - k + 1
        h = (h - maxPow * val(s[i - k])) % kMod
        h = (h * 26 + val(s[i])) % kMod
        if h in hashedToStart:
          currSub = s[startIndex:startIndex + k]
          for start in hashedToStart[h]:
            if s[start:start + k] == currSub:
              return startIndex
        hashedToStart[h].append(startIndex)

    while l < r:
      m = (l + r) // 2
      start: Optional[int] = getStart(m)
      if start:
        bestStart = start
        l = m + 1
      else:
        r = m

    if bestStart == -1:
      return ''
    if getStart(l):
      return s[bestStart:bestStart + l]
    return s[bestStart:bestStart + l - 1]