Longest Repeating Substring - Google Top Interview Questions


Problem Statement :


Given a lowercase alphabet string s, return the length of the longest substring that occurs at least two times in s. If there's no such string, return 0.

Constraints

0 ≤ n ≤ 1,000 where n is the length of s

Example 1

Input

s = "abcdzabcd"

Output

4

Explanation

The longest substring that occurs more than once is "abcd".

Example 2

Input

s = "abcdefg"

Output

0

Explanation

There's no repeating substring.


Solution :



title-img



                        Solution in C++ :

// Working copy of the input with a '$' terminator appended (assigned in solve()).
string s;
// Length of s, terminator included (assigned in solve()).
int n;

// One tree vertex together with the edge that enters it from its parent.
// The edge is stored as the half-open index range [l, r).
struct node {
    int l;     // first index of the entering edge
    int r;     // one past the last index of the entering edge
    int par;   // index of the parent vertex, -1 when there is none
    int link;  // cached link target, -1 while not yet computed
    map<char, int> next;  // child vertex index per character

    node(int l = 0, int r = 0, int par = -1) : l(l), r(r), par(par), link(-1) {}

    // Number of characters on the entering edge.
    int len() { return r - l; }

    // Reference to the child slot for c. A slot that does not exist yet is
    // created holding -1, so callers can both test and assign through it.
    int &get(char c) {
        return next.insert(make_pair(c, -1)).first->second;
    }
};
// Fixed-size node pool; vertex 0 is where dfs() and build_tree() start.
node t[2005];
// Index of the next unused slot in t (bumped with sz++ on every allocation).
int sz;
// Largest length recorded by dfs().
int ans;

void dfs(int curr, int len) {
    len += t[curr].r - t[curr].l;
    if (t[curr].next.size()) {
        ans = max(ans, len);
        for (auto out : t[curr].next) {
            dfs(out.second, len);
        }
    }
}

// A position in the tree: pos characters along the edge that enters vertex v.
struct state {
    int v;
    int pos;

    state(int vertex, int offset) : v(vertex), pos(offset) {}
};
// Position that tree_extend() advances while the tree is being built.
state ptr(0, 0);

// Advances position st through the tree while consuming the characters s[l, r).
// Returns the position reached, or a state whose v is -1 when the characters
// cannot be followed from st.
// Side effect: a lookup of a missing child goes through node::get(), which
// leaves a -1 placeholder entry in that vertex's child map.
state go(state st, int l, int r) {
    while (l < r)
        if (st.pos == t[st.v].len()) {
            // At the end of the current edge: step onto the child keyed by s[l].
            // l is not advanced here; the next iteration consumes from the child's edge.
            st = state(t[st.v].get(s[l]), 0);
            // No child for s[l].
            if (st.v == -1) return st;
        } else {
            // Inside an edge: only the character at the current offset is compared.
            if (s[t[st.v].l + st.pos] != s[l]) return state(-1, -1);
            // The remaining input ends strictly inside this edge: stop partway along it.
            if (r - l < t[st.v].len() - st.pos) return state(st.v, st.pos + r - l);
            // Otherwise consume the rest of the edge and continue from its end.
            l += t[st.v].len() - st.pos;
            st.pos = t[st.v].len();
        }
    return st;
}

// Returns the index of a vertex located exactly at position st, allocating a
// new vertex in the middle of the edge when st lies strictly inside it.
int split(state st) {
    // Already at the vertex that ends the edge.
    if (st.pos == t[st.v].len()) return st.v;
    // At the very start of the edge, which is the parent vertex.
    if (st.pos == 0) return t[st.v].par;
    // Copy taken before t[st.v] is modified below.
    node v = t[st.v];
    int id = sz++;
    // The new vertex takes the first st.pos characters of the old edge.
    t[id] = node(v.l, v.l + st.pos, v.par);
    // Re-point the parent's child slot at the new vertex.
    t[v.par].get(s[v.l]) = id;
    // The old vertex becomes a child of the new one, keyed by its new first character.
    t[id].get(s[v.l + st.pos]) = st.v;
    t[st.v].par = id;
    // The old vertex keeps only the remainder of the edge.
    t[st.v].l += st.pos;
    return id;
}

// Returns the link of vertex v, computing it on first use and caching it in t[v].link.
// A vertex without a parent (par == -1) reports 0 and nothing is cached for it.
// NOTE(review): this looks like the suffix link of a suffix tree — confirm.
int get_link(int v) {
    if (t[v].link != -1) return t[v].link;
    if (t[v].par == -1) return 0;
    int to = get_link(t[v].par);
    // Start at the end of the edge entering the parent's link target and re-read
    // v's edge label from there; when the parent is vertex 0 the first character
    // of the label is skipped. split() allocates a vertex if the walk ends inside an edge.
    return t[v].link = split(go(state(to, t[to].len()), t[v].l + (t[v].par == 0), t[v].r));
}

// Processes character s[pos]: tries to advance ptr by that character and, as
// long as that fails, attaches a new leaf at ptr and retries from the link of
// the vertex the leaf was attached to.
void tree_extend(int pos) {
    for (;;) {
        state nptr = go(ptr, pos, pos + 1);
        if (nptr.v != -1) {
            // s[pos] can be followed from ptr: move ptr forward and finish.
            ptr = nptr;
            return;
        }

        // Ensure a vertex exists exactly at ptr, then hang a new leaf off it.
        int mid = split(ptr);
        int leaf = sz++;
        // The leaf's edge is s[pos, n), i.e. it runs to the end of the string.
        t[leaf] = node(pos, n, mid);
        t[mid].get(s[pos]) = leaf;

        // Continue from the end of the edge that enters mid's link target.
        ptr.v = get_link(mid);
        ptr.pos = t[ptr.v].len();
        // The leaf was attached to vertex 0: nothing further to try for this character.
        if (!mid) break;
    }
}

// Builds the tree for the global string s by feeding its n characters, in
// order, to tree_extend(). Slot 0 of the pool is kept for the starting vertex,
// so allocation begins at index 1.
void build_tree() {
    ptr = state(0, 0);
    sz = 1;

    int i = 0;
    while (i < n) {
        tree_extend(i);
        ++i;
    }
}

int solve(string str) {
    str += "$";
    s = str;
    n = str.size();
    build_tree();
    ans = 0;
    dfs(0, 0);
    for (int i = 0; i <= n; i++) {
        t[i] = node();
    }
    return ans;
}
                    

                        Solution in Java :

import java.util.*;

/**
 * Finds the length of the longest substring that occurs at least twice in a string.
 *
 * <p>The search binary-searches on the answer: if some substring of length {@code k} repeats,
 * so does one of every shorter length. Each candidate length is checked with polynomial
 * prefix hashes; equal hashes are confirmed by comparing the actual characters, so a hash
 * collision can never inflate the result.
 */
class Solution {
    /**
     * Polynomial prefix hashes of a string, giving the hash of any substring in O(1).
     */
    public class RabinKarp {
        String s;
        long[] prefix_hash;
        long[] powers;

        final int PRIME1 = 1009;
        final int MOD1 = 998244353;

        /**
         * Class constructor specifying the string s we want to work with.
         */
        RabinKarp(String s) {
            this.s = s;
            this.prefix_hash = new long[this.s.length() + 1];
            this.powers = new long[this.s.length() + 1];
            this.computeHashAndPow();
        }

        /**
         * Computes the prefix hash values and the powers of the base, both modulo MOD1.
         * Characters are mapped as 'a' -> 1, 'b' -> 2, ... so that no character hashes to 0.
         */
        private void computeHashAndPow() {
            this.powers[0] = 1;
            for (int i = 1; i <= this.s.length(); i++) {
                long c = (long) this.s.charAt(i - 1) - 'a' + 1;
                this.prefix_hash[i] = ((prefix_hash[i - 1] * PRIME1 + c) % MOD1);
                this.powers[i] = ((this.powers[i - 1] * PRIME1) % MOD1);
            }
        }

        /**
         * Takes left and right inclusive indices that describe a substring and calculates its
         * hash in O(1) time.
         *
         * @param l The left index of the substring (inclusive)
         * @param r The right index of the substring (inclusive)
         * @return The hash of the substring [l,r], in the range [0, MOD1).
         */
        public long getHashSubstring(int l, int r) {
            // Both operands are below MOD1 (< 2^30), so the product fits in a long.
            long shiftedPrefix = this.prefix_hash[l] * this.powers[r - l + 1] % MOD1;
            // Adding MOD1 keeps the difference non-negative before the final reduction.
            return (this.prefix_hash[r + 1] - shiftedPrefix + MOD1) % MOD1;
        }
    }

    /**
     * Returns the length of the longest substring occurring at least twice in {@code s}
     * (occurrences may overlap), or 0 if no substring repeats.
     *
     * @param s the input string; the hashing assumes lowercase letters
     * @return the length of the longest repeated substring
     */
    public int solve(String s) {
        int n = s.length();
        RabinKarp rk = new RabinKarp(s);
        int res = 0;

        int left = 1;
        int right = n;

        while (left <= right) {
            int candidateLength = left + (right - left) / 2;
            if (hasRepeatedSubstring(s, rk, candidateLength)) {
                res = Math.max(res, candidateLength);
                left = candidateLength + 1;
            } else {
                right = candidateLength - 1;
            }
        }
        return res;
    }

    /**
     * Reports whether some substring of exactly {@code length} characters occurs at two
     * different start positions of {@code s}.
     */
    private boolean hasRepeatedSubstring(String s, RabinKarp rk, int length) {
        // Start indices of the windows seen so far, grouped by hash value.
        Map<Long, List<Integer>> startsByHash = new HashMap<>();

        for (int start = 0; start + length <= s.length(); start++) {
            long hash = rk.getHashSubstring(start, start + length - 1);
            List<Integer> earlierStarts =
                startsByHash.computeIfAbsent(hash, unused -> new ArrayList<>());
            // Equal hashes only suggest equality; compare characters to rule out collisions.
            for (int earlier : earlierStarts) {
                if (s.regionMatches(earlier, s, start, length)) {
                    return true;
                }
            }
            earlierStarts.add(start);
        }
        return false;
    }
}
                    

                        Solution in Python : 
                            
# Modulus and base of the polynomial rolling hash.
MOD = 10 ** 9 + 7
P = 53


class Solution:
    def solve(self, s):
        """Return the length of the longest substring occurring at least twice in ``s``.

        Occurrences may overlap. Returns 0 when no substring repeats (including
        for the empty string). The answer is found by binary search on the
        length, using a rolling hash to find candidate matches and a direct
        string comparison to confirm them.
        """
        N = len(s)

        def can(x):
            """Return True if some substring of length ``x`` occurs at least twice."""
            if x <= 0:
                return True
            if x > N:
                return False
            window_hash = 0
            # Weight of the window's leading character; modular pow avoids
            # building a huge intermediate integer.
            power = pow(P, x - 1, MOD)
            # Maps hash => start indices of the windows already seen. A plain
            # dict is used so the snippet needs no import.
            starts_by_hash = {}

            for i in range(x):
                window_hash = (window_hash * P + ord(s[i])) % MOD
            starts_by_hash.setdefault(window_hash, []).append(0)
            for i in range(x, N):
                # Slide the window: drop s[i - x], then append s[i].
                window_hash = (window_hash - power * ord(s[i - x])) % MOD
                window_hash = (window_hash * P + ord(s[i])) % MOD
                start_idx = i - x + 1
                earlier_starts = starts_by_hash.setdefault(window_hash, [])
                if earlier_starts:
                    this_string = s[start_idx : i + 1]
                    # Equal hashes are only a hint; compare the text itself.
                    for other_idx in earlier_starts:
                        if s[other_idx : other_idx + x] == this_string:
                            return True
                earlier_starts.append(start_idx)
            return False

        # Invariant: can(lo) is True and can(hi) is False.
        lo = 0
        hi = N + 1
        while lo < hi - 1:
            mid = (lo + hi) // 2
            if can(mid):
                lo = mid
            else:
                hi = mid
        return lo
                    

View More Similar Problems

Kitty's Calculations on a Tree

Kitty has a tree, T, consisting of n nodes where each node is uniquely labeled from 1 to n. Her friend Alex gave her q sets, where each set contains k distinct nodes. Kitty needs to calculate the following expression on each set: $\left(\sum_{\{u,v\}} u \cdot v \cdot dist(u,v)\right) \bmod (10^9+7)$, where: {u, v} denotes an unordered pair of nodes belonging to the set. dist(u, v) denotes the number of edges on the unique (shortest) path between nodes a

View Solution →

Is This a Binary Search Tree?

For the purposes of this challenge, we define a binary tree to be a binary search tree with the following ordering requirements: The data value of every node in a node's left subtree is less than the data value of that node. The data value of every node in a node's right subtree is greater than the data value of that node. Given the root node of a binary tree, can you determine if it's also a

View Solution →

Square-Ten Tree

The square-ten tree decomposition of an array is defined as follows: The lowest ($0^{th}$) level of the square-ten tree consists of single array elements in their natural order. The $k^{th}$ level (starting from $1$) of the square-ten tree consists of subsequent array subsegments of length $10^{2^{k-1}}$ in their natural order. Thus, the $1^{st}$ level contains subsegments of length $10^{2^{1-1}} = 10$, the $2^{nd}$ level contains subsegments of length $10^{2^{2-1}} = 100$, the

View Solution →

Balanced Forest

Greg has a tree of nodes containing integer data. He wants to insert a node with some non-zero integer value somewhere into the tree. His goal is to be able to cut two edges and have the values of each of the three new trees sum to the same amount. This is called a balanced forest. Being frugal, the data value he inserts should be minimal. Determine the minimal amount that a new node can have to a

View Solution →

Jenny's Subtrees

Jenny loves experimenting with trees. Her favorite tree has n nodes connected by n - 1 edges, and each edge is 1 unit in length. She wants to cut a subtree (i.e., a connected part of the original tree) of radius r from this tree by performing the following two steps: 1. Choose a node, x, from the tree. 2. Cut a subtree consisting of all nodes which are not further than r units from node x.

View Solution →

Tree Coordinates

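We consider a metric space to be a pair, $(M, d)$, where $M$ is a set and $d : M \times M \to \mathbb{R}$ such that the following conditions hold: $d(x, y) \ge 0$; $d(x, y) = 0 \iff x = y$; $d(x, y) = d(y, x)$; $d(x, z) \le d(x, y) + d(y, z)$, where $d(x, y)$ is the distance between points $x$ and $y$. Let's define the product of two metric spaces, $(M_1, d_1) \times (M_2, d_2)$, to be $(M, d)$ such that: $M = M_1 \times M_2$, $d((x_1, x_2), (y_1, y_2)) = d_1(x_1, y_1) + d_2(x_2, y_2)$, where $x_1, y_1 \in M_1$, $x_2, y_2 \in M_2$. So, it follows logically that $(M, d)$ is also a metric space. We then define squared metric space, $(M, d)^2$, to be the product of a metric space multiplied with itself: $(M, d) \times (M, d)$. For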

View Solution →