Longest Repeating Substring - Google Top Interview Questions


Problem Statement :


Given a lowercase alphabet string s, return the length of the longest substring that occurs at least two times in s. If there's no such string, return 0.

Constraints

0 ≤ n ≤ 1,000 where n is the length of s

Example 1

Input

s = "abcdzabcd"

Output

4

Explanation

The longest substring that occurs more than once is "abcd".

Example 2

Input

s = "abcdefg"

Output

0

Explanation

There's no repeating substring.



Solution :



title-img




                        Solution in C++ :

string s;
int n;

struct node {
    int l, r, par, link;
    map<char, int> next;

    node(int l = 0, int r = 0, int par = -1) : l(l), r(r), par(par), link(-1) {
    }
    int len() {
        return r - l;
    }
    int &get(char c) {
        if (!next.count(c)) next[c] = -1;
        return next[c];
    }
};
node t[2005];
int sz;
int ans;

void dfs(int curr, int len) {
    len += t[curr].r - t[curr].l;
    if (t[curr].next.size()) {
        ans = max(ans, len);
        for (auto out : t[curr].next) {
            dfs(out.second, len);
        }
    }
}

struct state {
    int v, pos;
    state(int v, int pos) : v(v), pos(pos) {
    }
};
state ptr(0, 0);

state go(state st, int l, int r) {
    while (l < r)
        if (st.pos == t[st.v].len()) {
            st = state(t[st.v].get(s[l]), 0);
            if (st.v == -1) return st;
        } else {
            if (s[t[st.v].l + st.pos] != s[l]) return state(-1, -1);
            if (r - l < t[st.v].len() - st.pos) return state(st.v, st.pos + r - l);
            l += t[st.v].len() - st.pos;
            st.pos = t[st.v].len();
        }
    return st;
}

int split(state st) {
    if (st.pos == t[st.v].len()) return st.v;
    if (st.pos == 0) return t[st.v].par;
    node v = t[st.v];
    int id = sz++;
    t[id] = node(v.l, v.l + st.pos, v.par);
    t[v.par].get(s[v.l]) = id;
    t[id].get(s[v.l + st.pos]) = st.v;
    t[st.v].par = id;
    t[st.v].l += st.pos;
    return id;
}

int get_link(int v) {
    if (t[v].link != -1) return t[v].link;
    if (t[v].par == -1) return 0;
    int to = get_link(t[v].par);
    return t[v].link = split(go(state(to, t[to].len()), t[v].l + (t[v].par == 0), t[v].r));
}

void tree_extend(int pos) {
    for (;;) {
        state nptr = go(ptr, pos, pos + 1);
        if (nptr.v != -1) {
            ptr = nptr;
            return;
        }

        int mid = split(ptr);
        int leaf = sz++;
        t[leaf] = node(pos, n, mid);
        t[mid].get(s[pos]) = leaf;

        ptr.v = get_link(mid);
        ptr.pos = t[ptr.v].len();
        if (!mid) break;
    }
}

void build_tree() {
    sz = 1;
    ptr = state(0, 0);
    for (int i = 0; i < n; ++i) tree_extend(i);
}

int solve(string str) {
    str += "$";
    s = str;
    n = str.size();
    build_tree();
    ans = 0;
    dfs(0, 0);
    for (int i = 0; i <= n; i++) {
        t[i] = node();
    }
    return ans;
}
                    


                        Solution in Java :

import java.util.*;

class Solution {
    public class RabinKarp {
        String s;
        long[] prefix_hash;
        long[] powers;

        final int PRIME1 = 1009;
        final int MOD1 = 998244353;

        /**
         * Class constructor specifiying the string s we want to work with.
         */
        RabinKarp(String s) {
            this.s = s;
            this.prefix_hash = new long[this.s.length() + 1];
            this.powers = new long[this.s.length() + 1];
            this.computeHashAndPow();
        }
        /**
         * Computes the prefix hash values and computes the prefix powers.
         */
        private void computeHashAndPow() {
            this.powers[0] = 1;
            for (int i = 1; i <= this.s.length(); i++) {
                long c = (long) this.s.charAt(i - 1) - 'a' + 1;
                this.prefix_hash[i] = ((prefix_hash[i - 1] * PRIME1 + c) % MOD1);
                this.powers[i] = ((this.powers[i - 1] * PRIME1) % MOD1);
            }
        }

        /**
         * Takes a left and right inclusive indices that resembles a substring and calculates the
         * hash in O(1) time
         * @param l The left of the substring
         * @param r The right of the substring
         * @return The hash of the substring [l,r].
         */
        public long getHashSubstring(int l, int r) {
            return (this.prefix_hash[r + 1] - this.prefix_hash[l] * this.powers[r - l + 1] % MOD1
                       + MOD1)
                % MOD1;
        }
    }
    public int solve(String s) {
        int n = s.length();
        RabinKarp rk = new RabinKarp(s);
        int res = 0;

        int left = 1;
        int right = n;

        while (left <= right) {
            int candidate_length = left + (right - left) / 2;
            HashSet<Long> seen = new HashSet();

            boolean worked = false;

            for (int i = 0; i < n; i++) {
                if (i + candidate_length - 1 < n) {
                    long hash = rk.getHashSubstring(i, i + candidate_length - 1);
                    if (seen.contains(hash)) {
                        worked = true;
                        res = Math.max(res, candidate_length);
                    }
                    seen.add(hash);
                }
            }
            if (worked) {
                left = candidate_length + 1;
            } else {
                right = candidate_length - 1;
            }
        }
        return res;
    }
}
                    


                        Solution in Python : 
                            
MOD = 10 ** 9 + 7
P = 53


class Solution:
    def solve(self, s):
        N = len(s)

        def can(x):
            if x <= 0:
                return True
            if x > N:
                return False
            hash = 0
            power = (P ** (x - 1)) % MOD
            hash_to_idx = defaultdict(list)  # maps hash => start index of substring

            for i in range(x):
                hash = (hash * P + ord(s[i])) % MOD
            hash_to_idx[hash].append(0)
            for i in range(x, N):
                hash = (hash - power * ord(s[i - x])) % MOD
                hash = (hash * P + ord(s[i])) % MOD
                start_idx = i - x + 1
                if hash in hash_to_idx:
                    this_string = s[start_idx : i + 1]
                    for other_idx in hash_to_idx[hash]:
                        other_string = s[other_idx : other_idx + x]
                        if this_string == other_string:
                            return True
                hash_to_idx[hash].append(start_idx)
            return False

        lo = 0
        hi = N + 1
        while lo < hi - 1:
            mid = (lo + hi) // 2
            if can(mid):
                lo = mid
            else:
                hi = mid
        return lo
                    


View More Similar Problems

Balanced Forest

Greg has a tree of nodes containing integer data. He wants to insert a node with some non-zero integer value somewhere into the tree. His goal is to be able to cut two edges and have the values of each of the three new trees sum to the same amount. This is called a balanced forest. Being frugal, the data value he inserts should be minimal. Determine the minimal amount that a new node can have to a

View Solution →

Jenny's Subtrees

Jenny loves experimenting with trees. Her favorite tree has n nodes connected by n - 1 edges, and each edge is ` unit in length. She wants to cut a subtree (i.e., a connected part of the original tree) of radius r from this tree by performing the following two steps: 1. Choose a node, x , from the tree. 2. Cut a subtree consisting of all nodes which are not further than r units from node x .

View Solution →

Tree Coordinates

We consider metric space to be a pair, , where is a set and such that the following conditions hold: where is the distance between points and . Let's define the product of two metric spaces, , to be such that: , where , . So, it follows logically that is also a metric space. We then define squared metric space, , to be the product of a metric space multiplied with itself: . For

View Solution →

Array Pairs

Consider an array of n integers, A = [ a1, a2, . . . . an] . Find and print the total number of (i , j) pairs such that ai * aj <= max(ai, ai+1, . . . aj) where i < j. Input Format The first line contains an integer, n , denoting the number of elements in the array. The second line consists of n space-separated integers describing the respective values of a1, a2 , . . . an .

View Solution →

Self Balancing Tree

An AVL tree (Georgy Adelson-Velsky and Landis' tree, named after the inventors) is a self-balancing binary search tree. In an AVL tree, the heights of the two child subtrees of any node differ by at most one; if at any time they differ by more than one, rebalancing is done to restore this property. We define balance factor for each node as : balanceFactor = height(left subtree) - height(righ

View Solution →

Array and simple queries

Given two numbers N and M. N indicates the number of elements in the array A[](1-indexed) and M indicates number of queries. You need to perform two types of queries on the array A[] . You are given queries. Queries can be of two types, type 1 and type 2. Type 1 queries are represented as 1 i j : Modify the given array by removing elements from i to j and adding them to the front. Ty

View Solution →