Longest Repeating Substring - Google Top Interview Questions


Problem Statement :


Given a lowercase alphabet string s, return the length of the longest substring that occurs at least two times in s. If there's no such string, return 0.

Constraints

0 ≤ n ≤ 1,000 where n is the length of s

Example 1

Input

s = "abcdzabcd"

Output

4

Explanation

The longest substring that occurs more than once is "abcd".

Example 2

Input

s = "abcdefg"

Output

0

Explanation

There's no repeating substring.



Solution :



title-img




                        Solution in C++ :

// Text the tree is built over; solve() stores its argument plus a trailing "$" here.
string s;
// Length of s (terminator included); also used as the right end of every new leaf edge.
int n;

// One tree node together with the edge that enters it from its parent.
// The edge is labelled by the half-open index range [l, r) of the global string.
struct node {
    int l, r, par, link;
    map<char, int> next;  // first character of an outgoing edge -> child index

    // Default arguments describe a parentless node with an empty edge; link starts unset (-1).
    node(int l = 0, int r = 0, int par = -1) {
        this->l = l;
        this->r = r;
        this->par = par;
        this->link = -1;
    }

    // Number of characters on the edge entering this node.
    int len() {
        return r - l;
    }

    // Reference to the child slot for c. A missing slot is created holding -1,
    // so a lookup of an absent character leaves a -1 entry behind in the map.
    int &get(char c) {
        auto slot = next.find(c);
        if (slot == next.end()) {
            slot = next.emplace(c, -1).first;
        }
        return slot->second;
    }
};
// Node pool; slot 0 is the node build_tree() and dfs() start from (the root).
// NOTE(review): 2005 presumably covers the worst case for the stated limit n <= 1000 plus "$" — confirm.
node t[2005];
// Number of pool slots handed out so far; build_tree() starts it at 1 because slot 0 is already in use.
int sz;
// Best length found by dfs(); solve() zeroes it before the traversal.
int ans;

void dfs(int curr, int len) {
    len += t[curr].r - t[curr].l;
    if (t[curr].next.size()) {
        ans = max(ans, len);
        for (auto out : t[curr].next) {
            dfs(out.second, len);
        }
    }
}

// A position in the tree: `pos` characters along the edge that enters node `v`.
struct state {
    int v, pos;

    state(int v, int pos) {
        this->v = v;
        this->pos = pos;
    }
};
// Cursor that tree_extend() advances; build_tree() resets it to state(0, 0).
state ptr(0, 0);

// Moves from position st along the characters s[l, r) and returns the position reached.
// The returned state has v == -1 when there is no child edge for the next character,
// or when the next character on the current edge differs from s[l].
// Note: a missing child is looked up through node::get, which inserts a -1 entry
// for that character into the node's child map.
state go(state st, int l, int r) {
    while (l < r)
        if (st.pos == t[st.v].len()) {
            // At the end of the edge into st.v: step onto the child edge that starts with s[l].
            st = state(t[st.v].get(s[l]), 0);
            if (st.v == -1) return st;
        } else {
            // Inside an edge: only the next edge character is compared with s[l];
            // the rest of the edge is then skipped without per-character checks.
            if (s[t[st.v].l + st.pos] != s[l]) return state(-1, -1);
            // Fewer characters remain than the edge still has: stop inside this edge.
            if (r - l < t[st.v].len() - st.pos) return state(st.v, st.pos + r - l);
            // Otherwise consume the remainder of the edge and continue from its end.
            l += t[st.v].len() - st.pos;
            st.pos = t[st.v].len();
        }
    return st;
}

// Returns the index of a node located exactly at position st, creating one if needed.
// At the end of the edge that is st.v itself; at offset 0 it is st.v's parent.
// Otherwise the edge into st.v is cut at offset st.pos: a new node takes the first
// st.pos characters and st.v keeps the remainder as a child of the new node.
int split(state st) {
    if (st.pos == t[st.v].len()) return st.v;
    if (st.pos == 0) return t[st.v].par;
    // Copy of the node taken before t[st.v] is modified below.
    node v = t[st.v];
    int id = sz++;
    // New node owns the upper part of the edge, s[v.l, v.l + st.pos), under the old parent.
    t[id] = node(v.l, v.l + st.pos, v.par);
    // The parent's slot for the edge's first character now leads to the new node.
    t[v.par].get(s[v.l]) = id;
    // The old node hangs below the new one, keyed by the first character of the lower part.
    t[id].get(s[v.l + st.pos]) = st.v;
    t[st.v].par = id;
    t[st.v].l += st.pos;
    return id;
}

// Returns t[v].link, computing and caching it on first use.
// A node without a parent (par == -1) yields 0 without caching. Otherwise the parent's
// link is resolved first (recursively); starting at the end of that node's edge, go()
// re-reads v's edge label — minus its first character when the parent is node 0 — and
// split() makes sure a node exists at the position reached. That node's index is stored
// as the link.
// NOTE(review): this matches the suffix-link step of Ukkonen-style construction — confirm.
int get_link(int v) {
    if (t[v].link != -1) return t[v].link;
    if (t[v].par == -1) return 0;
    int to = get_link(t[v].par);
    return t[v].link = split(go(state(to, t[to].len()), t[v].l + (t[v].par == 0), t[v].r));
}

// Adds character s[pos] to the tree, working from the global cursor ptr.
void tree_extend(int pos) {
    for (;;) {
        // Try to advance the cursor by the single character s[pos].
        state nptr = go(ptr, pos, pos + 1);
        if (nptr.v != -1) {
            // The character is already present at the cursor: keep the new position and stop.
            ptr = nptr;
            return;
        }

        // Not present: obtain a node at the cursor and hang a new leaf with edge s[pos, n) under it.
        int mid = split(ptr);
        int leaf = sz++;
        t[leaf] = node(pos, n, mid);
        t[mid].get(s[pos]) = leaf;

        // Continue from the node that mid links to, positioned at the end of its edge.
        ptr.v = get_link(mid);
        ptr.pos = t[ptr.v].len();
        // mid == 0 means the leaf was attached directly to node 0, so the loop ends here.
        if (!mid) break;
    }
}

// Builds the tree for the global string s: only slot 0 is in use at the start,
// the cursor is placed on it, and the characters s[0..n) are fed in one at a time.
void build_tree() {
    ptr = state(0, 0);
    sz = 1;
    int i = 0;
    while (i < n) {
        tree_extend(i);
        ++i;
    }
}

int solve(string str) {
    str += "$";
    s = str;
    n = str.size();
    build_tree();
    ans = 0;
    dfs(0, 0);
    for (int i = 0; i <= n; i++) {
        t[i] = node();
    }
    return ans;
}
                    


                        Solution in Java :

import java.util.*;

class Solution {
    /**
     * Polynomial rolling hash over a fixed lowercase string. Prefix hashes and powers of the
     * base are precomputed once so that the hash of any substring is available in O(1).
     *
     * <p>Equal substrings always have equal hashes; unequal substrings may still collide, so
     * callers that need certainty must compare the actual characters on a hash match.
     */
    public class RabinKarp {
        final String s;
        final long[] prefix_hash;
        final long[] powers;

        final int PRIME1 = 1009;
        final int MOD1 = 998244353;

        /**
         * Class constructor specifying the string s we want to work with.
         *
         * @param s the string whose substrings will be hashed
         */
        RabinKarp(String s) {
            this.s = s;
            this.prefix_hash = new long[this.s.length() + 1];
            this.powers = new long[this.s.length() + 1];
            this.computeHashAndPow();
        }

        /**
         * Fills {@code prefix_hash[i]} with the hash of the first {@code i} characters and
         * {@code powers[i]} with {@code PRIME1^i}, both reduced modulo {@code MOD1}.
         */
        private void computeHashAndPow() {
            this.powers[0] = 1;
            for (int i = 1; i <= this.s.length(); i++) {
                // Map 'a'..'z' to 1..26 so that no character contributes a zero digit.
                long c = (long) this.s.charAt(i - 1) - 'a' + 1;
                this.prefix_hash[i] = (this.prefix_hash[i - 1] * PRIME1 + c) % MOD1;
                this.powers[i] = (this.powers[i - 1] * PRIME1) % MOD1;
            }
        }

        /**
         * Returns the hash of the substring with inclusive bounds {@code [l, r]} in O(1).
         *
         * @param l index of the first character of the substring
         * @param r index of the last character of the substring
         * @return the hash of the substring, in the range {@code [0, MOD1)}
         */
        public long getHashSubstring(int l, int r) {
            long shiftedPrefix = this.prefix_hash[l] * this.powers[r - l + 1] % MOD1;
            // Adding MOD1 keeps the difference non-negative before the final reduction.
            return (this.prefix_hash[r + 1] - shiftedPrefix + MOD1) % MOD1;
        }
    }

    /**
     * Returns the length of the longest substring that occurs at least twice in {@code s}
     * (occurrences may overlap), or 0 if there is none.
     *
     * <p>Binary-searches the answer: if some substring of length L repeats, so does one of
     * every shorter length. Each candidate length is checked with rolling hashes, and every
     * hash match is confirmed by comparing the characters, so a hash collision cannot
     * inflate the result.
     *
     * @param s a string of lowercase letters
     * @return the length of the longest repeated substring
     */
    public int solve(String s) {
        int n = s.length();
        RabinKarp rk = new RabinKarp(s);
        int res = 0;

        int left = 1;
        // A substring of length n occurs exactly once, so n - 1 is the largest useful length.
        int right = n - 1;

        while (left <= right) {
            int candidateLength = left + (right - left) / 2;
            if (hasRepeatOfLength(s, rk, candidateLength)) {
                res = candidateLength;
                left = candidateLength + 1;
            } else {
                right = candidateLength - 1;
            }
        }
        return res;
    }

    /** Reports whether some substring of exactly {@code length} characters occurs twice. */
    private boolean hasRepeatOfLength(String s, RabinKarp rk, int length) {
        Map<Long, List<Integer>> startsByHash = new HashMap<>();
        for (int start = 0; start + length <= s.length(); start++) {
            long hash = rk.getHashSubstring(start, start + length - 1);
            List<Integer> earlierStarts = startsByHash.get(hash);
            if (earlierStarts == null) {
                earlierStarts = new ArrayList<>();
                startsByHash.put(hash, earlierStarts);
            }
            for (int earlier : earlierStarts) {
                // Equal hashes are only a hint; the characters decide.
                if (s.regionMatches(earlier, s, start, length)) {
                    return true;
                }
            }
            earlierStarts.add(start);
        }
        return false;
    }
}
                    


                        Solution in Python : 
                            
from collections import defaultdict

# Modulus and base of the polynomial rolling hash used by Solution.solve.
MOD = 10 ** 9 + 7
P = 53


class Solution:
    def solve(self, s):
        """Return the length of the longest substring occurring at least twice in ``s``.

        Occurrences may overlap. Returns 0 when no substring repeats (including
        for the empty string). The answer is binary-searched: if a substring of
        length x repeats, so does one of every shorter length.
        """
        N = len(s)

        def can(x):
            """Report whether some substring of length ``x`` occurs at least twice."""
            if x <= 0:
                return True
            if x > N:
                return False
            rolling = 0
            # Weight of the window's leading character; three-argument pow avoids
            # building the huge intermediate integer P ** (x - 1).
            power = pow(P, x - 1, MOD)
            starts_by_hash = defaultdict(list)  # maps hash => start indices of substrings

            for i in range(x):
                rolling = (rolling * P + ord(s[i])) % MOD
            starts_by_hash[rolling].append(0)
            for i in range(x, N):
                # Slide the window: drop s[i - x], then append s[i].
                rolling = (rolling - power * ord(s[i - x])) % MOD
                rolling = (rolling * P + ord(s[i])) % MOD
                start_idx = i - x + 1
                if rolling in starts_by_hash:
                    # Equal hashes are only a hint; compare the actual text.
                    this_string = s[start_idx : i + 1]
                    for other_idx in starts_by_hash[rolling]:
                        if this_string == s[other_idx : other_idx + x]:
                            return True
                starts_by_hash[rolling].append(start_idx)
            return False

        # Invariant: can(lo) is True and can(hi) is False.
        lo = 0
        hi = N + 1
        while lo < hi - 1:
            mid = (lo + hi) // 2
            if can(mid):
                lo = mid
            else:
                hi = mid
        return lo
                    


View More Similar Problems

Components in a graph

There are 2 * N nodes in an undirected graph, and a number of edges connecting some nodes. In each edge, the first value will be between 1 and N, inclusive. The second node will be between N + 1 and 2 * N, inclusive. Given a list of edges, determine the size of the smallest and largest connected components that have 2 or more nodes. A node can have any number of connections. The highest node valu

View Solution →

Kundu and Tree

Kundu is a true tree lover. A tree is a connected graph having N vertices and N-1 edges. Today when he got a tree, he colored each edge with one of either red(r) or black(b) color. He is interested in knowing how many triplets (a, b, c) of vertices there are such that there is at least one edge having red color on all the three paths, i.e. from vertex a to b, vertex b to c and vertex c to a. Note that

View Solution →

Super Maximum Cost Queries

Victoria has a tree, T, consisting of N nodes numbered from 1 to N. Each edge from node Ui to Vi in tree T has an integer weight, Wi. Let's define the cost, C, of a path from some node X to some other node Y as the maximum weight (W) for any edge in the unique path from node X to node Y. Victoria wants your help processing Q queries on tree T, where each query contains 2 integers, L and

View Solution →

Contacts

We're going to make our own Contacts application! The application must perform two types of operations: 1. add name, where name is a string denoting a contact name. This must store name as a new contact in the application. 2. find partial, where partial is a string denoting a partial name to search the application for. It must count the number of contacts starting with partial and print the co

View Solution →

No Prefix Set

There is a given list of strings where each string contains only lowercase letters from a - j, inclusive. The set of strings is said to be a GOOD SET if no string is a prefix of another string. In this case, print GOOD SET. Otherwise, print BAD SET on the first line followed by the string being checked. Note If two strings are identical, they are prefixes of each other. Function Descriptio

View Solution →

Cube Summation

You are given a 3-D Matrix in which each block contains 0 initially. The first block is defined by the coordinate (1,1,1) and the last block is defined by the coordinate (N,N,N). There are two types of queries. UPDATE x y z W updates the value of block (x,y,z) to W. QUERY x1 y1 z1 x2 y2 z2 calculates the sum of the value of blocks whose x coordinate is between x1 and x2 (inclusive), y coor

View Solution →