Longest Repeating Substring - Google Top Interview Questions
Problem Statement :
Given a lowercase alphabet string s, return the length of the longest substring that occurs at least two times in s. If there's no such string, return 0. Constraints 0 ≤ n ≤ 1,000 where n is the length of s Example 1 Input s = "abcdzabcd" Output 4 Explanation The longest substring that occurs more than once is "abcd". Example 2 Input s = "abcdefg" Output 0 Explanation There's no repeating substring.
Solution :
Solution in C++ :
string s;
int n;
struct node {
int l, r, par, link;
map<char, int> next;
node(int l = 0, int r = 0, int par = -1) : l(l), r(r), par(par), link(-1) {
}
int len() {
return r - l;
}
int &get(char c) {
if (!next.count(c)) next[c] = -1;
return next[c];
}
};
node t[2005];
int sz;
int ans;
void dfs(int curr, int len) {
len += t[curr].r - t[curr].l;
if (t[curr].next.size()) {
ans = max(ans, len);
for (auto out : t[curr].next) {
dfs(out.second, len);
}
}
}
struct state {
int v, pos;
state(int v, int pos) : v(v), pos(pos) {
}
};
state ptr(0, 0);
state go(state st, int l, int r) {
while (l < r)
if (st.pos == t[st.v].len()) {
st = state(t[st.v].get(s[l]), 0);
if (st.v == -1) return st;
} else {
if (s[t[st.v].l + st.pos] != s[l]) return state(-1, -1);
if (r - l < t[st.v].len() - st.pos) return state(st.v, st.pos + r - l);
l += t[st.v].len() - st.pos;
st.pos = t[st.v].len();
}
return st;
}
int split(state st) {
if (st.pos == t[st.v].len()) return st.v;
if (st.pos == 0) return t[st.v].par;
node v = t[st.v];
int id = sz++;
t[id] = node(v.l, v.l + st.pos, v.par);
t[v.par].get(s[v.l]) = id;
t[id].get(s[v.l + st.pos]) = st.v;
t[st.v].par = id;
t[st.v].l += st.pos;
return id;
}
int get_link(int v) {
if (t[v].link != -1) return t[v].link;
if (t[v].par == -1) return 0;
int to = get_link(t[v].par);
return t[v].link = split(go(state(to, t[to].len()), t[v].l + (t[v].par == 0), t[v].r));
}
void tree_extend(int pos) {
for (;;) {
state nptr = go(ptr, pos, pos + 1);
if (nptr.v != -1) {
ptr = nptr;
return;
}
int mid = split(ptr);
int leaf = sz++;
t[leaf] = node(pos, n, mid);
t[mid].get(s[pos]) = leaf;
ptr.v = get_link(mid);
ptr.pos = t[ptr.v].len();
if (!mid) break;
}
}
void build_tree() {
sz = 1;
ptr = state(0, 0);
for (int i = 0; i < n; ++i) tree_extend(i);
}
int solve(string str) {
str += "$";
s = str;
n = str.size();
build_tree();
ans = 0;
dfs(0, 0);
for (int i = 0; i <= n; i++) {
t[i] = node();
}
return ans;
}
Solution in Java :
import java.util.*;
class Solution {
public class RabinKarp {
String s;
long[] prefix_hash;
long[] powers;
final int PRIME1 = 1009;
final int MOD1 = 998244353;
/**
* Class constructor specifiying the string s we want to work with.
*/
RabinKarp(String s) {
this.s = s;
this.prefix_hash = new long[this.s.length() + 1];
this.powers = new long[this.s.length() + 1];
this.computeHashAndPow();
}
/**
* Computes the prefix hash values and computes the prefix powers.
*/
private void computeHashAndPow() {
this.powers[0] = 1;
for (int i = 1; i <= this.s.length(); i++) {
long c = (long) this.s.charAt(i - 1) - 'a' + 1;
this.prefix_hash[i] = ((prefix_hash[i - 1] * PRIME1 + c) % MOD1);
this.powers[i] = ((this.powers[i - 1] * PRIME1) % MOD1);
}
}
/**
* Takes a left and right inclusive indices that resembles a substring and calculates the
* hash in O(1) time
* @param l The left of the substring
* @param r The right of the substring
* @return The hash of the substring [l,r].
*/
public long getHashSubstring(int l, int r) {
return (this.prefix_hash[r + 1] - this.prefix_hash[l] * this.powers[r - l + 1] % MOD1
+ MOD1)
% MOD1;
}
}
public int solve(String s) {
int n = s.length();
RabinKarp rk = new RabinKarp(s);
int res = 0;
int left = 1;
int right = n;
while (left <= right) {
int candidate_length = left + (right - left) / 2;
HashSet<Long> seen = new HashSet();
boolean worked = false;
for (int i = 0; i < n; i++) {
if (i + candidate_length - 1 < n) {
long hash = rk.getHashSubstring(i, i + candidate_length - 1);
if (seen.contains(hash)) {
worked = true;
res = Math.max(res, candidate_length);
}
seen.add(hash);
}
}
if (worked) {
left = candidate_length + 1;
} else {
right = candidate_length - 1;
}
}
return res;
}
}
Solution in Python :
MOD = 10 ** 9 + 7
P = 53
class Solution:
def solve(self, s):
N = len(s)
def can(x):
if x <= 0:
return True
if x > N:
return False
hash = 0
power = (P ** (x - 1)) % MOD
hash_to_idx = defaultdict(list) # maps hash => start index of substring
for i in range(x):
hash = (hash * P + ord(s[i])) % MOD
hash_to_idx[hash].append(0)
for i in range(x, N):
hash = (hash - power * ord(s[i - x])) % MOD
hash = (hash * P + ord(s[i])) % MOD
start_idx = i - x + 1
if hash in hash_to_idx:
this_string = s[start_idx : i + 1]
for other_idx in hash_to_idx[hash]:
other_string = s[other_idx : other_idx + x]
if this_string == other_string:
return True
hash_to_idx[hash].append(start_idx)
return False
lo = 0
hi = N + 1
while lo < hi - 1:
mid = (lo + hi) // 2
if can(mid):
lo = mid
else:
hi = mid
return lo
View More Similar Problems
Components in a graph
There are 2 * N nodes in an undirected graph, and a number of edges connecting some nodes. In each edge, the first value will be between 1 and N, inclusive. The second node will be between N + 1 and , 2 * N inclusive. Given a list of edges, determine the size of the smallest and largest connected components that have or more nodes. A node can have any number of connections. The highest node valu
View Solution →Kundu and Tree
Kundu is true tree lover. Tree is a connected graph having N vertices and N-1 edges. Today when he got a tree, he colored each edge with one of either red(r) or black(b) color. He is interested in knowing how many triplets(a,b,c) of vertices are there , such that, there is atleast one edge having red color on all the three paths i.e. from vertex a to b, vertex b to c and vertex c to a . Note that
View Solution →Super Maximum Cost Queries
Victoria has a tree, T , consisting of N nodes numbered from 1 to N. Each edge from node Ui to Vi in tree T has an integer weight, Wi. Let's define the cost, C, of a path from some node X to some other node Y as the maximum weight ( W ) for any edge in the unique path from node X to Y node . Victoria wants your help processing Q queries on tree T, where each query contains 2 integers, L and
View Solution →Contacts
We're going to make our own Contacts application! The application must perform two types of operations: 1 . add name, where name is a string denoting a contact name. This must store name as a new contact in the application. find partial, where partial is a string denoting a partial name to search the application for. It must count the number of contacts starting partial with and print the co
View Solution →No Prefix Set
There is a given list of strings where each string contains only lowercase letters from a - j, inclusive. The set of strings is said to be a GOOD SET if no string is a prefix of another string. In this case, print GOOD SET. Otherwise, print BAD SET on the first line followed by the string being checked. Note If two strings are identical, they are prefixes of each other. Function Descriptio
View Solution →Cube Summation
You are given a 3-D Matrix in which each block contains 0 initially. The first block is defined by the coordinate (1,1,1) and the last block is defined by the coordinate (N,N,N). There are two types of queries. UPDATE x y z W updates the value of block (x,y,z) to W. QUERY x1 y1 z1 x2 y2 z2 calculates the sum of the value of blocks whose x coordinate is between x1 and x2 (inclusive), y coor
View Solution →