Find Strings
Problem Statement :
A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here. You are given n strings w[1], w[2], ..., w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = S[1] ∪ S[2] ∪ ... ∪ S[n], that is, S is a set of strings that is the union of all substrings in all sets S[1], S[2], ..., S[n]. There will be many queries. For each query you will be given an integer k. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no kth element, return INVALID. For example, your strings are w = [abc, cde]. All of the substrings are S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = {a, ab, abc, b, bc, c, cd, cde, d, de, e}. So, for instance, if k = 1, we return 'a'. If k = 5, we return 'bc'. If k = 20 though, there is no S[20], so we return INVALID. Function Description Complete the findStrings function in the editor below. It should return an array of strings. findStrings has the following parameter(s): w: an array of strings queries: an array of integers Input Format The first line contains an integer n, the number of strings in the array w. Each of the next n lines consists of a string w[i]. The next line contains an integer q, the number of queries. Each of the next q lines consists of a single integer k. Constraints 1 <= n <= 50 1 <= |w[i]| <= 2000 1 <= q <= 500 1 <= k <= 10^9 Each character of w[i] ∈ ascii[a-z] Output Format Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.
Solution :
Solution in C :
In C++ :
#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every suffix-array buffer. The largest input is 50 words of
// 2000 letters joined by 49 separators (100049 symbols); indices are 1-based
// and CalcHeight needs one extra sentinel slot at A[N+1], so the capacity
// must be at least 100051. A little slack is kept on top of that.
const int MAXL=50*2000+50+14;

// Suffix array over the integer text A[1..N] (all values must be >= 1 and
// < MAXL), built by prefix doubling with a two-key radix sort.
//
// Usage: fill A[1..N] and N, then call CalcSA(), CalcHeight() and
// CalContribute() in that order.
//   SA[r]         start position of the suffix with rank r (1-based)
//   Rank[p]       rank of the suffix starting at position p
//   Height[r]     length of the common prefix of suffixes SA[r-1] and SA[r]
//   Contribute[r] number of letter-only prefixes of suffix SA[r] that are not
//                 prefixes of suffix SA[r-1]; values <= 26 count as letters,
//                 larger values end the word
struct SuffixArray{
    struct RadixElement{
        int id,k[2];
    }RE[MAXL],RT[MAXL];
    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable-sorts RE[1..N] by (k[0], k[1]) with two counting-sort passes
    // (least significant key first) and assigns dense ranks starting at 1.
    void RadixSort()
    {
        int i,y;
        for (y=1;y>=0;y--){
            memset(C,0,sizeof(C));
            for (i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (i=1;i<MAXL;i++) C[i]+=C[i-1];
            // Walk backwards so that equal keys keep their relative order.
            for (i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (i=1;i<=N;i++) RE[i]=RT[i];
        }
        // RE[0] acts as a sentinel that never equals a real element, so the
        // first element always receives rank Rank[0]+1 == 1.
        for (i=1;i<=N;i++){
            Rank[ RE[i].id ]=Rank[ RE[i-1].id ];
            if (RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1])
                Rank[ RE[i].id ]++;
        }
    }

    // Builds Rank[] and SA[] for A[1..N] by prefix doubling.
    void CalcSA(){
        int i,k;
        // Set the sentinel explicitly instead of relying on zero-initialised
        // storage, so the object can be reused.
        RE[0].id=0;
        RE[0].k[0]=-1;
        Rank[0]=0;
        for (i=1;i<=N;i++)
            RE[i].id=i,RE[i].k[0]=A[i],RE[i].k[1]=0;
        RadixSort();
        // After the pass for k, ranks order the suffixes by their first 2k symbols.
        for (k=1;k+1<=N;k*=2)
        {
            for (i=1;i<=N;i++)
                RE[i].id=i,RE[i].k[0]=Rank[i],RE[i].k[1]=i+k<=N?Rank[i+k]:0;
            RadixSort();
        }
        for (i=1;i<=N;i++)
            SA[ Rank[i] ]=i;
    }

    // Fills Height[] by walking the suffixes in text order and reusing the
    // previous match length minus one as the starting point.
    void CalcHeight(){
        int i,k,h=0;
        // End-of-text sentinel: 0 never occurs inside A[1..N], so the
        // comparison loop below always stops at or before index N+1.
        A[N+1]=0;
        for (i=1;i<=N;i++)
        {
            if (Rank[i]==1)
                h=0;
            else
            {
                k=SA[Rank[i]-1];
                if (--h<0) h=0;
                for (;A[i+h]==A[k+h];h++);
            }
            Height[Rank[i]]=h;
        }
    }

    // Fills Contribute[]. A single right-to-left sweep tracks how many
    // consecutive letters start at each position, which replaces rescanning
    // to the end of the word for every suffix.
    void CalContribute(){
        int run=0;
        for (int pos=N;pos>=1;pos--)
        {
            run = (A[pos]<=26) ? run+1 : 0;
            Contribute[Rank[pos]] = run - Height[Rank[pos]];
        }
    }
}SA;
// Returns the query-th (1-based) distinct substring in lexicographic order,
// or "INVALID" when query is not positive or exceeds the number of distinct
// substrings. Requires the global suffix array SA to be fully built
// (CalcSA, CalcHeight and CalContribute already called).
string check(int query)
{
    if (query < 1)
        return "INVALID";

    // Find the first suffix (in rank order) whose running total of new
    // substrings reaches the query.
    int sum = 0, pos = 0;
    for (int i = 1; i <= SA.N; i++)
    {
        sum += SA.Contribute[i];
        if (query <= sum)
        {
            pos = i;
            break;
        }
    }
    if (!pos)
        return "INVALID";

    // query - sum is <= 0: it steps back from the longest prefix of this
    // suffix (Height + Contribute symbols) to the one actually requested.
    const int size = (query - sum) + SA.Height[pos] + SA.Contribute[pos];

    // Build the answer in a std::string; a fixed char[2000] has no room for
    // the terminator of a 2000-character answer.
    string result;
    result.reserve(size);
    for (int i = 0; i < size; ++i)
        result.push_back(static_cast<char>(SA.A[SA.SA[pos] + i] + 'a' - 1));
    return result;
}
// Reads the words and the queries from standard input, builds the global
// suffix array over the concatenated words and prints one answer per query.
int main()
{
    // Word count, then the words themselves.
    int cas;
    cin >> cas;
    list<string> words;
    for (int w = 0; w < cas; ++w) {
        string word;
        cin >> word;
        words.push_back(word);
    }

    // Encode letters as 1..26 and put a distinct separator (31, 32, ...)
    // between consecutive words; the last word gets no separator.
    SA.N = 0;
    int ordinal = 1;
    for (list<string>::iterator cur = words.begin(); cur != words.end(); ++cur, ++ordinal) {
        for (const char *ch = cur->c_str(); *ch; ++ch)
            SA.A[++SA.N] = *ch - 'a' + 1;
        if (ordinal < cas)
            SA.A[++SA.N] = 30 + ordinal;
    }

    // All queries are read before any answer is printed.
    int queryNumber;
    cin >> queryNumber;
    vector<int> queries;
    for (int q = 0; q < queryNumber; ++q) {
        int query;
        cin >> query;
        queries.push_back(query);
    }

    SA.CalcSA();
    SA.CalcHeight();
    SA.CalContribute();

    for (vector<int>::size_type q = 0; q < queries.size(); ++q)
        cout << check(queries[q]) << endl;
}
In Java :
import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;
/**
 * Answers "k-th distinct substring" queries over a set of words.
 *
 * The words are concatenated, each followed by an 'A' terminator, and a
 * suffix array plus LCP array is built over the result. Every query then
 * walks the suffixes in sorted order, counting the prefixes each suffix adds.
 */
public class Solution {
    public static void main(String[] args) {
        new Solution().solveProblem();
        out.close();
    }

    static Scanner in =
        new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
        new PrintStream(new BufferedOutputStream(System.out));

    // maxx[j] is the index one past the terminator of the word containing position j.
    int[] maxx ;
    // All words concatenated, each followed by its 'A' terminator.
    String s ="";

    /** Reads the words and queries from standard input and prints one answer per query. */
    public void solveProblem() {
        int n = in.nextInt() ;
        in.nextLine() ;
        String[] sn = new String[n] ;
        StringBuilder joined = new StringBuilder() ;
        for( int i = 0 ; i < n ; i++ ){
            sn[i] = in.nextLine() + "A" ;
            joined.append(sn[i]) ;
        }
        s = joined.toString() ;
        T = s.toCharArray() ;
        maxx = new int[T.length] ;
        int som = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int nu = sn[i].length() ;
            for( int j = som ; j < som + nu ; j++)
                maxx[j] = som + nu ;
            som += nu ;
        }
        this.n = T.length ;
        constructSA() ;
        computeLCP() ;
        int q = in.nextInt() ; in.nextLine() ;
        for( int i = 0 ; i < q ; i++ )
            losOp(in.nextLong()) ;
    }

    /**
     * Prints the k-th (1-based) distinct substring in lexicographic order,
     * or INVALID when k is not positive or there are fewer than k substrings.
     */
    void losOp( long k ) {
        if( k < 1 ){
            out.println("INVALID") ;
            return ;
        }
        int start = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int ind = SA[i] ;
            start = LCP[i] ;
            // Prefixes of this suffix that stay inside its word and were not
            // already produced by the previous suffix. The shared prefix may
            // run past the terminator, hence the clamp at zero.
            long aantal = max(0,maxx[ind] - 1 - ind - start) ;
            if( T[ind] != 'A' && aantal >= k ){
                out.println(s.substring(ind, (int) (ind+start+k))) ;
                return ;
            }else if( T[ind] != 'A')
                k -= aantal ;
        }
        out.println("INVALID") ;
    }

    // Capacity of the work arrays. The largest input is 50 words of 2000
    // letters, each followed by a terminator: 50 * 2001 = 100050 characters.
    int maxlen = 50 * 2001 + 10 ;
    int n ;
    char[] T ;
    int[] RA = new int[maxlen] ;
    int[] RATemp = new int[maxlen] ;
    int[] SA = new int[maxlen] ;
    int[] SATemp = new int[maxlen] ;
    int[] c = new int[maxlen] ;

    /** Rank of the suffix starting at idx, or 0 when idx lies past the end of the text. */
    private int rankAt( int idx ){
        return idx < n ? RA[idx] : 0 ;
    }

    /** Builds SA[0..n-1] for T by prefix doubling with two counting sorts per round. */
    void constructSA(){
        for( int i = 0 ; i < n ; i++ ){
            RA[i] = T[i]-'.' ;
            SA[i] = i ;
        }
        for( int k = 1 ; k < n ; k <<= 1 ){
            // Sort by the second key (rank at offset k), then stably by the first.
            countingSort( k ) ;
            countingSort( 0 ) ;
            RATemp[SA[0]] = 1 ;
            int r = 1 ;
            for( int i = 1 ; i < n ; i++ ){
                // SA[i]+k can reach almost 2n, so the lookup is bounds-checked
                // rather than relying on spare zeroed array capacity.
                RATemp[SA[i]] = (
                    RA[SA[i]] == RA[SA[i-1]] &&
                    rankAt(SA[i]+k) == rankAt(SA[i-1]+k) ) ? r : ++r ;
            }
            RA = RATemp.clone() ;
        }
    }

    /** Stable counting sort of SA by the rank at offset k (0 past the end of the text). */
    void countingSort( int k ){
        int sum = 0 ;
        int maxi = max( 300, n ) ;
        fill( c, 0 ) ;
        for( int i = 0 ; i < n ; i++ )
            c[ ( i + k ) < n ? RA[i+k] : 0 ]++ ;
        // Turn the counts into the start offset of each key.
        for( int i = 0 ; i <= maxi ; i++ ){
            int t = c[i] ;
            c[i] = sum ;
            sum += t ;
        }
        for( int i = 0 ; i < n ; i++ ){
            SATemp[c[(
                SA[i] + k) < n ? RA[SA[i]+k] : 0]++ ] = SA[i] ;
        }
        SA = SATemp.clone() ;
    }

    int[] Phi ;
    int[] LCP ;
    int max = 0 ;

    /**
     * Fills LCP[i] with the length of the common prefix of suffixes SA[i-1]
     * and SA[i] (LCP[0] stays 0), and records the largest value in max.
     */
    void computeLCP(){
        LCP = new int[n] ;
        Phi = new int[n] ;
        int[] PLCP = new int[n] ;
        // Phi[p] is the start of the suffix that precedes suffix p in sorted order.
        Phi[SA[0]] = -1 ;
        for( int i = 1 ; i < n ; i++ )
            Phi[SA[i]] = SA[i-1] ;
        int L = 0;
        for( int i = 0 ; i < n ; i++){
            if( Phi[i] == -1){
                PLCP[i] = 0 ;
                continue ;
            }
            while( i+L < n && Phi[i]+L < n
                    && T[i+L] == T[Phi[i]+L])
                L++ ;
            max = max(max,L) ;
            PLCP[i] = L ;
            // The next suffix in text order shares at least L-1 characters.
            L = max(L-1,0) ;
        }
        for( int i =1 ; i < n ; i++ )
            LCP[i] = PLCP[SA[i]] ;
    }
}
In C :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Node of a compressed trie holding every substring of the input words.
typedef struct node {
    struct node * parent;       // node one level up; NULL for a fresh node
    int baselen;                // number of characters in this node's label
    char * base;                // first character of the label (points into the input text)
    int count;                  // total label length of all descendants
    struct node * children[26]; // children indexed by the first letter of their label minus 'a'
    int numofchildren;          // number of non-NULL entries in children
} node;

const int BLOCKSIZE = (1 << 20); // bytes requested from malloc per pool block
const int MAXN = 50;             // maximum number of input words
const int MAXLEN = 2000;         // maximum length of one input word

// Bump-allocator state: nodes are handed out from poolstart[pooloffset..poolsize).
int poolsize = 0;
node * poolstart = NULL;
int pooloffset = 0;

// Returns a zero-initialised node taken from the current pool block. When the
// block is used up a new one is allocated; blocks are never freed. Terminates
// the program if no memory is available.
node * getnode() {
    node * retnode;
    int i;
    if (pooloffset >= poolsize) {
        poolsize = BLOCKSIZE / sizeof(node);
        poolstart = (node *)malloc(poolsize * sizeof(node));
        if (poolstart == NULL) {
            // The result was previously used unchecked; fail loudly instead.
            fprintf(stderr, "getnode: out of memory\n");
            exit(EXIT_FAILURE);
        }
        pooloffset = 0;
    }
    retnode = poolstart + pooloffset++;
    retnode -> parent = NULL;
    retnode -> baselen = 0;
    retnode -> base = NULL;
    retnode -> count = 0;
    for (i = 0; i < 26; i++)
        retnode -> children[i] = NULL;
    retnode -> numofchildren = 0;
    return retnode;
}
int main() {
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;
// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
scanf("%s", w[i]);
lenw = strlen(w[i]);
// one string
for (j = 0; j < lenw; j++) {
currentnode = root;
currentbasepos = 0;
toadd = 0;
for (l = j; l < lenw; l++) {
actchar = *(w[i] + l) - 'a';
if (currentnode == root) {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}
if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
cb = currentnode -> base;
ncb = cb + currentbasepos;
if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;
newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
l--;
continue;
}
}
}
while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}
// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
scanf("%i", &k);
if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
while (k) {
cbl = currentnode -> baselen;
cb = currentnode -> base;
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
if (i < q - 1) printf("\n");
}
return 0;
}
In Python3 :
#!/usr/bin/py
# Head ends here
def findStrings(a,query):
    """Print, for every k in ``query``, the k-th distinct substring of ``a``.

    ``a`` is a list of strings and ``query`` a list of 1-based positions in
    the lexicographically sorted set of all distinct substrings. One line is
    printed per query, in order; the function returns nothing.
    """
    # Every substring is a prefix of some suffix, so the distinct suffixes in
    # sorted order are enough to enumerate all distinct substrings.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})
    # LCP[i] is the common-prefix length of suffixes[i-1] and suffixes[i];
    # the first suffix has no predecessor and gets None.
    LCP = [
        None if idx == 0 else find_lcp(suffixes[idx - 1], suffixes[idx])
        for idx in range(len(suffixes))
    ]
    for q in query:
        print(find_ith(suffixes, LCP, q-1))
def find_ith(suffixes, LCP, i):
    """Return the substring at 0-based position ``i``, or ``"INVALID"``.

    ``suffixes`` is a sorted list of distinct suffixes and ``LCP[j]`` the
    length of the common prefix of ``suffixes[j-1]`` and ``suffixes[j]``
    (``None`` for the first entry). Suffix ``j`` contributes its prefixes
    longer than ``LCP[j]``, shortest first. ``"INVALID"`` is returned when
    ``i`` is negative or past the last substring.
    """
    if i < 0:
        return "INVALID"
    low = 0  # position of the first substring contributed by the current suffix
    for suf, lcp in zip(suffixes, LCP):
        if lcp is None:
            lcp = 0
        high = low + len(suf) - lcp  # one past this suffix's last substring
        if i < high:
            # The (i - low)-th new prefix has length lcp + (i - low) + 1;
            # computing it directly avoids scanning the candidates one by one.
            return suf[:lcp + (i - low) + 1]
        low = high
    return "INVALID"
def find_lcp(s1,s2):
    """Return the length of the longest common prefix of ``s1`` and ``s2``."""
    matched = 0
    # zip stops at the shorter string, which bounds the comparison.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched
# Tail starts here
if __name__ == '__main__':
    # Input layout: n, then n words, then q, then q query values, one per line.
    n = int(input())
    string = [input().strip() for _ in range(n)]
    q = int(input())
    query = [int(input()) for _ in range(q)]
    findStrings(string, query)
View More Similar Problems
Tree: Preorder Traversal
Complete the preorder function in the editor below, which has 1 parameter: a pointer to the root of a binary tree. It must print the values in the tree's preorder traversal as a single line of space-separated values. Input Format Our test code passes the root node of a binary tree to the preOrder function. Constraints 1 <= Nodes in the tree <= 500 Output Format Print the tree's
View Solution →Tree: Postorder Traversal
Complete the postorder function in the editor below. It received 1 parameter: a pointer to the root of a binary tree. It must print the values in the tree's postorder traversal as a single line of space-separated values. Input Format Our test code passes the root node of a binary tree to the postorder function. Constraints 1 <= Nodes in the tree <= 500 Output Format Print the
View Solution →Tree: Inorder Traversal
In this challenge, you are required to implement inorder traversal of a tree. Complete the inorder function in your editor below, which has 1 parameter: a pointer to the root of a binary tree. It must print the values in the tree's inorder traversal as a single line of space-separated values. Input Format Our hidden tester code passes the root node of a binary tree to your inOrder func
View Solution →Tree: Height of a Binary Tree
The height of a binary tree is the number of edges between the tree's root and its furthest leaf. For example, the following binary tree is of height : image Function Description Complete the getHeight or height function in the editor. It must return the height of a binary tree as an integer. getHeight or height has the following parameter(s): root: a reference to the root of a binary
View Solution →Tree : Top View
Given a pointer to the root of a binary tree, print the top view of the binary tree. The tree as seen from the top the nodes, is called the top view of the tree. For example : 1 \ 2 \ 5 / \ 3 6 \ 4 Top View : 1 -> 2 -> 5 -> 6 Complete the function topView and print the resulting values on a single line separated by space.
View Solution →Tree: Level Order Traversal
Given a pointer to the root of a binary tree, you need to print the level order traversal of this tree. In level-order traversal, nodes are visited level by level from left to right. Complete the function levelOrder and print the values in a single line separated by a space. For example: 1 \ 2 \ 5 / \ 3 6 \ 4 F
View Solution →