# Find Strings

### Problem Statement :

```A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here.

You are given n strings w[1], w[2], ......, w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = S[1] ∪ S[2] ∪ ..... ∪ S[n], that is, S is a set of strings that is the union of all substrings in all sets S[1], S[2], ..... S[n]. There will be many queries. For each query you will be given an integer 'k'. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no kth element, return INVALID.

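For example, your strings are w = [abc, cde]. All of the substrings are S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = [a, ab, abc, b, bc, c, cd, cde, d, de, e]. So, for instance if k = 1, we return 'a'. If k = 5, we return 'bc'. If k = 20 though, there is not an S[20] so we return INVALID.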

Function Description

Complete the findStrings function in the editor below. It should return array of strings.

findStrings has the following parameter(s):

w: an array of strings
queries: an array of integers
Input Format

The first line contains an integer n, the number of strings in the array w.
Each of the next n lines consists of a string w[ i ].
The next line contains an integer q, the number of queries.
Each of the next q lines consists of a single integer k.

Constraints

1  <=  n  <=  50
1  <=  | w[ i ] | <=  2000
1  <=  q  <=  500
1  <=  k  <=  10^9

Each character of w[ i ] ∈ ascii[ a - z ]

Output Format

Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.```

### Solution :

```                            ```Solution in C :

in   C++  :

#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every SuffixArray buffer.  The text built in main() holds at
// most 50 words of 2000 letters plus 49 separator symbols (100049 entries),
// stored 1-based, so the previous value of 100011 overflowed on the largest
// allowed input.  Radix keys (symbols and ranks) must also stay below MAXL.
const int MAXL = 50 * 2000 + 100;

// Suffix array over the 1-based integer text A[1..N], built by prefix
// doubling with a two-pass counting sort.
//   SA[r]         start position of the suffix with rank r (1-based ranks)
//   Rank[p]       rank of the suffix starting at position p
//   Height[r]     common-prefix length of the suffixes ranked r-1 and r
//   Contribute[r] how many prefixes of suffix SA[r] are made only of symbols
//                 <= 26 and are not already prefixes of suffix SA[r-1]
struct SuffixArray{
    struct RadixElement{
        int id,k[2];    // id: suffix start; k[0], k[1]: primary and secondary sort key
    }RE[MAXL],RT[MAXL];
    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable counting sort of RE[1..N] by (k[0], k[1]) -- the secondary key is
    // sorted first -- followed by dense rank assignment into Rank[].
    // RE[0] acts as a sentinel that never compares equal to RE[1].
    void RadixSort()
    {
        int i,y;
        for (y=1;y>=0;y--){
            memset(C,0,sizeof(C));
            for (i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (i=1;i<MAXL;i++) C[i]+=C[i-1];
            // Walk backwards so equal keys keep their relative order.
            for (i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (i=1;i<=N;i++) RE[i]=RT[i];
        }
        for (i=1;i<=N;i++){
            Rank[ RE[i].id ]=Rank[ RE[i-1].id ];
            if (RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1])
                Rank[ RE[i].id ]++;
        }
    }

    // Fills Rank[] and SA[] for A[1..N]: ranks by the first symbol, then
    // repeatedly doubles the compared prefix length.
    void CalcSA(){
        int i,k;
        RE[0].k[0]=-1;
        for (i=1;i<=N;i++)
            RE[i].id=i,RE[i].k[0]=A[i],RE[i].k[1]=0;
        RadixSort();
        for (k=1;k+1<=N;k*=2)
        {
            for (i=1;i<=N;i++)
                RE[i].id=i,RE[i].k[0]=Rank[i],RE[i].k[1]=i+k<=N?Rank[i+k]:0;
            RadixSort();
        }
        for (i=1;i<=N;i++)
            SA[ Rank[i] ]=i;
    }

    // Fills Height[] by visiting suffixes in text order and reusing the
    // previous match length minus one as the starting point.  Requires CalcSA().
    void CalcHeight(){
        int i,k,h=0;
        for (i=1;i<=N;i++)
        {
            if (Rank[i]==1)
                h=0;
            else
            {
                k=SA[Rank[i]-1];
                if (--h<0) h=0;
                // Bounded explicitly instead of relying on A[N+1] being a
                // zero sentinel left over from static initialisation.
                while (i+h<=N && k+h<=N && A[i+h]==A[k+h]) h++;
            }
            Height[Rank[i]]=h;
        }
    }

    // Fills Contribute[].  A suffix is only followed up to the first symbol
    // above 26; the part shared with the previous suffix in sorted order
    // (Height) is subtracted.  Requires CalcSA() and CalcHeight().
    void CalContribute(){
        for (int i=1;i<=N;i++)
        {
            const int pos = SA[i];
            int j = pos;
            // Bound check first so A[] is never read past position N.
            while (j<=N && A[j]<=26){
                ++j;
            }
            Contribute[i] = j - pos - Height[i];
        }
    }
}SA;

string check(int query)
{
char result[2000];
int sum = 0,i,pos = 0;
for(i = 1; i<=SA.N;i++)
{
sum += SA.Contribute[i];
if(query <= sum)
{
pos = i;
break;
}
}
if(pos)
{
int index = query - sum;
int size = index + SA.Height[pos] + SA.Contribute[pos];
for (int i=0; i < size; ++i)
{
result[i] = SA.A[SA.SA[pos] + i] + 'a' -1;
}
result[size]='\0';
return result;
}
return "INVALID";
}
int main()
{
int cas;
bool running = true;
while (running)
{
int i;
cin>>cas;
list<string> inlist;
vector<int> querylist;
for(i=0; i < cas; ++i){
string v;
cin>>v;
inlist.push_back(v);
}
list<string>::iterator it;
SA.N=0;
for(it = inlist.begin(),i = 1; it!= inlist.end();it++,++i){
for (const char *p=(*it).c_str();*p;p++)
{
SA.A[++SA.N]=*p-'a'+1;
}
if (i<cas)
SA.A[++SA.N]=30+i;
}

int queryNumber;
cin>>queryNumber;
for(i=0; i < queryNumber; ++i){
int query;
cin>>query;
querylist.push_back(query);
}
SA.CalcSA();
SA.CalcHeight();
SA.CalContribute();
vector<int>::iterator qit;
for(qit = querylist.begin(); qit!= querylist.end();qit++){
cout<<check(*qit)<<endl;
}
running = false;
}
}

In    Java  :

import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;

/**
 * Answers "k-th smallest distinct substring" queries over a set of words.
 *
 * Every word gets an 'A' appended and the words are concatenated into one
 * text.  A suffix array and an LCP array of that text are built once; each
 * query then walks the suffix array in sorted order, counting how many new
 * substrings every suffix contributes.
 */
public class Solution {

    public static void main(String[] args) {

        new Solution().solveProblem();

        out.close();
    }

    static Scanner in =
        new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
        new PrintStream(new BufferedOutputStream(System.out));

    /** maxx[p]: index one past the 'A' that ends the word containing text position p. */
    int[] maxx ;
    /** All words, each followed by 'A', concatenated. */
    String s ="";

    /** Reads the words and the queries from {@code in} and prints one answer per query. */
    public void solveProblem() {

        int n = in.nextInt() ;
        in.nextLine() ;

        String[] sn = new String[n] ;
        // StringBuilder avoids re-copying the whole text for every word.
        StringBuilder joined = new StringBuilder() ;
        for( int i = 0 ; i < n ; i++ ){
            sn[i] =  in.nextLine() + "A" ;
            joined.append(sn[i]) ;
        }
        s = joined.toString() ;

        T = s.toCharArray() ;
        maxx = new int[T.length] ;
        int som = 0 ;   // start of the current word inside T
        for( int i = 0 ; i < n ; i++ ){
            int nu = sn[i].length() ;

            for( int j = som ; j < som + nu ; j++)
                maxx[j] = som + nu ;

            som += nu ;

        }

        this.n = T.length ;

        constructSA() ;
        computeLCP() ;

        int q = in.nextInt() ; in.nextLine() ;
        for( int i = 0 ; i < q ; i++ )
            losOp(in.nextLong()) ;

    }

    /**
     * Prints the k-th (1-based) smallest distinct substring, or "INVALID" when
     * fewer than k exist.
     *
     * @param k rank of the requested substring
     */
    void losOp( long k ) {

        int start = 0 ;
        for( int i = 0 ; i < n ; i++ ){

            int ind = SA[i] ;
            start = LCP[i] ;

            // Letters left in the word from ind, minus what the previous
            // suffix already shares; clamped because the LCP may run across
            // the 'A' terminators.
            long aantal = max(0,maxx[ind] - 1 - ind - start) ;

            if( T[ind] != 'A' && aantal >= k ){
                out.println(s.substring(ind, (int) (ind+start+k))) ;
                return ;
            }else if( T[ind] != 'A')
                k -= aantal ;

        }

        out.println("INVALID") ;

    }

    // The text holds at most 50 words of 2000 letters, each followed by 'A'
    // (100050 characters).  The arrays are also indexed at n itself, so they
    // need more than n entries; the former 100010 was too small.
    int maxlen = 50 * (2000 + 1) + 50 ;
    /** Length of the text T. */
    int n ;

    char[] T ;

    int[] RA = new int[maxlen] ;
    int[] RATemp = new int[maxlen] ;
    int[] SA = new int[maxlen] ;
    int[] SATemp = new int[maxlen] ;

    int[] c = new int[maxlen] ;

    /** Rank of the suffix starting at idx, or 0 when idx lies at or past the end of the text. */
    private int rankAt( int idx ){
        return idx < n ? RA[idx] : 0 ;
    }

    /** Builds SA[0..n-1] for T by prefix doubling with two counting-sort passes per round. */
    void constructSA(){

        for( int i = 0 ; i < n ; i++ ){
            RA[i] = T[i]-'.' ;
            SA[i] = i ;
        }

        for( int k = 1 ; k < n ; k <<= 1 ){
            countingSort( k ) ;
            countingSort( 0 ) ;

            RATemp[SA[0]] = 1 ;
            int r = 1 ;
            for( int i = 1 ; i < n ; i++ ){
                RATemp[SA[i]] = (
                    RA[SA[i]] == RA[SA[i-1]] &&
                    rankAt(SA[i]+k) == rankAt(SA[i-1]+k) ) ? r : ++r ;

            }

            RA = RATemp.clone() ;
        }
    }

    /**
     * Stable counting sort of SA by the rank found k positions after each
     * suffix start (rank 0 past the end of the text).
     *
     * @param k offset of the sort key
     */
    void countingSort( int k ){

        int sum = 0 ;
        int maxi = max( 300, n ) ;

        fill( c, 0 ) ;

        for( int i = 0 ; i < n ; i++ )
            c[ rankAt(i+k) ]++ ;

        // Turn the counts into starting offsets.
        for( int i = 0 ; i <= maxi ; i++ ){
            int t = c[i] ;
            c[i] = sum ;
            sum += t ;
        }

        for( int i = 0 ; i < n ; i++ ){
            SATemp[c[ rankAt(SA[i]+k) ]++ ] = SA[i] ;
        }

        SA = SATemp.clone() ;
    }

    int[] Phi ;
    int[] LCP ;
    /** Largest common-prefix length seen by computeLCP. */
    int max = 0 ;

    /**
     * Fills LCP[i] with the common-prefix length of the suffixes SA[i-1] and
     * SA[i]; LCP[0] stays 0.  Phi[p] is the start of the suffix that precedes
     * suffix p in sorted order.
     */
    void computeLCP(){

        LCP = new int[n] ;
        Phi = new int[n] ;
        int[] PLCP = new int[n] ;

        Phi[SA[0]] = -1 ;
        for( int i = 1 ; i < n ; i++ )
            Phi[SA[i]] = SA[i-1] ;

        int L = 0;
        for( int i = 0 ; i < n ; i++){
            if( Phi[i] == -1){
                PLCP[i] = 0 ;
                continue ;
            }

            while( i+L < n && Phi[i]+L < n
                    && T[i+L] == T[Phi[i]+L])
                L++ ;

            max = max(max,L) ;
            PLCP[i] = L ;
            // The next text position can reuse all but one matched character.
            L = max(L-1,0) ;
        }

        for( int i =1 ; i < n ; i++ )
            LCP[i] = PLCP[SA[i]] ;

    }

}

In   C  :

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One vertex of the compressed trie that main() builds over every suffix of
 * every input word. */
typedef struct node {
    struct node * parent;        /* vertex above this one; getnode() starts it as NULL */
    int           baselen;       /* number of characters on the edge into this vertex */
    char        * base;          /* first character of that edge, inside an input word */
    int           count;         /* running total kept by main() for the subtree below
                                    this vertex; its own edge is counted via baselen */
    struct node * children[26];  /* child per first edge letter, 'a' at index 0 */
    int           numofchildren; /* how many entries of children[] are set */
} node;

const int BLOCKSIZE = (1 << 20);  /* bytes requested from malloc per slab */
const int MAXN = 50;              /* maximum number of input words */
const int MAXLEN = 2000;          /* maximum length of one input word */

int    poolsize = 0;      /* number of nodes in the current slab */
node * poolstart = NULL;  /* first node of the current slab */
int    pooloffset = 0;    /* next unused slot in the current slab */

/*
 * Returns a zero-initialised node taken from the current slab, allocating a
 * new slab when the current one is used up.  Slabs are never freed.
 * Terminates the program if the slab cannot be allocated.
 */
node * getnode() {
    node * fresh;
    int slot;

    if (pooloffset >= poolsize) {
        poolsize = BLOCKSIZE / sizeof(node);
        poolstart = (node *)malloc(poolsize * sizeof(node));
        if (poolstart == NULL) {
            /* Previously unchecked: the stores below would write through NULL. */
            fprintf(stderr, "getnode: out of memory\n");
            exit(EXIT_FAILURE);
        }
        pooloffset = 0;
    }
    fresh = poolstart + pooloffset++;

    fresh -> parent = NULL;
    fresh -> baselen = 0;
    fresh -> base = NULL;
    fresh -> count = 0;
    for (slot = 0; slot < 26; slot++)
        fresh -> children[slot] = NULL;
    fresh -> numofchildren = 0;

    return fresh;
}

int main() {
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;

// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
scanf("%s", w[i]);
lenw = strlen(w[i]);

// one string
for (j = 0; j < lenw; j++) {
currentnode = root;
currentbasepos = 0;
toadd = 0;
for (l = j; l < lenw; l++) {
actchar = *(w[i] + l) - 'a';

if (currentnode == root) {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}

if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
cb = currentnode -> base;
ncb = cb + currentbasepos;

if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;

newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
l--;
continue;
}
}
}

while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}

// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
scanf("%i", &k);

if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
while (k) {
cbl = currentnode -> baselen;
cb  = currentnode -> base;
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
if (i < q - 1) printf("\n");
}

return 0;
}

In    Python3  :

#!/usr/bin/py
# Head ends here
def findStrings(a, query):
    """Print the answer to every query, one per line.

    a     -- list of words
    query -- list of 1-based ranks k

    For each k the k-th smallest distinct substring of the words is printed,
    or whatever find_ith returns when no such substring exists.
    """
    # Every distinct suffix of every word, in lexicographic order.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})

    # LCP[i] is the common-prefix length of suffixes[i-1] and suffixes[i];
    # the first suffix has no predecessor and gets None.
    LCP = [
        None if i == 0 else find_lcp(suffixes[i - 1], suffixes[i])
        for i in range(len(suffixes))
    ]

    for q in query:
        # find_ith takes a 0-based rank.
        print(find_ith(suffixes, LCP, q - 1))

def find_ith(suffixes, LCP, i):
    """Return the substring with 0-based rank i, or "INVALID" if there is none.

    suffixes -- distinct suffixes in sorted order
    LCP      -- LCP[j] is the common-prefix length of suffixes[j-1] and
                suffixes[j]; None (treated as 0) for the first entry

    Each suffix owns len(suf) - lcp consecutive ranks: its prefixes of length
    lcp+1 .. len(suf), the ones the previous suffix did not already provide.
    """
    if i < 0:
        return "INVALID"
    low = 0  # first rank owned by the current suffix
    for suf, lcp in zip(suffixes, LCP):
        if lcp is None:
            lcp = 0
        high = low + len(suf) - lcp  # one past the last rank owned by suf
        if i < high:
            # Rank low is the prefix of length lcp+1, so index directly
            # instead of scanning the range.
            return suf[:lcp + (i - low) + 1]
        low = high
    return "INVALID"

def find_lcp(s1, s2):
    """Return the length of the longest common prefix of s1 and s2."""
    count = 0
    # zip stops at the shorter string, which bounds the comparison.
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            break
        count += 1
    return count
# Tail starts here

if __name__ == '__main__':
    # Input: n, then n words, then q, then q ranks, one value per line.
    n = int(input())
    string = [input().strip() for _ in range(n)]
    q = int(input())
    query = [int(input()) for _ in range(q)]
    findStrings(string, query)
```

