Find Strings


Problem Statement :


A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here.

You are given n strings w[1], w[2], ......, w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = S[1] ∪ S[2] ∪ ..... ∪ S[n], that is, S is the set of strings that is the union of all substrings in all sets S[1], S[2], ..... S[n]. There will be many queries. For each query you will be given an integer 'k'. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no kth element, return INVALID.

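For example, your strings are w = [abc, cde]. All of the substrings are S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = {a, ab, abc, b, bc, c, cd, cde, d, de, e}. So, for instance, if k = 1, we return 'a'. If k = 5, we return 'bc'. If k = 20 though, there is no S[20], so we return INVALID.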

Function Description

Complete the findStrings function in the editor below. It should return array of strings.

findStrings has the following parameter(s):

w: an array of strings
queries: an array of integers
Input Format

The first line contains an integer n, the number of strings in the array w.
Each of the next n lines consists of a string w[ i ].
The next line contains an integer q, the number of queries.
Each of the next q lines consists of a single integer k.


Constraints

1  <=  n  <=  50
1  <=  | w[ i ] | <=  2000
1  <=  q  <=  500
1  <=  k  <=  10^9



Each character of w[ i ] ∈ ascii[ a - z ]

Output Format

Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.



Solution :



title-img


                            Solution in C :

in   C++  :









#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every per-position array below. The text is 1-indexed, holds at
// most 50 words of 2000 letters plus 49 separator symbols (100049 entries),
// and position N+1 is read as a zero sentinel, so at least 100051 slots are
// required; the previous value 100011 overflowed on maximum-size input.
const int MAXL=50*2000+50+16;

// Suffix array over the 1-indexed integer text A[1..N], built by prefix
// doubling with a two-key radix sort.
//   A[i]          : letters are 1..26; larger values are word separators
//   SA[r]         : start position of the suffix with rank r (r = 1..N)
//   Rank[i]       : rank of the suffix starting at position i
//   Height[r]     : common-prefix length of the suffixes ranked r-1 and r
//   Contribute[r] : number of substrings first introduced by the suffix of rank r
// Usage: fill N and A[1..N], then call CalcSA(), CalcHeight(), CalContribute()
// in that order.
struct SuffixArray{
    // One sortable record: `id` is the text position, k[0]/k[1] are the
    // primary and secondary sort keys.
    struct RadixElement{
        int id,k[2];
    }RE[MAXL],RT[MAXL];
    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable counting sort of RE[1..N] by (k[0], k[1]) - secondary key first,
    // primary key second - followed by dense rank assignment starting at 1.
    // Records with identical key pairs share a rank.
    void RadixSort()
    {
        for (int y=1;y>=0;y--){
            memset(C,0,sizeof(C));
            for (int i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (int i=1;i<MAXL;i++) C[i]+=C[i-1];
            // Walk backwards so equal keys keep their relative order.
            for (int i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (int i=1;i<=N;i++) RE[i]=RT[i];
        }
        for (int i=1;i<=N;i++){
            // RE[0] is a sentinel whose primary key (-1) never matches a real
            // key, so the first record always receives rank 1.
            Rank[RE[i].id]=Rank[RE[i-1].id];
            if (RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1])
                Rank[RE[i].id]++;
        }
    }

    // Builds Rank[] and SA[] for A[1..N].
    void CalcSA(){
        // Explicit sentinel state instead of relying on zero-initialised storage.
        RE[0].id=0;
        RE[0].k[0]=-1;
        Rank[0]=0;
        for (int i=1;i<=N;i++)
            RE[i].id=i,RE[i].k[0]=A[i],RE[i].k[1]=0;
        RadixSort();
        // Each round ranks suffixes by their first 2k symbols, using the ranks
        // of the k-symbol halves; a half running past the end counts as key 0.
        for (int k=1;k+1<=N;k*=2)
        {
            for (int i=1;i<=N;i++)
                RE[i].id=i,RE[i].k[0]=Rank[i],RE[i].k[1]=i+k<=N?Rank[i+k]:0;
            RadixSort();
        }
        for (int i=1;i<=N;i++)
            SA[Rank[i]]=i;
    }

    // Fills Height[] by walking the text in position order and reusing the
    // previous match length minus one as the starting point for the next scan.
    void CalcHeight(){
        int h=0;
        for (int i=1;i<=N;i++)
        {
            if (Rank[i]==1)
                h=0;
            else
            {
                const int k=SA[Rank[i]-1];
                if (--h<0) h=0;
                // Explicit bounds so the scan never depends on array contents
                // beyond position N.
                while (i+h<=N && k+h<=N && A[i+h]==A[k+h]) h++;
            }
            Height[Rank[i]]=h;
        }
    }

    // Contribute[r] = (letters from SA[r] up to the next separator or the end
    // of the text) - Height[r]: the prefixes of that suffix which stay inside
    // one word and are not already prefixes of the preceding suffix.
    void CalContribute(){
        for (int i=1;i<=N;i++)
        {
            const int pos=SA[i];
            int j=pos;
            // Bound check first so A[] is never read past position N.
            while (j<=N && A[j]<=26){
                ++j;
            }
            Contribute[i]=j-pos-Height[i];
        }
    }
}SA;

string check(int query)
{
char result[2000];
int sum = 0,i,pos = 0;
for(i = 1; i<=SA.N;i++)
{
sum += SA.Contribute[i];
if(query <= sum)
{
pos = i;
break;
}
}
if(pos)
{
int index = query - sum;
int size = index + SA.Height[pos] + SA.Contribute[pos];        
for (int i=0; i < size; ++i)
{
result[i] = SA.A[SA.SA[pos] + i] + 'a' -1;
}
result[size]='\0';
return result;        
}
return "INVALID";    
}
int main()
{
int cas;
bool running = true;    
while (running)
{
int i;
cin>>cas;
list<string> inlist;
vector<int> querylist;
for(i=0; i < cas; ++i){
string v;
cin>>v;        
inlist.push_back(v);
}
list<string>::iterator it;        
SA.N=0;    
for(it = inlist.begin(),i = 1; it!= inlist.end();it++,++i){    
for (const char *p=(*it).c_str();*p;p++)
{
SA.A[++SA.N]=*p-'a'+1;                
}
if (i<cas)
SA.A[++SA.N]=30+i;
}

int queryNumber;
cin>>queryNumber;
for(i=0; i < queryNumber; ++i){
int query;
cin>>query;
querylist.push_back(query);
}
SA.CalcSA();
SA.CalcHeight();
SA.CalContribute();
vector<int>::iterator qit;
for(qit = querylist.begin(); qit!= querylist.end();qit++){        
cout<<check(*qit)<<endl;
}
running = false;
}
}










In    Java  :









import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;

/**
 * Answers "k-th smallest distinct substring" queries over a set of lower-case
 * words read from standard input.
 *
 * Every word gets the separator 'A' appended (it sorts before 'a'..'z') and
 * all of them are concatenated into one text. A suffix array and an LCP array
 * are built over that text; each query then walks the suffixes in sorted
 * order, counting the substrings each suffix contributes.
 */
public class Solution {

    /** Runs the solver on standard input and flushes the buffered output. */
    public static void main(String[] args) {

        new Solution().solveProblem();

        out.close();
    }

    static Scanner in =
        new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
        new PrintStream(new BufferedOutputStream(System.out));


    /** maxx[j] is the exclusive end index in T of the word (separator included) that contains position j. */
    int[] maxx ;
    /** Concatenation of all words, each followed by the separator 'A'. */
    String s ="";

    /** Reads the words and queries, builds the index and prints one answer per query. */
    public void solveProblem() {

        int n = in.nextInt() ;
        in.nextLine() ;

        String[] sn = new String[n] ;
        // A StringBuilder avoids re-copying the growing text for every word.
        StringBuilder joined = new StringBuilder() ;
        for( int i = 0 ; i < n ; i++ ){
            sn[i] =  in.nextLine() + "A" ;
            joined.append(sn[i]) ;
        }
        s = joined.toString() ;

        T = s.toCharArray() ;
        maxx = new int[T.length] ;
        int som = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int nu = sn[i].length() ;

            for( int j = som ; j < som + nu ; j++)
                maxx[j] = som + nu ;

            som += nu ;
        }

        this.n = T.length ;

        constructSA() ;
        computeLCP() ;

        int q = in.nextInt() ; in.nextLine() ;
        for( int i = 0 ; i < q ; i++ )
            losOp(in.nextLong()) ;
    }

    /**
     * Prints the k-th (1-indexed) distinct substring in lexicographic order,
     * or "INVALID" when fewer than k distinct substrings exist.
     */
    void losOp( long k ) {

        if( k < 1 ){
            out.println("INVALID") ;
            return ;
        }

        int start = 0 ;
        for( int i = 0 ; i < n ; i++ ){

            int ind = SA[i] ;
            start = LCP[i] ;

            // Letters left in this word after position ind, minus the prefix
            // shared with the previous suffix. The shared prefix may run
            // across a separator, hence the clamp to zero.
            long aantal = max(0,maxx[ind] - 1 - ind - start) ;

            if( T[ind] != 'A' && aantal >= k ){
                out.println(s.substring(ind, (int) (ind+start+k))) ;
                return ;
            }else if( T[ind] != 'A')
                k -= aantal ;
        }

        out.println("INVALID") ;
    }

    // Capacity of the work arrays. The text can be as long as 50 words of
    // 2000 letters plus one separator each (100050 characters); the previous
    // value 100010 was too small for that and caused
    // ArrayIndexOutOfBoundsException on maximum-size input.
    int maxlen = 50 * 2001 + 50 ;
    /** Length of the concatenated text T. */
    int n ;

    char[] T ;

    int[] RA = new int[maxlen] ;
    int[] RATemp = new int[maxlen] ;
    int[] SA = new int[maxlen] ;
    int[] SATemp = new int[maxlen] ;

    int[] c = new int[maxlen] ;

    /** Builds the suffix array SA[0..n-1] of T by prefix doubling. */
    void constructSA(){

        for( int i = 0 ; i < n ; i++ ){
            // Subtracting '.' keeps every rank positive; 0 is reserved for
            // "past the end of the text".
            RA[i] = T[i]-'.' ;
            SA[i] = i ;
        }

        for( int k = 1 ; k < n ; k <<= 1 ){
            // Stable sort by the second half first, then by the first half.
            countingSort( k ) ;
            countingSort( 0 ) ;

            RATemp[SA[0]] = 1 ;
            int r = 1 ;
            for( int i = 1 ; i < n ; i++ ){
                RATemp[SA[i]] = (
                    RA[SA[i]] == RA[SA[i-1]] &&
                    RA[SA[i]+k] == RA[SA[i-1]+k] ) ? r : ++r ;
            }

            RA = RATemp.clone() ;
        }
    }

    /** Stable counting sort of SA by the rank found k positions after each suffix start. */
    void countingSort( int k ){

        int sum = 0 ;
        // Ranks are either character codes (below 300) or values up to n.
        int maxi = max( 300, n ) ;

        fill( c, 0 ) ;

        for( int i = 0 ; i < n ; i++ )
            c[ ( i + k ) < n ? RA[i+k] : 0 ]++ ;

        // Turn the counts into starting offsets.
        for( int i = 0 ; i <= maxi ; i++ ){
            int t = c[i] ;
            c[i] = sum ;
            sum += t ;
        }

        for( int i = 0 ; i < n ; i++ ){
            SATemp[c[(
                SA[i] + k) < n ? RA[SA[i]+k] : 0]++ ] = SA[i] ;
        }

        SA = SATemp.clone() ;
    }

    /** Phi[i] is the start of the suffix that precedes suffix i in sorted order, or -1 for the first one. */
    int[] Phi ;
    /** LCP[r] is the common-prefix length of the suffixes ranked r-1 and r; LCP[0] is 0. */
    int[] LCP ;
    /** Largest common-prefix length seen by computeLCP(). */
    int max = 0 ;

    /** Fills LCP[] by computing the permuted LCP values in text order. */
    void computeLCP(){

        LCP = new int[n] ;
        Phi = new int[n] ;
        int[] PLCP = new int[n] ;

        Phi[SA[0]] = -1 ;
        for( int i = 1 ; i < n ; i++ )
            Phi[SA[i]] = SA[i-1] ;

        int L = 0;
        for( int i = 0 ; i < n ; i++){
            if( Phi[i] == -1){
                PLCP[i] = 0 ;
                continue ;
            }

            while( i+L < n && Phi[i]+L < n
                    && T[i+L] == T[Phi[i]+L])
                L++ ;

            max = max(max,L) ;
            PLCP[i] = L ;
            // The next text position shares at least L-1 characters with its
            // own predecessor, so the scan resumes from there.
            L = max(L-1,0) ;
        }

        for( int i =1 ; i < n ; i++ )
            LCP[i] = PLCP[SA[i]] ;
    }

}









In   C  :








#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One node of the compressed trie that main() builds over all substrings.
 * The node's edge label is the `baselen` characters starting at `base`.
 */
typedef struct node {
/* node this one hangs from; NULL for the root */
struct node * parent;
/* number of characters in this node's label (0 for the root) */
int           baselen;
/* first character of the label; points into one of the input words and is
   not NUL-terminated at baselen */
char        * base;
/* running total of the `toadd` amounts that main() adds to this node; the
   query loop treats baselen + count as the number of strings reachable
   through this node */
int           count;
/* child per next letter, indexed by letter - 'a'; NULL when absent */
struct node * children[26];
/* number of non-NULL entries in children */
int           numofchildren;
} node;

/* Size in bytes of one node pool block; getnode() divides it by sizeof(node). */
const int BLOCKSIZE = (1 << 20);
/* Dimensions main() uses for its input buffer: number of words and characters per word. */
const int MAXN = 50;
const int MAXLEN = 2000;

/* Node pool state used by getnode(). */
/* number of nodes in the current block (0 until the first block is allocated) */
int    poolsize = 0;
/* start of the current block */
node * poolstart = NULL;
/* index of the next unused node in the current block */
int    pooloffset = 0;

/*
 * Returns a freshly initialised node (no parent, empty label, zero count, no
 * children) taken from a block pool.
 *
 * Nodes are carved out of blocks of BLOCKSIZE bytes. When the current block
 * is used up a new one is allocated; earlier blocks are never freed because
 * their nodes stay in use for the lifetime of the program.
 *
 * Terminates the program with EXIT_FAILURE if a block cannot be allocated;
 * previously a failed malloc led to a NULL pointer dereference.
 */
node * getnode() {
    node * retnode;
    int i;

    if (pooloffset < poolsize) {
        retnode = poolstart + pooloffset++;
    } else {
        poolsize = BLOCKSIZE / sizeof(node);
        poolstart = (node *)malloc(poolsize * sizeof(node));
        if (poolstart == NULL) {
            fprintf(stderr, "getnode: out of memory\n");
            exit(EXIT_FAILURE);
        }
        /* slot 0 of the new block is handed out right away */
        pooloffset = 1;
        retnode = poolstart;
    }

    retnode -> parent = NULL;
    retnode -> baselen = 0;
    retnode -> base = NULL;
    retnode -> count = 0;
    for (i = 0; i < 26; i++)
        retnode -> children[i] = NULL;
    retnode -> numofchildren = 0;

    return retnode;
}

int main() {
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;

// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
scanf("%s", w[i]);
lenw = strlen(w[i]);

// one string
for (j = 0; j < lenw; j++) {
currentnode = root;
currentbasepos = 0;
toadd = 0;
for (l = j; l < lenw; l++) {
actchar = *(w[i] + l) - 'a';

if (currentnode == root) {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}

if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
cb = currentnode -> base;
ncb = cb + currentbasepos;

if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;

newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
l--;
continue;
}
}
}

while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}

// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
scanf("%i", &k);

if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
while (k) {
cbl = currentnode -> baselen;
cb  = currentnode -> base;
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
if (i < q - 1) printf("\n");
}

return 0;
}











In    Python3  :








#!/usr/bin/py
# Head ends here
def findStrings(a,query):
    """Print, for every k in ``query``, the k-th distinct substring of ``a``.

    ``a`` is a list of words and ``query`` a list of 1-indexed ranks into the
    lexicographically sorted set of all distinct substrings of those words.
    One line is printed per query: the substring, or ``INVALID`` when the
    rank is out of range. Nothing is returned.
    """
    # Every distinct suffix once, in sorted order; each substring is a prefix
    # of exactly one "first" suffix in this order.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})
    # LCP[i] is the common-prefix length of suffixes[i-1] and suffixes[i];
    # the first suffix has no predecessor and gets None.
    LCP = [
        None if i == 0 else find_lcp(suffixes[i-1], suffixes[i])
        for i in range(len(suffixes))
    ]
    for q in query:
        print(find_ith(suffixes, LCP, q-1))
       
        
def find_ith(suffixes, LCP, i):
    """Return the substring at 0-based rank ``i``, or ``"INVALID"``.

    ``suffixes`` is a sorted list of distinct suffixes and ``LCP[j]`` the
    length of the common prefix of ``suffixes[j-1]`` and ``suffixes[j]``
    (``None`` for the first entry). Suffix ``j`` contributes its prefixes
    longer than ``LCP[j]``, shortest first; the ranks run through those
    contributions in suffix order.
    """
    if i < 0:
        return "INVALID"
    low = 0  # rank of the first substring contributed by the current suffix
    for suf, lcp in zip(suffixes, LCP):
        if lcp is None:
            lcp = 0
        high = low + len(suf) - lcp  # one past the last rank of this suffix
        if i < high:
            # The (i - low)-th new prefix is lcp + (i - low) + 1 characters long.
            return suf[:lcp + (i - low) + 1]
        low = high
    return "INVALID"
                
def find_lcp(s1,s2):
    """Return the length of the longest common prefix of ``s1`` and ``s2``."""
    matched = 0
    # zip stops at the shorter string, so no explicit bound is needed.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched
# Tail starts here

if __name__ == '__main__':
    # Input layout: word count, one word per line, query count, one rank per line.
    n = int(input())
    string = [input().strip() for _ in range(n)]
    q = int(input())
    query = [int(input()) for _ in range(q)]
    findStrings(string,query)
                        








View More Similar Problems

AND xor OR

Given an array A[] of N distinct elements. Let M1 and M2 be the smallest and the next smallest element in the interval [L, R] where 1 <= L < R <= N. S_i = (((M1 AND M2) XOR (M1 OR M2)) AND (M1 XOR M2)), where AND, OR and XOR are the bitwise operators ∧, ∨ and ⊕ respectively. Your task is to find the maximum possible value of S_i. Input Format First line contains integer N. Second line contains N integers, representing elements of the array A[] . Output Format Print the value

View Solution →

Waiter

You are a waiter at a party. There is a pile of numbered plates. Create an empty answers array. At each iteration, i, remove each plate from the top of the stack in order. Determine if the number on the plate is evenly divisible by the ith prime number. If it is, stack it in pile Bi. Otherwise, stack it in stack Ai. Store the values in Bi from top to bottom in answers. In the next iteration, do the

View Solution →

Queue using Two Stacks

A queue is an abstract data type that maintains the order in which elements were added to it, allowing the oldest elements to be removed from the front and new elements to be added to the rear. This is called a First-In-First-Out (FIFO) data structure because the first element added to the queue (i.e., the one that has been waiting the longest) is always the first one to be removed. A basic que

View Solution →

Castle on the Grid

You are given a square grid with some cells open (.) and some blocked (X). Your playing piece can move along any row or column until it reaches the edge of the grid or a blocked cell. Given a grid, a start and a goal, determine the minimum number of moves to get to the goal. Function Description Complete the minimumMoves function in the editor. minimumMoves has the following parameter(s):

View Solution →

Down to Zero II

You are given Q queries. Each query consists of a single number N. You can perform any of the 2 operations on N in each move: 1: If we take 2 integers a and b where N = a * b (a ≠ 1, b ≠ 1), then we can change N = max( a, b ). 2: Decrease the value of N by 1. Determine the minimum number of moves required to reduce the value of N to 0. Input Format The first line contains the integer Q.

View Solution →

Truck Tour

Suppose there is a circle. There are N petrol pumps on that circle. Petrol pumps are numbered 0 to (N-1) (both inclusive). You have two pieces of information corresponding to each of the petrol pump: (1) the amount of petrol that particular petrol pump will give, and (2) the distance from that petrol pump to the next petrol pump. Initially, you have a tank of infinite capacity carrying no petr

View Solution →