Find Strings


Problem Statement :


A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here.

You are given n strings w[1], w[2], ......, w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let $S = S[1] \cup S[2] \cup \dots \cup S[n]$, that is, S is the set of strings formed by the union of all substrings in the sets S[1], S[2], ..... S[n]. There will be many queries. For each query you will be given an integer 'k'. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no kth element, return INVALID.

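For example, your strings are $w = [abc, cde]$. All of the substrings are $S[1] = \{a, b, c, ab, bc, abc\}$ and $S[2] = \{c, d, e, cd, de, cde\}$. Combine the two sets and sort them to get $S = \{a, ab, abc, b, bc, c, cd, cde, d, de, e\}$. So, for instance if $k = 1$, we return 'a'. If $k = 5$, we return 'bc'. If $k = 20$ though, there is not an $S[20]$ so we return INVALID.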

Function Description

Complete the findStrings function in the editor below. It should return array of strings.

findStrings has the following parameter(s):

w: an array of strings
queries: an array of integers
Input Format

The first line contains an integer n, the number of strings in the array w.
Each of the next n lines consists of a string w[ i ].
The next line contains an integer q, the number of queries.
Each of the next q lines consists of a single integer k.


Constraints

1  <=  n  <=  50
1  <=  | w[ i ] | <=  2000
1  <=  q  <=  500
1  <=  k  <=  10^9



Each character of w[ i ] ∈ ascii[ a - z ]

Output Format

Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.



Solution :



title-img


                            Solution in C :

in   C++  :









#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every per-position array below. Positions are 1-indexed and
// slot N+1 is read as an end-of-text sentinel. The driver concatenates up to
// 50 words of up to 2000 letters with one separator between neighbours
// (100049 symbols), which did not fit in the previous capacity of 100011.
const int MAXL=50*2000+50+50;

// Suffix array over the integer text A[1..N], built by prefix doubling with a
// two-pass counting sort, plus the LCP ("Height") array and, for every sorted
// suffix, the number of new letter-only substrings it contributes.
//
// Usage: fill N and A[1..N] (values >= 1, each < MAXL), then call CalcSA(),
// CalcHeight() and CalContribute() in that order.
struct SuffixArray{
    // One sortable item: suffix start `id` with its (primary, secondary) key.
    struct RadixElement{
        int id,k[2];
    }RE[MAXL],RT[MAXL];

    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable LSD counting sort of RE[1..N] by (k[0], k[1]), then assigns
    // Rank[id] so that equal key pairs share a rank and ranks start at 1.
    void RadixSort()
    {
        for (int y=1;y>=0;y--){
            memset(C,0,sizeof(C));
            for (int i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (int i=1;i<MAXL;i++) C[i]+=C[i-1];
            // Walk backwards so that equal keys keep their relative order.
            for (int i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (int i=1;i<=N;i++) RE[i]=RT[i];
        }
        for (int i=1;i<=N;i++){
            const bool differs=RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1];
            Rank[RE[i].id]=Rank[RE[i-1].id]+(differs?1:0);
        }
    }

    // Fills Rank[1..N] and SA[1..N]; SA[r] is the start of the r-th smallest suffix.
    void CalcSA(){
        // Slot 0 is a guard whose key can never equal a real key, so the first
        // sorted element always receives rank 1.
        RE[0].id=0;
        RE[0].k[0]=-1;
        Rank[0]=0;

        for (int i=1;i<=N;i++){
            RE[i].id=i;
            RE[i].k[0]=A[i];
            RE[i].k[1]=0;
        }
        RadixSort();

        // After a round with step k, ranks order suffixes by their first 2k symbols.
        for (int k=1;k+1<=N;k*=2){
            for (int i=1;i<=N;i++){
                RE[i].id=i;
                RE[i].k[0]=Rank[i];
                RE[i].k[1]=(i+k<=N)?Rank[i+k]:0;
            }
            RadixSort();
        }
        for (int i=1;i<=N;i++)
            SA[Rank[i]]=i;
    }

    // Fills Height[r] with the length of the common prefix of the suffixes
    // ranked r-1 and r (Height[1] is 0). Requires CalcSA() to have run.
    void CalcHeight(){
        // The extension loop below stops at the end of the text only because
        // slot N+1 holds a value no real symbol has; make that explicit.
        A[N+1]=0;
        int h=0;
        for (int i=1;i<=N;i++){
            if (Rank[i]==1){
                h=0;
            } else {
                const int prev=SA[Rank[i]-1];
                // The previous match length minus one is carried over.
                if (h>0) h--;
                while (A[i+h]==A[prev+h]) h++;
            }
            Height[Rank[i]]=h;
        }
    }

    // Fills Contribute[r] with the number of prefixes of suffix SA[r] that
    // consist only of symbols <= 26 and are longer than Height[r].
    void CalContribute(){
        for (int i=1;i<=N;i++){
            const int pos=SA[i];
            int j=pos;
            // Bounds check first so that A is never read past N.
            while (j<=N && A[j]<=26)
                ++j;
            Contribute[i]=j-pos-Height[i];
        }
    }
}SA;

// Returns the query-th (1-indexed) smallest distinct substring recorded in the
// global suffix array SA, or "INVALID" when query is not in range.
//
// SA.Contribute[r] new substrings are attributed to the r-th sorted suffix;
// they are that suffix's prefixes of length Height[r]+1 and up, in increasing
// length. The answer is built in a std::string because a fixed 2000-byte
// buffer has no room for the terminator of a 2000-character answer.
string check(int query)
{
    if (query < 1)
        return "INVALID";

    long long covered = 0;   // substrings accounted for by ranks 1..rank
    for (int rank = 1; rank <= SA.N; ++rank)
    {
        covered += SA.Contribute[rank];
        if (query > covered)
            continue;

        // (covered - query) counts how many of this suffix's contributions
        // lie after the requested one, so trim that many from the longest.
        const int length = SA.Height[rank] + SA.Contribute[rank]
                         - static_cast<int>(covered - query);
        const int start = SA.SA[rank];

        string result;
        result.reserve(length);
        for (int offset = 0; offset < length; ++offset)
            result += static_cast<char>(SA.A[start + offset] + 'a' - 1);
        return result;
    }
    return "INVALID";
}
// Reads the words and the queries from standard input, loads the words into
// the global suffix array SA, and prints one answer per query.
int main()
{
    int cas;
    cin>>cas;

    vector<string> words;
    for (int read=0; read<cas; ++read){
        string word;
        cin>>word;
        words.push_back(word);
    }

    // Letters map to 1..26. Every word except the last is followed by its own
    // separator value (30 + 1-based word number).
    SA.N=0;
    int ordinal=1;
    for (vector<string>::const_iterator w=words.begin(); w!=words.end(); ++w, ++ordinal){
        for (string::size_type c=0; c<w->size(); ++c)
            SA.A[++SA.N]=(*w)[c]-'a'+1;
        if (ordinal<cas)
            SA.A[++SA.N]=30+ordinal;
    }

    int queryNumber;
    cin>>queryNumber;
    vector<int> querylist;
    for (int read=0; read<queryNumber; ++read){
        int query;
        cin>>query;
        querylist.push_back(query);
    }

    SA.CalcSA();
    SA.CalcHeight();
    SA.CalContribute();

    for (vector<int>::size_type q=0; q<querylist.size(); ++q)
        cout<<check(querylist[q])<<endl;
}










In    Java  :









import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;

/**
 * Answers "k-th smallest distinct substring" queries over a set of words.
 *
 * Every word gets an 'A' terminator appended and the results are concatenated
 * into one text. A suffix array and LCP array are built over that text; the
 * sorted suffixes are then scanned, counting for each one the prefixes that
 * stay inside its own word and are longer than its LCP with the previous suffix.
 */
public class Solution {

    public static void main(String[] args) {
        new Solution().solveProblem();
        out.close();
    }

    static Scanner in =
            new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
            new PrintStream(new BufferedOutputStream(System.out));

    /** maxx[p] is the index one past the 'A' terminator of the word containing position p. */
    int[] maxx;
    /** All words, each followed by an 'A' terminator, concatenated. */
    String s = "";

    /** Reads the words and queries from {@link #in} and prints one answer per query. */
    public void solveProblem() {
        int words = in.nextInt();
        in.nextLine();

        String[] sn = new String[words];
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < words; i++) {
            sn[i] = in.nextLine() + "A";
            joined.append(sn[i]);
        }
        s = joined.toString();

        T = s.toCharArray();
        maxx = new int[T.length];
        int som = 0;
        for (int i = 0; i < words; i++) {
            int nu = sn[i].length();
            for (int j = som; j < som + nu; j++)
                maxx[j] = som + nu;
            som += nu;
        }

        this.n = T.length;

        constructSA();
        computeLCP();

        int q = in.nextInt();
        in.nextLine();
        for (int i = 0; i < q; i++)
            losOp(in.nextLong());
    }

    /**
     * Prints the k-th (1-indexed) smallest distinct substring, or "INVALID"
     * when k is out of range.
     */
    void losOp(long k) {
        if (k < 1) {
            out.println("INVALID");
            return;
        }

        for (int i = 0; i < n; i++) {
            int ind = SA[i];
            // Suffixes that begin on a terminator contribute nothing.
            if (T[ind] == 'A')
                continue;

            int start = LCP[i];
            // Letters left in this word from ind, minus those already shared
            // with the previous suffix; clamped because the shared prefix may
            // run across the terminator.
            long aantal = max(0, maxx[ind] - 1 - ind - start);

            if (aantal >= k) {
                out.println(s.substring(ind, (int) (ind + start + k)));
                return;
            }
            k -= aantal;
        }

        out.println("INVALID");
    }

    /** Capacity of the work arrays; set by {@link #ensureCapacity()}. */
    int maxlen = 0;
    /** Length of the text T. */
    int n;

    char[] T;

    int[] RA;
    int[] RATemp;
    int[] SA;
    int[] SATemp;

    int[] c;

    /**
     * Sizes the work arrays for the current n. The counting sort indexes
     * c[0..max(300, n)], so that many slots plus one are needed. A fixed
     * capacity of 100010 was too small for 50 words of 2000 letters plus
     * terminators (n = 100050).
     */
    private void ensureCapacity() {
        int needed = max(300, n) + 1;
        if (RA != null && RA.length >= needed)
            return;
        maxlen = needed;
        RA = new int[needed];
        RATemp = new int[needed];
        SA = new int[needed];
        SATemp = new int[needed];
        c = new int[needed];
    }

    /** Rank of the suffix starting at idx; positions past the text rank as 0. */
    private int rankAt(int idx) {
        return idx < n ? RA[idx] : 0;
    }

    /** Builds SA[0..n-1] over T by prefix doubling with counting sorts. */
    void constructSA() {
        ensureCapacity();

        for (int i = 0; i < n; i++) {
            RA[i] = T[i] - '.';
            SA[i] = i;
        }

        for (int k = 1; k < n; k <<= 1) {
            // Sort by the second key first, then stably by the first key.
            countingSort(k);
            countingSort(0);

            RATemp[SA[0]] = 1;
            int r = 1;
            for (int i = 1; i < n; i++) {
                // rankAt keeps SA[i] + k from indexing past the arrays.
                boolean same = RA[SA[i]] == RA[SA[i - 1]]
                        && rankAt(SA[i] + k) == rankAt(SA[i - 1] + k);
                RATemp[SA[i]] = same ? r : ++r;
            }

            RA = RATemp.clone();
        }
    }

    /** Stable counting sort of SA by the rank found k positions after each suffix start. */
    void countingSort(int k) {
        int maxi = max(300, n);

        fill(c, 0);
        for (int i = 0; i < n; i++)
            c[rankAt(i + k)]++;

        // Turn the counts into the first output slot for each key.
        int sum = 0;
        for (int i = 0; i <= maxi; i++) {
            int t = c[i];
            c[i] = sum;
            sum += t;
        }

        for (int i = 0; i < n; i++)
            SATemp[c[rankAt(SA[i] + k)]++] = SA[i];

        SA = SATemp.clone();
    }

    /** Phi[p] is the start of the suffix ranked just before the one at p, or -1 for the first. */
    int[] Phi;
    /** LCP[i] is the common prefix length of the suffixes ranked i-1 and i; LCP[0] is 0. */
    int[] LCP;
    /** Largest common prefix length seen by {@link #computeLCP()}. */
    int max = 0;

    /** Fills LCP from SA and T using the permuted-LCP (Phi) method. */
    void computeLCP() {
        LCP = new int[n];
        Phi = new int[n];
        int[] PLCP = new int[n];
        if (n == 0)
            return;

        Phi[SA[0]] = -1;
        for (int i = 1; i < n; i++)
            Phi[SA[i]] = SA[i - 1];

        int L = 0;
        for (int i = 0; i < n; i++) {
            if (Phi[i] == -1) {
                PLCP[i] = 0;
                continue;
            }

            while (i + L < n && Phi[i] + L < n
                    && T[i + L] == T[Phi[i] + L])
                L++;

            max = max(max, L);
            PLCP[i] = L;
            // The match length minus one is carried to the next text position.
            L = max(L - 1, 0);
        }

        for (int i = 1; i < n; i++)
            LCP[i] = PLCP[SA[i]];
    }

}









In   C  :








#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One node of the compressed trie of substrings built in main().
typedef struct node {
// Node one level up; NULL for the root.
struct node * parent;
// Number of characters in this node's label.
int           baselen;
// Start of the label; points into one of the input strings (not NUL-terminated at baselen).
char        * base;
// Total number of label characters held by all descendants of this node.
int           count;
// Child whose label starts with letter 'a' + index, or NULL.
struct node * children[26];
// Number of non-NULL entries in children.
int           numofchildren;
} node;

/* Bytes requested from malloc for each pool of trie nodes. */
const int BLOCKSIZE = (1 << 20);
/* Maximum number of input words. */
const int MAXN = 50;
/* Bytes per input row: a word of up to 2000 letters plus its NUL terminator.
   A row of exactly 2000 bytes let scanf("%s") write the terminator one byte
   past the end for a maximum-length word. */
const int MAXLEN = 2001;

// State of the node pool used by getnode().
// Number of nodes in the current pool block (0 until the first block is allocated).
int    poolsize = 0;
// First node of the current pool block.
node * poolstart = NULL;
// Index of the next unused node in the current pool block.
int    pooloffset = 0;

/*
 * Returns a freshly initialised node (no parent, empty label, zero count, no
 * children) taken from a pool that is allocated in blocks of BLOCKSIZE bytes.
 *
 * When the current block is used up a new one is allocated; earlier blocks
 * stay referenced by the nodes already handed out and are never freed.
 * Terminates the program if a block cannot be allocated, rather than
 * returning a pointer derived from NULL.
 */
node * getnode() {
    node * retnode;
    int i;

    if (pooloffset >= poolsize) {
        poolsize = BLOCKSIZE / sizeof(node);
        poolstart = (node *)malloc(poolsize * sizeof(node));
        if (poolstart == NULL) {
            fprintf(stderr, "getnode: out of memory\n");
            exit(EXIT_FAILURE);
        }
        pooloffset = 0;
    }
    retnode = poolstart + pooloffset++;

    retnode -> parent = NULL;
    retnode -> baselen = 0;
    retnode -> base = NULL;
    retnode -> count = 0;
    for (i = 0; i < 26; i++)
        retnode -> children[i] = NULL;
    retnode -> numofchildren = 0;

    return retnode;
}

// Reads n words, inserts every suffix of every word into a compressed trie
// whose nodes carry the size of their subtree, then answers q queries by
// walking the trie in alphabetical order and printing the k-th substring
// (or INVALID when k exceeds the total stored in root -> count).
int main() {
// Input words; node labels point directly into these rows.
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;

// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
// NOTE(review): "%s" has no field width, so a token of MAXLEN or more
// characters writes past the end of the row.
scanf("%s", w[i]);
lenw = strlen(w[i]);

// one string: insert the suffix starting at each position j
for (j = 0; j < lenw; j++) {
currentnode = root;
// Number of characters of currentnode's label matched so far.
currentbasepos = 0;
// Number of label characters this suffix adds to the trie.
toadd = 0;
for (l = j; l < lenw; l++) {
// Letter index 0..25 of the character being consumed.
actchar = *(w[i] + l) - 'a';

// At the root (which has an empty label): descend or start a new leaf.
if (currentnode == root) {
if (currentnode -> children[actchar]) {
// The child's label starts with this letter, so one character is matched.
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
// New leaf labelled with the whole rest of the suffix.
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}

// The whole label of currentnode has been matched.
if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
// Leaf: lengthen its label by the rest of the suffix and re-point the
// label at the current word, where the matched part precedes position l.
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
// The leaf's own count is unchanged; only its ancestors grow.
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
// Inner node without a matching child: hang a new leaf below it.
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
// In the middle of currentnode's label.
cb = currentnode -> base;
ncb = cb + currentbasepos;

if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
// Mismatch: split the label. newnode takes the matched front part and
// currentnode keeps the remainder as newnode's only child.
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;

newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
// Everything below the old node, plus the remainder that moved down.
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
// Re-process the same character from the new node, whose label is now fully matched.
l--;
continue;
}
}
}

// Add the new characters to the node where insertion stopped and to all of
// its ancestors (toadd is 0 when the suffix was already present).
while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}

// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
// NOTE(review): "%i" also accepts octal (leading 0) and hex (0x) input;
// "%d" would read decimal only — confirm which is intended.
scanf("%i", &k);

if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
// k is the number of characters of the answer still to be located.
while (k) {
cbl = currentnode -> baselen;
cb  = currentnode -> base;
// Print the current label, or only its first k characters if the answer ends here.
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
// Pick the first child, in alphabetical order, whose subtree
// (label plus descendants) still contains the remaining rank.
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
// Answers are separated by newlines; none is printed after the last one.
if (i < q - 1) printf("\n");
}

return 0;
}











In    Python3  :








#!/usr/bin/py
# Head ends here
def findStrings(a,query):
    """Print, for each k in ``query``, the k-th smallest distinct substring.

    Every substring of a word is a prefix of one of its suffixes, so the
    distinct suffixes of all words are collected and sorted, and the common
    prefix length of each suffix with its predecessor is recorded. The lookup
    itself is done by ``find_ith``, which prints "INVALID" for an
    out-of-range k.

    a:     list of words.
    query: list of 1-indexed ranks.
    """
    # A set removes duplicate suffixes; sorted() gives lexicographic order.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})
    # The first suffix has no predecessor, marked with None.
    LCP = [
        None if i == 0 else find_lcp(suffixes[i-1], suffixes[i])
        for i in range(len(suffixes))
    ]
    for q in query:
        print(find_ith(suffixes, LCP, q-1))
       
        
def find_ith(suffixes, LCP, i):
    """Return the substring with 0-based rank ``i``, or "INVALID".

    suffixes: distinct suffixes in sorted order.
    LCP:      for each suffix, the length of the prefix it shares with the
              previous suffix (None for the first one).
    i:        0-based rank among all distinct substrings.

    Each suffix contributes its prefixes longer than its LCP value, shortest
    first, so the wanted prefix length follows directly from the offset of
    ``i`` inside that suffix's range of ranks.
    """
    if i < 0:
        return "INVALID"
    low = 0  # rank of the first substring contributed by the current suffix
    for suf, lcp in zip(suffixes, LCP):
        if lcp is None:
            lcp = 0
        high = low + len(suf) - lcp  # one past the last rank of this suffix
        if i < high:
            return suf[:lcp + (i - low) + 1]
        low = high
    return "INVALID"
                
def find_lcp(s1,s2):
    """Return the length of the longest common prefix of ``s1`` and ``s2``."""
    matched = 0
    # zip stops at the end of the shorter string.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched
# Tail starts here

if __name__ == '__main__':
    # Input: word count, one word per line, query count, one rank per line.
    n = int(input())
    string = [input().strip() for _ in range(n)]
    q = int(input())
    query = [int(input()) for _ in range(q)]
    findStrings(string,query)
                        








View More Similar Problems

Swap Nodes [Algo]

A binary tree is a tree which is characterized by one of the following properties: It can be empty (null). It contains a root node only. It contains a root node with a left subtree, a right subtree, or both. These subtrees are also binary trees. In-order traversal is performed as Traverse the left subtree. Visit root. Traverse the right subtree. For this in-order traversal, start from

View Solution →

Kitty's Calculations on a Tree

Kitty has a tree, T , consisting of n nodes where each node is uniquely labeled from 1 to n . Her friend Alex gave her q sets, where each set contains k distinct nodes. Kitty needs to calculate the following expression on each set: $\left(\sum_{\{u,v\}} u \cdot v \cdot dist(u,v)\right) \bmod (10^9+7)$ where: { u ,v } denotes an unordered pair of nodes belonging to the set. dist(u , v) denotes the number of edges on the unique (shortest) path between nodes a

View Solution →

Is This a Binary Search Tree?

For the purposes of this challenge, we define a binary tree to be a binary search tree with the following ordering requirements: The data value of every node in a node's left subtree is less than the data value of that node. The data value of every node in a node's right subtree is greater than the data value of that node. Given the root node of a binary tree, can you determine if it's also a

View Solution →

Square-Ten Tree

The square-ten tree decomposition of an array is defined as follows: The lowest ($0^{th}$) level of the square-ten tree consists of single array elements in their natural order. The $k^{th}$ level (starting from $1$) of the square-ten tree consists of subsequent array subsegments of length $10^{2^{k-1}}$ in their natural order. Thus, the $1^{st}$ level contains subsegments of length $10^{2^{1-1}} = 10$, the $2^{nd}$ level contains subsegments of length $10^{2^{2-1}} = 100$, the

View Solution →

Balanced Forest

Greg has a tree of nodes containing integer data. He wants to insert a node with some non-zero integer value somewhere into the tree. His goal is to be able to cut two edges and have the values of each of the three new trees sum to the same amount. This is called a balanced forest. Being frugal, the data value he inserts should be minimal. Determine the minimal amount that a new node can have to a

View Solution →

Jenny's Subtrees

Jenny loves experimenting with trees. Her favorite tree has n nodes connected by n - 1 edges, and each edge is 1 unit in length. She wants to cut a subtree (i.e., a connected part of the original tree) of radius r from this tree by performing the following two steps: 1. Choose a node, x , from the tree. 2. Cut a subtree consisting of all nodes which are not further than r units from node x .

View Solution →