Find Strings


Problem Statement :


A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here.

You are given n strings w[1], w[2], ......, w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = S[1] ∪ S[2] ∪ ..... ∪ S[n], that is, S is a set of strings that is the union of all substrings in all sets S[1], S[2], ..... S[n]. There will be many queries. For each query you will be given an integer 'k'. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no element k, return INVALID.

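For example, your strings are w = ['abc', 'cde']. All of the substrings are S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = {a, ab, abc, b, bc, c, cd, cde, d, de, e}. So, for instance if k = 1, we return 'a'. If k = 5, we return 'bc'. If k = 20 though, there is not an S[20] so we return INVALID.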

Function Description

Complete the findStrings function in the editor below. It should return array of strings.

findStrings has the following parameter(s):

w: an array of strings
queries: an array of integers
Input Format

The first line contains an integer n, the number of strings in the array w.
Each of the next n lines consists of a string w[ i ].
The next line contains an integer q, the number of queries.
Each of the next q lines consists of a single integer k.


Constraints

1  <=  n  <=  50
1  <=  | w[ i ] | <=  2000
1  <=  q  <=  500
1  <=  k  <=  10^9



Each character of w[ i ] ∈ ascii[ a - z ]

Output Format

Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.



Solution :



title-img


                            Solution in C :

in   C++  :









#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every suffix-array buffer. Under the stated limits the joined
// text holds up to 50 strings of 2000 characters plus 49 separators (100049
// symbols); it is stored from index 1, so at least 100050 slots are needed.
// The value below leaves a little slack on top of that.
const int MAXL=100064;

// Suffix array, LCP ("Height") array and per-suffix substring counts for the
// integer text A[1..N].
//
// Usage: fill N and A[1..N], then call CalcSA(), CalcHeight() and
// CalContribute() in that order. Symbols must lie in [1, MAXL); symbols
// greater than 26 are treated as separators by CalContribute().
struct SuffixArray{
    // One item of the radix sort: the suffix start position (id) and its
    // two-part key (k[0] is the primary key, k[1] the secondary key).
    struct RadixElement{
        int id,k[2];
    }RE[MAXL],RT[MAXL];
    // N          - length of the text
    // A[1..N]    - the text
    // SA[r]      - start position of the suffix with rank r (ranks start at 1)
    // Rank[p]    - rank of the suffix starting at position p
    // Height[r]  - common prefix length of the suffixes ranked r-1 and r
    // C          - counting-sort buckets
    // Contribute[r] - number of substrings first seen at the suffix ranked r
    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable two-pass counting sort of RE[1..N] by (k[0], k[1]), followed by
    // the assignment of dense ranks: equal keys share a rank.
    void RadixSort()
    {
        for (int y=1;y>=0;y--){
            // Only the buckets up to the largest key of this pass are needed.
            int maxKey=0;
            for (int i=1;i<=N;i++)
                if (RE[i].k[y]>maxKey) maxKey=RE[i].k[y];
            memset(C,0,sizeof(int)*(maxKey+1));
            for (int i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (int i=1;i<=maxKey;i++) C[i]+=C[i-1];
            // Walking backwards keeps the sort stable.
            for (int i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (int i=1;i<=N;i++) RE[i]=RT[i];
        }
        for (int i=1;i<=N;i++){
            Rank[RE[i].id]=Rank[RE[i-1].id];
            if (RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1])
                Rank[RE[i].id]++;
        }
    }

    // Builds Rank and SA by prefix doubling: each round sorts the suffixes by
    // the pair (rank of the first k symbols, rank of the following k symbols).
    void CalcSA(){
        // RE[0] is the sentinel compared against RE[1] when ranks are handed
        // out; initialise it explicitly instead of relying on static storage.
        RE[0].id=0;
        RE[0].k[0]=-1;
        RE[0].k[1]=0;
        Rank[0]=0;
        for (int i=1;i<=N;i++)
            RE[i].id=i,RE[i].k[0]=A[i],RE[i].k[1]=0;
        RadixSort();
        for (int k=1;k+1<=N;k*=2)
        {
            for (int i=1;i<=N;i++)
                RE[i].id=i,RE[i].k[0]=Rank[i],RE[i].k[1]=i+k<=N?Rank[i+k]:0;
            RadixSort();
        }
        for (int i=1;i<=N;i++)
            SA[Rank[i]]=i;
    }

    // Fills Height by visiting the suffixes in text order and reusing the
    // previous match length minus one as the starting point.
    void CalcHeight(){
        int h=0;
        for (int i=1;i<=N;i++)
        {
            if (Rank[i]==1)
                h=0;
            else
            {
                const int k=SA[Rank[i]-1];
                if (--h<0) h=0;
                // Explicit bounds instead of depending on a zero after A[N].
                while (i+h<=N && k+h<=N && A[i+h]==A[k+h]) h++;
            }
            Height[Rank[i]]=h;
        }
    }

    // Contribute[r] = (number of symbols from SA[r] up to, but excluding, the
    // next separator or the end of the text) - Height[r].
    void CalContribute(){
        for (int i=1;i<=N;i++)
        {
            const int pos=SA[i];
            int j=pos;
            // Check the bound first so A is never read past position N.
            while (j<=N && A[j]<=26)
                ++j;
            Contribute[i]=j-pos-Height[i];
        }
    }
}SA;

string check(int query)
{
char result[2000];
int sum = 0,i,pos = 0;
for(i = 1; i<=SA.N;i++)
{
sum += SA.Contribute[i];
if(query <= sum)
{
pos = i;
break;
}
}
if(pos)
{
int index = query - sum;
int size = index + SA.Height[pos] + SA.Contribute[pos];        
for (int i=0; i < size; ++i)
{
result[i] = SA.A[SA.SA[pos] + i] + 'a' -1;
}
result[size]='\0';
return result;        
}
return "INVALID";    
}
// Reads the strings and the queries from standard input, builds the suffix
// array over the separator-joined text and prints one answer per line.
int main()
{
    int cas;
    cin>>cas;

    // Input strings, in reading order.
    list<string> words;
    for (int got=0; got<cas; ++got){
        string word;
        cin>>word;
        words.push_back(word);
    }

    // Encode the letters as 1..26 and place a distinct separator (30+ordinal)
    // after every string except the last one.
    SA.N=0;
    int ordinal=1;
    for (list<string>::iterator w=words.begin(); w!=words.end(); ++w){
        const char *cursor=w->c_str();
        while (*cursor){
            SA.A[++SA.N]=*cursor-'a'+1;
            ++cursor;
        }
        if (ordinal<cas)
            SA.A[++SA.N]=30+ordinal;
        ++ordinal;
    }

    // All queries are read before any answer is produced.
    int queryNumber;
    cin>>queryNumber;
    vector<int> pending;
    for (int got=0; got<queryNumber; ++got){
        int rank;
        cin>>rank;
        pending.push_back(rank);
    }

    SA.CalcSA();
    SA.CalcHeight();
    SA.CalContribute();

    for (vector<int>::size_type at=0; at<pending.size(); ++at)
        cout<<check(pending[at])<<endl;

    return 0;
}










In    Java  :









import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;

/**
 * Answers "k-th distinct substring" queries over a set of lower-case strings.
 *
 * Every input string is followed by the separator 'A' and all of them are
 * concatenated into one text. A suffix array and an LCP array are built over
 * that text; each suffix then contributes the prefixes that end before its
 * next separator and are longer than its LCP with the preceding suffix.
 */
public class Solution {

    public static void main(String[] args) {

        new Solution().solveProblem();

        out.close();
    }

    static Scanner in =
        new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
        new PrintStream(new BufferedOutputStream(System.out));

    /** maxx[j] is the index just past the separator that closes the string containing position j. */
    int[] maxx ;
    /** The concatenated text: every input string followed by 'A'. */
    String s ="";

    /** Reads the strings and the queries from standard input and prints one answer per query. */
    public void solveProblem() {

        int n = in.nextInt() ;
        in.nextLine() ;

        String[] sn = new String[n] ;
        // Appending to a builder avoids re-copying the whole text per string.
        StringBuilder joined = new StringBuilder( s ) ;
        for( int i = 0 ; i < n ; i++ ){
            sn[i] = in.nextLine() + "A" ;
            joined.append( sn[i] ) ;
        }
        s = joined.toString() ;

        T = s.toCharArray() ;
        maxx = new int[T.length] ;
        int som = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int nu = sn[i].length() ;

            for( int j = som ; j < som + nu ; j++)
                maxx[j] = som + nu ;

            som += nu ;
        }

        this.n = T.length ;

        constructSA() ;
        computeLCP() ;

        int q = in.nextInt() ; in.nextLine() ;
        for( int i = 0 ; i < q ; i++ )
            losOp(in.nextLong()) ;
    }

    /**
     * Prints the k-th (1-based) distinct substring in lexicographic order, or
     * "INVALID" when there are fewer than k of them.
     */
    void losOp( long k ) {

        int start = 0 ;
        for( int i = 0 ; i < n ; i++ ){

            int ind = SA[i] ;
            start = LCP[i] ;

            // Characters between ind and its separator that are not already
            // covered by the common prefix with the previous suffix.
            long aantal = max(0,maxx[ind] - 1 - ind - start) ;

            // Suffixes that begin at a separator hold no substrings.
            if( T[ind] != 'A' && aantal >= k ){
                out.println(s.substring(ind, (int) (ind+start+k))) ;
                return ;
            }else if( T[ind] != 'A')
                k -= aantal ;
        }

        out.println("INVALID") ;
    }

    /** Current capacity of the work arrays below; grown by ensureCapacity() when the text needs more. */
    int maxlen = 100010 ;
    /** Length of the text T. */
    int n ;

    char[] T ;

    int[] RA = new int[maxlen] ;
    int[] RATemp = new int[maxlen] ;
    int[] SA = new int[maxlen] ;
    int[] SATemp = new int[maxlen] ;

    int[] c = new int[maxlen] ;

    /**
     * Makes the work arrays large enough for a text of length n. The ranking
     * step reads RA up to index n and the counting sort indexes c up to
     * max(300, n), so max(300, n) + 1 slots are required. With 50 strings of
     * 2000 characters plus separators n reaches 100050, which is more than the
     * initial capacity.
     */
    private void ensureCapacity(){
        int needed = max( 300, n ) + 1 ;
        if( needed > maxlen ){
            maxlen = needed ;
            RA = new int[maxlen] ;
            RATemp = new int[maxlen] ;
            SA = new int[maxlen] ;
            SATemp = new int[maxlen] ;
            c = new int[maxlen] ;
        }
    }

    /** Builds the suffix array SA of T[0..n) by prefix doubling with counting sorts. */
    void constructSA(){

        ensureCapacity() ;

        for( int i = 0 ; i < n ; i++ ){
            RA[i] = T[i]-'.' ;
            SA[i] = i ;
        }

        for( int k = 1 ; k < n ; k <<= 1 ){
            // Sort by the second half of the key first, then stably by the first half.
            countingSort( k ) ;
            countingSort( 0 ) ;

            RATemp[SA[0]] = 1 ;
            int r = 1 ;
            for( int i = 1 ; i < n ; i++ ){
                RATemp[SA[i]] = (
                    RA[SA[i]] == RA[SA[i-1]] &&
                    RA[SA[i]+k] == RA[SA[i-1]+k] ) ? r : ++r ;
            }

            RA = RATemp.clone() ;
        }
    }

    /** Stable counting sort of SA by the rank found k positions after each suffix start (0 past the end). */
    void countingSort( int k ){

        int sum = 0 ;
        int maxi = max( 300, n ) ;

        fill( c, 0 ) ;

        for( int i = 0 ; i < n ; i++ )
            c[ ( i + k ) < n ? RA[i+k] : 0 ]++ ;

        // Turn the counts into the first output slot of every key.
        for( int i = 0 ; i <= maxi ; i++ ){
            int t = c[i] ;
            c[i] = sum ;
            sum += t ;
        }

        for( int i = 0 ; i < n ; i++ ){
            SATemp[c[(
                SA[i] + k) < n ? RA[SA[i]+k] : 0]++ ] = SA[i] ;
        }

        SA = SATemp.clone() ;
    }

    /** Phi[p] is the start of the suffix that precedes suffix p in SA order (-1 for the first one). */
    int[] Phi ;
    /** LCP[i] is the common prefix length of the suffixes at SA[i-1] and SA[i]; LCP[0] is 0. */
    int[] LCP ;
    /** Largest common prefix length seen by computeLCP(). */
    int max = 0 ;

    /** Fills LCP by computing the values in text order and permuting them into SA order. */
    void computeLCP(){

        LCP = new int[n] ;
        Phi = new int[n] ;
        int[] PLCP = new int[n] ;

        Phi[SA[0]] = -1 ;
        for( int i = 1 ; i < n ; i++ )
            Phi[SA[i]] = SA[i-1] ;

        int L = 0;
        for( int i = 0 ; i < n ; i++){
            if( Phi[i] == -1){
                PLCP[i] = 0 ;
                continue ;
            }

            while( i+L < n && Phi[i]+L < n
                    && T[i+L] == T[Phi[i]+L])
                L++ ;

            max = max(max,L) ;
            PLCP[i] = L ;
            // The next text position shares at least L-1 characters.
            L = max(L-1,0) ;
        }

        for( int i =1 ; i < n ; i++ )
            LCP[i] = PLCP[SA[i]] ;
    }

}









In   C  :








#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Node of the compressed trie that main() builds from every suffix of every
// input string. Each node carries an edge label of baselen characters.
typedef struct node {
// node this one hangs from; NULL for the root
struct node * parent;
// number of characters in this node's edge label
int           baselen;
// first character of the edge label; points into one of the input strings
char        * base;
// total label length of all descendants as maintained by main(): the number
// of stored substrings that extend past this node's own label
int           count;
// child per letter, indexed by (letter - 'a'); NULL when absent
struct node * children[26];
// number of non-NULL entries in children
int           numofchildren;
} node;

// Size in bytes of each memory block that getnode() requests from malloc.
const int BLOCKSIZE = (1 << 20);
// Number of rows of the input buffer in main(): the maximum number of strings.
const int MAXN = 50;
// Row width of the input buffer in main(): the maximum string length.
const int MAXLEN = 2000;

// State of the node pool used by getnode().
// capacity of the current block, counted in nodes
int    poolsize = 0;
// first node of the current block
node * poolstart = NULL;
// index of the next unused node in the current block
int    pooloffset = 0;

// Returns a cleared trie node taken from a block-allocated pool.
//
// Nodes are handed out one after another from the current block. When the
// block is used up (or none exists yet) a new block of BLOCKSIZE bytes is
// allocated. Earlier blocks are never freed because their nodes stay linked
// into the trie. The program is terminated if no memory is available, so the
// result is never NULL.
node * getnode() {
    node * retnode;
    int i;

    if (pooloffset >= poolsize) {
        poolsize = BLOCKSIZE / sizeof(node);
        poolstart = (node *)malloc(poolsize * sizeof(node));
        if (poolstart == NULL) {
            // callers dereference the result unconditionally
            fprintf(stderr, "getnode: out of memory\n");
            exit(EXIT_FAILURE);
        }
        pooloffset = 0;
    }
    retnode = poolstart + pooloffset++;

    retnode -> parent = NULL;
    retnode -> baselen = 0;
    retnode -> base = NULL;
    retnode -> count = 0;
    for (i = 0; i < 26; i++)
        retnode -> children[i] = NULL;
    retnode -> numofchildren = 0;

    return retnode;
}

int main() {
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;

// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
scanf("%s", w[i]);
lenw = strlen(w[i]);

// one string
for (j = 0; j < lenw; j++) {
currentnode = root;
currentbasepos = 0;
toadd = 0;
for (l = j; l < lenw; l++) {
actchar = *(w[i] + l) - 'a';

if (currentnode == root) {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}

if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
cb = currentnode -> base;
ncb = cb + currentbasepos;

if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;

newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
l--;
continue;
}
}
}

while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}

// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
scanf("%i", &k);

if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
while (k) {
cbl = currentnode -> baselen;
cb  = currentnode -> base;
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
if (i < q - 1) printf("\n");
}

return 0;
}











In    Python3  :








#!/usr/bin/py
# Head ends here
def findStrings(a,query):
    """Print, for every rank in ``query``, the matching distinct substring.

    ``a`` is the list of input strings and ``query`` the list of 1-based
    ranks. One line is printed per rank: the substring at that position of
    the lexicographically sorted set of distinct substrings, or "INVALID"
    when no such position exists.
    """
    # Every distinct suffix of every input string, in lexicographic order.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})
    # LCP[i] is the common prefix length of suffixes[i-1] and suffixes[i];
    # the first suffix has no predecessor and is marked with None.
    LCP = [None] + [find_lcp(before, after)
                    for before, after in zip(suffixes, suffixes[1:])]
    for q in query:
        print(find_ith(suffixes, LCP, q-1))
       
        
def find_ith(suffixes, LCP, i):
    """Return the distinct substring with 0-based rank ``i``, or "INVALID".

    ``suffixes`` is the sorted list of distinct suffixes and ``LCP[j]`` the
    length of the prefix that ``suffixes[j]`` shares with its predecessor
    (``None`` counts as 0). Each suffix contributes its prefixes that are
    longer than that shared length, shortest first.
    """
    low = 0
    for suf, lcp in zip(suffixes, LCP):
        shared = 0 if lcp is None else lcp
        fresh = len(suf) - shared
        # Ranks low .. high-1 belong to this suffix.
        high = low + fresh
        if high - 1 == i:
            return suf
        if high - 1 > i:
            offset = i - low
            if 0 <= offset < fresh:
                return suf[:shared + offset + 1]
        low = high
    return "INVALID"
                
def find_lcp(s1,s2):
    """Return the length of the longest common prefix of ``s1`` and ``s2``."""
    matched = 0
    # zip stops at the shorter string, which bounds the comparison.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched
# Tail starts here

if __name__ == '__main__':
    # Number of strings, then one string per line.
    n = int(input())
    string = [input().strip() for _ in range(n)]
    # Number of queries, then one rank per line.
    q = int(input())
    query = [int(input()) for _ in range(q)]
    findStrings(string,query)
                        








View More Similar Problems

Reverse a linked list

Given the pointer to the head node of a linked list, change the next pointers of the nodes so that their order is reversed. The head pointer given may be null meaning that the initial list is empty. Example: head references the list 1->2->3->Null. Manipulate the next pointers of each node in place and return head, now referencing the head of the list 3->2->1->Null. Function Descriptio

View Solution →

Compare two linked lists

You’re given the pointer to the head nodes of two linked lists. Compare the data in the nodes of the linked lists to check if they are equal. If all data attributes are equal and the lists are the same length, return 1. Otherwise, return 0. Example: list1=1->2->3->Null list2=1->2->3->4->Null The two lists have equal data attributes for the first 3 nodes. list2 is longer, though, so the lis

View Solution →

Merge two sorted linked lists

This challenge is part of a tutorial track by MyCodeSchool Given pointers to the heads of two sorted linked lists, merge them into a single, sorted linked list. Either head pointer may be null meaning that the corresponding list is empty. Example headA refers to 1 -> 3 -> 7 -> NULL headB refers to 1 -> 2 -> NULL The new list is 1 -> 1 -> 2 -> 3 -> 7 -> NULL. Function Description C

View Solution →

Get Node Value

This challenge is part of a tutorial track by MyCodeSchool Given a pointer to the head of a linked list and a specific position, determine the data value at that position. Count backwards from the tail node. The tail is at postion 0, its parent is at 1 and so on. Example head refers to 3 -> 2 -> 1 -> 0 -> NULL positionFromTail = 2 Each of the data values matches its distance from the t

View Solution →

Delete duplicate-value nodes from a sorted linked list

This challenge is part of a tutorial track by MyCodeSchool You are given the pointer to the head node of a sorted linked list, where the data in the nodes is in ascending order. Delete nodes and return a sorted list with each distinct value in the original list. The given head pointer may be null indicating that the list is empty. Example head refers to the first node in the list 1 -> 2 -

View Solution →

Cycle Detection

A linked list is said to contain a cycle if any node is visited more than once while traversing the list. Given a pointer to the head of a linked list, determine if it contains a cycle. If it does, return 1. Otherwise, return 0. Example head refers 1 -> 2 -> 3 -> NUL The numbers shown are the node numbers, not their data values. There is no cycle in this list so return 0. head refer

View Solution →