Find Strings
Problem Statement :
A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here. You are given n strings w[1], w[2], ......, w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = S[1] ∪ S[2] ∪ ... ∪ S[n], that is, S is a set of strings that is the union of all substrings in all sets S[1], S[2], ..... S[n]. There will be many queries. For each query you will be given an integer 'k'. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no kth element, return INVALID. For example, your strings are w = [abc, cde]. All of the substrings are S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = {a, ab, abc, b, bc, c, cd, cde, d, de, e}. So, for instance if k = 1, we return 'a'. If k = 5, we return 'bc'. If k = 20 though, there is not an S[20] so we return INVALID. Function Description Complete the findStrings function in the editor below. It should return an array of strings. findStrings has the following parameter(s): w: an array of strings queries: an array of integers Input Format The first line contains an integer n, the number of strings in the array w. Each of the next n lines consists of a string w[ i ]. The next line contains an integer q, the number of queries. Each of the next q lines consists of a single integer k. Constraints 1 <= n <= 50 1 <= | w[ i ] | <= 2000 1 <= q <= 500 1 <= k <= 10^9 Each character of w[ i ] ∈ ascii[ a - z ] Output Format Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return "INVALID" for that case.
Solution :
Solution in C :
in C++ :
#include <iostream>
#include <list>
#include <vector>
#include <string.h>
using namespace std;
// Capacity of every suffix-array buffer. The encoded text holds at most
// 50 words * 2000 letters + 49 separators = 100049 symbols, is indexed from 1,
// and the radix sort also uses ranks (<= N) as bucket indices, so the capacity
// must stay strictly above the largest N.
const int MAXL=100100;

// Suffix array over the 1-based integer text A[1..N].
//
// Text encoding expected by CalContribute: values 1..26 are letters, any value
// above 26 is a word separator (separators must be pairwise distinct so that no
// common prefix can run across a word boundary).
//
// After CalcSA(), CalcHeight(), CalContribute() (in that order):
//   SA[r]         start position of the suffix with rank r (1 = smallest)
//   Rank[p]       rank of the suffix starting at position p
//   Height[r]     longest common prefix of the suffixes ranked r and r-1
//   Contribute[r] number of substrings that start at SA[r], stay inside its
//                 word, and were not already a prefix of a smaller suffix
struct SuffixArray{
    // One record of the two-key radix sort: suffix start `id`, primary key
    // k[0] and secondary key k[1].
    struct RadixElement{
        int id,k[2];
    }RE[MAXL],RT[MAXL];
    int N,A[MAXL],SA[MAXL],Rank[MAXL],Height[MAXL],
        C[MAXL],Contribute[MAXL];

    // Stable LSD radix sort of RE[1..N] by (k[0], k[1]) followed by rank
    // assignment: equal key pairs share a rank, ranks start at 1.
    // Keys must lie in [0, MAXL).
    void RadixSort()
    {
        int i,y;
        for (y=1;y>=0;y--){
            memset(C,0,sizeof(C));
            for (i=1;i<=N;i++) C[RE[i].k[y]]++;
            for (i=1;i<MAXL;i++) C[i]+=C[i-1];
            // Walking backwards keeps the pass stable.
            for (i=N;i>=1;i--) RT[C[RE[i].k[y]]--]=RE[i];
            for (i=1;i<=N;i++) RE[i]=RT[i];
        }
        for (i=1;i<=N;i++){
            Rank[ RE[i].id ]=Rank[ RE[i-1].id ];
            if (RE[i].k[0]!=RE[i-1].k[0] || RE[i].k[1]!=RE[i-1].k[1])
                Rank[ RE[i].id ]++;
        }
    }

    // Prefix doubling: after the round with offset k the ranks order the
    // suffixes by their first 2k symbols. Fills Rank[1..N] and SA[1..N].
    void CalcSA(){
        int i,k;
        // RE[0] is the "predecessor" of the first sorted record: its key can
        // never match and its rank is 0, so the first real record gets rank 1.
        RE[0].id=0;
        RE[0].k[0]=-1;
        Rank[0]=0;
        for (i=1;i<=N;i++)
            RE[i].id=i,RE[i].k[0]=A[i],RE[i].k[1]=0;
        RadixSort();
        for (k=1;k+1<=N;k*=2)
        {
            // A suffix shorter than k symbols gets secondary key 0, which is
            // below every real rank.
            for (i=1;i<=N;i++)
                RE[i].id=i,RE[i].k[0]=Rank[i],RE[i].k[1]=i+k<=N?Rank[i+k]:0;
            RadixSort();
        }
        for (i=1;i<=N;i++)
            SA[ Rank[i] ]=i;
    }

    // Fills Height[1..N] by visiting suffixes in text order and reusing the
    // previous match length minus one as the starting point.
    void CalcHeight(){
        int i,k,h=0;
        for (i=1;i<=N;i++)
        {
            if (Rank[i]==1)
                h=0;
            else
            {
                k=SA[Rank[i]-1];
                if (--h<0) h=0;
                // Explicit bounds: do not depend on what is stored after A[N].
                while (i+h<=N && k+h<=N && A[i+h]==A[k+h]) h++;
            }
            Height[Rank[i]]=h;
        }
    }

    // Fills Contribute[1..N]. One backward pass keeps the length of the
    // letters-only run that starts at each position, so the whole table costs
    // O(N) instead of rescanning the word for every suffix.
    void CalContribute(){
        int run=0;
        for (int pos=N;pos>=1;pos--)
        {
            run = A[pos]<=26 ? run+1 : 0;
            Contribute[Rank[pos]] = run - Height[Rank[pos]];
        }
    }
}SA;
string check(int query)
{
char result[2000];
int sum = 0,i,pos = 0;
for(i = 1; i<=SA.N;i++)
{
sum += SA.Contribute[i];
if(query <= sum)
{
pos = i;
break;
}
}
if(pos)
{
int index = query - sum;
int size = index + SA.Height[pos] + SA.Contribute[pos];
for (int i=0; i < size; ++i)
{
result[i] = SA.A[SA.SA[pos] + i] + 'a' -1;
}
result[size]='\0';
return result;
}
return "INVALID";
}
int main()
{
int cas;
bool running = true;
while (running)
{
int i;
cin>>cas;
list<string> inlist;
vector<int> querylist;
for(i=0; i < cas; ++i){
string v;
cin>>v;
inlist.push_back(v);
}
list<string>::iterator it;
SA.N=0;
for(it = inlist.begin(),i = 1; it!= inlist.end();it++,++i){
for (const char *p=(*it).c_str();*p;p++)
{
SA.A[++SA.N]=*p-'a'+1;
}
if (i<cas)
SA.A[++SA.N]=30+i;
}
int queryNumber;
cin>>queryNumber;
for(i=0; i < queryNumber; ++i){
int query;
cin>>query;
querylist.push_back(query);
}
SA.CalcSA();
SA.CalcHeight();
SA.CalContribute();
vector<int>::iterator qit;
for(qit = querylist.begin(); qit!= querylist.end();qit++){
cout<<check(*qit)<<endl;
}
running = false;
}
}
In Java :
import java.io.* ;
import java.text.DecimalFormat;
import java.util.*;
import static java.lang.Math.* ;
import static java.util.Arrays.* ;
/**
 * Answers "k-th smallest distinct substring" queries over a set of lowercase
 * words.
 *
 * Every word is terminated with 'A' (which sorts below 'a'..'z') and the
 * terminated words are concatenated into one text. A suffix array and its LCP
 * array are built over that text; a query then walks the suffixes in sorted
 * order, counting for each suffix the prefixes that stay inside its word and
 * are longer than its LCP with the previous suffix.
 */
public class Solution {
    public static void main(String[] args) {
        new Solution().solveProblem();
        out.close();
    }

    static Scanner in =
        new Scanner(new InputStreamReader(System.in));
    static PrintStream out =
        new PrintStream(new BufferedOutputStream(System.out));

    /** maxx[j] = index just past the 'A' terminator of the word containing text position j. */
    int[] maxx ;
    /** Concatenation of all terminated words. */
    String s ="";

    /** Reads the words and queries from {@link #in} and prints one answer per query to {@link #out}. */
    public void solveProblem() {
        int n = in.nextInt() ;
        in.nextLine() ;
        String[] sn = new String[n] ;
        StringBuilder joined = new StringBuilder( s ) ;
        for( int i = 0 ; i < n ; i++ ){
            sn[i] = in.nextLine() + "A" ;
            joined.append( sn[i] ) ;
        }
        s = joined.toString() ;
        T = s.toCharArray() ;
        maxx = new int[T.length] ;
        int som = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int nu = sn[i].length() ;
            for( int j = som ; j < som + nu ; j++)
                maxx[j] = som + nu ;
            som += nu ;
        }
        this.n = T.length ;
        constructSA() ;
        computeLCP() ;
        int q = in.nextInt() ; in.nextLine() ;
        for( int i = 0 ; i < q ; i++ )
            losOp(in.nextLong()) ;
    }

    /**
     * Prints the k-th (1-based) smallest distinct substring, or "INVALID" when
     * there are fewer than k of them.
     */
    void losOp( long k ) {
        int start = 0 ;
        for( int i = 0 ; i < n ; i++ ){
            int ind = SA[i] ;
            start = LCP[i] ;
            // Letters left in the word (terminator excluded) minus the prefixes
            // already shared with the previous suffix.
            long aantal = max(0,maxx[ind] - 1 - ind - start) ;
            if( T[ind] != 'A' && aantal >= k ){
                out.println(s.substring(ind, (int) (ind+start+k))) ;
                return ;
            }else if( T[ind] != 'A')
                k -= aantal ;
        }
        out.println("INVALID") ;
    }

    // Default working capacity: 50 words * (2000 letters + terminator) = 100050
    // text positions, plus slack. ensureCapacity() grows the buffers if needed.
    int maxlen = 50 * 2001 + 10 ;
    int n ;
    char[] T ;
    int[] RA = new int[maxlen] ;
    int[] RATemp = new int[maxlen] ;
    int[] SA = new int[maxlen] ;
    int[] SATemp = new int[maxlen] ;
    int[] c = new int[maxlen] ;

    /** Re-allocates the working arrays when they are shorter than {@code needed}. */
    private void ensureCapacity( int needed ){
        if( needed <= maxlen )
            return ;
        maxlen = needed ;
        RA = new int[maxlen] ;
        RATemp = new int[maxlen] ;
        SA = new int[maxlen] ;
        SATemp = new int[maxlen] ;
        c = new int[maxlen] ;
    }

    /** Builds SA[0..n-1] over T[0..n-1] by prefix doubling with counting sorts. */
    void constructSA(){
        // countingSort indexes c[] up to max(300, n) inclusive.
        ensureCapacity( max( 300, n ) + 1 ) ;
        for( int i = 0 ; i < n ; i++ ){
            RA[i] = T[i]-'.' ;
            SA[i] = i ;
        }
        for( int k = 1 ; k < n ; k <<= 1 ){
            countingSort( k ) ;
            countingSort( 0 ) ;
            RATemp[SA[0]] = 1 ;
            int r = 1 ;
            for( int i = 1 ; i < n ; i++ ){
                int cur = SA[i] ;
                int prev = SA[i-1] ;
                // Positions past the end of the text rank as 0; reading RA
                // there directly would run off the array for large texts.
                int curNext = cur + k < n ? RA[cur+k] : 0 ;
                int prevNext = prev + k < n ? RA[prev+k] : 0 ;
                RATemp[cur] = (
                    RA[cur] == RA[prev] &&
                    curNext == prevNext ) ? r : ++r ;
            }
            RA = RATemp.clone() ;
        }
    }

    /** Stable counting sort of SA by the rank found k positions after each suffix start. */
    void countingSort( int k ){
        int sum = 0 ;
        int maxi = max( 300, n ) ;
        fill( c, 0 ) ;
        for( int i = 0 ; i < n ; i++ )
            c[ ( i + k ) < n ? RA[i+k] : 0 ]++ ;
        for( int i = 0 ; i <= maxi ; i++ ){
            int t = c[i] ;
            c[i] = sum ;
            sum += t ;
        }
        for( int i = 0 ; i < n ; i++ ){
            SATemp[c[(
                SA[i] + k) < n ? RA[SA[i]+k] : 0]++ ] = SA[i] ;
        }
        SA = SATemp.clone() ;
    }

    int[] Phi ;
    int[] LCP ;
    int max = 0 ;

    /**
     * Fills LCP[i] with the longest common prefix of the suffixes at SA[i] and
     * SA[i-1] (LCP[0] = 0). Phi[p] is the suffix that precedes p in sorted
     * order; the match length is carried over, minus one, between consecutive
     * text positions.
     */
    void computeLCP(){
        LCP = new int[n] ;
        Phi = new int[n] ;
        int[] PLCP = new int[n] ;
        Phi[SA[0]] = -1 ;
        for( int i = 1 ; i < n ; i++ )
            Phi[SA[i]] = SA[i-1] ;
        int L = 0;
        for( int i = 0 ; i < n ; i++){
            if( Phi[i] == -1){
                PLCP[i] = 0 ;
                continue ;
            }
            while( i+L < n && Phi[i]+L < n
                    && T[i+L] == T[Phi[i]+L])
                L++ ;
            max = max(max,L) ;
            PLCP[i] = L ;
            L = max(L-1,0) ;
        }
        for( int i =1 ; i < n ; i++ )
            LCP[i] = PLCP[SA[i]] ;
    }
}
In C :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * One node of the tree that main() builds over all suffixes of all words.
 * A node carries a label of `baselen` characters starting at `base`; a fresh
 * node from getnode() has an empty label and no links.
 */
typedef struct node {
/* Node directly above this one; NULL for the root. */
struct node * parent;
/* Number of characters in this node's label. */
int baselen;
/* First character of the label; points into one of the input words (not NUL-terminated at baselen). */
char * base;
/*
 * Running total maintained by main(): every newly inserted character is added
 * to the count of the node it hangs under and of all that node's ancestors.
 * The query loop treats baselen + count as the number of substrings reachable
 * through a child, and root->count as the overall total.
 */
int count;
/* Child whose label starts with letter 'a' + i is stored at index i; NULL when absent. */
struct node * children[26];
/* Number of non-NULL entries in children. */
int numofchildren;
} node;
/* Size in bytes of each chunk that getnode() requests from malloc. */
const int BLOCKSIZE = (1 << 20);
/* Dimensions of the word buffer in main(): number of words and letters per word. */
const int MAXN = 50;
const int MAXLEN = 2000;
/* Node pool used by getnode(): capacity of the current chunk in nodes, */
int poolsize = 0;
/* its first node, */
node * poolstart = NULL;
/* and the index of the next node to hand out. */
int pooloffset = 0;
node * getnode() {
node * retnode;
int i;
if (pooloffset < poolsize) {
retnode = poolstart + pooloffset++;
} else {
poolsize = BLOCKSIZE / sizeof(node);
poolstart = (node *)malloc(poolsize * sizeof(node));
pooloffset = 1;
retnode = poolstart;
}
retnode -> parent = NULL;
retnode -> baselen = 0;
retnode -> base = NULL;
retnode -> count = 0;
for (i = 0; i < 26; i++)
retnode -> children[i] = NULL;
retnode -> numofchildren = 0;
return retnode;
}
int main() {
char w[MAXN][MAXLEN];
int n, q, k, i, j, l, lenw, toadd;
node * root = getnode();
node * currentnode, * newnode, * pt, * ch;
int currentbasepos, cl, ncl, toprint, cbl;
char actchar, * cb, * ncb;
// process strings
scanf("%i", &n);
for (i = 0; i < n; i++) {
scanf("%s", w[i]);
lenw = strlen(w[i]);
// one string
for (j = 0; j < lenw; j++) {
currentnode = root;
currentbasepos = 0;
toadd = 0;
for (l = j; l < lenw; l++) {
actchar = *(w[i] + l) - 'a';
if (currentnode == root) {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> parent = root;
root -> numofchildren += 1;
root -> children[actchar] = newnode;
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
toadd = lenw - l;
break;
}
}
if (currentbasepos == currentnode -> baselen) {
if (currentnode -> numofchildren == 0) {
currentnode -> baselen += lenw - l;
currentnode -> base = w[i] + (l - currentbasepos);
toadd = lenw - l;
currentnode = currentnode -> parent;
break;
} else {
if (currentnode -> children[actchar]) {
currentbasepos = 1;
currentnode = currentnode -> children[actchar];
continue;
} else {
newnode = getnode();
newnode -> base = w[i] + l;
newnode -> baselen = lenw - l;
newnode -> parent = currentnode;
currentnode -> children[actchar] = newnode;
currentnode -> numofchildren += 1;
toadd = lenw - l;
break;
}
}
} else {
cb = currentnode -> base;
ncb = cb + currentbasepos;
if (*ncb == actchar + 'a') {
currentbasepos++;
continue;
} else {
pt = currentnode -> parent;
cl = currentnode -> baselen;
ncl = cl - currentbasepos;
newnode = getnode();
newnode -> base = cb;
newnode -> baselen = currentbasepos;
newnode -> count = currentnode -> count + ncl;
newnode -> parent = pt;
newnode -> numofchildren = 1;
pt -> children[(*cb) - 'a'] = newnode;
currentnode -> baselen = ncl;
currentnode -> base = ncb;
newnode -> children[(*ncb) - 'a'] = currentnode;
currentnode -> parent = newnode;
currentnode = newnode;
l--;
continue;
}
}
}
while (currentnode) {
currentnode -> count += toadd;
currentnode = currentnode -> parent;
}
}
}
// process queries
scanf("%i", &q);
for (i = 0; i < q; i++) {
scanf("%i", &k);
if (k > root -> count) {
printf("INVALID");
} else {
currentnode = root;
while (k) {
cbl = currentnode -> baselen;
cb = currentnode -> base;
toprint = cbl <= k ? cbl : k;
for (j = 0; j < toprint; j++) putchar(*(cb + j));
k -= toprint;
for (j = 0; j < 26; j++) {
ch = currentnode -> children[j];
if (ch) {
if (ch -> baselen + ch -> count >= k) {
currentnode = ch;
break;
} else {
k -= ch -> baselen + ch -> count;
}
}
}
}
}
if (i < q - 1) printf("\n");
}
return 0;
}
In Python3 :
#!/usr/bin/py
# Head ends here
def findStrings(a,query):
    """Print, and return, the answer to every query.

    a     -- list of words
    query -- list of 1-based ranks k

    For each k the k-th smallest distinct substring over all words is printed
    on its own line, or "INVALID" when there are fewer than k substrings.
    The printed answers are also returned as a list, in query order.
    """
    # Every substring is a prefix of some suffix, so the distinct suffixes of
    # all words, in sorted order, are all that is needed.
    suffixes = sorted({s[i:] for s in a for i in range(len(s))})
    # LCP[i] = common-prefix length of suffixes[i-1] and suffixes[i]; the first
    # suffix has no predecessor and is marked with None.
    LCP = [
        None if i == 0 else find_lcp(suffixes[i-1], suffixes[i])
        for i in range(len(suffixes))
    ]
    answers = []
    for q in query:
        answer = find_ith(suffixes, LCP, q-1)
        print(answer)
        answers.append(answer)
    return answers
def find_ith(suffixes, LCP, i):
    """Return the substring with 0-based rank i, or "INVALID" if there is none.

    suffixes -- distinct suffixes in sorted order
    LCP      -- LCP[j] is the common-prefix length of suffixes[j-1] and
                suffixes[j]; None (treated as 0) for the first suffix

    A suffix contributes its prefixes longer than its LCP value, shortest
    first, so the wanted prefix length follows directly from how far i lies
    beyond the substrings counted so far.
    """
    if i < 0:
        return "INVALID"
    seen = 0
    for suf, lcp in zip(suffixes, LCP):
        if lcp is None:
            lcp = 0
        fresh = len(suf) - lcp
        if i < seen + fresh:
            return suf[:lcp + (i - seen) + 1]
        seen += fresh
    return "INVALID"
def find_lcp(s1,s2):
    """Return the length of the longest common prefix of s1 and s2."""
    matched = 0
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched
# Tail starts here
if __name__ == '__main__':
    # Input order: word count, one word per line, query count, one rank per line.
    word_total = int(input())
    words = [input().strip() for _ in range(word_total)]
    query_total = int(input())
    ranks = [int(input()) for _ in range(query_total)]
    findStrings(words, ranks)
View More Similar Problems
Swap Nodes [Algo]
A binary tree is a tree which is characterized by one of the following properties: It can be empty (null). It contains a root node only. It contains a root node with a left subtree, a right subtree, or both. These subtrees are also binary trees. In-order traversal is performed as Traverse the left subtree. Visit root. Traverse the right subtree. For this in-order traversal, start from
View Solution →Kitty's Calculations on a Tree
Kitty has a tree, T , consisting of n nodes where each node is uniquely labeled from 1 to n . Her friend Alex gave her q sets, where each set contains k distinct nodes. Kitty needs to calculate the following expression on each set: $\left(\sum_{\{u,v\}} u \cdot v \cdot dist(u,v)\right) \bmod (10^9+7)$ where: { u ,v } denotes an unordered pair of nodes belonging to the set. dist(u , v) denotes the number of edges on the unique (shortest) path between nodes a
View Solution →Is This a Binary Search Tree?
For the purposes of this challenge, we define a binary tree to be a binary search tree with the following ordering requirements: The data value of every node in a node's left subtree is less than the data value of that node. The data value of every node in a node's right subtree is greater than the data value of that node. Given the root node of a binary tree, can you determine if it's also a
View Solution →Square-Ten Tree
The square-ten tree decomposition of an array is defined as follows: The lowest ($0^{th}$) level of the square-ten tree consists of single array elements in their natural order. The $k^{th}$ level (starting from $1$) of the square-ten tree consists of subsequent array subsegments of length $10^{2^{k-1}}$ in their natural order. Thus, the $1^{st}$ level contains subsegments of length $10^{2^{1-1}} = 10$, the $2^{nd}$ level contains subsegments of length $10^{2^{2-1}} = 100$, the
View Solution →Balanced Forest
Greg has a tree of nodes containing integer data. He wants to insert a node with some non-zero integer value somewhere into the tree. His goal is to be able to cut two edges and have the values of each of the three new trees sum to the same amount. This is called a balanced forest. Being frugal, the data value he inserts should be minimal. Determine the minimal amount that a new node can have to a
View Solution →Jenny's Subtrees
Jenny loves experimenting with trees. Her favorite tree has n nodes connected by n - 1 edges, and each edge is 1 unit in length. She wants to cut a subtree (i.e., a connected part of the original tree) of radius r from this tree by performing the following two steps: 1. Choose a node, x , from the tree. 2. Cut a subtree consisting of all nodes which are not further than r units from node x .
View Solution →