Save Humanity

Problem Statement :

Oh!! Mankind is in trouble again. This time, it's a deadly disease spreading at a rate never seen before. The need of the hour is to set up efficient virus detectors. You are the lead at Central Hospital and you need to find a fast and reliable way to detect the footprints of the virus DNA in that of the patient.

The DNA of the patient as well as of the virus consists of lowercase letters. Since the collected data is raw, there may be some errors. You will need to find all substrings in the patient DNA that either exactly match the virus DNA or have at most one mismatch, i.e., a difference in at most one location.

For example, "aa" and "aa" are matching, "ab" and "aa" are matching, while "abb" and "bab" are not.

Function Description

Complete the virusIndices function in the editor below. It should print a list of space-separated integers that represent the starting indices of matching substrings in increasing order, or No Match! if there are none.

virusIndices has the following parameter(s):

p: a string that represents patient DNA
v: a string that represents virus DNA

Input Format

The first line contains an integer t, the number of test cases.

Each of the next t lines contains two space-separated strings p (the patient DNA) and v (the virus DNA).

Constraints

1 <= t <= 10
1 <= | p |, | v | <= 10^5

All characters in p and v ∈ ascii[a-z].

Output Format

For each test case, output a single line containing a space-delimited list of the starting indices (0-indexed) of the substrings of p that match v according to the condition mentioned above. The indices have to be in increasing order. If there is no matching substring, output No Match!.

Solution :

                            Solution in C :

In  C++  :








#include<iostream>
#include<stdio.h>
#include<vector>
#include<string.h>
#include<stdlib.h>
using namespace std ;
// Capacity, in chars, of each of the two input buffers below.
#define MAXN 200002
// text holds the patient DNA and pat the virus DNA; main() reads them in that
// order and the generators fill them directly.
char text[MAXN],pat[MAXN] ;
// Lengths of pat (szp) and text (szt).
int szp,szt ;

// p1: Z-values of pat followed by text; p2: Z-values of reversed pat followed
// by reversed text. Both are rebuilt by every call to solve1().
int p1[2 * MAXN],p2[2 * MAXN] ;
// Scratch buffer in which solve1() builds the concatenated strings.
char s[2 * MAXN] ;
// Result storage reused (cleared and refilled) by solve1().
vector<int> reta ;
vector<int> solve1()
{
 memset(p1,0,sizeof p1) ;
 memset(p2,0,sizeof p2) ;
 memset(s,0,sizeof s) ;
 int n = 0 ;
 for(int i = 0;i < szp;i++) s[n++] = pat[i] ;
 for(int i = 0;i < szt;i++) s[n++] = text[i] ;
 p1[0] = n ;
 int g = 0,f = 0 ;
 for(int i = 1;i < n;i++)
 {
  if(i < g && p1[i - f] != g - i)
   p1[i] = min(p1[i - f],g - i) ;
  else
  {
   g = max(g,i) ;
   f = i ;
   while(g < n && s[g] == s[g - f]) g++ ;
   p1[i] = g - f ;
  }
 }
 
 n = 0 ;
 for(int i = szp - 1;i >= 0;i--) s[n++] = pat[i] ;
 for(int i = szt - 1;i >= 0;i--) s[n++] = text[i] ;
 p2[0] = n ;
 g = 0,f = 0 ;
 for(int i = 1;i < n;i++)
 {
  if(i < g && p2[i - f] != g - i)
   p2[i] = min(p2[i - f],g - i) ;
  else
  {
   g = max(g,i) ;
   f = i ;
   while(g < n && s[g] == s[g - f]) g++ ;
   p2[i] = g - f ;
  }
 }
 
 reta.clear() ;
 for(int i = 0;i + szp <= szt;i++)
 {
  int start = p1[szp + i] ;
  int end = p2[szp + szt - 1 - (i + szp - 1)] ;
  if(start + end + 1 >= szp) reta.push_back(i) ;
 }
 return reta ;
}

// Brute-force reference: compares pat against every window of text and keeps
// the start indices whose windows differ in at most one position.
// Reads the globals text, pat, szt and szp.
vector<int> solve2()
{
 vector<int> hits ;
 const int lastStart = szt - szp ;
 for(int pos = 0;pos <= lastStart;++pos)
 {
  const char * window = text + pos ;
  int diff = 0 ;
  for(int k = 0;k < szp;++k) diff += (window[k] != pat[k]) ? 1 : 0 ;
  if(diff < 2) hits.push_back(pos) ;
 }
 return hits ;
}

// Fills the globals with a small random instance for the stress test:
// text gets 1..1000 characters and pat gets 1..szt characters, both drawn
// from the alphabet {a, b, c}.
void gen()
{
 memset(text,0,sizeof text) ;
 memset(pat,0,sizeof pat) ;

 szt = 1 + rand() % 1000 ;
 for(char * out = text;out != text + szt;++out) *out = 'a' + rand() % 3 ;

 szp = 1 + rand() % szt ;
 for(char * out = pat;out != pat + szp;++out) *out = 'a' + rand() % 3 ;
}


// Random character that is almost always 'a'; in the rare remaining case a
// second draw picks 'b' (80 out of 100) or 'c'.
char get1()
{
 const bool common = rand() % 50000 < 49998 ;
 if(common) return 'a' ;
 return (rand() % 100 < 80) ? 'b' : 'c' ;
}

// Random character that is 'a' except for one residue out of 50000, in which
// case it is 'b'.
char get2()
{
 return (rand() % 50000 < 49999) ? 'a' : 'b' ;
}

// Writes ten randomized input files named in0.txt .. in9.txt, each holding
// ten test cases (a header line with the count, then text, pat and a blank
// line per case).
//
// File groups:
//   0..2  tiny cases (text length 1..30) over the alphabet {a, b}
//   3..5  large cases drawn from get1()
//   6..7  large cases drawn from get2()
//   8..9  large cases with a cyclic a..z pattern and ten random
//         substitutions in text
// Stops at the first file that cannot be opened, after reporting the error.
void generate()
{
 srand(time(NULL)) ;
 char in[10] = "in .txt" ;
 for(int test = 0;test < 10;test++)
 {
  in[2] = test + '0' ;
  FILE * fout = fopen(in,"w") ;
  if(fout == NULL)
  {
   perror(in) ;
   return ;
  }

  int runs = 10 ;
  fprintf(fout,"%d\n",runs) ;
  for(int t = 0;t < runs;t++)
  {
   // Large cases use 99001..100000 characters so that the stated limit of
   // 10^5 is never exceeded.
   szt = 100000 - rand() % 1000 ;
   if(test <= 2) szt = rand() % 30 + 1 ;
   szp = rand() % szt + 1 ;
   memset(text,0,sizeof text) ;
   memset(pat,0,sizeof pat) ;

   if(test <= 2)
   {
    for(int i = 0;i < szt;i++) text[i] = rand() % 2 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = rand() % 2 + 'a' ;
   }
   else if(test <= 5)
   {
    for(int i = 0;i < szt;i++) text[i] = get1() ;
    for(int i = 0;i < szp;i++) pat[i] = get1() ;
   }
   else if(test <= 7)
   {
    for(int i = 0;i < szt;i++) text[i] = get2() ;
    for(int i = 0;i < szp;i++) pat[i] = get2() ;
   }
   else
   {
    for(int i = 0;i < szt;i++) text[i] = i % 26 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = i % 26 + 'a' ;
    for(int i = 0;i < 10;i++) text[rand() % szt] = 'a' + rand() % 26 ;
   }
   fprintf(fout,"%s\n%s\n\n",text,pat) ;
  }
  // Close the file so the handle is released and buffered data is flushed.
  fclose(fout) ;
 }
}



// Stress test: runs 1000 random instances from gen() through solve1() and the
// brute-force solve2(), printing both answers each time. On the first
// disagreement it prints the failing instance and both answers, then exits
// with a failure status.
void test()
{
 for(int t = 0;t < 1000;t++)
 {
  gen() ;
  vector<int> ret1 = solve1() ;
  vector<int> ret2 = solve2() ;
  for(size_t i = 0;i < ret1.size();i++) cout << ret1[i] << " " ;
  cout << endl ;
  for(size_t i = 0;i < ret2.size();i++) cout << ret2[i] << " " ;
  cout << endl ;
  cout << endl ;
  if(ret1 != ret2)
  {
   cout << "Failed on: " << t << endl ;
   cout << text << endl << pat << endl ;
   for(size_t i = 0;i < ret1.size();i++) cout << ret1[i] << " " ;
   cout << endl ;
   for(size_t i = 0;i < ret2.size();i++) cout << ret2[i] << " " ;
   cout << endl ;
   // Stop here instead of spinning in an empty infinite loop, which the
   // compiler is allowed to assume terminates.
   exit(EXIT_FAILURE) ;
  }
 }
}

// Reads the number of test cases, then for each case the patient DNA and the
// virus DNA, and prints the matching start indices separated by single
// spaces, or "No Match!" when there are none. Stops early on malformed input.
int main()
{
// srand(time(NULL));
// generate() ; return 0 ;
// test() ; return 0 ;
 int runs ;
 if(scanf("%d",&runs) != 1) return 0 ;
 while(runs-- > 0)
 {
  memset(text,0,sizeof text) ;
  memset(pat,0,sizeof pat) ;
  // Field width is MAXN - 1 so an over-long token cannot overflow the buffers.
  if(scanf("%200001s%200001s",text,pat) != 2) break ;
  szt = strlen(text) ;
  szp = strlen(pat) ;
  vector<int> ret1 = solve1() ;
  if(ret1.empty()) printf("No Match!") ;
  for(size_t i = 0;i < ret1.size();i++)
  {
   if(i > 0) printf(" ") ;
   printf("%d",ret1[i]) ;
  }
  printf("\n") ;
 }
 return 0 ;
}









In   Java  :








import java.util.*;


/**
 * Finds every window of the patient DNA that equals the virus DNA or differs
 * from it in at most one position, using polynomial prefix hashes and a
 * divide-and-conquer check per window.
 */
public class Solution {
    private static final Scanner sc = new Scanner(System.in);

    /** Prefix hashes of the patient DNA (text) and of the virus DNA (target). */
    static RollingHash text, target;

    /**
     * Checks whether target[l, r) and text[s + l, s + r) differ in at most one
     * position, judged by hash equality.
     *
     * The range is split in half: if both halves differ the answer is false;
     * if exactly one half differs the search continues inside that half.
     * A range of length below two can hold at most one mismatch.
     */
    private static boolean ok(final int s, final int l, final int r) {
        if (r - l < 2) {
            return true;
        }

        final int m = (l + r) / 2;
        final boolean leftEqual = text.Hash(s + l, s + m) == target.Hash(l, m);
        final boolean rightEqual = text.Hash(s + m, s + r) == target.Hash(m, r);

        if (leftEqual) {
            return rightEqual || ok(s, m, r);
        }
        return rightEqual && ok(s, l, m);
    }

    /**
     * Builds the output line for one test case.
     *
     * @param s patient DNA
     * @param t virus DNA
     * @return the matching 0-based start indices in increasing order separated
     *         by single spaces, or "No Match!" when there are none
     */
    static String findMatches(final String s, final String t) {
        text = new RollingHash(s);
        target = new RollingHash(t);
        final int len = t.length();
        final StringBuilder out = new StringBuilder();
        for (int i = 0; i + len <= s.length(); i++) {
            if (ok(i, 0, len)) {
                if (out.length() > 0) {
                    out.append(' ');
                }
                out.append(i);
            }
        }
        return out.length() == 0 ? "No Match!" : out.toString();
    }

    public static void main(String[] args) {
        int T = sc.nextInt();
        while (T-- > 0) {
            final String s = sc.next();
            final String t = sc.next();
            System.out.println(findMatches(s, t));
        }
    }

    /**
     * Polynomial prefix hashes of a string; all arithmetic is done in long and
     * wraps around on overflow.
     */
    static class RollingHash {
        private static final long RADIX = 1000000009L;
        private int n;
        /** table[k] is the hash of the first k characters; pow[k] is RADIX^k. */
        long[] table, pow;

        public RollingHash(String str) {
            n = str.length();

            table = new long[n + 1];
            pow = new long[n + 1];

            pow[0] = 1;
            table[0] = 0;
            for (int i = 0; i < n; i++) {
                table[i + 1] = table[i] * RADIX + str.charAt(i);
                pow[i + 1] = pow[i] * RADIX;
            }
        }

        /** Returns the hash of the substring [i, j). */
        public long Hash(int i, int j) {
            assert (i <= j);
            return table[j] - table[i] * pow[j - i];
        }
    }
}










In   C :








#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

/* Size in chars of each input buffer allocated by main(). */
#define kMaxSize 100001
/* Largest number of differing positions that still counts as a match. */
#define kMaxMismatch 1

/* Shorthand for long long int. */
typedef long long int lli;

/* Returns 1 when the first vc characters of p and v differ in at most
   kMaxMismatch positions, and -1 otherwise. */
int findDna8b(char* p, char* v, int vc);

/*
 * Reads the number of test cases, then for each case the patient DNA p and
 * the virus DNA v. Prints the 0-based start indices of every window of p that
 * matches v with at most kMaxMismatch mismatches, separated by single spaces,
 * or "No Match!" when there are none. Returns 1 if the buffers cannot be
 * allocated, 0 otherwise.
 */
int main()
{
    // Allocate memory for strings.
    char* p = (char*)malloc(kMaxSize * sizeof(char));
    char* v = (char*)malloc(kMaxSize * sizeof(char));
    if (p == NULL || v == NULL)
    {
        fprintf(stderr, "out of memory\n");
        free(p);
        free(v);
        return 1;
    }

    // Test cases.
    int tc;
    if (scanf("%d", &tc) != 1)
        tc = 0;
    while (0 < tc--)
    {
        // Load strings. The field width is kMaxSize - 1 so that an over-long
        // token cannot overflow the buffers; stop on malformed input.
        if (scanf("%100000s %100000s", p, v) != 2)
            break;
        int pc = (int)strlen(p);
        int vc = (int)strlen(v);

        // Look for v in p. Print starting index of each match.
        int c = (pc-vc);
        int matched = 0;
        for (int i = 0; i <= c; i++){
            if (findDna8b(&p[i], v, vc) == 1){
                // Separate indices with single spaces, no trailing space.
                if (matched > 0)
                    printf(" ");
                printf("%d", i);
                matched++;
            }
        }

        // We have to indicate if no matches were found.
        if (matched <= 0)
            printf("No Match!\n");
        else
            printf("\n");
    }

    free(p);
    free(v);
    return 0;
}

/*
 * Compares the first vc characters of p and v.
 *
 * Returns 1 when they differ in at most kMaxMismatch positions and -1 as soon
 * as that limit is exceeded.
 *
 * The strings are compared eight bytes at a time with memcmp; only chunks that
 * differ are inspected byte by byte. memcmp is used instead of reading the
 * data through a long long pointer because p may start at any byte offset, so
 * such a read would be misaligned and would violate the aliasing rules.
 */
int findDna8b(char* p, char* v, int vc)
{
    enum { kChunk = 8 };

    int mismatch = 0;
    int i = 0;

    /* Whole chunks. */
    for (; i + kChunk <= vc; i += kChunk){
        if (memcmp(p + i, v + i, kChunk) != 0)
        {
            for (int j = i; j < i + kChunk; j++){
                if (p[j] != v[j]){
                    mismatch++;
                    if (mismatch > kMaxMismatch) return -1;
                }
            }
        }
    }

    /* Remaining tail of fewer than kChunk characters. */
    for (; i < vc; i++){
        if (p[i] != v[i]){
            mismatch++;
            if (mismatch > kMaxMismatch) return -1;
        }
    }

    return 1;
}








In   Python3  :






#!/bin/python3

import os
import sys
import math 
import re
from collections import defaultdict
#
# Complete the virusIndices function below.
#
def virusIndices(p, v):
    """Print the 0-based start indices of the windows of p that match v.

    A window matches when it equals v or differs from it in exactly one
    position. The indices are printed in increasing order on one line,
    separated by single spaces; 'No Match!' is printed when there are none.

    The search is a bit-parallel scan: bit i of `exact` is set when v[:i + 1]
    equals the text ending at the current character, and bit i of `fuzzy` is
    set when that holds with at most one substituted character.
    """
    width = len(v)

    # Bit i of char_bits[c] is set when v[i] == c.
    char_bits = defaultdict(int)
    for pos in range(width):
        char_bits[v[pos]] |= 1 << pos

    top_bit = 1 << (width - 1)
    exact = 0
    fuzzy = 0
    found = []
    for end, ch in enumerate(p):
        allowed = char_bits[ch]
        grown = (exact << 1) | 1
        exact = grown & allowed
        # Either extend a one-mismatch prefix with a matching character, or
        # spend the single mismatch on the current character.
        fuzzy = (((fuzzy << 1) | 1) & allowed) | grown
        if (exact | fuzzy) & top_bit:
            found.append(end - width + 1)

    if found:
        print(' '.join(map(str, found)))
    else:
        print('No Match!')
        
if __name__ == '__main__':
    # First line: number of test cases; each following line: "<p> <v>".
    case_count = int(input())
    for _ in range(case_count):
        fields = input().split()
        virusIndices(fields[0], fields[1])

View More Similar Problems

Minimum Average Waiting Time

Tieu owns a pizza restaurant and he manages it in his own way. While in a normal restaurant, a customer is served by following the first-come, first-served rule, Tieu simply minimizes the average waiting time of his customers. So he gets to decide who is served first, regardless of how sooner or later a person comes. Different kinds of pizzas take different amounts of time to cook. Also, once h

View Solution →

Merging Communities

People connect with each other in a social network. A connection between Person I and Person J is represented as . When two persons belonging to different communities connect, the net effect is the merger of both communities which I and J belongs to. At the beginning, there are N people representing N communities. Suppose person 1 and 2 connected and later 2 and 3 connected, then ,1 , 2 and 3 w

View Solution →

Components in a graph

There are 2 * N nodes in an undirected graph, and a number of edges connecting some nodes. In each edge, the first value will be between 1 and N, inclusive. The second node will be between N + 1 and 2 * N, inclusive. Given a list of edges, determine the size of the smallest and largest connected components that have 2 or more nodes. A node can have any number of connections. The highest node valu

View Solution →

Kundu and Tree

Kundu is true tree lover. Tree is a connected graph having N vertices and N-1 edges. Today when he got a tree, he colored each edge with one of either red(r) or black(b) color. He is interested in knowing how many triplets(a,b,c) of vertices are there , such that, there is atleast one edge having red color on all the three paths i.e. from vertex a to b, vertex b to c and vertex c to a . Note that

View Solution →

Super Maximum Cost Queries

Victoria has a tree, T , consisting of N nodes numbered from 1 to N. Each edge from node Ui to Vi in tree T has an integer weight, Wi. Let's define the cost, C, of a path from some node X to some other node Y as the maximum weight ( W ) for any edge in the unique path from node X to Y node . Victoria wants your help processing Q queries on tree T, where each query contains 2 integers, L and

View Solution →

Contacts

We're going to make our own Contacts application! The application must perform two types of operations: 1 . add name, where name is a string denoting a contact name. This must store name as a new contact in the application. find partial, where partial is a string denoting a partial name to search the application for. It must count the number of contacts starting partial with and print the co

View Solution →

Our Effort to provide you the best solutions requires some appreciation

Please disable your adblocker and refresh

Save Humanity

Problem Statement :

Solution :

View More Similar Problems

Minimum Average Waiting Time

Merging Communities

Components in a graph

Kundu and Tree

Super Maximum Cost Queries

Contacts