Save Humanity


Problem Statement :


Oh!! Mankind is in trouble again. This time, it's a deadly disease spreading at a rate never seen before. The need of the hour is to set up efficient virus detectors. You are the lead at Central Hospital and you need to find a fast and reliable way to detect the footprints of the virus DNA in that of the patient.

The DNA of the patient as well as of the virus consists of lowercase letters. Since the collected data is raw, there may be some errors. You will need to find all substrings in the patient DNA that either exactly match the virus DNA or have at most one mismatch, i.e., a difference in at most one location.

For example, "aa" and "aa" are matching, "ab" and "aa" are matching, while "abb" and "bab" are not.

Function Description

Complete the virusIndices function in the editor below. It should print a list of space-separated integers that represent the starting indices of matching substrings in increasing order, or No Match! if there are none.

virusIndices has the following parameter(s):

p: a string that represents patient DNA
v: a string that represents virus DNA

Input Format

The first line contains an integer t, the number of test cases.

Each of the next t lines contains two space-separated strings p (the patient DNA) and v (the virus DNA).

Constraints

1  <=  t  <= 10
1  <=  | p |, | v |  <=  10^5

All characters in p and v ∈ ascii[a-z].


Output Format

For each test case, output a single line containing a space-delimited list of starting indices (0-indexed) of the substrings of p which match v according to the condition mentioned above. The indices have to be in increasing order. If there is no matching substring, output No Match!.



Solution :



title-img


                            Solution in C :

In  C++  :








#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include<algorithm>
#include<iostream>
#include<vector>
using namespace std ;
// Capacity of the two input buffers; the work arrays below are twice this size.
#define MAXN 200002
// text: the string that is searched; pat: the pattern looked for inside it.
char text[MAXN],pat[MAXN] ;
// Lengths of pat (szp) and text (szt); assigned by main() and by the generators.
int szp,szt ;

// Prefix-match tables that solve1() builds: p1 over pat+text, p2 over the reversed strings.
int p1[2 * MAXN],p2[2 * MAXN] ;
// Scratch buffer in which solve1() concatenates the pattern and the text.
char s[2 * MAXN] ;
// Result list that solve1() clears, fills and returns.
vector<int> reta ;
// Fills z[k] with the length of the longest common prefix of str and str + k,
// for every k in [1, len); z[0] is set to len. Runs the usual "match window"
// scan: [left, right) is the right-most window known to equal a prefix of str.
static void fillPrefixTable(const char *str,int len,int *z)
{
 z[0] = len ;
 int right = 0,left = 0 ;
 for(int pos = 1 ; pos < len ; ++pos)
 {
  const int room = right - pos ;
  if(pos < right && z[pos - left] != room)
  {
   // Entirely decided by the value already known inside the window.
   z[pos] = min(z[pos - left],room) ;
   continue ;
  }
  // Otherwise start (or resume) explicit comparison from the window's edge.
  if(right < pos) right = pos ;
  left = pos ;
  while(right < len && str[right] == str[right - left]) ++right ;
  z[pos] = right - left ;
 }
}

// Returns every offset i such that text[i .. i+szp) differs from pat in at most
// one position.
//
// Reads the globals text/pat/szt/szp and uses p1, p2, s and reta as work space.
// For each offset it combines the length of the matching prefix (taken from the
// table over pat+text) with the length of the matching suffix (taken from the
// table over reversed pat + reversed text); the window is accepted when the two
// together leave at most one character uncovered.
vector<int> solve1()
{
 memset(p1,0,sizeof p1) ;
 memset(p2,0,sizeof p2) ;
 memset(s,0,sizeof s) ;

 const int total = szp + szt ;

 // Forward pass over pat followed by text.
 memcpy(s,pat,szp) ;
 memcpy(s + szp,text,szt) ;
 fillPrefixTable(s,total,p1) ;

 // Backward pass over reversed pat followed by reversed text.
 int fill = 0 ;
 for(int k = szp ; k-- > 0 ; ) s[fill++] = pat[k] ;
 for(int k = szt ; k-- > 0 ; ) s[fill++] = text[k] ;
 fillPrefixTable(s,total,p2) ;

 reta.clear() ;
 const int lastShift = szt - szp ;
 for(int shift = 0 ; shift <= lastShift ; ++shift)
 {
  const int prefixLen = p1[szp + shift] ;
  // The window ends at text[shift + szp - 1], which sits at index szt - shift
  // of the reversed concatenation.
  const int suffixLen = p2[szt - shift] ;
  if(prefixLen + suffixLen + 1 >= szp) reta.push_back(shift) ;
 }
 return reta ;
}

// Brute-force reference for solve1(): compares pat against every window of text
// character by character and returns the offsets with at most one mismatch.
// Reads the globals text, pat, szt and szp.
vector<int> solve2()
{
 vector<int> hits ;
 const int lastShift = szt - szp ;
 for(int shift = 0 ; shift <= lastShift ; ++shift)
 {
  const char *window = text + shift ;
  int differing = 0 ;
  for(int k = 0 ; k < szp ; ++k)
   differing += (window[k] != pat[k]) ? 1 : 0 ;
  if(!(differing > 1)) hits.push_back(shift) ;
 }
 return hits ;
}

// Produces one small random case in the globals: text gets 1..1000 characters
// and pat gets 1..szt characters, all drawn from {'a','b','c'}.
void gen()
{
 memset(text,0,sizeof text) ;
 memset(pat,0,sizeof pat) ;

 szt = 1 + rand() % 1000 ;
 for(char *c = text ; c != text + szt ; ++c) *c = (char)('a' + rand() % 3) ;

 szp = 1 + rand() % szt ;
 for(char *c = pat ; c != pat + szp ; ++c) *c = (char)('a' + rand() % 3) ;
}


// Random character that is almost always 'a': the first draw picks 'a' when
// rand() % 50000 is below 49998; otherwise a second draw picks 'b' when
// rand() % 100 is below 80 and 'c' in the remaining cases.
char get1()
{
 const bool common = rand() % 50000 < 49998 ;
 if(common) return 'a' ;
 return (rand() % 100 < 80) ? 'b' : 'c' ;
}

// Random character that is 'a' unless rand() % 50000 reaches 49999, in which
// case it is 'b'.
char get2()
{
 return (rand() % 50000 >= 49999) ? 'b' : 'a' ;
}

// Writes ten input files named "in0.txt" .. "in9.txt", each holding 10 cases.
//
// File layout: the case count on the first line, then for every case the text,
// the pattern and an empty line.
//
// Case families, chosen by file number:
//   0..2  short strings (text length 1..30) over {'a','b'}
//   3..5  long strings produced by get1()
//   6..7  long strings produced by get2()
//   8..9  the alphabet repeated cyclically, with 10 random positions of the
//         text overwritten by random letters
// The pattern length is always drawn from 1..szt.
//
// A file that cannot be opened is reported on stderr and skipped. Every opened
// file is closed before moving on to the next one.
void generate()
{
 srand((unsigned)time(NULL)) ;
 char in[10] = "in .txt" ;
 for(int test = 0;test < 10;test++)
 {
  in[2] = test + '0' ;
  FILE * fout = fopen(in,"w") ;
  if(fout == NULL)
  {
   // Do not write through a null stream; report the failure and go on.
   perror(in) ;
   continue ;
  }

  int runs = 10 ;
  fprintf(fout,"%d\n",runs) ;
  for(int t = 0;t < runs;t++)
  {
   // Long cases: 99001..100000 characters, staying inside the 10^5 limit of
   // the problem statement.
   szt = 100000 - rand() % 1000 ;
   if(test <= 2) szt = rand() % 30 + 1 ;
   szp = rand() % szt + 1 ;
   // Zeroing the buffers also provides the terminators needed by "%s" below.
   memset(text,0,sizeof text) ;
   memset(pat,0,sizeof pat) ;

   if(test <= 2)
   {
    for(int i = 0;i < szt;i++) text[i] = rand() % 2 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = rand() % 2 + 'a' ;
   }
   else if(test <= 5)
   {
    for(int i = 0;i < szt;i++) text[i] = get1() ;
    for(int i = 0;i < szp;i++) pat[i] = get1() ;
   }
   else if(test <= 7)
   {
    for(int i = 0;i < szt;i++) text[i] = get2() ;
    for(int i = 0;i < szp;i++) pat[i] = get2() ;
   }
   else
   {
    for(int i = 0;i < szt;i++) text[i] = i % 26 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = i % 26 + 'a' ;
    for(int i = 0;i < 10;i++) text[rand() % szt] = 'a' + rand() % 26 ;
   }
   fprintf(fout,"%s\n%s\n\n",text,pat) ;
  }

  // Flush and release the stream; the original left all ten files open.
  fclose(fout) ;
 }
}



// Randomised self-check: runs 1000 cases from gen() through both solve1() and
// solve2(), printing both result lists for every case.
//
// On the first disagreement it prints the case number, the offending text and
// pattern and both result lists, then terminates the program with a failure
// status. (An empty `while(1) ;` was used here before; a side-effect-free
// infinite loop is undefined behaviour in C++ and may be removed by the
// compiler, so the stop is now explicit.)
void test()
{
 for(int t = 0;t < 1000;t++)
 {
  gen() ;
  vector<int> ret1 = solve1() ;
  vector<int> ret2 = solve2() ;
  for(size_t i = 0;i < ret1.size();i++) cout << ret1[i] << " " ;
  cout << endl ;
  for(size_t i = 0;i < ret2.size();i++) cout << ret2[i] << " " ;
  cout << endl ;
  cout << endl ;
  if(ret1 != ret2)
  {
   cout << "Failed on: " << t << endl ;
   cout << text << endl << pat << endl ;
   for(size_t i = 0;i < ret1.size();i++) cout << ret1[i] << " " ;
   cout << endl ;
   for(size_t i = 0;i < ret2.size();i++) cout << ret2[i] << " " ;
   cout << endl ;
   exit(EXIT_FAILURE) ;
  }
 }
}

// Reads the number of cases, then for each case the text and the pattern, and
// prints the space-separated offsets returned by solve1() on one line, or
// "No Match!" when there are none, as the problem statement requires.
// Input that ends early or cannot be parsed stops the processing.
int main()
{
// Developer switches: enable one of these to produce input files or to run the
// randomised self-check instead of solving stdin.
// srand(time(NULL));
// generate() ; return 0 ;
// test() ; return 0 ;
 int runs ;
 if(scanf("%d",&runs) != 1) return 0 ;
 while(runs-- > 0)
 {
  memset(text,0,sizeof text) ;
  memset(pat,0,sizeof pat) ;
  // Field width is MAXN - 1 so an oversized token cannot overrun the buffers;
  // keep it in step with MAXN.
  if(scanf("%200001s%200001s",text,pat) != 2) break ;
  szt = strlen(text) ;
  szp = strlen(pat) ;
  vector<int> ret1 = solve1() ;
  if(ret1.empty())
  {
   printf("No Match!\n") ;
   continue ;
  }
  for(size_t i = 0;i < ret1.size();i++)
  {
   if(i > 0) printf(" ") ;
   printf("%d",ret1[i]) ;
  }
  printf("\n") ;
 }
 return 0 ;
}









In   Java  :








import java.util.*;


/**
 * Finds every substring of the patient DNA that equals the virus DNA or differs
 * from it in exactly one position.
 *
 * Both strings are indexed with a polynomial rolling hash (arithmetic wraps
 * around in {@code long}). A candidate window is then checked by bisection:
 * when one half hashes equal, only the other half can contain the mismatch.
 * Equality is decided by hash values only, so a hash collision would be
 * reported as a match.
 */
public class Solution {
    private static final Scanner sc = new Scanner(System.in);

    /** Hash indexes of the current patient string and virus string. */
    static RollingHash text, target;

    /**
     * Tells whether {@code target[l, r)} and {@code text[s + l, s + r)} differ
     * in at most one position.
     *
     * @param s offset of the window inside the text
     * @param l start (inclusive) of the range still under inspection
     * @param r end (exclusive) of the range still under inspection
     */
    private static boolean ok(final int s, final int l, final int r) {
        // Zero or one character left: it may be the single allowed mismatch.
        if (r - l < 2) {
            return true;
        }

        final int m = (l + r) / 2;
        final long l1 = text.Hash(s + l, s + m);
        final long l2 = target.Hash(l, m);
        final long r1 = text.Hash(s + m, s + r);
        final long r2 = target.Hash(m, r);

        if (l1 == l2) {
            return r1 == r2 ? true : ok(s, m, r);
        } else if (r1 == r2) {
            return ok(s, l, m);
        }

        // Both halves differ, so there are at least two mismatches.
        return false;
    }

    /**
     * Builds the output line for one test case.
     *
     * @param patient the string that is searched
     * @param virus   the pattern
     * @return the matching start indices in increasing order separated by single
     *         spaces, or {@code "No Match!"} when no window matches
     */
    static String findMatches(final String patient, final String virus) {
        text = new RollingHash(patient);
        target = new RollingHash(virus);

        final int width = virus.length();
        final StringBuilder out = new StringBuilder();
        for (int i = 0; i + width <= patient.length(); i++) {
            if (ok(i, 0, width)) {
                if (out.length() > 0) {
                    out.append(' ');
                }
                out.append(i);
            }
        }
        return out.length() == 0 ? "No Match!" : out.toString();
    }

    /** Reads the case count and then one "patient virus" pair per case from stdin. */
    public static void main(String[] args) {
        int T = sc.nextInt();
        while (T-- > 0) {
            final String s = sc.next();
            final String t = sc.next();
            System.out.println(findMatches(s, t));
        }
    }


    /** Prefix hashes of a string, allowing the hash of any substring in O(1). */
    static class RollingHash {
        private static final long RADIX = 1000000009L;
        private int n;
        long[] table, pow;

        /** Precomputes the prefix hashes and the powers of the radix for {@code str}. */
        public RollingHash(String str) {
            n = str.length();

            table = new long[n + 1];
            pow = new long[n + 1];

            pow[0] = 1;
            table[0] = 0;
            for (int i = 0; i < n; i++) {
                table[i + 1] = table[i] * RADIX + str.charAt(i);
                pow[i + 1] = pow[i] * RADIX;
            }
        }

        /** Hash of the substring {@code [i, j)}; requires {@code i <= j}. */
        public long Hash(int i, int j) {
            assert(i <= j);
            return table[j] - table[i] * pow[j - i];
        }
    }


}










In   C :








#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

// Number of chars allocated in main for each of the two input string buffers.
#define kMaxSize 100001
// Largest number of differing characters findDna8b accepts before rejecting.
#define kMaxMismatch 1

// Shorthand for long long int.
typedef long long int lli;

// Defined after main. Compares the first vc characters of p and v; returns 1
// when they differ in at most kMaxMismatch positions and -1 otherwise.
int findDna8b(char* p, char* v, int vc);

// Reads the number of test cases, then one "p v" pair per case. For each case
// it tries every offset of p with findDna8b and prints the matching offsets,
// each followed by a space, or "No Match!" when there are none.
// Returns 1 if the buffers cannot be allocated, 0 otherwise.
int main()
{
    // Allocate memory for strings.
    char* p = (char*)malloc(kMaxSize * sizeof(char));
    char* v = (char*)malloc(kMaxSize * sizeof(char));
    if (p == NULL || v == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        free(p);
        free(v);
        return 1;
    }

    // Test cases.
    int tc;
    if (scanf("%d", &tc) != 1)
        tc = 0;
    while (0 < tc--)
    {
        // Load strings. The field width is kMaxSize - 1 so that a token can
        // never overrun its buffer; keep it in step with kMaxSize.
        if (scanf("%100000s %100000s", p, v) != 2)
            break;
        int pc = (int)strlen(p);
        int vc = (int)strlen(v);

        // Look for v in p. Print starting index of each match.
        // When v is longer than p, c is negative and the loop does not run.
        int c = (pc-vc);
        int matched = 0;
        for (int i = 0; i <= c; i++){
            if (findDna8b(&p[i], v, vc) == 1){
                matched++;
                printf("%d ", i);
            }
        }

        // We have to indicate if no matches were found.
        if (matched <= 0)
            printf("No Match!\n");
        else
            printf("\n");
    }

    // Release the buffers; they were never freed before.
    free(p);
    free(v);
    return 0;
}

// Compares the first vc characters of p and v.
//
// Returns 1 when they differ in at most kMaxMismatch positions, -1 as soon as
// one more difference than that has been seen.
//
// The strings are compared eight bytes at a time and only chunks that differ
// are inspected character by character. The chunk comparison uses memcmp
// rather than reading the characters through a long long pointer: p is passed
// at arbitrary offsets into the caller's buffer, so such a pointer could be
// misaligned, and accessing char data through it breaks the aliasing rules.
int findDna8b(char* p, char* v, int vc)
{
    int c = vc/8;
    int mismatch = 0;
    int i;
    for (i = 0; i < c; i++){
        if (memcmp(p + i*8, v + i*8, 8) != 0)
        {
            for (int j = i*8; j < (i*8)+8; j++){
                if (p[j] != v[j]){
                    mismatch++;
                    if (mismatch > kMaxMismatch) return -1;
                }
            }
        }
    }

    // Remaining characters that do not fill a whole 8-byte chunk.
    for (int j = i*8; j < vc; j++){
        if (p[j] != v[j]){
            mismatch++;
            if (mismatch > kMaxMismatch) return -1;
        }
    }

    return 1;
}








In   Python3  :






#!/bin/python3

import os
import sys
import math 
import re
from collections import defaultdict
#
# Complete the virusIndices function below.
#
def virusIndices(p, v):
    """Print where ``v`` occurs in ``p`` with at most one mismatching character.

    The 0-based start indices are printed on one line in increasing order,
    separated by single spaces; ``No Match!`` is printed when there are none.
    """

    def scan(pattern, text):
        """Yield the start index of every window of ``text`` within Hamming distance 1 of ``pattern``.

        Bit-parallel search: bit k of ``exact`` is set when pattern[0..k]
        equals the text ending at the current character; ``fuzzy`` is the same
        with one substitution allowed.
        """
        width = len(pattern)
        char_bits = defaultdict(int)
        for pos, ch in enumerate(pattern):
            char_bits[ch] = char_bits[ch] | (1 << pos)

        top_bit = 1 << (width - 1)
        exact = 0
        fuzzy = 0
        for end, ch in enumerate(text):
            bits = char_bits[ch]
            shifted_exact = (exact << 1) | 1
            shifted_fuzzy = (fuzzy << 1) | 1
            exact = shifted_exact & bits
            # Either extend a one-error match with a matching character, or
            # spend the single allowed substitution on the current character.
            fuzzy = (shifted_fuzzy & bits) | shifted_exact
            if (exact | fuzzy) & top_bit:
                yield end - width + 1

    found = [str(start) for start in scan(v, p)]

    if found:
        print(' '.join(found))
    else:
        print('No Match!')
        
if __name__ == '__main__':
    # First line: number of test cases; every following line holds "p v".
    case_count = int(input())

    for _ in range(case_count):
        fields = input().split()
        virusIndices(fields[0], fields[1])
                        








View More Similar Problems

Fibonacci Numbers Tree

Shashank loves trees and math. He has a rooted tree, T, consisting of N nodes uniquely labeled with integers in the inclusive range [1, N]. The node labeled as 1 is the root node of tree T, and each node in T is associated with some positive integer value (all values are initially 0). Let's define Fk as the Kth Fibonacci number. Shashank wants to perform 2 types of operations over his tree, T

View Solution →

Pair Sums

Given an array, we define its value to be the value obtained by following these instructions: Write down all pairs of numbers from this array. Compute the product of each pair. Find the sum of all the products. For example, for a given array [7, 2, -1, 2], note that (7, 2) is listed twice, one for each occurrence of 2. Given an array of integers, find the largest v

View Solution →

Lazy White Falcon

White Falcon just solved the data structure problem below using heavy-light decomposition. Can you help her find a new solution that doesn't require implementing any fancy techniques? There are 2 types of query operations that can be performed on a tree: 1 u x: Assign x as the value of node u. 2 u v: Print the sum of the node values in the unique path from node u to node v. Given a tree wi

View Solution →

Ticket to Ride

Simon received the board game Ticket to Ride as a birthday present. After playing it with his friends, he decides to come up with a strategy for the game. There are n cities on the map and n - 1 road plans. Each road plan consists of the following: Two cities which can be directly connected by a road. The length of the proposed road. The entire road plan is designed in such a way that if o

View Solution →

Heavy Light White Falcon

Our lazy white falcon finally decided to learn heavy-light decomposition. Her teacher gave an assignment for her to practice this new technique. Please help her by solving this problem. You are given a tree with N nodes and each node's value is initially 0. The problem asks you to operate the following two types of queries: "1 u x" assign x to the value of the node u. "2 u v" print the maxim

View Solution →

Number Game on a Tree

Andy and Lily love playing games with numbers and trees. Today they have a tree consisting of n nodes and n -1 edges. Each edge i has an integer weight, wi. Before the game starts, Andy chooses an unordered pair of distinct nodes, ( u , v ), and uses all the edge weights present on the unique path from node u to node v to construct a list of numbers. For example, in the diagram below, Andy

View Solution →