Save Humanity


Problem Statement :


Oh!! Mankind is in trouble again. This time, it's a deadly disease spreading at a rate never seen before. The need of the hour is to set up efficient virus detectors. You are the lead at Central Hospital and you need to find a fast and reliable way to detect the footprints of the virus DNA in that of the patient.

The DNA of the patient as well as of the virus consists of lowercase letters. Since the collected data is raw, there may be some errors. You will need to find all substrings in the patient DNA that either exactly match the virus DNA or have at most one mismatch, i.e., a difference in at most one location.

For example, "aa" and "aa" are matching, "ab" and "aa" are matching, while "abb" and "bab" are not.

Function Description

Complete the virusIndices function in the editor below. It should print a list of space-separated integers that represent the starting indices of matching substrings in increasing order, or No Match! if there are none.

virusIndices has the following parameter(s):

p: a string that represents patient DNA
v: a string that represents virus DNA

Input Format

The first line contains an integer t, the number of test cases.

Each of the next t lines contains two space-separated strings p (the patient DNA) and v (the virus DNA).

Constraints

1  <=  t  <= 10
1  <=  | p |, | v |  <=  10^5

All characters in p and v ∈ ascii[a-z].


Output Format

For each test case, output a single line containing a space-delimited list of starting indices (0-indexed) of the substrings of p which match v according to the condition mentioned above. The indices have to be in increasing order. If there is no matching substring, output No Match!.



Solution :



title-img


                            Solution in C :

In  C++  :








#include<iostream>
#include<stdio.h>
#include<vector>
#include<string.h>
#include<stdlib.h>
using namespace std ;
// Capacity (in chars) of each input buffer below.
#define MAXN 200002
// text: the string that is searched; pat: the string searched for.
// Both are zero-filled before being (re)loaded by main(), gen() and generate().
char text[MAXN],pat[MAXN] ;
// szp = length of pat, szt = length of text.
int szp,szt ;

// Work arrays filled by solve1(): p1 is computed over pat followed by text,
// p2 over reversed pat followed by reversed text.
int p1[2 * MAXN],p2[2 * MAXN] ;
// Scratch buffer in which solve1() builds those concatenations.
char s[2 * MAXN] ;
// Result buffer reused by solve1() between calls.
vector<int> reta ;
// Fills z[0..n) so that z[i] is the length of the longest common prefix of
// s[0..n) and s[i..n); z[0] is defined as n. [left,right) is the window of the
// right-most prefix match discovered so far.
static void buildZ(int * z,int n)
{
 z[0] = n ;
 int right = 0,left = 0 ;
 for(int i = 1;i < n;i++)
 {
  if(i < right && z[i - left] != right - i)
  {
   // Strictly inside the window: the answer is copied from the mirrored position.
   z[i] = min(z[i - left],right - i) ;
   continue ;
  }
  // Otherwise extend the window by direct character comparison.
  right = max(right,i) ;
  left = i ;
  while(right < n && s[right] == s[right - left]) right++ ;
  z[i] = right - left ;
 }
}

// Returns (and stores in reta) every index i such that text[i..i+szp) differs
// from pat in at most one position.
// For each i it combines the longest prefix of pat matching at i (from p1) with
// the longest suffix of pat matching at the window's end (from p2): if together
// with one free character they cover szp characters, the window is accepted.
vector<int> solve1()
{
 memset(p1,0,sizeof p1) ;
 memset(p2,0,sizeof p2) ;
 memset(s,0,sizeof s) ;

 // Forward pass over pat followed by text.
 int len = 0 ;
 for(int k = 0;k < szp;k++) s[len++] = pat[k] ;
 for(int k = 0;k < szt;k++) s[len++] = text[k] ;
 buildZ(p1,len) ;

 // Backward pass over reversed pat followed by reversed text.
 len = 0 ;
 for(int k = szp;k-- > 0;) s[len++] = pat[k] ;
 for(int k = szt;k-- > 0;) s[len++] = text[k] ;
 buildZ(p2,len) ;

 reta.clear() ;
 for(int i = 0;i + szp <= szt;i++)
 {
  const int prefixLen = p1[szp + i] ;
  // The window ends at text[i + szp - 1]; in the reversed concatenation that
  // character sits at index szp + (szt - 1 - (i + szp - 1)) == szt - i.
  const int suffixLen = p2[szt - i] ;
  if(prefixLen + suffixLen + 1 >= szp) reta.push_back(i) ;
 }
 return reta ;
}

// Brute-force reference: returns every index i such that text[i..i+szp) and
// pat differ in at most one position, by counting mismatches directly.
vector<int> solve2()
{
 vector<int> hits ;
 const int lastStart = szt - szp ;
 for(int pos = 0;pos <= lastStart;pos++)
 {
  int diff = 0 ;
  for(int k = 0;k < szp;k++)
  {
   if(text[pos + k] != pat[k]) ++diff ;
  }
  if(diff < 2) hits.push_back(pos) ;
 }
 return hits ;
}

// Fills text with 1..1000 random letters from {a,b,c} and pat with
// 1..szt random letters from the same alphabet, updating szt and szp.
void gen()
{
 szt = 1 + rand() % 1000 ;
 memset(text,0,sizeof text) ;
 for(char * c = text;c != text + szt;++c) *c = 'a' + rand() % 3 ;

 szp = 1 + rand() % szt ;
 memset(pat,0,sizeof pat) ;
 for(char * c = pat;c != pat + szp;++c) *c = 'a' + rand() % 3 ;
}


// Returns 'a' unless the first draw lands in the top 2 of 50000 values; in
// that case a second draw picks 'b' (values 0..79 of 100) or 'c'.
char get1()
{
 if(rand() % 50000 >= 49998)
 {
  return (rand() % 100 < 80) ? 'b' : 'c' ;
 }
 return 'a' ;
}

// Returns 'b' only when the draw is the single top value of 50000, else 'a'.
char get2()
{
 return (rand() % 50000 >= 49999) ? 'b' : 'a' ;
}

// Writes ten input files "in0.txt" .. "in9.txt", each holding 10 test cases.
// Files 0-2 are tiny random {a,b} cases, 3-5 use get1(), 6-7 use get2(), and
// 8-9 use a cyclic alphabet with ten random characters overwritten in text.
// Each case is written as text, newline, pat, followed by a blank line.
// A file that cannot be opened is reported on stderr and skipped.
void generate()
{
 srand(time(NULL)) ;
 char in[10] = "in .txt" ;
 for(int test = 0;test < 10;test++)
 {
  in[2] = test + '0' ;
  FILE * fout = fopen(in,"w") ;
  if(fout == NULL)
  {
   fprintf(stderr,"generate: cannot open %s for writing\n",in) ;
   continue ;
  }
  
  int runs = 10 ;
  fprintf(fout,"%d\n",runs) ;
  for(int t = 0;t < runs;t++)
  {
   // 99001..100000: stays within the stated limit of 10^5 characters
   // (the previous "+ 1" could yield 100001).
   szt = 100000 - rand() % 1000 ;
   if(test <= 2) szt = rand() % 30 + 1 ;
   szp = rand() % szt + 1 ;
   memset(text,0,sizeof text) ;
   memset(pat,0,sizeof pat) ;

   if(test <= 2)
   {
    for(int i = 0;i < szt;i++) text[i] = rand() % 2 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = rand() % 2 + 'a' ;
   }
   else if(test <= 5)
   {
    for(int i = 0;i < szt;i++) text[i] = get1() ;
    for(int i = 0;i < szp;i++) pat[i] = get1() ;
   }
   else if(test <= 7)
   {
    for(int i = 0;i < szt;i++) text[i] = get2() ;
    for(int i = 0;i < szp;i++) pat[i] = get2() ;
   }
   else
   {
    for(int i = 0;i < szt;i++) text[i] = i % 26 + 'a' ;
    for(int i = 0;i < szp;i++) pat[i] = i % 26 + 'a' ;
    for(int i = 0;i < 10;i++) text[rand() % szt] = 'a' + rand() % 26 ;
   }
   fprintf(fout,"%s\n%s\n\n",text,pat) ;
  }
  // Flush and release the handle before moving on to the next file.
  fclose(fout) ;
 }
}



// Prints the indices separated (and followed) by a single space, then ends the line.
static void dumpIndices(const vector<int> & idx)
{
 for(size_t k = 0;k < idx.size();k++) cout << idx[k] << " " ;
 cout << endl ;
}

// Randomised cross-check: runs solve1() against the brute-force solve2() on
// 1000 generated inputs, printing both answers each round. On the first
// disagreement it prints the failing input and both answers, then spins
// forever so the output stays on screen.
void test()
{
 for(int round = 0;round < 1000;round++)
 {
  gen() ;
  vector<int> fast = solve1() ;
  vector<int> slow = solve2() ;
  dumpIndices(fast) ;
  dumpIndices(slow) ;
  cout << endl ;
  if(fast == slow) continue ;

  cout << "Failed on: " << round << endl ;
  cout << text << endl << pat << endl ;
  dumpIndices(fast) ;
  dumpIndices(slow) ;
  for(;;) ;
 }
}

// Reads the number of test cases, then for each case the text and the pattern,
// and prints the starting indices found by solve1() separated by single
// spaces, or "No Match!" when there are none. Stops early on malformed input.
int main()
{
// srand(time(NULL));
// generate() ; return 0 ;
// test() ; return 0 ;
 int runs ;
 if(scanf("%d",&runs) != 1) return 0 ;
 while(runs-- > 0)
 {
  memset(text,0,sizeof text) ;
  memset(pat,0,sizeof pat) ;
  // Field width is MAXN - 1 so an over-long token cannot overflow the buffers.
  if(scanf("%200001s%200001s",text,pat) != 2) break ;
  szt = strlen(text) ;
  szp = strlen(pat) ;
  vector<int> ret1 = solve1() ;
  // The problem statement requires this exact message when nothing matches.
  if(ret1.empty()) printf("No Match!") ;
  for(size_t i = 0;i < ret1.size();i++)
  {
   if(i > 0) printf(" ") ;
   printf("%d",ret1[i]) ;
  }
  printf("\n") ;
 }
 return 0 ;
}









In   Java  :








import java.util.*;


/**
 * Finds every position of the patient DNA where the virus DNA occurs with at
 * most one mismatching character, using polynomial hashes and a binary
 * search for the single allowed mismatch.
 */
public class Solution {

    /** Prefix hashes of the patient DNA (text) and of the virus DNA (target). */
    static RollingHash text, target;

    /**
     * Checks whether target[l, r) and text[s + l, s + r) differ in at most one
     * position.
     *
     * The range is halved: if both halves hash equal the range matches; if
     * exactly one half differs the search continues inside it; if both differ
     * there are at least two mismatches. A range of fewer than two characters
     * is always accepted because one mismatch is allowed.
     *
     * @param s offset of the candidate window inside the text
     * @param l inclusive start of the range inside the target
     * @param r exclusive end of the range inside the target
     * @return true if the hashes indicate at most one mismatch in the range
     */
    private static boolean ok(final int s, final int l, final int r) {
        if (r - l < 2) {
            return true;
        }

        final int m = (l + r) / 2;
        final boolean leftSame = text.Hash(s + l, s + m) == target.Hash(l, m);
        final boolean rightSame = text.Hash(s + m, s + r) == target.Hash(m, r);

        if (leftSame) {
            return rightSame || ok(s, m, r);
        }
        return rightSame && ok(s, l, m);
    }

    /**
     * Reads the number of test cases followed by pairs of strings from
     * standard input and prints, per case, the matching start indices
     * separated by single spaces, or "No Match!" when there are none.
     */
    public static void main(String[] args) {
        final Scanner in = new Scanner(System.in);
        int T = in.nextInt();
        // Output is collected and written once instead of per index.
        final StringBuilder out = new StringBuilder();
        while (T-- > 0) {
            final String s = in.next();
            final String t = in.next();
            text = new RollingHash(s);
            target = new RollingHash(t);

            final int lineStart = out.length();
            for (int i = 0; i + t.length() <= s.length(); i++) {
                if (ok(i, 0, t.length())) {
                    if (out.length() > lineStart) {
                        out.append(' ');
                    }
                    out.append(i);
                }
            }
            // The problem statement requires this exact message for an empty result.
            if (out.length() == lineStart) {
                out.append("No Match!");
            }
            out.append('\n');
        }
        System.out.print(out);
        System.out.flush();
    }

    /**
     * Prefix polynomial hash of a string. Arithmetic is done in long and is
     * allowed to overflow, i.e. values are taken modulo 2^64.
     */
    static class RollingHash {
        private static final long RADIX = 1000000009L;
        private int n;
        long[] table, pow;

        /** Precomputes prefix hashes and powers of RADIX for str. */
        public RollingHash(String str) {
            n = str.length();

            table = new long[n + 1];
            pow = new long[n + 1];

            pow[0] = 1;
            table[0] = 0;
            for (int i = 0; i < n; i++) {
                table[i + 1] = table[i] * RADIX + str.charAt(i);
                pow[i + 1] = pow[i] * RADIX;
            }
        }

        /** Returns the hash of the substring [i, j); requires i <= j. */
        public long Hash(int i, int j) {
            assert (i <= j);
            return table[j] - table[i] * pow[j - i];
        }
    }
}










In   C :








#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

#define kMaxSize 100001
#define kMaxMismatch 1

typedef long long int lli;

int findDna8b(char* p, char* v, int vc);

/*
 * Reads the number of test cases, then for each case the patient string p and
 * the virus string v, and prints every index i at which findDna8b() accepts
 * p[i..i+|v|), each followed by a space. Prints "No Match!" when no index is
 * accepted. Returns 1 if the buffers cannot be allocated, 0 otherwise.
 */
int main()
{
    // Allocate memory for strings.
    char* p = (char*)malloc(kMaxSize * sizeof(char));
    char* v = (char*)malloc(kMaxSize * sizeof(char));
    if (p == NULL || v == NULL){
        free(p);
        free(v);
        return 1;
    }
    
    // Test cases.
    int tc;
    if (scanf("%d", &tc) != 1)
        tc = 0;
    while (0 < tc--)
    {
        // Load strings. The field width is kMaxSize - 1 so that an over-long
        // token cannot overflow the buffers.
        if (scanf("%100000s %100000s", p, v) != 2)
            break;
        int pc = (int)strlen(p);
        int vc = (int)strlen(v);
        
        // Look for v in p. Print starting index of each match.
        int c = (pc-vc);
        int matched = 0;
        for (int i = 0; i <= c; i++){
            if (findDna8b(&p[i], v, vc) == 1){
                matched++;
                printf("%d ", i);
            }
        }
        
        // We have to indicate if no matches were found.
        if (matched <= 0)
            printf("No Match!\n");
        else
            printf("\n");
    }
    
    free(p);
    free(v);
    return 0;
}

/*
 * Compares the first vc characters of p and v.
 * Returns 1 if they differ in at most kMaxMismatch positions, -1 otherwise.
 *
 * The strings are compared in 8-byte chunks with memcmp; only a chunk that
 * differs is inspected character by character. memcmp is used instead of
 * reading the bytes through a long long pointer because p may start at any
 * offset inside the caller's buffer, so such a read would be misaligned and
 * would violate the aliasing rules.
 */
int findDna8b(char* p, char* v, int vc)
{
    int mismatch = 0;
    int j = 0;
    
    // Whole 8-byte chunks.
    for (; j + 8 <= vc; j += 8){
        if (memcmp(p + j, v + j, 8) == 0)
            continue;
        for (int k = j; k < j + 8; k++){
            if (p[k] != v[k]){
                mismatch++;
                if (mismatch > kMaxMismatch) return -1;
            }
        }
    }
    
    // Remaining tail of fewer than 8 characters.
    for (; j < vc; j++){
        if (p[j] != v[j]){
            mismatch++;
            if (mismatch > kMaxMismatch) return -1;
        }
    }
    
    return 1;
}








In   Python3  :






#!/bin/python3

import os
import sys
import math 
import re
from collections import defaultdict
#
# Complete the virusIndices function below.
#
def virusIndices(p, v):
    """Print the 0-based start indices in ``p`` where ``v`` matches.

    A window of ``p`` matches when it equals ``v`` or differs from it in
    exactly one position. Indices are printed on one line separated by
    spaces; ``No Match!`` is printed when there are none.
    """

    def bitap_one_mismatch(needle, haystack):
        # Bit i of a state means: needle[0..i] matches the haystack text
        # ending at the current character ("exact" with no mismatch,
        # "fuzzy" with at most one).
        width = len(needle)
        char_masks = {}
        for pos, ch in enumerate(needle):
            char_masks[ch] = char_masks.get(ch, 0) | (1 << pos)
        top_bit = 1 << (width - 1)
        exact = 0
        fuzzy = 0
        for end, ch in enumerate(haystack):
            allowed = char_masks.get(ch, 0)
            advanced = (exact << 1) | 1
            exact = advanced & allowed
            # Either extend a fuzzy match with a matching character, or
            # spend the single mismatch on the current character.
            fuzzy = (((fuzzy << 1) | 1) & allowed) | advanced
            if (exact | fuzzy) & top_bit:
                yield end - width + 1

    found = list(bitap_one_mismatch(v, p))

    if found:
        print(*found)
    else:
        print('No Match!')
        
if __name__ == '__main__':
    # First line: number of test cases; each following line: "<p> <v>".
    for _ in range(int(input())):
        fields = input().split()
        virusIndices(fields[0], fields[1])
                        








View More Similar Problems

Print in Reverse

Given a pointer to the head of a singly-linked list, print each data value from the reversed list. If the given list is empty, do not print anything. Example head* refers to the linked list with data values 1->2->3->Null Print the following: 3 2 1 Function Description: Complete the reversePrint function in the editor below. reversePrint has the following parameters: Sing

View Solution →

Reverse a linked list

Given the pointer to the head node of a linked list, change the next pointers of the nodes so that their order is reversed. The head pointer given may be null meaning that the initial list is empty. Example: head references the list 1->2->3->Null. Manipulate the next pointers of each node in place and return head, now referencing the head of the list 3->2->1->Null. Function Descriptio

View Solution →

Compare two linked lists

You’re given the pointer to the head nodes of two linked lists. Compare the data in the nodes of the linked lists to check if they are equal. If all data attributes are equal and the lists are the same length, return 1. Otherwise, return 0. Example: list1=1->2->3->Null list2=1->2->3->4->Null The two lists have equal data attributes for the first 3 nodes. list2 is longer, though, so the lis

View Solution →

Merge two sorted linked lists

This challenge is part of a tutorial track by MyCodeSchool Given pointers to the heads of two sorted linked lists, merge them into a single, sorted linked list. Either head pointer may be null meaning that the corresponding list is empty. Example headA refers to 1 -> 3 -> 7 -> NULL headB refers to 1 -> 2 -> NULL The new list is 1 -> 1 -> 2 -> 3 -> 7 -> NULL. Function Description C

View Solution →

Get Node Value

This challenge is part of a tutorial track by MyCodeSchool Given a pointer to the head of a linked list and a specific position, determine the data value at that position. Count backwards from the tail node. The tail is at position 0, its parent is at 1 and so on. Example head refers to 3 -> 2 -> 1 -> 0 -> NULL positionFromTail = 2 Each of the data values matches its distance from the t

View Solution →

Delete duplicate-value nodes from a sorted linked list

This challenge is part of a tutorial track by MyCodeSchool You are given the pointer to the head node of a sorted linked list, where the data in the nodes is in ascending order. Delete nodes and return a sorted list with each distinct value in the original list. The given head pointer may be null indicating that the list is empty. Example head refers to the first node in the list 1 -> 2 -

View Solution →