Save Humanity
Problem Statement :
Oh!! Mankind is in trouble again. This time, it's a deadly disease spreading at a rate never seen before. The need of the hour is to set up efficient virus detectors. You are the lead at Central Hospital and you need to find a fast and reliable way to detect the footprints of the virus DNA in that of the patient. The DNA of the patient as well as of the virus consists of lowercase letters. Since the collected data is raw, there may be some errors. You will need to find all substrings in the patient DNA that either exactly match the virus DNA or have at most one mismatch, i.e., a difference in at most one location. For example, "aa" and "aa" are matching, "ab" and "aa" are matching, while "abb" and "bab" are not. Function Description Complete the virusIndices function in the editor below. It should print a list of space-separated integers that represent the starting indices of matching substrings in increasing order, or No Match!. virusIndices has the following parameter(s): p: a string that represents patient DNA v: a string that represents virus DNA Input Format The first line contains an integer t, the number of test cases. Each of the next t lines contains two space-separated strings p (the patient DNA) and v (the virus DNA). Constraints 1 <= t <= 10 1 <= | p |, | v | <= 10^5 All characters in p and v ∈ ascii[ a - z ]. Output Format For each test case, output a single line containing a space-delimited list of the starting indices (0-indexed) of the substrings of p that match v according to the condition mentioned above. The indices have to be in increasing order. If there is no matching substring, output No Match!.
Solution :
Solution in C :
In C++ :
#include<iostream>
#include<stdio.h>
#include<vector>
#include<string.h>
#include<stdlib.h>
using namespace std ;
// Capacity (in chars) of each of the two input buffers below.
#define MAXN 200002
// text: the patient DNA, pat: the virus DNA. Filled by main() from the input,
// or by gen()/generate() with random data.
char text[MAXN],pat[MAXN] ;
// Lengths of pat and text respectively.
int szp,szt ;
// Prefix-match tables written by solve1(): p1 is computed over pat followed by
// text, p2 over reversed pat followed by reversed text.
int p1[2 * MAXN],p2[2 * MAXN] ;
// Scratch buffer in which solve1() builds the concatenated strings.
char s[2 * MAXN] ;
// Result list that solve1() clears, fills and returns (by value).
vector<int> reta ;
vector<int> solve1()
{
memset(p1,0,sizeof p1) ;
memset(p2,0,sizeof p2) ;
memset(s,0,sizeof s) ;
int n = 0 ;
for(int i = 0;i < szp;i++) s[n++] = pat[i] ;
for(int i = 0;i < szt;i++) s[n++] = text[i] ;
p1[0] = n ;
int g = 0,f = 0 ;
for(int i = 1;i < n;i++)
{
if(i < g && p1[i - f] != g - i)
p1[i] = min(p1[i - f],g - i) ;
else
{
g = max(g,i) ;
f = i ;
while(g < n && s[g] == s[g - f]) g++ ;
p1[i] = g - f ;
}
}
n = 0 ;
for(int i = szp - 1;i >= 0;i--) s[n++] = pat[i] ;
for(int i = szt - 1;i >= 0;i--) s[n++] = text[i] ;
p2[0] = n ;
g = 0,f = 0 ;
for(int i = 1;i < n;i++)
{
if(i < g && p2[i - f] != g - i)
p2[i] = min(p2[i - f],g - i) ;
else
{
g = max(g,i) ;
f = i ;
while(g < n && s[g] == s[g - f]) g++ ;
p2[i] = g - f ;
}
}
reta.clear() ;
for(int i = 0;i + szp <= szt;i++)
{
int start = p1[szp + i] ;
int end = p2[szp + szt - 1 - (i + szp - 1)] ;
if(start + end + 1 >= szp) reta.push_back(i) ;
}
return reta ;
}
// Brute-force reference: returns every start index (ascending) at which the
// szp-character window of text differs from pat in at most one position.
vector<int> solve2()
{
    vector<int> hits ;
    const int lastStart = szt - szp ;
    for(int start = 0;start <= lastStart;start++)
    {
        const char * window = text + start ;
        int diffs = 0 ;
        for(int k = 0;k < szp;k++)
        {
            if(window[k] != pat[k]) ++diffs ;
        }
        if(diffs < 2) hits.push_back(start) ;
    }
    return hits ;
}
// Fills the globals with a random instance for test(): a text of 1..1000
// characters and a pattern of 1..szt characters, both over {'a','b','c'}.
void gen()
{
    memset(text,0,sizeof text) ;
    szt = 1 + rand() % 1000 ;
    for(char * out = text;out != text + szt;++out) *out = 'a' + rand() % 3 ;

    memset(pat,0,sizeof pat) ;
    szp = 1 + rand() % szt ;
    for(char * out = pat;out != pat + szp;++out) *out = 'a' + rand() % 3 ;
}
// Random letter heavily biased towards 'a': 'a' unless rand() % 50000 is
// 49998 or 49999, in which case a second draw picks 'b' (value below 80 out
// of 100) or 'c'.
char get1()
{
    if(rand() % 50000 >= 49998)
    {
        return (rand() % 100 < 80) ? 'b' : 'c' ;
    }
    return 'a' ;
}
// Random letter that is 'a' unless rand() % 50000 equals 49999, in which
// case it is 'b'.
char get2()
{
    return (rand() % 50000 < 49999) ? 'a' : 'b' ;
}
// Writes ten input files, "in0.txt" .. "in9.txt", each holding 10 test cases.
//
// Files 0-2 contain tiny random strings over {a,b}; files 3-5 use get1();
// files 6-7 use get2(); files 8-9 use the repeating alphabet with ten random
// characters overwritten in the text. Each case is written as the text, the
// pattern and a blank line.
//
// Overwrites the globals text, pat, szt and szp. Stops early, after reporting
// the error, if a file cannot be opened.
void generate()
{
    srand(time(NULL)) ;
    char in[10] = "in .txt" ;
    for(int test = 0;test < 10;test++)
    {
        in[2] = test + '0' ;   // the blank in the template becomes the file number
        FILE * fout = fopen(in,"w") ;
        if(fout == NULL)
        {
            // Previously a failed open was passed straight to fprintf.
            perror(in) ;
            return ;
        }
        int runs = 10 ;
        fprintf(fout,"%d\n",runs) ;
        for(int t = 0;t < runs;t++)
        {
            szt = 100000 - rand() % 1000 + 1 ;
            if(test <= 2) szt = rand() % 30 + 1 ;
            szp = rand() % szt + 1 ;
            // Zero-fill so both buffers are NUL-terminated for the "%s" below.
            memset(text,0,sizeof text) ;
            memset(pat,0,sizeof pat) ;
            if(test <= 2)
            {
                for(int i = 0;i < szt;i++) text[i] = rand() % 2 + 'a' ;
                for(int i = 0;i < szp;i++) pat[i] = rand() % 2 + 'a' ;
            }
            else if(test <= 5)
            {
                for(int i = 0;i < szt;i++) text[i] = get1() ;
                for(int i = 0;i < szp;i++) pat[i] = get1() ;
            }
            else if(test <= 7)
            {
                for(int i = 0;i < szt;i++) text[i] = get2() ;
                for(int i = 0;i < szp;i++) pat[i] = get2() ;
            }
            else
            {
                for(int i = 0;i < szt;i++) text[i] = i % 26 + 'a' ;
                for(int i = 0;i < szp;i++) pat[i] = i % 26 + 'a' ;
                for(int i = 0;i < 10;i++) text[rand() % szt] = 'a' + rand() % 26 ;
            }
            fprintf(fout,"%s\n%s\n\n",text,pat) ;
        }
        // Close each file before moving on; the handles used to be leaked.
        fclose(fout) ;
    }
}
void test()
{
for(int t = 0;t < 1000;t++)
{
gen() ;
vector<int> ret1 = solve1() ;
vector<int> ret2 = solve2() ;
for(int i = 0;i < ret1.size();i++) cout << ret1[i] << " " ; cout << endl ;
for(int i = 0;i < ret2.size();i++) cout << ret2[i] << " " ; cout << endl ;
cout << endl ;
if(ret1 != ret2)
{
cout << "Failed on: " << t << endl ;
cout << text << endl << pat << endl ;
for(int i = 0;i < ret1.size();i++) cout << ret1[i] << " " ; cout << endl ;
for(int i = 0;i < ret2.size();i++) cout << ret2[i] << " " ; cout << endl ;
while(1) ;
}
}
}
// Reads the number of test cases, then for each case the patient DNA and the
// virus DNA, and prints the space-separated start indices found by solve1(),
// or "No Match!" when there are none.
int main()
{
    // srand(time(NULL));
    // generate() ; return 0 ;
    // test() ; return 0 ;
    int runs ;
    if(scanf("%d",&runs) != 1) return 0 ;
    while(runs-- > 0)
    {
        memset(text,0,sizeof text) ;
        memset(pat,0,sizeof pat) ;
        // Field width is MAXN - 1 so an over-long token cannot overrun the buffers.
        if(scanf("%200001s%200001s",text,pat) != 2) break ;
        szt = strlen(text) ;
        szp = strlen(pat) ;
        vector<int> ret1 = solve1() ;
        // The problem statement requires this exact text when nothing matches;
        // previously an empty line was printed.
        if(ret1.empty()) printf("No Match!") ;
        for(size_t i = 0;i < ret1.size();i++)
        {
            if(i > 0) printf(" ") ;
            printf("%d",ret1[i]) ;
        }
        printf("\n") ;
    }
    return 0 ;
}
In Java :
import java.util.*;
/**
 * Finds every position where the virus DNA occurs in the patient DNA with at
 * most one mismatching character, using polynomial hashes to compare halves
 * of the candidate window.
 */
public class Solution {
    private static final Scanner sc = new Scanner(System.in);

    /** Hash tables of the current patient string and virus string (set by {@link #solve}). */
    static RollingHash text, target;

    /**
     * Reports whether text[s+l, s+r) and target[l, r) differ in at most one
     * position, judged by hash equality.
     *
     * The range is halved: if both halves hash equal there is no mismatch; if
     * exactly one half differs the search continues inside it; if both differ
     * there are at least two mismatches.
     *
     * @param s start of the candidate window in the text
     * @param l inclusive start of the range, relative to the window / target
     * @param r exclusive end of the range
     */
    private static boolean ok(final int s, final int l, final int r) {
        if (r - l < 2) {
            // Zero or one character left: at most one mismatch by definition.
            return true;
        }
        final int m = (l + r) / 2;
        final boolean leftSame = text.Hash(s + l, s + m) == target.Hash(l, m);
        final boolean rightSame = text.Hash(s + m, s + r) == target.Hash(m, r);
        if (leftSame) {
            return rightSame || ok(s, m, r);
        }
        if (rightSame) {
            return ok(s, l, m);
        }
        return false;
    }

    /**
     * Builds the output line for one test case.
     *
     * @param p patient DNA
     * @param v virus DNA
     * @return the 0-based start indices of all matching windows in increasing
     *         order, separated by single spaces, or {@code "No Match!"} when
     *         there are none (the original printed an empty line in that case)
     */
    static String solve(final String p, final String v) {
        text = new RollingHash(p);
        target = new RollingHash(v);
        // Collect the whole line first; one print call per index is slow for large outputs.
        final StringBuilder line = new StringBuilder();
        for (int i = 0; i + v.length() <= p.length(); i++) {
            if (ok(i, 0, v.length())) {
                if (line.length() > 0) {
                    line.append(' ');
                }
                line.append(i);
            }
        }
        return line.length() == 0 ? "No Match!" : line.toString();
    }

    public static void main(String[] args) {
        int T = sc.nextInt();
        while (T-- > 0) {
            final String s = sc.next();
            final String t = sc.next();
            System.out.println(solve(s, t));
        }
    }

    /**
     * Prefix polynomial hash of a string. Arithmetic is plain {@code long}
     * arithmetic, so values wrap on overflow; equal substrings always hash
     * equal, while unequal substrings may collide.
     */
    static class RollingHash {
        private static final long RADIX = 1000000009L;
        private int n;
        /** table[k] is the hash of the first k characters; pow[k] is RADIX^k. */
        long[] table, pow;

        public RollingHash(String str) {
            n = str.length();
            table = new long[n + 1];
            pow = new long[n + 1];
            pow[0] = 1;
            table[0] = 0;
            for (int i = 0; i < n; i++) {
                table[i + 1] = table[i] * RADIX + str.charAt(i);
                pow[i + 1] = pow[i] * RADIX;
            }
        }

        /** Hash of the substring [i, j); requires i <= j. */
        public long Hash(int i, int j) {
            assert(i <= j);
            return table[j] - table[i] * pow[j - i];
        }
    }
}
In C :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
// Size in bytes of each DNA buffer allocated in main().
#define kMaxSize 100001
// Largest number of differing characters that findDna8b still accepts.
#define kMaxMismatch 1
// Shorthand for long long int.
typedef long long int lli;
// Compares the first vc characters of p and v; defined after main().
int findDna8b(char* p, char* v, int vc);
// Reads the test cases and, for each one, prints the start index of every
// window of p that findDna8b accepts (each index followed by a space), or
// "No Match!" when there is none.
int main()
{
    // Allocate memory for strings.
    char* p = (char*)malloc(kMaxSize * sizeof(char));
    char* v = (char*)malloc(kMaxSize * sizeof(char));
    if (p == NULL || v == NULL){
        free(p);
        free(v);
        return 1;
    }
    // Test cases.
    int tc;
    if (scanf("%d", &tc) != 1)
        tc = 0;
    while (0 < tc--)
    {
        // Load strings. The field width is kMaxSize - 1 so a longer token
        // cannot overrun the buffers; stop if the input ends early.
        if (scanf("%100000s %100000s", p, v) != 2)
            break;
        int pc = (int)strlen(p);
        int vc = (int)strlen(v);
        // Look for v in p. Print starting index of each match.
        // c is negative when v is longer than p, so the loop is skipped.
        int c = (pc-vc);
        int matched = 0;
        for (int i = 0; i <= c; i++){
            if (findDna8b(&p[i], v, vc) == 1){
                matched++;
                printf("%d ", i);
            }
        }
        // We have to indicate if no matches were found.
        if (matched <= 0)
            printf("No Match!\n");
        else
            printf("\n");
    }
    free(p);
    free(v);
    return 0;
}
// Compares the first vc characters of p and v.
//
// Returns 1 when they differ in at most kMaxMismatch positions and -1 as soon
// as that limit is exceeded.
//
// The strings are compared 8 bytes at a time and only chunks that differ are
// inspected character by character. memcmp is used for the chunk comparison
// because p may start at any byte offset: reading it through a long long
// pointer, as before, is a misaligned access that also breaks the aliasing
// rules.
int findDna8b(char* p, char* v, int vc)
{
    int chunks = vc/8;
    int mismatch = 0;
    int i;
    for (i = 0; i < chunks; i++){
        if (memcmp(p + i*8, v + i*8, 8) != 0)
        {
            for (int j = i*8; j < (i*8)+8; j++){
                if (p[j] != v[j]){
                    mismatch++;
                    if (mismatch > kMaxMismatch) return -1;
                }
            }
        }
    }
    // Remaining tail of fewer than 8 characters.
    for (int j = i*8; j < vc; j++){
        if (p[j] != v[j]){
            mismatch++;
            if (mismatch > kMaxMismatch) return -1;
        }
    }
    return 1;
}
In Python3 :
#!/bin/python3
import os
import sys
import math
import re
from collections import defaultdict
#
# Complete the virusIndices function below.
#
def virusIndices(p, v):
    """Print the start indices in p where v occurs with at most one mismatch.

    The indices are 0-based, in increasing order and separated by single
    spaces. Prints 'No Match!' when there is no such position.

    p: patient DNA string.
    v: virus DNA string (must be non-empty).
    """
    def WM_approx_Ham1_search(pattern, text):
        """Yield each start index where pattern matches text with <= 1 substitution.

        Bit-parallel scan: bit i of R0 is set when pattern[:i + 1] matches the
        text ending at the current character exactly, and bit i of R1 when it
        matches with at most one substituted character.
        """
        m = len(pattern)
        # S_table[c] has bit i set wherever pattern[i] == c.
        S_table = defaultdict(int)
        for i, c in enumerate(pattern):
            S_table[c] |= 1 << i
        R0 = 0
        R1 = 0
        mask = 1 << (m - 1)  # bit that signals a full-length match
        for j, c in enumerate(text):
            # .get() keeps the lookup from inserting characters that are
            # absent from the pattern.
            S = S_table.get(c, 0)
            shR0 = (R0 << 1) | 1
            R0 = shR0 & S
            # Either extend an existing one-mismatch state with a matching
            # character, or spend the single mismatch on this character.
            R1 = ((R1 << 1) | 1) & S | shR0
            # if _DEBUG:
            #     print("j= %2d msk=%s S=%s R0=%s R1=%s" \
            #           % tuple([j] + map(bitstr, [mask, S, R0, R1])))
            if R0 & mask or R1 & mask:  # exact match, or match with one substitution
                yield j - m + 1

    matches = list(WM_approx_Ham1_search(v, p))
    # print(matches)
    if not matches:
        print('No Match!')
    else:
        print(' '.join(str(ind) for ind in matches))
if __name__ == '__main__':
    # First line: number of test cases. Each following line holds the patient
    # DNA and the virus DNA separated by whitespace.
    t = int(input())
    for t_itr in range(t):
        pv = input().split()
        p = pv[0]
        v = pv[1]
        virusIndices(p, v)
View More Similar Problems
Minimum Average Waiting Time
Tieu owns a pizza restaurant and he manages it in his own way. While in a normal restaurant, a customer is served by following the first-come, first-served rule, Tieu simply minimizes the average waiting time of his customers. So he gets to decide who is served first, regardless of how sooner or later a person comes. Different kinds of pizzas take different amounts of time to cook. Also, once h
View Solution →Merging Communities
People connect with each other in a social network. A connection between Person I and Person J is represented as M I J. When two persons belonging to different communities connect, the net effect is the merger of both communities which I and J belong to. At the beginning, there are N people representing N communities. Suppose person 1 and 2 connected and later 2 and 3 connected, then 1, 2 and 3 w
View Solution →Components in a graph
There are 2 * N nodes in an undirected graph, and a number of edges connecting some nodes. In each edge, the first value will be between 1 and N, inclusive. The second node will be between N + 1 and 2 * N, inclusive. Given a list of edges, determine the size of the smallest and largest connected components that have 2 or more nodes. A node can have any number of connections. The highest node valu
View Solution →Kundu and Tree
Kundu is true tree lover. Tree is a connected graph having N vertices and N-1 edges. Today when he got a tree, he colored each edge with one of either red(r) or black(b) color. He is interested in knowing how many triplets(a,b,c) of vertices are there , such that, there is atleast one edge having red color on all the three paths i.e. from vertex a to b, vertex b to c and vertex c to a . Note that
View Solution →Super Maximum Cost Queries
Victoria has a tree, T , consisting of N nodes numbered from 1 to N. Each edge from node Ui to Vi in tree T has an integer weight, Wi. Let's define the cost, C, of a path from some node X to some other node Y as the maximum weight ( W ) for any edge in the unique path from node X to Y node . Victoria wants your help processing Q queries on tree T, where each query contains 2 integers, L and
View Solution →Contacts
We're going to make our own Contacts application! The application must perform two types of operations: 1. add name, where name is a string denoting a contact name. This must store name as a new contact in the application. 2. find partial, where partial is a string denoting a partial name to search the application for. It must count the number of contacts starting with partial and print the co
View Solution →