Determining DNA Health
Problem Statement :
DNA is a nucleic acid present in the bodies of living things. Each piece of DNA contains a number of genes, some of which are beneficial and increase the DNA's total health. Each gene has a health value, and the total health of a DNA is the sum of the health values of all the beneficial genes that occur as a substring in the DNA. We represent genes and DNA as non-empty strings of lowercase English alphabetic letters, and the same gene may appear multiple times as a susbtring of a DNA. Given the following: An array of beneficial gene strings, . Note that these gene sequences are not guaranteed to be distinct. An array of gene health values, , where each is the health value for gene . A set of DNA strands where the definition of each strand has three components, , , and , where string is a DNA for which genes are healthy. Find and print the respective total healths of the unhealthiest (minimum total health) and healthiest (maximum total health) strands of DNA as two space-separated values on a single line. Input Format The first line contains an integer, , denoting the total number of genes. The second line contains space-separated strings describing the respective values of (i.e., the elements of ). The third line contains space-separated integers describing the respective values of (i.e., the elements of ). The fourth line contains an integer, , denoting the number of strands of DNA to process. Each of the subsequent lines describes a DNA strand in the form start end d, denoting that the healthy genes for DNA strand are and their respective correlated health values are . Output Format Print two space-separated integers describing the respective total health of the unhealthiest and the healthiest strands of DNA.
Solution :
Solution in C :
In C++ :
/*AMETHYSTS*/
#pragma comment(linker, "/STACK:1000000000")
#include <cstdio>
#include <iostream>
#include <ctime>
#include <string>
#include <vector>
#include <cmath>
#include <algorithm>
#include <cstring>
#include <set>
#include <cstdlib>
#include <ctime>
#include <cassert>
#include <bitset>
#include <fstream>
#include <deque>
#include <stack>
#include <climits>
#include <string>
#include <queue>
#include <memory.h>
#include <map>
#include <unordered_map>
#define ll long long
#define ld double
#define pii pair <ll, ll>
#define mp make_pair
using namespace std;
const int maxn = (int)1e5 + 10, maxm = (int)2e6 + 10;
char s[maxm];
int len;
struct point {
int ed[26];
int lnk;
int par;
int c;
vector <int> pos;
point() {
memset(ed, -1, sizeof ed);
lnk = -1;
par = -1;
c = -1;
}
};
point mas[maxm];
int posit = 0;
int my_new() {
return posit++;
}
void add(int it, int pos, int num) {
if (pos == len) {
mas[it].pos.push_back(num);
return;
}
int c = s[pos] - 'a';
if (mas[it].ed[c] == -1) {
mas[it].ed[c] = my_new();
int nit = mas[it].ed[c];
mas[nit].par = it;
mas[nit].c = c;
}
add(mas[it].ed[c], pos + 1, num);
}
int h[maxn];
int l[maxn], r[maxn];
vector <int> z[maxm];
int getlnk(int it);
int go(int it, int c) {
if (mas[it].ed[c] != -1) {
return mas[it].ed[c];
}
if (it == 0) {
mas[it].ed[c] = 0;
} else {
mas[it].ed[c] = go(getlnk(it), c);
}
return mas[it].ed[c];
}
int getlnk(int it) {
if (mas[it].lnk != -1) {
return mas[it].lnk;
}
if (it == 0 || mas[it].par == 0) {
mas[it].lnk = 0;
} else {
mas[it].lnk = go(getlnk(mas[it].par), mas[it].c);
}
return mas[it].lnk;
}
vector <int> ed[maxm];
ll ans[maxn];
ll tr[maxn];
void change(int pos, int x) {
for ( ; pos < maxn; pos |= pos + 1) {
tr[pos] += x;
}
}
ll getsum(int pos) {
ll ans = 0;
for ( ; pos >= 0; pos = (pos & (pos + 1)) - 1) {
ans += tr[pos];
}
return ans;
}
ll gores(int l, int r) {
return getsum(r) - getsum(l - 1);
}
void dfs(int v) {
for (int i = 0; i < (int)mas[v].pos.size(); i++) {
int g = mas[v].pos[i];
change(g, h[g]);
}
for (int i = 0; i < (int)z[v].size(); i++) {
int num = z[v][i];
int lm = l[num];
int rm = r[num];
ans[num] += gores(lm, rm);
}
for (int i = 0; i < (int)ed[v].size(); i++) {
int u = ed[v][i];
dfs(u);
}
for (int i = 0; i < (int)mas[v].pos.size(); i++) {
int g = mas[v].pos[i];
change(g, -h[g]);
}
}
int main() {
int n;
cin >> n;
int tr = my_new();
for (int i = 0; i < n; i++) {
scanf("%s", s);
len = strlen(s);
add(tr, 0, i);
}
for (int i = 0; i < n; i++) {
scanf("%d", &h[i]);
}
int q;
scanf("%d", &q);
for (int i = 0; i < q; i++) {
scanf("%d %d %s", &l[i], &r[i], s);
len = strlen(s);
int it = tr;
for (int j = 0; j < len; j++) {
it = go(it, s[j] - 'a');
z[it].push_back(i);
}
}
ll mins = (ll)1e18;
ll maxs = 0;
for (int i = 1; i < posit; i++) {
ed[getlnk(i)].push_back(i);
}
dfs(0);
for (int i = 0; i < q; i++) {
mins = min(mins, ans[i]);
maxs = max(maxs, ans[i]);
}
cout << mins << ' ' << maxs << endl;
return 0;
}
In Java :
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
#include <iostream>
#include <cassert>
#include <cmath>
#include <string>
#include <queue>
#include <set>
#include <map>
#include <cstdlib>
using namespace std;
#define INF 1e+9
#define mp make_pair
#define pb push_back
#define fi first
#define fs first
#define se second
#define i64 long long
#define li long long
#define lint long long
#define pii pair<int, int>
#define vi vector<int>
#define forn(i, n) for (int i = 0; i < (int)n; i++)
#define fore(i, b, e) for (int i = (int)b; i <= (int)e; i++)
struct vertex {
int next[26];
vi numbers;
int p;
char pch;
int link;
int go[26];
};
const int maxn = 2e6+5;
vertex t[maxn];
int sz;
int cost[maxn];
void init() {
t[0].p = t[0].link = -1;
memset (t[0].next, 255, sizeof t[0].next);
memset (t[0].go, 255, sizeof t[0].go);
sz = 1;
}
void add_string (const string & s, int num) {
int v = 0;
for (size_t i=0; i<s.length(); ++i) {
int c = s[i]-'a';
if (t[v].next[c] == -1) {
memset (t[sz].next, 255, sizeof t[sz].next);
memset (t[sz].go, 255, sizeof t[sz].go);
t[sz].link = -1;
t[sz].p = v;
t[sz].pch = c;
t[v].next[c] = sz++;
}
v = t[v].next[c];
}
t[v].numbers.pb(num);
}
int go (int v, int c);
int get_link (int v) {
if (t[v].link == -1) {
if (v == 0 || t[v].p == 0)
t[v].link = 0;
else
t[v].link = go (get_link (t[v].p), t[v].pch);
}
return t[v].link;
}
int go (int v, int c) {
if (t[v].go[c] == -1) {
if (t[v].next[c] != -1)
t[v].go[c] = t[v].next[c];
else
t[v].go[c] = v==0 ? 0 : go (get_link (v), c);
}
return t[v].go[c];
}
int main() {
#ifdef LOCAL
freopen("inp", "r", stdin);
//freopen("outp", "w", stdout);
#else
// freopen(TASKNAME ".in", "r", stdin);
// freopen(TASKNAME ".out", "w", stdout);
#endif
init();
int n;
scanf("%d", &n);
forn(j, n) {
string s;
cin >> s;
// cout << "s = " << s << endl;
add_string(s, j);
}
forn(j, n)
scanf("%d", &cost[j]);
int q;
scanf("%d", &q);
i64 minn = 0;
i64 maxx = 0;
forn(query, q) {
int L, R;
scanf("%d%d", &L, &R);
string s;
cin >> s;
int cur = 0;
i64 sum = 0;
for (char c : s) {
cur = go(cur, (int)(c - 'a'));
//printf("cur = %d\n", cur);
int tmp = cur;
while(tmp != 0) {
for (int x : t[tmp].numbers) {
if (x >= L && x <= R)
sum += cost[x];
//printf("%c x = %d sum = %lld\n", c, x, sum);
}
tmp = get_link(tmp);
}
}
if (query == 0 || sum > maxx)
maxx = sum;
if (query == 0 || sum < minn)
minn = sum;
}
cout << minn << " " << maxx;
}
In C :
#define _XOPEN_SOURCE 500
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
struct node {
unsigned int count;
unsigned int next;
unsigned long *value;
unsigned int *index;
struct node *child['z' - 'a' + 1];
};
void init_node(struct node *n)
{
n->value = 0;
memset(n->child, 0, sizeof(n->child));
return;
}
struct node *insert_node(struct node *r, char *s)
{
int i;
int j;
struct node *n;
n = r;
for (i = 0; s[i] != 0; i++) {
j = s[i] - 'a';
if (n->child[j] == NULL) {
n->child[j] = malloc(sizeof(struct node));
init_node(n->child[j]);
}
n = n->child[j];
}
return n;
}
int main()
{
int dc;
int f;
char **g;
int gc;
int *h;
int i;
int j;
int k;
int l;
unsigned long max;
unsigned long min;
struct node **n;
struct node *p;
char *s;
struct node *t;
unsigned long x;
s = malloc(sizeof(char) * (4 * 1024 * 1024));
scanf("%d", &gc);
g = malloc(sizeof(char *) * gc);
for (i = 0; i < gc; i++) {
scanf("%s", s);
g[i] = strdup(s);
}
h = malloc(sizeof(int) * gc);
for (i = 0; i < gc; i++) {
scanf("%d", &h[i]);
}
t = malloc(sizeof(struct node));
init_node(t);
n = malloc(sizeof(struct node *) * gc);
for (i = 0; i < gc; i++) {
n[i] = insert_node(t, g[i]);
}
for (i = 0; i < gc; i++) {
n[i]->count++;
}
for (i = 0; i < gc; i++) {
if (n[i]->next == 0) {
n[i]->value = malloc(sizeof(unsigned long) * n[i]->count);
n[i]->index = malloc(sizeof(unsigned int) * n[i]->count);
}
n[i]->value[n[i]->next] = h[i];
n[i]->index[n[i]->next] = i;
n[i]->next++;
}
scanf("%d", &dc);
min = -1;
max = 0;
while (dc > 0) {
scanf("%d %d %s", &f, &l, s);
x = 0;
for (i = 0; s[i] != 0; i++) {
p = t;
for (j = i; s[j] != 0; j++) {
p = p->child[s[j] - 'a'];
if (p == NULL) {
break;
}
for (k = 0; k < p->count; k++) {
if (p->index[k] < f) {
continue;
}
if (p->index[k] > l) {
break;
}
x += p->value[k];
}
}
}
if ((min == -1) || (x < min)) {
min = x;
}
if (x > max) {
max = x;
}
dc--;
}
printf("%ld %ld\n", min, max);
return 0;
}
In Python3 :
#!/bin/python3
'''Compact prefix trie'''
import bisect
bisect_left = bisect.bisect_left
bisect_right = bisect.bisect_right
class ScoreTable:
def __init__(self):
self.scoreIndexes = []
self.scoreSums = []
def append(self, i, s):
self.scoreIndexes.append(i)
partial = self.scoreSums[-1] if len(self.scoreSums) > 0 else 0
self.scoreSums.append(s + partial)
def getScore(self, start, end):
lo = bisect_left(self.scoreIndexes, start)
hi = bisect_right(self.scoreIndexes, end)
if hi == lo:
return 0
elif lo == 0:
return self.scoreSums[hi-1]
else:
return self.scoreSums[hi-1] - self.scoreSums[lo-1]
def longestCommonPrefix(s, t):
stop = min(len(s), len(t))
i = 0
while i < stop and s[i] == t[i]:
i += 1
return s[:i]
class TrieNode:
def __init__(self, label):
self.label = label
self.child = {}
self.scores = None
def __str__(self):
return '{' + ','.join([c + ':' + str(n) for c,n in self.child.items()]) + '}'
def insert(self, k, ix, score):
'''Insert the key k into the trie rooted at this node.'''
i = 0
node = self
while i < len(k):
if k[i] in node.child:
child = node.child[k[i]]
lcp = longestCommonPrefix(k[i:], child.label)
if len(lcp) < len(child.label):
# Split child node at longest common prefix
suffix = child.label[len(lcp):]
child.label = suffix
prefNode = TrieNode(lcp)
prefNode.child[suffix[0]] = child
node.child[lcp[0]] = prefNode
child = prefNode
node = child
i += len(lcp)
else:
newNode = TrieNode(k[i:])
node.child[k[i]] = newNode
node = newNode
i = len(k)
if not node.scores:
node.scores = ScoreTable()
node.scores.append(ix, score)
def traverse(self):
yield self
for node in self.child.values():
yield from node.traverse()
def countNodes(t):
return sum(1 for _ in t.traverse())
def soloChildren(t):
return sum(1 for n in t.traverse() if len(n.child) == 1)
def buildGeneTrie(genes, healths):
# Build compact prefix trie.
trie = TrieNode('')
for i in range(len(genes)):
trie.insert(genes[i], i, healths[i])
return trie
def checkDNA(geneTrie, dna, start, end):
# print('scoring', dna, start, end)
score = 0
z = len(dna)
for i in range(z):
node = geneTrie
j = i
while j < z and dna[j] in node.child:
node = node.child[dna[j]]
x = len(node.label)
if node.label != dna[j:j+x]: break
if node.scores:
score += node.scores.getScore(start, end)
j += x
# print('final score:', score)
return score
def main():
n_ = int(input())
genes = input().split()
healths = list(map(int, input().split()))
# print('building trie...')
geneTrie = buildGeneTrie(genes, healths)
# print(countNodes(geneTrie), 'nodes')
# print(soloChildren(geneTrie), 'nodes with only one child')
# print(geneTrie)
best = 0
worst = float('inf')
s = int(input())
for _ in range(s):
first,last,d = input().split()
first,last = int(first), int(last)
score = checkDNA(geneTrie, d, first, last)
# if score < worst: print('***', d, first, last, score)
best = max(best, score)
worst = min(worst, score)
print(worst, best)
if __name__ == '__main__':
main()
# import cProfile
# cProfile.run('main()', sort='cumtime')
View More Similar Problems
Print in Reverse
Given a pointer to the head of a singly-linked list, print each data value from the reversed list. If the given list is empty, do not print anything. Example head* refers to the linked list with data values 1->2->3->Null Print the following: 3 2 1 Function Description: Complete the reversePrint function in the editor below. reversePrint has the following parameters: Sing
View Solution →Reverse a linked list
Given the pointer to the head node of a linked list, change the next pointers of the nodes so that their order is reversed. The head pointer given may be null meaning that the initial list is empty. Example: head references the list 1->2->3->Null. Manipulate the next pointers of each node in place and return head, now referencing the head of the list 3->2->1->Null. Function Descriptio
View Solution →Compare two linked lists
You’re given the pointer to the head nodes of two linked lists. Compare the data in the nodes of the linked lists to check if they are equal. If all data attributes are equal and the lists are the same length, return 1. Otherwise, return 0. Example: list1=1->2->3->Null list2=1->2->3->4->Null The two lists have equal data attributes for the first 3 nodes. list2 is longer, though, so the lis
View Solution →Merge two sorted linked lists
This challenge is part of a tutorial track by MyCodeSchool Given pointers to the heads of two sorted linked lists, merge them into a single, sorted linked list. Either head pointer may be null meaning that the corresponding list is empty. Example headA refers to 1 -> 3 -> 7 -> NULL headB refers to 1 -> 2 -> NULL The new list is 1 -> 1 -> 2 -> 3 -> 7 -> NULL. Function Description C
View Solution →Get Node Value
This challenge is part of a tutorial track by MyCodeSchool Given a pointer to the head of a linked list and a specific position, determine the data value at that position. Count backwards from the tail node. The tail is at postion 0, its parent is at 1 and so on. Example head refers to 3 -> 2 -> 1 -> 0 -> NULL positionFromTail = 2 Each of the data values matches its distance from the t
View Solution →Delete duplicate-value nodes from a sorted linked list
This challenge is part of a tutorial track by MyCodeSchool You are given the pointer to the head node of a sorted linked list, where the data in the nodes is in ascending order. Delete nodes and return a sorted list with each distinct value in the original list. The given head pointer may be null indicating that the list is empty. Example head refers to the first node in the list 1 -> 2 -
View Solution →