Gene Mutation Groups - Google Top Interview Questions


Problem Statement :


You are given a list of unique strings genes where each element has the same length and contains characters "A", "C", "G" and/or "T".

If strings a and b are the same string except for one character, then a and b are in the same mutation group.

If strings a and b are in a group and b and c are in a group, then a and c are in the same group.
Return the total number of mutation groups.

Constraints

n ≤ 10,000

k ≤ 20 where k is the length of a string in genes

Example 1

Input

genes = ["ACGT", "ACCT", "AGGT", "TTTT", "TTTG"]

Output

2

Explanation

There are two mutation groups:



["ACGT", "ACCT", "AGGT"]

["TTTT", "TTTG"]



Solution :



title-img




                        Solution in C++ :

// Disjoint-set forest over the integers 0..n-1 with path compression and
// union by rank.
class UnionFind {
    private:
    vector<int> parents, rank;

    public:
    // Starts with n singleton sets: every node is its own parent, rank 1.
    UnionFind(int n) : parents(n), rank(n, 1) {
        for (int i = 0; i < n; i++) parents[i] = i;
    }

    // Returns the representative of the set containing `node` and re-points
    // every node visited on the way directly at that representative.
    int find(int node) {
        int root = node;
        for (; parents[root] != root; root = parents[root]) {
        }

        // Second pass: compress the path that was just walked.
        for (int next; node != root; node = next) {
            next = parents[node];
            parents[node] = root;
        }

        return root;
    }

    // Merges the sets containing a and b.
    // Returns true if a merge happened, false if they already shared a set.
    bool unify(int a, int b) {
        const int rootA = find(a);
        const int rootB = find(b);

        if (rootA == rootB) return false;

        // Only when B's tree is strictly taller does A hang under B.
        if (rank[rootA] < rank[rootB]) {
            parents[rootA] = rootB;
            return true;
        }

        // Otherwise B hangs under A; a tie makes A's tree one level taller.
        parents[rootB] = rootA;
        if (rank[rootA] == rank[rootB]) rank[rootA]++;

        return true;
    }
};

// Counts mutation groups among `genes`.
// Every gene starts as its own group; for each gene, every single-character
// substitution drawn from {A, C, G, T} is looked up in a gene -> index map and,
// when present, the two genes are merged. Each successful merge removes one
// group. Per gene this performs at most 4 * K hash lookups of K-character keys.
int solve(vector<string>& genes) {
    const int n = genes.size();
    const char type[4] = {'A', 'C', 'G', 'T'};

    unordered_map<string, int> gene_map;
    for (int i = 0; i < n; i++) gene_map[genes[i]] = i;

    UnionFind union_find(n);
    int groups = n;

    for (const string& gene : genes) {
        const int curr = gene_map[gene];
        string mutated = gene;  // scratch copy; one position is altered at a time

        for (size_t pos = 0; pos < mutated.size(); pos++) {
            for (char t : type) {
                if (t == gene[pos]) continue;

                mutated[pos] = t;
                auto hit = gene_map.find(mutated);
                if (hit != gene_map.end() && union_find.unify(curr, hit->second)) groups--;
            }

            // Restore before moving on to the next position.
            mutated[pos] = gene[pos];
        }
    }

    return groups;
}
                    


                        Solution in Java :

import java.util.*;

/**
 * Counts gene mutation groups with a union-find over gene indices.
 *
 * <p>Two genes are linked when they differ in exactly one position; groups are the
 * connected components of that relation. All working state lives inside
 * {@link #solve(String[])}, so one instance can be reused for any number of calls.
 */
class Solution {
    /** Characters tried as a replacement at every position of a gene. */
    private static final char[] NUCLEOTIDES = {'A', 'C', 'G', 'T'};

    /**
     * Returns the number of mutation groups in {@code genes}.
     *
     * <p>Genes are processed in order. For each gene, every single-character
     * substitution drawn from A/C/G/T is looked up among the genes already processed,
     * and each hit merges the two genes' groups. A character outside A/C/G/T is simply
     * replaced by each of the four letters in turn.
     *
     * @param genes the genes to group; {@code null} or empty yields 0
     * @return the number of distinct groups
     */
    public int solve(String[] genes) {
        if (genes == null || genes.length == 0) {
            return 0;
        }

        int n = genes.length;
        int[] parent = new int[n];
        int[] size = new int[n];
        for (int i = 0; i < n; i++) {
            parent[i] = i;
            size[i] = 1;
        }

        // Genes seen so far -> their index. Local on purpose: nothing may leak
        // from one call into the next.
        Map<String, Integer> indexByGene = new HashMap<String, Integer>();
        int groups = n;

        for (int i = 0; i < n; i++) {
            String gene = genes[i];
            char[] candidate = gene.toCharArray();

            for (int j = 0; j < candidate.length; j++) {
                char original = candidate[j];
                for (char nucleotide : NUCLEOTIDES) {
                    if (nucleotide == original) {
                        continue;
                    }
                    candidate[j] = nucleotide;
                    Integer neighbor = indexByGene.get(new String(candidate));
                    // Every successful merge removes exactly one group.
                    if (neighbor != null && union(parent, size, i, neighbor)) {
                        groups--;
                    }
                }
                // Restore so that only one position differs at a time.
                candidate[j] = original;
            }

            indexByGene.put(gene, i);
        }

        return groups;
    }

    /**
     * Merges the sets containing {@code a} and {@code b}, attaching the smaller
     * tree under the larger one.
     *
     * @return {@code true} if two distinct sets were merged
     */
    private static boolean union(int[] parent, int[] size, int a, int b) {
        int rootA = find(parent, a);
        int rootB = find(parent, b);
        if (rootA == rootB) {
            return false;
        }
        if (size[rootA] < size[rootB]) {
            int tmp = rootA;
            rootA = rootB;
            rootB = tmp;
        }
        parent[rootB] = rootA;
        size[rootA] += size[rootB];
        return true;
    }

    /** Finds the root of {@code node}, compressing the walked path (iteratively). */
    private static int find(int[] parent, int node) {
        int root = node;
        while (parent[root] != root) {
            root = parent[root];
        }
        while (node != root) {
            int next = parent[node];
            parent[node] = root;
            node = next;
        }
        return root;
    }
}
                    


                        Solution in Python : 
                            
class Solution:
    def solve(self, A):
        """Return the number of mutation groups among the genes in ``A``.

        Each gene is unioned with every pattern obtained by replacing one of
        its characters with ``"*"``; genes that differ in a single position
        share such a pattern and therefore end up in the same set.
        """
        dsu = DSU()
        for gene in A:
            for pos, _ in enumerate(gene):
                wildcard = gene[:pos] + "*" + gene[pos + 1 :]
                dsu.union(gene, wildcard)
        # Distinct roots among the real genes (patterns are not counted).
        roots = {dsu.find(gene) for gene in A}
        return len(roots)


class DSU:
    """Disjoint-set union over arbitrary hashable keys.

    Keys are mapped lazily to dense integer ids the first time they are seen.
    """

    def __init__(self):
        # key -> integer id
        self.mp = {}
        # par[i] is the parent id of i; a root is its own parent
        self.par = []
        # sz[i] is the size of the tree rooted at i (meaningful for roots)
        self.sz = []

    def find(self, x):
        """Return the root id of key ``x``, registering ``x`` if it is new."""
        i = self.mp.get(x)
        if i is None:
            # First sighting: allocate the next id as a singleton set.
            i = len(self.mp)
            self.mp[x] = i
            self.par.append(i)
            self.sz.append(1)
        return self._find(i)

    def _find(self, x):
        """Return the root of id ``x`` and point the walked path at it."""
        root = x
        while self.par[root] != root:
            root = self.par[root]
        # Path compression, done iteratively.
        while self.par[x] != root:
            self.par[x], x = root, self.par[x]
        return root

    def union(self, x, y):
        """Merge the sets of keys ``x`` and ``y``.

        Returns True if two distinct sets were merged, False if they were
        already the same set.
        """
        xr, yr = self.find(x), self.find(y)
        if xr == yr:
            return False
        # Union by size: the smaller tree is attached under the larger one.
        if self.sz[xr] < self.sz[yr]:
            xr, yr = yr, xr
        self.par[yr] = xr
        self.sz[xr] += self.sz[yr]
        # The absorbed root's entry is kept equal to the merged size.
        self.sz[yr] = self.sz[xr]
        return True
                    


View More Similar Problems

Self-Driving Bus

Treeland is a country with n cities and n - 1 roads. There is exactly one path between any two cities. The ruler of Treeland wants to implement a self-driving bus system and asks tree-loving Alex to plan the bus routes. Alex decides that each route must contain a subset of connected cities; a subset of cities is connected if the following two conditions are true: There is a path between ever

View Solution →

Unique Colors

You are given an unrooted tree of n nodes numbered from 1 to n. Each node i has a color, c_i. Let d(i, j) be the number of different colors in the path between node i and node j. For each node i, calculate the value of sum_i, defined as follows: $sum_i = \sum_{j=1}^{n} d(i, j)$. Your task is to print the value of sum_i for each node 1 <= i <= n. Input Format The first line contains a single integer, n, denoti

View Solution →

Fibonacci Numbers Tree

Shashank loves trees and math. He has a rooted tree, T, consisting of N nodes uniquely labeled with integers in the inclusive range [1, N]. The node labeled as 1 is the root node of tree T, and each node in T is associated with some positive integer value (all values are initially 0). Let's define F_k as the k-th Fibonacci number. Shashank wants to perform 2 types of operations over his tree, T

View Solution →

Pair Sums

Given an array, we define its value to be the value obtained by following these instructions: Write down all pairs of numbers from this array. Compute the product of each pair. Find the sum of all the products. For example, for a given array [7, 2, -1, 2], note that (7, 2) is listed twice, one for each occurrence of 2. Given an array of integers, find the largest v

View Solution →

Lazy White Falcon

White Falcon just solved the data structure problem below using heavy-light decomposition. Can you help her find a new solution that doesn't require implementing any fancy techniques? There are 2 types of query operations that can be performed on a tree: 1 u x: Assign x as the value of node u. 2 u v: Print the sum of the node values in the unique path from node u to node v. Given a tree wi

View Solution →

Ticket to Ride

Simon received the board game Ticket to Ride as a birthday present. After playing it with his friends, he decides to come up with a strategy for the game. There are n cities on the map and n - 1 road plans. Each road plan consists of the following: Two cities which can be directly connected by a road. The length of the proposed road. The entire road plan is designed in such a way that if o

View Solution →