Gene Mutation Groups - Google Top Interview Questions


Problem Statement :


You are given a list of unique strings genes where each element has the same length and contains characters "A", "C", "G" and/or "T".

If strings a and b are the same string except for one character, then a and b are in the same mutation group.

If strings a and b are in a group and b and c are in a group, then a and c are in the same group.
Return the total number of mutation groups.

Constraints

n ≤ 10,000

k ≤ 20 where k is the length of a string in genes

Example 1

Input

genes = ["ACGT", "ACCT", "AGGT", "TTTT", "TTTG"]

Output

2

Explanation

There are two mutation groups:



["ACGT", "ACCT", "AGGT"]

["TTTT", "TTTG"]



Solution :



title-img




                        Solution in C++ :

class UnionFind {
    private:
    vector<int> parents, rank;

    public:
    UnionFind(int n) {
        parents.resize(n);
        rank.resize(n);
        for (int i = 0; i < n; i++) {
            parents[i] = i;
            rank[i] = 1;
        }
    }

    int find(int node) {
        int root = node;

        while (root != parents[root]) {
            root = parents[root];
        }

        // Path compression
        while (node != root) {
            int temp = parents[node];
            parents[node] = root;
            node = temp;
        }

        return root;
    }

    // Returns true if union happens
    bool unify(int a, int b) {
        int rootA = find(a);
        int rootB = find(b);

        if (rootA == rootB) return false;

        // Union by rank
        if (rank[rootA] > rank[rootB]) {
            parents[rootB] = rootA;
        } else if (rank[rootB] > rank[rootA]) {
            parents[rootA] = rootB;
        } else {
            parents[rootB] = rootA;
            rank[rootA]++;
        }

        return true;
    }
};

int solve(vector<string>& genes) {  // Time: O(N * K), Space: O(N)
    int n = genes.size();
    unordered_map<string, int> gene_map;
    char type[4] = {'A', 'C', 'G', 'T'};

    for (int i = 0; i < n; i++) gene_map[genes[i]] = i;

    UnionFind union_find(n);
    int groups = n;

    for (string& gene : genes) {
        int curr = gene_map[gene];

        for (char& c : gene) {
            char orig = c;
            for (char t : type) {
                if (t != orig) {
                    c = t;
                    if (gene_map.count(gene)) {
                        if (union_find.unify(curr, gene_map[gene])) groups--;
                    }
                }
            }

            c = orig;
        }
    }

    return groups;
}
                    


                        Solution in Java :

import java.util.*;

class Solution {
    class DisjointSet {
        int val;
        DisjointSet parent;
        public DisjointSet(int value) {
            this.val = value;
            this.parent = this;
        }
    }
    private static final String delim = "_";
    private Map<Integer, DisjointSet> map = new HashMap();
    private Map<String, Integer> indexMap = new HashMap();
    private Set<DisjointSet> parent = new HashSet();

    private String s1 = "CGT";
    private String s2 = "AGT";
    private String s3 = "ACT";
    private String s4 = "ACG";
    private Map<Character, String> replaceMap = new HashMap();

    public int solve(String[] genes) {
        if (genes == null || genes.length == 0)
            return 0;

        replaceMap.put('A', s1);
        replaceMap.put('C', s2);
        replaceMap.put('G', s3);
        replaceMap.put('T', s4);

        for (int i = 0; i < genes.length; i++) map.put(i, new DisjointSet(i));

        for (int i = 0; i < genes.length; i++) {
            String str = genes[i];
            for (int j = 0; j < str.length(); j++) {
                String replaceString = replaceMap.get(str.charAt(j));
                for (int k = 0; k < replaceString.length(); k++) {
                    char replaceChar = replaceString.charAt(k);
                    String temStr = new StringBuilder().append(str).toString();
                    char[] tempChar = temStr.toCharArray();
                    tempChar[j] = replaceChar;
                    if (indexMap.containsKey(new String(tempChar))) {
                        union(i, indexMap.get(new String(tempChar)));
                    }
                }
            }
            indexMap.put(str, i);
        }

        for (int i = 0; i < genes.length; i++) {
            // add the parent (i.e find) of each node in a hashset
            parent.add(find(map.get(i)));
        }
        return parent.size();
    }

    private void union(int idx1, int idx2) {
        DisjointSet set1 = map.get(idx1);
        DisjointSet set2 = map.get(idx2);

        DisjointSet par1 = find(set1);
        DisjointSet par2 = find(set2);

        if (par1.val == par2.val)
            return;
        par1.parent = par2;
    }

    private DisjointSet find(DisjointSet set) {
        if (set.parent == set)
            return set;
        return set.parent = find(set.parent);
    }
}
                    


                        Solution in Python : 
                            
class Solution:
    def solve(self, A):
        dsu = DSU()
        for word in A:
            for i in range(len(word)):
                root = word[:i] + "*" + word[i + 1 :]
                dsu.union(word, root)
        return len(set(map(dsu.find, A)))


class DSU:
    def __init__(self):
        self.mp = {}
        self.par = []
        self.sz = []

    def find(self, x):
        try:
            i = self.mp[x]
        except:
            self.mp[x] = i = len(self.mp)
            self.par.append(i)
            self.sz.append(1)
        return self._find(i)

    def _find(self, x):
        if self.par[x] != x:
            self.par[x] = self._find(self.par[x])
        return self.par[x]

    def union(self, x, y):
        xr, yr = self.find(x), self.find(y)
        if xr == yr:
            return False
        if self.sz[xr] < self.sz[yr]:
            xr, yr = yr, xr
        self.par[yr] = xr
        self.sz[xr] += self.sz[yr]
        self.sz[yr] = self.sz[xr]
        return True
                    


View More Similar Problems

Delete a Node

Delete the node at a given position in a linked list and return a reference to the head node. The head is at position 0. The list may be empty after you delete the node. In that case, return a null value. Example: list=0->1->2->3 position=2 After removing the node at position 2, list'= 0->1->-3. Function Description: Complete the deleteNode function in the editor below. deleteNo

View Solution →

Print in Reverse

Given a pointer to the head of a singly-linked list, print each data value from the reversed list. If the given list is empty, do not print anything. Example head* refers to the linked list with data values 1->2->3->Null Print the following: 3 2 1 Function Description: Complete the reversePrint function in the editor below. reversePrint has the following parameters: Sing

View Solution →

Reverse a linked list

Given the pointer to the head node of a linked list, change the next pointers of the nodes so that their order is reversed. The head pointer given may be null meaning that the initial list is empty. Example: head references the list 1->2->3->Null. Manipulate the next pointers of each node in place and return head, now referencing the head of the list 3->2->1->Null. Function Descriptio

View Solution →

Compare two linked lists

You’re given the pointer to the head nodes of two linked lists. Compare the data in the nodes of the linked lists to check if they are equal. If all data attributes are equal and the lists are the same length, return 1. Otherwise, return 0. Example: list1=1->2->3->Null list2=1->2->3->4->Null The two lists have equal data attributes for the first 3 nodes. list2 is longer, though, so the lis

View Solution →

Merge two sorted linked lists

This challenge is part of a tutorial track by MyCodeSchool Given pointers to the heads of two sorted linked lists, merge them into a single, sorted linked list. Either head pointer may be null meaning that the corresponding list is empty. Example headA refers to 1 -> 3 -> 7 -> NULL headB refers to 1 -> 2 -> NULL The new list is 1 -> 1 -> 2 -> 3 -> 7 -> NULL. Function Description C

View Solution →

Get Node Value

This challenge is part of a tutorial track by MyCodeSchool Given a pointer to the head of a linked list and a specific position, determine the data value at that position. Count backwards from the tail node. The tail is at postion 0, its parent is at 1 and so on. Example head refers to 3 -> 2 -> 1 -> 0 -> NULL positionFromTail = 2 Each of the data values matches its distance from the t

View Solution →