Determining DNA Health – HackerRank Solution

In this post, we will solve Determining DNA Health HackerRank Solution. This problem (Determining DNA Health) is a part of HackerRank Problem Solving series.

Task

DNA is a nucleic acid present in the bodies of living things. Each piece of DNA contains a number of genes, some of which are beneficial and increase the DNA’s total health. Each gene has a health value, and the total health of a DNA is the sum of the health values of all the beneficial genes that occur as a substring in the DNA. We represent genes and DNA as non-empty strings of lowercase English alphabetic letters, and the same gene may appear multiple times as a susbtring of a DNA.

Given the following:

  • An array of beneficial gene strings, genes = [g0, g1, . . . , gn-1]. Note that these gene sequences are not guaranteed to be distinct.
  • An array of gene health values, health = [h0, h1, . . . , hn-1], where each hi is the health value for gene gi.
  • A set of s DNA strands where the definition of each strand has three components, startend, and d, where string d is a DNA for which genes gstart, . . . , gend are healthy.

Find and print the respective total healths of the unhealthiest (minimum total health) and healthiest (maximum total health) strands of DNA as two space-separated values on a single line.

Input Format

The first line contains an integer, n, denoting the total number of genes.
The second line contains n space-separated strings describing the respective values of g0, g1, . . . , gn-1 (i.e., the elements of genes).
The third line contains n space-separated integers describing the respective values of h0, h1, . . . , hn-1 (i.e., the elements of health).
The fourth line contains an integer, s, denoting the number of strands of DNA to process.
Each of the s subsequent lines describes a DNA strand in the form start end d, denoting that the healthy genes for DNA strand d are gstart, . . . , gend and their respective correlated health values are hstart, . . . , hend.

Constraints

  • 1 <= n, s <= 105
  • 0 <= hi <= 107
  • 0 <= first <= last < n
  • 1 <= the sum of the lengths of all genes and DNA strands <= 2 x 106
  • It is guaranteed that each gi consists of lowercase English alphabetic letters only (i.e., a to z).

Output Format

Print two space-separated integers describing the respective total health of the unhealthiest and the healthiest strands of DNA.

Sample Input 0

6
a b c aa d b
1 2 3 4 5 6
3
1 5 caaab
0 4 xyz
2 4 bcdybc

Sample Output 0

0 19

Explanation 0

In the diagrams below, the ranges of beneficial genes for a specific DNA on the left are highlighed in green and individual instances of beneficial genes on the right are bolded. The total healths of the s = 3 strands are:

  1. The total health of caaab is 3 + 4 + 4 + 2 + 6 = 19.
  2. The total health of xyz is 0, because it contains no beneficial genes.
  3. The total health of bcdybc is 3 + 5 + 3 = 11.

The unhealthiest DNA strand is xyz with a total health of 0, and the healthiest DNA strand is caaab with a total health of 19. Thus, we print 0 19 as our answer.

Solution – Determining DNA Health – HackerRank Solution

C++

#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
#include <iostream>
#include <cassert>
#include <cmath>
#include <string>
#include <queue>
#include <set>
#include <map>
#include <cstdlib>

using namespace std;

#define INF 1e+9
#define mp make_pair
#define pb push_back
#define fi first
#define fs first
#define se second
#define i64 long long
#define li long long
#define lint long long
#define pii pair<int, int>
#define vi vector<int>

#define forn(i, n) for (int i = 0; i < (int)n; i++)
#define fore(i, b, e) for (int i = (int)b; i <= (int)e; i++)

struct vertex {
    int next[26];
    vi numbers;
    int p;
    char pch;
    int link;
    int go[26];
};

const int maxn = 2e6+5;
 
vertex t[maxn];
int sz;
int cost[maxn];
 
void init() {
    t[0].p = t[0].link = -1;
    memset (t[0].next, 255, sizeof t[0].next);
    memset (t[0].go, 255, sizeof t[0].go);
    sz = 1;
}
 
void add_string (const string & s, int num) {
    int v = 0;
    for (size_t i=0; i<s.length(); ++i) {
        int c = s[i]-'a';
        if (t[v].next[c] == -1) {
            memset (t[sz].next, 255, sizeof t[sz].next);
            memset (t[sz].go, 255, sizeof t[sz].go);
            t[sz].link = -1;
            t[sz].p = v;
            t[sz].pch = c;
            t[v].next[c] = sz++;
        }
        v = t[v].next[c];
    }
    t[v].numbers.pb(num);
}
 
int go (int v, int c);
 
int get_link (int v) {
    if (t[v].link == -1) {
        if (v == 0 || t[v].p == 0)
            t[v].link = 0;
        else
            t[v].link = go (get_link (t[v].p), t[v].pch);
    }
    return t[v].link;
}
 
int go (int v, int c) {
    if (t[v].go[c] == -1) {
        if (t[v].next[c] != -1)
            t[v].go[c] = t[v].next[c];
        else
            t[v].go[c] = v==0 ? 0 : go (get_link (v), c);
    }
    return t[v].go[c];
}

int main() {
#ifdef LOCAL
    freopen("inp", "r", stdin);
    //freopen("outp", "w", stdout);
#else
    // freopen(TASKNAME ".in", "r", stdin);
    // freopen(TASKNAME ".out", "w", stdout);
#endif
    init();
    int n;
    scanf("%d", &n);
    forn(j, n) {
        string s;
        cin >> s;
//        cout << "s = " << s << endl;
        add_string(s, j);
    }
    forn(j, n)
        scanf("%d", &cost[j]);
    int q;
    scanf("%d", &q);
    i64 minn = 0;
    i64 maxx = 0;
    forn(query, q) {
        int L, R;
        scanf("%d%d", &L, &R);
        string s;
        cin >> s;
        int cur = 0;
        i64 sum = 0;
        for (char c : s) {
            cur = go(cur, (int)(c - 'a'));
            //printf("cur = %d\n", cur);
            int tmp = cur;
            while(tmp != 0) {
                for (int x : t[tmp].numbers) {
                    if (x >= L && x <= R)
                        sum += cost[x];
                    //printf("%c x = %d sum = %lld\n", c, x, sum);
                }
                tmp = get_link(tmp);
            }
        }
        if (query == 0 || sum > maxx)
            maxx = sum;
        if (query == 0 || sum < minn)
            minn = sum;
    }
    cout << minn << " " << maxx;
}

Python

from math import inf
from bisect import bisect_left as bLeft, bisect_right as bRight
from collections import defaultdict

def getHealth(seq, first, last, largest):
    h, ls = 0, len(seq)

    for f in range(ls):
        for j in range(1, largest+1):
            if f+j > ls:
                break
            sub = seq[f:f+j]
            if sub not in subs:
                break
            if sub not in gMap:
                continue
            ids, hs = gMap[sub]
            h += hs[bRight(ids, last)]-hs[bLeft(ids, first)]
    return h

if __name__ == '__main__':

    n = int(input())
    genes = input().rstrip().split()
    health = list(map(int, input().rstrip().split()))

    gMap = defaultdict(lambda: [[], [0]])
    subs = set()
    for i, gene in enumerate(genes):
        gMap[gene][0].append(i)
        for j in range(1, min(len(gene), 500)+1):
            subs.add(gene[:j])

    for v in gMap.values():
        for i, ix in enumerate(v[0]):
            v[1].append(v[1][i]+health[ix])

    largest = max(list(map(len, genes)))
    hMin, hMax = inf, 0

    s = int(input())
    for s_itr in range(s):
        firstLastd = input().split()
        first = int(firstLastd[0])
        last = int(firstLastd[1])
        d = firstLastd[2]
        h = getHealth(d, first, last, largest)
        hMin, hMax = min(hMin, h), max(hMax, h)

    print(hMin, hMax)

Java

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Arrays;

public class Solution {

    private static String[] genes;
    private static int[] health;

    public static void main(String[] args) throws Exception {
        PrintStream out = new PrintStream(System.out);
        
        long startMillis = System.currentTimeMillis();
        
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        int n = Integer.parseInt(in.readLine());

        genes = new String[n];
        int longestKey = 0;
        
        String[] genesItems = in.readLine().split(" ");

        Trie trie = new Solution().new Trie();
        for (int i = 0; i < n; i++) {
            String genesItem = genesItems[i];
            genes[i] = genesItem;
            
            trie.insert(genes[i], i);
            
            if (genes[i].length() > longestKey)
                longestKey = genes[i].length();
        }

        health = new int[n];

        String[] healthItems = in.readLine().split(" ");
        
        for (int i = 0; i < n; i++) {
            int healthItem = Integer.parseInt(healthItems[i]);
            health[i] = healthItem;
        }

        //out.println("Longest key: " + longestKey);
        //out.println("Loading: " + (System.currentTimeMillis() - startMillis)/1000f);
        startMillis = System.currentTimeMillis();
        
        int s = Integer.parseInt(in.readLine());
        long maxH = 0, minH = Long.MAX_VALUE;
        
        for (int sItr = 0; sItr < s; sItr++) {
            String[] firstLastd = in.readLine().split(" ");

            int first = Integer.parseInt(firstLastd[0]);
            int last = Integer.parseInt(firstLastd[1]);

            String d = firstLastd[2];
            long total = 0;
            int keyLen = d.length();
            
            for (int i = 0; i < keyLen; i++) {
                total += trie.find(d.substring(i, keyLen > longestKey && i + longestKey < keyLen? i + longestKey : keyLen), first, last);
            }
            
            if (total > maxH)
                maxH = total;
            
            if (total < minH)
                minH = total;
        }
        
        in.close();
        out.println(minH + " " + maxH);
        out.close();
    }
    
    class Trie {
        private final short SIZE = 26;
        private TrieNode root;
        
        public Trie () {
            this.root = new TrieNode();
        }
        
        class TrieNode {
            private TrieNode[] children = new TrieNode[SIZE]; 
            private int index[];
            private boolean isEnd;
            
            public TrieNode(){
                isEnd = false;
            }
            
            @Override
            public String toString(){
                return (index != null? Arrays.toString(index) + " " : "") 
                        + Arrays.toString(Arrays.stream(children).filter(p -> p != null).toArray());
            }
        }
        
        public void insert(String key, int index) {
            int arrayPos;
            
            TrieNode node = root;
            
            for (int i = 0; i < key.length(); i++) {
                arrayPos = key.charAt(i) - 'a';
                
                if (node.children[arrayPos] == null) {
                    node.children[arrayPos] = new TrieNode();
                }
                
                node = node.children[arrayPos];
            }
            
            if (node.index == null) {
                node.index = new int[1];
            } else {
                node.index = Arrays.copyOf(node.index, node.index.length + 1);
            }
            
            node.index[node.index.length - 1] = index;
            node.isEnd = true;
        }
        
        public long find(String key, int start, int end) {
            int arrayPos;
            
            TrieNode node = root;
            long result = 0;
            int keyLen = key.length();
            
            for (int i = 0; i < keyLen; i++) {
                arrayPos = key.charAt(i) - 'a';
                
                if (node.children[arrayPos] != null) {
                    node = node.children[arrayPos];
                    
                    if (node.isEnd 
                            && (   start <= node.index[node.index.length - 1] 
                                || end >= node.index[0])) {
                        
                        int sI = -1, eI = -1;
                        for (int j = 0, k = node.index.length - 1; j < node.index.length && k >= 0; j++, k--) {
                            if (sI == -1) {
                                if (node.index[j] >= start)
                                    sI = j;
                                else if (node.index[k] < start && k + 1 < node.index.length && node.index[k + 1] >= start)
                                    sI = k + 1;
                            }
                            
                            if (eI == -1) {
                                if (node.index[k] <= end)
                                    eI = k;
                                else if (node.index[j] > end && j - 1 >= 0 && node.index[j - 1] <= end)
                                    eI = j - 1;
                            }
                            
                            if (j == k || (sI != -1 && eI != -1))
                                break;
                        }
                        
                        if (sI != -1 && eI != -1) {
                            for (int j = sI; j <= eI; j++) {
                                result += health[node.index[j]];
                            }
                        }
                    }
                } else
                    break;
            }
            
            return result;
        }
        
        @Override
        public String toString() {
            return root.toString();
        }
    }
}

Note: This problem (Determining DNA Health) is generated by HackerRank but the solution is provided by CodingBroz. This tutorial is only for Educational and Learning purpose.

Leave a Comment

Your email address will not be published. Required fields are marked *