472. Concatenated Words (Hard)

Given a list of words (without duplicates), please write a program that returns all concatenated words in the given list of words.

A concatenated word is defined as a string that is comprised entirely of at least two shorter words in the given array.

Example:

Input: ["cat","cats","catsdogcats","dog","dogcatsdog","hippopotamuses","rat","ratcatdogcat"]

Output: ["catsdogcats","dogcatsdog","ratcatdogcat"]

Explanation: "catsdogcats" can be concatenated by "cats", "dog" and "cats"; 
 "dogcatsdog" can be concatenated by "dog", "cats" and "dog"; 
"ratcatdogcat" can be concatenated by "rat", "cat", "dog" and "cat".

Note:

The number of elements of the given array will not exceed 10,000
The length sum of elements in the given array will not exceed 600,000.
All the input string will only include lower case letters.
The returned elements order does not matter.

Solution 1: DP

based on word break I, remove current word from the set, find whether it can be built by other words.

dp[i] == true: word[0..i] can be built by other words

dp[i] = (dp[j] == true && word[j+1..i] exists in dict

push to final result if dp[len] == true, since current word does not exist in the dict, it must be combined by other words.

dp[j] == true if dp[i] == true && word[i+1..j] exists in dict

vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
    if (words.size() <= 2) return {};
    vector<string> res;
    unordered_set<string> dict(words.begin(), words.end());
    for (string word: words) {
        dict.erase(word);
        int len = word.size();
        vector<bool> dp(len+1, false);
        dp[0] = true;
        for (int i = 1; i <= len; ++i) {
            for (int j = 0; j < i; ++j) {
                if (dp[j] && dict.count(word.substr(j, i-j))) {
                    dp[i] = true; 
                    break;
                }
            }
        }
        if (dp[len]) res.push_back(word);
        dict.insert(word);
    }

    return res;
}

下面这种方法跟上面的方法很类似，不同的是判断每个单词的时候不用将其移除set，而是在判断的过程中加了判断，使其不会判断单词本身是否在集合set中存在，而且由于对单词中子字符串的遍历顺序不同，加了一些优化在里面，使得其运算速度更快一些，参见代码如下：

dp[j] == true if dp[i] == true && word[i+1..j] exists in dict

j-i < len to avoid itself in dict

so we can push to final result if dp[len] == true, it must be combined by other words.

vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
    if (words.size() <= 2) return {};
    vector<string> res;
    unordered_set<string> dict(words.begin(), words.end());
    for (string word: words) {
        int len = word.size();
        vector<bool> dp(len+1, false);
        dp[0] = true;
        for (int i = 0; i <= len; ++i) {
            if (!dp[i]) continue;
            for (int j = i+1; j <= len; ++j) {
                if (j-i < len && dict.count(word.substr(i, j-i))) {
                    dp[j] = true; 
                }
            }
            if (dp[len]) { res.push_back(word); break; }
        }
    }

    return res;
}

Solution 2: DFS

下面这种方法是递归的写法，其中递归函数中的cnt表示有其他单词组成的个数，至少得由其他两个单词组成才符合题意，参见代码如下：

class Solution {
    bool dfs(string& word, unordered_set<string>& dict, int pos, int cnt) {
        if (pos == word.size() && cnt >= 2) return true;
        for (int i = 1; i <= word.size()-pos; ++i) { // possible length
            string t = word.substr(pos, i);
            if (dict.count(t) && dfs(word, dict, pos+i, cnt+1)) return true;
        }
        return false;
    }
public:
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        vector<string> res;
        unordered_set<string> dict(words.begin(), words.end());
        for (string word: words) {
            if (dfs(word, dict, 0, 0)) res.push_back(word);
        }
        return res;
    }
};

Solution 3: Trie MLE

class Solution {
private:
    struct TrieNode {
        bool isWord;
        TrieNode* kids[26];
        TrieNode(): isWord(false) {
            for (int i = 0; i < 26; ++i) {
                kids[i] = NULL;
            }
        }
    };

    TrieNode* root;
public:
    bool dfs(string& word, TrieNode* t, int pos, int cnt) {
        //return true, if we reach a word at the end & the original word s is a concatenated word
        if (pos == word.size()) return t->isWord && cnt >= 1;
        if (!t->kids[word[pos]-'a']) return false;
        //reached a word, try to start from root again
        if (t->kids[word[pos]-'a']->isWord && dfs(word, root, pos+1, cnt+1)) return true;
        // keep going
        return dfs(word, t->kids[word[pos]-'a'], pos+1, cnt);
    }
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        vector<string> res;
        root = new TrieNode();
        // build trie
        for (string word: words) {
            TrieNode *cur = root;
            for (char c : word) {
                if (!cur->kids[c-'a']) cur->kids[c-'a'] = new TrieNode();
                cur = cur->kids[c-'a'];
            }
            cur->isWord = true;
        }

        for (string word: words) {
            if (dfs(word, root, 0, 0)) res.push_back(word);
        }
        return res;
    }

};

472. Concatenated Words (Hard)

472. Concatenated Words (Hard)

results matching ""

No results matching ""