472. Concatenated Words (Hard)
Given a list of words (without duplicates), please write a program that returns all concatenated words in the given list of words.
A concatenated word is defined as a string that is comprised entirely of at least two shorter words in the given array.
Example:
Input: ["cat","cats","catsdogcats","dog","dogcatsdog","hippopotamuses","rat","ratcatdogcat"]
Output: ["catsdogcats","dogcatsdog","ratcatdogcat"]
Explanation: "catsdogcats" can be concatenated by "cats", "dog" and "cats";
"dogcatsdog" can be concatenated by "dog", "cats" and "dog";
"ratcatdogcat" can be concatenated by "rat", "cat", "dog" and "cat".
Note:
- The number of elements of the given array will not exceed
10,000
- The length sum of elements in the given array will not exceed
600,000
. - All the input string will only include lower case letters.
- The returned elements order does not matter.
Solution 1: DP
based on word break I
, remove current word from the set, find whether it can be built by other words.
dp[i] == true: word[0..i] can be built by other words
dp[i] = (dp[j] == true && word[j+1..i] exists in dict
push to final result if dp[len] == true
, since current word does not exist in the dict, it must be combined by other words.
dp[j] == true if dp[i] == true && word[i+1..j] exists in dict
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
if (words.size() <= 2) return {};
vector<string> res;
unordered_set<string> dict(words.begin(), words.end());
for (string word: words) {
dict.erase(word);
int len = word.size();
vector<bool> dp(len+1, false);
dp[0] = true;
for (int i = 1; i <= len; ++i) {
for (int j = 0; j < i; ++j) {
if (dp[j] && dict.count(word.substr(j, i-j))) {
dp[i] = true;
break;
}
}
}
if (dp[len]) res.push_back(word);
dict.insert(word);
}
return res;
}
下面这种方法跟上面的方法很类似,不同的是判断每个单词的时候不用将其移除set,而是在判断的过程中加了判断,使其不会判断单词本身是否在集合set中存在,而且由于对单词中子字符串的遍历顺序不同,加了一些优化在里面,使得其运算速度更快一些,参见代码如下:
dp[j] == true if dp[i] == true && word[i+1..j] exists in dict
j-i < len to avoid itself in dict
so we can push to final result if dp[len] == true
, it must be combined by other words.
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
if (words.size() <= 2) return {};
vector<string> res;
unordered_set<string> dict(words.begin(), words.end());
for (string word: words) {
int len = word.size();
vector<bool> dp(len+1, false);
dp[0] = true;
for (int i = 0; i <= len; ++i) {
if (!dp[i]) continue;
for (int j = i+1; j <= len; ++j) {
if (j-i < len && dict.count(word.substr(i, j-i))) {
dp[j] = true;
}
}
if (dp[len]) { res.push_back(word); break; }
}
}
return res;
}
Solution 2: DFS
下面这种方法是递归的写法,其中递归函数中的cnt表示有其他单词组成的个数,至少得由其他两个单词组成才符合题意,参见代码如下:
class Solution {
bool dfs(string& word, unordered_set<string>& dict, int pos, int cnt) {
if (pos == word.size() && cnt >= 2) return true;
for (int i = 1; i <= word.size()-pos; ++i) { // possible length
string t = word.substr(pos, i);
if (dict.count(t) && dfs(word, dict, pos+i, cnt+1)) return true;
}
return false;
}
public:
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
vector<string> res;
unordered_set<string> dict(words.begin(), words.end());
for (string word: words) {
if (dfs(word, dict, 0, 0)) res.push_back(word);
}
return res;
}
};
Solution 3: Trie MLE
class Solution {
private:
struct TrieNode {
bool isWord;
TrieNode* kids[26];
TrieNode(): isWord(false) {
for (int i = 0; i < 26; ++i) {
kids[i] = NULL;
}
}
};
TrieNode* root;
public:
bool dfs(string& word, TrieNode* t, int pos, int cnt) {
//return true, if we reach a word at the end & the original word s is a concatenated word
if (pos == word.size()) return t->isWord && cnt >= 1;
if (!t->kids[word[pos]-'a']) return false;
//reached a word, try to start from root again
if (t->kids[word[pos]-'a']->isWord && dfs(word, root, pos+1, cnt+1)) return true;
// keep going
return dfs(word, t->kids[word[pos]-'a'], pos+1, cnt);
}
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
vector<string> res;
root = new TrieNode();
// build trie
for (string word: words) {
TrieNode *cur = root;
for (char c : word) {
if (!cur->kids[c-'a']) cur->kids[c-'a'] = new TrieNode();
cur = cur->kids[c-'a'];
}
cur->isWord = true;
}
for (string word: words) {
if (dfs(word, root, 0, 0)) res.push_back(word);
}
return res;
}
};