Facebook Interview Question
SDETsCountry: United States
This can be nicely implemented, if the dictionary is in a trie.
Just start finding words in the trie, and put all word breaks onto
a stack, together with the number of words found (1 at the beginning)
Then go back to the stack pick the next, try to extend until you eventually
reach the the end of the string.
the question is when to stop. If you want to be sure to minimize, you have to
work through the complete stack. Intuitively, I would say, the algorithm will first
try longest words (with common prefixes), if he can make it through, the first solution
has a high chance of being the best. "High chance" needs to be investigated, if your life
depends on speed and accuracy.
Here an implementation, with the hash set for dictionary, slower, but doesn't need a Trie
implementation.
#include <string>
#include <unordered_set>
#include <stack>
#include <algorithm>
#include <limits>
#include <iostream>
using namespace std;
int min_word_breaks(const string& inputStr, const unordered_set<string>& dict)
{
int min_word_breaks = numeric_limits<int>::max();
size_t max_word_len = 0;
for (const auto& word : dict) max_word_len = max(max_word_len, word.size());
stack<pair<size_t, int>> s({ {0, 0} });
while (!s.empty()) {
auto idx = s.top().first;
auto word_ct = s.top().second;
s.pop();
if (idx == inputStr.length()) {
min_word_breaks = min(min_word_breaks, word_ct - 1);
} else if (word_ct + 1 < min_word_breaks) { // explore if I could beat current best
for (size_t len = 1; idx + len <= inputStr.length() && len <= max_word_len; ++len) {
if (dict.count(inputStr.substr(idx, len))) {
s.push({ idx + len, word_ct + 1 });
}
}
}
}
return min_word_breaks;
}
int main()
{
cout << min_word_breaks("CatMat", { "Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do" }) << endl;
}
Here is the python solution
def test():
str = "CatMat"
wordList = ["Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"]
print wordBreak(str, wordList)
def wordBreak(sentences, wordList):
numBreak = 0
#Sort the wordList by the length of the word
wordList.sort(lambda x, y: -1 * cmp(len(x), len(y)))
for word in wordList:
if len(sentences) == 0:
break
#Check if the word is appeared in the sentences, if so, collect the substring and update sentences with the substring
while word in sentences:
sentences = subString(sentences, word)
numBreak += 1
#Increment numBreak if there are left over of sentences
if sentences != '':
numBreak += 1
return numBreak
def subString(sentences, word):
startItWord = 0
endItWord = len(word)
while((endItWord - startItWord <= len(sentences)) and (endItWord <= len(sentences))):
if sentences[startItWord:endItWord] == word:
return sentences[0:startItWord] + sentences[endItWord:]
else:
startItWord += 1
endItWord += 1
return sentences
Define wordBreak function recursively as follows, where S is the input string and D is the dictionary.
wordBreak(S, D) = 0, if S belongs to D
infinity, if no word in D is a prefix of S
(min of wordBreak(S - word, D) for every word in D that is prefix of S) + 1
Use dynamic programming to compute above recurrence.
function wordBreak(S, D, DP){
if (!DP.has(S)){
if (D.has(S)){
DP.set(S, 0);
}
else {
let breaks = Number.MAX_SAFE_INTEGER;
D.forEach((word) => {
if (S.startsWith(word)){
breaks = Math.min(
breaks,
wordBreak(S.substring(word.length), D, DP)
);
}
});
if (breaks === Number.MAX_SAFE_INTEGER){
DP.set(S, breaks);
}
else {
DP.set(S, breaks + 1);
}
}
}
return DP.get(S);
}
console.log(
wordBreak(
'CatMat',
new Set(['Cat', 'Mat', 'Ca', 'tM', 'at', 'C', 'Dog', 'og', 'Do']),
new Map()
)
);
Time complexity is O(|S| * |D| * |w|), where w is a word in D. The O(|D| * |w|) complexity per subproblem might be improved with help of Trie data structure tho.
Recursive solution with look-up table :
+ Going over input string and split it into two parts ( left + right ) of different length.
+ If left is a valid string in dictionary then we recurse over right part.
+ Every recursive call, we use look-up table to avoid duplicate calls.
+ After all combination for a given string, save "minimum cuts" into lookup table.
#include<iostream>
#include<unordered_set>
#include<unordered_map>
#include<limits.h>
using namespace std;
unordered_map<string, int> table;
int minCut( const string str,
const unordered_set<string>& dict )
{
// look up return
if( table.find( str) != table.end() )
{
return table[str];
}
// find it in dict
if( dict.find( str) != dict.end())
{
table[str] = 0;
return table[str];
}
int minCount = INT_MAX;
for( int i=1; i<str.size(); ++i )
{
int count = 0;
string left = str.substr(0,i);
if( dict.find(left) != dict.end() )
{
string right = str.substr(i);
count += minCut( right, dict);
// INT_MAX means right substring is not breakable into valid combination
if( count != INT_MAX )
{
++count;
minCount = min( count, minCount);
}
}
}
table[str] = minCount;
return table[str];
}
int main()
{
string str = "CatMatYY";
unordered_set<string> dict = { "Cat", "Mat", "Ca", "tM", "at", "C", "M", "Y"};
cout << minCut( str, dict );
return 0;
}
public static void main(String[] args) {
String s = "CatMat";
Set<String> dict = new HashSet<String>();
dict.add("Cat");
dict.add("Mat");
dict.add("Ca");
dict.add("tM");
dict.add("t");
dict.add("a");
dict.add("C");
dict.add("Dog");
dict.add("og");
dict.add("Do");
int n = wordBreak(s, dict, 0, s.length());
System.out.println(n == s.length() ? -1 : n);
}
public static int wordBreak(String s, Set<String> dict, int l, int h) {
if (l > h)
return s.length();
String temp = s.substring(l, h);
if (dict.contains(temp))
return 0;
String str = "";
int min = s.length();
for (int i = l; i < h; i++) {
str += s.charAt(i);
if (dict.contains(str)) {
min = Math.min(min, wordBreak(s, dict, i + 1, h)+1);
}
}
return min;
}
import java.util.HashSet;
import java.util.Set;
public class StringPartition {
public static void main(String[] args) {
String s = "CatMat";
String[] dict = {"Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"};
Set<String> dictSet = new HashSet<>();
for(String word: dict) {
dictSet.add(word);
}
System.out.println(wordBreak(s, dictSet));
}
public static int wordBreak(String s, Set<String> dict) {
int leftStart = (s.length()/2);
int rightStart = leftStart + 1;
for(int i = leftStart, j = rightStart; i >= 0 & j < s.length(); i--, j++) {
String wordL = s.substring(0, i);
String wordR = s.substring(j, s.length());
if(!dict.contains(wordL) && !dict.contains(wordR)) {
return -1;
}
}
return 1;
}
}
import java.util.HashSet;
import java.util.Set;
public class StringPartition {
public static void main(String[] args) {
String s = "CatMat";
String[] dict = {"Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"};
Set<String> dictSet = new HashSet<>();
for(String word: dict) {
dictSet.add(word);
}
System.out.println(wordBreak(s, dictSet));
}
public static int wordBreak(String s, Set<String> dict) {
int leftStart = (s.length()/2);
int rightStart = leftStart + 1;
for(int i = leftStart, j = rightStart; i >= 0 & j < s.length(); i--, j++) {
String wordL = s.substring(0, i);
String wordR = s.substring(j, s.length());
if(!dict.contains(wordL) && !dict.contains(wordR)) {
return -1;
}
}
return 1;
}
}
import java.util.HashSet;
import java.util.Set;
public class StringPartition {
public static void main(String[] args) {
String s = "CatMat";
String[] dict = {"Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"};
Set<String> dictSet = new HashSet<>();
for(String word: dict) {
dictSet.add(word);
}
System.out.println(wordBreak(s, dictSet));
}
public static int wordBreak(String s, Set<String> dict) {
int leftStart = (s.length()/2);
int rightStart = leftStart + 1;
for(int i = leftStart, j = rightStart; i >= 0 & j < s.length(); i--, j++) {
String wordL = s.substring(0, i);
String wordR = s.substring(j, s.length());
if(!dict.contains(wordL) && !dict.contains(wordR)) {
return -1;
}
}
return 1;
}
}
/**/
import java.util.HashSet;
import java.util.Set;
public class StringPartition {
public static void main(String[] args) {
String s = "CatMat";
String[] dict = {"Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"};
Set<String> dictSet = new HashSet<>();
for(String word: dict) {
dictSet.add(word);
}
System.out.println(wordBreak(s, dictSet));
}
public static int wordBreak(String s, Set<String> dict) {
int leftStart = (s.length()/2);
int rightStart = leftStart + 1;
for(int i = leftStart, j = rightStart; i >= 0 & j < s.length(); i--, j++) {
String wordL = s.substring(0, i);
String wordR = s.substring(j, s.length());
if(!dict.contains(wordL) && !dict.contains(wordR)) {
return -1;
}
}
return 1;
}
}
/**/
class MinimumBreak {
int breaks[];
public int minimumBreaks(Set<String> dic, String word){
breaks = new int[word.length()+1];
for(int i=0;i<breaks.length;i++) breaks[i] = -1;
int result = findBreaks(dic, word, 0);
return result;
}
private int findBreaks(Set<String> dic, String word, int from){
if(dic.contains(word.substring(from))) {
breaks[from] = 0;
}else {
StringBuilder prefix = new StringBuilder(word.length());
int min = Integer.MAX_VALUE;
for (int i = from; i < word.length()-1; i++) {
prefix.append(word.charAt(i));
if (dic.contains(prefix.toString())) {
if (breaks[i + 1] == -1)
breaks[i + 1] = findBreaks(dic, word, i + 1);
min = Math.min(min, breaks[i + 1]+1);
}
}
breaks[from] = min;
}
return breaks[from];
}
}
Time O(s * w^2), space O(s), where s is size of the string to break, and w is max size of the dictionary words.
Time can be improved to O(s * w) if we use a trie for dictionary.
#include<iostream>
#include<vector>
#include<unordered_map>
#include<unordered_set>
using namespace std;
int MinBreaksCount(string const &s, unordered_set<string> const dict, unordered_map<int, int> &memo, int idx = 0)
{
if (idx < 0 ||
idx > s.size())
{
return numeric_limits<int>::max();
}
if (idx == s.size()) {
return 0;
}
auto it = memo.find(idx);
if (it != memo.end()) {
return it->second;
}
int min_count = numeric_limits<int>::max();
for (int i = idx; i < s.size(); ++i) {
if (dict.find(s.substr(idx, i - idx + 1)) != dict.end()) {
int count = MinBreaksCount(s, dict, memo, i + 1);
if (count != numeric_limits<int>::max()) {
if (i != s.size() - 1) {
++count;
}
min_count = min(min_count, count);
}
}
}
memo[idx] = min_count;
return memo[idx];
}
int main () {
unordered_set<string> dict = {"cat", "mat", "ca", "tm", "at", "c", "dog", "og", "do"};
unordered_map<int, int> memo;
cout << MinBreaksCount("catmat", dict, memo) << "\n";
return 0;
}
DICTIONARY = sorted(["Cat", "Mat", "Ca", "tM", "at", "C", "Dog", "og", "Do"], key=lambda x:len(x), reverse=True)
USED = [False for j in range(len(DICTIONARY))]
def find_min_breaks(string, words=()):
if string == '':
return words
min_result = None
for index, dict_word in enumerate(DICTIONARY):
if not USED[index] and string.startswith(dict_word):
USED[index] = True
result = find_min_breaks(string[len(dict_word):], words + (dict_word,))
if result and (min_result is None or len(result) < len(min_result)):
min_result = result
USED[index] = False
return min_result
- Inca Rose November 08, 2017