Google Interview Question
Software EngineersCountry: United States
Interview Type: Phone Interview
Why it is not scalable ?. We can segregate the dictionary by word length.
Map<Integer, HashSet<String>> map; //key - length & value - set of words
and this would also be helpful in decreasing the lookup time in the set.
However, I believe the most efficient way is to use Trie.
The main assumption here is that the character is mistyped not and not left over.
public static void main(String[] args) {
Set<String> set = new HashSet<>();
set.add("vil");set.add("sit");set.add("flick");set.add("pat");set.add("pluck");set.add("sat");set.add("vat");
System.out.println(ladderLength("vit", 3, set));
}
public static List<String> ladderLength(String beginWord, int count, Set<String> wordDict) {
LinkedList<String> queue = new LinkedList<>();
queue.offer(beginWord);
List<String> result = new ArrayList<>();
while(!queue.isEmpty()){
String word = queue.poll();
if(result.size() == count){
break;
}
if (!word.equals(beginWord)) {
result.add(word);
}
char[] arr = word.toCharArray();
for(int i=0; i<arr.length; i++){
for(char c='a'; c<='z'; c++){
char temp = arr[i];
if(arr[i]!=c){
arr[i]=c;
}
String newWord = new String(arr);
if(wordDict.contains(newWord)){
queue.add(newWord);
wordDict.remove(newWord);
}
arr[i]=temp;
}
}
}
return result;
}
public class Example {
public static void main(String[] args) {
Example e = new Example();
List<String> list = new ArrayList<>();
list.add("vil");
list.add("sit");
list.add("flick");
list.add("pat");
list.add("pluck");
list.add("sat");
list.add("vat");
List<String> r = e.getClosestStrings("vit", list, 1);
r.forEach(System.out::println);
}
public List<String> getClosestStrings(String str, List<String> list, int maxDist) {
if (str == null || list == null || list.isEmpty()) return null;
List<String> res = new ArrayList<>();
PriorityQueue<StringDist> pq = new PriorityQueue<>(Comparator.comparingInt(s1 -> s1.dist));
Map<Character, Integer> charCountMap = getCharCountMap(str);
for(String s: list) {
int dist = getDist(getCharCountMap(s), charCountMap);
pq.offer(new StringDist(s, str, dist));
}
while (!pq.isEmpty()) {
StringDist sd = pq.poll();
if (sd.dist > maxDist) break;
res.add(sd.source);
}
return res;
}
private Map<Character, Integer> getCharCountMap(String str) {
Map<Character, Integer> map = new HashMap<>();
for(char c: str.toCharArray()) {
map.put(c, map.getOrDefault(c, 0) + 1);
}
return map;
}
private Integer getDist(Map<Character, Integer> map1, Map<Character, Integer> map2) {
int diff = 0;
for(Map.Entry<Character, Integer> e: map1.entrySet()) {
diff += map2.containsKey(e.getKey()) ? Math.abs(e.getValue() - map2.get(e.getKey())) : e.getValue();
}
return diff;
}
private class StringDist {
String source;
String target;
Integer dist;
public StringDist(String source, String target, Integer dist) {
this.source = source;
this.target = target;
this.dist = dist;
}
}
}
def misspelled_match(input,dictionary):
output = []
n = len(input)
for word in dictionary:
if len(word) == n:
count = 0
for i,e in enumerate(word):
if e not in input:
if count == 0:
count += 1
else:
break
if i == n-1:
output.append(word)
#print(word)
return output
private static List<String> findWordInDictWith1Mistake(List<String> dict, String word) {
List<String> result = new ArrayList<>();
for( String s : dict ) {
int foundMistake=0;
for( int i = 0 ; i < word.length() && foundMistake < 2 ; ++i ) {
if(s.charAt(i) != word.charAt(i))
++foundMistake;
}
if(foundMistake == 1)
result.add(s);
}
return result;
}
def compute_distance(w,q):
assert len(w) == len(q)
dist = 0
for k in range(len(w)):
if w[k] != q[k]:
dist = dist +1
return dist
def miss_splell(words, query):
result = []
for w in words:
if len(w) == len(query):
dist = compute_distance(w, query)
if dist == 1:
result.append(w)
if len(result) == 3:
break
return result
static List<String> findWords(String str)
{
List<String> result = new ArrayList<String>();
int allowedMatchCount = (str.length()/2)+1;
for(String str2:dict1)
{
if(getCountOfMatches(str, str2)>=allowedMatchCount)
{
System.out.println(str2);
result.add(str2);
}
}
return result;
}
static int getCountOfMatches(String str1, String str2)
{
BitSet bs = new BitSet(str1.length());
for(int i=0;i<str1.length();i++)
{
bs.set(i, str1.charAt(i) == str2.charAt(i));
}
return bs.cardinality();
}
I also thought storing all patterns into hash table.
But, there are a lot of cases of the patterns. Using the memory space will increase terribly.
I thought using the Trie could be efficient. So, modified trie.
class ClosetTrie
{
class Node;
typedef unordered_map<char, Node*> _children;
class Node
{
public:
_children children;
~Node()
{
for (pair<const char, Node*>& child : children)
delete child.second;
}
void insert(const char* str)
{
if (*str == '\0')
children['\0'] = NULL;
else
{
_children::const_iterator it = children.find(*str);
Node* child;
if (it == children.end())
{
child = new Node;
children[*str] = child;
}
else
child = it->second;
child->insert(str + 1);
}
}
void find(const char* str, int diff, const string& prefix, list<string>& ret)
{
if (ret.size() >= 3)
return;
_children::const_iterator it = children.find(*str);
if (it != children.end())
{
if (*str == '\0')
{
ret.push_back(prefix);
}
else
it->second->find(str + 1, diff, prefix + *str, ret);
}
if (diff == 0)
return;
const char* next = *str == '\0' ? str : str + 1;
--diff;
for (pair<const char, Node*>& child : children)
{
if(child.first!=*str)
child.second->find(next, diff, prefix + child.first, ret);
}
}
};
Node root;
public:
ClosetTrie(const unordered_set<string>& dict)
{
for (const std::string& str : dict)
root.insert(str.c_str());
}
void find(const char* word, list<string>& ret)
{
root.find(word, 1, "", ret);
}
};
int main()
{
ClosetTrie trie({ "vil", "sit", "flick", "pat", "pluck", "sat", "vat" });
list<string> closets;
trie.find("vit", closets);
return 0;
}
#include <iostream>
#include <string>
#include <set>
#include <map>
#include <vector>
using namespace std;
int main(int argc, char** argv) {
set<string> dict = { "vil", "sit", "flick", "pat", "pluck", "sat", "vat", "vit" };
map<int, vector<string>> myMap;
string result[3];
string input = "vit";
for(auto iter = dict.begin(); iter != dict.end(); ++iter) {
if(input.length() != iter->length()) continue;
int diffCount = 0;
for(int i = 0; i < input.length(); ++i) {
if(input.at(i) != iter->at(i)) diffCount++;
}
myMap[diffCount].push_back(*iter);
}
for(int i = 0; i < 3; ++i) {
result[i] = myMap[1][i];
cout << result[i] << endl;
}
return 0;
}
#include <iostream>
#include <string>
#include <set>
#include <map>
#include <vector>
using namespace std;
int main(int argc, char** argv) {
set<string> dict = { "vil", "sit", "flick", "pat", "pluck", "sat", "vat", "vit" };
map<int, vector<string>> myMap;
string result[3];
string input = "vit";
for(auto iter = dict.begin(); iter != dict.end(); ++iter) {
if(input.length() != iter->length()) continue;
int diffCount = 0;
for(int i = 0; i < input.length(); ++i) {
if(input.at(i) != iter->at(i)) diffCount++;
}
myMap[diffCount].push_back(*iter);
}
for(int i = 0; i < 3; ++i) {
result[i] = myMap[1][i];
cout << result[i] << endl;
}
return 0;
}
#include <iostream>
#include <string>
#include <set>
#include <map>
#include <vector>
using namespace std;
int main(int argc, char** argv) {
set<string> dict = { "vil", "sit", "flick", "pat", "pluck", "sat", "vat", "vit" };
map<int, vector<string>> myMap;
string result[3];
string input = "vit";
for(auto iter = dict.begin(); iter != dict.end(); ++iter) {
if(input.length() != iter->length()) continue;
int diffCount = 0;
for(int i = 0; i < input.length(); ++i) {
if(input.at(i) != iter->at(i)) diffCount++;
}
myMap[diffCount].push_back(*iter);
}
for(int i = 0; i < 3; ++i) {
result[i] = myMap[1][i];
cout << result[i] << endl;
}
return 0;
}
The following code should have run time complexity On^3.
def findClosetWords(list_input,word):
list_ans = []
for input in list_input:
cnt = 0
for char_ in input:
for char in word:
if char == char_:
cnt += 1
if (cnt == len(word)-1) | (cnt == len(word)+1):
list_ans.append(input)
return list_ans
list_input = ['vit','sitt','flick','pii','pluck','sat','vat']
word = "sit"
ans = findClosetWords(list_input,word)
print(ans)
Linear time C++ solution, using a hash set and a heap to collect the nearest results.
The heap contains the 3 results that have the minimum distance of the mistype.
Time complexity is O(n*26*log(3)) = O(n) where n is the length of the word (not of the dictionary, that usually is orders of magnitude bigger).
#include <iostream>
#include <unordered_set>
typedef std::pair<int, string> diffType;
vector<string> findAlternatives(string s, const unordered_set<string> &dict)
{
vector<string> out;
if (dict.empty()) return out;
priority_queue<diffType> pq;
const int maxElements = 3;
for (int i = 0; i < s.length(); ++i)
{
char backupChar = s[i];
for (char c = 'a'; c <= 'z'; ++c)
{
s[i] = c;
if (dict.find(s) != dict.end())
{
pq.push({std::abs(c - backupChar), s});
if (pq.size() > maxElements)
pq.pop();
}
}
s[i] = backupChar;
}
while (!pq.empty())
{
out.push_back(pq.top().second);
pq.pop();
}
return out;
}
int main()
{
std::unordered_set<string> dictSet = {"wil", "vil", "sil",
"pil", "men", "pion", "pal"};
std::vector<string> res = findAlternatives("mil", dictSet);
for (string &s: res)
std::cout << s << " ";
std::cout << std::endl;
}
public static void main(String arg[]){
String[] dic1={"vil","sit","flick","pat","pluck","sat","vat"};
String input="vit";
String[]ans=new String[input.length()];
int track;
int count=0,k=0;
for(int i=0;i<dic1.length;i++){
track=0;
if (dic1[i].length() == input.length()) {
for(int j=0;j<input.length();j++) {
if (dic1[i].charAt(j) == input.charAt(j)) {
track++;
}
}
if(track==0){
}
else if (track== 2) {
count++;
if (count <=3) {
ans[k] = dic1[i];
k++;
}
}
}
}
System.out.println(Arrays.toString(ans));
}
import java.util.*;
import java.lang.*;
import java.io.*;
import static java.util.stream.Collectors.toCollection;
public class MyClass {
static class Dictionary {
private static int compare(String a, String b)
{
if(a.length() > b.length() + 1 || a.length() < b.length())
return -1;
int match = 0;
for(int i = 0; i < a.length(); i++) {
if(a.charAt(i) == b.charAt(i)) {
match ++;
continue;
}
}
return match;
}
public static Set<String> findSimillar(String a, Set<String> s) {
Set<String> set = new TreeSet<>();
for(String m : s) {
int match = compare(a,m);
if(match == a.length()) {
set.clear();
set.add(a);
return set;
}
if(match == a.length() - 1)
set.add(m);
}
if(set.size() > 0) {
return set.stream()
.limit(3)
.collect(toCollection(LinkedHashSet::new));
}
return set;
}
}
public static void main(String args[]) {
Set<String> set = new HashSet<>();
set.add("vil");set.add("sit");set.add("flick");set.add("pat");set.add("pluck");set.add("sat");set.add("vat");
System.out.println(Dictionary.findSimillar("sot",set));
}
}
import java.util.*;
import java.lang.*;
import java.io.*;
import static java.util.stream.Collectors.toCollection;
public class MyClass {
static class Dictionary {
private static int compare(String a, String b)
{
if(a.length() > b.length() + 1 || a.length() < b.length())
return -1;
int match = 0;
for(int i = 0; i < a.length(); i++) {
if(a.charAt(i) == b.charAt(i)) {
match ++;
continue;
}
}
return match;
}
public static Set<String> findSimillar(String a, Set<String> s) {
Set<String> set = new TreeSet<>();
for(String m : s) {
int match = compare(a,m);
if(match == a.length()) {
set.clear();
set.add(a);
return set;
}
if(match == a.length() - 1)
set.add(m);
}
if(set.size() > 0) {
return set.stream()
.limit(3)
.collect(toCollection(LinkedHashSet::new));
}
return set;
}
}
public static void main(String args[]) {
Set<String> set = new HashSet<>();
set.add("vil");set.add("sit");set.add("flick");set.add("pat");set.add("pluck");set.add("sat");set.add("vat");
System.out.println(Dictionary.findSimillar("sot",set));
}
}
Most answers (I did not check them all) scan the collection of words
and check if they differ by 1 character from the input. That may work
fine for the given example, but is not too scalable. Here's my C++
solution:
1. Build a dictionary of patterns that can be matched by the given
words. For example: "?at" => "pat", "sat", "vat".
2. Build all possible patterns from the input word and check
them against this dictionary:
- hb March 11, 2019