Twitter Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: Phone Interview
Let's say one of the names is "Lady Gaga". Wouldn't this algorithm consider the tweet "I am Gaga for Cocoa Puffs" to match?
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
public class StringTweetUserFind {
public static Pattern createPat(String name){
Pattern HEYPATTERN = Pattern.compile(".*"+name+".*");
return HEYPATTERN;
}
static Boolean findName(String tweet)
{
//Populating the list with names
List<String> names = new ArrayList<String>();
names.add("XXX YYY");
names.add("SASDVZZXACC QADFGBADSEBFHDT");
names.add("IIIIIIIII TTTTTTTTT");
Boolean foundFlag = false;
for(String itr:names){
Pattern pattern = createPat(itr);
if (pattern.matcher(tweet).matches()) {
return true;
}
}
return foundFlag;
}
public static void main(String[] args){
String tweet = "Ok folks! Non BJP, non Cong front to be "
+ "SASDVZZXACC QADFGBADSEBFHDT officially 'formed' today. The fun has only just begun! "
+ "A long summer lies ahead! Time to work! Bye";
Boolean output = findName(tweet);
System.out.println(output);
}
}
Use a map, insert all names in map.
Traverse through the tweet (word by word, perhaps using a helper function giving you the next word)
See if the word from tweet is present in the map. Iff yes then return true.
This returns true if any of the names in the list occur in the tweet.
import sys
def isNameInTweet(names, tweet):
# insert names into dictionary
prev = None
nametable = {}
tweetwords = []
for n in names:
nametable[n] = True
# tokenize tweet by word
tweetwords = tweet.split()
print tweetwords
# combine words into pairs in order, lookup in dict
for w in tweetwords:
if not prev:
prev = w
continue
print "looking for " + prev + " " + w
if str(prev + " " + w) in nametable:
return True
prev = w
return False
def main():
names = ['Katy Perry', 'Russell Brand', 'Russel Crowe']
tweet = 'Katy Perry and Russel Brand make a nice couple!'
print str(names) + " " + str(tweet) + " " +str( isNameInTweet(names, tweet))
if __name__ == "__main__":
sys.exit(main())
Sample trie based solution
/**
* Load list of names into trie
* Then compare the suffixes of given tweet with trie if found then return true
*/
public class TwitterTrie {
public TwitterTrie() {
super();
}
public static void main(String[] args) {
TwitterTrie twitterTrie = new TwitterTrie();
TwitterTrie.Trie trie = twitterTrie.new Trie();
String[] strs = { "Prakash", "Swathi", "Rama" };
for (String str : strs) {
trie.insert(str);
}
trie.print();
System.out.println(twitterTrie.found("This is ramakrishna again", trie));
System.out.println(twitterTrie.found("This is gopi again", trie));
}
public boolean found(String tweet, TwitterTrie.Trie root) {
boolean b = false;
for (int i = 0; i < tweet.length(); i++) {
b = root.search(tweet.substring(i));
if (b)
break;
}
return b;
}
class Trie {
private static final int ALPHA_SIZE = 26;
private static final int BUF_SIZE = 1024;
private Trie root;
private Trie[] trieArray;
private char data;
private boolean word;
public Trie() {
super();
}
public void insert(String data) {
Trie root = this.getRoot();
if (root == null) {
root = new Trie(' ');
this.setRoot(root);
}
for (int i = 0; i < data.length(); i++) {
if (root.getTrieArray()[atoi(data.charAt(i))] == null) {
root.getTrieArray()[atoi(data.charAt(i))] = new Trie(data.charAt(i));
}
root = root.getTrieArray()[atoi(data.charAt(i))];
}
root.setWord(true);
}
public void print() {
Trie root = this.getRoot();
if (root == null) {
return;
}
char[] buffer = new char[BUF_SIZE];
int index = 0;
traverse(root, index, buffer);
}
public void traverse(Trie root, int index, char[] buffer) {
if (root == null)
return;
if (root.isWord())
System.out.println(new String(buffer, 0, index));
for (int i = 0; i < ALPHA_SIZE; i++) {
if (root.getTrieArray()[i] != null) {
buffer[index] = root.getTrieArray()[i].getData();
traverse(root.getTrieArray()[i], index + 1, buffer);
}
}
}
public boolean search(String pattern) {
Trie root = this.getRoot();
if (root == null)
return false;
int x = 0;
for (int i = 0; i < pattern.length(); i++) {
if (atoi(pattern.charAt(i)) > 26) {
continue;
} else if (root.getTrieArray()[atoi(pattern.charAt(i))] != null) {
root = root.getTrieArray()[atoi(pattern.charAt(i))];
} else {
break;
}
}
if (root.isWord()) {
return root.isWord();
}
return false;
}
public int atoi(char ch) {
int i = 0;
if (ch >= 65 && ch <= 90)
i = ch - 65;
else if (ch >= 97 && ch <= 122)
i = ch - 97;
else
i = (int)ch;
return i;
}
private Trie(char data) {
this.setData(data);
this.setTrieArray(new Trie[ALPHA_SIZE]);
}
public void setRoot(Trie root) {
this.root = root;
}
public Trie getRoot() {
return root;
}
public void setTrieArray(Trie[] trieArray) {
this.trieArray = trieArray;
}
public Trie[] getTrieArray() {
return trieArray;
}
public void setData(char data) {
this.data = data;
}
public char getData() {
return data;
}
public void setWord(boolean word) {
this.word = word;
}
public boolean isWord() {
return word;
}
}
}
import java.util.ArrayList;
import java.util.List;
public class C {
public static void main(String[] args) {
List<String> names = new ArrayList<String>();
String tweet ="Ankit Jain yesterday met Anuj Jha";
names.add("Ankit Jain");
names.add("Anuj Jha");
for(int i=0;i<names.size();i++){
if(tweet.contains(names.get(i))){
System.out.println(names.get(i));
}
}
}
}
use regex maybe?
use strict;
use warnings;
my @names = qw/ name1 name2 name3 /;
my $tweet = "this is a test tweet with name1";
my $tweet2 = "this is a test tweet without";
sub contains_name{
my $search_tweet = shift;
foreach my $name(@names){
return 1 if $search_tweet =~ m/$name/;
}
return 0;
}
printf("$tweet %s name\n", contains_name($tweet) ? "contains" : "does not contain");
printf("$tweet2 %s name\n", contains_name($tweet2) ? "contains" : "does not contain");
Build a suffix tree of all the names in the dictionary, since this is preprocessing, we will not take any time during execution, as we run through the text check if each word exists in the suffix tree, if it does return true else return false, this will take O(n*k) time where k = average length of a word and n = no of words...which implies O(n) for large sentences.
- Praveen August 17, 2013