Amazon Interview Question
Web DevelopersCountry: United States
public static List<Integer>findIndicesSort(String haystack, String needle){
List<Integer> anagramIndices=new ArrayList<Integer>();
char[] haystackChars=haystack.toCharArray();
char[] needleChars=needle.toCharArray();
Arrays.sort(needleChars);
char[] movingWindow;
for(int i=0;i<haystackChars.length-needleChars.length+1;i++){
movingWindow=Arrays.copyOfRange(haystackChars,i,i+needleChars.length);
Arrays.sort(movingWindow);
if(Arrays.equals(needleChars, movingWindow))
anagramIndices.add(i);
}
return anagramIndices;
}
There are multiple ways to do it. One is to sort the strings and then compare them.
Complexity will be m * n log(n) where haysack contains m strings. Some optimizations can be made as to first check the length of the string in haysack before sorting it.
Another is to use use an array of 26 and then marking the chars with 1 if in needle. Use it like a hash map and also maintain another array of 26 chars for duplicates.
short[] charCount = new short[26]; // assuming all chars are same case
short[] needleCount = new short[26];
so for needle = "abbdd" we will have needleCount = [1,2,0,2,....0,0,0]
we calculate charCount in same way for every string in haysack whose length is same.
Not sure if a better solution is there. If no duplicates are allowed then we can use BitSet and can xor them to check if both are same or not.
The trivial O(n^2) solution is for each index `i` in haystack iterate to `j` (where `j` == `i` + `needle.length`) and determine if the substring(i,j) is an anagram of needle.
This is an O(n) solution:
function clone (target) {
var o = {}
for (var key in target) {
if (({}).hasOwnProperty.call(target, key)) {
o[key] = target[key]
}
}
return o
}
module.exports = function (needle, haystack) {
var i, j, NEEDLE = {}
var solutions = []
for (i = 0; i < needle.length; ++i) {
if (NEEDLE[needle.charAt(i)]) {
NEEDLE[needle.charAt(i)]++
} else {
NEEDLE[needle.charAt(i)] = 1
}
}
i = j = 0
var l = needle.length
for (; i <= haystack.length - l;) {
var o = clone(NEEDLE)
var char = haystack.charAt(i)
// if the character is not in needle then increment
// `i` and continue this does not start a possible
// anagram
if (!o[char]) {
i++
continue
}
o[char]--
var valid = true // is this substring from i, j of length l
// a valid anagram
for (j = 1; j < l;) {
char = haystack.charAt(i + j)
if (!o[char]) {
valid = false
break
}
o[char]--
j++
}
if (!valid) {
i++
continue
}
solutions.push(i)
// The current substring haystack(i,j) of length l
// is a valid anagram
// it stands to reason that if charAt(j + 1) === charAt(i)
// then that is a valid anagram too.
j = i + l - 1
while (haystack.charAt(j + 1) === haystack.charAt(i) && j + 1 < haystack.length) {
i++
j++
solutions.push(i)
}
i++
}
return solutions
}
var needle = 'abc'
var haystack = 'dabcabebacab'
var solutions = module.exports(needle, haystack)
console.log(solutions)
My Python solution
from copy import deepcopy
def stringcompare(haystack,needle):
output = []
needle_dict = {}
length = len(needle)
for c in needle:
if c in needle_dict.keys():
needle_dict[c] += 1
else:
needle_dict[c] = 1
for i in range(len(haystack)-len(needle)+1):
test = deepcopy(needle_dict)
if(compare(haystack[i:i+length],test)):
output.append(i)
print(output)
def compare(arr,dict):
for c in arr:
if(c in dict.keys()):
dict[c] -= 1
if dict[c] < 0:
return False
else:
return False
return True
def main():
haystack = 'abcdefghibcdajklmndcba'
needle = 'bcda'
stringcompare(haystack, needle)
if __name__ == '__main__':
main()
#include <stdio.h>
char haystack[] = "abcdefghibcdajklmndcba";
char needle[] = "bcda";
int needleset[4] = {0,};
int sizehaystack = 0;
int needlesize = 0;
int start = 0;
void allsetneedset()
{
for(int i=0;i<needlesize;i++)
{
needleset[i] = 1;
}
}
void setneedset(int index)
{
needleset[index] = 1;
}
int clrneedset(int index)
{
if(!needleset[index])
return 0;
else
{
needleset[index] = 0;
return 1;
}
}
void allclrneedset()
{
for(int i=0;i<needlesize;i++)
{
needleset[i] = 0;
}
}
int isallclrneedset()
{
int ret = 1;
for(int i=0;i<(needlesize-1);i++)
{
if(needleset[i] == 1)
{
ret = 0;
break;
}
}
if(ret != 0)
{
if(needleset[needlesize-1] == 0)
ret = 1;
else
ret = 0;
}
return ret;
}
int isallsetneedset()
{
int ret = 1;
for(int i=0;i<(needlesize-1);i++)
{
if(needleset[i] == 0)
{
ret = 0;
break;
}
}
if(ret != 0)
{
if(needleset[needlesize-1] == 1)
ret = 1;
else
ret = 0;
}
return ret;
}
void stringcompare(char haystack[], char needle[])
{
while(1)
{
if(haystack[sizehaystack] == '\0')
break;
sizehaystack++;
}
while(1)
{
if(needle[needlesize] == '\0')
break;
needlesize++;
}
allsetneedset();
//printf("size:%d", sizehaystack);
printf("[");
for(int i=0; i<sizehaystack; i++)
{
if(isallsetneedset())
{
start = i;
}
for(int j=0; j<needlesize; j++)
{
if(haystack[i] == needle[j])
{
if(clrneedset(j)) // success
{
if(isallclrneedset())
{
allsetneedset();
printf("%d",start);
start = i;
}
break;
}
else
{
allsetneedset();
break;
}
}
}
}
printf("]");
}
int main(int argc, const char * argv[]) {
// insert code here...
printf("Hello, World!\n");
stringcompare(haystack, needle);
return 0;
}
and
#include <stdio.h>
char haystack[] = "abcdefghibcdajklmndcba";
char needle[] = "bcda";
int needleset[4] = {0,};
int sizehaystack = 0;
int needlesize = 0;
int start = 0;
void allsetneedset()
{
for(int i=0;i<needlesize;i++)
{
needleset[i] = 1;
}
}
void setneedset(int index)
{
needleset[index] = 1;
}
int clrneedset(int index)
{
if(!needleset[index])
return 0;
else
{
needleset[index] = 0;
return 1;
}
}
void allclrneedset()
{
for(int i=0;i<needlesize;i++)
{
needleset[i] = 0;
}
}
int isallclrneedset()
{
int ret = 1;
for(int i=0;i<(needlesize-1);i++)
{
if(needleset[i] == 1)
{
ret = 0;
break;
}
}
if(ret != 0)
{
if(needleset[needlesize-1] == 0)
ret = 1;
else
ret = 0;
}
return ret;
}
int isallsetneedset()
{
int ret = 1;
for(int i=0;i<(needlesize-1);i++)
{
if(needleset[i] == 0)
{
ret = 0;
break;
}
}
if(ret != 0)
{
if(needleset[needlesize-1] == 1)
ret = 1;
else
ret = 0;
}
return ret;
}
void stringcompare(char haystack[], char needle[])
{
while(1)
{
if(haystack[sizehaystack] == '\0')
break;
sizehaystack++;
}
while(1)
{
if(needle[needlesize] == '\0')
break;
needlesize++;
}
allsetneedset();
//printf("size:%d", sizehaystack);
printf("[");
for(int i=0; i<sizehaystack; i++)
{
if(isallsetneedset())
{
start = i;
}
for(int j=0; j<needlesize; j++)
{
if(haystack[i] == needle[j])
{
if(clrneedset(j)) // success
{
if(isallclrneedset())
{
allsetneedset();
printf("%d",start);
start = i;
}
break;
}
else
{
allsetneedset();
break;
}
}
}
}
printf("]");
}
int main(int argc, const char * argv[]) {
// insert code here...
printf("Hello, World!\n");
stringcompare(haystack, needle);
return 0;
}
and
private static bool equalCharactersArray(int[] list1, int[] list2)
{
for (int i = 0; i < list1.Length; i++)
{
if (list1[i] != list2[i])
{
return false;
}
}
return true;
}
public static List<int> findIndicesOfString(String haystack, String needle)
{
int count = 0;
int n = haystack.Length;
int m = needle.Length;
List<int> anagramIndices = new List<int>();
if (n < m | m == 0 | m == 0)
{
return anagramIndices;
}
//array of count of all characters
int[] textHist = new int[256];
int[] patHist = new int[256];
//initial histogram window of size m
int i = 0;
for (i = 0; i < m; i++)
{
patHist[needle[i]]++;
textHist[haystack[i]]++;
}
//search
do
{
if (equalCharactersArray(textHist, patHist))
{
Console.WriteLine("anagram found : " + haystack.Substring(i - m, needle.Length));
anagramIndices.Add(i-m);
count++;
}
//move window by 1 position to the right and check for anagram in the next pattern
textHist[haystack[i]]++;
textHist[haystack[i - m]]--;
} while (++i < n);
return anagramIndices;
}
def get_counter(s):
d = {}
for c in list(s):
if c in d:
d[c] += 1
else:
d[c] = 1
return d
def get_agram_idx(s, h):
if len(s) > len(h):
return None
if s is None or h is None:
return None
cc = get_counter(s)
agram_idx = []
agram_found = False
for i in range(len(h) +1 - len(s)):
#short circuit logic, just check if new char is same as old
if agram_found:
if h[i-1] == h[i+len(s)-1]:
agram_idx.append(i)
else:
agram_found = False
continue
c = cc.copy()
for j in range(len(s)):
if h[i+j] not in c:
break
c[h[i+j]] -= 1
if set(c.values()) == set([0]):
agram_idx.append(i)
return agram_idx
This solution is not fully correct, even though it's the most upvoted answer. Try this example and it will break: System.out.println(findAnagrams("BAEED", "AB"));
Here's the corrected code:
//private static int primes[] = new int[]{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
public static Collection<String> findAnagrams(String haystack, String needle){
int hashNeedle = hash(needle);
Collection<String> anagrams = new ArrayList<>();
Map<Integer, Integer> hashes = new HashMap<>();
int index = 0, hashHayStack = 1;
for(char ch : haystack.toLowerCase().toCharArray()){
hashHayStack *= primes[ch - 'a'];
if(hashHayStack%hashNeedle == 0
&& hashes.containsKey(hashHayStack/hashNeedle)){
int startIndex = index - (needle.length() - 1);
anagrams.add(haystack.substring(startIndex, startIndex + needle.length()));
}
hashes.put(hashHayStack, index++);
}
if (hash(haystack.substring(0, needle.length())) == hashNeedle) {
anagrams.add(needle);
}
return anagrams;
}
private static int hash(String word){
int hash = 1;
word = word.toLowerCase();
for(int i = 0; i< word.length(); i++){
int index = word.charAt(i) - 'a';
hash *= primes[index];
}
return hash;
}
This solution is not fully correct, even though it's the most upvoted answer. Try this example and it will break: System.out.println(findAnagrams("BAEED", "AB"));
Here's the corrected code:
//private static int primes[] = new int[]{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
public static Collection<String> findAnagrams(String haystack, String needle){
int hashNeedle = hash(needle);
Collection<String> anagrams = new ArrayList<>();
Map<Integer, Integer> hashes = new HashMap<>();
int index = 0, hashHayStack = 1;
for(char ch : haystack.toLowerCase().toCharArray()){
hashHayStack *= primes[ch - 'a'];
if(hashHayStack%hashNeedle == 0
&& hashes.containsKey(hashHayStack/hashNeedle)){
int startIndex = index - (needle.length() - 1);
anagrams.add(haystack.substring(startIndex, startIndex + needle.length()));
}
hashes.put(hashHayStack, index++);
}
if (hash(haystack.substring(0, needle.length())) == hashNeedle) {
anagrams.add(needle);
}
return anagrams;
}
private static int hash(String word){
int hash = 1;
word = word.toLowerCase();
for(int i = 0; i< word.length(); i++){
int index = word.charAt(i) - 'a';
hash *= primes[index];
}
return hash;
}
public static Collection<String> findAnagrams2(String haystack, String needle){
Collection<String> anagrams = new ArrayList<>();
int hashNeedle = hash(needle);
int boxCarHash = hash(haystack.substring(0, needle.length()));
char[] haystackCharArr = haystack.toLowerCase().toCharArray();
System.out.println("Needle hash is: " + hashNeedle + " and boxcar is: " + boxCarHash);
for (int index = 0; index + needle.length() <= haystack.length(); index++) {
System.out.println(index + "." + haystack.substring(index, index + needle.length()) + ", boxcar=" + boxCarHash);
if (boxCarHash == hashNeedle) {
anagrams.add(haystack.substring(index, index + needle.length()));
}
if (index + needle.length() < haystack.length()) {
boxCarHash /= primes[haystackCharArr[index] - 'a'];
boxCarHash *= primes[haystackCharArr[index + needle.length()] - 'a'];
}
}
return anagrams;
}
Find anagrams using prime hash
- Adnan Ahmad January 23, 2017