Amazon Interview Question
SDE1sCountry: United States
Interview Type: Phone Interview
There is a n^3 answer. n^2 possible substring with cost n to check if the substring appears again.
def max_substring(s):
long_repeated = ""
for x in xrange(len(s)):
for y in xrange(x+1, len(s)+1):
t = s[x:y]
if t in s[x+1:] and len(t) > len(long_repeated):
long_repeated = t
return long_repeated
extract all suffixes, sort them and check suffix[i] and suffix [i+1]... I can imagine that's something one can develop in an interview.
--
for banana:
suffixes[]= {anana, nana, ana, na, a}, sorted: {anana, ana, a, nana, na}
anana -> ana: 3
ana -> a: 1
nana -> na: 2
tehre are:
--
to create suffixes O(n)
to sort suffixes O(n*n*lg(n))
to compare suffixes (O(n^2))
total: O(n^2*lg(n))
/*
an O(n^2) solution based on [en.wikipedia.org/wiki/Longest_common_substring_problem]:
the summarized idea is that the longest common sub string of two strings a, b is as well the longest
common suffix(LCS) of ALL prefixes of a and b.
the longest common suffix of two strings a, b is :
LCS(a, b, i, j) = LCS(a, b, i - 1, j - 1) + 1 if a[i] == b[j]
LCS(a, b, len(a), len(b))
the longest common substring is therefore:
max(LCS(a, b, k, l) for all 1 < k <= len(a), and all 1 < l <= len(b)
if a = b, the longest common substring would actually be a.
But I assume the question asks for all sub strings that are not prefixes or suffixes of a,
assuming "banana" is a suffix and prefix and substring of "banana".
therefore the valid prefixes must not start at the same index in the given string:
max(LCS(a, a, k, l)) for all 1 < k <= len(a) and all 1 < l <= len(b) where k != l
*/
string maxRepeatingSubstring(const string& str)
{
// that is the max common substring where start and end isn't equal
size_t n = str.length();
vector<vector<size_t>> dp(n + 1, vector<size_t>(n + 1, 0));
size_t max_start = 0;
size_t max_len = 0;
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < n; ++j) {
if (i == j) continue; // that would be the same start
if (str[i] == str[j]) {
size_t cur_len = dp[i][j] + 1;
dp[i + 1][j + 1] = cur_len;
if (cur_len > max_len) {
max_start = i - cur_len + 1;
max_len = cur_len;
}
}
}
}
if (max_len > 0) return str.substr(max_start, max_len);
else return string();
}
int main()
{
cout << maxRepeatingSubstring("banana") << endl;
cout << maxRepeatingSubstring("aaa") << endl;
}
import java.util.Set;
import java.util.HashSet;
public class LongestRepeatingSubstring
{
public static void main(String[] args)
{
LongestRepeatingSubstring l = new LongestRepeatingSubstring();
System.out.println(l.find("banana"));
System.out.println(l.find("abcdefg"));
System.out.println(l.find("aaaaaaa"));
System.out.println(l.find(""));
}
public String find(String s)
{
String max = "";
String localMax = "";
for(int i=1;i<s.length();i++)
{
localMax = helper(s, i);
if(localMax.length()>max.length())
max = localMax;
}
return max;
}
private String helper(String s, int windowSize)
{
String res = "";
Set<String> set = new HashSet<>();
for(int i=0;i<s.length()-windowSize+1;i++)
{
String temp = s.substring(i, i+windowSize);
if(set.contains(temp))
{
res = temp;
}
else
{
set.add(temp);
}
}
return res;
}
}
We can use a trie. O(N^2) time and space.
#include <iostream>
#include <unordered_map>
using namespace std;
class Node {
public:
Node(char c, Node *parent)
{
c_ = c;
parent_ = parent;
}
Node()
{
c_ = 0;
parent_ = NULL;
}
Node *GetChild(char c) const
{
auto it = children_.find(c);
return (it == children_.end()) ? NULL : it->second;
}
Node *AddChild(char c)
{
Node *n = GetChild(c);
if (!n) {
n = new Node(c, this);
children_[c] = n;
}
return n;
}
char GetCharacter() const
{
return c_;
}
Node *GetParent() const
{
return parent_;
}
private:
char c_;
unordered_map<char, Node*> children_;
Node *parent_;
};
class Trie {
public:
Node *GetRoot()
{
return &root_;
}
private:
Node root_;
};
string LargestRepeatingSubstring(string const &s)
{
Trie trie;
int max_size = 0;
Node *terminal_node = NULL;
for (int i = 0; i < s.size(); ++i) {
Node *n = trie.GetRoot();
int size = 0;
int substring_broken = false;
for (int j = i; j < s.size(); ++j) {
Node *next_node = n->GetChild(s[j]);
if (next_node) {
if (!substring_broken &&
++size > max_size)
{
max_size = size;
terminal_node = next_node;
}
} else {
substring_broken = true;
next_node = n->AddChild(s[j]);
}
n = next_node;
}
}
string out;
for (Node *n = terminal_node; n != NULL; n = n->GetParent()) {
out += n->GetCharacter();
}
reverse(out.begin(), out.end());
return out;
}
int main() {
cout << LargestRepeatingSubstring("banana") << "\n";
}
The question has problem, the trivial solution is to have a substring with size 1. If we do ignore the substring of size 1, then :
- NoOne August 22, 2017