Yelp Onsite 31-40
P31 链接 问题是 edit distance的变形题。就是给 两个string (e.g. 'query', 'quray'),然后有三个打分 (类似与 edit distance 的 insert, replace, delete),但每个分数不同,然后叫你算出最小值。反正DP类问题,不难。
161 One Edit Distance (Medium)
class Solution {
public:
bool isOneEditDistance(string s, string t) {
if (s.size() < t.size()) swap(s, t);
int m = s.size(), n = t.size(), diff = m-n;
if (diff >= 2) return false;
else if (diff == 1) {
for (int i = 0; i < n; ++i) {
if (s[i] != t[i]) return s.substr(i+1) == t.substr(i);
}
return true;
} else {
int cnt = 0;
for (int i = 0; i < n; ++i) {
if (s[i] != t[i]) ++cnt;
}
return cnt == 1;
}
}
};
class Solution {
public:
bool isOneEditDistance(string s, string t) {
for (int i = 0; i < min(s.size(), t.size()); ++i) {
if (s[i] != t[i]) {
if (s.size() == t.size()) return s.substr(i+1) == t.substr(i+1);
else if (s.size() > t.size()) return s.substr(i+1) == t.substr(i);
else return s.substr(i) == t.substr(i+1);
}
}
return abs((int)s.size() - (int)t.size()) == 1;
}
};
我觉得这个也挺难的呀 72 Edit Distance (Hard)
delete = dp[i-1][j] + 1
add = dp[i][j-1] + 1
replace = dp[i-1][j-1] + 1
class Solution {
public:
int minDistance(string word1, string word2) {
int m = word1.size(), n = word2.size();
vector<vector<int>> dp(m+1, vector<int>(n+1));
for (int i = 0; i <= m; ++i) dp[i][0] = i;
for (int i = 0; i <= n; ++i) dp[0][i] = i;
for (int i = 1; i <= m; ++i) {
for (int j = 1; j <= n; ++j) {
if (word1[i-1] == word2[j-1]) dp[i][j] = dp[i-1][j-1];
else dp[i][j] = min(dp[i-1][j-1], min(dp[i][j-1], dp[i-1][j]))+1;
}
}
return dp[m][n];
}
};
P32 给你1,2,3,4,5个business id, 这里面的business可能是duplicates, 如果是,把它们merge,然后每次只返回最小的那个id. e.g. 1,3,5都是重复的,如果输入3,5 都会返回1。然后他给你三个function 分别是, mark_business(id1,id2), represent(id), compare(id1,id2)。不是的,可以用union find的方法,或者更好的话DFS with strong connectivity,这也是我后来发现的。
#include <vector>
#include <iostream>
using namespace std;
class Solution {
public:
void reduce(vector<vector<int>>& ids, int n) {
root.resize(n);
for (int i = 0; i < n; ++i) root[i] = i;
for (auto& a: ids) {
int i = find(a[0]), j = find(a[1]);
if (i != j) {
if (i < j) root[j] = i;
else root[i] = j;
}
}
}
void mark_business(int id1, int id2) {
int i = find(id1), j = find(id2);
if (i != j) {
if (i < j) root[j] = i;
else root[i] = j;
}
}
int represent(int id) {
return find(id);
}
private:
int find(int id) {
while (root[id] != id) {
id = root[id];
}
return id;
}
vector<int> root;
};
int main() {
Solution s;
vector<vector<int>> ids = {{1,2},{2,3},{5,6}};
s.reduce(ids, 9);
for (int i = 0; i < 9; ++i) cout << i << ": " << s.represent(i) << endl;
return 0;
}
P33 给一组integer数据, 和 target num, 返回这组数据 +,-,x, /能不能得到 target num,每个数用一次。 请问下楼主,第四题中数组中的数可以变换次序么?另外表达式之间可以加括号么?应该都可以的。 有点像算24点拓展到n张牌任意target
下面以n张牌为例,叙述该算法。
第一步:抽取n张牌当中的任意两张牌,对其做加减乘除运算,将这两个数四则运算过后的结果作为一个数,与其余n-2个数一起构成n-1个数据。
第二步:将得到的n-1组数据,重复第一步的操作。
一二步不断循环,直到n=1,这个时候所有的可能全部计算完毕,输出结果。
version 1: py
class Solution(object):
def ifTarget(self, nums, target):
return self.dfs(nums, '', target)
def dfs(self, nums, path, target):
if len(nums)==1:
if nums[0]==target:
print(path)
return True
else:
False
for i in range(len(nums)-1):
for j in range(i+1, len(nums)):
a, b = nums[i], nums[j]
candidates = [a+b, a-b, a*b, b-a] + ([b/a] if a else []) + ([a/b] if b else [])
for c in candidates:
if self.dfs(nums[:i]+nums[i+1:j]+nums[j+1:] + [c], path + str(a) + str(b), target):
return True
return False
nums = [1,2,3,4]
so = Solution()
ans = so.ifTarget(nums, 24)
print(ans)
version 2: c++
#include <string>
#include <vector>
#include <cmath>
#include <iostream>
using namespace std;
class Solution {
public:
bool ifTarget(vector<double>& nums, vector<string>& expression, int target, int n) {
if (n == 1) {
if (fabs(nums[0]-target) < PRECISION) {
cout << expression[0] << endl;
return true;
}
return false;
}
for (int i = 0; i < n; ++i) {
for (int j = i+1; j < n; ++j) {
int a = nums[i], b = nums[j];
nums[j] = nums[n-1]; // replace with the last
string expa = expression[i], expb = expression[j];
expression[j] = expression[n-1];
// '+'
expression[i] = '('+expa+'+'+expb+')';
nums[i] = a+b;
if (ifTarget(nums, expression, target, n-1)) return true;
// '-': a-b, b-a
expression[i] = '(' + expa + '-' + expb + ')';
nums[i] = a - b;
if (ifTarget(nums, expression, target, n-1)) return true;
expression[i] = '(' + expb + '-' + expa + ')';
nums[i] = b - a;
if (ifTarget(nums, expression, target, n-1)) return true;
// '*'
expression[i] = '('+expa+'*'+expb+')';
nums[i] = a*b;
if (ifTarget(nums, expression, target, n-1)) return true;
// '/': a/b, b/a
if (b) {
expression[i] = '(' + expa + '/' + expb + ')';
nums[i] = a / b;
if (ifTarget(nums, expression, target, n-1)) return true;
}
if (a) {
expression[i] = '(' + expb + '/' + expa + ')';
nums[i] = b/a;
if (ifTarget(nums, expression, target, n-1)) return true;
}
// 恢复数组
nums[i] = a;
nums[j] = b;
expression[i] = expa;
expression[j] = expb;
}
}
return false;
}
private:
const double PRECISION = 1E-6;
};
int main() {
Solution s;
int cnt = 0, x = 0;
cin >> cnt;
vector<double> nums;
vector<string> expression;
for (int i = 0; i < cnt; ++i) {
cin >> x;
nums.push_back(x);
expression.push_back(to_string(x));
}
s.ifTarget(nums, expression, 24, 4);
return 0;
}
P34 讲intern project,biggest challenge 是什么。 链接 问了database, index。 如何用几句话讲明白什么是index coding: maximal square
221 Maximal Square
class Solution {
public:
int maximalSquare(vector<vector<char>>& matrix) {
if (matrix.empty() || matrix[0].empty()) return 0;
int m = matrix.size(), n = matrix[0].size();
vector<vector<int>> dp(m, vector<int>(n,0));
int res = 0;
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
if (matrix[i][j] == '1') {
int tmp = min(i>0 ? dp[i-1][j]:0, j>0 ? dp[i][j-1]:0);
tmp = min(tmp, (i>0 && j>0) ? dp[i-1][j-1]:0);
dp[i][j] = tmp+1;
res = max(res, dp[i][j]);
}
}
}
return res*res;
}
};
P35 大概讲讲实习经历。问了inverted index。 coding: Word Serach 2, word search 给个board 里面有字母,给个dictionary,search board里面上下左右走能不能找到这个词 做过了P13题
P36 聊简历,why yelp? 为什么你适合这个职位? 讲intern project,问garbage collection如何实现? 问了database index 是什么?如果有一个query 非常慢,为什么? 怎么找到原因。如何优化?如果是因为index的问题,解释为什么index影响query 速度。 coding: data stream return top k most frequent items (strings)
import heapq
class Solution(object):
def __init__(self, k):
self.q = []
self.dic = {}
self.k = k
def add(self, num):
if num not in self.dic:
self.dic[num] = 1
else:
self.dic[num] += 1
cnt = self.dic[num]
if self.q and (cnt-1,num) in self.q:
self.q.remove((cnt-1,num))
self.q.append((cnt,num))
heapq.heapify(self.q)
else:
if len(self.q)<self.k:
heapq.heappush(self.q, (cnt, num))
else:
if cnt>self.q[0][0]:
heapq.heappop(self.q)
heapq.heappush(self.q, (cnt, num))
return self.q
nums = [1,1,1,1,2,2,3,4,5,6,4,4,4,4,4,4]
so = Solution(3)
for i in nums:
ans = so.add(i)
print ans
或者用set of vector
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <queue>
#include <algorithm>
#include <iostream>
using namespace std;
class Solution {
public:
Solution(int x): k(x) {}
void add(int num) {
++m[num];
// not in the topk set
if (!ks.count(num)) {
if (topk.size() < k) {
topk.push({m[num], num});
ks.insert(num);
} else {
if (m[num] > topk.top().first) {
ks.erase(topk.top().second);
ks.insert(num);
topk.pop();
topk.push({m[num], num});
}
}
} else {
vector<pair<int,int>> tmp;
while (topk.top().second != num) {
tmp.push_back(topk.top()); topk.pop();
}
// pop old num
topk.pop(); topk.push({m[num], num});
for (auto& a: tmp) {
topk.push(a);
}
}
}
void printTopK() {
vector<pair<int,int>> tmp;
while (!topk.empty()) {
cout << topk.top().second << ": " << topk.top().first << endl;
tmp.push_back(topk.top()); topk.pop();
}
for (auto& a: tmp) {
topk.push(a);
}
}
private:
int k;
unordered_map<int,int> m;
// minheap
priority_queue<pair<int,int>, vector<pair<int,int>>, greater<pair<int,int>>> topk;
unordered_set<int> ks;
};
int main() {
Solution s(3);
vector<int> nums = {1,1,1,1,2,2,3,4,5,6,4,4,4,4,4,4};
for (auto& n: nums) {
s.add(n);
s.printTopK();
cout << endl;
}
}
P37 yelp有哪里需要improve?如何improve?需要收集什么数据?如何test 你的方法有效? 如何检查fraud,安全漏洞,或者安全问题。 coding: 给一个字符串“aBc1D2”, 其中的大小写可能不对,bool is_valid(string s) 函数,找到正确唯一的valid的字符串。 这都是什么面经啊!完全没懂什么意思!
找出所有可能的,然后用那个isvalid来求出那一个就行了?
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
using namespace std;
class Solution {
void dfs(string& word, int pos, string& res) {
if (pos == word.size()) {
if (isValid(word)) res = word;
return;
}
if (word[pos] >= '0' && word[pos] <= '9') dfs(word, pos+1, res);
else {
word[pos] = tolower(word[pos]);
dfs(word, pos+1, res);
word[pos] = toupper(word[pos]);
dfs(word, pos+1, res);
}
}
public:
Solution(string s): validStr(s) {}
bool isValid(string& s) {
return s == validStr;
}
string combination(string word) {
string res;
dfs(word, 0, res);
return res;
}
private:
string validStr;
};
int main() {
Solution s("abc1d2");
cout << s.combination("aBc1D2") << endl;
}
P38 link 这部分之后是代码,leetcode的LRU cache,让我主要实现插入的逻辑。我就是hashmap加linked list,然后问了一下优化,说记录链表的尾巴可以加速插入。
version 1: py
import collections
class LRUCache(object):
def __init__(self, capa):
self.dic = collections.OrderedDict()
self.capa = capa
def get(self, key):
if key in self.dic:
value = self.dic.pop(key)
self.dic[key] = value
return value
def set(self, key, val):
if key in self.dic:
self.dic.pop(key)
elif len(self.dic)==self.capa:
self.dic.popitem(last=False)
self.dic[key]=val
so = LRUCache(5)
so.set(1,1)
so.set(2,2)
so.set(3,3)
so.set(4,4)
so.set(5,5)
so.set(6,6)
so.set(7,7)
print so.dic
version 2: c++
class LRUCache{
public:
LRUCache(int capacity): cap(capacity) {}
int get(int key) {
auto it = m.find(key);
if (it == m.end()) return -1;
l.splice(l.begin(), l, it->second);
return it->second->second;
}
void set(int key, int value) {
auto it = m.find(key);
if (it != m.end()) l.erase(it->second);
l.push_front(make_pair(key, value));
m[key] = l.begin();
if (m.size() > cap) {
int k = l.rbegin()->first;
l.pop_back();
m.erase(k);
}
}
private:
int cap;
list<pair<int, int>> l;
unordered_map<int, list<pair<int, int>>::iterator> m;
};
P39 然后是一个华人女性,主要问了我缓存机制。然后问了给了好多课表,然后有先修课要求修完先修才能修后面的,就是一个dependency graph,然后考的就是topological sort。用一个hashmap记录每门课的indegree,码完问了一下时间复杂度。说是$$O(n^2)$$,n是节点个数,这里不同的课程数。
210 Course Schedule II
vector<int> findOrder(int numCourses, vector<pair<int, int>>& prerequisites) {
vector<vector<int>> graph(numCourses, vector<int>(0));
vector<int> in(numCourses, 0);
vector<int> path(numCourses);
queue<int> q;
for (auto pair: prerequisites) {
graph[pair.second].push_back(pair.first);
++in[pair.first];
}
for (int i = 0; i < numCourses; ++i) {
if (in[i] == 0) q.push(i);
}
int cur = 0;
while (!q.empty()) {
int t = q.front(); q.pop();
path[cur++] = t;
for (auto a: graph[t]) {
if (--in[a] == 0) q.push(a);
}
}
for (int i = 0; i < numCourses; ++i) {
if (in[i] != 0) return vector<int>();
}
return path;
}
P40 第三个是一个manager,白人小哥。大哥感觉不是很钻技术的,上来主要跟我讲我是一个manager,主要和人打交道,然后问了问偏behavioral的像是平常做过项目里那次最challenging啊,实习里做的项目我没用过你能给我讲明白么,让你再做一次实习项目你会如何改进。说完之后问了一个很简单的anagram的题,就是找一堆单词里哪两个是anagram。sort单词以后作为key然后hash就行了。
#include <vector>
#include <string>
#include <algorithm>
#include <unordered_map>
#include <iostream>
using namespace std;
class Solution {
public:
vector<string> findAnagram(vector<string>& words) {
unordered_map<string,string> m;
for (auto& w: words) {
string tmp = w;
sort(tmp.begin(), tmp.end());
if (m.count(tmp)) return {m[tmp], w};
m[tmp] = w;
}
return {};
}
};
int main() {
Solution s;
vector<string> words = {"abc", "aaa", "ccc", "bca"};
for (auto& a: s.findAnagram(words)) cout << a << " ";
cout << endl;
return 0;x
}
P40 第四个是一个英国小哥,在公司十年了,感觉是tech lead,然后前面也是客套几句介绍公司,然后问了我一个网站如果相应速度很慢如何解决。上别的网课讲了如何提高网站性能,然后我就基本照着“当在地址栏里输入网址发生了什么”里面每一个步骤将可能发生的问题和相应的解决方案,说了很多,感觉他还很满意。然后问了我一个Word count的题目,要求求出一个单词stream里面最常出现的前十个,先说一个弱弱的把stream里单词变成键值对(key是单词value是出现次数)然后sort。问更好方法,说了用min heap,变成键值对后再放进堆里,堆深度一定时间变成线性的。 然后HR姐姐进来问了我是否有offer,然后问了细节。最让我开心的是她问了我有没有人可以做reference check,因为之前看到面经里表现不错要发offer的结束后都问了reference所以觉得应该还不错。送走之后等了两周HR打电话恭喜我拿到offer。 这道题写过,就不再写了。
见P36
import heapq
class Solution(object):
def __init__(self, k):
self.q = []
self.dic = {}
self.k = k
def add(self, num):
if num not in self.dic:
self.dic[num] = 1
else:
self.dic[num] += 1
cnt = self.dic[num]
if self.q and (cnt-1,num) in self.q:
self.q.remove((cnt-1,num))
self.q.append((cnt,num))
heapq.heapify(self.q)
else:
if len(self.q)<self.k:
heapq.heappush(self.q, (cnt, num))
else:
if cnt>self.q[0][0]:
heapq.heappop(self.q)
heapq.heappush(self.q, (cnt, num))
return self.q
nums = ['a','a','a','a','a','b','c','d','e','f','d','d','d','d','d','d']
so = Solution(3)
for i in nums:
ans = so.add(i)
print ans