-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathTop_K_Frequent_element_algorithm.cpp
355 lines (332 loc) · 11.8 KB
/
Top_K_Frequent_element_algorithm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
// Just Sorting
// The easiest way to think of this problem and easy to implement.
// Time complexity: O(nlogn), naive sort is o(nlogn)
// Space complexity: O(n), for map and list
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
List<Map.Entry<String, Integer>> l = new LinkedList<>();
for(Map.Entry<String, Integer> e:map.entrySet()){
l.add(e);
}
Collections.sort(l, new MyComparator());//just use our Comparator to sort
List<String> ans = new LinkedList<>();
for(int i = 0;i<=k-1;i++){
ans.add(l.get(i).getKey());
}
return ans;
}
}
/*
// Implement our own comparator for this problem, I will also use this Comparator in other methods(A little different in minHeap method).
// We can also use anonymous Comparaotr or Lambda function.
// */
class MyComparator implements Comparator<Map.Entry<String, Integer>> {
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2){
String word1 = e1.getKey();
int freq1 = e1.getValue();
String word2 = e2.getKey();
int freq2 = e2.getValue();
if(freq1!=freq2){
return freq2-freq1;
}
else {
return word1.compareTo(word2);
}
}
}
Max Heap
Maintain a max heap and add all the words in it. Pop top K words to get the results.
Time Complexity: O(nlogn + Klogn) = O(nlogn)
Space Complexity: O(n), for heap
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
PriorityQueue<Map.Entry<String, Integer>> pq = new PriorityQueue<>(new MyComparator());
for(Map.Entry<String, Integer> e:map.entrySet()){
pq.offer(e);
}
List<String> ans = new LinkedList<>();
for(int i = 0;i<=k-1;i++){
ans.add(pq.poll().getKey());
}
return ans;
}
}
class MyComparator implements Comparator<Map.Entry<String, Integer>> {
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2){
String word1 = e1.getKey();
int freq1 = e1.getValue();
String word2 = e2.getKey();
int freq2 = e2.getValue();
if(freq1!=freq2){
return freq2-freq1;
}
else {
return word1.compareTo(word2);
}
}
}
// Min Heap
// Instead of using a max heap, we only store Top K Freqency word we have met so far in our min heap.
// Time Complexity: O(nlogK), logK time for each word
// Space Complexity: O(K), since the largest number of words in our minheap is K
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
MyComparator comparator = new MyComparator();
PriorityQueue<Map.Entry<String, Integer>> pq = new PriorityQueue<>(comparator);
for(Map.Entry<String, Integer> e:map.entrySet()){
// If minHeap's size is smaller than K, we just add the entry
if(pq.size()<k){
pq.offer(e);
}
// Else, we compare the current entry with "min" entry in priority queue
else {
if(comparator.compare(e, pq.peek())>0){
pq.poll();
pq.offer(e);
}
}
}
List<String> ans = new LinkedList<>();
for(int i = 0;i<=k-1;i++){
ans.add(0, pq.poll().getKey());//the "smaller" entry poll out ealier
}
return ans;
}
}
// The comparaotr is reversed as maxHeap
class MyComparator implements Comparator<Map.Entry<String, Integer>> {
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2){
String word1 = e1.getKey();
int freq1 = e1.getValue();
String word2 = e2.getKey();
int freq2 = e2.getValue();
if(freq1!=freq2){
return freq1-freq2;
}
else {
return word2.compareTo(word1);
}
}
}
// Bucket sort + Trie
// This method is derived from 347. Top K Frequent Elements. At 347, we use bucket sort(LinkedList in each bucket) to find top K frequency integers and we can choose any integer if there is a tie of frequency . But in this question, the problem is that when there is a tie of frequency, we need to compare the lexicographic order. Thus using bucket sort(LinkedList in each bucket) is not good.
// The way to solve the tie problem is to use either trie or BST.
// Time Complexity: O(nm) = O(n), m time to construct trie for each word and m is a constant
// Space Complexity: O(nm) = O(n), m space for each bucket and m is a constant
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
Trie[] buckets = new Trie[words.length];
for(Map.Entry<String, Integer> e:map.entrySet()){
//for each word, add it into trie at its bucket
String word = e.getKey();
int freq = e.getValue();
if(buckets[freq]==null){
buckets[freq] = new Trie();
}
buckets[freq].addWord(word);
}
List<String> ans = new LinkedList<>();
for(int i = buckets.length-1;i>=0;i--){
//for trie in each bucket, get all the words with same frequency in lexicographic order. Compare with k and get the result
if(buckets[i]!=null){
List<String> l = new LinkedList<>();
buckets[i].getWords(buckets[i].root, l);
if(l.size()<k){
ans.addAll(l);
k = k - l.size();
}
else {
for(int j = 0;j<=k-1;j++){
ans.add(l.get(j));
}
break;
}
}
}
return ans;
}
}
class TrieNode {
TrieNode[] children = new TrieNode[26];
String word = null;
}
class Trie {
TrieNode root = new TrieNode();
public void addWord(String word){
TrieNode cur = root;
for(char c:word.toCharArray()){
if(cur.children[c-'a']==null){
cur.children[c-'a'] = new TrieNode();
}
cur = cur.children[c-'a'];
}
cur.word = word;
}
public void getWords(TrieNode node, List<String> ans){
//use DFS to get lexicograpic order of all the words with same frequency
if(node==null){
return;
}
if(node.word!=null){
ans.add(node.word);
}
for(int i = 0;i<=25;i++){
if(node.children[i]!=null){
getWords(node.children[i], ans);
}
}
}
}
// Bucket sort + BST
// The reason we use Trie is to break the tie of same word frequency. Thus we can easily use BST to replace Trie(In Java, we can use TreeMap or TreeSet)
// Time Complexity: O(n), not sure
// Space Complexity: O(n), not sure
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
TreeMap<String, Integer>[] buckets = new TreeMap[words.length];
for(Map.Entry<String, Integer> e:map.entrySet()){
String word = e.getKey();
int freq = e.getValue();
if(buckets[freq]==null){
buckets[freq] = new TreeMap<>((a, b)->{
return a.compareTo(b);
});
}
buckets[freq].put(word, freq);
}
List<String> ans = new LinkedList<>();
for(int i = buckets.length-1;i>=0;i--){
if(buckets[i]!=null){
TreeMap<String, Integer> temp = buckets[i];
if(temp.size()<k){
k = k - temp.size();
while(temp.size()>0){
ans.add(temp.pollFirstEntry().getKey());
}
}
else {
while(k>0){
ans.add(temp.pollFirstEntry().getKey());
k--;
}
break;
}
}
}
return ans;
}
}
// Quick select
// If the question is to find Kth frequency word, quick select is a good solution and only cost O(n), for this question, after getting Top K frequency words by using quick select, we also need to do a sort to make sure they are in the right order.
// Time Complexity: O(n+KlogK), n time for quick select and KlogK time for sort
// Space Complexity: O(n)
class Solution {
public List<String> topKFrequent(String[] words, int k) {
Map<String, Integer> map = new HashMap<>();
for(String word:words){
map.put(word, map.getOrDefault(word, 0)+1);
}
Map.Entry<String, Integer>[] entrys = new Map.Entry[map.size()];
int index = 0;
for(Map.Entry<String, Integer> e:map.entrySet()){
entrys[index] = e;
index++;
}
//do quick select
int start = 0;
int end = entrys.length-1;
int mid = 0;
while(start<=end){
mid = partition(entrys, start, end);
if(mid == k-1){
break;
}
else if(mid<k-1){
start = mid + 1;
}
else {
end = mid - 1;
}
}
List<String> ans = new LinkedList<>();
List<Map.Entry<String, Integer>> l = new LinkedList<>();
for(int i = 0;i<=mid;i++){
l.add(entrys[i]);
}
//still need to sort these K words, because we only know they are in result, but not in right order
Collections.sort(l, new MyComparator());
for(Map.Entry<String, Integer> e:l){
ans.add(e.getKey());
}
return ans;
}
private int partition(Map.Entry<String, Integer>[] entrys, int start, int end){
int pivot = start;
int left = start + 1;
int right = end;
MyComparator myComparator = new MyComparator();
while(true){
while(left<=end){
if(myComparator.compare(entrys[left], entrys[pivot])<=0){
left++;
}
else {
break;
}
}
while(right>=start+1){
if(myComparator.compare(entrys[right], entrys[pivot])>0){
right--;
}
else {
break;
}
}
if(left>right){
break;
}
swap(entrys, left, right);
}
swap(entrys, pivot, right);
return right;
}
private void swap(Map.Entry<String, Integer>[] entrys, int i, int j){
Map.Entry<String, Integer> a = entrys[i];
entrys[i] = entrys[j];
entrys[j] = a;
}
}
class MyComparator implements Comparator<Map.Entry<String, Integer>> {
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2){
String word1 = e1.getKey();
int freq1 = e1.getValue();
String word2 = e2.getKey();
int freq2 = e2.getValue();
if(freq1!=freq2){
return freq2-freq1;
}
else {
return word1.compareTo(word2);
}
}
}