智能提示 前缀树 dfs 回溯 多叉树迭代dfs 插入排序的选取
题干 智能提示的典型实现方式是: 在系统后台维护一个字典,当用户输入字符时,在字典中查找以用户当前输入的字符串为前缀的全部单词,选取其中历史使用率最高的若干单词作为候选词,建议给用户. 备注:这里约定一个字符串不能称为自己的前缀。若用户输入的字符串恰好是字典中的一个单词,则该单词不必向用户建议
输入格式 输入第一行为3个正整数n、m、k。n为字典中单词个数。m为用户查询数,即用户输入的单词个数。对于用户输入的每个字符串,程序需要返回字典中以该字符串为前缀的、历史使用频率最高的k个单词。接下来n行,表示字典信息,每行为1个整数和1个字符串,整数表示单词的历史使用频率,字符串表示单词,请注意,单词包含的每个字符为a-z的小写字母或0-9的数字,即数字也可能构成字典中的单词。字典内的单词并非按使用频率有序存放。接下来m行,表示用户的查询,每行为一个a-z的小写字母或0-9的数字组成的字符串,表示用户的查询。另外请注意,由于字典往往是在用户历史数据的基础上加工而得,所以字典中可能出现重复单词,若某个单词在字典中出现多次,则其历史使用频率以最高者为准。 (n ≤ 10000, m ≤ 20000, k ≤ 10, 每个单词长度不超过20,单词历史使用频率小于$2^{31}$)
输出格式: 对于用户输入的每个字符串,按使用频率降序输出字典中以该字符串为前缀的、历史使用频率最高的k个单词,每个占1行。若多个单词历史使用频率相同,则字典序靠前的单词排名靠前。若单词中包含数字,则字典序以ACSII码判定,即0<1<2<…<9<a<b<c<…<z
。若字典中满足输出条件的单词个数大于0小于k,则有多少就输出多少个。若字典中没有满足输出条件的单词,则输出“no suggestion”。针对用户每个查询所输出的信息,用空行间隔。
输入样例 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 20 3 4 1827187200 the 1595609600 to 1107331800 that 401542500 this 334039800 they 282026500 their 250991700 them 196118888 these 150877900 than 144968100 time 125563600 then 109336600 two 196120000 there 87862100 those 79292500 through 75885600 the 71578000 think 67462300 2 65648600 tx356 57087700 though th xxx the
输出样例 1 2 3 4 5 6 7 8 9 10 11 the that this they no suggestion they their them there
分析1 一眼前缀树,搜索时找到用户输入的前缀perfix最后一个字母对应的节点后对其子节点进行dfs
PTA测试时间450ms
实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 #include <bits/stdc++.h> using namespace std;#define char2pos(ch) ((ch >= '0' && ch <= '9' ) ? ch - '0' : ch - 'a' + 10) #define pos2char(pos) ((pos >= 0 && pos <= 9) ? pos + '0' : pos - 10 + 'a' ) class trietree {public : struct Node { int perfix = 0 ; int freq = 0 ; Node *next[36 ] = {NULL }; }; void insert (char *str, int freq) { Node *cur = &root; for (int i = 0 ; i < strlen (str); i++) { int pos = char2pos (str[i]); if (!cur->next[pos]) { cur->next[pos] = new Node; } cur = cur->next[pos]; cur->perfix++; } cur->freq = cur->freq > freq ? cur->freq : freq; } vector<pair<int , string>> query (char *perfix) { Node *perfix_end_node = &root; for (int i = 0 ; i < strlen (perfix); i++) { int pos = char2pos (perfix[i]); if (perfix_end_node->next[pos]) { perfix_end_node = perfix_end_node->next[pos]; } else { return {}; } } bool perfix_end_node_has_son = false ; { vector<pair<int , string>> result; auto push = [&](char *str, int freq) { int i = 0 ; while (i < result.size () && result[i].first >= freq) { i++; } result.insert (result.begin () + i, {freq, str}); }; for (int i = 0 ; i < 36 ; i++) { if (perfix_end_node->next[i]) { perfix_end_node_has_son = true ; Node *cur = perfix_end_node->next[i]; char path[21 ] = {0 }; int pathend = 0 ; function<void (Node *, char )> dfsRe = [&](Node *root, char ch) { if (!root) return ; path[pathend++] = ch; if (root->freq) { path[pathend] = 0 ; push (path, root->freq); } for (int i = 0 ; i < 36 ; i++) { dfsRe (root->next[i], pos2char (i)); } pathend--; }; dfsRe (cur, pos2char (i)); } } if (!perfix_end_node_has_son) { return {}; } return result; } } Node root; }; int main (int argc, char *argv[]) { int N, M, K; scanf ("%d" , &N); scanf ("%d" , &M); scanf ("%d" , &K); trietree t; char buf[21 ]; int freq = INT_MIN; for (int i = 0 ; i < N; i++) { scanf ("%d" , &freq); scanf ("%s" , buf); t.insert (buf, freq); } for (int i = 0 ; i < M; i++) { scanf ("%s" , buf); auto result = t.query (buf); if (result.size ()) { for (int i = 0 , num = K; i < result.size () && num > 0 ; i++, num--) { printf ("%s%s\n" , buf,result[i].second.c_str ()); } } else { printf ("no suggestion\n" ); } if (i != M - 1 ) printf ("\n" ); } return 0 ; }
分析2 分析1代码提交后发现时间复杂度太高,遂想到可以对递归剪枝 如 用户重复查询同一个perfix会对同一dfs重复多次 用户查询th than thana 等重复dfs了thana的a节点 于是将算法改为在读入字典数据后立即对每一个节点构造其suggestions
PTA测试时间30ms
实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 #include <bits/stdc++.h> using namespace std;#define char2pos(ch) ((ch >= '0' && ch <= '9' ) ? ch - '0' : ch - 'a' + 10) #define pos2char(pos) ((pos >= 0 && pos <= 9) ? pos + '0' : pos - 10 + 'a' ) class trietree {public : struct Node { int perfix = 0 ; int freq = 0 ; Node *next[36 ] = {NULL }; vector<int > nextvec; void vecpush (int pos) { int cur = 0 ; while (cur < nextvec.size () && nextvec[cur] < pos) { cur++; } nextvec.insert (nextvec.begin () + cur, pos); } vector<pair<string, int >> suggestions; }; void insert (char *str, int freq) { int len = strlen (str); if (!len) return ; Node *cur = &root; for (int i = 0 ; i < len; i++) { int pos = char2pos (str[i]); if (!cur->next[pos]) { cur->next[pos] = new Node; cur->vecpush (pos); } cur = cur->next[pos]; cur->perfix++; } cur->freq = cur->freq > freq ? cur->freq : freq; } void suggestion_push (Node *node, string str, int freq) { int i = 0 ; while (i < node->suggestions.size () && node->suggestions[i].second >= freq) { i++; if (i + 1 > max_suggestion_count) return ; } if (i + 1 <= max_suggestion_count) node->suggestions.insert (node->suggestions.begin () + i, {str, freq}); } char buildpath[21 ]; int buildpathend = 0 ; void build_suggestions (Node *root) { if (!root) { return ; } for (auto it : root->nextvec) { Node *son = root->next[it]; buildpath[buildpathend++] = pos2char (it); if (son->freq) { buildpath[buildpathend] = 0 ; suggestion_push (root, buildpath, son->freq); } build_suggestions (son); for (auto sonsug : son->suggestions) { suggestion_push (root, sonsug.first, sonsug.second); } buildpathend--; } } vector<pair<string, int >> query (char *perfix, int num) { Node *perfix_end_node = &root; for (int i = 0 ; i < strlen (perfix); i++) { int pos = char2pos (perfix[i]); if (perfix_end_node->next[pos]) { perfix_end_node = perfix_end_node->next[pos]; } else { return {}; } } return perfix_end_node->suggestions; } Node root; int max_suggestion_count = 0 ; }; int main (int argc, char *argv[]) { int N, M, K; scanf ("%d" , &N); scanf ("%d" , &M); scanf ("%d" , &K); trietree t; t.max_suggestion_count = K; char buf[21 ]; int freq; for (int i = 0 ; i < N; i++) { scanf ("%d" , &freq); scanf ("%s" , buf); t.insert (buf, freq); } t.build_suggestions (&t.root); for (int i = 0 ; i < M; i++) { scanf ("%s" , buf); int num = K; auto result = t.query (buf, K); if (result.size ()) { for (auto &&it : result) { if (!num) break ; printf ("%s\n" ,it.first.c_str ()); num--; } } else { printf ("no suggestion\n" ); } if (i != M - 1 ) printf ("\n" ); } return 0 ; }
分析3(from 2) 若不是投入使用,仅仅为了通过题目,可以进行以下优化 在用户输入一系列perfix时有可能某些节点从未被访问 因此可以在输入一个perfix后再对该perfix最后一个字母的节点进行dfs建立suggestions
实现:无
分析4 (from ?) 其实大可不必dfs过来dfs过去的,还回溯个毛线啊,直接在insert的时候把沿途路径上每个节点的suggestions更新了就行了
PTA测试时间30ms
实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 #include <bits/stdc++.h> using namespace std;#define char2pos(ch) ((ch >= '0' && ch <= '9' ) ? ch - '0' : ch - 'a' + 10) #define pos2char(pos) ((pos >= 0 && pos <= 9) ? pos + '0' : pos - 10 + 'a' ) class trietree {public : struct Node { int perfix = 0 ; int freq = 0 ; Node *next[36 ] = {NULL }; vector<int > nextvec; void vecpush (int pos) { int cur = 0 ; while (cur < nextvec.size () && nextvec[cur] < pos) { cur++; } nextvec.insert (nextvec.begin () + cur, pos); } vector<pair<string, int >> suggestions; }; void insert (char *str, int freq) { int len = strlen (str); if (!len) return ; Node *cur = &root; for (int i = 0 ; i < len; i++) { int pos = char2pos (str[i]); if (!cur->next[pos]) { cur->next[pos] = new Node; cur->vecpush (pos); } cur = cur->next[pos]; if (i != len - 1 ) { suggestion_push (cur, str, freq); } cur->perfix++; } cur->freq = cur->freq > freq ? cur->freq : freq; } void suggestion_push (Node *node, string str,int freq) { int i = 0 ; auto curvec = &node->suggestions; int inserted = false ; for (auto it = curvec->begin (); it != curvec->end (); it++) { if ((*it).first == str) { (*it).second = (*it).second > freq ? (*it).second : freq; inserted = true ; } } if (!inserted) { curvec->push_back ({str, freq}); } sort (curvec->begin (), curvec->end (), [](pair<string, int > &a, pair<string, int > &b) { if (a.second > b.second) return true ; if (a.second < b.second) return false ; return a.first < b.first; }); if (curvec->size ()>max_suggestion_count) curvec->erase (curvec->begin ()+max_suggestion_count,curvec->end ()); } vector<pair<string, int >> query (char *perfix, int num) { Node *perfix_end_node = &root; for (int i = 0 ; i < strlen (perfix); i++) { int pos = char2pos (perfix[i]); if (perfix_end_node->next[pos]) { perfix_end_node = perfix_end_node->next[pos]; } else { return {}; } } return perfix_end_node->suggestions; } Node root; int max_suggestion_count = 0 ; }; int main (int argc, char *argv[]) { int N, M, K; scanf ("%d" , &N); scanf ("%d" , &M); scanf ("%d" , &K); trietree t; t.max_suggestion_count = K; char buf[21 ]; int freq; for (int i = 0 ; i < N; i++) { scanf ("%d" , &freq); scanf ("%s" , buf); t.insert (buf, freq); } for (int i = 0 ; i < M; i++) { scanf ("%s" , buf); auto result = t.query (buf, K); if (result.size ()) { for (int i = 0 , num = K; i < result.size () && num > 0 ; i++, num--) { printf ("%s\n" , result[i].first.c_str ()); } } else { printf ("no suggestion\n" ); } if (i != M - 1 ) printf ("\n" ); } return 0 ; }
CSDN java实现
扩展 当需要处理的字符不仅仅是什么26个英文字母的时候,甚至出现中文等语言的时候如何处理? ????怎么实现啊????? 编码???
额外收获
cout和printf性能对比
多叉树(一般树)的dfs及其递归转迭代思路: whileleftin;while(stack)(popandpushnext,whileleftin)
递归转栈大概率不会减少时间,申请系统的用户栈和自己模拟栈性能似乎差不多?(至少不会带来明显的性能提升)[待证]
其他
int perfix = 0;
在本题是没用的
nextvec纯纯负优化,不如整个map<int,Node*>
,也能自动排序,也能避免36次循环,不亦乐乎?
另外附上分析1栈版本代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 #include <bits/stdc++.h> using namespace std;#define char2pos(ch) ((ch >= '0' && ch <= '9' ) ? ch - '0' : ch - 'a' + 10) #define pos2char(pos) ((pos >= 0 && pos <= 9) ? pos + '0' : pos - 10 + 'a' ) class trietree {public : struct Node { int perfix = 0 ; int freq = 0 ; Node *next[36 ] = {NULL }; vector<int > nextvec; void vecpush (int pos) { int cur = 0 ; while (cur < nextvec.size () && nextvec[cur] < pos) { cur++; } nextvec.insert (nextvec.begin () + cur, pos); } }; void insert (char *str, int freq) { int len = strlen (str); if (!len) return ; Node *cur = &root; for (int i = 0 ; i < len; i++) { int pos = char2pos (str[i]); if (!cur->next[pos]) { cur->next[pos] = new Node; cur->vecpush (pos); } cur = cur->next[pos]; cur->perfix++; } cur->freq = cur->freq > freq ? cur->freq : freq; } vector<pair<int , string>> query (char *perfix,int num) { Node *perfix_end_node = &root; for (int i = 0 ; i < strlen (perfix); i++) { int pos = char2pos (perfix[i]); if (perfix_end_node->next[pos]) { perfix_end_node = perfix_end_node->next[pos]; } else { return {}; } } if (!perfix_end_node->nextvec.size ()) { return {}; } { vector<pair<int , string>> result; auto push = [&](char *str, int freq) { int i = 0 ; while (i < result.size () && result[i].first >= freq) { i++; }if (i+1 <=num) result.insert (result.begin () + i, {freq, str}); }; function<void (Node *)> dfs = [&](Node *root) { if (!root) return ; char path[21 ] = {0 }; int pathend = 0 ; stack<pair<Node *, int >> stack; while (root) { stack.push ({root, 0 }); if (root->nextvec.size ()) { path[pathend++] = pos2char (root->nextvec[0 ]); root = root->next[root->nextvec[0 ]]; if (root->freq) { path[pathend] = 0 ; push (path, root->freq); } } else break ; } while (stack.size ()) { auto [top, pos] = stack.top (); stack.pop (); pathend--; if (!stack.size ()) { break ; } else { if (pos + 1 <= stack.top ().first->nextvec.size () - 1 ) { auto next = stack.top ().first->next[stack.top ().first->nextvec[pos + 1 ]]; path[pathend++] = pos2char (stack.top ().first->nextvec[pos + 1 ]); if (next->freq) { path[pathend] = 0 ; push (path, next->freq); } stack.push ({next, pos + 1 }); while (next->nextvec.size ()) { int pos = next->nextvec[0 ]; auto node = next->next[pos]; stack.push ({node, 0 }); path[pathend++] = pos2char (pos); if (node->freq) { path[pathend] = 0 ; push (path, node->freq); } next = node; } } } } }; for (auto it : perfix_end_node->nextvec) { Node tmp = { .perfix = 0 , .freq = 0 , .next = {NULL }, .nextvec = {it}, }; tmp.next[it] = perfix_end_node->next[it]; dfs (&tmp); } return result; } } Node root; }; int main (int argc, char *argv[]) { int N, M, K; scanf ("%d" , &N); scanf ("%d" , &M); scanf ("%d" , &K); trietree t; char buf[21 ]; int freq = INT_MIN; for (int i = 0 ; i < N; i++) { scanf ("%d" , &freq); scanf ("%s" , buf); t.insert (buf, freq); } for (int i = 0 ; i < M; i++) { scanf ("%s" , buf); int num = K; auto result = t.query (buf, K); if (result.size ()) { for (auto &&it : result) { if (!num) break ; printf ("%s%s\n" ,buf,it.second.c_str ()); num--; } } else { printf ("no suggestion\n" ); } if (i != M - 1 ) printf ("\n" ); } return 0 ; }