[K9 keyboard problem]Finding the words by ranking - updating

Problem

You have a list of words with ranking.

Now you need to create a function that will take this list as input and provide a way so that a T9 keyboard can provide three top results of probable words based on rankings for the numbers punched in. To help the understanding here is the picture of T9 keyboard.


Figure 1: example T9 keyboard

For example, given the list words with ranking:

[("test", 13), ("tax", 7), ("tom", 50), ("text", 100), ("top", 79),("tub", 101), ("tap", 3))]

Following input from the T9 keyboard, should return the following output.

key = 8: [ (tap, 3)(tax, 7)(test, 13)]
key = 82: [ (tap, 3)(tax, 7)]
key = 83: [ (test, 13)(text, 100)]
key = 86: [ (tom, 50)(top, 79)]
key = 2: [ ]


1. Brute-force approach


We can sort the list of words by rank then search the list to find words matching the keyword. This brute-force approach will result in the following running time.

Sorting : O(NlogN), one time cost
Lookup: O(N)

To solve this problem, we need to understand Trie data structure and implement it.
Using the Trie, we can store the input words in each level. Only modification I would make is to remember only Top 3 words. 

Once the Trie is built, searching top 3 words starting with given keyword can be found via Tree traverse.

Each node contains the list of top 3 words that share the same prefix and sorted by ranking. (See Figure 1.)


Overall Running is below:

- Building a Trie: 
   O(NlogN): sorting the list of N words by ranking
   O(K×NlogN): to build the tree, where K is the longest  string, N is the number of words given as a input.

- Initializing the T8Keyboard class: O(1)
- Searching the words : O(4I×I×logN)
  O(4I): to create the list of possible word combination, where I is the length of key.
  O(4I×I×logN): to find the top 3 words corresponding to each candidate words
  O(NlogN) to sort the results by ranking 

Therefore, overall running time will be O(4I×I×logN)


In the solution, we used the std::map to store the children nodes with a character as a key. It requires O(logN) to check if there are child with a specified character in each level in the tree.

However, if we use the hash table, in this context a array of node * with size of 256. We can reduce the time to search the candidate words, to $O(4I×I)$ at an expense of the space, since we need to keep the 256 slots of node * even though only one slot is used.

Here is the complete code.

#include<vector>
#include<map>
#include<algorithm>
#include<iostream>
using namespace std;
struct word {
string value;
int rank;
word(string v, int r):value(v), rank(r){}
};
struct node {
char v;
vector<word*> words;
map<char, struct node *> children;
node (char value):v(value) {}
};
bool compare(word * prev, word* next)
{
return prev->rank < next->rank;
}
class Trie {
public:
Trie(vector<word*>& input)
{
root = build_trie(input);
}
vector<word*> find_words(string& input, int K)
{
vector<word*> result;
map<char, node*>::iterator found;
if ((found = root->children.find(input[0]))!= root->children.end())
{
result = search_words(found->second, input, 1, K);
}
return result;
}
private:
void add_word(node * parent, word* input, int pos)
{
if (pos >= input->value.length())
return;
map<char, node*>::iterator found;
char cur = input->value[pos++];
if (parent->v != '*')
{
parent->words.push_back(input);
}
if ((found = parent->children.find(cur))!= parent->children.end())
{
return add_word(found->second, input, pos);
}
else
{
node * new_node = new node(cur);
parent->children[cur] = new_node;
return add_word(new_node, input, pos);
}
}
node* build_trie(vector<word*>& input)
{
//build tree
node * first = new node('*');
sort(input.begin(), input.end(), compare);
for (int i = 0; i < input.size(); i++)
add_word(first, input[i], 0);
return first;
}
vector<word*> search_words(node* p, string& input, int pos, int max)
{
map<char, node*>::iterator found;
vector<word*> result;
if (pos >= input.length())
{
for(int i = 0; i <p->words.size(); i++)
{
if (result.size() == max)
break;
result.push_back(p->words[i]);
}
return result;
}
char c = input[pos++];
if ((found = p->children.find(c))!= p->children.end())
{
return search_words(found->second, input, pos, max);
}
return result;
}
node* root;
};
class T9Keyboard{
public:
T9Keyboard(Trie* data)
{
db = data;
mapping['2'] = "abc";
mapping['3'] = "def";
mapping['4'] = "ghi";
mapping['5'] = "jkl";
mapping['6'] = "mno";
mapping['7'] = "prqs";
mapping['8'] = "tuv";
mapping['9'] = "wxyz";
}
void permutate(vector<string>& list, int lpos, string left, vector<string>& result)
{
if (lpos >= list.size())
{
result.push_back(left);
return;
}
for (int i = 0; i < list[lpos].length(); i++)
{
left.push_back(list[lpos][i]);
permutate(list, lpos+1, left, result);
left.pop_back();
}
}
vector<string> generate_candidates(string key)
{
vector<string> input;
vector<string> candidates;
for (int i = 0; i < key.length(); i++)
input.push_back(mapping[key[i]]);
permutate(input, 0, "", candidates);
return candidates;
}
void find(string key)
{
vector<word*> result;
vector<string> candidates = generate_candidates(key);
for (int i = 0; i < candidates.size(); i++)
{
vector<word*> found = db->find_words(candidates[i], 3);
result.insert(result.end(), found.begin(), found.end());
}
sort(result.begin(), result.end(), compare);
cout<< "key = "<<key<<": ";
cout<<"[ ";
for (int i = 0; i < result.size() && i< 3; i++)
{
cout<<"("<<result[i]->value<<", " << result[i]->rank<<")";
}
cout<<"]"<<endl;
}
private:
map<char, string> mapping;
Trie* db;
};
int main ()
{
vector<word*> input;
input.push_back(new word("test", 13));
input.push_back(new word("tax", 7));
input.push_back(new word("tom", 50));
input.push_back(new word("text", 100));
input.push_back(new word("top", 79));
input.push_back(new word("tub", 101));
input.push_back(new word("tap", 3));
Trie *db = new Trie(input);
T9Keyboard k(db);
k.find("8");
k.find("82");
k.find("83");
k.find("86");
k.find("2");
return 1;
}
view raw K9_keyboard.cpp hosted with ❤ by GitHub

Comments

Popular posts from this blog

Planting flowers with no adjacent flower plots

Find the shorted path from the vertex 0 for given list of vertices.