Word Transformer
17.22 Word Transformer: Given two words of equal length that are in a dictionary, write a method to transform one word into another word by changing only one letter at a time. The new word you get in each step must be in the dictionary.
EXAMPLE
Input: DAMP, LIKE
Output: DAMP -> LAMP -> LIMP -> LIME -> LIKE
I decided to challenge myself and find the minimum path between two words.
Complexity: O(n^2) space and time. O(n) time for any word after construction of the tree.
The Idea: The general idea is to construct a graph where a link represents the relationship between two words a and b where a differs from b by one 1 character (order preserving). Just to expand on the implementation a bit, I've grabbed a dictionary of 475k words from github and preprocessed the words by word length. Then each cluster could be used to generate a graph individually. I also did some research to find the distribution of words by length. This way, I can roughly account for the size of each vector per word length. The base algorithm iterates through each combination of words and finds their associated relationship. Then the inverse graph becomes added itself, as to represent the association that if a diff(1) b
, then b diff(1) a
. Each node also represents a prev
Node pointer. This node is used for backtracking after using BFS search algorithm to find the shortest path. Basically in the BFS algorithm, if every node that gets passed into the queue along with it gets the previous node in which it breadthed out.
enum State {
UnVisited, Visited, Visiting
};
class Node {
public:
explicit Node(string word_id) :
word(word_id), state(UnVisited), prev(nullptr) {}
string word;
State state;
Node *prev; // explicitly for BFS shortest path
bool operator==(const Node &other) const {
return (word == other.word);
}
};
namespace std {
template <>
struct hash<Node> {
std::size_t operator()(const Node& k) const {
return ((hash<string>()(k.word)));
}
};
}
class Graph {
public:
Graph(const vector<string> &words)
: words_ref(words) {
map_defaults();
analyize_lev1_word_rel();
add_inv_graph();
}
void traverse_all() {
for (auto& i : graph) {
cout << setw(10) << i.first.word
<< "(" << i.first.state << ") : ";
traverse_set(i.second);
}
}
vector<string> min_path_lev_1_distance(const string &a, const string &b) {
vector<string> min_path;
if (graph.find(Node(a)) == graph.end() || graph.find(Node(b)) == graph.end()) {
cout << "Words not found within dictionary" << endl;
return min_path;
}
queue<Node> q;
q.push(Node(a)); // a -> ... -> b
const_cast<Node&>(graph.find(Node(a))->first).state = Visited;
while (!q.empty()) {
Node front = q.front();
q.pop();
// need this first direct pointer for previous
auto *front_graph_key = &const_cast<Node&>(graph.find(front)->first);
if (front.word == b) {
return traverse_BFS_shortest_path(graph.find(Node(b))->first);
}
unordered_set<string> adj_elements = findAdj(Node(front));
for (const string &s : adj_elements) {
if (graph.find(Node(s))->first.state == UnVisited) {
q.push(Node(s));
// need to have assurance that the changes to the graph
// occur to the actual elements within the graph, not copies.
const_cast<Node&>(graph.find(Node(s))->first).state = Visited;
const_cast<Node&>(graph.find(Node(s))->first).prev = front_graph_key;
}
}
}
return vector<string>();
}
private:
const vector<string> words_ref;
unordered_map<Node, unordered_set<string>> graph;
void map_defaults() {
for (const string &s : words_ref)
graph.insert( {Node(s), unordered_set<string>() });
}
void analyize_lev1_word_rel() {
for (int i = 0; i < words_ref.size(); i++) {
for (int j = i + 1; j < words_ref.size(); j++) {
// if i and j are 1 apart, then j and i are one apart
// this will be taken care of in add_inv_graph
if (is_distance_1_apart(words_ref[i], words_ref[j])) {
graph[Node(words_ref[i])].insert(words_ref[j]);
}
}
}
}
bool is_distance_1_apart(const string one, const string two) {
assert(one.length() == two.length());
int diff = 0, iter2 = 0;
for (char c : one) {
if (diff >= 2) break;
if (c != two[iter2++])
diff++;
}
return (diff < 2);
}
vector<string> traverse_BFS_shortest_path(const Node &end) {
vector<string> sp;
sp.push_back(end.word);
Node *iter = end.prev;
while (iter) {
sp.push_back(iter->word);
iter = iter->prev;
}
reverse(sp.begin(), sp.end());
return sp;
}
void add_inv_graph()
{
for (auto &node_i : graph) {
for (auto &node_j : node_i.second) {
graph[Node(node_j)].insert(node_i.first.word);
}
}
}
void resetStates() {
for (auto &i : graph)
const_cast<Node&>(i.first).state = UnVisited;
}
bool exists(const Node &n) {
return (graph.find(n) != graph.end());
}
bool contains(const unordered_set<Node> &s, const Node &n) {
return (s.find(n) != s.end());
}
void traverse_set(const unordered_set<string> &s)
{
for (const string &i : s)
cout << i << ", ";
cout << endl;
}
unordered_set<string>& findAdj(const Node &n)
{
if (!exists(n)) {
cout << "Key does not exist" << endl;
return unordered_set<string>();
}
return graph.find(n)->second;
}
};
#define MAX_WORD_LEN 32
vector<vector<string>> cluster_by_size(const string &file_path, const int num_words)
{
array<float, MAX_WORD_LEN> word_length_distribution =
{0.00, 0.001, 0.006, 0.026, 0.052, 0.085, 0.122, 0.14, 0.14, 0.126,
0.101, 0.075, 0.052, 0.032, 0.020, 0.01, 0.006, 0.003, 0.002, 0.001, 0.0};
int iter = 0;
vector<vector<string>> clusters(MAX_WORD_LEN);
for (vector<string> &v : clusters)
v.reserve(num_words*word_length_distribution[iter++]);
ifstream infile;
infile.open(file_path);
int index = 0;
if (infile.good()) {
string line;
while (getline(infile, line)) {
int length_hash = line.length();
clusters.at(length_hash).push_back(line);
}
}
return clusters;
}
void test(vector<vector<string>> &clusters)
{
vector<Graph> g_clusters;
g_clusters.reserve(MAX_WORD_LEN);
// create clustors
for (vector<string> &v : clusters)
g_clusters.emplace_back(v);
// traverse a few
for (int i : {3}) {
g_clusters[i].traverse_all();
cout << "\n\n";
}
vector<string> min_path = g_clusters[4].min_path_lev_1_distance("damp", "like");
print(min_path);
}
int main()
{
const string filepath = "../infiles/words_alpha.txt";
vector<vector<string>> clusters = cluster_by_size(filepath, 475000);
test(clusters);
}
Sample output: (note: only uses small margin of dictionary words)
wey(0) : way, wee, wed, wei, wea, web, wef,
vip(0) : vis, viz,
wae(0) : wan, waf, wag, wap, wah, wee, way, war, was, wat, waw, wax, wab, wac, wad,
vis(0) : viz, vss, ais, vip,
vug(0) : vum, vog,
waf(0) : waw, wag, wap, wah, way, wan, war, was, wat, wax, wef, wae, wab, wac, wad,
aeq(0) : aer, aes, aet,
viz(0) : vip, vis,
voc(0) : vol, vod, voe, vow, vog, soc, von, vox,
vol(0) : von, vog, vow, vox, sol, voc, vod, voe,
ack(0) : act, acy, aik, alk,
vod(0) : voe, vow, vog, vol, von, vox, sod, voc,
voe(0) : vow, vog, vod, vol, von, soe, vox, voc,
sob(0) : sok, soc, sol, sod, soe, sog, soh, soy, son, sop, sos, sot, sou,
vog(0) : vod, vol, von, vow, vox, vug, sog, voc, voe,
sok(0) : sod, sot, sol, son, sop, soc, sos, sou, sob, soe, sog, soh, soy,
von(0) : vog, vow, vox, voc, son, fon, vod, vol, voe,
own(0) : owd, owl, owt, owe, owk,
vow(0) : vox, vog, fow, voc, vod, vol, voe, von,
vox(0) : fox, voc, vod, vol, voe, vow, vog, von,
sny(0) : soy,
vss(0) : vis,
aer(0) : aes, aet, agr, air, aeq,
vum(0) : vug,
wab(0) : wac, wad, wae, wan, waf, wag, wap, wah, way, war, was, wat, waw, wax, web,
wac(0) : wad, wae, wan, waf, wag, wap, wah, way, war, was, wat, waw, wax, pac, wab,
sml(0) : sol, sma,
wad(0) : wae, wan, waf, wag, wap, wah, way, war, was, wat, waw, wed, wax, pad, wab, wac,
wag(0) : wax, wap, wah, way, wan, war, was, wat, waw, wae, waf, wab, wac, wad,
wah(0) : way, wan, wax, wap, war, was, wat, pah, waw, wae, waf, wab, wac, wad, wag,
wee(0) : wef, wea, wae, wey, wei, wed, web,
way(0) : pay, wan, wax, wap, war, was, wat, waw, wae, wey, waf, wab, wac, wad, wag, wah,
wan(0) : wax, wap, can, war, was, wat, waw, wae, waf, wab, wac, wad, wag, wah, way,
wap(0) : wab, war, was, cap, wat, waw, wax, wae, waf, wac, wad, wag, wah, way, wan,
war(0) : wac, was, wat, waw, wax, waf, car, wae, wab, wad, wag, wah, way, wan, wap,
was(0) : wad, wat, wag, waw, wax, wae, waf, wab, wac, wah, way, wan, wap, war,
wat(0) : wag, waw, wah, wax, wae, waf, wab, wac, wad, way, wan, wap, war, was,
waw(0) : wah, wax, wae, waf, wab, wac, wad, wag, way, wan, wap, war, was, wat,
wed(0) : wee, wef, wea, wey, wei, wad, web,
wax(0) : wae, wan, waf, wab, wac, wad, wag, wah, way, wap, war, was, wat, waw,
wea(0) : web, wed, wee, wef, wey, wei,
web(0) : wed, wee, wab, wef, wea, wey, wei,
wef(0) : wea, wey, wei, web, waf, wee, wed,
wei(0) : wea, wey, wee, wed, web, wef,
slt(0) : sot,
sma(0) : sml,
soc(0) : sol, sod, soe, sog, sop, soh, soy, sok, son, sos, sot, sou, voc, sob,
sod(0) : soe, sog, sop, soh, soy, sos, sok, sol, son, sot, sou, vod, sob, soc,
soe(0) : sog, sop, soh, soy, sos, sok, sot, sol, son, sou, voe, sob, soc, sod,
sog(0) : sop, soh, soy, sos, sok, sot, sol, son, sou, vog, sob, soc, sod, soe,
owt(0) : own, owe, owd, owl, owk,
ahs(0) : aes, aht, ahu, ays, ais, ads, aho, ahi, aha,
soh(0) : soy, sos, sok, sot, sol, son, sop, sou, sob, soc, sod, soe, sog,
pad(0) : pah, pay, pal, pac, wad,
owe(0) : owk, owd, owt, owl, own,
soy(0) : sos, sok, sot, sol, son, sop, sou, sny, sob, soc, sod, soe, sog, soh,
sol(0) : sml, son, sop, sok, sos, sot, sou, vol, sob, soc, sod, soe, sog, soh, soy,
ahu(0) : ayu, aht, aku, ahs, aho, ahi, aha,
son(0) : sop, soc, sok, von, fon, sos, sot, sou, sob, sod, soe, sog, soh, soy, sol,
owl(0) : own, owd, owt, owe, owk,
sop(0) : soc, sok, sos, sod, sot, fop, soe, sou, sob, sog, soh, soy, sol, son,
sos(0) : sod, sot, soe, sou, sob, soc, sok, sog, soh, soy, sol, son, sop,
aho(0) : ado, ahs, aht, ahu, ako, ago, ahi, aha,
sot(0) : soe, sou, slt, fot, sob, soc, sok, sod, sog, soh, soy, sol, son, sop, sos,
pah(0) : pay, pad, pal, wah, pac,
sou(0) : soh, fou, sob, soc, sok, sod, soe, sog, soy, sol, son, sop, sos, sot,
owd(0) : owe, owk, owt, owl, own,
ake(0) : aye, ako, aku, ade, ale, aka, age,
owk(0) : owd, owt, owl, own, owe,
oxy(0) :
ozs(0) :
pac(0) : wac, pal, pad, pah, pay,
pay(0) : pad, pal, way, pah, pac,
pal(0) : pad, pah, pac, pay,
fon(0) : fow, foo, fox, fop, for, fot, fou, von, son,
foo(0) : fox, fop, for, fro, fot, fou, fow, fon,
adj(0) : adm, ado, adp, adc, ads, adv, adz, ada, add, ade, ady,
fop(0) : for, fot, sop, fou, foo, fow, fox, fon,
for(0) : fot, fou, foo, fow, fop, fox, fon,
fot(0) : fou, foo, frt, fow, fop, fox, sot, fon, for,
ado(0) : adp, aho, ads, ago, adv, adj, ako, adz, adm, ada, adc, add, ade, ady,
fou(0) : foo, fow, fop, sou, fox, fon, for, fot,
adm(0) : ado, adp, adc, ads, adv, aim, adj, adz, ada, add, ade, ady,
fow(0) : fop, fox, foo, vow, fon, for, fot, fou,
fox(0) : fop, vox, fon, fow, foo, for, fot, fou,
alf(0) : alg, ala, aly, alc, alk, ald, all, aff, alb, ale,
fpm(0) : fps,
fps(0) : frs, fpm,
fra(0) : fry, fro, frs, frt,
fry(0) : fro, frs, frt, fra,
fro(0) : frs, foo, frt, fry, fra,
frs(0) : frt, fry, fra, fps, fro,
frt(0) : fro, fot, fry, fra, frs,
uli(0) :
cam(0) : can, cap, car,
can(0) : cap, wan, car, cam,
ada(0) : ads, adc, add, adm, ade, ala, ady, adj, ado, adp, adv, adz, aga, aha, aka,
cap(0) : car, wap, cam, can,
adc(0) : add, adm, ade, ady, adj, ado, adp, ads, adv, adz, alc, ada,
car(0) : can, war, cam, cap,
acy(0) : ack, aly, ady, act, agy,
act(0) : aet, aft, agt, aht, ait, ack, acy,
add(0) : adm, ade, ady, adz, adj, ado, adp, ads, adv, afd, adc, aid, ald, ada,
ade(0) : ady, adz, adj, adm, ado, adp, ads, adv, age, aye, ake, ale, ada, adc, add,
ady(0) : adz, adj, adm, ado, adp, agy, ads, adv, ada, aly, adc, acy, add, ade,
adp(0) : adc, ads, adv, adj, adz, ado, adm, ada, add, ade, ady,
ads(0) : adv, adj, adz, aes, ado, ahs, add, ays, ais, adm, ada, adc, ade, ady, adp,
adv(0) : adj, adz, ado, ade, adm, ady, ada, adc, add, adp, ads,
adz(0) : adj, ado, ade, adm, ady, ada, adc, add, adp, ads, adv,
aes(0) : ahs, aet, ays, ais, aeq, aer, ads,
aet(0) : aeq, aft, agt, aes, aht, ait, act, aer,
afb(0) : alb, aft, afd, aff,
afd(0) : aff, aft, aid, afb, add, ald,
aff(0) : afd, aft, alf, afb,
aft(0) : agt, aff, aht, ait, act, aet, afb, afd,
aga(0) : aka, age, agy, ago, agr, aha, agt, ada, ala,
age(0) : aga, ake, agy, ago, agr, ade, agt, ale, aye,
agy(0) : ago, agr, agt, ady, aly, age, acy, aga,
ago(0) : aho, agr, agt, ako, ado, agy, aga, age,
agr(0) : air, agt, aer, agy, aga, age, ago,
agt(0) : agy, aga, aht, ait, act, aet, aft, age, ago, agr,
aha(0) : ahi, aho, ahs, aga, aht, ahu, ada, ala, aka,
ahi(0) : aho, ahs, aht, ahu, aha,
aht(0) : act, ahu, aho, ait, aet, ahs, aft, aha, ahi, agt,
aid(0) : ald, ais, aik, ait, ail, aim, ain, air, aix, add, afd,
ako(0) : ake, aku, aho, ado, ago, aka,
aye(0) : age, ays, ayu, ake, ade, ale,
aik(0) : ait, alk, ail, aim, ain, air, ais, aix, ack, aid,
ail(0) : aim, ain, air, aik, ais, all, aid, ait, aix,
aim(0) : ain, adm, air, aik, ais, ail, aid, ait, aix,
ain(0) : air, aik, ais, ail, aid, ait, aix, aim,
air(0) : aik, ais, ail, aid, agr, ait, aix, aer, aim, ain,
ais(0) : aik, ays, aid, ait, ads, vis, ahs, aix, aes, ail, aim, ain, air,
ays(0) : aye, ayu, ahs, ads, aes, ais,
ait(0) : aet, aix, act, aft, agt, aht, ail, aid, aik, aim, ain, air, ais,
ayu(0) : aku, ahu, aye, ays,
aix(0) : ait, ail, aid, ais, aik, aim, ain, air,
aka(0) : aga, aku, ake, ako, ada, ala, aha,
aku(0) : ahu, ake, ayu, ako, aka,
ala(0) : alb, alk, alc, all, aka, ald, ale, alf, alg, ada, aly, aga, aha,
alb(0) : alk, alc, afb, all, ald, ale, alf, alg, aly, ala,
alc(0) : all, ald, ale, alf, alg, ala, aly, adc, alk, alb,
ald(0) : ale, alf, alg, aly, aid, alk, add, all, afd, ala, alb, alc,
ale(0) : alf, alg, aly, alk, age, all, ake, ade, aye, ala, alb, alc, ald,
alg(0) : ala, aly, alc, alk, ald, all, alf, alb, ale,
aly(0) : alc, alk, ald, acy, all, alf, ady, ala, agy, alb, ale, alg,
alk(0) : ald, aik, all, alf, ack, ala, alb, alc, ale, alg, aly,
all(0) : alf, alc, ail, aly, ala, alb, ald, ale, alg, alk,
damp lamp limp lime like
Last updated