The exercise taken from programmingpraxis is to generate all anagrams from a phrase, e.g “hey man” -> “may hen” etc. The words must come from a dictionary. My solution has some optimizations in place. The main one: multiple words that are anagrams are treated as one and trivially expanded after the main part of the algorithm completes.
import re
from collections import defaultdict
import functools
import time
import itertools
PAT_SMALL = re.compile(r"[a-z]+$")
def get_letter_count_dict(word):
d = defaultdict(int)
for ch in word:
d[ch] += 1
return d
def filt(test_word, against):
t = get_letter_count_dict(test_word)
for ch in t:
if ch not in against or t[ch] > against[ch]:
return False
return True
def filtered_read(fname, letter_count, length_min):
f = open(fname)
words = [ w.rstrip() for w in f.readlines() ]
f.close()
words = [ w for w in words if len(w) >= length_min and PAT_SMALL.match(w) and filt(w, letter_count) ]
print("Using filtered dictionary with %d lowercase words" % len(words))
return words
def gen_anagrams(word_letter_count, n, words_and_dicts, start_index):
retv= []
k = start_index
n_words = len(words_and_dicts)
while k < n_words:
w1 = words_and_dicts[k][0]
if len(w1) > n:
k += 1
break
new_let_count = words_and_dicts[k][1]
diff_let_count = word_letter_count.copy()
good = True
left = n
for ch in new_let_count:
if ch in diff_let_count:
letters_taken = new_let_count[ch]
diff_let_count[ch] -= letters_taken
if diff_let_count[ch] < 0:
good = False
break
left -= letters_taken
else:
good = False
break
if good:
if left == 0:
retv = retv + [ [w1,] ]
else:
leftover_sol = gen_anagrams(diff_let_count, left, words_and_dicts, k)
for t in leftover_sol:
retv.append([ w1 ] + t)
k += 1
return retv
def expand(solutions, sorted2word):
i = 1
for sol in solutions:
for mapped in itertools.product(*(sorted2word[anagram] for anagram in sol)):
print(i, mapped)
i += 1
def find_anagrams(word, min_length = 3, do_expand = False):
ss = "".join(sorted([c for c in word if 'a' <= c <= 'z' ]))
ss_d = get_letter_count_dict(ss)
n = len(ss)
words = filtered_read("wordlists/mwords/74550com.mon", ss_d, min_length)
# operate on equivalence classes of all anagrams
# create structures for reverse expansion
sorted2word = defaultdict(list)
wordclass = set()
for w in words:
sortedletters = "".join(sorted(list(w)))
sorted2word[sortedletters] += [ w ]
wordclass.add(sortedletters)
# order by length ascending, this is used in gen_anagrams
class_dicts = sorted([ (len(w), w, get_letter_count_dict(w)) for w in wordclass if len(w) <= n ])
class_dicts = [ (trip[1], trip[2]) for trip in class_dicts ]
start_time = time.clock()
r = gen_anagrams(ss_d, n, class_dicts, 0)
stop_time = time.clock()
print("%d unique solutions found in %5.2f seconds" % (len(r), stop_time - start_time))
if do_expand:
expand(r, sorted2word)
if __name__ == "__main__":
find_anagrams("programming praxis", do_expand = True)