xref: /freebsd/usr.bin/fortune/tools/do_uniq.py (revision 43faedc1339a9624c7acedb7f3e5624e64da5b99)
1#!/usr/local/bin/python
2#
3# $FreeBSD$
4#
5# an aggressive little script for trimming duplicate cookies
6from __future__ import print_function
7import argparse
8import re
9
# Filler words that hash() strips out of a fortune before fingerprinting it.
#
# hash() removes the entries in list order, and the list runs from the
# longest entry down to single letters, so e.g. "hadnot" is already gone by
# the time "had" is processed.  Punctuation has been deleted before the list
# is applied, which is why contractions appear as "dont", "hadnt", "ll", "ve".
wordlist = [
    # six letters
    "hadnot",
    # five letters
    "donot",
    "hadnt",
    # four letters
    "dont",
    "have",
    "more",
    "will",
    "your",
    # three letters
    "and",
    "are",
    "had",
    "the",
    "you",
    # two letters
    "am",
    "an",
    "is",
    "ll",
    "ve",
    "we",
    # single letters
    "a",
    "d",
    "i",
    "m",
    "s",
]
18
19
def hash(fortune):
    """Return a fuzzy fingerprint of *fortune* for spotting near-duplicates.

    The text is lower-cased, every non-alphanumeric character (whitespace,
    punctuation, underscores) is deleted, each entry of the module-level
    ``wordlist`` is removed in list order, and the first 30 characters of
    whatever remains are returned.

    Because the text has already been squashed into one run of characters,
    the ``wordlist`` entries are removed as plain substrings, not as whole
    words; the single-letter entries therefore delete those letters
    everywhere.

    Note: this function shadows the builtin ``hash`` inside this module.
    The name is kept because ``edit`` calls it.

    :param fortune: text of one fortune (str).
    :returns: a str of at most 30 characters; fortunes that share it are
        treated as candidate duplicates.
    """
    f = fortune.lower()
    # Raw string on purpose: in a plain literal '\W' is an invalid escape
    # sequence, which newer Python versions warn about at compile time.
    f = re.sub(r'[\W_]', '', f)
    for word in wordlist:
        # The entries are literal text, so a plain replace is enough.
        f = f.replace(word, '')
    # NOTE: alternative fingerprints that were experimented with and left
    # disabled: dropping the vowels [aeiouy], keeping only the vowels, and
    # keeping the last 30 characters instead of the first 30.
    return f[:30]
31
32
def edit(datfile):
    """Interactively weed near-duplicate fortunes out of *datfile*.

    The file is read as a sequence of fortunes, each terminated by a line
    holding only ``%``.  Fortunes are grouped by ``hash(fortune)``; every
    fortune whose group has more than one member is shown on the terminal
    together with the other members of its group, and the user is asked
    whether to remove it.  Answering ``y`` drops that fortune and stops
    prompting for the rest of its group (the remaining members are kept).
    Any other answer keeps the fortune.

    The result is written to ``datfile + "~"``; *datfile* itself is only
    read.

    :param datfile: path of the fortune data file to process.
    """
    fortunes = []
    pending = []
    with open(datfile, "r") as datfiledf:
        for line in datfiledf:
            # A lone "%" ends the current fortune.  Accept it without a
            # newline too, which happens when it is the last line of a file
            # that lacks a final newline.
            if line in ("%\n", "%"):
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    if pending:
        # Text after the last "%" is a fortune that was never terminated.
        # Keep it (newline-terminated) instead of silently losing it from
        # the output file.
        trailing = "".join(pending)
        if not trailing.endswith("\n"):
            trailing += "\n"
        fortunes.append(trailing)

    # Fingerprint every fortune once; the keys are reused for both the
    # grouping pass and the writing pass.
    keys = [hash(fortune) for fortune in fortunes]
    groups = {}
    for key, fortune in zip(keys, fortunes):
        groups.setdefault(key, []).append(fortune)
    # Only groups with at least two members are duplicate candidates.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + "~", "w") as o:
        for key, fortune in zip(keys, fortunes):
            if key in dups:
                # Push earlier output off the screen.
                print('\n' * 50)
                # sep='' keeps the "%" at the start of its own line, exactly
                # as it appears in the data file (each fortune already ends
                # with a newline).
                for f in dups[key]:
                    if f != fortune:
                        print(f, '%', sep='')
                print(fortune, '%', sep='')
                # NOTE(review): under Python 2 the builtin input() evaluates
                # what is typed; this prompt assumes Python 3 semantics.
                if input("Remove last fortune? ") == 'y':
                    # Drop this fortune and retire the group so its other
                    # members are written without further prompts.
                    del dups[key]
                    continue
            o.write(fortune + "%\n")
64
if __name__ == "__main__":
    # Command-line entry point.  Guarded so that importing this module (for
    # example to reuse hash()) does not parse the importer's sys.argv or
    # start an interactive editing session.
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    parser.add_argument(
        "filename", type=str, nargs=1,
        help="fortune data file to scan; the result is written to FILENAME~",
    )
    args = parser.parse_args()
    # nargs=1 makes args.filename a one-element list.
    edit(args.filename[0])
69