xref: /freebsd/usr.bin/fortune/tools/do_uniq.py (revision 6ae1554a5d9b318f8ad53ccc39fa5a961403da73)
#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies
6*6ae1554aSColin Percival
import argparse
import re
9*6ae1554aSColin Percival
# Filler words stripped from a fortune before it is fingerprinted.  hash()
# removes them one after another in list order, so the list is kept
# longest-first: a short entry such as 'had' must not be removed before a
# longer entry that contains it ('hadnot', 'hadnt').
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]


def hash(fortune):
    """Return a fuzzy fingerprint used to group near-duplicate fortunes.

    The text is lower-cased, every non-alphanumeric character (including
    underscores) is dropped, each entry of ``wordlist`` is removed in turn,
    and the first 30 remaining characters are returned.  Fortunes that
    differ only in case, punctuation, spacing or filler words therefore
    produce the same key.

    Note: this intentionally keeps the original name even though it shadows
    the built-in ``hash`` inside this module.

    fortune: the fortune text as a ``str``.
    Returns: a ``str`` of at most 30 characters (possibly empty).
    """
    f = fortune.lower()
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    f = re.sub(r'[\W_]', '', f)
    # The entries are plain literals, so str.replace is sufficient.  The
    # removals must stay sequential because each one can create new matches
    # for later (shorter) words.
    for word in wordlist:
        f = f.replace(word, '')
    # Only the leading 30 characters take part in the comparison.  (Earlier
    # experiments with vowel-only / consonant-only keys and with the last 30
    # characters were left disabled in the original script.)
    return f[:30]
31*6ae1554aSColin Percival
32*6ae1554aSColin Percival
def _read_fortunes(datfile):
    """Return the fortunes stored in *datfile*, without their '%' lines.

    A fortune is the run of lines up to a line consisting solely of '%'.
    Text after the last '%' line is kept as a final fortune (and given a
    trailing newline if it lacks one) instead of being discarded.
    """
    fortunes = []
    pending = []
    with open(datfile, "r") as src:
        for line in src:
            # A bare "%" can only be the last line of a file that does not
            # end in a newline; treat it as a separator as well.
            if line == "%\n" or line == "%":
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    if pending:
        tail = "".join(pending)
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)
    return fortunes


def edit(datfile):
    """Interactively remove near-duplicate fortunes from *datfile*.

    Fortunes are grouped by their hash() key.  For every fortune belonging
    to a group of two or more, the other members of the group and then the
    fortune itself are printed (after 50 blank lines to push old output
    away) and the user is asked whether to remove it.  Answering 'y' drops
    that fortune and stops further prompting for its group; any other
    answer keeps it.

    The result is written to ``datfile + "~"``; *datfile* itself is only
    read.

    datfile: path of the fortune file to process.
    """
    fortunes = _read_fortunes(datfile)
    # Compute each key once; it is needed for grouping and again on output.
    keys = [hash(fortune) for fortune in fortunes]

    groups = {}
    for key, fortune in zip(keys, fortunes):
        groups.setdefault(key, []).append(fortune)
    # Only keys shared by more than one fortune are candidates for removal.
    dups = {key: members for key, members in groups.items()
            if len(members) > 1}

    with open(datfile + "~", "w") as o:
        for key, fortune in zip(keys, fortunes):
            if key in dups:
                print('\n' * 50)
                for f in dups[key]:
                    if f != fortune:
                        print(f, '%')
                print(fortune, '%')
                if input("Remove last fortune? ") == 'y':
                    # One removal settles the group: the remaining members
                    # are written out without asking again.
                    del dups[key]
                    continue
            o.write(fortune + "%\n")
64*6ae1554aSColin Percival
def main(argv=None):
    """Parse the command line and run the interactive duplicate trimmer.

    argv: list of arguments to parse; ``None`` lets argparse read the
          process's own command line.
    """
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    # nargs=1 makes argparse store the single filename in a one-element list.
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args(argv)
    edit(args.filename[0])


# Guarded so that importing this module does not parse arguments or prompt.
if __name__ == "__main__":
    main()
69