xref: /freebsd/usr.bin/fortune/tools/do_uniq.py (revision 749f65e3e30e857301e44db4a87eca99d45cdc66)
#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies
from __future__ import print_function
import argparse
import re
96ae1554aSColin Percival
# Filler words and contraction fragments that hash() deletes from a fortune
# after it has been lowercased and stripped of punctuation and whitespace
# (which is why "don't" appears here as 'dont' and "'ll" as 'll').
# Keep the list longest-first: hash() removes entries in list order, so a
# long entry must be deleted before any shorter entry contained in it
# (e.g. 'hadnot' before 'had').
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]
186ae1554aSColin Percival
196ae1554aSColin Percival
# Matches every character that is not alphanumeric (non-word characters
# plus the underscore).  Raw string: '\W' in a plain literal is an invalid
# escape sequence and triggers a warning on current Python versions.
_NON_ALNUM = re.compile(r'[\W_]')


# NOTE: this deliberately keeps the historical name even though it shadows
# the builtin hash(); the rest of the script calls it by this name.
def hash(fortune, words=None):
    """Return a fuzzy fingerprint used to spot near-duplicate fortunes.

    The text is lowercased, every non-alphanumeric character is dropped,
    each entry of *words* is deleted in order, and the first 30 characters
    of what remains are returned.  Two fortunes with the same fingerprint
    are treated as candidate duplicates.

    fortune -- the fortune text.
    words   -- literal substrings to delete, in order; defaults to the
               module-level ``wordlist`` when omitted.
    """
    if words is None:
        words = wordlist
    squeezed = _NON_ALNUM.sub('', fortune.lower())
    for word in words:
        # The entries are plain text, so a literal replace is all that is
        # needed; no regular expression is involved.
        squeezed = squeezed.replace(word, '')
    return squeezed[:30]
316ae1554aSColin Percival
326ae1554aSColin Percival
def edit(datfile):
    """Interactively drop near-duplicate fortunes from *datfile*.

    The file is read as a sequence of fortunes, each terminated by a line
    holding only ``%``.  Fortunes whose hash() keys collide are printed
    together on stdout and the user is asked whether to drop the one
    currently being processed.  Answering ``y`` drops it and ends the
    prompting for that key, so the remaining fortunes of the group are
    kept without further questions.

    The result is written to ``datfile + "~"``; *datfile* itself is only
    read.  Trailing text that is not followed by a ``%`` line is kept as a
    final fortune (unless it is blank) instead of being silently lost.
    """
    groups = {}    # key -> every fortune sharing that key, in file order
    entries = []   # (key, fortune) pairs, in file order
    pending = []   # lines of the fortune currently being collected

    def record(text):
        # Compute the key once and remember it for the writing pass.
        key = hash(text)
        groups.setdefault(key, []).append(text)
        entries.append((key, text))

    with open(datfile, "r") as src:
        for line in src:
            # rstrip() also accepts a final '%' that lacks its newline.
            if line.rstrip("\n") == "%":
                record("".join(pending))
                pending = []
            else:
                pending.append(line)

    # Text after the last '%' separator: keep it rather than drop it.
    tail = "".join(pending)
    if tail.strip():
        if not tail.endswith("\n"):
            # Make sure the '%' written below lands on its own line.
            tail += "\n"
        record(tail)

    # Only keys shared by more than one fortune need user attention.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + "~", "w") as out:
        for key, fortune in entries:
            if key in dups:
                # Push earlier output off the screen before showing the group.
                print('\n' * 50)
                for other in dups[key]:
                    if other != fortune:
                        print(other, '%')
                print(fortune, '%')
                if input("Remove last fortune? ") == 'y':
                    del dups[key]
                    continue
            out.write(fortune + "%\n")
646ae1554aSColin Percival
def main():
    """Parse the command line and run edit() on the named cookie file."""
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    # nargs=1 makes argparse store the filename as a one-element list.
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args()
    edit(args.filename[0])


# Only act when executed as a script, so importing this module (for reuse or
# testing) does not parse sys.argv or start an interactive session.
if __name__ == "__main__":
    main()
69