#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies
#
# Reads a fortune file (entries terminated by a line holding only "%"),
# groups entries whose normalized text collides, and interactively asks
# which of the colliding entries to drop.  The result is written next to
# the input as "<filename>~"; the input file itself is never modified.
from __future__ import print_function
import argparse
import re

try:
    # Python 2: the builtin input() would eval() the typed reply.
    _read_answer = raw_input  # noqa: F821
except NameError:
    _read_answer = input

# Filler words stripped before comparing fortunes.  Order matters: the
# words are removed one after another, so longer words must come before
# the shorter words they contain (e.g. 'hadnot' before 'had').
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]

# Everything that is not a letter or digit (underscore included).
_NON_ALNUM = re.compile(r'[\W_]')

# Number of leading characters of the normalized text used as the key.
_KEY_LENGTH = 30


def hash(fortune):
    """Return the fuzzy comparison key for *fortune*.

    The text is lower-cased, every non-alphanumeric character (and the
    underscore) is removed, each entry of ``wordlist`` is deleted in list
    order, and the first 30 remaining characters are returned.  Fortunes
    that yield the same key are treated as probable duplicates.

    Earlier experiments that are no longer active: dropping the vowels,
    keeping only the vowels, and keying on the last 30 characters instead
    of the first 30.

    Note: this intentionally keeps its historical name even though it
    shadows the builtin ``hash`` inside this module.
    """
    key = _NON_ALNUM.sub('', fortune.lower())
    for word in wordlist:
        # The words are plain letters, so a literal replace is equivalent
        # to the regex substitution and cheaper.
        key = key.replace(word, '')
    return key[:_KEY_LENGTH]


def _read_fortunes(datfile):
    """Split *datfile* into fortunes.

    Returns ``(fortunes, remainder)`` where ``fortunes`` is the list of
    entries that were closed by a "%" line (each without its terminator)
    and ``remainder`` is any text following the last terminator.
    """
    fortunes = []
    pending = []
    with open(datfile, "r") as datfiledf:
        for line in datfiledf:
            # Accept a final "%" that lacks its newline as a terminator too.
            if line.rstrip("\n") == "%":
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    return fortunes, "".join(pending)


def edit(datfile):
    """Interactively remove duplicate fortunes from *datfile*.

    For every fortune whose key (see ``hash``) is shared with another
    fortune, the colliding entries are printed and the user is asked
    whether to drop the one shown last.  Answering ``y`` drops it and
    stops further prompting for that key; any other answer keeps it.
    The surviving fortunes are written to ``datfile + "~"``.

    Text found after the last "%" line is not a complete fortune; it is
    copied to the output unchanged instead of being discarded.
    """
    fortunes, remainder = _read_fortunes(datfile)

    # Compute each key once and group the fortunes by it.
    keyed = [(hash(fortune), fortune) for fortune in fortunes]
    groups = {}
    for key, fortune in keyed:
        groups.setdefault(key, []).append(fortune)
    # Only keys shared by more than one fortune need a decision.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + "~", "w") as o:
        for key, fortune in keyed:
            if key in dups:
                # Push the previous candidates off the screen.
                print('\n' * 50)
                for f in dups[key]:
                    if f != fortune:
                        print(f, '%')
                print(fortune, '%')
                if _read_answer("Remove last fortune? ") == 'y':
                    # One removal settles this group; the remaining
                    # members are kept without asking again.
                    del dups[key]
                    continue
            o.write(fortune + "%\n")
        o.write(remainder)


def main():
    """Parse the command line and process the named fortune file."""
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args()
    edit(args.filename[0])


if __name__ == "__main__":
    main()