#!/usr/local/bin/python
#
#
# an aggressive little script for trimming duplicate cookies
#
# Usage: <script> FILENAME
#
# FILENAME is a fortune-style text file whose entries are each terminated by
# a line containing only "%".  Entries whose normalised keys (see hash())
# collide are shown to the user, who decides interactively whether to drop
# them.  The result is written to FILENAME + "~"; the input is not modified.
from __future__ import print_function
import argparse
import re

# Filler words stripped from a fortune before comparison.  Order matters:
# entries are removed one after another, so longer words must precede the
# shorter words they contain (e.g. 'hadnot' before 'had' before 'a').
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]

# Everything that is not a letter or digit (underscore included).
_NON_ALNUM = re.compile(r'[\W_]')

# Number of leading characters of the normalised text kept as the key.
_KEY_LENGTH = 30


def hash(fortune):
    """Return a fuzzy comparison key for *fortune*.

    The text is lower-cased, stripped of every non-alphanumeric character,
    has each entry of ``wordlist`` removed in list order, and is finally
    truncated to its first 30 characters.  Fortunes that differ only in
    case, punctuation, spacing or filler words therefore share a key.

    NOTE: the name shadows the builtin ``hash``; it is kept because it is
    this module's existing public name.
    """
    key = _NON_ALNUM.sub('', fortune.lower())
    for word in wordlist:
        # The words are plain literals, so str.replace is equivalent to the
        # regex substitution and avoids treating them as patterns.
        key = key.replace(word, '')
    # Alternative keys that were tried and left disabled:
    #   key = re.sub('[aeiouy]', '', key)     # consonants only
    #   key = re.sub('[^aeiouy]', '', key)    # vowels only
    #   key = key[-_KEY_LENGTH:]              # tail instead of head
    return key[:_KEY_LENGTH]


def _read_fortunes(datfile):
    """Return the fortunes in *datfile* as a list of strings, in file order.

    Each fortune keeps its own trailing newline.  A final fortune that is
    not followed by a "%" line is still returned (with a newline appended if
    it lacks one) so that it is not lost when the file is rewritten.
    """
    fortunes = []
    current = ""
    with open(datfile, "r") as source:
        for line in source:
            # A bare "%" can only be the last line of a file that lacks a
            # trailing newline; treat it as a separator too.
            if line == "%\n" or line == "%":
                fortunes.append(current)
                current = ""
            else:
                current += line
    if current:
        if not current.endswith("\n"):
            current += "\n"
        fortunes.append(current)
    return fortunes


def _ask(prompt):
    """Read one line of raw text from the user on both Python 2 and 3."""
    try:
        reader = raw_input  # Python 2: input() would eval() the reply
    except NameError:
        reader = input
    return reader(prompt)


def edit(datfile):
    """Interactively drop near-duplicate fortunes from *datfile*.

    Fortunes are grouped by ``hash()``.  For every fortune that belongs to a
    group of two or more, the other differing members of the group and then
    the fortune itself are printed, and the user is asked whether to remove
    it.  Answering ``y`` drops that fortune and stops prompting for the rest
    of its group; any other answer keeps it.  The surviving fortunes are
    written, each followed by a "%" line, to ``datfile + "~"``.
    """
    fortunes = _read_fortunes(datfile)

    groups = {}
    for fortune in fortunes:
        groups.setdefault(hash(fortune), []).append(fortune)
    # Only keys shared by at least two fortunes are of interest.
    dups = {key: members for key, members in groups.items()
            if len(members) > 1}

    with open(datfile + "~", "w") as o:
        for fortune in fortunes:
            key = hash(fortune)
            if key in dups:
                # Push earlier output off the screen.
                print('\n' * 50)
                for f in dups[key]:
                    if f != fortune:
                        print(f, '%')
                print(fortune, '%')
                if _ask("Remove last fortune? ") == 'y':
                    # One member removed: the rest of the group is kept
                    # without further questions.
                    del dups[key]
                    continue
            o.write(fortune + "%\n")


def main():
    """Parse the command line and de-duplicate the named file."""
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args()
    edit(args.filename[0])


if __name__ == "__main__":
    main()