#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
#

"""Line-oriented check for common misspellings and discouraged wording.

Each entry of ``misspellings`` / ``alternates`` maps a literal word or
phrase to its suggested replacement.  ``spellcheck`` scans a file object
line by line and writes one diagnostic per (line, entry) hit.
"""

import re
import sys

# Message templates; arguments are (filename, lineno, found, suggestion).
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'

# Common misspelling -> suggested correction(s).
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}

# Correctly spelled but discouraged wording -> preferred wording.
# Currently empty; entries use the same shape as ``misspellings``.
alternates = {
}


def _compile_entries(table):
    """Turn a {word: suggestion} table into (regex, word, suggestion) tuples.

    Each word is matched case-insensitively as a whole word/phrase.  Keys
    are literal text (they are echoed verbatim in diagnostics), so they are
    escaped before being placed in the pattern.
    """
    # .items() rather than .iteritems(): the latter exists only on Python 2.
    return [
        (re.compile(r'\b%s\b' % re.escape(word), re.IGNORECASE), word, fix)
        for word, fix in table.items()
    ]


# Pre-compiled once at import time so spellcheck() does no compilation
# per line.
misspellingREs = _compile_entries(misspellings)
alternateREs = _compile_entries(alternates)


def check(errmsg, output, filename, line, lineno, entry):
    """Report *entry* if its pattern occurs anywhere in *line*.

    errmsg   -- format string taking (filename, lineno, found, suggestion)
    output   -- object with a write() method that receives the diagnostic
    filename -- name to show in the diagnostic
    line     -- text of the line being examined
    lineno   -- 1-based line number to show in the diagnostic
    entry    -- (compiled regex, word, suggestion) tuple

    Returns 1 if a diagnostic was written, 0 otherwise.  At most one
    diagnostic is written per call, even if the word occurs several times.
    """
    regex, word, suggestion = entry
    if regex.search(line):
        output.write(errmsg % (filename, lineno, word, suggestion))
        return 1
    return 0


def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan *fh* for known misspellings and discouraged alternates.

    fh       -- seekable, line-iterable file object; rewound to offset 0
                before scanning
    filename -- name used in diagnostics; defaults to ``fh.name``
    output   -- object with a write() method (default: sys.stderr)
    opts     -- accepted for call compatibility and not used here

    Returns 1 if any diagnostic was written, 0 if the input was clean.
    """
    if not filename:
        filename = fh.name

    ret = 0
    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for entry in misspellingREs:
            ret |= check(spellMsg, output, filename, line, lineno, entry)
        for entry in alternateREs:
            ret |= check(altMsg, output, filename, line, lineno, entry)

    return ret