#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#

"""Line-oriented checker for common misspellings and discouraged spellings.

Three word tables drive the check:

* ``misspellings`` -- common typos, matched case-insensitively.
* ``alternates``   -- valid spellings that are discouraged for consistency,
                      matched case-insensitively.
* ``case``         -- words with a required capitalisation, matched
                      case-sensitively.

Each table is compiled once at import time into a list of
``(compiled_regex, word, replacement)`` tuples.
"""

import re
import sys

spellMsg = 'contains "{}", a common misspelling of "{}"'
altMsg = ('contains "{}"; please use "{}" instead '
          'for consistency with other documentation')
caseMsg = 'contains "{}"; please use "{}" instead'

misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}

alternates = {
    'judgement': 'judgment',
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}

case = {
    'Illumos': 'illumos',
}


def _compile_table(table, flags=0):
    """Compile a word table into ``(regex, word, replacement)`` tuples.

    Each key of *table* is matched as a whole word (``\\b`` on both sides).
    Keys are passed through ``re.escape`` so that a future entry containing
    regex metacharacters is still matched literally.  The result preserves
    the iteration order of *table*.
    """
    return [
        (re.compile(r'\b%s\b' % re.escape(word), flags), word, replacement)
        for word, replacement in table.items()
    ]


misspellingREs = _compile_table(misspellings, re.IGNORECASE)
alternateREs = _compile_table(alternates, re.IGNORECASE)
# Capitalisation rules only make sense when matched case-sensitively.
caseREs = _compile_table(case)


def spellcheck_line(line):
    """Return a list of error messages for a single line of text.

    Messages are produced in a fixed order: all misspellings first, then
    discouraged alternates, then capitalisation errors, each in the order
    of its table.  A word is reported at most once per line, and the
    message quotes the table entry rather than the text as it appeared.
    Returns an empty list when nothing matches.
    """
    errs = []
    checks = (
        (misspellingREs, spellMsg),
        (alternateREs, altMsg),
        (caseREs, caseMsg),
    )
    for table, msg in checks:
        for regex, word, replacement in table:
            if regex.search(line):
                errs.append(msg.format(word, replacement))
    return errs


def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Spell-check every line of the open file *fh*.

    The handle is rewound to offset 0 before reading.  Lines read as
    ``bytes`` (binary-mode handle) are decoded with undecodable bytes
    replaced; lines that are already ``str`` (text-mode handle) are used
    as-is.

    fh        -- seekable file-like object to check.
    filename  -- name used in messages; defaults to ``fh.name``.
    output    -- writable text stream that receives one
                 ``"<filename>: Line <n> <message>"`` line per problem.
    opts      -- accepted and ignored.

    Returns 1 if at least one problem was reported, otherwise 0.
    """
    ret = 0

    if not filename:
        filename = fh.name

    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        # Only binary handles yield bytes; str has no decode() on Python 3.
        if isinstance(line, bytes):
            line = line.decode(errors='replace')
        for err in spellcheck_line(line):
            output.write('{}: Line {} {}\n'.format(filename, lineno, err))
            ret = 1

    return ret