#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#

import re
import sys

# Message templates; each takes (text that was looked for, preferred text).
spellMsg = 'contains "{}", a common misspelling of "{}"'
altMsg = 'contains "{}"; please use "{}" instead for consistency with other documentation'
caseMsg = 'contains "{}"; please use "{}" instead'

# Misspelling -> correction(s).  Matched case-insensitively on word
# boundaries.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'pesudo': 'pseudo',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}

# Valid spellings that are replaced by a preferred form.  Matched
# case-insensitively on word boundaries.
alternates = {
    'judgement': 'judgment',
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}

# Capitalisation fixes.  Matched case-sensitively on word boundaries.
case = {
    'Illumos': 'illumos',
}


def _compile_table(words, flags=0):
    """Compile a word table into a list of (regex, found, preferred) tuples.

    words maps the text to look for onto its preferred replacement.  Each
    key is matched literally (it is passed through re.escape, so a key
    containing regex metacharacters cannot change the pattern's meaning)
    and only between word boundaries.  flags is handed to re.compile.
    """
    return [
        (re.compile(r'\b%s\b' % re.escape(found), flags), found, preferred)
        for found, preferred in words.items()
    ]


misspellingREs = _compile_table(misspellings, re.IGNORECASE)
alternateREs = _compile_table(alternates, re.IGNORECASE)
caseREs = _compile_table(case)


def spellcheck_line(line):
    """Return a list of complaint strings for a single line of text.

    The misspelling table is consulted first, then the alternates table,
    then the case table; within a table the entries are tried in table
    order.  Each entry contributes at most one message per line, however
    many times it occurs.  An empty list means nothing was found.
    """
    errs = []
    for table, msg in ((misspellingREs, spellMsg),
                       (alternateREs, altMsg),
                       (caseREs, caseMsg)):
        for regex, found, preferred in table:
            if regex.search(line):
                errs.append(msg.format(found, preferred))
    return errs


def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Spell-check every line of the open file fh.

    fh is rewound with seek(0) and read line by line.  It may have been
    opened in binary or in text mode: bytes lines are decoded with
    undecodable sequences replaced, str lines are used as they are.

    filename is the name used in messages; when it is empty or None,
    fh.name is used instead.  One line per problem is written to output
    in the form "<filename>: Line <n> <message>", with n counting from 1.
    Any extra keyword arguments are accepted and ignored.

    Returns 1 if at least one problem was reported, otherwise 0.
    """
    ret = 0

    if not filename:
        filename = fh.name

    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        # Only binary-mode handles yield bytes; text-mode lines have no
        # decode() method under Python 3.
        if isinstance(line, bytes):
            line = line.decode(errors='replace')
        for err in spellcheck_line(line):
            output.write('{}: Line {} {}\n'.format(
                filename, lineno, err))
            ret = 1

    return ret