#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
#

"""Line-oriented check for common misspellings and discouraged spellings.

Two tables drive the check: ``misspellings`` (wrong word -> suggested
correction) and ``alternates`` (valid but discouraged spelling -> preferred
spelling).  ``spellcheck()`` scans a file object line by line and writes one
message per table entry found on each line.
"""

import re
import sys

# Message templates; the arguments are
# (filename, line number, offending text, suggested replacement).
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'

# Misspelled word (or doubled-word phrase) -> suggested correction(s).
# Matching is case-insensitive and anchored on word boundaries.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}

# Discouraged spelling -> spelling to use instead; reported with altMsg.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable'
}


def _compile_table(table):
    """Return a list of (compiled regex, word, replacement) for *table*.

    Each key is matched case-insensitively as a whole word.  The key is
    passed through re.escape() so that it is always treated as literal
    text, even if an entry containing a regex metacharacter is added.
    """
    return [
        (re.compile(r'\b%s\b' % re.escape(word), re.IGNORECASE), word, fix)
        for word, fix in table.items()
    ]


# Pre-compiled once at import time so spellcheck() does no compilation
# per line.  Entries are (regex, original table key, replacement).
misspellingREs = _compile_table(misspellings)
alternateREs = _compile_table(alternates)


def check(errmsg, output, filename, line, lineno, entry):
    """Report *entry* if its pattern occurs anywhere in *line*.

    errmsg   -- format string taking (filename, lineno, word, replacement)
    output   -- object with a write() method that receives the message
    filename -- name used in the message
    line     -- text to search
    lineno   -- line number used in the message
    entry    -- (compiled regex, word, replacement) tuple

    At most one message is written per call, however many times the
    pattern occurs in the line.  Returns 1 if a message was written,
    0 otherwise.
    """
    pattern, word, replacement = entry[0], entry[1], entry[2]
    if not pattern.search(line):
        return 0
    output.write(errmsg % (filename, lineno, word, replacement))
    return 1


def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan the file object *fh* for known misspellings and alternates.

    fh       -- seekable, line-iterable file object; it is rewound to the
                start before scanning
    filename -- name used in messages; defaults to fh.name when not given
    output   -- object with a write() method (default: sys.stderr)
    opts     -- extra keyword arguments are accepted and ignored

    Lines are numbered from 1.  For each line, every matching entry of
    the misspelling table is reported first, then every matching entry of
    the alternates table.  Returns 1 if anything was reported, else 0.
    """
    if not filename:
        filename = fh.name

    ret = 0
    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for entry in misspellingREs:
            ret |= check(spellMsg, output, filename, line, lineno, entry)
        for entry in alternateREs:
            ret |= check(altMsg, output, filename, line, lineno, entry)

    return ret