xref: /illumos-gate/usr/src/tools/onbld/Checks/SpellCheck.py (revision a90997d2f0a442a8aa8a56cbbbbf577716a18742)
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
2671af3be3SCody Peter Mello
import re, sys
2871af3be3SCody Peter Mello
# Message templates written for each finding.  Both take the same four
# substitutions, in order: file name, line number, the offending word as
# listed in the lookup table, and the suggested replacement(s).
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = ('%s: Line %d contains "%s"; please use "%s" instead '
    'for consistency with other documentation\n')
3171af3be3SCody Peter Mello
# Common misspellings (and accidentally doubled words) mapped to the
# correction suggested in the report.  Keys are lower case; they are
# compiled into case-insensitive, whole-word patterns further down.
# A value may list several comma-separated candidates when the intended
# word is ambiguous.
misspellings = {
	'absense': 'absence',
	'accessable': 'accessible',
	'accomodate': 'accommodate',
	'accomodation': 'accommodation',
	'accross': 'across',
	'acheive': 'achieve',
	'addional': 'additional',
	'addres': 'address',
	'admininistrative': 'administrative',
	'adminstered': 'administered',
	'adminstrate': 'administrate',
	'adminstration': 'administration',
	'adminstrative': 'administrative',
	'adminstrator': 'administrator',
	'admissability': 'admissibility',
	'adress': 'address',
	'adressable': 'addressable',
	'adressed': 'addressed',
	'adressing': 'addressing, dressing',
	'aginst': 'against',
	'agression': 'aggression',
	'agressive': 'aggressive',
	'alot': 'a lot, allot',
	'and and': 'and',
	'apparantly': 'apparently',
	'appearence': 'appearance',
	'arguement': 'argument',
	'assasination': 'assassination',
	'auxilliary': 'auxiliary',
	'basicly': 'basically',
	'begining': 'beginning',
	'belive': 'believe',
	'beteen': 'between',
	'betwen': 'between',
	'beween': 'between',
	'bewteen': 'between',
	'bizzare': 'bizarre',
	'buisness': 'business',
	'calender': 'calendar',
	'cemetary': 'cemetery',
	'chauffer': 'chauffeur',
	'collegue': 'colleague',
	'comming': 'coming',
	'commited': 'committed',
	'commitee': 'committee',
	'commiting': 'committing',
	'comparision': 'comparison',
	'comparisions': 'comparisons',
	'compatability': 'compatibility',
	'compatable': 'compatible',
	'compatablity': 'compatibility',
	'compatiable': 'compatible',
	'compatiblity': 'compatibility',
	'completly': 'completely',
	'concious': 'conscious',
	'condidtion': 'condition',
	'conected': 'connected',
	'conjuction': 'conjunction',
	'continous': 'continuous',
	'curiousity': 'curiosity',
	'deamon': 'daemon',
	'definately': 'definitely',
	'desireable': 'desirable',
	'diffrent': 'different',
	'dilemna': 'dilemma',
	'dissapear': 'disappear',
	'dissapoint': 'disappoint',
	'ecstacy': 'ecstasy',
	'embarass': 'embarrass',
	'enviroment': 'environment',
	'exept': 'except',
	'existance': 'existence',
	'familar': 'familiar',
	'finaly': 'finally',
	'folowing': 'following',
	'foriegn': 'foreign',
	'forseeable': 'foreseeable',
	'fourty': 'forty',
	'foward': 'forward',
	'freind': 'friend',
	'futher': 'further',
	'gaurd': 'guard',
	'glamourous': 'glamorous',
	'goverment': 'government',
	'happend': 'happened',
	'harrassment': 'harassment',
	'hierachical': 'hierarchical',
	'hierachies': 'hierarchies',
	'hierachy': 'hierarchy',
	'hierarcical': 'hierarchical',
	'hierarcy': 'hierarchy',
	'honourary': 'honorary',
	'humourous': 'humorous',
	'idiosyncracy': 'idiosyncrasy',
	'immediatly': 'immediately',
	'inaccessable': 'inaccessible',
	'inbetween': 'between',
	'incidently': 'incidentally',
	'independant': 'independent',
	'infomation': 'information',
	'interupt': 'interrupt',
	'intial': 'initial',
	'intially': 'initially',
	'irresistable': 'irresistible',
	'jist': 'gist',
	'knowlege': 'knowledge',
	'lenght': 'length',
	'liase': 'liaise',
	'liason': 'liaison',
	'libary': 'library',
	'maching': 'machine, marching, matching',
	'millenia': 'millennia',
	'millenium': 'millennium',
	'neccessary': 'necessary',
	'negotation': 'negotiation',
	'nontheless': 'nonetheless',
	'noticable': 'noticeable',
	'occassion': 'occasion',
	'occassional': 'occasional',
	'occassionally': 'occasionally',
	'occurance': 'occurrence',
	'occured': 'occurred',
	'occurence': 'occurrence',
	'occuring': 'occurring',
	'ommision': 'omission',
	'orginal': 'original',
	'orginally': 'originally',
	'ouput': 'output',
	'overriden': 'overridden',
	'particuliar': 'particular',
	'pavillion': 'pavilion',
	'peice': 'piece',
	'persistant': 'persistent',
	'politican': 'politician',
	'posession': 'possession',
	'possiblity': 'possibility',
	'preceed': 'precede',
	'preceeded': 'preceded',
	'preceeding': 'preceding',
	'preceeds': 'precedes',
	'prefered': 'preferred',
	'prefering': 'preferring',
	'presense': 'presence',
	'proces': 'process',
	'propoganda': 'propaganda',
	'psuedo': 'pseudo',
	'publically': 'publicly',
	'realy': 'really',
	'reciept': 'receipt',
	'recieve': 'receive',
	'recieved': 'received',
	'reciever': 'receiver',
	'recievers': 'receivers',
	'recieves': 'receives',
	'recieving': 'receiving',
	'recomend': 'recommend',
	'recomended': 'recommended',
	'recomending': 'recommending',
	'recomends': 'recommends',
	'recurse': 'recur',
	'recurses': 'recurs',
	'recursing': 'recurring',
	'refered': 'referred',
	'refering': 'referring',
	'religous': 'religious',
	'rember': 'remember',
	'remeber': 'remember',
	'repetion': 'repetition',
	'reponsible': 'responsible',
	'resistence': 'resistance',
	'retreive': 'retrieve',
	'seige': 'siege',
	'sence': 'since',
	'seperate': 'separate',
	'seperated': 'separated',
	'seperately': 'separately',
	'seperates': 'separates',
	'similiar': 'similar',
	'somwhere': 'somewhere',
	'sould': 'could, should, sold, soul',
	'sturcture': 'structure',
	'succesful': 'successful',
	'succesfully': 'successfully',
	'successfull': 'successful',
	'sucessful': 'successful',
	'supercede': 'supersede',
	'supress': 'suppress',
	'supressed': 'suppressed',
	'suprise': 'surprise',
	'suprisingly': 'surprisingly',
	'sytem': 'system',
	'tendancy': 'tendency',
	'the the': 'the',
	'the these': 'these',
	'therefor': 'therefore',
	'threshhold': 'threshold',
	'tolerence': 'tolerance',
	'tommorow': 'tomorrow',
	'tommorrow': 'tomorrow',
	'tounge': 'tongue',
	'tranformed': 'transformed',
	'transfered': 'transferred',
	'truely': 'truly',
	'trustworthyness': 'trustworthiness',
	'uncommited': 'uncommitted',
	'unforseen': 'unforeseen',
	'unfortunatly': 'unfortunately',
	'unsuccessfull': 'unsuccessful',
	'untill': 'until',
	'upto': 'up to',
	'whereever': 'wherever',
	'wich': 'which',
	'wierd': 'weird',
	'wtih': 'with',
}
24871af3be3SCody Peter Mello
# Accepted spellings that are nevertheless reported, mapped to the form
# that should be used instead for consistency with other documentation.
alternates = {
	'parseable': 'parsable',
	'sub-command': 'subcommand',
	'sub-commands': 'subcommands',
	'writeable': 'writable',
}
25571af3be3SCody Peter Mello
# Pre-compiled lookup tables.  Each element is a
# (compiled pattern, word as listed, suggested replacement) tuple.
# Patterns are anchored with \b on both sides so only whole words (or
# whole phrases such as "the the") match, and are case-insensitive.
misspellingREs = []
alternateREs = []

for misspelling, correct in misspellings.items():
	# re.escape() keeps every table entry literal, so an entry that
	# happens to contain a regex metacharacter cannot alter the pattern.
	regex = re.compile(r'\b%s\b' % re.escape(misspelling), re.IGNORECASE)
	entry = (regex, misspelling, correct)
	misspellingREs.append(entry)

for alternate, correct in alternates.items():
	regex = re.compile(r'\b%s\b' % re.escape(alternate), re.IGNORECASE)
	entry = (regex, alternate, correct)
	alternateREs.append(entry)
26871af3be3SCody Peter Mello
def check(errmsg, output, filename, line, lineno, entry):
	"""Test one line against one table entry and report a match.

	errmsg is a %-style template taking (filename, lineno, word,
	replacement); output is any object with a write() method; entry is
	a (compiled regex, word, replacement) tuple.

	Note that the word written to the report is the table entry, not
	the text actually matched on the line, so it is always shown in
	the table's spelling and case.

	Returns 1 if the pattern was found in the line, 0 otherwise.
	"""
	regex, word, replacement = entry[0], entry[1], entry[2]

	if regex.search(line):
		output.write(errmsg % (filename, lineno, word, replacement))
		return 1
	else:
		return 0
27571af3be3SCody Peter Mello
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
	"""Scan every line of fh for known misspellings and alternates.

	fh is rewound with seek(0) and then iterated line by line.  Lines
	may be bytes (binary-mode handle) or str (text-mode handle).
	filename is the name used in messages; when it is empty or None,
	fh.name is used instead.  One message per matching table entry per
	line is written to output.  Additional keyword arguments are
	accepted and ignored.

	Returns 1 if anything was reported, 0 if the file is clean.
	"""
	ret = 0

	if not filename:
		filename = fh.name

	fh.seek(0)
	for lineno, line in enumerate(fh, 1):
		# Binary handles yield bytes, which the str patterns cannot
		# search; decode them, substituting undecodable bytes rather
		# than failing.  Text handles already yield str, which has no
		# decode() method, so those lines are used as they are.
		if isinstance(line, bytes):
			line = line.decode(errors='replace')
		for entry in misspellingREs:
			ret |= check(spellMsg, output, filename, line,
			    lineno, entry)
		for entry in alternateREs:
			ret |= check(altMsg, output, filename, line,
			    lineno, entry)

	return ret
295