xref: /illumos-gate/usr/src/tools/onbld/Checks/SpellCheck.py (revision 518062b351ad2770c7529db1397091d695284665)
171af3be3SCody Peter Mello#
271af3be3SCody Peter Mello# CDDL HEADER START
371af3be3SCody Peter Mello#
471af3be3SCody Peter Mello# The contents of this file are subject to the terms of the
571af3be3SCody Peter Mello# Common Development and Distribution License (the "License").
671af3be3SCody Peter Mello# You may not use this file except in compliance with the License.
771af3be3SCody Peter Mello#
871af3be3SCody Peter Mello# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
971af3be3SCody Peter Mello# or http://www.opensolaris.org/os/licensing.
1071af3be3SCody Peter Mello# See the License for the specific language governing permissions
1171af3be3SCody Peter Mello# and limitations under the License.
1271af3be3SCody Peter Mello#
1371af3be3SCody Peter Mello# When distributing Covered Code, include this CDDL HEADER in each
1471af3be3SCody Peter Mello# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1571af3be3SCody Peter Mello# If applicable, add the following below this CDDL HEADER, with the
1671af3be3SCody Peter Mello# fields enclosed by brackets "[]" replaced with your own identifying
1771af3be3SCody Peter Mello# information: Portions Copyright [yyyy] [name of copyright owner]
1871af3be3SCody Peter Mello#
1971af3be3SCody Peter Mello# CDDL HEADER END
2071af3be3SCody Peter Mello#
2171af3be3SCody Peter Mello
2271af3be3SCody Peter Mello#
2371af3be3SCody Peter Mello# Copyright 2016 Joyent, Inc.
24a90997d2SAndy Fiddaman# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
2571af3be3SCody Peter Mello#
2671af3be3SCody Peter Mello
2771af3be3SCody Peter Melloimport re, sys
2871af3be3SCody Peter Mello
#
# Message templates for the three kinds of finding.  In each, the first
# "{}" receives the offending text and the second the suggested
# replacement.
#

# An outright misspelling.
spellMsg = 'contains "{}", a common misspelling of "{}"'

# A valid spelling for which another form is preferred.
altMsg = (
	'contains "{}"; please use "{}" instead '
	'for consistency with other documentation'
)

# A word written with the wrong capitalization.
caseMsg = 'contains "{}"; please use "{}" instead'
3271af3be3SCody Peter Mello
#
# Common misspellings, mapped to the suggested correction.  Each key is
# compiled further down into a case-insensitive, word-boundary-anchored
# regular expression.  A value may list several candidate corrections
# separated by commas; it is only ever shown to the user, never parsed.
# Multi-word keys such as 'the the' catch accidentally doubled words.
#
misspellings = {
	'absense': 'absence',
	'accessable': 'accessible',
	'accomodate': 'accommodate',
	'accomodation': 'accommodation',
	'accross': 'across',
	'acheive': 'achieve',
	'addional': 'additional',
	'addres': 'address',
	'admininistrative': 'administrative',
	'adminstered': 'administered',
	'adminstrate': 'administrate',
	'adminstration': 'administration',
	'adminstrative': 'administrative',
	'adminstrator': 'administrator',
	'admissability': 'admissibility',
	'adress': 'address',
	'adressable': 'addressable',
	'adressed': 'addressed',
	'adressing': 'addressing, dressing',
	'aginst': 'against',
	'agression': 'aggression',
	'agressive': 'aggressive',
	'alot': 'a lot, allot',
	'and and': 'and',
	'apparantly': 'apparently',
	'appearence': 'appearance',
	'arguement': 'argument',
	'assasination': 'assassination',
	'auxilliary': 'auxiliary',
	'basicly': 'basically',
	'begining': 'beginning',
	'belive': 'believe',
	'beteen': 'between',
	'betwen': 'between',
	'beween': 'between',
	'bewteen': 'between',
	'bizzare': 'bizarre',
	'buisness': 'business',
	'calender': 'calendar',
	'cemetary': 'cemetery',
	'chauffer': 'chauffeur',
	'collegue': 'colleague',
	'comming': 'coming',
	'commited': 'committed',
	'commitee': 'committee',
	'commiting': 'committing',
	'comparision': 'comparison',
	'comparisions': 'comparisons',
	'compatability': 'compatibility',
	'compatable': 'compatible',
	'compatablity': 'compatibility',
	'compatiable': 'compatible',
	'compatiblity': 'compatibility',
	'completly': 'completely',
	'concious': 'conscious',
	'condidtion': 'condition',
	'conected': 'connected',
	'conjuction': 'conjunction',
	'continous': 'continuous',
	'curiousity': 'curiosity',
	'deamon': 'daemon',
	'definately': 'definitely',
	'desireable': 'desirable',
	'diffrent': 'different',
	'dilemna': 'dilemma',
	'dissapear': 'disappear',
	'dissapoint': 'disappoint',
	'ecstacy': 'ecstasy',
	'embarass': 'embarrass',
	'enviroment': 'environment',
	'exept': 'except',
	'existance': 'existence',
	'familar': 'familiar',
	'finaly': 'finally',
	'folowing': 'following',
	'foriegn': 'foreign',
	'forseeable': 'foreseeable',
	'fourty': 'forty',
	'foward': 'forward',
	'freind': 'friend',
	'futher': 'further',
	'gaurd': 'guard',
	'glamourous': 'glamorous',
	'goverment': 'government',
	'happend': 'happened',
	'harrassment': 'harassment',
	'hierachical': 'hierarchical',
	'hierachies': 'hierarchies',
	'hierachy': 'hierarchy',
	'hierarcical': 'hierarchical',
	'hierarcy': 'hierarchy',
	'honourary': 'honorary',
	'humourous': 'humorous',
	'idiosyncracy': 'idiosyncrasy',
	'immediatly': 'immediately',
	'inaccessable': 'inaccessible',
	'inbetween': 'between',
	'incidently': 'incidentally',
	'independant': 'independent',
	'infomation': 'information',
	'interupt': 'interrupt',
	'intial': 'initial',
	'intially': 'initially',
	'irresistable': 'irresistible',
	'jist': 'gist',
	'knowlege': 'knowledge',
	'lenght': 'length',
	'liase': 'liaise',
	'liason': 'liaison',
	'libary': 'library',
	'maching': 'machine, marching, matching',
	'millenia': 'millennia',
	'millenium': 'millennium',
	'neccessary': 'necessary',
	'negotation': 'negotiation',
	'nontheless': 'nonetheless',
	'noticable': 'noticeable',
	'occassion': 'occasion',
	'occassional': 'occasional',
	'occassionally': 'occasionally',
	'occurance': 'occurrence',
	'occured': 'occurred',
	'occurence': 'occurrence',
	'occuring': 'occurring',
	'ommision': 'omission',
	'orginal': 'original',
	'orginally': 'originally',
	'ouput': 'output',
	'overriden': 'overridden',
	'particuliar': 'particular',
	'pavillion': 'pavilion',
	'peice': 'piece',
	'persistant': 'persistent',
	'politican': 'politician',
	'posession': 'possession',
	'possiblity': 'possibility',
	'preceed': 'precede',
	'preceeded': 'preceded',
	'preceeding': 'preceding',
	'preceeds': 'precedes',
	'prefered': 'preferred',
	'prefering': 'preferring',
	'presense': 'presence',
	'proces': 'process',
	'propoganda': 'propaganda',
	'psuedo': 'pseudo',
	'publically': 'publicly',
	'realy': 'really',
	'reciept': 'receipt',
	'recieve': 'receive',
	'recieved': 'received',
	'reciever': 'receiver',
	'recievers': 'receivers',
	'recieves': 'receives',
	'recieving': 'receiving',
	'recomend': 'recommend',
	'recomended': 'recommended',
	'recomending': 'recommending',
	'recomends': 'recommends',
	'recurse': 'recur',
	'recurses': 'recurs',
	'recursing': 'recurring',
	'refered': 'referred',
	'refering': 'referring',
	'religous': 'religious',
	'rember': 'remember',
	'remeber': 'remember',
	'repetion': 'repetition',
	'reponsible': 'responsible',
	'resistence': 'resistance',
	'retreive': 'retrieve',
	'seige': 'siege',
	'sence': 'since',
	'seperate': 'separate',
	'seperated': 'separated',
	'seperately': 'separately',
	'seperates': 'separates',
	'similiar': 'similar',
	'somwhere': 'somewhere',
	'sould': 'could, should, sold, soul',
	'sturcture': 'structure',
	'succesful': 'successful',
	'succesfully': 'successfully',
	'successfull': 'successful',
	'sucessful': 'successful',
	'supercede': 'supersede',
	'supress': 'suppress',
	'supressed': 'suppressed',
	'suprise': 'surprise',
	'suprisingly': 'surprisingly',
	'sytem': 'system',
	'tendancy': 'tendency',
	'the the': 'the',
	'the these': 'these',
	'therefor': 'therefore',
	'threshhold': 'threshold',
	'tolerence': 'tolerance',
	'tommorow': 'tomorrow',
	'tommorrow': 'tomorrow',
	'tounge': 'tongue',
	'tranformed': 'transformed',
	'transfered': 'transferred',
	'truely': 'truly',
	'trustworthyness': 'trustworthiness',
	'uncommited': 'uncommitted',
	'unforseen': 'unforeseen',
	'unfortunatly': 'unfortunately',
	'unsuccessfull': 'unsuccessful',
	'untill': 'until',
	'upto': 'up to',
	'whereever': 'wherever',
	'wich': 'which',
	'wierd': 'weird',
	'wtih': 'with',
}
24971af3be3SCody Peter Mello
#
# Spellings that are not wrong as such, but for which a different form
# is preferred for consistency with other documentation.  Keys are
# matched case-insensitively on word boundaries.
#
alternates = {
	'judgement': 'judgment',
	'parseable': 'parsable',
	'sub-command': 'subcommand',
	'sub-commands': 'subcommands',
	'writeable': 'writable'
}
25771af3be3SCody Peter Mello
#
# Words that must be written with a particular capitalization.  Unlike
# the other tables, these keys are matched case-sensitively, so only the
# exact form given as the key is reported.
#
case = {
	'Illumos': 'illumos'
}
26118ce2efcSAndy Fiddaman
def _compile_table(table, flags=0):
	"""Compile a {flagged: preferred} table into search entries.

	Each key is escaped with re.escape() before being wrapped in
	word-boundary anchors, so a key containing regular expression
	metacharacters is matched literally rather than being interpreted
	as pattern syntax.

	Arguments:
	    table: dict mapping the text to flag to its suggested
	        replacement.
	    flags: flags passed to re.compile() (e.g. re.IGNORECASE).

	Returns a list of (compiled regex, flagged text, replacement)
	tuples, in the table's iteration order.
	"""
	return [
		(re.compile(r'\b%s\b' % re.escape(flagged), flags),
		    flagged, preferred)
		for flagged, preferred in table.items()
	]

# Misspellings and alternates are reported regardless of letter case.
misspellingREs = _compile_table(misspellings, re.IGNORECASE)
alternateREs = _compile_table(alternates, re.IGNORECASE)

# Capitalization rules are, by definition, case-sensitive.
caseREs = _compile_table(case)
28018ce2efcSAndy Fiddaman
def spellcheck_line(line):
	"""Return the list of spelling complaints for one line of text.

	The line is searched against the misspelling, alternate-spelling
	and capitalization tables, in that order.  Each table entry that
	matches anywhere in the line contributes one message, however many
	times it occurs.  An empty list means nothing was found.
	"""
	# Pair each compiled table with the message template used to
	# report a hit from it.
	checks = (
		(misspellingREs, spellMsg),
		(alternateREs, altMsg),
		(caseREs, caseMsg),
	)

	found = []
	for table, template in checks:
		found.extend(template.format(entry[1], entry[2])
		    for entry in table if entry[0].search(line))
	return found
29371af3be3SCody Peter Mello
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
	"""Spell-check every line of an open file and report problems.

	Arguments:
	    fh: file object to check.  It is rewound with seek(0) and then
	        iterated line by line.  Lines read as bytes are decoded
	        with undecodable sequences replaced; lines already read
	        as str (text-mode handles) are used unchanged.
	    filename: name used in the report; defaults to fh.name.
	    output: stream to which one line per finding is written.
	    opts: accepted for call compatibility and otherwise ignored.

	Returns 1 if at least one problem was reported, 0 otherwise.
	"""
	if not filename:
		filename = fh.name

	ret = 0

	fh.seek(0)
	for lineno, line in enumerate(fh, start=1):
		# Only bytes have decode(); calling it on a str line from a
		# text-mode handle would raise AttributeError.
		if isinstance(line, bytes):
			line = line.decode(errors='replace')
		for err in spellcheck_line(line):
			output.write('{}: Line {} {}\n'.format(
			    filename, lineno, err))
			ret = 1

	return ret
311