xref: /illumos-gate/usr/src/tools/onbld/Checks/SpellCheck.py (revision 45ede40b2394db7967e59f19288fae9b62efd4aa)
1#
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright 2016 Joyent, Inc.
24# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
25#
26
27import re, sys
28
#
# Message templates used when reporting a problem.  Each one is filled in
# with str.format(): the first field is the offending word as listed in the
# tables below, the second is the suggested replacement.
#
spellMsg = 'contains "{}", a common misspelling of "{}"'
altMsg = ('contains "{}"; please use "{}" instead '
    'for consistency with other documentation')
caseMsg = 'contains "{}"; please use "{}" instead'
32
#
# Common misspellings (and accidentally doubled words), keyed by the wrong
# form in lower case.  The value is the text suggested to the user; where
# more than one correction is plausible the candidates are comma-separated.
#
misspellings = {
	'absense': 'absence',
	'accessable': 'accessible',
	'accomodate': 'accommodate',
	'accomodation': 'accommodation',
	'accross': 'across',
	'acheive': 'achieve',
	'addional': 'additional',
	'addres': 'address',
	'admininistrative': 'administrative',
	'adminstered': 'administered',
	'adminstrate': 'administrate',
	'adminstration': 'administration',
	'adminstrative': 'administrative',
	'adminstrator': 'administrator',
	'admissability': 'admissibility',
	'adress': 'address',
	'adressable': 'addressable',
	'adressed': 'addressed',
	'adressing': 'addressing, dressing',
	'aginst': 'against',
	'agression': 'aggression',
	'agressive': 'aggressive',
	'alot': 'a lot, allot',
	'and and': 'and',
	'apparantly': 'apparently',
	'appearence': 'appearance',
	'arguement': 'argument',
	'assasination': 'assassination',
	'auxilliary': 'auxiliary',
	'basicly': 'basically',
	'begining': 'beginning',
	'belive': 'believe',
	'beteen': 'between',
	'betwen': 'between',
	'beween': 'between',
	'bewteen': 'between',
	'bizzare': 'bizarre',
	'buisness': 'business',
	'calender': 'calendar',
	'cemetary': 'cemetery',
	'chauffer': 'chauffeur',
	'collegue': 'colleague',
	'comming': 'coming',
	'commited': 'committed',
	'commitee': 'committee',
	'commiting': 'committing',
	'comparision': 'comparison',
	'comparisions': 'comparisons',
	'compatability': 'compatibility',
	'compatable': 'compatible',
	'compatablity': 'compatibility',
	'compatiable': 'compatible',
	'compatiblity': 'compatibility',
	'completly': 'completely',
	'concious': 'conscious',
	'condidtion': 'condition',
	'conected': 'connected',
	'conjuction': 'conjunction',
	'continous': 'continuous',
	'curiousity': 'curiosity',
	'deamon': 'daemon',
	'definately': 'definitely',
	'desireable': 'desirable',
	'diffrent': 'different',
	'dilemna': 'dilemma',
	'dissapear': 'disappear',
	'dissapoint': 'disappoint',
	'ecstacy': 'ecstasy',
	'embarass': 'embarrass',
	'enviroment': 'environment',
	'exept': 'except',
	'existance': 'existence',
	'familar': 'familiar',
	'finaly': 'finally',
	'folowing': 'following',
	'foriegn': 'foreign',
	'forseeable': 'foreseeable',
	'fourty': 'forty',
	'foward': 'forward',
	'freind': 'friend',
	'futher': 'further',
	'gaurd': 'guard',
	'glamourous': 'glamorous',
	'goverment': 'government',
	'happend': 'happened',
	'harrassment': 'harassment',
	'hierachical': 'hierarchical',
	'hierachies': 'hierarchies',
	'hierachy': 'hierarchy',
	'hierarcical': 'hierarchical',
	'hierarcy': 'hierarchy',
	'honourary': 'honorary',
	'humourous': 'humorous',
	'idiosyncracy': 'idiosyncrasy',
	'immediatly': 'immediately',
	'inaccessable': 'inaccessible',
	'inbetween': 'between',
	'incidently': 'incidentally',
	'independant': 'independent',
	'infomation': 'information',
	'interupt': 'interrupt',
	'intial': 'initial',
	'intially': 'initially',
	'irresistable': 'irresistible',
	'jist': 'gist',
	'knowlege': 'knowledge',
	'lenght': 'length',
	'liase': 'liaise',
	'liason': 'liaison',
	'libary': 'library',
	'maching': 'machine, marching, matching',
	'millenia': 'millennia',
	'millenium': 'millennium',
	'neccessary': 'necessary',
	'negotation': 'negotiation',
	'nontheless': 'nonetheless',
	'noticable': 'noticeable',
	'occassion': 'occasion',
	'occassional': 'occasional',
	'occassionally': 'occasionally',
	'occurance': 'occurrence',
	'occured': 'occurred',
	'occurence': 'occurrence',
	'occuring': 'occurring',
	'ommision': 'omission',
	'orginal': 'original',
	'orginally': 'originally',
	'ouput': 'output',
	'overriden': 'overridden',
	'particuliar': 'particular',
	'pavillion': 'pavilion',
	'peice': 'piece',
	'persistant': 'persistent',
	'politican': 'politician',
	'posession': 'possession',
	'possiblity': 'possibility',
	'preceed': 'precede',
	'preceeded': 'preceded',
	'preceeding': 'preceding',
	'preceeds': 'precedes',
	'prefered': 'preferred',
	'prefering': 'preferring',
	'presense': 'presence',
	'proces': 'process',
	'propoganda': 'propaganda',
	'psuedo': 'pseudo',
	'publically': 'publicly',
	'realy': 'really',
	'reciept': 'receipt',
	'recieve': 'receive',
	'recieved': 'received',
	'reciever': 'receiver',
	'recievers': 'receivers',
	'recieves': 'receives',
	'recieving': 'receiving',
	'recomend': 'recommend',
	'recomended': 'recommended',
	'recomending': 'recommending',
	'recomends': 'recommends',
	'recurse': 'recur',
	'recurses': 'recurs',
	'recursing': 'recurring',
	'refered': 'referred',
	'refering': 'referring',
	'religous': 'religious',
	'rember': 'remember',
	'remeber': 'remember',
	'repetion': 'repetition',
	'reponsible': 'responsible',
	'resistence': 'resistance',
	'retreive': 'retrieve',
	'seige': 'siege',
	'sence': 'since',
	'seperate': 'separate',
	'seperated': 'separated',
	'seperately': 'separately',
	'seperates': 'separates',
	'similiar': 'similar',
	'somwhere': 'somewhere',
	'sould': 'could, should, sold, soul',
	'sturcture': 'structure',
	'succesful': 'successful',
	'succesfully': 'successfully',
	'successfull': 'successful',
	'sucessful': 'successful',
	'supercede': 'supersede',
	'supress': 'suppress',
	'supressed': 'suppressed',
	'suprise': 'surprise',
	'suprisingly': 'surprisingly',
	'sytem': 'system',
	'tendancy': 'tendency',
	'the the': 'the',
	'the these': 'these',
	'therefor': 'therefore',
	'threshhold': 'threshold',
	'tolerence': 'tolerance',
	'tommorow': 'tomorrow',
	'tommorrow': 'tomorrow',
	'tounge': 'tongue',
	'tranformed': 'transformed',
	'transfered': 'transferred',
	'truely': 'truly',
	'trustworthyness': 'trustworthiness',
	'uncommited': 'uncommitted',
	'unforseen': 'unforeseen',
	'unfortunatly': 'unfortunately',
	'unsuccessfull': 'unsuccessful',
	'untill': 'until',
	'upto': 'up to',
	'whereever': 'wherever',
	'wich': 'which',
	'wierd': 'weird',
	'wtih': 'with',
}
249
#
# Spellings that are not wrong as such, but for which a different form is
# requested (see altMsg); keyed by the discouraged form.
#
alternates = {
	'parseable': 'parsable',
	'sub-command': 'subcommand',
	'sub-commands': 'subcommands',
	'writeable': 'writable',
}
256
#
# Words whose capitalisation matters; keyed by the exact (case-sensitive)
# form to reject, mapping to the form to use instead.
#
case = {
	'Illumos': 'illumos',
}
260
def _compile_table(table, flags=0):
	"""Turn a {word: replacement} table into a list of compiled checks.

	Each entry of the returned list is a (regex, word, replacement)
	tuple, in the table's iteration order.  The regex is the word
	wrapped in \\b anchors and compiled with the given re flags.
	"""
	entries = []
	for word, replacement in table.items():
		#
		# Escape the word so that it is always matched literally,
		# even if a future table entry contains a character that
		# is special in a regular expression.
		#
		regex = re.compile(r'\b%s\b' % re.escape(word), flags)
		entries.append((regex, word, replacement))
	return entries

# Misspellings and alternate spellings are matched without regard to case;
# the capitalisation checks are necessarily case-sensitive.
misspellingREs = _compile_table(misspellings, re.IGNORECASE)
alternateREs = _compile_table(alternates, re.IGNORECASE)
caseREs = _compile_table(case)
279
def spellcheck_line(line):
	"""Return the list of complaints about a single line of text.

	The line is searched with every compiled check, first the
	misspellings, then the alternate spellings, then the
	capitalisation rules.  Each check that matches contributes one
	formatted message naming the table word and its replacement.  An
	empty list means nothing was found.
	"""
	checks = (
		(misspellingREs, spellMsg),
		(alternateREs, altMsg),
		(caseREs, caseMsg),
	)

	found = []
	for table, template in checks:
		for regex, word, replacement in table:
			if regex.search(line):
				found.append(template.format(word, replacement))
	return found
292
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
	"""Spell-check a whole file, writing one message per problem found.

	fh is an open file object; it is rewound with seek(0) and then
	read line by line.  Lines may be bytes (decoded with undecodable
	sequences replaced) or already-decoded text.

	filename is the name used in messages; if it is not given,
	fh.name is used.  output is the object whose write() method
	receives the messages.  Any further keyword arguments are
	accepted and ignored.

	Returns 1 if at least one problem was reported, otherwise 0.
	"""
	ret = 0

	if not filename:
		filename = fh.name

	fh.seek(0)
	for lineno, line in enumerate(fh, 1):
		#
		# A handle opened in binary mode yields bytes, which must be
		# decoded before matching.  A text-mode handle already yields
		# str, which has no decode() method in Python 3.
		#
		if isinstance(line, bytes):
			line = line.decode(errors='replace')
		for err in spellcheck_line(line):
			output.write('{}: Line {} {}\n'.format(
			    filename, lineno, err))
			ret = 1

	return ret
310