xref: /illumos-gate/usr/src/tools/onbld/Checks/SpellCheck.py (revision c5749750a3e052f1194f65a303456224c51dea63)
1#
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright 2016 Joyent, Inc.
24#
25
26import re, sys
27
#
# Report templates.  Each one is filled in, in order, with:
# (filename, line number, word found in the text, suggested replacement).
#
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = (
	'%s: Line %d contains "%s"; please use "%s" instead '
	'for consistency with other documentation\n'
)
30
#
# Table of common misspellings.  Each key is the text to look for and each
# value is the suggested correction that is printed in the report (via
# spellMsg).  Keys are compiled below into case-insensitive, whole-word
# regular expressions, so they must be written in lower case without any
# regex metacharacters.
#
# A value may list several comma-separated candidates when the intended
# word is ambiguous (e.g. 'alot').  Keys containing a space ('and and',
# 'the the', 'the these') catch accidentally doubled words on one line.
#
misspellings = {
	'absense': 'absence',
	'accessable': 'accessible',
	'accomodate': 'accommodate',
	'accomodation': 'accommodation',
	'accross': 'across',
	'acheive': 'achieve',
	'addional': 'additional',
	'addres': 'address',
	'admininistrative': 'administrative',
	'adminstered': 'administered',
	'adminstrate': 'administrate',
	'adminstration': 'administration',
	'adminstrative': 'administrative',
	'adminstrator': 'administrator',
	'admissability': 'admissibility',
	'adress': 'address',
	'adressable': 'addressable',
	'adressed': 'addressed',
	'adressing': 'addressing, dressing',
	'aginst': 'against',
	'agression': 'aggression',
	'agressive': 'aggressive',
	'alot': 'a lot, allot',
	'and and': 'and',
	'apparantly': 'apparently',
	'appearence': 'appearance',
	'arguement': 'argument',
	'assasination': 'assassination',
	'auxilliary': 'auxiliary',
	'basicly': 'basically',
	'begining': 'beginning',
	'belive': 'believe',
	'beteen': 'between',
	'betwen': 'between',
	'beween': 'between',
	'bewteen': 'between',
	'bizzare': 'bizarre',
	'buisness': 'business',
	'calender': 'calendar',
	'cemetary': 'cemetery',
	'chauffer': 'chauffeur',
	'collegue': 'colleague',
	'comming': 'coming',
	'commited': 'committed',
	'commitee': 'committee',
	'commiting': 'committing',
	'comparision': 'comparison',
	'comparisions': 'comparisons',
	'compatability': 'compatibility',
	'compatable': 'compatible',
	'compatablity': 'compatibility',
	'compatiable': 'compatible',
	'compatiblity': 'compatibility',
	'completly': 'completely',
	'concious': 'conscious',
	'condidtion': 'condition',
	'conected': 'connected',
	'conjuction': 'conjunction',
	'continous': 'continuous',
	'curiousity': 'curiosity',
	'deamon': 'daemon',
	'definately': 'definitely',
	'desireable': 'desirable',
	'diffrent': 'different',
	'dilemna': 'dilemma',
	'dissapear': 'disappear',
	'dissapoint': 'disappoint',
	'ecstacy': 'ecstasy',
	'embarass': 'embarrass',
	'enviroment': 'environment',
	'exept': 'except',
	'existance': 'existence',
	'familar': 'familiar',
	'finaly': 'finally',
	'folowing': 'following',
	'foriegn': 'foreign',
	'forseeable': 'foreseeable',
	'fourty': 'forty',
	'foward': 'forward',
	'freind': 'friend',
	'futher': 'further',
	'gaurd': 'guard',
	'glamourous': 'glamorous',
	'goverment': 'government',
	'happend': 'happened',
	'harrassment': 'harassment',
	'hierachical': 'hierarchical',
	'hierachies': 'hierarchies',
	'hierachy': 'hierarchy',
	'hierarcical': 'hierarchical',
	'hierarcy': 'hierarchy',
	'honourary': 'honorary',
	'humourous': 'humorous',
	'idiosyncracy': 'idiosyncrasy',
	'immediatly': 'immediately',
	'inaccessable': 'inaccessible',
	'inbetween': 'between',
	'incidently': 'incidentally',
	'independant': 'independent',
	'infomation': 'information',
	'interupt': 'interrupt',
	'intial': 'initial',
	'intially': 'initially',
	'irresistable': 'irresistible',
	'jist': 'gist',
	'knowlege': 'knowledge',
	'lenght': 'length',
	'liase': 'liaise',
	'liason': 'liaison',
	'libary': 'library',
	'maching': 'machine, marching, matching',
	'millenia': 'millennia',
	'millenium': 'millennium',
	'neccessary': 'necessary',
	'negotation': 'negotiation',
	'nontheless': 'nonetheless',
	'noticable': 'noticeable',
	'occassion': 'occasion',
	'occassional': 'occasional',
	'occassionally': 'occasionally',
	'occurance': 'occurrence',
	'occured': 'occurred',
	'occurence': 'occurrence',
	'occuring': 'occurring',
	'ommision': 'omission',
	'orginal': 'original',
	'orginally': 'originally',
	'ouput': 'output',
	'overriden': 'overridden',
	'particuliar': 'particular',
	'pavillion': 'pavilion',
	'peice': 'piece',
	'persistant': 'persistent',
	'politican': 'politician',
	'posession': 'possession',
	'possiblity': 'possibility',
	'preceed': 'precede',
	'preceeded': 'preceded',
	'preceeding': 'preceding',
	'preceeds': 'precedes',
	'prefered': 'preferred',
	'prefering': 'preferring',
	'presense': 'presence',
	'proces': 'process',
	'propoganda': 'propaganda',
	'psuedo': 'pseudo',
	'publically': 'publicly',
	'realy': 'really',
	'reciept': 'receipt',
	'recieve': 'receive',
	'recieved': 'received',
	'reciever': 'receiver',
	'recievers': 'receivers',
	'recieves': 'receives',
	'recieving': 'receiving',
	'recomend': 'recommend',
	'recomended': 'recommended',
	'recomending': 'recommending',
	'recomends': 'recommends',
	'recurse': 'recur',
	'recurses': 'recurs',
	'recursing': 'recurring',
	'refered': 'referred',
	'refering': 'referring',
	'religous': 'religious',
	'rember': 'remember',
	'remeber': 'remember',
	'repetion': 'repetition',
	'reponsible': 'responsible',
	'resistence': 'resistance',
	'retreive': 'retrieve',
	'seige': 'siege',
	'sence': 'since',
	'seperate': 'separate',
	'seperated': 'separated',
	'seperately': 'separately',
	'seperates': 'separates',
	'similiar': 'similar',
	'somwhere': 'somewhere',
	'sould': 'could, should, sold, soul',
	'sturcture': 'structure',
	'succesful': 'successful',
	'succesfully': 'successfully',
	'successfull': 'successful',
	'sucessful': 'successful',
	'supercede': 'supersede',
	'supress': 'suppress',
	'supressed': 'suppressed',
	'suprise': 'surprise',
	'suprisingly': 'surprisingly',
	'sytem': 'system',
	'tendancy': 'tendency',
	'the the': 'the',
	'the these': 'these',
	'therefor': 'therefore',
	'threshhold': 'threshold',
	'tolerence': 'tolerance',
	'tommorow': 'tomorrow',
	'tommorrow': 'tomorrow',
	'tounge': 'tongue',
	'tranformed': 'transformed',
	'transfered': 'transferred',
	'truely': 'truly',
	'trustworthyness': 'trustworthiness',
	'uncommited': 'uncommitted',
	'unforseen': 'unforeseen',
	'unfortunatly': 'unfortunately',
	'unsuccessfull': 'unsuccessful',
	'untill': 'until',
	'upto': 'up to',
	'whereever': 'wherever',
	'wich': 'which',
	'wierd': 'weird',
	'wtih': 'with',
}
247
#
# Table of discouraged spellings.  Each key is the text to look for and
# each value is the form to use instead; matches are reported with altMsg
# rather than spellMsg.  Keys are matched the same way as the misspellings
# table: case-insensitively and only as whole words.
#
alternates = {
	'parseable': 'parsable',
	'sub-command': 'subcommand',
	'sub-commands': 'subcommands',
	'writeable': 'writable'
}
254
#
# Pre-compile both word tables once, at import time, so that spellcheck()
# only has to run already-built regular expressions against each line.
#
def _compileEntries(words):
	"""Turn a {word: replacement} table into a list of match entries.

	Each entry is a (regex, word, replacement) tuple, which is the shape
	check() expects.  The regex matches the word case-insensitively and
	only on word boundaries, so 'proces' does not fire on 'process'.

	dict.items() is used rather than iteritems() because the latter
	does not exist on Python 3; items() works on both major versions.
	The word is passed through re.escape() so that a table key is
	always matched literally, even if it contains punctuation.
	"""
	entries = []
	for word, replacement in words.items():
		regex = re.compile(r'\b%s\b' % re.escape(word), re.IGNORECASE)
		entries.append((regex, word, replacement))
	return entries

misspellingREs = _compileEntries(misspellings)
alternateREs = _compileEntries(alternates)
267
def check(errmsg, output, filename, line, lineno, entry):
	"""Test one line of text against one table entry.

	errmsg   -- format string taking (filename, lineno, word, replacement)
	output   -- object with a write() method that receives the report
	filename -- name to show in the report
	line     -- the text to search
	lineno   -- line number to show in the report
	entry    -- sequence of (regex, word, replacement)

	Writes a single report line and returns 1 when the regex is found
	anywhere in the line; otherwise writes nothing and returns 0.
	"""
	pattern = entry[0]
	if not pattern.search(line):
		return 0

	found, suggestion = entry[1], entry[2]
	output.write(errmsg % (filename, lineno, found, suggestion))
	return 1
274
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
	"""Scan a file for known misspellings and discouraged spellings.

	fh       -- file object to scan; it is rewound with seek(0) first
	filename -- name used in reports; defaults to fh.name when not given
	output   -- object with a write() method that receives the reports
	opts     -- accepted but not used by this function

	Every line is tested against every misspelling entry and then every
	alternate-spelling entry; one report is written per (line, entry)
	hit.  Returns 1 if anything was reported, 0 if the file is clean.
	"""
	if not filename:
		filename = fh.name

	# Each pass pairs a report template with the entries that use it;
	# misspellings are reported before alternates for any given line.
	passes = (
		(spellMsg, misspellingREs),
		(altMsg, alternateREs),
	)

	fh.seek(0)
	status = 0
	for lineno, line in enumerate(fh, 1):
		for errmsg, entries in passes:
			for entry in entries:
				status |= check(errmsg, output, filename,
				    line, lineno, entry)

	return status
293