xref: /illumos-gate/usr/src/tools/onbld/Checks/HdrChk.py (revision 163bd69b3c164dda2a59c7f08ca788e7d6ba9bea)
1#! /usr/bin/python
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
25# Use is subject to license terms.
26#
27# ident	"%Z%%M%	%I%	%E% SMI"
28#
29
30#
31# Check that header files conform to our standards
32#
33# Standards for all header files (lenient):
34#
35#       1) Begin with a comment containing a copyright message
36#
37#       2) Enclosed in a guard of the form:
38#
39#          #ifndef GUARD
40#          #define GUARD
41#          #endif /* [!]GUARD */
42#
43#          The preferred form is without the bang character, but either is
44#          acceptable.
45#
46#       3) Has a valid ident declaration
47#
48# Additional standards for system header files:
49#
50#       1) The file guard must take the form '_FILENAME_H[_]', where FILENAME
51#          matches the basename of the file.  If it is installed in a
52#          subdirectory, it must be of the form _DIR_FILENAME_H.  The form
53#          without the trailing underscore is preferred.
54#
55#       2) All #include directives must use the <> form.
56#
57#       3) If the header file contains anything besides comments and
58#          preprocessor directives, then it must be enclosed in a C++ guard of
59#          the form:
60#
61#          #ifdef __cplusplus
62#          extern "C" {
63#          #endif
64#
65#          #ifdef __cplusplus
66#          }
67#          #endif
68#
69
70import re, os, sys
71
class HeaderFile(object):
	"""Line-oriented reader for a header file that is being checked.

	Wraps an iterable of text lines (normally an open file) and hands
	back "logical" lines: blank lines are dropped, trailing CR/LF is
	stripped, and backslash-continued lines are joined into one string.
	While skipping comments it also records whether a copyright notice
	was seen.

	Attributes:
		file: the underlying iterable of lines.
		filename: name used in diagnostics; defaults to fh.name.
		lenient: when true, '//' comments are skipped as well.
		lineno: count of physical lines consumed so far.
		has_copyright: set once a comment line matches the
			copyright pattern.
		eof: set once the underlying iterable is exhausted.
	"""

	# Compiled once instead of on every line that is examined.
	_CXX_COMMENT = re.compile(r'^\s*//')
	_COMMENT_START = re.compile(r'^\s*/\*')
	_COMMENT_END = re.compile(r'\*/')
	#
	# We explicitly exclude the form used in the CDDL header rather
	# than attempting to craft a match for every possibly valid
	# copyright notice
	#
	_COPYRIGHT = re.compile(r'Copyright (?!\[yyyy\])')

	def __init__(self, fh, filename=None, lenient=False):
		"""Wrap fh; filename overrides fh.name in diagnostics."""
		self.file = fh
		self.lenient = lenient
		self.lineno = 0
		self.has_copyright = False
		self.eof = False

		if filename:
			self.filename = filename
		else:
			self.filename = fh.name

	def getline(self):
		"""Return the next non-blank logical line, or '' at EOF.

		Lines ending in a backslash are joined with the line(s)
		that follow (the backslash itself is dropped).  Blank
		lines are skipped, including ones that fall in the middle
		of a continuation.  self.eof is set when the input runs
		out.
		"""
		# Pieces of a backslash-continued line collected so far.
		# Joining iteratively (rather than recursively) keeps very
		# long continuations from hitting the recursion limit.
		pieces = []

		for line in self.file:
			self.lineno += 1
			if not line or line.isspace():
				continue

			line = line.rstrip('\r\n')
			if line.endswith('\\'):
				pieces.append(line[0:-1])
				continue

			pieces.append(line)
			return ''.join(pieces)

		# Input exhausted; hand back whatever a dangling
		# continuation left behind (normally nothing).
		self.eof = True
		return ''.join(pieces)

	#
	# Optionally take a line to start skipping/processing with
	#
	def skipcomments(self, curline=None):
		"""Skip comments and return the first non-comment line.

		Starts from curline when given (and non-empty), otherwise
		from the next line of input.  C comments are always
		skipped; '//' comments are skipped only when lenient.
		Sets self.has_copyright if any skipped C comment line
		matches the copyright pattern.  Returns '' at EOF.
		"""
		line = curline or self.getline()
		while line:
			# When lenient, allow C++ comments
			if self.lenient and self._CXX_COMMENT.search(line):
				line = self.getline()
				continue

			if not self._COMMENT_START.search(line):
				return line

			# Consume the body of a multi-line comment.
			while not self._COMMENT_END.search(line):
				if self._COPYRIGHT.search(line):
					self.has_copyright = True
				line = self.getline()
				if not line:
					# EOF inside an unterminated comment:
					# getline() would return '' forever,
					# so stop instead of spinning.
					return line

			# The line holding the closing '*/' may itself carry
			# the copyright (e.g. a one-line comment).
			if self._COPYRIGHT.search(line):
				self.has_copyright = True
			line = self.getline()

		return line
132
133
def err(stream, msg, hdr):
	"""Write one diagnostic about hdr to stream.

	The message is prefixed with the file name and, unless the reader
	has already hit end-of-file, the current line number.
	"""
	if hdr.eof:
		# No meaningful line to point at once input is exhausted.
		text = "%s: %s\n" % (hdr.filename, msg)
	else:
		text = "%s: line %d: %s\n" % (hdr.filename, hdr.lineno, msg)

	stream.write(text)
140
141
#
# Regular-expression fragments recognising the ident keyword strings of
# the supported SCMs, in both literal (unexpanded keyword) and expanded
# form.  Each fragment must be wrapped in its own pair of parentheses so
# that it stays a single alternative when the fragments are OR-ed
# together below.
#
idents = [
	# SCCS: literal keywords, e.g. %Z%%M%<tab>%I%<tab>%E% SMI
	r'((\%Z\%(\%M\%)\t\%I\%|\%W\%)\t\%E\% SMI)',
	# SCCS: expanded, e.g. @(#)name.h<tab>1.2<tab>08/01/02 SMI
	r'(@\(#\)(\w[-\.\w]+\.h)\t\d+\.\d+(\.\d+\.\d+)?\t\d\d/\d\d/\d\d SMI)',
]

# Single compiled pattern matching any one of the fragments above.
IDENT = re.compile('(' + '|'.join(idents) + ')')
153
154
def hdrchk(fh, filename=None, lenient=False, output=sys.stderr):
	"""Check a header file against the header-file standards.

	Arguments:
		fh: iterable of lines (normally an open file) to check.
		filename: name to use in diagnostics and, when not
			lenient, to derive the expected guard name from;
			defaults to fh.name.
		lenient: when true, apply only the checks for all header
			files: the guard name, the <> form of #include and
			the C++ guard are not enforced, '//' comments are
			tolerated, and a valid #pragma ident may precede
			the header guard.
		output: stream that diagnostics are written to.

	Returns 0 if no problem was found, 1 otherwise.
	"""
	found_ident = False
	guard = None
	ret = 0

	hdr = HeaderFile(fh, filename=filename, lenient=lenient)

	#
	# Step 1:
	#
	# Headers must begin with a comment containing a copyright notice.  We
	# don't validate the contents of the copyright, only that it exists
	#
	line = hdr.skipcomments()

	if not hdr.has_copyright:
		err(output, "Missing copyright in opening comment", hdr)
		ret = 1

	#
	# Step 2:
	#
	# For application header files only, allow the ident string to appear
	# before the header guard.
	#
	if lenient and line.startswith("#pragma ident") and IDENT.search(line):
		found_ident = True
		line = hdr.skipcomments()

	#
	# Step 3: Header guards
	#
	match = re.search(r'^#ifndef\s([a-zA-Z0-9_]+)$', line)
	if not match:
		err(output, "Invalid or missing header guard", hdr)
		ret = 1
	else:
		guard = match.group(1)

		if not lenient:
			guardname = os.path.basename(hdr.filename)

			#
			# If we aren't being lenient, validate the name of the
			# guard
			#

			guardname = guardname.upper()
			guardname = guardname.replace('.', '_').replace('-','_')
			guardname = guardname.replace('+', "_PLUS")

			#
			# The name comes from the file system, so escape it:
			# any remaining regex metacharacter must be matched
			# literally rather than break or distort the pattern.
			#
			if not re.search(r'^_.*%s[_]?$' % re.escape(guardname),
					guard):
				err(output, "Header guard does not match "
					"suggested style (_FILEPATH_H_)", hdr)
				ret = 1

		line = hdr.getline()
		if not re.search(r'#define\s%s$' % guard, line):
			err(output, "Invalid header guard", hdr)
			ret = 1
			# Keep a non-empty line so that it is still
			# examined by the steps below.
			if not line:
				line = hdr.skipcomments()
		else:
			line = hdr.skipcomments()


	#
	# Step 4: ident string
	#
	# We allow both the keyword and extracted versions
	#
	if (not found_ident and line.startswith("#pragma ident") and
			not IDENT.search(line)):
		err(output, "Invalid #pragma ident", hdr)
		ret = 1
	else:
		line = hdr.skipcomments(line)

	#
	# The closing guard can only be recognised when an opening guard
	# was captured; without one, nothing counts as a valid ending
	# guard.  (guard consists solely of [a-zA-Z0-9_], so it needs no
	# escaping.)
	#
	endguard = None
	if guard is not None:
		endguard = re.compile(r'^#endif\s/\* [!]?%s \*/$' % guard)

	#
	# Main processing loop
	#
	in_cplusplus = False
	found_endguard = False
	found_cplusplus = False
	found_code = False

	while line:
		#
		# Anything that is not a preprocessor directive (or a line
		# starting with 'using') counts as code, which is what
		# makes the C++ guard mandatory for system headers.
		#
		if not (line.startswith('#') or line.startswith('using')):
			found_code = True
			line = hdr.getline()
			continue

		match = re.search(r'^#include(.*)$', line)
		if match:
			#
			# For system files, make sure #includes are of the form:
			# '#include <file>'
			#
			if not lenient and not re.search(r'\s<.*>',
					match.group(1)):
				err(output, "Bad include", hdr)
				ret = 1
		elif not in_cplusplus and re.search(r'^#ifdef\s__cplusplus$',
				line):
			#
			# Start of C++ header guard.
			# Make sure it is of the form:
			#
			# #ifdef __cplusplus
			# extern "C" {
			# #endif
			#
			line = hdr.getline()
			if line == 'extern "C" {':
				line = hdr.getline()
				if line != '#endif':
					err(output, "Bad __cplusplus clause",
						hdr)
					ret = 1
				else:
					in_cplusplus = True
					found_cplusplus = True
			else:
				# Not the guard idiom; re-examine the line
				# just read as an ordinary line.
				continue
		elif in_cplusplus and re.search(r'^#ifdef\s__cplusplus$', line):
			#
			# End of C++ header guard.  Make sure it is of the form:
			#
			# #ifdef __cplusplus
			# }
			# #endif
			#
			line = hdr.getline()
			if line == '}':
				line = hdr.getline()
				if line != '#endif':
					err(output, "Bad __cplusplus clause",
						hdr)
					ret = 1
				else:
					in_cplusplus = False
			else:
				# Not the closing idiom; re-examine the line
				# just read as an ordinary line.
				continue
		elif endguard is not None and endguard.search(line):
			#
			# Ending header guard
			#
			found_endguard = True

		line = hdr.skipcomments()

	#
	# Check for missing end clauses
	#
	if (not lenient) and (not found_cplusplus) and found_code:
		err(output, "Missing __cplusplus guard", hdr)
		ret = 1

	if in_cplusplus:
		err(output, "Missing closing #ifdef __cplusplus", hdr)
		ret = 1

	if not found_endguard:
		err(output, "Missing or invalid ending header guard", hdr)
		ret = 1

	return ret
321