xref: /freebsd/contrib/pkgconf/doc/extract.py (revision a3cefe7f2b4df0f70ff92d4570ce18e517af43ec)
1*a3cefe7fSPierre Pronchery# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
2*a3cefe7fSPierre Pronchery# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
3*a3cefe7fSPierre Pronchery
4*a3cefe7fSPierre Pronchery
class Comment:
    """A single comment extracted from a source file.

    Attributes are set directly from the constructor arguments:
    ``comment`` is the raw comment text without its delimiters, ``line`` is
    the line number recorded by the extractor, and ``multiline`` tells
    whether the comment was written in the ``/* ... */`` form.
    """

    def __init__(self, comment, line, multiline):
        self.comment = comment
        self.line = line
        self.multiline = multiline

    def __repr__(self):
        return "Comment(comment={!r}, line={!r}, multiline={!r})".format(
            self.comment, self.line, self.multiline)

    @property
    def clean_text(self):
        """Return the comment text with its decoration removed.

        Single-line comments are simply stripped of surrounding whitespace.
        For multi-line comments, every line starting with ``' * '`` loses
        that three-character prefix, every line that is exactly two
        characters long becomes an empty line, and all other lines are
        dropped.
        """
        if not self.multiline:
            return self.comment.strip()

        kept = []
        for raw in self.comment.splitlines():
            if raw.startswith(' * '):
                kept.append(raw[3:])
            elif len(raw) == 2:
                # A bare continuation marker (such as ' *') stands for a
                # blank line in the cleaned text.
                kept.append('')
        return '\n'.join(kept)

    @property
    def doc_text(self):
        """Return the documentation payload, or None if not a doc comment.

        A doc comment is one whose cleaned text begins with ``!doc``; the
        marker and the single character following it are removed.
        """
        cleaned = self.clean_text
        if not cleaned.startswith('!doc'):
            return None
        return cleaned[5:]
34*a3cefe7fSPierre Pronchery
35*a3cefe7fSPierre Pronchery
class FileError(Exception):
    """Raised when the source file cannot be opened or read."""
38*a3cefe7fSPierre Pronchery
39*a3cefe7fSPierre Pronchery
class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""
42*a3cefe7fSPierre Pronchery
43*a3cefe7fSPierre Pronchery
# Scanner states used by _scan_comments().
(_CODE,          # ordinary code, outside any comment or literal
 _SLASH,         # just saw a '/', which may open a comment
 _LINE,          # inside a '//' comment
 _BLOCK,         # inside a '/* ... */' comment
 _BLOCK_STAR,    # inside a block comment, just saw a '*'
 _STRING,        # inside a "..." string literal
 _STRING_ESC,    # inside a string literal, just saw a backslash
 _CHAR,          # inside a '...' character literal
 _CHAR_ESC,      # inside a character literal, just saw a backslash
 ) = range(9)


def _scan_comments(text):
    """Run the comment-scanning state machine over the full source text."""
    state = _CODE
    pieces = []          # characters of the comment currently being read
    comments = []
    line_counter = 1
    comment_start = 1
    for char in text:
        if state == _CODE:
            if char == '/':
                state = _SLASH
            elif char == '"':
                state = _STRING
            elif char == "'":
                state = _CHAR
        elif state == _SLASH:
            # Classify the character following '/'.
            if char == '/':
                state = _LINE
            elif char == '*':
                comment_start = line_counter
                state = _BLOCK
            elif char == '"':
                # The '/' was an operator directly followed by a string.
                state = _STRING
            elif char == "'":
                state = _CHAR
            else:
                state = _CODE
        elif state == _LINE:
            if char == '\n':
                # line_counter has not been advanced for this newline yet,
                # so it still names the line the comment is on.
                comments.append(Comment(''.join(pieces), line_counter, False))
                pieces = []
                state = _CODE
            else:
                pieces.append(char)
        elif state == _BLOCK:
            if char == '*':
                state = _BLOCK_STAR
            else:
                pieces.append(char)
        elif state == _BLOCK_STAR:
            if char == '/':
                comments.append(Comment(''.join(pieces), comment_start, True))
                pieces = []
                state = _CODE
            else:
                # The pending '*' was part of the comment text after all.
                pieces.append('*')
                # On another '*', stay here: it may still precede the '/'.
                if char != '*':
                    pieces.append(char)
                    state = _BLOCK
        elif state == _STRING:
            if char == '"':
                state = _CODE
            elif char == '\\':
                state = _STRING_ESC
        elif state == _STRING_ESC:
            state = _STRING
        elif state == _CHAR:
            # A character literal cannot span lines; giving up at the newline
            # keeps a stray apostrophe (e.g. in '#error' text) from hiding
            # the rest of the file.
            if char == "'" or char == '\n':
                state = _CODE
            elif char == '\\':
                state = _CHAR_ESC
        elif state == _CHAR_ESC:
            state = _CHAR
        if char == '\n':
            line_counter += 1

    if state in (_BLOCK, _BLOCK_STAR):
        raise UnterminatedCommentError()
    if state == _LINE:
        # File ended inside a single-line comment without a final newline.
        comments.append(Comment(''.join(pieces), line_counter, False))
    return comments


def extract_comments(filename):
    """Extracts a list of comments from the given C family source file.

    C family comments come in two forms, single and multi-line comments.
        - Single-line comments begin with '//' and continue to the end of line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
            multiple lines of code. If a multi-line comment does not terminate
            before EOF is reached, then an exception is raised.
    Comment markers that appear inside string literals ("...") or character
    literals ('...') are not treated as comments.
    Note that this doesn't take language-specific preprocessor directives into
    consideration.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of Comment objects in the order that they appear in the
        file. Single-line comments carry the line they appear on; multi-line
        comments carry the line on which they start.
    Raises:
        FileError: File was unable to be open or read.
        UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    # Only the file I/O is guarded; parsing errors propagate unchanged.
    try:
        with open(filename, 'r') as source_file:
            text = source_file.read()
    except OSError as exception:
        raise FileError(str(exception)) from exception
    return _scan_comments(text)
141*a3cefe7fSPierre Pronchery
142*a3cefe7fSPierre Pronchery
if __name__ == '__main__':
    # Command-line entry point: print the documentation text of every
    # '!doc' comment found in the source file named by the first argument.
    import sys

    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s <source-file>' % sys.argv[0])

    for comment in extract_comments(sys.argv[1]):
        text = comment.doc_text
        # Skip comments that are not doc comments (None) or are empty.
        if text:
            print(text)
149*a3cefe7fSPierre Pronchery        print(comment.doc_text)
150