# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE


# Scanner states used while walking the source text one character at a time.
_CODE = 0           # Outside of any comment or string literal.
_SLASH = 1          # Saw a '/', which may open a comment.
_LINE_COMMENT = 2   # Inside a '//' comment.
_BLOCK_COMMENT = 3  # Inside a '/* ... */' comment.
_BLOCK_STAR = 4     # Inside a '/* ... */' comment, just after a '*'.
_STRING = 5         # Inside a double-quoted string literal.
_STRING_ESCAPE = 6  # Inside a string literal, just after a backslash.


class Comment:
    """A single comment found in a C family source file.

    Attributes:
      comment: Raw comment text, without the '//' or '/*' and '*/' delimiters.
      line: 1-based line number of the comment. For multi-line comments this
        is the line on which the comment starts.
      multiline: True for '/* ... */' comments, False for '//' comments.
    """

    def __init__(self, comment, line, multiline):
        self.comment = comment
        self.line = line
        self.multiline = multiline

    def __repr__(self):
        return "Comment(comment=%r, line=%r, multiline=%r)" % (
            self.comment, self.line, self.multiline)

    @property
    def clean_text(self):
        """Comment text with the comment decoration removed.

        Single-line comments are simply stripped of surrounding whitespace.
        For multi-line comments, every line starting with ' * ' contributes
        the text after that prefix, and a line of exactly two characters
        (such as a bare ' *') contributes an empty line. Lines matching
        neither form are dropped, so a block comment that does not use the
        ' * ' layout yields an empty string.
        """
        if not self.multiline:
            return self.comment.strip()

        cleanlines = []
        for line in self.comment.splitlines():
            if line.startswith(' * '):
                cleanlines.append(line[3:])
            elif len(line) == 2:
                cleanlines.append('')
        return '\n'.join(cleanlines)

    @property
    def doc_text(self):
        """Documentation payload of a comment marked with '!doc', else None.

        When the cleaned text starts with '!doc', the marker and the single
        character following it (typically a space or a newline) are removed
        and the remainder is returned.
        """
        text = self.clean_text
        if text.startswith('!doc'):
            return text[5:]
        return None


class FileError(Exception):
    """Raised when the source file cannot be opened or read."""


class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""


def _scan_comments(text):
    """Return the list of Comment objects found in the given source text."""
    state = _CODE
    buf = []
    comments = []
    line_counter = 1
    comment_start = 1
    for char in text:
        if state == _CODE:
            # Waiting for comment start character or beginning of string.
            if char == '/':
                state = _SLASH
            elif char == '"':
                state = _STRING
        elif state == _SLASH:
            # Found a '/', classify the next character.
            if char == '/':
                state = _LINE_COMMENT
            elif char == '*':
                comment_start = line_counter
                state = _BLOCK_COMMENT
            elif char == '"':
                # The '/' was a plain operator directly followed by a string
                # literal; its content must not be scanned for comments.
                state = _STRING
            else:
                state = _CODE
        elif state == _LINE_COMMENT:
            # In single line comment, read characters until EOL.
            if char == '\n':
                comments.append(Comment(''.join(buf), line_counter, False))
                buf = []
                state = _CODE
            else:
                buf.append(char)
        elif state == _BLOCK_COMMENT:
            # In multi-line comment, add characters until '*' encountered.
            if char == '*':
                state = _BLOCK_STAR
            else:
                buf.append(char)
        elif state == _BLOCK_STAR:
            # In multi-line comment with asterisk found. Determine if the
            # comment is ending.
            if char == '/':
                comments.append(Comment(''.join(buf), comment_start, True))
                buf = []
                state = _CODE
            elif char == '*':
                # Several '*' in a row: the previous one was content, the
                # current one may still be the start of '*/'.
                buf.append('*')
            else:
                buf.append('*')
                buf.append(char)
                state = _BLOCK_COMMENT
        elif state == _STRING:
            # In string literal, expect literal end or escape char.
            if char == '"':
                state = _CODE
            elif char == '\\':
                state = _STRING_ESCAPE
        elif state == _STRING_ESCAPE:
            # In string literal, the current char is escaped.
            state = _STRING
        if char == '\n':
            line_counter += 1

    if state in (_BLOCK_COMMENT, _BLOCK_STAR):
        raise UnterminatedCommentError(
            'unterminated multi-line comment starting at line %d'
            % comment_start)
    if state == _LINE_COMMENT:
        # File ended inside a single line comment without a final newline.
        comments.append(Comment(''.join(buf), line_counter, False))
    return comments


def extract_comments(filename):
    """Extracts a list of comments from the given C family source file.

    Comments are represented with the Comment class defined in this module.
    C family comments come in two forms, single and multi-line comments.
      - Single-line comments begin with '//' and continue to the end of line.
      - Multi-line comments begin with '/*' and end with '*/' and can span
        multiple lines of code. If a multi-line comment does not terminate
        before EOF is reached, then an exception is raised.
    Comment markers inside double-quoted string literals are ignored.
    Note that this doesn't take language-specific preprocessor directives into
    consideration.

    Args:
      filename: String name of the file to extract comments from.
    Returns:
      Python list of Comment objects in the order that they appear in the file.
    Raises:
      FileError: File was unable to be open or read.
      UnterminatedCommentError: Encountered an unterminated multi-line
        comment.
    """
    try:
        # Read the whole file at once rather than one character per call.
        with open(filename, 'r') as source_file:
            text = source_file.read()
    except OSError as exception:
        raise FileError(str(exception)) from exception
    return _scan_comments(text)


if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s <source-file>' % sys.argv[0])

    # Print the payload of every comment carrying non-empty '!doc' text.
    for comment in extract_comments(sys.argv[1]):
        doc = comment.doc_text
        if doc:
            print(doc)