xref: /linux/tools/docs/checktransupdate.py (revision 19dcccbc064d6c58eaafae1ecb94821a2535cc26)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit at the time of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be resolved.
10
11The usage is as follows:
12- tools/docs/checktransupdate.py -l zh_CN
13This will print all the files that need to be updated or translated in the zh_CN locale.
14- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
15This will only print the status of the specified file.
16- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools
17This will print the status of all files under the directory.
18
19The output is something like:
20Documentation/dev-tools/kfence.rst
21No translation in the locale of zh_CN
22
23Documentation/translations/zh_CN/dev-tools/testing-overview.rst
24commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
251 commits needs resolving in total
26"""
27
import logging
import os
import re
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
34
35
def get_origin_path(file_path):
    """Map a translation path to the path of the document it translates.

    The "translations" component and the component right after it (the
    locale) are removed from the "/"-separated path; everything else is
    kept in order. A ValueError is raised when the path has no
    "translations" component.
    """
    parts = file_path.split("/")
    marker = parts.index("translations")
    # skip two components: "translations" itself and the locale name
    return "/".join(parts[:marker] + parts[marker + 2:])
43
44
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` starting at ``commit`` and limited to ``file_path``.

    Args:
        file_path: path of the file, resolved by git relative to the
            current working directory.
        commit: revision to start from, e.g. "HEAD" or "<hash>^".

    Returns:
        A dict with the keys "hash" (full commit hash), "author_date" and
        "commit_date" (datetime objects parsed with their UTC offset) and
        "message" (the commit message as a list of lines), or None when
        git prints no commit or cannot be executed.
    """
    # Pass an argument list instead of a shell string, so that spaces or
    # shell metacharacters in the path or revision are taken literally.
    command = [
        "git", "log", "--pretty=format:%H%n%aD%n%cD%n%n%B",
        commit, "-1", "--", file_path,
    ]
    logging.debug(" ".join(command))
    try:
        # stderr is left untouched so git errors still reach the terminal.
        # git emits log output in UTF-8 unless configured otherwise;
        # undecodable bytes are replaced rather than aborting the run.
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return None

    result = proc.stdout.split("\n")
    # no output (or a lone line) means no commit was found
    if len(result) <= 1:
        return None

    logging.debug("Result: %s", result[0])

    # Output layout: hash, author date, commit date, blank line, message
    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        "message": result[4:],
    }
63
64
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit that the translation commit was based on.

    Starting at the translation commit, walk back through the history of
    origin_path until a commit is found whose author date is not later
    than the author date of the translation commit. Returns that commit
    (as returned by get_latest_commit_from) or None if there is none.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while candidate is not None:
        if candidate["author_date"] <= t_from_head["author_date"]:
            logging.debug("tracked origin commit id: %s", candidate["hash"])
            return candidate
        # authored after the translation: continue from its parent
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    return None
73
74
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Find the origin commit named in the translation commit message.

    The message lines are searched, in order, for one of these forms:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    Returns the latest commit of origin_path as of HASH (as returned by
    get_latest_commit_from), or None when no line carries such a marker
    or the lookup finds nothing.
    """
    # capture group for a 12-character abbreviated commit hash
    hash_re = r'([0-9a-f]{12})'
    patterns = (
        re.compile(rf'update to commit {hash_re}'),
        re.compile(rf'Update the translation through commit {hash_re}'),
    )

    # try both patterns on each line; the first hit in message order wins
    attempts = (
        pattern.search(line)
        for line in t_from_head["message"]
        for pattern in patterns
    )
    first_hit = next((found for found in attempts if found), None)
    if first_hit is None:
        return None

    o_from_t = get_latest_commit_from(origin_path, first_hit.group(1))
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t
105
106
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Despite the name, this returns the commits themselves, not a count.

    Args:
        opath: path of the file, resolved by git relative to the current
            working directory.
        commit1: first revision of the ``commit1...commit2`` range.
        commit2: second revision of the range.

    Returns:
        A list of full commit hashes, in the order printed by ``git log``;
        empty when git prints nothing or cannot be executed.
    """
    # Pass an argument list instead of a shell string, so that spaces or
    # shell metacharacters in the path or revisions are taken literally.
    command = [
        "git", "log", "--pretty=format:%H",
        f"{commit1}...{commit2}", "--", opath,
    ]
    logging.debug(" ".join(command))
    try:
        # stderr is left untouched so git errors still reach the terminal
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return []
    # filter out empty lines
    return [line for line in proc.stdout.split("\n") if line != ""]
116
117
def pretty_output(commit):
    """Return a one-line description of the commit.

    Args:
        commit: revision to describe.

    Returns:
        The string ``<abbreviated hash> ("<subject>")`` as printed by
        ``git log -1``, or an empty string when git prints nothing or
        cannot be executed.
    """
    # Pass an argument list instead of a shell string, so the revision is
    # never interpreted by a shell and no quoting of the format is needed.
    command = ["git", "log", '--pretty=format:%h ("%s")', "-1", commit]
    logging.debug(" ".join(command))
    try:
        # stderr is left untouched so git errors still reach the terminal
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return proc.stdout
124
125
def valid_commit(commit):
    """Tell whether the commit should be reported.

    A commit is rejected when its one-line description (see
    pretty_output) contains the text "Merge tag".
    """
    summary = pretty_output(commit)
    if "Merge tag" in summary:
        return False
    return True
130
def check_per_file(file_path):
    """Check the translation status for the specified file.

    Logs, at INFO level, the file and every origin commit that is not yet
    covered by the translation, followed by the number of such commits.
    Problems with a single file are logged at ERROR level and the file is
    skipped.

    Args:
        file_path: path of the translated file; it must contain a
            "translations" component followed by the locale.
    """
    try:
        opath = get_origin_path(file_path)
    except ValueError:
        # The path has no "translations" component, so it is not a
        # translation file. Skip it instead of aborting the whole run.
        logging.error("Cannot find the origin path for %s", file_path)
        return

    if not os.path.isfile(opath):
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        # no marker in the commit message: fall back to comparing author dates
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        # commits rejected by valid_commit() are neither printed nor counted
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
168
169
def valid_locales(locale):
    """Check if the locale is valid or not.

    Used as an argparse ``type=`` callable. A locale is valid when
    Documentation/translations/<locale> is a directory, looked up two
    levels above the directory containing this script.

    Args:
        locale: locale name, e.g. "zh_CN".

    Returns:
        The locale, unchanged.

    Raises:
        ArgumentTypeError: if there is no such translations directory.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string, so the message names the locale that was rejected
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
177
178
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect the files below folder whose path ends with include_suffix.

    The tree is traversed with an explicit stack. A directory whose base
    name is listed in exclude_folders is skipped together with everything
    below it; this check is also applied to folder itself. The returned
    paths are built by joining onto folder.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        if os.path.basename(current) in exclude_folders:
            # excluded directory: do not descend into it
            continue
        for name in os.listdir(current):
            entry = os.path.join(current, name)
            if os.path.isdir(entry):
                pending.append(entry)
            elif entry.endswith(include_suffix):
                matched.append(entry)

    return matched
199
200
class DmesgFormatter(logging.Formatter):
    """Formatter that prefixes each message with a dmesg-like timestamp"""
    def format(self, record):
        # The stamp is the time of formatting, as returned by time.time(),
        # printed with six decimals inside square brackets.
        now = time.time()
        return "[{:>10.6f}] {}".format(now, record.getMessage())
208
209
def config_logging(log_level, log_file="checktransupdate.log"):
    """Send log records of the root logger to the console and to log_file.

    The root logger and both handlers are set to log_level, and both
    handlers share one DmesgFormatter.
    """
    root = logging.getLogger()
    root.setLevel(log_level)

    dmesg_style = DmesgFormatter()

    # console handler first, then the file handler
    for handler in (logging.StreamHandler(), logging.FileHandler(log_file)):
        handler.setLevel(log_level)
        handler.setFormatter(dmesg_style)
        root.addHandler(handler)
232
233
def main():
    """Main function of the script.

    Parses the command line, configures logging and checks every selected
    translation file. Without file arguments, every .rst file under
    Documentation (outside the "translations" and "output" directories)
    is looked up in the chosen locale; files without a translation are
    reported unless --no-print-missing-translations is given.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files or directories to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_root = os.path.join(linux_path, "Documentation")
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], ".rst"
        )

        for file in official_files:
            # Build the translation path from the path relative to the
            # Documentation directory. Searching the full path for a
            # "Documentation" component would pick the wrong one when the
            # checkout itself lives below a directory of that name.
            rel_path = os.path.relpath(file, doc_root)
            new_file = os.path.join(doc_root, "translations", args.locale, rel_path)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)
    else:
        # check if the files are directories or files
        new_files = []
        for file in files:
            if os.path.isfile(file):
                new_files.append(file)
            elif os.path.isdir(file):
                # for directories, list all files in the directory and its subfolders
                new_files.extend(list_files_with_excluding_folders(file, [], ".rst"))
            else:
                # report a mistyped path instead of silently ignoring it
                logging.warning("Skipping %s: not a file or directory", file)
        files = new_files

    # make every path relative to the linux root before changing directory
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
316
317
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()
320