xref: /linux/scripts/checktransupdate.py (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit (ordered
by author date) and the latest English commit from HEAD. If they differ,
it reports the file and the commits that need to be resolved.
10
11The usage is as follows:
12- ./scripts/checktransupdate.py -l zh_CN
13This will print all the files that need to be updated or translated in the zh_CN locale.
14- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
15This will only print the status of the specified file.
16
17The output is something like:
18Documentation/dev-tools/kfence.rst
19No translation in the locale of zh_CN
20
21Documentation/translations/zh_CN/dev-tools/testing-overview.rst
22commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
231 commits needs resolving in total
24"""
25
import logging
import os
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
31
32
def get_origin_path(file_path):
    """Map a translation path to the path of the document it translates

    The "translations" component and the locale component that follows it
    are removed; everything else is kept in order. Raises ValueError when
    the path has no "translations" component.
    """
    parts = file_path.split("/")
    marker = parts.index("translations")
    # skip two components: "translations" itself and the locale directory
    kept = parts[:marker] + parts[marker + 2 :]
    return "/".join(kept)
40
41
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file

    Runs ``git log -1`` starting at ``commit`` and limited to ``file_path``.
    git is invoked with an argument list (no shell), so paths containing
    spaces or shell metacharacters are passed through unchanged.

    Args:
        file_path: path given to git after ``--``.
        commit: revision to start from, e.g. "HEAD" or "<hash>^".

    Returns:
        None when git produces no commit (or cannot be executed), otherwise
        a dict with the keys "hash", "author_date" and "commit_date"
        (timezone-aware datetimes) and "message" (list of body lines).
    """
    command = [
        "git",
        "log",
        "--pretty=format:%H%n%aD%n%cD%n%n%B",
        commit,
        "-1",
        "--",
        file_path,
    ]
    logging.debug("%s", " ".join(command))
    try:
        # stderr is left attached to the terminal so git errors stay visible
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return None

    result = proc.stdout.split("\n")
    # a real commit always yields hash, author date and commit date lines
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        "message": result[4:],
    }
60
61
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit the translation commit corresponds to

    Starting at the translation commit, step back through the history of
    origin_path until a commit is found whose author date is not later
    than the translation's author date. Returns that commit dict, or None
    when the history runs out first.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while candidate is not None:
        if candidate["author_date"] <= t_from_head["author_date"]:
            logging.debug("tracked origin commit id: %s", candidate["hash"])
            break
        # authored after the translation: look at the previous origin commit
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    return candidate
70
71
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file

    Despite the name, this returns the list of commit hashes printed by
    ``git log commit1...commit2 -- opath`` (empty lines removed), not a
    number. git is invoked with an argument list (no shell), so the path is
    passed through unchanged. An empty list is returned when git prints
    nothing or cannot be executed.
    """
    command = [
        "git",
        "log",
        "--pretty=format:%H",
        f"{commit1}...{commit2}",
        "--",
        opath,
    ]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return []
    # filter out empty lines
    return [line for line in proc.stdout.split("\n") if line != ""]
81
82
def pretty_output(commit):
    """Pretty print the commit message

    Returns the output of ``git log -1`` for ``commit`` formatted as
    ``<abbreviated hash> ("<subject>")``. The result is an empty string
    when git prints nothing (e.g. unknown revision) or cannot be executed.
    git is invoked with an argument list, so no shell quoting is involved.
    """
    command = ["git", "log", '--pretty=format:%h ("%s")', "-1", commit]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return proc.stdout
89
90
def valid_commit(commit):
    """Tell whether the commit should be reported

    A commit is rejected when its pretty-printed form contains the text
    "Merge tag"; every other commit is accepted.
    """
    summary = pretty_output(commit)
    if "Merge tag" in summary:
        return False
    return True
95
def check_per_file(file_path):
    """Check the translation status for the specified file

    Compares the latest origin commit seen from HEAD with the origin commit
    tracked by the latest translation commit. When they differ, the file
    and every origin commit in between (except those rejected by
    valid_commit) are logged at INFO level, followed by a total.

    Args:
        file_path: path of the translated file, relative to the directory
            git is run from.
    """
    opath = get_origin_path(file_path)

    if not os.path.isfile(opath):
        # pass the path as a lazy logging argument so it is actually shown
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
130
131
def valid_locales(locale):
    """Check if the locale is valid or not

    Used as an argparse ``type=`` callback. A locale is valid when
    ``Documentation/translations/<locale>`` exists as a directory next to
    the directory containing this script.

    Returns:
        The locale unchanged.

    Raises:
        ArgumentTypeError: if the locale directory does not exist.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string so the offending locale is named in the error message
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
139
140
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect files under folder whose path ends with include_suffix

    Directories are visited with an explicit stack; a directory whose base
    name appears in exclude_folders is skipped together with everything
    below it.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        # only descend into directories that are not excluded
        if os.path.basename(current) not in exclude_folders:
            for name in os.listdir(current):
                full_path = os.path.join(current, name)
                if os.path.isdir(full_path):
                    pending.append(full_path)
                elif full_path.endswith(include_suffix):
                    matched.append(full_path)

    return matched
161
162
class DmesgFormatter(logging.Formatter):
    """Custom dmesg logging formatter

    Renders each record as ``[<seconds>] <message>``, with the time printed
    as a float with six decimals, right-aligned to at least ten characters.
    """

    def format(self, record):
        """Return the formatted line for record

        The timestamp is taken from record.created rather than from the
        clock at formatting time, so every handler that formats the same
        record prints the same time.
        """
        formatted_time = f"[{record.created:>10.6f}]"
        return f"{formatted_time} {record.getMessage()}"
170
171
def config_logging(log_level, log_file="checktransupdate.log"):
    """Set up the root logger to write to the console and to log_file

    Both handlers use log_level and share one DmesgFormatter instance.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)

    # console handler first, then the file handler, as registration order
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))
    dmesg_format = DmesgFormatter()

    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(dmesg_format)

    for handler in handlers:
        root_logger.addHandler(handler)
194
195
def main():
    """Main function of the script

    Parses the command line, configures logging, builds the list of
    translation files to inspect (either the files given on the command
    line or every translated counterpart of the .rst files found under
    Documentation), changes to the kernel tree root and checks each file.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        official_files = list_files_with_excluding_folders(
            os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
        )

        for file in official_files:
            # split the path into parts
            path_parts = file.split(os.sep)
            # find the index of the "Documentation" directory
            kindex = path_parts.index("Documentation")
            # insert the translations and locale after the Documentation directory
            new_path_parts = (
                path_parts[: kindex + 1]
                + ["translations", args.locale]
                + path_parts[kindex + 1 :]
            )
            # join the path parts back together
            new_file = os.sep.join(new_path_parts)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # make every path relative to the tree root before changing directory
    files = [os.path.relpath(os.path.abspath(path), linux_path) for path in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
268
269
# Run the checker only when this file is executed as a script
if __name__ == "__main__":
    main()
272