xref: /linux/scripts/get_maintainer.pl (revision 79997eda0d31bc68203c95ecb978773ee6ce7a1f)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22use File::Spec::Functions;
23
24my $cur_path = fastgetcwd() . '/';
25my $lk_path = "./";
26my $email = 1;
27my $email_usename = 1;
28my $email_maintainer = 1;
29my $email_reviewer = 1;
30my $email_fixes = 1;
31my $email_list = 1;
32my $email_moderated_list = 1;
33my $email_subscriber_list = 0;
34my $email_git_penguin_chiefs = 0;
35my $email_git = 0;
36my $email_git_all_signature_types = 0;
37my $email_git_blame = 0;
38my $email_git_blame_signatures = 1;
39my $email_git_fallback = 1;
40my $email_git_min_signatures = 1;
41my $email_git_max_maintainers = 5;
42my $email_git_min_percent = 5;
43my $email_git_since = "1-year-ago";
44my $email_hg_since = "-365";
45my $interactive = 0;
46my $email_remove_duplicates = 1;
47my $email_use_mailmap = 1;
48my $output_multiline = 1;
49my $output_separator = ", ";
50my $output_roles = 0;
51my $output_rolestats = 1;
52my $output_section_maxlen = 50;
53my $scm = 0;
54my $tree = 1;
55my $web = 0;
56my $subsystem = 0;
57my $status = 0;
58my $letters = "";
59my $keywords = 1;
60my $keywords_in_file = 0;
61my $sections = 0;
62my $email_file_emails = 0;
63my $from_filename = 0;
64my $pattern_depth = 0;
65my $self_test = undef;
66my $version = 0;
67my $help = 0;
68my $find_maintainer_files = 0;
69my $maintainer_path;
70my $vcs_used = 0;
71
72my $exit = 0;
73
74my @files = ();
75my @fixes = ();			# If a patch description includes Fixes: lines
76my @range = ();
77my @keyword_tvi = ();
78my @file_emails = ();
79
80my %commit_author_hash;
81my %commit_signer_hash;
82
83my @penguin_chief = ();
84push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
85#Andrew wants in on most everything - 2009/01/14
86#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
87
88my @penguin_chief_names = ();
89foreach my $chief (@penguin_chief) {
90    if ($chief =~ m/^(.*):(.*)/) {
91	my $chief_name = $1;
92	my $chief_addr = $2;
93	push(@penguin_chief_names, $chief_name);
94    }
95}
96my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
97
98# Signature types of people who are either
99# 	a) responsible for the code in question, or
100# 	b) familiar enough with it to give relevant feedback
101my @signature_tags = ();
102push(@signature_tags, "Signed-off-by:");
103push(@signature_tags, "Reviewed-by:");
104push(@signature_tags, "Acked-by:");
105
106my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
107
108# rfc822 email address - preloaded methods go here.
109my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
110my $rfc822_char = '[\\000-\\377]';
111
112# VCS command support: class-like functions and strings
113
114my %VCS_cmds;
115
116my %VCS_cmds_git = (
117    "execute_cmd" => \&git_execute_cmd,
118    "available" => '(which("git") ne "") && (-e ".git")',
119    "find_signers_cmd" =>
120	"git log --no-color --follow --since=\$email_git_since " .
121	    '--numstat --no-merges ' .
122	    '--format="GitCommit: %H%n' .
123		      'GitAuthor: %an <%ae>%n' .
124		      'GitDate: %aD%n' .
125		      'GitSubject: %s%n' .
126		      '%b%n"' .
127	    " -- \$file",
128    "find_commit_signers_cmd" =>
129	"git log --no-color " .
130	    '--numstat ' .
131	    '--format="GitCommit: %H%n' .
132		      'GitAuthor: %an <%ae>%n' .
133		      'GitDate: %aD%n' .
134		      'GitSubject: %s%n' .
135		      '%b%n"' .
136	    " -1 \$commit",
137    "find_commit_author_cmd" =>
138	"git log --no-color " .
139	    '--numstat ' .
140	    '--format="GitCommit: %H%n' .
141		      'GitAuthor: %an <%ae>%n' .
142		      'GitDate: %aD%n' .
143		      'GitSubject: %s%n"' .
144	    " -1 \$commit",
145    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
146    "blame_file_cmd" => "git blame -l \$file",
147    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
148    "blame_commit_pattern" => "^([0-9a-f]+) ",
149    "author_pattern" => "^GitAuthor: (.*)",
150    "subject_pattern" => "^GitSubject: (.*)",
151    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
152    "file_exists_cmd" => "git ls-files \$file",
153    "list_files_cmd" => "git ls-files \$file",
154);
155
156my %VCS_cmds_hg = (
157    "execute_cmd" => \&hg_execute_cmd,
158    "available" => '(which("hg") ne "") && (-d ".hg")',
159    "find_signers_cmd" =>
160	"hg log --date=\$email_hg_since " .
161	    "--template='HgCommit: {node}\\n" .
162	                "HgAuthor: {author}\\n" .
163			"HgSubject: {desc}\\n'" .
164	    " -- \$file",
165    "find_commit_signers_cmd" =>
166	"hg log " .
167	    "--template='HgSubject: {desc}\\n'" .
168	    " -r \$commit",
169    "find_commit_author_cmd" =>
170	"hg log " .
171	    "--template='HgCommit: {node}\\n" .
172		        "HgAuthor: {author}\\n" .
173			"HgSubject: {desc|firstline}\\n'" .
174	    " -r \$commit",
175    "blame_range_cmd" => "",		# not supported
176    "blame_file_cmd" => "hg blame -n \$file",
177    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
178    "blame_commit_pattern" => "^([ 0-9a-f]+):",
179    "author_pattern" => "^HgAuthor: (.*)",
180    "subject_pattern" => "^HgSubject: (.*)",
181    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
182    "file_exists_cmd" => "hg files \$file",
183    "list_files_cmd" => "hg manifest -R \$file",
184);
185
186my $conf = which_conf(".get_maintainer.conf");
187if (-f $conf) {
188    my @conf_args;
189    open(my $conffile, '<', "$conf")
190	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
191
192    while (<$conffile>) {
193	my $line = $_;
194
195	$line =~ s/\s*\n?$//g;
196	$line =~ s/^\s*//g;
197	$line =~ s/\s+/ /g;
198
199	next if ($line =~ m/^\s*#/);
200	next if ($line =~ m/^\s*$/);
201
202	my @words = split(" ", $line);
203	foreach my $word (@words) {
204	    last if ($word =~ m/^#/);
205	    push (@conf_args, $word);
206	}
207    }
208    close($conffile);
209    unshift(@ARGV, @conf_args) if @conf_args;
210}
211
212my @ignore_emails = ();
213my $ignore_file = which_conf(".get_maintainer.ignore");
214if (-f $ignore_file) {
215    open(my $ignore, '<', "$ignore_file")
216	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
217    while (<$ignore>) {
218	my $line = $_;
219
220	$line =~ s/\s*\n?$//;
221	$line =~ s/^\s*//;
222	$line =~ s/\s+$//;
223	$line =~ s/#.*$//;
224
225	next if ($line =~ m/^\s*$/);
226	if (rfc822_valid($line)) {
227	    push(@ignore_emails, $line);
228	}
229    }
230    close($ignore);
231}
232
233if ($#ARGV > 0) {
234    foreach (@ARGV) {
235        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
236            die "$P: using --self-test does not allow any other option or argument\n";
237        }
238    }
239}
240
241if (!GetOptions(
242		'email!' => \$email,
243		'git!' => \$email_git,
244		'git-all-signature-types!' => \$email_git_all_signature_types,
245		'git-blame!' => \$email_git_blame,
246		'git-blame-signatures!' => \$email_git_blame_signatures,
247		'git-fallback!' => \$email_git_fallback,
248		'git-chief-penguins!' => \$email_git_penguin_chiefs,
249		'git-min-signatures=i' => \$email_git_min_signatures,
250		'git-max-maintainers=i' => \$email_git_max_maintainers,
251		'git-min-percent=i' => \$email_git_min_percent,
252		'git-since=s' => \$email_git_since,
253		'hg-since=s' => \$email_hg_since,
254		'i|interactive!' => \$interactive,
255		'remove-duplicates!' => \$email_remove_duplicates,
256		'mailmap!' => \$email_use_mailmap,
257		'm!' => \$email_maintainer,
258		'r!' => \$email_reviewer,
259		'n!' => \$email_usename,
260		'l!' => \$email_list,
261		'fixes!' => \$email_fixes,
262		'moderated!' => \$email_moderated_list,
263		's!' => \$email_subscriber_list,
264		'multiline!' => \$output_multiline,
265		'roles!' => \$output_roles,
266		'rolestats!' => \$output_rolestats,
267		'separator=s' => \$output_separator,
268		'subsystem!' => \$subsystem,
269		'status!' => \$status,
270		'scm!' => \$scm,
271		'tree!' => \$tree,
272		'web!' => \$web,
273		'letters=s' => \$letters,
274		'pattern-depth=i' => \$pattern_depth,
275		'k|keywords!' => \$keywords,
276		'kf|keywords-in-file!' => \$keywords_in_file,
277		'sections!' => \$sections,
278		'fe|file-emails!' => \$email_file_emails,
279		'f|file' => \$from_filename,
280		'find-maintainer-files' => \$find_maintainer_files,
281		'mpath|maintainer-path=s' => \$maintainer_path,
282		'self-test:s' => \$self_test,
283		'v|version' => \$version,
284		'h|help|usage' => \$help,
285		)) {
286    die "$P: invalid argument - use --help if necessary\n";
287}
288
289if ($help != 0) {
290    usage();
291    exit 0;
292}
293
294if ($version != 0) {
295    print("${P} ${V}\n");
296    exit 0;
297}
298
299if (defined $self_test) {
300    read_all_maintainer_files();
301    self_test();
302    exit 0;
303}
304
305if (-t STDIN && !@ARGV) {
306    # We're talking to a terminal, but have no command line arguments.
307    die "$P: missing patchfile or -f file - use --help if necessary\n";
308}
309
310$output_multiline = 0 if ($output_separator ne ", ");
311$output_rolestats = 1 if ($interactive);
312$output_roles = 1 if ($output_rolestats);
313
314if ($sections || $letters ne "") {
315    $sections = 1;
316    $email = 0;
317    $email_list = 0;
318    $scm = 0;
319    $status = 0;
320    $subsystem = 0;
321    $web = 0;
322    $keywords = 0;
323    $keywords_in_file = 0;
324    $interactive = 0;
325} else {
326    my $selections = $email + $scm + $status + $subsystem + $web;
327    if ($selections == 0) {
328	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
329    }
330}
331
332if ($email &&
333    ($email_maintainer + $email_reviewer +
334     $email_list + $email_subscriber_list +
335     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
336    die "$P: Please select at least 1 email option\n";
337}
338
339if ($tree && !top_of_kernel_tree($lk_path)) {
340    die "$P: The current directory does not appear to be "
341	. "a linux kernel source tree.\n";
342}
343
344## Read MAINTAINERS for type/value pairs
345
346my @typevalue = ();
347my %keyword_hash;
348my @mfiles = ();
349my @self_test_info = ();
350
351sub read_maintainer_file {
352    my ($file) = @_;
353
354    open (my $maint, '<', "$file")
355	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
356    my $i = 1;
357    while (<$maint>) {
358	my $line = $_;
359	chomp $line;
360
361	if ($line =~ m/^([A-Z]):\s*(.*)/) {
362	    my $type = $1;
363	    my $value = $2;
364
365	    ##Filename pattern matching
366	    if ($type eq "F" || $type eq "X") {
367		$value =~ s@\.@\\\.@g;       ##Convert . to \.
368		$value =~ s/\*/\.\*/g;       ##Convert * to .*
369		$value =~ s/\?/\./g;         ##Convert ? to .
370		##if pattern is a directory and it lacks a trailing slash, add one
371		if ((-d $value)) {
372		    $value =~ s@([^/])$@$1/@;
373		}
374	    } elsif ($type eq "K") {
375		$keyword_hash{@typevalue} = $value;
376	    }
377	    push(@typevalue, "$type:$value");
378	} elsif (!(/^\s*$/ || /^\s*\#/)) {
379	    push(@typevalue, $line);
380	}
381	if (defined $self_test) {
382	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
383	}
384	$i++;
385    }
386    close($maint);
387}
388
389sub find_is_maintainer_file {
390    my ($file) = $_;
391    return if ($file !~ m@/MAINTAINERS$@);
392    $file = $File::Find::name;
393    return if (! -f $file);
394    push(@mfiles, $file);
395}
396
397sub find_ignore_git {
398    return grep { $_ !~ /^\.git$/; } @_;
399}
400
401read_all_maintainer_files();
402
403sub read_all_maintainer_files {
404    my $path = "${lk_path}MAINTAINERS";
405    if (defined $maintainer_path) {
406	$path = $maintainer_path;
407	# Perl Cookbook tilde expansion if necessary
408	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
409    }
410
411    if (-d $path) {
412	$path .= '/' if ($path !~ m@/$@);
413	if ($find_maintainer_files) {
414	    find( { wanted => \&find_is_maintainer_file,
415		    preprocess => \&find_ignore_git,
416		    no_chdir => 1,
417		}, "$path");
418	} else {
419	    opendir(DIR, "$path") or die $!;
420	    my @files = readdir(DIR);
421	    closedir(DIR);
422	    foreach my $file (@files) {
423		push(@mfiles, "$path$file") if ($file !~ /^\./);
424	    }
425	}
426    } elsif (-f "$path") {
427	push(@mfiles, "$path");
428    } else {
429	die "$P: MAINTAINER file not found '$path'\n";
430    }
431    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
432    foreach my $file (@mfiles) {
433	read_maintainer_file("$file");
434    }
435}
436
437sub maintainers_in_file {
438    my ($file) = @_;
439
440    return if ($file =~ m@\bMAINTAINERS$@);
441
442    if (-f $file && ($email_file_emails || $file =~ /\.yaml$/)) {
443	open(my $f, '<', $file)
444	    or die "$P: Can't open $file: $!\n";
445	my $text = do { local($/) ; <$f> };
446	close($f);
447
448	my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
449	push(@file_emails, clean_file_emails(@poss_addr));
450    }
451}
452
453#
454# Read mail address map
455#
456
457my $mailmap;
458
459read_mailmap();
460
461sub read_mailmap {
462    $mailmap = {
463	names => {},
464	addresses => {}
465    };
466
467    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
468
469    open(my $mailmap_file, '<', "${lk_path}.mailmap")
470	or warn "$P: Can't open .mailmap: $!\n";
471
472    while (<$mailmap_file>) {
473	s/#.*$//; #strip comments
474	s/^\s+|\s+$//g; #trim
475
476	next if (/^\s*$/); #skip empty lines
477	#entries have one of the following formats:
478	# name1 <mail1>
479	# <mail1> <mail2>
480	# name1 <mail1> <mail2>
481	# name1 <mail1> name2 <mail2>
482	# (see man git-shortlog)
483
484	if (/^([^<]+)<([^>]+)>$/) {
485	    my $real_name = $1;
486	    my $address = $2;
487
488	    $real_name =~ s/\s+$//;
489	    ($real_name, $address) = parse_email("$real_name <$address>");
490	    $mailmap->{names}->{$address} = $real_name;
491
492	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
493	    my $real_address = $1;
494	    my $wrong_address = $2;
495
496	    $mailmap->{addresses}->{$wrong_address} = $real_address;
497
498	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
499	    my $real_name = $1;
500	    my $real_address = $2;
501	    my $wrong_address = $3;
502
503	    $real_name =~ s/\s+$//;
504	    ($real_name, $real_address) =
505		parse_email("$real_name <$real_address>");
506	    $mailmap->{names}->{$wrong_address} = $real_name;
507	    $mailmap->{addresses}->{$wrong_address} = $real_address;
508
509	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
510	    my $real_name = $1;
511	    my $real_address = $2;
512	    my $wrong_name = $3;
513	    my $wrong_address = $4;
514
515	    $real_name =~ s/\s+$//;
516	    ($real_name, $real_address) =
517		parse_email("$real_name <$real_address>");
518
519	    $wrong_name =~ s/\s+$//;
520	    ($wrong_name, $wrong_address) =
521		parse_email("$wrong_name <$wrong_address>");
522
523	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
524	    $mailmap->{names}->{$wrong_email} = $real_name;
525	    $mailmap->{addresses}->{$wrong_email} = $real_address;
526	}
527    }
528    close($mailmap_file);
529}
530
531## use the filenames on the command line or find the filenames in the patchfiles
532
533if (!@ARGV) {
534    push(@ARGV, "&STDIN");
535}
536
537foreach my $file (@ARGV) {
538    if ($file ne "&STDIN") {
539	$file = canonpath($file);
540	##if $file is a directory and it lacks a trailing slash, add one
541	if ((-d $file)) {
542	    $file =~ s@([^/])$@$1/@;
543	} elsif (!(-f $file)) {
544	    die "$P: file '${file}' not found\n";
545	}
546    }
547    if ($from_filename && (vcs_exists() && !vcs_file_exists($file))) {
548	warn "$P: file '$file' not found in version control $!\n";
549    }
550    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
551	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
552	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
553	push(@files, $file);
554	if ($file ne "MAINTAINERS" && -f $file && $keywords && $keywords_in_file) {
555	    open(my $f, '<', $file)
556		or die "$P: Can't open $file: $!\n";
557	    my $text = do { local($/) ; <$f> };
558	    close($f);
559	    foreach my $line (keys %keyword_hash) {
560		if ($text =~ m/$keyword_hash{$line}/x) {
561		    push(@keyword_tvi, $line);
562		}
563	    }
564	}
565    } else {
566	my $file_cnt = @files;
567	my $lastfile;
568
569	open(my $patch, "< $file")
570	    or die "$P: Can't open $file: $!\n";
571
572	# We can check arbitrary information before the patch
573	# like the commit message, mail headers, etc...
574	# This allows us to match arbitrary keywords against any part
575	# of a git format-patch generated file (subject tags, etc...)
576
577	my $patch_prefix = "";			#Parsing the intro
578
579	while (<$patch>) {
580	    my $patch_line = $_;
581	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
582		my $filename = $1;
583		push(@files, $filename);
584	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
585		my $filename = $1;
586		push(@files, $filename);
587	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
588		my $filename1 = $1;
589		my $filename2 = $2;
590		push(@files, $filename1);
591		push(@files, $filename2);
592	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
593		push(@fixes, $1) if ($email_fixes);
594	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
595		my $filename = $1;
596		$filename =~ s@^[^/]*/@@;
597		$filename =~ s@\n@@;
598		$lastfile = $filename;
599		push(@files, $filename);
600		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
601	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
602		if ($email_git_blame) {
603		    push(@range, "$lastfile:$1:$2");
604		}
605	    } elsif ($keywords) {
606		foreach my $line (keys %keyword_hash) {
607		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
608			push(@keyword_tvi, $line);
609		    }
610		}
611	    }
612	}
613	close($patch);
614
615	if ($file_cnt == @files) {
616	    warn "$P: file '${file}' doesn't appear to be a patch.  "
617		. "Add -f to options?\n";
618	}
619	@files = sort_and_uniq(@files);
620    }
621}
622
623@file_emails = uniq(@file_emails);
624@fixes = uniq(@fixes);
625
626my %email_hash_name;
627my %email_hash_address;
628my @email_to = ();
629my %hash_list_to;
630my @list_to = ();
631my @scm = ();
632my @web = ();
633my @subsystem = ();
634my @status = ();
635my %deduplicate_name_hash = ();
636my %deduplicate_address_hash = ();
637
638my @maintainers = get_maintainers();
639if (@maintainers) {
640    @maintainers = merge_email(@maintainers);
641    output(@maintainers);
642}
643
644if ($scm) {
645    @scm = uniq(@scm);
646    output(@scm);
647}
648
649if ($status) {
650    @status = uniq(@status);
651    output(@status);
652}
653
654if ($subsystem) {
655    @subsystem = uniq(@subsystem);
656    output(@subsystem);
657}
658
659if ($web) {
660    @web = uniq(@web);
661    output(@web);
662}
663
664exit($exit);
665
666sub self_test {
667    my @lsfiles = ();
668    my @good_links = ();
669    my @bad_links = ();
670    my @section_headers = ();
671    my $index = 0;
672
673    @lsfiles = vcs_list_files($lk_path);
674
675    for my $x (@self_test_info) {
676	$index++;
677
678	## Section header duplication and missing section content
679	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
680	    $x->{line} =~ /^\S[^:]/ &&
681	    defined $self_test_info[$index] &&
682	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
683	    my $has_S = 0;
684	    my $has_F = 0;
685	    my $has_ML = 0;
686	    my $status = "";
687	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
688		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
689	    } else {
690		push(@section_headers, $x->{line});
691	    }
692	    my $nextline = $index;
693	    while (defined $self_test_info[$nextline] &&
694		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
695		my $type = $1;
696		my $value = $2;
697		if ($type eq "S") {
698		    $has_S = 1;
699		    $status = $value;
700		} elsif ($type eq "F" || $type eq "N") {
701		    $has_F = 1;
702		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
703		    $has_ML = 1;
704		}
705		$nextline++;
706	    }
707	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
708		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
709	    }
710	    if (!$has_S) {
711		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
712	    }
713	    if (!$has_F) {
714		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
715	    }
716	}
717
718	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
719
720	my $type = $1;
721	my $value = $2;
722
723	## Filename pattern matching
724	if (($type eq "F" || $type eq "X") &&
725	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
726	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
727	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
728	    $value =~ s/\?/\./g;         ##Convert ? to .
729	    ##if pattern is a directory and it lacks a trailing slash, add one
730	    if ((-d $value)) {
731		$value =~ s@([^/])$@$1/@;
732	    }
733	    if (!grep(m@^$value@, @lsfiles)) {
734		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
735	    }
736
737	## Link reachability
738	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
739		 $value =~ /^https?:/ &&
740		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
741	    next if (grep(m@^\Q$value\E$@, @good_links));
742	    my $isbad = 0;
743	    if (grep(m@^\Q$value\E$@, @bad_links)) {
744	        $isbad = 1;
745	    } else {
746		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
747		if ($? == 0) {
748		    push(@good_links, $value);
749		} else {
750		    push(@bad_links, $value);
751		    $isbad = 1;
752		}
753	    }
754	    if ($isbad) {
755	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
756	    }
757
758	## SCM reachability
759	} elsif ($type eq "T" &&
760		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
761	    next if (grep(m@^\Q$value\E$@, @good_links));
762	    my $isbad = 0;
763	    if (grep(m@^\Q$value\E$@, @bad_links)) {
764	        $isbad = 1;
765            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
766		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
767	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
768		my $url = $1;
769		my $branch = "";
770		$branch = $3 if $3;
771		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
772		if ($? == 0) {
773		    push(@good_links, $value);
774		} else {
775		    push(@bad_links, $value);
776		    $isbad = 1;
777		}
778	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
779		my $url = $1;
780		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
781		if ($? == 0) {
782		    push(@good_links, $value);
783		} else {
784		    push(@bad_links, $value);
785		    $isbad = 1;
786		}
787	    }
788	    if ($isbad) {
789		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
790	    }
791	}
792    }
793}
794
795sub ignore_email_address {
796    my ($address) = @_;
797
798    foreach my $ignore (@ignore_emails) {
799	return 1 if ($ignore eq $address);
800    }
801
802    return 0;
803}
804
805sub range_is_maintained {
806    my ($start, $end) = @_;
807
808    for (my $i = $start; $i < $end; $i++) {
809	my $line = $typevalue[$i];
810	if ($line =~ m/^([A-Z]):\s*(.*)/) {
811	    my $type = $1;
812	    my $value = $2;
813	    if ($type eq 'S') {
814		if ($value =~ /(maintain|support)/i) {
815		    return 1;
816		}
817	    }
818	}
819    }
820    return 0;
821}
822
823sub range_has_maintainer {
824    my ($start, $end) = @_;
825
826    for (my $i = $start; $i < $end; $i++) {
827	my $line = $typevalue[$i];
828	if ($line =~ m/^([A-Z]):\s*(.*)/) {
829	    my $type = $1;
830	    my $value = $2;
831	    if ($type eq 'M') {
832		return 1;
833	    }
834	}
835    }
836    return 0;
837}
838
839sub get_maintainers {
840    %email_hash_name = ();
841    %email_hash_address = ();
842    %commit_author_hash = ();
843    %commit_signer_hash = ();
844    @email_to = ();
845    %hash_list_to = ();
846    @list_to = ();
847    @scm = ();
848    @web = ();
849    @subsystem = ();
850    @status = ();
851    %deduplicate_name_hash = ();
852    %deduplicate_address_hash = ();
853    if ($email_git_all_signature_types) {
854	$signature_pattern = "(.+?)[Bb][Yy]:";
855    } else {
856	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
857    }
858
859    # Find responsible parties
860
861    my %exact_pattern_match_hash = ();
862
863    foreach my $file (@files) {
864
865	my %hash;
866	my $tvi = find_first_section();
867	while ($tvi < @typevalue) {
868	    my $start = find_starting_index($tvi);
869	    my $end = find_ending_index($tvi);
870	    my $exclude = 0;
871	    my $i;
872
873	    #Do not match excluded file patterns
874
875	    for ($i = $start; $i < $end; $i++) {
876		my $line = $typevalue[$i];
877		if ($line =~ m/^([A-Z]):\s*(.*)/) {
878		    my $type = $1;
879		    my $value = $2;
880		    if ($type eq 'X') {
881			if (file_match_pattern($file, $value)) {
882			    $exclude = 1;
883			    last;
884			}
885		    }
886		}
887	    }
888
889	    if (!$exclude) {
890		for ($i = $start; $i < $end; $i++) {
891		    my $line = $typevalue[$i];
892		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
893			my $type = $1;
894			my $value = $2;
895			if ($type eq 'F') {
896			    if (file_match_pattern($file, $value)) {
897				my $value_pd = ($value =~ tr@/@@);
898				my $file_pd = ($file  =~ tr@/@@);
899				$value_pd++ if (substr($value,-1,1) ne "/");
900				$value_pd = -1 if ($value =~ /^\.\*/);
901				if ($value_pd >= $file_pd &&
902				    range_is_maintained($start, $end) &&
903				    range_has_maintainer($start, $end)) {
904				    $exact_pattern_match_hash{$file} = 1;
905				}
906				if ($pattern_depth == 0 ||
907				    (($file_pd - $value_pd) < $pattern_depth)) {
908				    $hash{$tvi} = $value_pd;
909				}
910			    }
911			} elsif ($type eq 'N') {
912			    if ($file =~ m/$value/x) {
913				$hash{$tvi} = 0;
914			    }
915			}
916		    }
917		}
918	    }
919	    $tvi = $end + 1;
920	}
921
922	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
923	    add_categories($line, "");
924	    if ($sections) {
925		my $i;
926		my $start = find_starting_index($line);
927		my $end = find_ending_index($line);
928		for ($i = $start; $i < $end; $i++) {
929		    my $line = $typevalue[$i];
930		    if ($line =~ /^[FX]:/) {		##Restore file patterns
931			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
932			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
933			$line =~ s/\\\./\./g;       	##Convert \. to .
934			$line =~ s/\.\*/\*/g;       	##Convert .* to *
935		    }
936		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
937		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
938			print("$line\n");
939		    }
940		}
941		print("\n");
942	    }
943	}
944
945	maintainers_in_file($file);
946    }
947
948    if ($keywords) {
949	@keyword_tvi = sort_and_uniq(@keyword_tvi);
950	foreach my $line (@keyword_tvi) {
951	    add_categories($line, ":Keyword:$keyword_hash{$line}");
952	}
953    }
954
955    foreach my $email (@email_to, @list_to) {
956	$email->[0] = deduplicate_email($email->[0]);
957    }
958
959    foreach my $file (@files) {
960	if ($email &&
961	    ($email_git ||
962	     ($email_git_fallback &&
963	      $file !~ /MAINTAINERS$/ &&
964	      !$exact_pattern_match_hash{$file}))) {
965	    vcs_file_signoffs($file);
966	}
967	if ($email && $email_git_blame) {
968	    vcs_file_blame($file);
969	}
970    }
971
972    if ($email) {
973	foreach my $chief (@penguin_chief) {
974	    if ($chief =~ m/^(.*):(.*)/) {
975		my $email_address;
976
977		$email_address = format_email($1, $2, $email_usename);
978		if ($email_git_penguin_chiefs) {
979		    push(@email_to, [$email_address, 'chief penguin']);
980		} else {
981		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
982		}
983	    }
984	}
985
986	foreach my $email (@file_emails) {
987	    $email = mailmap_email($email);
988	    my ($name, $address) = parse_email($email);
989
990	    my $tmp_email = format_email($name, $address, $email_usename);
991	    push_email_address($tmp_email, '');
992	    add_role($tmp_email, 'in file');
993	}
994    }
995
996    foreach my $fix (@fixes) {
997	vcs_add_commit_signers($fix, "blamed_fixes");
998    }
999
1000    my @to = ();
1001    if ($email || $email_list) {
1002	if ($email) {
1003	    @to = (@to, @email_to);
1004	}
1005	if ($email_list) {
1006	    @to = (@to, @list_to);
1007	}
1008    }
1009
1010    if ($interactive) {
1011	@to = interactive_get_maintainers(\@to);
1012    }
1013
1014    return @to;
1015}
1016
1017sub file_match_pattern {
1018    my ($file, $pattern) = @_;
1019    if (substr($pattern, -1) eq "/") {
1020	if ($file =~ m@^$pattern@) {
1021	    return 1;
1022	}
1023    } else {
1024	if ($file =~ m@^$pattern@) {
1025	    my $s1 = ($file =~ tr@/@@);
1026	    my $s2 = ($pattern =~ tr@/@@);
1027	    if ($s1 == $s2) {
1028		return 1;
1029	    }
1030	}
1031    }
1032    return 0;
1033}
1034
1035sub usage {
1036    print <<EOT;
1037usage: $P [options] patchfile
1038       $P [options] -f file|directory
1039version: $V
1040
1041MAINTAINER field selection options:
1042  --email => print email address(es) if any
1043    --git => include recent git \*-by: signers
1044    --git-all-signature-types => include signers regardless of signature type
1045        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1046    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1047    --git-chief-penguins => include ${penguin_chiefs}
1048    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1049    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1050    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1051    --git-blame => use git blame to find modified commits for patch or file
1052    --git-blame-signatures => when used with --git-blame, also include all commit signers
1053    --git-since => git history to use (default: $email_git_since)
1054    --hg-since => hg history to use (default: $email_hg_since)
1055    --interactive => display a menu (mostly useful if used with the --git option)
1056    --m => include maintainer(s) if any
1057    --r => include reviewer(s) if any
1058    --n => include name 'Full Name <addr\@domain.tld>'
1059    --l => include list(s) if any
1060    --moderated => include moderated lists(s) if any (default: true)
1061    --s => include subscriber only list(s) if any (default: false)
1062    --remove-duplicates => minimize duplicate email names/addresses
1063    --roles => show roles (status:subsystem, git-signer, list, etc...)
1064    --rolestats => show roles and statistics (commits/total_commits, %)
1065    --file-emails => add email addresses found in -f file (default: 0 (off))
1066    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1067  --scm => print SCM tree(s) if any
1068  --status => print status if any
1069  --subsystem => print subsystem name if any
1070  --web => print website(s) if any
1071
1072Output type options:
1073  --separator [, ] => separator for multiple entries on 1 line
1074    using --separator also sets --nomultiline if --separator is not [, ]
1075  --multiline => print 1 entry per line
1076
1077Other options:
1078  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1079  --keywords => scan patch for keywords (default: $keywords)
1080  --keywords-in-file => scan file for keywords (default: $keywords_in_file)
1081  --sections => print all of the subsystem sections with pattern matches
1082  --letters => print all matching 'letter' types from all matching sections
1083  --mailmap => use .mailmap file (default: $email_use_mailmap)
1084  --no-tree => run without a kernel tree
1085  --self-test => show potential issues with MAINTAINERS file content
1086  --version => show version
1087  --help => show this help information
1088
1089Default options:
1090  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1091   --pattern-depth=0 --remove-duplicates --rolestats --keywords]
1092
1093Notes:
1094  Using "-f directory" may give unexpected results:
1095      Used with "--git", git signators for _all_ files in and below
1096          directory are examined as git recurses directories.
1097          Any specified X: (exclude) pattern matches are _not_ ignored.
1098      Used with "--nogit", directory is used as a pattern match,
1099          no individual file within the directory or subdirectory
1100          is matched.
1101      Used with "--git-blame", does not iterate all files in directory
1102  Using "--git-blame" is slow and may add old committers and authors
1103      that are no longer active maintainers to the output.
1104  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1105      other automated tools that expect only ["name"] <email address>
1106      may not work because of additional output after <email address>.
1107  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1108      not the percentage of the entire file authored.  # of commits is
1109      not a good measure of amount of code authored.  1 major commit may
1110      contain a thousand lines, 5 trivial commits may modify a single line.
1111  If git is not installed, but mercurial (hg) is installed and an .hg
1112      repository exists, the following options apply to mercurial:
1113          --git,
1114          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1115          --git-blame
1116      Use --hg-since not --git-since to control date selection
1117  File ".get_maintainer.conf", if it exists in the linux kernel source root
1118      directory, can change whatever get_maintainer defaults are desired.
1119      Entries in this file can be any command line argument.
1120      This file is prepended to any additional command line arguments.
1121      Multiple lines and # comments are allowed.
1122  Most options have both positive and negative forms.
1123      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1124
1125EOT
1126}
1127
1128sub top_of_kernel_tree {
1129    my ($lk_path) = @_;
1130
1131    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1132	$lk_path .= "/";
1133    }
1134    if (   (-f "${lk_path}COPYING")
1135	&& (-f "${lk_path}CREDITS")
1136	&& (-f "${lk_path}Kbuild")
1137	&& (-e "${lk_path}MAINTAINERS")
1138	&& (-f "${lk_path}Makefile")
1139	&& (-f "${lk_path}README")
1140	&& (-d "${lk_path}Documentation")
1141	&& (-d "${lk_path}arch")
1142	&& (-d "${lk_path}include")
1143	&& (-d "${lk_path}drivers")
1144	&& (-d "${lk_path}fs")
1145	&& (-d "${lk_path}init")
1146	&& (-d "${lk_path}ipc")
1147	&& (-d "${lk_path}kernel")
1148	&& (-d "${lk_path}lib")
1149	&& (-d "${lk_path}scripts")) {
1150	return 1;
1151    }
1152    return 0;
1153}
1154
1155sub parse_email {
1156    my ($formatted_email) = @_;
1157
1158    my $name = "";
1159    my $address = "";
1160
1161    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1162	$name = $1;
1163	$address = $2;
1164    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1165	$address = $1;
1166    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1167	$address = $1;
1168    }
1169
1170    $name =~ s/^\s+|\s+$//g;
1171    $name =~ s/^\"|\"$//g;
1172    $address =~ s/^\s+|\s+$//g;
1173
1174    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1175	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1176	$name = "\"$name\"";
1177    }
1178
1179    return ($name, $address);
1180}
1181
1182sub format_email {
1183    my ($name, $address, $usename) = @_;
1184
1185    my $formatted_email;
1186
1187    $name =~ s/^\s+|\s+$//g;
1188    $name =~ s/^\"|\"$//g;
1189    $address =~ s/^\s+|\s+$//g;
1190
1191    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1192	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1193	$name = "\"$name\"";
1194    }
1195
1196    if ($usename) {
1197	if ("$name" eq "") {
1198	    $formatted_email = "$address";
1199	} else {
1200	    $formatted_email = "$name <$address>";
1201	}
1202    } else {
1203	$formatted_email = $address;
1204    }
1205
1206    return $formatted_email;
1207}
1208
1209sub find_first_section {
1210    my $index = 0;
1211
1212    while ($index < @typevalue) {
1213	my $tv = $typevalue[$index];
1214	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1215	    last;
1216	}
1217	$index++;
1218    }
1219
1220    return $index;
1221}
1222
1223sub find_starting_index {
1224    my ($index) = @_;
1225
1226    while ($index > 0) {
1227	my $tv = $typevalue[$index];
1228	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1229	    last;
1230	}
1231	$index--;
1232    }
1233
1234    return $index;
1235}
1236
1237sub find_ending_index {
1238    my ($index) = @_;
1239
1240    while ($index < @typevalue) {
1241	my $tv = $typevalue[$index];
1242	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1243	    last;
1244	}
1245	$index++;
1246    }
1247
1248    return $index;
1249}
1250
1251sub get_subsystem_name {
1252    my ($index) = @_;
1253
1254    my $start = find_starting_index($index);
1255
1256    my $subsystem = $typevalue[$start];
1257    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1258	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1259	$subsystem =~ s/\s*$//;
1260	$subsystem = $subsystem . "...";
1261    }
1262    return $subsystem;
1263}
1264
1265sub get_maintainer_role {
1266    my ($index) = @_;
1267
1268    my $i;
1269    my $start = find_starting_index($index);
1270    my $end = find_ending_index($index);
1271
1272    my $role = "unknown";
1273    my $subsystem = get_subsystem_name($index);
1274
1275    for ($i = $start + 1; $i < $end; $i++) {
1276	my $tv = $typevalue[$i];
1277	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1278	    my $ptype = $1;
1279	    my $pvalue = $2;
1280	    if ($ptype eq "S") {
1281		$role = $pvalue;
1282	    }
1283	}
1284    }
1285
1286    $role = lc($role);
1287    if      ($role eq "supported") {
1288	$role = "supporter";
1289    } elsif ($role eq "maintained") {
1290	$role = "maintainer";
1291    } elsif ($role eq "odd fixes") {
1292	$role = "odd fixer";
1293    } elsif ($role eq "orphan") {
1294	$role = "orphan minder";
1295    } elsif ($role eq "obsolete") {
1296	$role = "obsolete minder";
1297    } elsif ($role eq "buried alive in reporters") {
1298	$role = "chief penguin";
1299    }
1300
1301    return $role . ":" . $subsystem;
1302}
1303
1304sub get_list_role {
1305    my ($index) = @_;
1306
1307    my $subsystem = get_subsystem_name($index);
1308
1309    if ($subsystem eq "THE REST") {
1310	$subsystem = "";
1311    }
1312
1313    return $subsystem;
1314}
1315
1316sub add_categories {
1317    my ($index, $suffix) = @_;
1318
1319    my $i;
1320    my $start = find_starting_index($index);
1321    my $end = find_ending_index($index);
1322
1323    push(@subsystem, $typevalue[$start]);
1324
1325    for ($i = $start + 1; $i < $end; $i++) {
1326	my $tv = $typevalue[$i];
1327	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1328	    my $ptype = $1;
1329	    my $pvalue = $2;
1330	    if ($ptype eq "L") {
1331		my $list_address = $pvalue;
1332		my $list_additional = "";
1333		my $list_role = get_list_role($i);
1334
1335		if ($list_role ne "") {
1336		    $list_role = ":" . $list_role;
1337		}
1338		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1339		    $list_address = $1;
1340		    $list_additional = $2;
1341		}
1342		if ($list_additional =~ m/subscribers-only/) {
1343		    if ($email_subscriber_list) {
1344			if (!$hash_list_to{lc($list_address)}) {
1345			    $hash_list_to{lc($list_address)} = 1;
1346			    push(@list_to, [$list_address,
1347					    "subscriber list${list_role}" . $suffix]);
1348			}
1349		    }
1350		} else {
1351		    if ($email_list) {
1352			if (!$hash_list_to{lc($list_address)}) {
1353			    if ($list_additional =~ m/moderated/) {
1354				if ($email_moderated_list) {
1355				    $hash_list_to{lc($list_address)} = 1;
1356				    push(@list_to, [$list_address,
1357						    "moderated list${list_role}" . $suffix]);
1358				}
1359			    } else {
1360				$hash_list_to{lc($list_address)} = 1;
1361				push(@list_to, [$list_address,
1362						"open list${list_role}" . $suffix]);
1363			    }
1364			}
1365		    }
1366		}
1367	    } elsif ($ptype eq "M") {
1368		if ($email_maintainer) {
1369		    my $role = get_maintainer_role($i);
1370		    push_email_addresses($pvalue, $role . $suffix);
1371		}
1372	    } elsif ($ptype eq "R") {
1373		if ($email_reviewer) {
1374		    my $subsystem = get_subsystem_name($i);
1375		    push_email_addresses($pvalue, "reviewer:$subsystem" . $suffix);
1376		}
1377	    } elsif ($ptype eq "T") {
1378		push(@scm, $pvalue . $suffix);
1379	    } elsif ($ptype eq "W") {
1380		push(@web, $pvalue . $suffix);
1381	    } elsif ($ptype eq "S") {
1382		push(@status, $pvalue . $suffix);
1383	    }
1384	}
1385    }
1386}
1387
1388sub email_inuse {
1389    my ($name, $address) = @_;
1390
1391    return 1 if (($name eq "") && ($address eq ""));
1392    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1393    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1394
1395    return 0;
1396}
1397
1398sub push_email_address {
1399    my ($line, $role) = @_;
1400
1401    my ($name, $address) = parse_email($line);
1402
1403    if ($address eq "") {
1404	return 0;
1405    }
1406
1407    if (!$email_remove_duplicates) {
1408	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1409    } elsif (!email_inuse($name, $address)) {
1410	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1411	$email_hash_name{lc($name)}++ if ($name ne "");
1412	$email_hash_address{lc($address)}++;
1413    }
1414
1415    return 1;
1416}
1417
1418sub push_email_addresses {
1419    my ($address, $role) = @_;
1420
1421    my @address_list = ();
1422
1423    if (rfc822_valid($address)) {
1424	push_email_address($address, $role);
1425    } elsif (@address_list = rfc822_validlist($address)) {
1426	my $array_count = shift(@address_list);
1427	while (my $entry = shift(@address_list)) {
1428	    push_email_address($entry, $role);
1429	}
1430    } else {
1431	if (!push_email_address($address, $role)) {
1432	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1433	}
1434    }
1435}
1436
1437sub add_role {
1438    my ($line, $role) = @_;
1439
1440    my ($name, $address) = parse_email($line);
1441    my $email = format_email($name, $address, $email_usename);
1442
1443    foreach my $entry (@email_to) {
1444	if ($email_remove_duplicates) {
1445	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1446	    if (($name eq $entry_name || $address eq $entry_address)
1447		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1448	    ) {
1449		if ($entry->[1] eq "") {
1450		    $entry->[1] = "$role";
1451		} else {
1452		    $entry->[1] = "$entry->[1],$role";
1453		}
1454	    }
1455	} else {
1456	    if ($email eq $entry->[0]
1457		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1458	    ) {
1459		if ($entry->[1] eq "") {
1460		    $entry->[1] = "$role";
1461		} else {
1462		    $entry->[1] = "$entry->[1],$role";
1463		}
1464	    }
1465	}
1466    }
1467}
1468
1469sub which {
1470    my ($bin) = @_;
1471
1472    foreach my $path (split(/:/, $ENV{PATH})) {
1473	if (-e "$path/$bin") {
1474	    return "$path/$bin";
1475	}
1476    }
1477
1478    return "";
1479}
1480
1481sub which_conf {
1482    my ($conf) = @_;
1483
1484    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1485	if (-e "$path/$conf") {
1486	    return "$path/$conf";
1487	}
1488    }
1489
1490    return "";
1491}
1492
1493sub mailmap_email {
1494    my ($line) = @_;
1495
1496    my ($name, $address) = parse_email($line);
1497    my $email = format_email($name, $address, 1);
1498    my $real_name = $name;
1499    my $real_address = $address;
1500
1501    if (exists $mailmap->{names}->{$email} ||
1502	exists $mailmap->{addresses}->{$email}) {
1503	if (exists $mailmap->{names}->{$email}) {
1504	    $real_name = $mailmap->{names}->{$email};
1505	}
1506	if (exists $mailmap->{addresses}->{$email}) {
1507	    $real_address = $mailmap->{addresses}->{$email};
1508	}
1509    } else {
1510	if (exists $mailmap->{names}->{$address}) {
1511	    $real_name = $mailmap->{names}->{$address};
1512	}
1513	if (exists $mailmap->{addresses}->{$address}) {
1514	    $real_address = $mailmap->{addresses}->{$address};
1515	}
1516    }
1517    return format_email($real_name, $real_address, 1);
1518}
1519
1520sub mailmap {
1521    my (@addresses) = @_;
1522
1523    my @mapped_emails = ();
1524    foreach my $line (@addresses) {
1525	push(@mapped_emails, mailmap_email($line));
1526    }
1527    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1528    return @mapped_emails;
1529}
1530
1531sub merge_by_realname {
1532    my %address_map;
1533    my (@emails) = @_;
1534
1535    foreach my $email (@emails) {
1536	my ($name, $address) = parse_email($email);
1537	if (exists $address_map{$name}) {
1538	    $address = $address_map{$name};
1539	    $email = format_email($name, $address, 1);
1540	} else {
1541	    $address_map{$name} = $address;
1542	}
1543    }
1544}
1545
1546sub git_execute_cmd {
1547    my ($cmd) = @_;
1548    my @lines = ();
1549
1550    my $output = `$cmd`;
1551    $output =~ s/^\s*//gm;
1552    @lines = split("\n", $output);
1553
1554    return @lines;
1555}
1556
1557sub hg_execute_cmd {
1558    my ($cmd) = @_;
1559    my @lines = ();
1560
1561    my $output = `$cmd`;
1562    @lines = split("\n", $output);
1563
1564    return @lines;
1565}
1566
1567sub extract_formatted_signatures {
1568    my (@signature_lines) = @_;
1569
1570    my @type = @signature_lines;
1571
1572    s/\s*(.*):.*/$1/ for (@type);
1573
1574    # cut -f2- -d":"
1575    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1576
1577## Reformat email addresses (with names) to avoid badly written signatures
1578
1579    foreach my $signer (@signature_lines) {
1580	$signer = deduplicate_email($signer);
1581    }
1582
1583    return (\@type, \@signature_lines);
1584}
1585
1586sub vcs_find_signers {
1587    my ($cmd, $file) = @_;
1588    my $commits;
1589    my @lines = ();
1590    my @signatures = ();
1591    my @authors = ();
1592    my @stats = ();
1593
1594    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1595
1596    my $pattern = $VCS_cmds{"commit_pattern"};
1597    my $author_pattern = $VCS_cmds{"author_pattern"};
1598    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1599
1600    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1601
1602    $commits = grep(/$pattern/, @lines);	# of commits
1603
1604    @authors = grep(/$author_pattern/, @lines);
1605    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1606    @stats = grep(/$stat_pattern/, @lines);
1607
1608#    print("stats: <@stats>\n");
1609
1610    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1611
1612    save_commits_by_author(@lines) if ($interactive);
1613    save_commits_by_signer(@lines) if ($interactive);
1614
1615    if (!$email_git_penguin_chiefs) {
1616	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1617    }
1618
1619    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1620    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1621
1622    return ($commits, $signers_ref, $authors_ref, \@stats);
1623}
1624
1625sub vcs_find_author {
1626    my ($cmd) = @_;
1627    my @lines = ();
1628
1629    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1630
1631    if (!$email_git_penguin_chiefs) {
1632	@lines = grep(!/${penguin_chiefs}/i, @lines);
1633    }
1634
1635    return @lines if !@lines;
1636
1637    my @authors = ();
1638    foreach my $line (@lines) {
1639	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1640	    my $author = $1;
1641	    my ($name, $address) = parse_email($author);
1642	    $author = format_email($name, $address, 1);
1643	    push(@authors, $author);
1644	}
1645    }
1646
1647    save_commits_by_author(@lines) if ($interactive);
1648    save_commits_by_signer(@lines) if ($interactive);
1649
1650    return @authors;
1651}
1652
1653sub vcs_save_commits {
1654    my ($cmd) = @_;
1655    my @lines = ();
1656    my @commits = ();
1657
1658    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1659
1660    foreach my $line (@lines) {
1661	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1662	    push(@commits, $1);
1663	}
1664    }
1665
1666    return @commits;
1667}
1668
1669sub vcs_blame {
1670    my ($file) = @_;
1671    my $cmd;
1672    my @commits = ();
1673
1674    return @commits if (!(-f $file));
1675
1676    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1677	my @all_commits = ();
1678
1679	$cmd = $VCS_cmds{"blame_file_cmd"};
1680	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1681	@all_commits = vcs_save_commits($cmd);
1682
1683	foreach my $file_range_diff (@range) {
1684	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1685	    my $diff_file = $1;
1686	    my $diff_start = $2;
1687	    my $diff_length = $3;
1688	    next if ("$file" ne "$diff_file");
1689	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1690		push(@commits, $all_commits[$i]);
1691	    }
1692	}
1693    } elsif (@range) {
1694	foreach my $file_range_diff (@range) {
1695	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1696	    my $diff_file = $1;
1697	    my $diff_start = $2;
1698	    my $diff_length = $3;
1699	    next if ("$file" ne "$diff_file");
1700	    $cmd = $VCS_cmds{"blame_range_cmd"};
1701	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1702	    push(@commits, vcs_save_commits($cmd));
1703	}
1704    } else {
1705	$cmd = $VCS_cmds{"blame_file_cmd"};
1706	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1707	@commits = vcs_save_commits($cmd);
1708    }
1709
1710    foreach my $commit (@commits) {
1711	$commit =~ s/^\^//g;
1712    }
1713
1714    return @commits;
1715}
1716
1717my $printed_novcs = 0;
1718sub vcs_exists {
1719    %VCS_cmds = %VCS_cmds_git;
1720    return 1 if eval $VCS_cmds{"available"};
1721    %VCS_cmds = %VCS_cmds_hg;
1722    return 2 if eval $VCS_cmds{"available"};
1723    %VCS_cmds = ();
1724    if (!$printed_novcs && $email_git) {
1725	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1726	warn("Using a git repository produces better results.\n");
1727	warn("Try Linus Torvalds' latest git repository using:\n");
1728	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1729	$printed_novcs = 1;
1730    }
1731    return 0;
1732}
1733
1734sub vcs_is_git {
1735    vcs_exists();
1736    return $vcs_used == 1;
1737}
1738
1739sub vcs_is_hg {
1740    return $vcs_used == 2;
1741}
1742
1743sub vcs_add_commit_signers {
1744    return if (!vcs_exists());
1745
1746    my ($commit, $desc) = @_;
1747    my $commit_count = 0;
1748    my $commit_authors_ref;
1749    my $commit_signers_ref;
1750    my $stats_ref;
1751    my @commit_authors = ();
1752    my @commit_signers = ();
1753    my $cmd;
1754
1755    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1756    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1757
1758    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1759    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1760    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1761
1762    foreach my $signer (@commit_signers) {
1763	$signer = deduplicate_email($signer);
1764    }
1765
1766    vcs_assign($desc, 1, @commit_signers);
1767}
1768
1769sub interactive_get_maintainers {
1770    my ($list_ref) = @_;
1771    my @list = @$list_ref;
1772
1773    vcs_exists();
1774
1775    my %selected;
1776    my %authored;
1777    my %signed;
1778    my $count = 0;
1779    my $maintained = 0;
1780    foreach my $entry (@list) {
1781	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1782	$selected{$count} = 1;
1783	$authored{$count} = 0;
1784	$signed{$count} = 0;
1785	$count++;
1786    }
1787
1788    #menu loop
1789    my $done = 0;
1790    my $print_options = 0;
1791    my $redraw = 1;
1792    while (!$done) {
1793	$count = 0;
1794	if ($redraw) {
1795	    printf STDERR "\n%1s %2s %-65s",
1796			  "*", "#", "email/list and role:stats";
1797	    if ($email_git ||
1798		($email_git_fallback && !$maintained) ||
1799		$email_git_blame) {
1800		print STDERR "auth sign";
1801	    }
1802	    print STDERR "\n";
1803	    foreach my $entry (@list) {
1804		my $email = $entry->[0];
1805		my $role = $entry->[1];
1806		my $sel = "";
1807		$sel = "*" if ($selected{$count});
1808		my $commit_author = $commit_author_hash{$email};
1809		my $commit_signer = $commit_signer_hash{$email};
1810		my $authored = 0;
1811		my $signed = 0;
1812		$authored++ for (@{$commit_author});
1813		$signed++ for (@{$commit_signer});
1814		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1815		printf STDERR "%4d %4d", $authored, $signed
1816		    if ($authored > 0 || $signed > 0);
1817		printf STDERR "\n     %s\n", $role;
1818		if ($authored{$count}) {
1819		    my $commit_author = $commit_author_hash{$email};
1820		    foreach my $ref (@{$commit_author}) {
1821			print STDERR "     Author: @{$ref}[1]\n";
1822		    }
1823		}
1824		if ($signed{$count}) {
1825		    my $commit_signer = $commit_signer_hash{$email};
1826		    foreach my $ref (@{$commit_signer}) {
1827			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1828		    }
1829		}
1830
1831		$count++;
1832	    }
1833	}
1834	my $date_ref = \$email_git_since;
1835	$date_ref = \$email_hg_since if (vcs_is_hg());
1836	if ($print_options) {
1837	    $print_options = 0;
1838	    if (vcs_exists()) {
1839		print STDERR <<EOT
1840
1841Version Control options:
1842g  use git history      [$email_git]
1843gf use git-fallback     [$email_git_fallback]
1844b  use git blame        [$email_git_blame]
1845bs use blame signatures [$email_git_blame_signatures]
1846c# minimum commits      [$email_git_min_signatures]
1847%# min percent          [$email_git_min_percent]
1848d# history to use       [$$date_ref]
1849x# max maintainers      [$email_git_max_maintainers]
1850t  all signature types  [$email_git_all_signature_types]
1851m  use .mailmap         [$email_use_mailmap]
1852EOT
1853	    }
1854	    print STDERR <<EOT
1855
1856Additional options:
18570  toggle all
1858tm toggle maintainers
1859tg toggle git entries
1860tl toggle open list entries
1861ts toggle subscriber list entries
1862f  emails in file       [$email_file_emails]
1863k  keywords in file     [$keywords]
1864r  remove duplicates    [$email_remove_duplicates]
1865p# pattern match depth  [$pattern_depth]
1866EOT
1867	}
1868	print STDERR
1869"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1870
1871	my $input = <STDIN>;
1872	chomp($input);
1873
1874	$redraw = 1;
1875	my $rerun = 0;
1876	my @wish = split(/[, ]+/, $input);
1877	foreach my $nr (@wish) {
1878	    $nr = lc($nr);
1879	    my $sel = substr($nr, 0, 1);
1880	    my $str = substr($nr, 1);
1881	    my $val = 0;
1882	    $val = $1 if $str =~ /^(\d+)$/;
1883
1884	    if ($sel eq "y") {
1885		$interactive = 0;
1886		$done = 1;
1887		$output_rolestats = 0;
1888		$output_roles = 0;
1889		last;
1890	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1891		$selected{$nr - 1} = !$selected{$nr - 1};
1892	    } elsif ($sel eq "*" || $sel eq '^') {
1893		my $toggle = 0;
1894		$toggle = 1 if ($sel eq '*');
1895		for (my $i = 0; $i < $count; $i++) {
1896		    $selected{$i} = $toggle;
1897		}
1898	    } elsif ($sel eq "0") {
1899		for (my $i = 0; $i < $count; $i++) {
1900		    $selected{$i} = !$selected{$i};
1901		}
1902	    } elsif ($sel eq "t") {
1903		if (lc($str) eq "m") {
1904		    for (my $i = 0; $i < $count; $i++) {
1905			$selected{$i} = !$selected{$i}
1906			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1907		    }
1908		} elsif (lc($str) eq "g") {
1909		    for (my $i = 0; $i < $count; $i++) {
1910			$selected{$i} = !$selected{$i}
1911			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1912		    }
1913		} elsif (lc($str) eq "l") {
1914		    for (my $i = 0; $i < $count; $i++) {
1915			$selected{$i} = !$selected{$i}
1916			    if ($list[$i]->[1] =~ /^(open list)/i);
1917		    }
1918		} elsif (lc($str) eq "s") {
1919		    for (my $i = 0; $i < $count; $i++) {
1920			$selected{$i} = !$selected{$i}
1921			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1922		    }
1923		}
1924	    } elsif ($sel eq "a") {
1925		if ($val > 0 && $val <= $count) {
1926		    $authored{$val - 1} = !$authored{$val - 1};
1927		} elsif ($str eq '*' || $str eq '^') {
1928		    my $toggle = 0;
1929		    $toggle = 1 if ($str eq '*');
1930		    for (my $i = 0; $i < $count; $i++) {
1931			$authored{$i} = $toggle;
1932		    }
1933		}
1934	    } elsif ($sel eq "s") {
1935		if ($val > 0 && $val <= $count) {
1936		    $signed{$val - 1} = !$signed{$val - 1};
1937		} elsif ($str eq '*' || $str eq '^') {
1938		    my $toggle = 0;
1939		    $toggle = 1 if ($str eq '*');
1940		    for (my $i = 0; $i < $count; $i++) {
1941			$signed{$i} = $toggle;
1942		    }
1943		}
1944	    } elsif ($sel eq "o") {
1945		$print_options = 1;
1946		$redraw = 1;
1947	    } elsif ($sel eq "g") {
1948		if ($str eq "f") {
1949		    bool_invert(\$email_git_fallback);
1950		} else {
1951		    bool_invert(\$email_git);
1952		}
1953		$rerun = 1;
1954	    } elsif ($sel eq "b") {
1955		if ($str eq "s") {
1956		    bool_invert(\$email_git_blame_signatures);
1957		} else {
1958		    bool_invert(\$email_git_blame);
1959		}
1960		$rerun = 1;
1961	    } elsif ($sel eq "c") {
1962		if ($val > 0) {
1963		    $email_git_min_signatures = $val;
1964		    $rerun = 1;
1965		}
1966	    } elsif ($sel eq "x") {
1967		if ($val > 0) {
1968		    $email_git_max_maintainers = $val;
1969		    $rerun = 1;
1970		}
1971	    } elsif ($sel eq "%") {
1972		if ($str ne "" && $val >= 0) {
1973		    $email_git_min_percent = $val;
1974		    $rerun = 1;
1975		}
1976	    } elsif ($sel eq "d") {
1977		if (vcs_is_git()) {
1978		    $email_git_since = $str;
1979		} elsif (vcs_is_hg()) {
1980		    $email_hg_since = $str;
1981		}
1982		$rerun = 1;
1983	    } elsif ($sel eq "t") {
1984		bool_invert(\$email_git_all_signature_types);
1985		$rerun = 1;
1986	    } elsif ($sel eq "f") {
1987		bool_invert(\$email_file_emails);
1988		$rerun = 1;
1989	    } elsif ($sel eq "r") {
1990		bool_invert(\$email_remove_duplicates);
1991		$rerun = 1;
1992	    } elsif ($sel eq "m") {
1993		bool_invert(\$email_use_mailmap);
1994		read_mailmap();
1995		$rerun = 1;
1996	    } elsif ($sel eq "k") {
1997		bool_invert(\$keywords);
1998		$rerun = 1;
1999	    } elsif ($sel eq "p") {
2000		if ($str ne "" && $val >= 0) {
2001		    $pattern_depth = $val;
2002		    $rerun = 1;
2003		}
2004	    } elsif ($sel eq "h" || $sel eq "?") {
2005		print STDERR <<EOT
2006
2007Interactive mode allows you to select the various maintainers, submitters,
2008commit signers and mailing lists that could be CC'd on a patch.
2009
2010Any *'d entry is selected.
2011
2012If you have git or hg installed, you can choose to summarize the commit
2013history of files in the patch.  Also, each line of the current file can
2014be matched to its commit author and that commits signers with blame.
2015
2016Various knobs exist to control the length of time for active commit
2017tracking, the maximum number of commit authors and signers to add,
2018and such.
2019
2020Enter selections at the prompt until you are satisfied that the selected
2021maintainers are appropriate.  You may enter multiple selections separated
2022by either commas or spaces.
2023
2024EOT
2025	    } else {
2026		print STDERR "invalid option: '$nr'\n";
2027		$redraw = 0;
2028	    }
2029	}
2030	if ($rerun) {
2031	    print STDERR "git-blame can be very slow, please have patience..."
2032		if ($email_git_blame);
2033	    goto &get_maintainers;
2034	}
2035    }
2036
2037    #drop not selected entries
2038    $count = 0;
2039    my @new_emailto = ();
2040    foreach my $entry (@list) {
2041	if ($selected{$count}) {
2042	    push(@new_emailto, $list[$count]);
2043	}
2044	$count++;
2045    }
2046    return @new_emailto;
2047}
2048
2049sub bool_invert {
2050    my ($bool_ref) = @_;
2051
2052    if ($$bool_ref) {
2053	$$bool_ref = 0;
2054    } else {
2055	$$bool_ref = 1;
2056    }
2057}
2058
2059sub deduplicate_email {
2060    my ($email) = @_;
2061
2062    my $matched = 0;
2063    my ($name, $address) = parse_email($email);
2064    $email = format_email($name, $address, 1);
2065    $email = mailmap_email($email);
2066
2067    return $email if (!$email_remove_duplicates);
2068
2069    ($name, $address) = parse_email($email);
2070
2071    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2072	$name = $deduplicate_name_hash{lc($name)}->[0];
2073	$address = $deduplicate_name_hash{lc($name)}->[1];
2074	$matched = 1;
2075    } elsif ($deduplicate_address_hash{lc($address)}) {
2076	$name = $deduplicate_address_hash{lc($address)}->[0];
2077	$address = $deduplicate_address_hash{lc($address)}->[1];
2078	$matched = 1;
2079    }
2080    if (!$matched) {
2081	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2082	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2083    }
2084    $email = format_email($name, $address, 1);
2085    $email = mailmap_email($email);
2086    return $email;
2087}
2088
2089sub save_commits_by_author {
2090    my (@lines) = @_;
2091
2092    my @authors = ();
2093    my @commits = ();
2094    my @subjects = ();
2095
2096    foreach my $line (@lines) {
2097	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2098	    my $author = $1;
2099	    $author = deduplicate_email($author);
2100	    push(@authors, $author);
2101	}
2102	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2103	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2104    }
2105
2106    for (my $i = 0; $i < @authors; $i++) {
2107	my $exists = 0;
2108	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2109	    if (@{$ref}[0] eq $commits[$i] &&
2110		@{$ref}[1] eq $subjects[$i]) {
2111		$exists = 1;
2112		last;
2113	    }
2114	}
2115	if (!$exists) {
2116	    push(@{$commit_author_hash{$authors[$i]}},
2117		 [ ($commits[$i], $subjects[$i]) ]);
2118	}
2119    }
2120}
2121
2122sub save_commits_by_signer {
2123    my (@lines) = @_;
2124
2125    my $commit = "";
2126    my $subject = "";
2127
2128    foreach my $line (@lines) {
2129	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2130	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2131	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2132	    my @signatures = ($line);
2133	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2134	    my @types = @$types_ref;
2135	    my @signers = @$signers_ref;
2136
2137	    my $type = $types[0];
2138	    my $signer = $signers[0];
2139
2140	    $signer = deduplicate_email($signer);
2141
2142	    my $exists = 0;
2143	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2144		if (@{$ref}[0] eq $commit &&
2145		    @{$ref}[1] eq $subject &&
2146		    @{$ref}[2] eq $type) {
2147		    $exists = 1;
2148		    last;
2149		}
2150	    }
2151	    if (!$exists) {
2152		push(@{$commit_signer_hash{$signer}},
2153		     [ ($commit, $subject, $type) ]);
2154	    }
2155	}
2156    }
2157}
2158
2159sub vcs_assign {
2160    my ($role, $divisor, @lines) = @_;
2161
2162    my %hash;
2163    my $count = 0;
2164
2165    return if (@lines <= 0);
2166
2167    if ($divisor <= 0) {
2168	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2169	$divisor = 1;
2170    }
2171
2172    @lines = mailmap(@lines);
2173
2174    return if (@lines <= 0);
2175
2176    @lines = sort(@lines);
2177
2178    # uniq -c
2179    $hash{$_}++ for @lines;
2180
2181    # sort -rn
2182    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2183	my $sign_offs = $hash{$line};
2184	my $percent = $sign_offs * 100 / $divisor;
2185
2186	$percent = 100 if ($percent > 100);
2187	next if (ignore_email_address($line));
2188	$count++;
2189	last if ($sign_offs < $email_git_min_signatures ||
2190		 $count > $email_git_max_maintainers ||
2191		 $percent < $email_git_min_percent);
2192	push_email_address($line, '');
2193	if ($output_rolestats) {
2194	    my $fmt_percent = sprintf("%.0f", $percent);
2195	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2196	} else {
2197	    add_role($line, $role);
2198	}
2199    }
2200}
2201
2202sub vcs_file_signoffs {
2203    my ($file) = @_;
2204
2205    my $authors_ref;
2206    my $signers_ref;
2207    my $stats_ref;
2208    my @authors = ();
2209    my @signers = ();
2210    my @stats = ();
2211    my $commits;
2212
2213    $vcs_used = vcs_exists();
2214    return if (!$vcs_used);
2215
2216    my $cmd = $VCS_cmds{"find_signers_cmd"};
2217    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2218
2219    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2220
2221    @signers = @{$signers_ref} if defined $signers_ref;
2222    @authors = @{$authors_ref} if defined $authors_ref;
2223    @stats = @{$stats_ref} if defined $stats_ref;
2224
2225#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2226
2227    foreach my $signer (@signers) {
2228	$signer = deduplicate_email($signer);
2229    }
2230
2231    vcs_assign("commit_signer", $commits, @signers);
2232    vcs_assign("authored", $commits, @authors);
2233    if ($#authors == $#stats) {
2234	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2235	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2236
2237	my $added = 0;
2238	my $deleted = 0;
2239	for (my $i = 0; $i <= $#stats; $i++) {
2240	    if ($stats[$i] =~ /$stat_pattern/) {
2241		$added += $1;
2242		$deleted += $2;
2243	    }
2244	}
2245	my @tmp_authors = uniq(@authors);
2246	foreach my $author (@tmp_authors) {
2247	    $author = deduplicate_email($author);
2248	}
2249	@tmp_authors = uniq(@tmp_authors);
2250	my @list_added = ();
2251	my @list_deleted = ();
2252	foreach my $author (@tmp_authors) {
2253	    my $auth_added = 0;
2254	    my $auth_deleted = 0;
2255	    for (my $i = 0; $i <= $#stats; $i++) {
2256		if ($author eq deduplicate_email($authors[$i]) &&
2257		    $stats[$i] =~ /$stat_pattern/) {
2258		    $auth_added += $1;
2259		    $auth_deleted += $2;
2260		}
2261	    }
2262	    for (my $i = 0; $i < $auth_added; $i++) {
2263		push(@list_added, $author);
2264	    }
2265	    for (my $i = 0; $i < $auth_deleted; $i++) {
2266		push(@list_deleted, $author);
2267	    }
2268	}
2269	vcs_assign("added_lines", $added, @list_added);
2270	vcs_assign("removed_lines", $deleted, @list_deleted);
2271    }
2272}
2273
2274sub vcs_file_blame {
2275    my ($file) = @_;
2276
2277    my @signers = ();
2278    my @all_commits = ();
2279    my @commits = ();
2280    my $total_commits;
2281    my $total_lines;
2282
2283    $vcs_used = vcs_exists();
2284    return if (!$vcs_used);
2285
2286    @all_commits = vcs_blame($file);
2287    @commits = uniq(@all_commits);
2288    $total_commits = @commits;
2289    $total_lines = @all_commits;
2290
2291    if ($email_git_blame_signatures) {
2292	if (vcs_is_hg()) {
2293	    my $commit_count;
2294	    my $commit_authors_ref;
2295	    my $commit_signers_ref;
2296	    my $stats_ref;
2297	    my @commit_authors = ();
2298	    my @commit_signers = ();
2299	    my $commit = join(" -r ", @commits);
2300	    my $cmd;
2301
2302	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2303	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2304
2305	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2306	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2307	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2308
2309	    push(@signers, @commit_signers);
2310	} else {
2311	    foreach my $commit (@commits) {
2312		my $commit_count;
2313		my $commit_authors_ref;
2314		my $commit_signers_ref;
2315		my $stats_ref;
2316		my @commit_authors = ();
2317		my @commit_signers = ();
2318		my $cmd;
2319
2320		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2321		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2322
2323		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2324		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2325		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2326
2327		push(@signers, @commit_signers);
2328	    }
2329	}
2330    }
2331
2332    if ($from_filename) {
2333	if ($output_rolestats) {
2334	    my @blame_signers;
2335	    if (vcs_is_hg()) {{		# Double brace for last exit
2336		my $commit_count;
2337		my @commit_signers = ();
2338		@commits = uniq(@commits);
2339		@commits = sort(@commits);
2340		my $commit = join(" -r ", @commits);
2341		my $cmd;
2342
2343		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2344		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2345
2346		my @lines = ();
2347
2348		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2349
2350		if (!$email_git_penguin_chiefs) {
2351		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2352		}
2353
2354		last if !@lines;
2355
2356		my @authors = ();
2357		foreach my $line (@lines) {
2358		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2359			my $author = $1;
2360			$author = deduplicate_email($author);
2361			push(@authors, $author);
2362		    }
2363		}
2364
2365		save_commits_by_author(@lines) if ($interactive);
2366		save_commits_by_signer(@lines) if ($interactive);
2367
2368		push(@signers, @authors);
2369	    }}
2370	    else {
2371		foreach my $commit (@commits) {
2372		    my $i;
2373		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2374		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2375		    my @author = vcs_find_author($cmd);
2376		    next if !@author;
2377
2378		    my $formatted_author = deduplicate_email($author[0]);
2379
2380		    my $count = grep(/$commit/, @all_commits);
2381		    for ($i = 0; $i < $count ; $i++) {
2382			push(@blame_signers, $formatted_author);
2383		    }
2384		}
2385	    }
2386	    if (@blame_signers) {
2387		vcs_assign("authored lines", $total_lines, @blame_signers);
2388	    }
2389	}
2390	foreach my $signer (@signers) {
2391	    $signer = deduplicate_email($signer);
2392	}
2393	vcs_assign("commits", $total_commits, @signers);
2394    } else {
2395	foreach my $signer (@signers) {
2396	    $signer = deduplicate_email($signer);
2397	}
2398	vcs_assign("modified commits", $total_commits, @signers);
2399    }
2400}
2401
2402sub vcs_file_exists {
2403    my ($file) = @_;
2404
2405    my $exists;
2406
2407    my $vcs_used = vcs_exists();
2408    return 0 if (!$vcs_used);
2409
2410    my $cmd = $VCS_cmds{"file_exists_cmd"};
2411    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2412    $cmd .= " 2>&1";
2413    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2414
2415    return 0 if ($? != 0);
2416
2417    return $exists;
2418}
2419
2420sub vcs_list_files {
2421    my ($file) = @_;
2422
2423    my @lsfiles = ();
2424
2425    my $vcs_used = vcs_exists();
2426    return 0 if (!$vcs_used);
2427
2428    my $cmd = $VCS_cmds{"list_files_cmd"};
2429    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2430    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2431
2432    return () if ($? != 0);
2433
2434    return @lsfiles;
2435}
2436
2437sub uniq {
2438    my (@parms) = @_;
2439
2440    my %saw;
2441    @parms = grep(!$saw{$_}++, @parms);
2442    return @parms;
2443}
2444
2445sub sort_and_uniq {
2446    my (@parms) = @_;
2447
2448    my %saw;
2449    @parms = sort @parms;
2450    @parms = grep(!$saw{$_}++, @parms);
2451    return @parms;
2452}
2453
2454sub clean_file_emails {
2455    my (@file_emails) = @_;
2456    my @fmt_emails = ();
2457
2458    foreach my $email (@file_emails) {
2459	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2460	my ($name, $address) = parse_email($email);
2461	if ($name eq '"[,\.]"') {
2462	    $name = "";
2463	}
2464
2465	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2466	if (@nw > 2) {
2467	    my $first = $nw[@nw - 3];
2468	    my $middle = $nw[@nw - 2];
2469	    my $last = $nw[@nw - 1];
2470
2471	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2472		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2473		(length($middle) == 1 ||
2474		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2475		$name = "$first $middle $last";
2476	    } else {
2477		$name = "$middle $last";
2478	    }
2479	}
2480
2481	if (substr($name, -1) =~ /[,\.]/) {
2482	    $name = substr($name, 0, length($name) - 1);
2483	} elsif (substr($name, -2) =~ /[,\.]"/) {
2484	    $name = substr($name, 0, length($name) - 2) . '"';
2485	}
2486
2487	if (substr($name, 0, 1) =~ /[,\.]/) {
2488	    $name = substr($name, 1, length($name) - 1);
2489	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2490	    $name = '"' . substr($name, 2, length($name) - 2);
2491	}
2492
2493	my $fmt_email = format_email($name, $address, $email_usename);
2494	push(@fmt_emails, $fmt_email);
2495    }
2496    return @fmt_emails;
2497}
2498
2499sub merge_email {
2500    my @lines;
2501    my %saw;
2502
2503    for (@_) {
2504	my ($address, $role) = @$_;
2505	if (!$saw{$address}) {
2506	    if ($output_roles) {
2507		push(@lines, "$address ($role)");
2508	    } else {
2509		push(@lines, $address);
2510	    }
2511	    $saw{$address} = 1;
2512	}
2513    }
2514
2515    return @lines;
2516}
2517
2518sub output {
2519    my (@parms) = @_;
2520
2521    if ($output_multiline) {
2522	foreach my $line (@parms) {
2523	    print("${line}\n");
2524	}
2525    } else {
2526	print(join($output_separator, @parms));
2527	print("\n");
2528    }
2529}
2530
2531my $rfc822re;
2532
2533sub make_rfc822re {
2534#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2535#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2536#   This regexp will only work on addresses which have had comments stripped
2537#   and replaced with rfc822_lwsp.
2538
2539    my $specials = '()<>@,;:\\\\".\\[\\]';
2540    my $controls = '\\000-\\037\\177';
2541
2542    my $dtext = "[^\\[\\]\\r\\\\]";
2543    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2544
2545    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2546
2547#   Use zero-width assertion to spot the limit of an atom.  A simple
2548#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2549    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2550    my $word = "(?:$atom|$quoted_string)";
2551    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2552
2553    my $sub_domain = "(?:$atom|$domain_literal)";
2554    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2555
2556    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2557
2558    my $phrase = "$word*";
2559    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2560    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2561    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2562
2563    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2564    my $address = "(?:$mailbox|$group)";
2565
2566    return "$rfc822_lwsp*$address";
2567}
2568
2569sub rfc822_strip_comments {
2570    my $s = shift;
2571#   Recursively remove comments, and replace with a single space.  The simpler
2572#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2573#   chars in atoms, for example.
2574
2575    while ($s =~ s/^((?:[^"\\]|\\.)*
2576                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2577                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2578    return $s;
2579}
2580
2581#   valid: returns true if the parameter is an RFC822 valid address
2582#
2583sub rfc822_valid {
2584    my $s = rfc822_strip_comments(shift);
2585
2586    if (!$rfc822re) {
2587        $rfc822re = make_rfc822re();
2588    }
2589
2590    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2591}
2592
2593#   validlist: In scalar context, returns true if the parameter is an RFC822
2594#              valid list of addresses.
2595#
2596#              In list context, returns an empty list on failure (an invalid
2597#              address was found); otherwise a list whose first element is the
2598#              number of addresses found and whose remaining elements are the
2599#              addresses.  This is needed to disambiguate failure (invalid)
2600#              from success with no addresses found, because an empty string is
2601#              a valid list.
2602
2603sub rfc822_validlist {
2604    my $s = rfc822_strip_comments(shift);
2605
2606    if (!$rfc822re) {
2607        $rfc822re = make_rfc822re();
2608    }
2609    # * null list items are valid according to the RFC
2610    # * the '1' business is to aid in distinguishing failure from no results
2611
2612    my @r;
2613    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2614	$s =~ m/^$rfc822_char*$/) {
2615        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2616            push(@r, $1);
2617        }
2618        return wantarray ? (scalar(@r), @r) : 1;
2619    }
2620    return wantarray ? () : 0;
2621}
2622