123cf27dbSMartin Matuska#!/usr/bin/env perl 223cf27dbSMartin Matuska 323cf27dbSMartin Matuska# SPDX-License-Identifier: MIT 423cf27dbSMartin Matuska# 523cf27dbSMartin Matuska# Copyright (c) 2023, Rob Norris <robn@despairlabs.com> 623cf27dbSMartin Matuska# 723cf27dbSMartin Matuska# Permission is hereby granted, free of charge, to any person obtaining a copy 823cf27dbSMartin Matuska# of this software and associated documentation files (the "Software"), to 923cf27dbSMartin Matuska# deal in the Software without restriction, including without limitation the 1023cf27dbSMartin Matuska# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 1123cf27dbSMartin Matuska# sell copies of the Software, and to permit persons to whom the Software is 1223cf27dbSMartin Matuska# furnished to do so, subject to the following conditions: 1323cf27dbSMartin Matuska# 1423cf27dbSMartin Matuska# The above copyright notice and this permission notice shall be included in 1523cf27dbSMartin Matuska# all copies or substantial portions of the Software. 1623cf27dbSMartin Matuska# 1723cf27dbSMartin Matuska# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1823cf27dbSMartin Matuska# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1923cf27dbSMartin Matuska# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 2023cf27dbSMartin Matuska# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2123cf27dbSMartin Matuska# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2223cf27dbSMartin Matuska# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2323cf27dbSMartin Matuska# IN THE SOFTWARE. 2423cf27dbSMartin Matuska 2523cf27dbSMartin Matuska 2623cf27dbSMartin Matuska# This program will update the AUTHORS file to include commit authors that are 2723cf27dbSMartin Matuska# in the git history but are not yet credited. 2823cf27dbSMartin Matuska# 2923cf27dbSMartin Matuska# The CONTRIBUTORS section of the AUTHORS file attempts to be a list of 3023cf27dbSMartin Matuska# individual contributors to OpenZFS, with one name, address and line per 3123cf27dbSMartin Matuska# person. This is good for readability, but does not really leave room for the 3223cf27dbSMartin Matuska# that names and emails on commits from the same individual can be different, 3323cf27dbSMartin Matuska# for all kinds of reasons, not limited to: 3423cf27dbSMartin Matuska# 3523cf27dbSMartin Matuska# - a person might change organisations, and so their email address changes 3623cf27dbSMartin Matuska# 3723cf27dbSMartin Matuska# - a person might be paid to work on OpenZFS for their employer, and then hack 3823cf27dbSMartin Matuska# on personal projects in the evening, so commits legitimately come from 3923cf27dbSMartin Matuska# different addresses 4023cf27dbSMartin Matuska# 4123cf27dbSMartin Matuska# - names change for all kinds of reasons 4223cf27dbSMartin Matuska# 4323cf27dbSMartin Matuska# To try and account for this, this program will try to find all the possible 4423cf27dbSMartin Matuska# names and emails for a single contributor, and then select the "best" one to 4523cf27dbSMartin Matuska# add to the AUTHORS file. 4623cf27dbSMartin Matuska# 4723cf27dbSMartin Matuska# The CONTRIBUTORS section of the AUTHORS file is considered the source of 4823cf27dbSMartin Matuska# truth. Once an individual committer is listed in there, that line will not be 4923cf27dbSMartin Matuska# removed regardless of what is discovered in the commit history. However, it 5023cf27dbSMartin Matuska# can't just be _anything_. The name or email still has to match something seen 5123cf27dbSMartin Matuska# in the commit history, so that we're able to undertand that its the same 5223cf27dbSMartin Matuska# contributor. 5323cf27dbSMartin Matuska# 5423cf27dbSMartin Matuska# The bulk of the work is in running `git log` to fetch commit author names and 5523cf27dbSMartin Matuska# emails. For each value, we generate a "slug" to use as an internal id for 5623cf27dbSMartin Matuska# that value, which is mostly just the lowercase of the value with whitespace 5723cf27dbSMartin Matuska# and punctuation removed. Two values with subtle differences can produce the 5823cf27dbSMartin Matuska# same slug, so at this point we also try to keep the "best" pre-slug value as 5923cf27dbSMartin Matuska# the display version. We use this slug to update two maps, one of email->name, 6023cf27dbSMartin Matuska# the other of name->email. 6123cf27dbSMartin Matuska# 62*b1c1ee44SMartin Matuska# Where possible, we also consider Signed-off-by: trailers in the commit 63*b1c1ee44SMartin Matuska# message, and if they match the commit author, enter them into the maps also. 64*b1c1ee44SMartin Matuska# Because a commit can contain multiple signoffs, we only track one if either 65*b1c1ee44SMartin Matuska# the name or the email address match the commit author (by slug). This is 66*b1c1ee44SMartin Matuska# mostly aimed at letting an explicit signoff override a generated name or 67*b1c1ee44SMartin Matuska# email on the same commit (usually a Github noreply), while avoiding every 68*b1c1ee44SMartin Matuska# signoff ever being treated as a possible canonical ident for some other 69*b1c1ee44SMartin Matuska# committer. (Also note that this behaviour only works for signoffs that can be 70*b1c1ee44SMartin Matuska# extracted with git-interpret-trailers, which misses many seen in the OpenZFS 71*b1c1ee44SMartin Matuska# git history, for various reasons). 72*b1c1ee44SMartin Matuska# 7323cf27dbSMartin Matuska# Once collected, we then walk all the emails we've seen and get all the names 7423cf27dbSMartin Matuska# associated with every instance. Then for each of those names, we get all the 7523cf27dbSMartin Matuska# emails associated, and so on until we've seen all the connected names and 7623cf27dbSMartin Matuska# emails. This collection is every possible name and email for an individual 7723cf27dbSMartin Matuska# contributor. 7823cf27dbSMartin Matuska# 7923cf27dbSMartin Matuska# Finaly, we consider these groups, and select the "best" name and email for 8023cf27dbSMartin Matuska# the contributor, and add them to the author tables if they aren't there 8123cf27dbSMartin Matuska# already. Once we've done everyone, we write out a new AUTHORS file, and 8223cf27dbSMartin Matuska# that's the whole job. 8323cf27dbSMartin Matuska# 8423cf27dbSMartin Matuska# This is imperfect! Its necessary for the user to examine the diff and make 8523cf27dbSMartin Matuska# sure its sensible. If it hasn't hooked up right, it may necessary to adjust 8623cf27dbSMartin Matuska# the input data (via .mailmap) or improve the heuristics in this program. It 8723cf27dbSMartin Matuska# took a long time to get into good shape when first written (355 new names 8823cf27dbSMartin Matuska# added to AUTHORS!) but hopefully in the future we'll be running this 8923cf27dbSMartin Matuska# regularly so it doesn't fall so far behind. 9023cf27dbSMartin Matuska 9123cf27dbSMartin Matuska 9223cf27dbSMartin Matuskause 5.010; 9323cf27dbSMartin Matuskause warnings; 9423cf27dbSMartin Matuskause strict; 9523cf27dbSMartin Matuska 9623cf27dbSMartin Matuska# Storage for the "best looking" version of name or email, keyed on slug. 9723cf27dbSMartin Matuskamy %display_name; 9823cf27dbSMartin Matuskamy %display_email; 9923cf27dbSMartin Matuska 10023cf27dbSMartin Matuska# First, we load the existing AUTHORS file. We save everything before 10123cf27dbSMartin Matuska# CONTRIBUTORS: line as-is so we can write it back out to the new file. Then 10223cf27dbSMartin Matuska# we extract name,email pairs from the remainder and store them in a pair of 10323cf27dbSMartin Matuska# hashtables, keyed on slug. 10423cf27dbSMartin Matuskamy %authors_name; 10523cf27dbSMartin Matuskamy %authors_email; 10623cf27dbSMartin Matuska 10723cf27dbSMartin Matuskamy @authors_header; 10823cf27dbSMartin Matuska 10923cf27dbSMartin Matuskafor my $line (do { local (@ARGV) = ('AUTHORS'); <> }) { 11023cf27dbSMartin Matuska chomp $line; 11123cf27dbSMartin Matuska state $in_header = 1; 11223cf27dbSMartin Matuska if ($in_header) { 11323cf27dbSMartin Matuska push @authors_header, $line; 11423cf27dbSMartin Matuska $in_header = 0 if $line =~ m/^CONTRIBUTORS:/; 11523cf27dbSMartin Matuska } else { 11623cf27dbSMartin Matuska my ($name, $email) = $line =~ m/^\s+(.+)(?= <) <([^>]+)/; 11723cf27dbSMartin Matuska next unless $name; 11823cf27dbSMartin Matuska 11923cf27dbSMartin Matuska my $semail = email_slug($email); 12023cf27dbSMartin Matuska my $sname = name_slug($name); 12123cf27dbSMartin Matuska 12223cf27dbSMartin Matuska $authors_name{$semail} = $sname; 12323cf27dbSMartin Matuska $authors_email{$sname} = $semail; 12423cf27dbSMartin Matuska 12523cf27dbSMartin Matuska # The name/email in AUTHORS is already the "best looking" 12623cf27dbSMartin Matuska # version, by definition. 12723cf27dbSMartin Matuska $display_name{$sname} = $name; 12823cf27dbSMartin Matuska $display_email{$semail} = $email; 12923cf27dbSMartin Matuska } 13023cf27dbSMartin Matuska} 13123cf27dbSMartin Matuska 132*b1c1ee44SMartin Matuska# Next, we load all the commit authors and signoff pairs, and form name<->email 133*b1c1ee44SMartin Matuska# mappings, keyed on slug. Note that this format is getting the 134*b1c1ee44SMartin Matuska# .mailmap-converted form. This lets us control the input to some extent by 135*b1c1ee44SMartin Matuska# making changes there. 136*b1c1ee44SMartin Matuskamy %seen_names; 137*b1c1ee44SMartin Matuskamy %seen_emails; 13823cf27dbSMartin Matuska 139*b1c1ee44SMartin Matuska# The true email address from commits, by slug. We do this so we can generate 140*b1c1ee44SMartin Matuska# mailmap entries, which will only match the exact address from the commit, 141*b1c1ee44SMartin Matuska# not anything "prettified". This lets us remember the prefix part of Github 142*b1c1ee44SMartin Matuska# noreply addresses, while not including it in AUTHORS if that is truly the 143*b1c1ee44SMartin Matuska# best option we have. 144*b1c1ee44SMartin Matuskamy %commit_email; 145*b1c1ee44SMartin Matuska 146*b1c1ee44SMartin Matuskafor my $line (reverse qx(git log --pretty=tformat:'%aN:::%aE:::%(trailers:key=signed-off-by,valueonly,separator=:::)')) { 14723cf27dbSMartin Matuska chomp $line; 148*b1c1ee44SMartin Matuska my ($name, $email, @signoffs) = split ':::', $line; 14923cf27dbSMartin Matuska next unless $name && $email; 15023cf27dbSMartin Matuska 15123cf27dbSMartin Matuska my $semail = email_slug($email); 15223cf27dbSMartin Matuska my $sname = name_slug($name); 15323cf27dbSMartin Matuska 154*b1c1ee44SMartin Matuska # Track the committer name and email. 155*b1c1ee44SMartin Matuska $seen_names{$semail}{$sname} = 1; 156*b1c1ee44SMartin Matuska $seen_emails{$sname}{$semail} = 1; 15723cf27dbSMartin Matuska 158*b1c1ee44SMartin Matuska # Keep the original commit address. 159*b1c1ee44SMartin Matuska $commit_email{$semail} = $email; 16023cf27dbSMartin Matuska 161*b1c1ee44SMartin Matuska # Consider if these are the best we've ever seen. 16223cf27dbSMartin Matuska update_display_name($name); 163*b1c1ee44SMartin Matuska update_display_email($email); 164*b1c1ee44SMartin Matuska 165*b1c1ee44SMartin Matuska # Check signoffs. any that have a matching name or email as the 166*b1c1ee44SMartin Matuska # committer (by slug), also track them. 167*b1c1ee44SMartin Matuska for my $signoff (@signoffs) { 168*b1c1ee44SMartin Matuska my ($soname, $soemail) = $signoff =~ m/^([^<]+)\s+<(.+)>$/; 169*b1c1ee44SMartin Matuska next unless $soname && $soemail; 170*b1c1ee44SMartin Matuska my $ssoname = name_slug($soname); 171*b1c1ee44SMartin Matuska my $ssoemail = email_slug($soemail); 172*b1c1ee44SMartin Matuska if (($semail eq $ssoemail) ^ ($sname eq $ssoname)) { 173*b1c1ee44SMartin Matuska $seen_names{$ssoemail}{$ssoname} = 1; 174*b1c1ee44SMartin Matuska $seen_emails{$ssoname}{$ssoemail} = 1; 175*b1c1ee44SMartin Matuska update_display_name($soname); 176*b1c1ee44SMartin Matuska update_display_email($soemail); 177*b1c1ee44SMartin Matuska } 17823cf27dbSMartin Matuska } 17923cf27dbSMartin Matuska} 18023cf27dbSMartin Matuska 18123cf27dbSMartin Matuska# Now collect unique committers by all names+emails we've ever seen for them. 18223cf27dbSMartin Matuska# We start with emails and resolve all possible names, then we resolve the 18323cf27dbSMartin Matuska# emails for those names, and round and round until there's nothing left. 18423cf27dbSMartin Matuskamy @committers; 185*b1c1ee44SMartin Matuskafor my $start_email (sort keys %seen_names) { 18623cf27dbSMartin Matuska # it might have been deleted already through a cross-reference 187*b1c1ee44SMartin Matuska next unless $seen_names{$start_email}; 18823cf27dbSMartin Matuska 18923cf27dbSMartin Matuska my %emails; 19023cf27dbSMartin Matuska my %names; 19123cf27dbSMartin Matuska 19223cf27dbSMartin Matuska my @check_emails = ($start_email); 19323cf27dbSMartin Matuska my @check_names; 19423cf27dbSMartin Matuska while (@check_emails || @check_names) { 19523cf27dbSMartin Matuska while (my $email = shift @check_emails) { 19623cf27dbSMartin Matuska next if $emails{$email}++; 19723cf27dbSMartin Matuska push @check_names, 198*b1c1ee44SMartin Matuska sort keys %{delete $seen_names{$email}}; 19923cf27dbSMartin Matuska } 20023cf27dbSMartin Matuska while (my $name = shift @check_names) { 20123cf27dbSMartin Matuska next if $names{$name}++; 20223cf27dbSMartin Matuska push @check_emails, 203*b1c1ee44SMartin Matuska sort keys %{delete $seen_emails{$name}}; 20423cf27dbSMartin Matuska } 20523cf27dbSMartin Matuska } 20623cf27dbSMartin Matuska 20723cf27dbSMartin Matuska # A "committer" is the collection of connected names and emails. 20823cf27dbSMartin Matuska push @committers, [[sort keys %emails], [sort keys %names]]; 20923cf27dbSMartin Matuska} 21023cf27dbSMartin Matuska 21123cf27dbSMartin Matuska# Now we have our committers, we can work out what to add to AUTHORS. 21223cf27dbSMartin Matuskafor my $committer (@committers) { 21323cf27dbSMartin Matuska my ($emails, $names) = @$committer; 21423cf27dbSMartin Matuska 21523cf27dbSMartin Matuska # If this commiter is already in AUTHORS, we must not touch. 21623cf27dbSMartin Matuska next if grep { $authors_name{$_} } @$emails; 21723cf27dbSMartin Matuska next if grep { $authors_email{$_} } @$names; 21823cf27dbSMartin Matuska 21923cf27dbSMartin Matuska # Decide on the "best" name and email to use 22023cf27dbSMartin Matuska my $email = best_email(@$emails); 22123cf27dbSMartin Matuska my $name = best_name(@$names); 22223cf27dbSMartin Matuska 22323cf27dbSMartin Matuska $authors_email{$name} = $email; 22423cf27dbSMartin Matuska $authors_name{$email} = $name; 225*b1c1ee44SMartin Matuska 226*b1c1ee44SMartin Matuska # We've now selected our canonical name going forward. If there 227*b1c1ee44SMartin Matuska # were other options from commit authors only (not signoffs), 228*b1c1ee44SMartin Matuska # emit mailmap lines for the user to past into .mailmap 229*b1c1ee44SMartin Matuska my $cemail = $display_email{email_slug($authors_email{$name})}; 230*b1c1ee44SMartin Matuska for my $alias (@$emails) { 231*b1c1ee44SMartin Matuska next if $alias eq $email; 232*b1c1ee44SMartin Matuska 233*b1c1ee44SMartin Matuska my $calias = $commit_email{$alias}; 234*b1c1ee44SMartin Matuska next unless $calias; 235*b1c1ee44SMartin Matuska 236*b1c1ee44SMartin Matuska my $cname = $display_name{$name}; 237*b1c1ee44SMartin Matuska say "$cname <$cemail> <$calias>"; 238*b1c1ee44SMartin Matuska } 23923cf27dbSMartin Matuska} 24023cf27dbSMartin Matuska 24123cf27dbSMartin Matuska# Now output the new AUTHORS file 24223cf27dbSMartin Matuskaopen my $fh, '>', 'AUTHORS' or die "E: couldn't open AUTHORS for write: $!\n"; 24323cf27dbSMartin Matuskasay $fh join("\n", @authors_header, ""); 24423cf27dbSMartin Matuskafor my $name (sort keys %authors_email) { 24523cf27dbSMartin Matuska my $cname = $display_name{$name}; 24623cf27dbSMartin Matuska my $cemail = $display_email{email_slug($authors_email{$name})}; 24723cf27dbSMartin Matuska say $fh " $cname <$cemail>"; 24823cf27dbSMartin Matuska} 24923cf27dbSMartin Matuska 25023cf27dbSMartin Matuskaexit 0; 25123cf27dbSMartin Matuska 25223cf27dbSMartin Matuska# "Slugs" are used at the hashtable key for names and emails. They are used to 25323cf27dbSMartin Matuska# making two variants of a value be the "same" for matching. Mostly this is 25423cf27dbSMartin Matuska# to make upper and lower-case versions of a name or email compare the same, 25523cf27dbSMartin Matuska# but we do a little bit of munging to handle some common cases. 25623cf27dbSMartin Matuska# 25723cf27dbSMartin Matuska# Note that these are only used for matching internally; for display, the 25823cf27dbSMartin Matuska# slug will be used to look up the display form. 25923cf27dbSMartin Matuskasub name_slug { 26023cf27dbSMartin Matuska my ($name) = @_; 26123cf27dbSMartin Matuska 26223cf27dbSMartin Matuska # Remove spaces and dots, to handle differences in initials. 26323cf27dbSMartin Matuska $name =~ s/[\s\.]//g; 26423cf27dbSMartin Matuska 26523cf27dbSMartin Matuska return lc $name; 26623cf27dbSMartin Matuska} 26723cf27dbSMartin Matuskasub email_slug { 26823cf27dbSMartin Matuska my ($email) = @_; 26923cf27dbSMartin Matuska 27023cf27dbSMartin Matuska # Remove everything up to and including the first space, and the last 27123cf27dbSMartin Matuska # space and everything after it. 27223cf27dbSMartin Matuska $email =~ s/^(.*\s+)|(\s+.*)$//g; 27323cf27dbSMartin Matuska 27423cf27dbSMartin Matuska # Remove the leading userid+ on Github noreply addresses. They're 27523cf27dbSMartin Matuska # optional and we want to treat them as the same thing. 27623cf27dbSMartin Matuska $email =~ s/^[^\+]*\+//g if $email =~ m/\.noreply\.github\.com$/; 27723cf27dbSMartin Matuska 27823cf27dbSMartin Matuska return lc $email; 27923cf27dbSMartin Matuska} 28023cf27dbSMartin Matuska 281*b1c1ee44SMartin Matuska# As we accumulate new names and addresses, record the "best looking" version 282*b1c1ee44SMartin Matuska# of each. Once we decide to add a committer to AUTHORS, we'll take the best 283*b1c1ee44SMartin Matuska# version of their name and address from here. 284*b1c1ee44SMartin Matuska# 285*b1c1ee44SMartin Matuska# Note that we don't record them if they're already in AUTHORS (that is, in 286*b1c1ee44SMartin Matuska# %authors_name or %authors_email) because that file already contains the 287*b1c1ee44SMartin Matuska# "best" version, by definition. So we return immediately if we've seen it 288*b1c1ee44SMartin Matuska# there already. 28923cf27dbSMartin Matuskasub update_display_name { 29023cf27dbSMartin Matuska my ($name) = @_; 29123cf27dbSMartin Matuska my $sname = name_slug($name); 292*b1c1ee44SMartin Matuska return if $authors_email{$sname}; 29323cf27dbSMartin Matuska 29423cf27dbSMartin Matuska # For names, "more specific" means "has more non-lower-case characters" 29523cf27dbSMartin Matuska # (in ASCII), guessing that if a person has gone to some effort to 29623cf27dbSMartin Matuska # specialise their name in a later commit, they presumably care more 29723cf27dbSMartin Matuska # about it. If this is wrong, its probably better to add a .mailmap 29823cf27dbSMartin Matuska # entry. 29923cf27dbSMartin Matuska 30023cf27dbSMartin Matuska my $cname = $display_name{$sname}; 30123cf27dbSMartin Matuska if (!$cname || 30223cf27dbSMartin Matuska ($name =~ tr/a-z //) < ($cname =~ tr/a-z //)) { 30323cf27dbSMartin Matuska $display_name{$sname} = $name; 30423cf27dbSMartin Matuska } 30523cf27dbSMartin Matuska} 30623cf27dbSMartin Matuskasub update_display_email { 30723cf27dbSMartin Matuska my ($email) = @_; 30823cf27dbSMartin Matuska my $semail = email_slug($email); 309*b1c1ee44SMartin Matuska return if $authors_name{$semail}; 31023cf27dbSMartin Matuska 31123cf27dbSMartin Matuska # Like names, we prefer uppercase when possible. We also remove any 31223cf27dbSMartin Matuska # leading "plus address" for Github noreply addresses. 313*b1c1ee44SMartin Matuska 31423cf27dbSMartin Matuska $email =~ s/^[^\+]*\+//g if $email =~ m/\.noreply\.github\.com$/; 31523cf27dbSMartin Matuska 31623cf27dbSMartin Matuska my $cemail = $display_email{$semail}; 31723cf27dbSMartin Matuska if (!$cemail || 31823cf27dbSMartin Matuska ($email =~ tr/a-z //) < ($cemail =~ tr/a-z //)) { 31923cf27dbSMartin Matuska $display_email{$semail} = $email; 32023cf27dbSMartin Matuska } 32123cf27dbSMartin Matuska} 32223cf27dbSMartin Matuska 32323cf27dbSMartin Matuskasub best_name { 32423cf27dbSMartin Matuska my @names = sort { 32523cf27dbSMartin Matuska my $cmp; 32623cf27dbSMartin Matuska my ($aa) = $display_name{$a}; 32723cf27dbSMartin Matuska my ($bb) = $display_name{$b}; 32823cf27dbSMartin Matuska 32923cf27dbSMartin Matuska # The "best" name is very subjective, and a simple sort 33023cf27dbSMartin Matuska # produced good-enough results, so I didn't try harder. Use of 33123cf27dbSMartin Matuska # accented characters, punctuation and caps are probably an 33223cf27dbSMartin Matuska # indicator of "better", but possibly we should also take into 33323cf27dbSMartin Matuska # account the most recent name we saw, in case the committer 33423cf27dbSMartin Matuska # has changed their name or nickname or similar. 33523cf27dbSMartin Matuska # 33623cf27dbSMartin Matuska # Really, .mailmap is the place to control this. 33723cf27dbSMartin Matuska 33823cf27dbSMartin Matuska return ($aa cmp $bb); 33923cf27dbSMartin Matuska } @_; 34023cf27dbSMartin Matuska 34123cf27dbSMartin Matuska return shift @names; 34223cf27dbSMartin Matuska} 34323cf27dbSMartin Matuskasub best_email { 34423cf27dbSMartin Matuska state $internal_re = qr/\.(?:internal|local|\(none\))$/; 34523cf27dbSMartin Matuska state $noreply_re = qr/\.noreply\.github\.com$/; 34623cf27dbSMartin Matuska state $freemail_re = qr/\@(?:gmail|hotmail)\.com$/; 34723cf27dbSMartin Matuska 34823cf27dbSMartin Matuska my @emails = sort { 34923cf27dbSMartin Matuska my $cmp; 35023cf27dbSMartin Matuska 35123cf27dbSMartin Matuska # prefer address with a single @ over those without 35223cf27dbSMartin Matuska $cmp = (($b =~ tr/@//) == 1) <=> (($a =~ tr/@//) == 1); 35323cf27dbSMartin Matuska return $cmp unless $cmp == 0; 35423cf27dbSMartin Matuska 35523cf27dbSMartin Matuska # prefer any address over internal/local addresses 35623cf27dbSMartin Matuska $cmp = (($a =~ $internal_re) <=> ($b =~ $internal_re)); 35723cf27dbSMartin Matuska return $cmp unless $cmp == 0; 35823cf27dbSMartin Matuska 35923cf27dbSMartin Matuska # prefer any address over github noreply aliases 36023cf27dbSMartin Matuska $cmp = (($a =~ $noreply_re) <=> ($b =~ $noreply_re)); 36123cf27dbSMartin Matuska return $cmp unless $cmp == 0; 36223cf27dbSMartin Matuska 36323cf27dbSMartin Matuska # prefer any address over freemail providers 36423cf27dbSMartin Matuska $cmp = (($a =~ $freemail_re) <=> ($b =~ $freemail_re)); 36523cf27dbSMartin Matuska return $cmp unless $cmp == 0; 36623cf27dbSMartin Matuska 36723cf27dbSMartin Matuska # alphabetical by domain 36823cf27dbSMartin Matuska my ($alocal, $adom) = split /\@/, $a; 36923cf27dbSMartin Matuska my ($blocal, $bdom) = split /\@/, $b; 37023cf27dbSMartin Matuska $cmp = ($adom cmp $bdom); 37123cf27dbSMartin Matuska return $cmp unless $cmp == 0; 37223cf27dbSMartin Matuska 37323cf27dbSMartin Matuska # alphabetical by local part 37423cf27dbSMartin Matuska return ($alocal cmp $blocal); 37523cf27dbSMartin Matuska } @_; 37623cf27dbSMartin Matuska 37723cf27dbSMartin Matuska return shift @emails; 37823cf27dbSMartin Matuska} 379