mirror of https://code.videolan.org/videolan/vlc
Scripts to help keep up-to-date AUTHORS file
Signed-off-by: Jean-Baptiste Kempf <jb@videolan.org>
This commit is contained in:
parent
829b5a9238
commit
5d71a580e2
|
@ -0,0 +1,138 @@
|
|||
#!/bin/bash
|
||||
|
||||
# A script checking the git logs for commits. Final goal is updating AUTHORS.
|
||||
# Run it in source root
|
||||
|
||||
# Copy this script into the git working directory and run it there, so that "git shortlog -sn po/" can find the logs.
|
||||
# It will generate a subdirectory temp_update_AUTHORS
|
||||
|
||||
|
||||
mkdir -p temp_update_AUTHORS
|
||||
|
||||
echo "Checking all git logs"
|
||||
git shortlog -sn > temp_update_AUTHORS/all_git.txt
|
||||
|
||||
# Single quotes outside so the inner double quotes are actually printed.
echo 'Checking "po only" git logs'
|
||||
git shortlog -sn po/ > temp_update_AUTHORS/po_git.txt
|
||||
|
||||
echo "reading AUTHORS"
|
||||
# From the first line matching "Programming" to the end of AUTHORS, print every line that contains
# at least one character other than "-" (this drops empty lines and dash-only underlines).
# The second sed then deletes the first printed line, i.e. the line that matched "Programming".
sed -n '/Programming/,$ s/[^-].*/&/p' < AUTHORS | sed '1 d' > temp_update_AUTHORS/programmers_part.txt
|
||||
# The bottom part of AUTHORS, could be done with one sed, but I don't care...
|
||||
|
||||
|
||||
echo "Removing commit counts from git log"
|
||||
sed 's/[0-9 \t]*\(.*\)/\1/g' < temp_update_AUTHORS/all_git.txt |sort|uniq > temp_update_AUTHORS/all_git_namesonly.txt
|
||||
# I think "uniq" is not needed here.
|
||||
|
||||
|
||||
echo "Removing translators from the git log"
|
||||
# Remove translators (committers whose commit count in po/ equals their total count, and who are hence listed twice). Then the commit counter is removed.
|
||||
cat temp_update_AUTHORS/all_git.txt temp_update_AUTHORS/po_git.txt|sort|uniq -u |sed 's/[0-9 \t]*\(.*\)/\1/g' | sort|uniq> temp_update_AUTHORS/coders_only.txt
|
||||
|
||||
|
||||
# Similar effect with second sed run:
|
||||
# Remove translators. I remove every line containing the name. Maybe the .* before and after the last \1 should be removed (i.e. for contributors "Firstname Secondname aka something_you_want_to_keep"
|
||||
#cat temp_update_AUTHORS/all_git.txt temp_update_AUTHORS/po_git.txt|sort|uniq -D|uniq|sed 's/[0-9 \t]*\(.*\)/\1/g' |sed 's:[0-9 \t]*\(.*\):s^.*\1.*^^g:' > temp_update_AUTHORS/remove_translators_gen
|
||||
#
|
||||
#sed -f temp_update_AUTHORS/remove_translators_gen < temp_update_AUTHORS/all_git_namesonly.txt > temp_update_AUTHORS/coders_only.txt
|
||||
# This is everyone who did code commits with git. The blank lines are the removed translators.
|
||||
|
||||
|
||||
# Now, I want to reduce the number of lines the human reader has to check, so we are going to kill the already listed contributors.
|
||||
|
||||
echo "Finding pre-git contributors in AUTHORS"
|
||||
sed 's:\(.*\):s^.*\1.*^^g:' < temp_update_AUTHORS/coders_only.txt > temp_update_AUTHORS/remove_git_commiters_gen
|
||||
sed -f temp_update_AUTHORS/remove_git_commiters_gen < temp_update_AUTHORS/programmers_part.txt |sort| uniq -u > temp_update_AUTHORS/pre-git.txt
|
||||
|
||||
sed 's:\(.*\):s^.*\1.*^^g:' < temp_update_AUTHORS/programmers_part.txt > temp_update_AUTHORS/remove_programmers_part_gen
|
||||
sed -f temp_update_AUTHORS/remove_programmers_part_gen < temp_update_AUTHORS/coders_only.txt |sort| uniq -u > temp_update_AUTHORS/new_coders_only.txt
|
||||
|
||||
# VideoLAN as a contributor can be removed, I think
|
||||
|
||||
|
||||
|
||||
sed 's/[0-9 \t]*\(.*\)/\1/g' < temp_update_AUTHORS/all_git.txt > temp_update_AUTHORS/all_git_namesonly_ordered.txt
|
||||
# Just remove the tab and the number of commits, keeping the order. This file is going to be the sort order.
|
||||
# You can re-order the complete programmers part like that and simply append the pre-git commiters.
|
||||
# Ordering the contributors that way is not a bad idea.
|
||||
# The question: Is it easier/better to check the new commiters in this order?
|
||||
# One can find (UPPERCASE issues, middle names,..) better when listing them alphabetically.
|
||||
|
||||
# I suggest checking manually a file build like that:
|
||||
# alphabetically ordered, complete list of contributors/git (code) commiters, with an extra marking for new ones. Example
|
||||
# Old Commiter
|
||||
# New Commiter ---XXX---NEW
|
||||
# New COMMITER ---XXX---NEW |same name with UPPERCASE part
|
||||
# Very Commiter
|
||||
# Very New Commiter ---XXX---NEW |Same person with a middle name
|
||||
|
||||
# The uppercase case can be done by script I guess, I did not look up how to make sure the intended version will be picked.
|
||||
# How to proceed with manually found problems? Solve them for the future (.mailmap/own script/...)
|
||||
|
||||
rm -f temp_update_AUTHORS/ordering_log.txt
|
||||
rm -f temp_update_AUTHORS/ordered_by_commits.txt
|
||||
# Walk the author names in commit-count order (one name per line in $FileName) and
# append to ordered_by_commits.txt every name that also occurs in coders_only.txt.
# "VideoLAN" is skipped because it is not a person.
FileName='temp_update_AUTHORS/all_git_namesonly_ordered.txt'
# -r: do not let read interpret backslashes inside a name.
while read -r LINE
do
  if [ "$LINE" = "VideoLAN" ]; then
    echo "VideoLAN is not a person"
  else
#    grep "$LINE" temp_update_AUTHORS/new_coders_only.txt >> temp_update_AUTHORS/ordering_log.txt
    # -F: match the name literally instead of as a regular expression;
    # --: a name starting with "-" must not be parsed as an option.
    # The grep output is kept in ordering_log.txt (it could go to /dev/null); only the
    # exit status decides whether the name is kept.
    # The match is still a substring match, so if someone's name is a prefix of someone
    # else's name, this diff will show it:
    # diff temp_update_AUTHORS/ordering_log.txt temp_update_AUTHORS/ordered_by_commits.txt
    # AFAIK this will not affect the output, since we don't use the grep output but only the git output
    if grep -F -- "$LINE" temp_update_AUTHORS/coders_only.txt >> temp_update_AUTHORS/ordering_log.txt; then
      echo "$LINE" >> temp_update_AUTHORS/ordered_by_commits.txt
    fi
  fi
done < "$FileName"
|
||||
|
||||
|
||||
|
||||
# Keep one copy of every line that occurs more than once in the combined, sorted
# shortlog lists, then strip the leading commit count.
cat temp_update_AUTHORS/all_git.txt temp_update_AUTHORS/po_git.txt \
  | sort \
  | uniq -d \
  | sed 's/[0-9 \t]*\(.*\)/\1/g' \
  > temp_update_AUTHORS/translators.txt
|
||||
wc -l temp_update_AUTHORS/*
|
||||
echo "Some contributors only commited into po. Please cross-check that with the localization part. See: temp_update_AUTHORS/translators.txt"
|
||||
echo "But first, please check if temp_update_AUTHORS/review.txt contains complete names and other constraints for publishing (i.e. UPPERCASE name parts, broken text, a name and it's abbreviation both present...)"
|
||||
|
||||
|
||||
sed 's/\(.*\)/\1 ---XXX---NEW/g' < temp_update_AUTHORS/new_coders_only.txt | cat - temp_update_AUTHORS/programmers_part.txt |sort > temp_update_AUTHORS/review.txt
|
||||
# This file contains VideoLAN as a contributor.
|
||||
|
||||
echo
|
||||
echo "For the lazy ones: Have a look at temp_update_AUTHORS/final.txt"
|
||||
echo "Contains all git code commiters (the translators are stored somewhere else) sorted by commits, and the pre-git commiters"
|
||||
echo "Programming" > temp_update_AUTHORS/final.txt
|
||||
echo "-----------" >> temp_update_AUTHORS/final.txt
|
||||
cat temp_update_AUTHORS/ordered_by_commits.txt temp_update_AUTHORS/pre-git.txt >> temp_update_AUTHORS/final.txt
|
||||
echo
|
||||
|
||||
echo "Listing email adresses used with different names..."
|
||||
git shortlog -sne |sed 's/[^<]*\(.*\)/\1/g' |sort|uniq -d
|
||||
echo "If something was listed here you should probably modify .mailmap"
|
||||
|
||||
# This last part puts the relevant addresses into temp_twice_used_adress/check_for_this.txt
|
||||
# Currently, this is not needed (.mailmap is up to date)
|
||||
|
||||
#mkdir -p temp_twice_used_adress
|
||||
#echo "Checking all git logs"
|
||||
#git shortlog -sne > temp_twice_used_adress/all_shortlog_sne.txt
|
||||
#
|
||||
#echo "Removing everything but email addresses"
|
||||
#sed 's/[^<]*\(.*\)/\1/g' < temp_twice_used_adress/all_shortlog_sne.txt |sort|uniq -d > temp_twice_used_adress/all_git_addresses_only.txt
|
||||
#
|
||||
#FileName='temp_twice_used_adress/all_git_addresses_only.txt'
|
||||
#while read LINE
|
||||
#do
|
||||
# grep "$LINE" temp_twice_used_adress/all_shortlog_sne.txt >> temp_twice_used_adress/check_for_this.txt
|
||||
## I want to keep the $? (it removes some broken names) but I could send the output to /dev/null
|
||||
# if [ $? = "0" ]; then
|
||||
# echo "$LINE"
|
||||
# fi
|
||||
#done < $FileName
|
||||
|
||||
|
||||
# Keep only final.txt (copied into the current directory) and remove the working directory.
# NOTE(review): the messages printed earlier point the reader at review.txt and translators.txt
# inside temp_update_AUTHORS/, which no longer exist after this rm — confirm this is intended.
cp temp_update_AUTHORS/final.txt .
|
||||
rm -rf temp_update_AUTHORS/
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
#!/bin/bash
|
||||
|
||||
# A script checking the po-files for translators. Final goal is updating AUTHORS.
|
||||
|
||||
# To be copied and run in the git directory for having "ls po/*po" find the files.
|
||||
# It will generate a subdirectory temp_translators
|
||||
|
||||
# Quoted so the shell does not expand *.po against the current directory.
echo 'Searching the headers of *.po'
|
||||
|
||||
mkdir -p temp_translators
# -p keeps a re-run from failing when the directory already exists. For real use, a diff against the previous run should be used to reduce the lines to read.
|
||||
|
||||
git describe > temp_translators/start.txt
|
||||
date >> temp_translators/start.txt
|
||||
# to have the date visible
|
||||
|
||||
|
||||
# Collect the header of every po/*po file into temp_translators/start.txt.
# Symbolic links are only recorded by name; for any other file, everything up to and
# including the first line containing "#: include" is appended.
for i in po/*po; do
  # An unmatched glob stays literal: skip it unless it names a file or a (possibly dangling) link.
  [ -e "$i" ] || [ -h "$i" ] || continue
  echo "item: $i"
  echo "$i" >>temp_translators/start.txt

  if [ -h "$i" ]
  then
    echo just a link
    echo "$i is just a link" >>temp_translators/start.txt
  else

#    translationlangcode=$(echo $i | sed 's/\([a-z]*\).po*/\1/')
#    longlanguage=$(sed -n '1,30 s/[*]*[t,T]ranslati[.]*/&/p' < $i)
#    poeditlanguage=$(sed -n '1,30 s/[.]*X-Poedit-Language:[.]*/&/p' < $i)
#    echo $longlanguage $poeditlanguage
#    echo $translationlangcode >> ausgabe/start.txt
# I used this to have more output on running the script. Gives also the language name instead of the po-file's name only

    # sed prints each line and quits after the first one matching "#: include".
    sed '/#: include/ q' < "$i" >> temp_translators/start.txt
    # The header should be done by now.
    #TODO: A real safe way would take only the wanted meta parts (i.e. "Last-Translator:) and all commented lines #

  fi
done
|
||||
|
||||
|
||||
|
||||
# Removing every \n. The result will have to be read by humans, so there is no use in keeping them.
|
||||
sed 's:\\n::g' < temp_translators/start.txt > temp_translators/start_without_backslash_n.txt
|
||||
|
||||
echo '"Plural-Forms:.*' > temp_translators/doubles.txt
|
||||
# TODO: This can be more than one line ^^
|
||||
echo '"&& n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5;"' >> temp_translators/doubles.txt
|
||||
echo '"%100<10 || n%100>=20) ? 1 : 2);"' >> temp_translators/doubles.txt
|
||||
echo '"|| n%100>=20) ? 1 : 2);"' >> temp_translators/doubles.txt
|
||||
echo '"%100==4 ? 3 : 0);"' >> temp_translators/doubles.txt
|
||||
# The only two liners so far. Quick fix for the above TODO...
|
||||
|
||||
echo '"PO-Revision-Date: .*"' >> temp_translators/doubles.txt
|
||||
echo '"X-Poedit-Bookmark.*' >> temp_translators/doubles.txt
|
||||
echo '"Project-Id-Version:.*' >> temp_translators/doubles.txt
|
||||
echo '"X-Generator: .*' >> temp_translators/doubles.txt
|
||||
echo '"Language: .*' >> temp_translators/doubles.txt
|
||||
echo ' *[Cc]opyright *([cC])[0-9 ,-]*t*h*e* *VideoLAN$* *t*e*a*m*[0-9 ,-]*\.*' >> temp_translators/doubles.txt
|
||||
echo '[ ]*\$[ ]*[i,I][d,D][ ]*[:]*[ ]*\$' >> temp_translators/doubles.txt
|
||||
echo '"X-Poedit-Country:.*' >> temp_translators/doubles.txt
|
||||
echo '"X-Project-Style:.*' >> temp_translators/doubles.txt
|
||||
echo ' *<videolan@videolan.org> *' >> temp_translators/doubles.txt
|
||||
# Whatever line occurs twice or more is most probably not a translators name. However, you can check it. The file will not be deleted
|
||||
#TODO: If I knew sed better, I would put all the removed parts in a logfile. sort uniq would give a list to check for everything deleted
|
||||
sort -r temp_translators/start_without_backslash_n.txt | uniq -d >> temp_translators/doubles.txt
|
||||
# With the reverse Order the # will be removed at the end. Else the doubles beginning with # will not be matched
|
||||
|
||||
# Just check a "sort results.txt|less" for more to remove
|
||||
|
||||
# Changing the strings to sed commands removing the doubles
|
||||
sed 's:.*:s^&^^g:' < temp_translators/doubles.txt > temp_translators/generated_com
|
||||
|
||||
|
||||
# Removing all doubles
|
||||
sed -f temp_translators/generated_com temp_translators/start_without_backslash_n.txt >temp_translators/results.txt
|
||||
|
||||
|
||||
# Now, we are going to mark the already mentioned translators..
|
||||
# Some names are written in CAPITALS. A modified marking script would be good.
|
||||
|
||||
sed -n '/Localization/,/^$/ p' <AUTHORS | sed -n '3,$ s/\(.*\) --* .*/\1/p' >temp_translators/localization_part.txt
|
||||
#sed -n '/Localization/,/^$/ p' <AUTHORS | sed -n '3,$ s/\(.*\) - .*/\1/p' >temp_translators/sect.txt
|
||||
#The second line is what we want. I added -* to have "Éric Lassauge -- French" included, too. But I really think this is a typo in AUTHORS
|
||||
|
||||
sed -n '/Programming/,/^$/ p' <AUTHORS | sed -n '3,$ p' >temp_translators/pro_part.txt
|
||||
sed 's:.*:s^&^YYY-- & --YYY^g:' < temp_translators/pro_part.txt > temp_translators/replace_prog_names
|
||||
|
||||
|
||||
# Changing the strings to sed commands removing the doubles
|
||||
sed 's:.*:s^&^XXX-- & --XXX^g:' < temp_translators/localization_part.txt > temp_translators/replacenames
|
||||
|
||||
|
||||
|
||||
#mkdir -p temp_twice_used_adress
|
||||
#echo "Checking all git logs"
|
||||
#git shortlog -sne > temp_twice_used_adress/all_shortlog_sne.txt
|
||||
|
||||
|
||||
#echo "Removing everything but email addresses"
|
||||
git shortlog -sne |sed 's/[^<]*\(.*\)/\1/g' |sort > temp_translators/git_addresses_only.txt
|
||||
#uniq -d|
|
||||
sed 's:.*:s^&^ZZZ-- & --ZZZ^g:' < temp_translators/git_addresses_only.txt > temp_translators/replace_git_commiters
|
||||
|
||||
|
||||
|
||||
#sed -f temp_translators/replacenames <temp_translators/results.txt |uniq >temp_translators/review.txt
|
||||
|
||||
#sed -f temp_translators/replacenames <temp_translators/results.txt |sed -f temp_translators/replace_prog_names |uniq >temp_translators/review.txt
|
||||
sed -f temp_translators/replacenames <temp_translators/results.txt |sed -f temp_translators/replace_prog_names |sed -f temp_translators/replace_git_commiters |uniq >temp_translators/review.txt
|
||||
|
||||
echo "Now temp_translators/review.txt should be reviewed. I don't think this can be done automatically, so I have done it already (not on your computer). Feedback is appreciated."
|
||||
echo "XXX are named translators, YYY are named programmers, ZZZ commited with git"
|
||||
|
||||
# temp_translators/review.txt has to be reviewed manually. I don't think you would simply remove the names from AUTHORS found because then you would
|
||||
# see an email address and would not know whether it has to be added, or how (with what name).
|
||||
# The simpler to handle git commiter check script gives better names and dates, but of course not for all contributors. However, cross checking is recommended.
|
||||
|
||||
# The idea is to use a diff next time, but I did not work out yet what files would be best for that. Probably after finding the doubles would be a good time to remove everything visible in older versions.
|
Loading…
Reference in New Issue