=head1 NAME
order-footnote
=head1 SYNOPSIS
order-footnote.pl < [broken-page] > [fixed-page]
=head1 DESCRIPTION
This is a proof-of-concept program which reorders Wikipedia's automatic
footnotes.
=head1 BUGS
The program assumes that the {{note}} macros all occur after the
{{ref}} macros, in one block, which they should. Before bringing this
into use, that assumption should actually be checked.
We don't correctly handle duplicate references. We haven't yet got
functionality to guess a good name and spit out the same footnote
twice. In this case we output what looks like a missing footnote, but
links back to the earlier reference link forward to its matching
footnote.
=head1 COPYRIGHT
This program may be distributed under the terms of the GFDL or, more
appropriately, under the terms of the GNU GPL. Copyright 2005 "mozzerati"
of the Wikipedia project.
=cut
#read through references till we hit a note
# Template names used when the script writes a macro itself. Only
# $notetemplate is used in the code visible in this file (for the
# "note-missing" placeholder); $reftemplate is kept for symmetry.
$reftemplate="ref";
# Regex fragment matching the template name of a reference macro.
# MediaWiki treats the first letter of a template name as
# case-insensitive, so both spellings of every name are accepted
# (previously "An" was missed while "Ref", "Note" and "Anb" were handled).
$reftemplatere="(?:[Rr]ef|[Aa]n)";
$notetemplate="note";
# Regex fragment matching the template name of a footnote macro.
$notetemplatere="(?:[Nn]ote|[Aa]nb)";
@reflist=();  # reference names, after renaming, in order of appearance
%fixrefs=();  # maps each emitted (possibly renamed) reference name to the
              # name it had in the original text
%notes=();    # footnote text keyed by the note name found in the input
# First pass: copy the article body to STDOUT line by line, giving every
# reference macro a unique name, until the first footnote line is seen.
# The global $_ is used on purpose: when the loop stops it still holds
# the first footnote line (or undef at end of input) for the code below.
LINE: while (<>) {
    # A line starting with a note macro (optionally after "#" or "*")
    # marks the beginning of the footnotes section.
    last LINE if m/^([#\*])?\s*\{\{$notetemplatere\|/;

    # Peel reference macros off the front of the line one at a time.
    # Only the first closing brace is consumed; the second one stays in
    # $_ and is printed with the text that follows.
    while ( s/(.*?)\{\{($reftemplatere)\|([^\{\}]+)(\})// ) {
        my ($leading, $tmpl, $wanted, $closer) = ($1, $2, $3, $4);

        # If the name was already used, append 2, 3, ... to the original
        # name until an unused one is found.
        my $unique = $wanted;
        for (my $suffix = 2; defined $fixrefs{$unique}; $suffix++) {
            $unique = $wanted . $suffix;
        }

        print join('', $leading, '{{', $tmpl, '|', $unique, $closer);
        print STDERR "ref $unique\n";

        # Remember which original name this emitted name stands for, and
        # the order in which references appear.
        $fixrefs{$unique} = $wanted;
        push @reflist, $unique;
    }

    print;    # whatever is left of the line contains no references
}
#Read through the entire rest of the text, finding notes. We print out
#the notes all together at the position of the first note, but it's not
#totally obvious that that's correct. Perhaps we should really do two
#passes: read through from beginning to end, get all references, then
#position the notes at the position of the last note.
# $_ is undefined here only when the input ended before any footnote
# line was found.
unless (defined $_) {
    # References without any footnotes to pair them with: give up.
    die "failed to find any footnotes" if @reflist;

    # No references and no notes: everything read has already been
    # copied to STDOUT, so there is nothing left to do.
    print STDERR "no notes found; article should be unchanged\n";
    exit;
}
# Second pass: starting with the line currently in $_, sort every
# remaining input line into either a footnote (stored in %notes) or
# trailing text (accumulated in $after).
$after   = "";   # all non-footnote lines, in input order
$matched = 0;    # true while the previous line belonged to a footnote

while (defined $_) {
    if ( $matched && m/^\#[#*:]|^\<\!\-\-/ ) {
        # Nested list item or HTML comment directly after a footnote:
        # treat it as a continuation of that footnote.
        $notes{$name} .= $_;
    }
    elsif ( ($name) = m/^(?:[\#\*])?\s*\{\{$notetemplatere\|([^\{\}]+)\}/ ) {
        # A new footnote. Broken templates (e.g. not closed properly)
        # are not detected. A later note with the same name simply
        # replaces the earlier one; ideally the fuller note would be
        # preferred and the other kept at the end under a new name.
        $notes{$name} = $_;
        $matched = 1;
    }
    else {
        # Anything else ends the current footnote and is kept for
        # output after the notes.
        $matched = 0;
        $after .= $_;
    }

    $_ = <>;
}
# Print the footnotes in the order in which their references appeared.
# %usednotes records, by original note name (the key used in %notes),
# which stored notes have been printed at least once.
%usednotes=();
# Test with defined() so that a reference literally named "0" does not
# end the loop early.
while ( defined( $name = shift @reflist ) ) {
    my $oldname=$fixrefs{$name};

    # No footnote text for this reference: emit a visible placeholder.
    unless ( defined $notes{$oldname} ) {
        print "# {{$notetemplate|$name}} {{note-missing}}\n";
        next;
    }

    my $note=$notes{$oldname};
    # Rename the note macro to match the (possibly renamed) reference.
    # The braces and the "|" are escaped so they are matched literally
    # (an unescaped "|" is alternation and would let the name match
    # anywhere in the note text), and the old name is quoted so that
    # regex metacharacters in it are harmless. The look-ahead ensures
    # the whole name is matched, not just a prefix of it.
    $note =~ s/(\{\{$notetemplatere\|)\Q$oldname\E(?=\})/$1$name/;
    print $note;

    # Mark the stored note as used under the key it has in %notes, so
    # that an unrelated, unreferenced note whose name happens to equal a
    # generated name is not mistaken for an already printed one.
    $usednotes{$oldname}=1;
}
# Print out the notes that were not printed for any reference.
# The real version would probably separate them out.
# Keys are visited in sorted order so that the same input always yields
# the same output; plain hash iteration order is unspecified and can
# differ from one run to the next.
for my $unused ( sort keys %notes ) {
    print $notes{$unused} unless $usednotes{$unused};
}

# Finally, emit everything that followed the footnotes in the input.
print $after;