#!/usr/bin/env perl
# latexrevise - takes output file of latexdiff and removes either discarded
# or appended passages, then deletes all other latexdiff markup
#
# Copyright (C) 2004 F J Tilmann (tilmann@gfz-potsdam.de)
#
# Repository: https://github.com/ftilmann/latexdiff
# CTAN page: http://www.ctan.org/pkg/latexdiff
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Detailed usage information at the end of the file
# Note: version number now keeping up with latexdiff
# Version 1.3.2
# - Functionality -c option: if comment to be deleted is the last in the line replace new line by space character. This is done to avoid errors or unintended line breaks from empty lines after deletion (which are implicit \par)
# - Introduce option -r / --replace which removes markup of the form \replaced{new text}{old text} in the changes package
# Version 1.3.1 (Bug fix) Remove some deprecation warnings due to uncommented left parentheses
# Version 1.0.2 Option --version
# Version 1.0.1 no changes to latexrevise
# Version 0.3 Updated for compatibility with latexdiff 0.3 output (DIFAUXCMD removal)
# Version 0.1 First public release
use Getopt::Long ;
use strict;
use warnings;
my $versionstring=< '; # To mark added comment line
# Markup tokens for deleted material. All of these strings are interpolated
# into regular expressions further down, so they are regex fragments rather
# than plain text. Inside single quotes '\\\D' yields the three characters
# \ \ D, and the regex engine reads the doubled backslash as one literal
# backslash; this is why three backslashes are needed in the source.
my $DELMARKOPEN='\\\DIFdelbegin(?:FL)? '; # Token to mark begin of deleted text (optionally the FL variety, followed by a space)
my $DELMARKCLOSE='\\\DIFdelend(?:FL)?(?: |\z)'; # Token to mark end of deleted text (followed by a space or by the end of the string)
my $DELOPEN='\\\DIFdel(?:FL)?{'; # To mark begin of deleted text passage
my $DELCLOSE='}'; # To mark end of deleted text passage
my $ALTEXT='FL'; # string which might be appended to above commands (not referenced in the visible part of this script)
my $DELCMDOPEN='%DIFDELCMD < '; # To mark begin of deleted commands (must begin with %, i.e., be a comment)
my $DELCMDCLOSE="%%%\n"; # To mark end of deleted commands (must end with a new line)
my $AUXCMD='%DIFAUXCMD' ; # follows auxiliary commands put in by latexdiff to make difference file legal
my $DELCOMMENT='DIF < '; # To mark deleted comment line (used after a leading % in the patterns below)
# First and last line of the block of definitions which latexdiff adds to the
# preamble; both are anchored to match a complete line (used with the /m flag).
my $PREAMBLEXTBEG='^%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF$';
my $PREAMBLEXTEND='^%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF$';
# Regex fragments for matching the arguments of latex commands.
#
# $pat0  matches text that contains no braces at all.
# $pat_n matches text with balanced braces. It is built iteratively: every
#        pass through the loop wraps the previous pattern in one more level
#        of {...}, so the final pattern copes with 20 levels of nesting.
#        (This replaces the hand-unrolled $pat1 .. $pat4 of earlier versions.)
# $brat0 matches the content of an optional [...] argument.
#
# Note on quoting: inside single quotes only \\ and \' are escapes, so the
# five characters \\\\\{ in the source yield the regex \\\{ , i.e. a literal
# backslash followed by a literal brace.
my $pat0 = '(?:[^{}])*';
my $pat_n = $pat0;
# if you get "undefined control sequence MATHBLOCKmath" error, increase the maximum value in this loop
for ( 1 .. 20 ) {
    $pat_n = '(?:[^{}]|\{'.$pat_n.'\}|\\\\\{|\\\\\})*';
    # Actually within the text body, quoted braces are replaced in pre-processing. The only place where
    # the last part of the pattern matters is when processing the arguments of context2cmds in the preamble
    # and these contain a \{ or \} combination, probably rare.
    # It should thus be fine to use the simpler version below.
    ### $pat_n = '(?:[^{}]|\{'.$pat_n.'\})*';
}
# Any character except [ and ], or one of the quoted brackets \[ and \].
# The quoted brackets need five backslashes in the source (see the note on
# quoting above). With only three, the regex engine sees a literal backslash
# followed by a character class, and quoted brackets are never accepted.
my $brat0 = '(?:[^\[\]]|\\\\\[|\\\\\])*';
# Document parts: the complete input, and the pieces before, between and
# after \begin{document} / \end{document}
my ($input,$preamble,$body,$post);
# Scratch variables for the substitutions in the main part of the script
my (@matches);
my ($cnt,$prematch,$postmatch);
# Command line switches. The mode flags and verbosity flags default to off;
# the string-valued options stay undefined unless given on the command line.
my ($help,$version);
my ($verbose,$quiet,$accept,$decline,$simplify)=(0,0,0,0,0);
my ($comment,$comenv,$markup,$markenv,$replace);
# A word unlikely ever to be used in a real latex file
my $someword='gobbledegooksygook';

# 'bundling' allows single-letter options to be combined (e.g. -aV)
Getopt::Long::Configure('bundling');
GetOptions(
    'accept|a'                => \$accept,
    'decline|d'               => \$decline,
    'simplify|s'              => \$simplify,
    'comment|c=s'             => \$comment,
    'comment-environment|e=s' => \$comenv,
    'markup|m=s'              => \$markup,
    'markup-environment|n=s'  => \$markenv,
    'replace|r=s'             => \$replace,
    # -q must set $quiet (it used to set $verbose, so that warnings could
    # never be suppressed and -q switched on verbose output instead)
    'no-warnings|q'           => \$quiet,
    'version'                 => \$version,
    'verbose|V'               => \$verbose,
    'help|h|H'                => \$help,
);

# usage() prints the help text and exits
if ( $help ) {
    usage() ;
}
if ( $version ) {
    print STDERR $versionstring ;
    exit 0;
}

# At most one of the three modes may be selected
if ( ($accept && $decline) || ($accept && $simplify) || ($decline && $simplify) ) {
    die '-a,-d and -s options are mutually exclusive. Type latexrevise -h to get more help.';
}

print STDERR "ACCEPT mode\n" if $verbose && $accept;
print STDERR "DECLINE mode\n" if $verbose && $decline;
print STDERR "SIMPLIFY mode. WARNING: The output will not normally be valid latex,\n" if $verbose && $simplify;
# Read the input (file named on the command line, or standard input) in one
# go: with the record separator $/ locally undefined, a single read returns
# the whole file.
$input = do { local $/; <> };
# Cut the text at \begin{document} and \end{document}; splitdoc hands back
# the parts before, between and after these two markers.
($preamble,$body,$post) = splitdoc( $input, '\begin{document}', '\end{document}' );
#
# WORK ON PREAMBLE
#
# The preamble is only revised in ACCEPT and DECLINE mode, and only if the
# input has one at all.
# (compare subroutine linediff in latexdiff to make sure correct strings are used)
if ( length($preamble) && ( $accept || $decline ) ) {
    # drop the block of extra commands which latexdiff added to the preamble
    # (from the BEGIN marker line to the END marker line, plus one newline)
    $preamble =~ s/${PREAMBLEXTBEG}.*?${PREAMBLEXTEND}\n{0,1}//smg ;
    if ( !$accept ) {
        # DECLINE: appended lines (tagged at their end) disappear completely ...
        $preamble =~ s/^(.*) %DIF > \n//mg ;
        # ... and deleted lines are reinstated by stripping their leading tag
        $preamble =~ s/^%DIF < //mg ;
    }
    else {
        # ACCEPT: appended lines stay, only the tag at their end is stripped
        $preamble =~ s/^(.*) %DIF > $/$1/mg ;
    }
    # whatever %DIF markup is left now is removed up to and including the
    # end of its line
    $preamble =~ s/%DIF.*?\n//sg ;
}
#print $preamble ;
#
# WORK ON BODY
#
# Resolve the begin/end token pairs in the body according to the selected
# mode. In both modes the text between each pair of tokens is first handed to
# checkpure, which (per its header comment) dies if that text itself contains
# one of the begin/end tokens, i.e. if blocks are nested.
if ($accept) {
# ACCEPT: keep appended text, drop discarded text
# remove ADDMARKOPEN, ADDMARKCLOSE tokens
@matches= $body =~ m/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/sg;
checkpure(@matches);
$body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/$1/sg;
# remove text flanked by DELMARKOPEN, DELMARKCLOSE tokens
@matches= $body =~ m/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/sg;
checkpure(@matches);
$body =~ s/${DELMARKOPEN}(.*?)${DELMARKCLOSE}//sg;
# remove markup of added comments
$body =~ s/%${ADDCOMMENT}(.*?)$/%$1/mg ;
# remove deleted comments (full line)
$body =~ s/^%${DELCOMMENT}.*?\n//mg ;
# remove deleted comments (part of line)
$body =~ s/%${DELCOMMENT}.*?$//mg ;
}
elsif ( $decline) {
# DECLINE: keep discarded text, drop appended text
# remove DELMARKOPEN, DELMARKCLOSE tokens
@matches= $body =~ m/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/sg;
checkpure(@matches);
$body =~ s/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/$1/sg;
# remove text flanked by ADDMARKOPEN, ADDMARKCLOSE tokens
# as latexdiff algorithm keeps the formatting and white spaces
# of the new text, sometimes whitespace might be inserted or
# removed inappropriately. We try to guess whether this has
# happened
# Mop up tokens. This must be done already now as otherwise
# detection of white-space problems does not work
# ($cnt adds up the number of substitutions made by the following statements)
$cnt = $body =~ s/${DELOPEN}($pat_n)${DELCLOSE}/$1/sg;
# remove markup of deleted commands
$cnt += $body =~ s/${DELCMDOPEN}(.*?)${DELCMDCLOSE}/$1/sg ;
$cnt += $body =~ s/${DELCMDOPEN}//g ;
# remove aux commands
# (two steps: every line ending in AUXCMD is first replaced by the placeholder
# word, then placeholder and the following newline are deleted, so that the
# line vanishes together with its line end)
$cnt += $body =~ s/^.*${AUXCMD}$/${someword}/mg; $body =~ s/${someword}\n//g;
# Remove the appended blocks one at a time; each pass handles the first
# block still present in $body, so the loop ends when none is left
while ( $body =~ m/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/s ) {
# text before and after the block just matched
$prematch=$`;
$postmatch=$';
checkpure($1);
if ( $prematch =~ /\w$/s && $postmatch =~ /^\w/ ) {
# apparently no white-space between word=>Insert white space
$body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/ /s ;
}
elsif ( $prematch =~ /\s$/s && $postmatch =~ /^[.,;:]/ ) {
# space immediately before one of ".,:;" => remove this space
$body =~ s/\s${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}//s ;
}
else {
# do not insert or remove any extras
$body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}//s;
}
}
# Alternative without special cases treatment
# @matches= $body =~ m/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/sg;
# checkpure(@matches);
# $body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}//sg;
# remove markup of deleted comments
$body =~ s/%${DELCOMMENT}(.*?)$/%$1/mg ;
# remove added comments (full line)
$body =~ s/^%${ADDCOMMENT}.*?\n//mg ;
# remove added comments (part of line)
$body =~ s/%${ADDCOMMENT}.*?$//mg ;
}
# remove any remaining tokens
# The begin/end token pairs have been dealt with at this point (or are left
# alone on purpose in SIMPLIFY mode); what is left to do is to strip the
# inner markup commands while keeping their arguments. $cnt counts the
# substitutions made. Finding something to remove is expected in some modes
# and suspicious in others, hence the warnings (suppressed by -q).
if ( $accept || $decline || $simplify ) {
    # first substitution command deals with special case of added paragraph
    $cnt = $body =~ s/${ADDOPEN}($pat_n)\n${ADDCLOSE}\n/$1\n/sg;
    $cnt += $body =~ s/${ADDOPEN}($pat_n)${ADDCLOSE}/$1/sg;
    # In DECLINE mode all added text was deleted above, so any match here is
    # a leftover. (The message is double-quoted so that \n really is a
    # newline; the single-quoted original printed it literally.)
    if ( $cnt != 0 && !( $quiet || $accept || $simplify ) ) {
        warn "Remaining \\DIFadd tokens in DECLINE mode\n";
    }
}
if ( $accept || $simplify ) {
    # Note: in decline mode these commands have already been removed above
    $cnt = $body =~ s/${DELOPEN}($pat_n)${DELCLOSE}/$1/sg;
    #### remove markup of deleted commands
    $cnt += $body =~ s/${DELCMDOPEN}(.*?)${DELCMDCLOSE}/$1/sg ;
    $cnt += $body =~ s/${DELCMDOPEN}//g ;
    # remove aux commands: lines ending in AUXCMD are replaced by a
    # placeholder word, which is then deleted together with the line end.
    # These removals are deliberately not added to $cnt.
    $body =~ s/^.*${AUXCMD}$/${someword}/mg; $body =~ s/${someword}\n//g;
    # In ACCEPT mode all deleted text was removed above, so any match counted
    # in $cnt is a leftover
    if ( $cnt != 0 && !( $quiet || $simplify ) ) {
        warn "Remaining \\DIFdel or %DIFDELCMD tokens in ACCEPT mode\n";
    }
}
# Removal of user-defined annotation commands and environments (options
# -c, -e, -m, -n and -r). Every step uses the same recipe:
#   1. wherever the name occurs inside a latex comment (after a %), it is
#      swapped for the placeholder $someword, so that step 2 skips it
#   2. the substitution proper is carried out and its matches are counted
#   3. the placeholder is turned back into the name
# The names are interpolated into the patterns as they were given on the
# command line.

# -c: remove \cmd[..]{..} together with its argument
if ( defined $comment ) {
    if ($verbose) {
        print STDERR "Removing \\$comment\{..\} sequences (incl. argument)...";
    }
    $body =~ s/(%.*)${comment}(.*)$/$1${someword}$2/mg ;
    # a line end directly after the command (possibly preceded by blanks) is
    # removed as well
    # NOTE(review): the version history at the top of the file says the
    # newline is replaced by a space character, whereas this substitution
    # deletes it - confirm which behaviour is intended
    $cnt = ( $body =~ s/\\${comment}(?:\[${brat0}\])?\{${pat_n}\}(?: *\n)?//sg ) + 0 ;
    if ($verbose) {
        print STDERR "$cnt matches found and removed.\n";
    }
    $body =~ s/(%.*)${someword}(.*)$/$1${comment}$2/mg ;
}

# -e: remove \begin{envir} ... \end{envir} blocks including their content
# and the rest of the line after \end{envir} if it is blank
if ( defined $comenv ) {
    if ($verbose) {
        print STDERR "Removing $comenv environments ...";
    }
    $body =~ s/(%.*)${comenv}/$1${someword}/mg ;
    $cnt = ( $body =~ s/\\begin(?:\[${brat0}\])?\{${comenv}\}.*?\\end\{${comenv}\}\s*?\n//sg ) + 0 ;
    if ($verbose) {
        print STDERR "$cnt matches found and removed.\n";
    }
    $body =~ s/(%.*)${someword}/$1${comenv}/mg ;
}

# -m: remove the command \cmd[..]{..} but keep its argument
if ( defined $markup ) {
    if ($verbose) {
        print STDERR "Removing \\$markup\{..\} commands (leaving argument)...";
    }
    $body =~ s/(%.*)${markup}(.*)$/$1${someword}$2/mg ;
    $cnt = ( $body =~ s/\\${markup}(?:\[${brat0}\])?\{(${pat_n})\}/$1/sg ) + 0 ;
    if ($verbose) {
        print STDERR "$cnt matches found and removed.\n";
    }
    $body =~ s/(%.*)${someword}(.*)$/$1${markup}$2/mg ;
}

# -n: remove \begin{envir} and \end{envir} (each with one directly following
# newline, if present) but keep the content of the environment
if ( defined $markenv ) {
    if ($verbose) {
        print STDERR "Removing $markenv environments ...";
    }
    $body =~ s/(%.*)${markenv}/$1${someword}/mg ;
    my $begins = 0 + ( $body =~ s/\\begin(?:\[${brat0}\])?\{${markenv}\}\n?//sg );
    my $ends   = 0 + ( $body =~ s/\\end\{${markenv}\}\n?//sg );
    $cnt = $begins + $ends;
    # begin and end commands were counted separately, so halve the total
    if ($verbose) {
        print STDERR $cnt/2, " matches found and removed.\n";
    }
    $body =~ s/(%.*)${someword}/$1${markenv}/mg ;
}

# -r: for \cmd[..]{new}{old} keep the first argument and drop the second
# (at most one whitespace character is allowed between the two arguments)
if ( defined $replace ) {
    if ($verbose) {
        print STDERR "Removing \\$replace\{..\}\{..\} commands (leaving 1st and discarding 2nd argument))...";
    }
    $body =~ s/(%.*)${replace}(.*)$/$1${someword}$2/mg ;
    $cnt = ( $body =~ s/\\${replace}(?:\[${brat0}\])?\{(${pat_n})\}\s?\{(${pat_n})\}/$1/sg ) + 0 ;
    if ($verbose) {
        print STDERR "$cnt matches found and removed.\n";
    }
    $body =~ s/(%.*)${someword}(.*)$/$1${replace}$2/mg ;
}
# Write the result to standard output. With a non-empty preamble the document
# is put together again from its three parts and the two markers at which it
# was split; otherwise only the body is printed.
print( length($preamble)
    ? "$preamble\\begin{document}${body}\\end{document}$post"
    : $body );
# checkpure(@matches)
# checks whether any of the strings in matches contains
# $ADDMARKOPEN, $ADDMARKCLOSE,$DELMARKOPEN, or $DELMARKCLOSE
# If so, die reporting nesting problems, otherwise return to caller
sub checkpure {
while (defined($_=shift)) {
if ( /$ADDMARKOPEN/ || /$ADDMARKCLOSE/
|| /$DELMARKOPEN/ || /$DELMARKCLOSE/ ) {
die <=0 && $j>$i ) {
$part1 = substr($text,0,$i) ;
$part2 = substr($text,$i+$l1,$j-$i-$l1);
$part3 = substr($text,$j+$l2) unless $j+$l2 >= length $text;
} else {
die "$word1 or $word2 not in the correct order or not present as a pair."
}
return ($part1,$part2,$part3);
}
# usage()
# Prints the command line help to STDERR and terminates the program with
# exit status 0. Takes no arguments and does not return to the caller.
# The text is an interpolating here-document: $0 expands to the program name
# and every doubled backslash prints as a single one.
sub usage {
print STDERR <<"EOF";
Usage: $0 [OPTIONS] [diff.tex] > revised.tex
Read a file diff.tex (output of latexdiff), and remove its markup.
If no filename is given read from standard input. The command can be used
in ACCEPT, DECLINE, or SIMPLIFY mode, and be used to remove user-defined
latex commands from the input (see options -c, -e, -m, -n, -r below).
In ACCEPT mode, all appended text fragments (or preamble lines)
are kept, and all discarded text fragments (or preamble lines) are
deleted.
In DECLINE mode, all discarded text fragments are kept, and all appended
text fragments are deleted.
If you wish to keep some changes, edit the diff.tex file in
advance, and manually remove those tokens which would otherwise be
deleted. Note that latexrevise only pays attention to the \\DIFaddbegin,
\\DIFaddend, \\DIFdelbegin, and \\DIFdelend tokens and corresponding FL
varieties. All \\DIFadd and \\DIFdel commands (but not their content) are
simply deleted. The commands added by latexdiff to the preamble are also
removed.
In SIMPLIFY mode all latexdiff markup is removed from the body of the text (after
\\begin{document}) except for \\DIFaddbegin, \\DIFaddend, \\DIFdelbegin, \\DIFdelend
tokens and the corresponding FL varieties of those commands. The result
will not in general be valid latex-code but might be easier to read and edit in
preparation for a subsequent run in ACCEPT or DECLINE mode.
In SIMPLIFY mode the preamble is left unmodified.
-a
--accept Run in ACCEPT mode (delete all blocks marked by \\DIFdelbegin
and \\DIFdelend).
-d
--decline Run in DECLINE mode (delete all blocks marked by \\DIFaddbegin
and \\DIFaddend).
-s
--simplify Run in SIMPLIFY mode (Keep all \\DIFaddbegin, \\DIFaddend,
\\DIFdelbegin, \\DIFdelend tokens, but remove all other latexdiff
markup from body).
Note that the three mode options are mutually exclusive. If no mode option is given,
latexrevise simply removes user annotations and markup according to the following five
options; these functions can be very useful outside the latexdiff context, too.
-c cmd
--comment=cmd Remove \\cmd{...}. cmd is supposed to mark some explicit
annotations which should be removed from the file before
release.
-e envir
--comment-environment=envir
Remove explicit annotation environments from the text, i.e. remove
\\begin{envir}
...
\\end{envir}
blocks.
-m cmd
--markup=cmd Remove the markup command cmd but leave its argument, i.e.
turn \\cmd{abc} into abc.
-n envir
--markup-environment=envir
Similarly, remove \\begin{envir} and \\end{envir} commands,
but leave content of the environment in the text.
-r cmd
--replace=cmd For constructions \\cmd{..}{..}, remove the command, leave the
content of first argument, and delete second argument.
-q
--no-warnings Do not warn users about \\DIFadd{..} or \\DIFdel{..} statements
which should not be there anymore
-V
--verbose Verbose output
--version Show version number
-h
--help Show this help text
EOF
exit 0;
}
=head1 NAME
latexrevise - selectively remove markup and text from latexdiff output
=head1 SYNOPSIS
B<latexrevise> [ B<OPTIONS> ] [ F<diff.tex> ] E<gt> F<revised.tex>
=head1 DESCRIPTION
I<latexrevise> reads a file C<diff.tex> (output of I<latexdiff>), and removes the markup commands.
If no filename is given the input is read from standard input. The command can be used
in I<ACCEPT>, I<DECLINE>, or I<SIMPLIFY> mode, or can be used to remove user-defined
latex commands from the input (see B<-c>, B<-e>, B<-m>, B<-n>, and B<-r> below).
In I<ACCEPT> mode, all appended text fragments (or preamble lines)
are kept, and all discarded text fragments (or preamble lines) are
deleted.
In I<DECLINE> mode, all discarded text fragments are kept, and all appended
text fragments are deleted.
If you wish to keep some changes, edit the diff.tex file in
advance, and manually remove those tokens which would otherwise be
deleted. Note that I<latexrevise> only pays attention to the C<\DIFaddbegin>,
C<\DIFaddend>, C<\DIFdelbegin>, and C<\DIFdelend> tokens and corresponding FL
varieties. All C<\DIFadd> and C<\DIFdel> commands (but not their contents) are
simply deleted. The commands added by latexdiff to the preamble are also
removed.
In I<SIMPLIFY> mode, C<\DIFaddbegin, \DIFaddend, \DIFdelbegin, \DIFdelend>
tokens and their corresponding C<FL> varieties are kept but all other markup (e.g. C<\DIFadd> and C<\DIFdel>) is removed. The result
will not in general be valid latex-code but it will be easier to read and edit in
preparation for a subsequent run in I<ACCEPT> or I<DECLINE> mode.
In I<SIMPLIFY> mode the preamble is left unmodified.
=head1 OPTIONS
=over 4
=item B<-a> or B<--accept>
Run in I<ACCEPT> mode (delete all blocks marked by C<\DIFdelbegin> and C<\DIFdelend>).
=item B<-d> or B<--decline>
Run in I<DECLINE> mode (delete all blocks marked by C<\DIFaddbegin>
and C<\DIFaddend>).
=item B<-s> or B<--simplify>
Run in I<SIMPLIFY> mode (Keep all C<\DIFaddbegin>, C<\DIFaddend>,
C<\DIFdelbegin>, C<\DIFdelend> tokens, but remove all other latexdiff
markup from body).
=back
Note that the three mode options are mutually exclusive. If no mode option is given,
I<latexrevise> simply removes user annotations and markup according to the following five
options. These functions can be very useful outside the latexdiff context, too.
=over 4
=item B<-c cmd> or B<--comment=cmd>
Remove C<\cmd{...}> sequences. C<cmd> is supposed to mark some explicit
annotations which should be removed from the file before
release.
=item B<-e envir> or B<--comment-environment=envir>
Remove explicit annotation environments from the text, i.e. remove
\begin{envir}
...
\end{envir}
blocks.
=item B<-m cmd> or B<--markup=cmd>
Remove the markup command C<\cmd> but leave its argument, i.e.
turn C<\cmd{abc}> into C<abc>.
=item B<-n envir> or B<--markup-environment=envir>
Similarly, remove C<\begin{envir}> and C<\end{envir}> commands but
leave content of the environment in the text.
=item B<-r cmd> or B<--replace=cmd>
For constructions C<\cmd{..}{..}>, remove the command, leave the
content of first argument, and delete second argument, i.e. turn
C<\cmd{abc}{def}> into C<abc>.
=item B<-V> or B<--verbose>
Verbose output
=item B<-q> or B<--no-warnings>
Do not warn users about C<\DIFadd{..}> or C<\DIFdel{..}> statements
which should have been removed already.
=back
=head1 BUGS
The current version is a beta version which has not yet been
extensively tested. It has not been actively maintained so might not process output of newer versions of latexdiff entirely correctly.
Please submit bug reports using the issue tracker of the github repository page I<https://github.com/ftilmann/latexdiff>,
or send them to I<tilmann@gfz-potsdam.de>. Include the serial number of I<latexrevise>
(Option --version). If you come across latexdiff
output which is not processed correctly by I<latexrevise> please include the
problem file as well as the old and new files on which it is based,
ideally edited to only contain the offending passage as long as that still
reproduces the problem.
Note that I<latexrevise> gets confused by commented C<\begin{document}> or
C<\end{document}> statements.
=head1 SEE ALSO
L<latexdiff>
=head1 PORTABILITY
I<latexrevise> does not make use of external commands and thus should run
on any platform supporting PERL v5 or higher.
=head1 AUTHOR
Copyright (C) 2004 Frederik Tilmann
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License Version 3
=cut