March 2012
I had quite a bit of DocBook lying around in which the programlistings and computeroutputs were indented. Excessive indentation is not a good idea in such whitespace-preserving environments, so I took a look at the “Fancy Patterns” section of the Perl Cookbook and wrote this little Perl snippet to unindent them (after running M-x untabify in Emacs, of course):
#!/usr/bin/perl
use strict;
# Read the whole DocBook file into memory. The path may be given as the
# first command-line argument; without one, the original hard-coded location
# is used.
my $path = @ARGV ? $ARGV[0] : "/home/jurjen/ApprenticesNotes.xml";
my $string = do {
    # Undefine the input record separator only for the duration of this
    # read, so a single <$fh> returns the entire file.
    local $/ = undef;
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle does not leak globally.
    open my $fh, '<', $path or die "Couldn't open Appnotes ($path): $!";
    my $content = <$fh>;
    close $fh;
    $content;
};
# Other modifications, they have nothing to do with indentation
# Remove whitespace between <literallayout> and the <computeroutput> it wraps.
$string =~ s/<literallayout>[[:space:]]+<computeroutput>/<literallayout><computeroutput>/g;
# Likewise between the two closing tags.
$string =~ s%</computeroutput>[[:space:]]+</literallayout>%</computeroutput></literallayout>%g;
# Trim whitespace directly inside <command>...</command>.
$string =~ s/<command>[[:space:]]+/<command>/g;
$string =~ s%[[:space:]]+</command>%</command>%g;
# Drop whitespace that immediately follows an opening <computeroutput>.
$string =~ s/<computeroutput>[[:space:]]+/<computeroutput>/g;
# Drop a newline plus its indentation that immediately precedes a closing
# </computeroutput>.
$string =~ s%\n +</computeroutput>%</computeroutput>%g;
# Remove the common leading indentation from the lines of a text fragment.
#
# Parameters:
#   $input - the text to process. Only runs of space characters that
#            directly follow a newline count as indentation; tabs are not
#            handled, and the first line (which has no preceding newline)
#            is never examined or changed.
# Returns:
#   A copy of $input in which each "newline + N spaces" is replaced by a
#   bare newline, where N is the smallest indentation of any indented line
#   that has content after its leading spaces. $input is returned unchanged
#   when it is undefined or contains no such line.
#
# Lines that consist solely of spaces are ignored when N is computed, so a
# stray "blank" line holding a few spaces cannot prevent the real content
# from being unindented all the way.
sub unindent
{
my ($input) = @_;
return $input unless defined $input;

my $minLength;
# (?=[^ \n]) requires real content after the spaces, which skips
# whitespace-only lines and trailing spaces at the very end of the text.
while ($input =~ m/\n( +)(?=[^ \n])/g) {
my $thisLength = length($1);
if (!defined $minLength || $thisLength < $minLength) {
$minLength = $thisLength;
}
}
return $input unless defined $minLength;

# Strip exactly the common indentation; deeper lines keep their surplus.
my $prefix = ' ' x $minLength;
$input =~ s/\n$prefix/\n/g;
return $input;
}
# The /e modifier makes Perl evaluate the replacement as code, so each
# matched element (tags included) is replaced by the value unindent()
# returns for it. /s lets '.' cross newlines, and the non-greedy '.*?'
# stops at the first closing tag.
$string =~ s{(<computeroutput>.*?</computeroutput>)}{unindent($1)}gse;
$string =~ s{(<programlisting>.*?</programlisting>)}{unindent($1)}gse;
# Write the converted document to standard output.
print $string;