Feb 2, 2012

Counting the number of words in a LaTeX document


We use a Perl program to count the words in a LaTeX article.



# One-time setup, executed before the first input line is read.
#
# The variables are package globals (declared with `our`), not `my`
# lexicals: a `my` variable declared inside BEGIN goes out of scope at the
# closing brace, so the per-line code and the END block would never see the
# initial values and the totals would print as empty strings when nothing
# was counted.
BEGIN {
    # LaTeX commands whose {...} argument is markup or metadata rather than
    # running text; their arguments are dropped before words are counted.
    our %cutlist = map { $_ => 1 } qw(
        begin
        end
        usepackage
        addtolength
        documentclass
        author
        title
        chapter
        bibliography
        bibliographystyle
        section
        subsection
        subsubsection
        thanks
        pagestyle
    );

    our $line    = '';    # current input line
    our $cumline = '';    # text accumulated until the braces balance
    our $depth   = 0;     # current {...} nesting depth
    our $words   = 0;     # words counted in the main text
    our $fnwords = 0;     # words counted inside \footnote{...}
    our $i       = 0;     # index of the brace group being processed
    our @tags    = ();    # command name in front of each brace group
    our $thistag = '';    # command name currently being examined
}


# Per-line section of the counter. The script has no input loop of its own:
# it is meant to run under perl's -n switch, which executes this call once
# for every input line with the line in $_.
count_latex_line($_);

# Fold one line of LaTeX source into the running totals.
#
# Parameter:
#   $text - one input line (undef is treated as an empty line).
#
# Package variables read and updated:
#   %cutlist - commands whose {...} argument must not be counted
#   $depth   - brace nesting depth carried over from previous lines
#   $cumline - text accumulated while a brace group is still open
#   $words   - running word count of the main text
#   $fnwords - running word count of \footnote{...} arguments
#
# Text is only analysed once the braces balance, so a command argument that
# spans several lines is seen as a whole.
sub count_latex_line {
    my ($text) = @_;
    $text = '' unless defined $text;

    # Regularize line endings
    $text =~ s/\r/\n/g;

    # Remove comments: an unescaped % hides the rest of its line, while \%
    # is a literal percent sign and is kept. (Reconstructed: the published
    # listing was cut off in the middle of this pattern.)
    $text =~ s/(?<!\\)%[^\n]*//g;

    # Count curly braces
    $depth += ($text =~ tr/{//) - ($text =~ tr/}//);

    # A stray closing brace would otherwise leave the depth negative for
    # the rest of the file and silently stop all further counting.
    $depth = 0 if $depth < 0;

    $cumline .= $text;
    return if $depth > 0;

    # Replace every brace group, innermost first, by a numbered marker
    # <"n"<content>"n"> and remember the command (if any) in front of it.
    # Whitespace before the brace is put back so that "a {b}" stays two
    # words instead of being glued into "ab".
    my @tags;
    my $n = 0;
    while ($cumline =~ s/(\\\w+)?(\s*)\{([^\{\}]*)\}/$2<"$n"<$3>"$n">/) {
        push @tags, $1;
        $n++;
    }

    # Resolve the markers in creation order; inner groups have lower
    # numbers, so they are always resolved before the group around them.
    for my $idx (0 .. $#tags) {
        my $tag = defined $tags[$idx] ? $tags[$idx] : '';
        $tag =~ s/\\//;

        if ($tag eq 'footnote') {
            # Footnotes are counted separately.
            if ($cumline =~ s/<"$idx"<(.*)>"$idx">//s) {
                my $note = $1;
                # Drop bare commands such as \LaTeX, as for the main text.
                $note =~ s/\\\w+//g;
                $fnwords++ while $note =~ /\b\w+\b/g;
            }
        }
        elsif (defined $cutlist{$tag}) {
            # The arguments of these tags are removed.
            $cumline =~ s/<"$idx"<.*>"$idx">//s;
        }
        else {
            # The arguments of other tags are left in.
            $cumline =~ s/<"$idx"<(.*)>"$idx">/$1/s;
        }
    }

    # Commands without a brace argument (\item, \LaTeX, ...) are not words.
    $cumline =~ s/\\\w+//g;

    # Count the remaining words in the present bit of text.
    $words++ while $cumline =~ /\b\w+\b/g;

    $cumline = '';
    return;
}



# Final report, printed once after the last input line has been processed.
END {
    # Fall back to 0 so that empty input reports "0 words" instead of
    # interpolating an unset counter as an empty string.
    my $main_count = $words   || 0;
    my $note_count = $fnwords || 0;

    # A single print with a list avoids the `print (...)` form, which perl
    # parses as a function call whose parentheses end the argument list.
    print "\n$main_count words in the main text\n$note_count in the footnotes\n",
        $main_count + $note_count, " total\n\n";
}



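Save this program as XXX.pl and run it from the command line. The script has no read loop of its own, so perl's -n switch is required to feed it the file one line at a time:


>perl -n XXX.pl foo.tex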


No comments:

Post a Comment