#!/usr/bin/perl -w
use strict;
my %section;
my %reference;
my $dotfile="images/sref.dot";
my $bib2ref="bin/bib2ref";
my $bibdb="bib";
main();
sub main
{
my @doc= <>;
my $doc=join("",@doc);
ck_runon_sentences($doc);
ck_long_paragraphs($doc);
get_sections($doc);
$doc = bib2ref($doc);
get_references($doc);
if (1)
{
for my $name (keys %section)
{
my $title = $section{$name}{title};
$title =~ s/\n/ /g;
$title =~ s/\s+/ /g;
# warn "$name $section{$name}{title} $section{$name}{number}\n";
warn "$section{$name}{number} $name $title\n";
# warn "$section{$name}{title}\n";
}
# exit;
}
$doc = replace($doc);
gen_dot();
print $doc;
}
# scan for long sentences
sub ck_runon_sentences
{
my $doc = shift;
# while ($doc =~ /(([^\.\:\[\<\s]+\s+){5,}\.)/msg)
# while ($doc =~ /(([\S]+\s+){5,}\.)/msg)
while ($doc =~ /([^\.\:\[\<]{400,}.)/sg)
{
my $text=$1;
warn "run-on sentence:\n$text\n";
}
}
sub ck_long_paragraphs
{
my $doc = shift;
while ($doc =~ /([^(^\s*$)]{600,})/sg)
{
my $text=$1;
warn "long paragraph:\n$text\n";
}
}
# get section names, calculate numbers
sub get_sections
{
# $doc can't be a global -- we consume it in the while() loop below
my $doc = shift;
my $number=shift || "";
$number.="." if $number;
# warn length($doc) . " - $number\n";
# get section, including any nested text and sections
# # Programming Perl 3ed p 203
# # Perl Cookbook p 193
# # Mastering Regular Expressions p 228
# # ...but this hits a SIGSEGV bug in 5.6...
# my $re = ")*.)*?)";
# while ( $doc =~ m{$re}msg )
# ... so workaround, one tag at a time ...
my $i=1;
my $re='^(((?!).)*?)(.*)';
while($doc =~ m{$re}sg)
{
my $attr=$3;
$doc=$4;
my $name = getval("name",$attr);
my $title = getval("title",$attr);
my $type = getval("type",$attr);
my $subnumber = $number . $i++;
# warn "$name $title $subnumber\n";
warn "duplicate section name: $name\n" if exists $section{$name};
$section{$name}{title}=$title;
$section{$name}{type}=$type;
$section{$name}{number}=$subnumber;
# warn length($doc) . " -- $subnumber\n";
# recurse if nested
# $DB::single=1 if $name eq 'requirements';
$doc = get_sections($doc,$subnumber) if $doc =~ m{$re}s;
# discard the rest of this section, including one tag
$doc =~ s/^(.*?)<\/section>(.*)/$2/s;
my $sectext=$1;
# extract all of the [# and references in this section
while ($sectext =~ //g)
{
push @{$section{$name}{sref}}, getval("name",$1);
}
while($sectext =~ /\[\#(.*?)\]/g)
{
push @{$section{$name}{sref}}, $1;
}
}
return $doc;
}
sub get_references
{
my $doc =shift;
while ( $doc =~ //msg)
{
my $attr=$1;
my $name = getval("name",$attr);
warn "syntax:\n$attr\n" unless $name;
$reference{$name}=1;
}
}
# insert bibtex refereences
sub bib2ref
{
my $doc=shift;
my $bibhtml = `$bib2ref < $bibdb`;
$doc =~ s//$bibhtml/;
return $doc;
}
# replace tags
sub replace
{
my $doc=shift;
#
$doc =~ s/()/replace_tag($1,%section)/gems;
#
$doc =~ s||
|g;
#
$doc =~ s/()/replace_tag($1,%section)/gems;
# [#foo]
$doc =~ s/(\[\#.*?\])/replace_tag($1,%section)/ge;
# cut second and subsequent (section ...) verboseness in each
# paragraph
# [!foo]
$doc =~ s/(\[\!.*?\])/replace_tag($1,%section)/ge;
# [foo [bar] ]
$doc =~ s/(\[[^\d\#](((\[.*?\])*?.)*?)\])/replace_tag($1,%reference)/gems;
#
$doc =~ s/()/replace_tag($1,%section)/ge;
#
$doc =~ s||
|g;
# XXX
warn "TODO:\n$1\n" while $doc =~ /(.{0,80}XXX.{0,80})/gms;
return $doc;
}
sub replace_tag
{
my $raw=shift;
my %index=@_;
# warn "$raw";
for($raw)
{
//ms && do
# /)*)*?)>/ms && do
# /)*)*?)>/m && do
# /]*>)*)*?)>/ms && do
# //ms && do
# /()|(]*>)/m && do
{
my $attr=$1;
my $type="";
my $name=getval("name",$attr);
$type=getval("type",$attr);
# if ($type eq 'figure')
# {
# warn "1=$1\n";
# warn "2=$2\n";
# warn "3=$3\n";
# }
my $title=$index{$name}{title};
my $number=$index{$name}{number};
my $dots = $number =~ s/\././g || 0;
my $level = 2 + $dots;
# warn "$name $title $number $level";
my $out;
$out = "$number $title";
$out = "$number $title "
if $type eq 'faq';
$out = "
Figure $number: $title
"
if $type eq 'figure';
$out = "
$number $title "
if $type eq 'def';
# $out = "
$number $name - "
$out = "
$number - "
if $type eq 'point';
return $out;
};
// && do
{
my $attr=$1;
my $name=getval("name",$attr);
return "[$name]";
};
my $sref;
/\[\#(.*?)\]/ && ($sref=$1);
/\[\!(.*?)\]/ && ($sref=$1);
//ms && ($sref=$1);
if ($sref)
{
my $attr=$sref;
# warn "$attr";
my $verbose=0; # verbosity of generated text
my $name=$attr;
if ($attr =~ /=/)
{
$name=getval("name",$attr);
$verbose=getval("verbose",$attr) || 1;
}
my @name=split(',',$name);
my @out;
# warn "$_";
# warn "@name";
for my $name (@name)
{
# warn $name;
# $DB::single=1 if $name eq 'turing';
unless (exists $index{$name})
{
warn "section not found: $name\n";
push @out, "[#$name]";
next;
}
my $title=$index{$name}{title};
$title="" unless $title;
my $number=$index{$name}{number};
my $type=$index{$name}{type};
my $out;
if ($verbose)
{
$out = "($number)";
$out = "($number), '$title'" if $title;
}
elsif ($type eq "figure")
{
$out = "(figure $number)";
}
else
{
$out = "($number)";
}
push @out, $out;
}
my $out ="";
my $s = $#name > 0 ? "s" : "";
if ($verbose)
{
$out .= "See section$s " . join(", ", @out) . ".";
}
else
{
$out .= join(" ", @out);
}
return $out;
};
# handle external references, including nested eg [foo [bar] baz]
/(\[(((\[.*?\])*.)*?)\])/ms && do
{
my $name=$2;
my $out = "[$name]";
unless ($index{$name})
{
warn "reference not found: $raw\n";
$out = "[$name]";
}
return $out;
};
}
}
sub gen_dot
{
open(DOT,">$dotfile") || die $!;
print DOT "digraph sref {
fontpath=\"/home/stevegt/fonts\";
node [shape=box fontname = \"timrom\" ];
size=\"7,10\";
rankdir=LR;
ratio=\"fill\";
// rotate=90;
";
# print DOT "graph sref
# {
# node [size=\"2,2\"];\n";
my @end;
for my $name (keys %section)
{
next unless $name =~ /^thesis/;
unless (exists $section{$name}{sref})
{
my $number = $section{$name}{number};
warn "axiom: $number\n";
push @end, $number;
next;
}
my @dependents;
for my $dependent (keys %section)
{
next unless $section{$dependent}{sref};
my @sref = @{$section{$dependent}{sref}};
push @dependents, grep /^$name$/, @sref;
}
my $number = $section{$name}{number};
unless (@dependents)
{
warn "implication: $number\n";
push @end, $number;
}
my @sref = @{$section{$name}{sref}};
next unless @sref;
my @number;
for my $sref (@sref)
{
my $number = $section{$sref}{number};
next unless $number;
push @number, $number;
}
# print DOT "\"$name\" -> {" . (join (' ',map {"\"$_\" "} @sref)) . "}\n";
print DOT "\"$number\" -> {" . (join (' ',map {"\"$_\" "} @number)) . "}\n";
}
for my $end (@end)
{
print DOT "$end [shape=circle];\n";
}
print DOT "}\n";
close DOT;
}
# extract value from string containing attribute
sub getval
{
my $name=shift;
my $attr=shift;
my $val = "";
$val = $2 if $attr =~ m|$name=(['"])(.*?)\1|s;
return $val;
}