#!/usr/bin/perl -w use strict; my %section; my %reference; my $dotfile="images/sref.dot"; my $bib2ref="bin/bib2ref"; my $bibdb="bib"; main(); sub main { my @doc= <>; my $doc=join("",@doc); ck_runon_sentences($doc); ck_long_paragraphs($doc); get_sections($doc); $doc = bib2ref($doc); get_references($doc); if (1) { for my $name (keys %section) { my $title = $section{$name}{title}; $title =~ s/\n/ /g; $title =~ s/\s+/ /g; # warn "$name $section{$name}{title} $section{$name}{number}\n"; warn "$section{$name}{number} $name $title\n"; # warn "$section{$name}{title}\n"; } # exit; } $doc = replace($doc); gen_dot(); print $doc; } # scan for long sentences sub ck_runon_sentences { my $doc = shift; # while ($doc =~ /(([^\.\:\[\<\s]+\s+){5,}\.)/msg) # while ($doc =~ /(([\S]+\s+){5,}\.)/msg) while ($doc =~ /([^\.\:\[\<]{400,}.)/sg) { my $text=$1; warn "run-on sentence:\n$text\n"; } } sub ck_long_paragraphs { my $doc = shift; while ($doc =~ /([^(^\s*$)]{600,})/sg) { my $text=$1; warn "long paragraph:\n$text\n"; } } # get section names, calculate numbers sub get_sections { # $doc can't be a global -- we consume it in the while() loop below my $doc = shift; my $number=shift || ""; $number.="." if $number; # warn length($doc) . " - $number\n"; # get section, including any nested text and sections # # Programming Perl 3ed p 203 # # Perl Cookbook p 193 # # Mastering Regular Expressions p 228 # # ...but this hits a SIGSEGV bug in 5.6... # my $re = "(((.*?)*.)*?)"; # while ( $doc =~ m{$re}msg ) # ... so workaround, one tag at a time ... my $i=1; my $re='^(((?!).)*?)(.*)'; while($doc =~ m{$re}sg) { my $attr=$3; $doc=$4; my $name = getval("name",$attr); my $title = getval("title",$attr); my $type = getval("type",$attr); my $subnumber = $number . $i++; # warn "$name $title $subnumber\n"; warn "duplicate section name: $name\n" if exists $section{$name}; $section{$name}{title}=$title; $section{$name}{type}=$type; $section{$name}{number}=$subnumber; # warn length($doc) . " -- $subnumber\n"; # recurse if nested
# $DB::single=1 if $name eq 'requirements'; $doc = get_sections($doc,$subnumber) if $doc =~ m{$re}s; # discard the rest of this section, including one
tag $doc =~ s/^(.*?)<\/section>(.*)/$2/s; my $sectext=$1; # extract all of the [# and references in this section while ($sectext =~ //g) { push @{$section{$name}{sref}}, getval("name",$1); } while($sectext =~ /\[\#(.*?)\]/g) { push @{$section{$name}{sref}}, $1; } } return $doc; } sub get_references { my $doc =shift; while ( $doc =~ //msg) { my $attr=$1; my $name = getval("name",$attr); warn "syntax:\n$attr\n" unless $name; $reference{$name}=1; } } # insert bibtex refereences sub bib2ref { my $doc=shift; my $bibhtml = `$bib2ref < $bibdb`; $doc =~ s//$bibhtml/; return $doc; } # replace tags sub replace { my $doc=shift; #
$doc =~ s/()/replace_tag($1,%section)/gems; #
$doc =~ s||

|g; # $doc =~ s/()/replace_tag($1,%section)/gems; # [#foo] $doc =~ s/(\[\#.*?\])/replace_tag($1,%section)/ge; # cut second and subsequent (section ...) verboseness in each # paragraph # [!foo] $doc =~ s/(\[\!.*?\])/replace_tag($1,%section)/ge; # [foo [bar] ] $doc =~ s/(\[[^\d\#](((\[.*?\])*?.)*?)\])/replace_tag($1,%reference)/gems; # $doc =~ s/()/replace_tag($1,%section)/ge; # $doc =~ s||

|g; # XXX warn "TODO:\n$1\n" while $doc =~ /(.{0,80}XXX.{0,80})/gms; return $doc; } sub replace_tag { my $raw=shift; my %index=@_; # warn "$raw"; for($raw) { //ms && do # /)*)*?)>/ms && do # /)*)*?)>/m && do # /]*>)*)*?)>/ms && do # //ms && do # /()|(]*>)/m && do { my $attr=$1; my $type=""; my $name=getval("name",$attr); $type=getval("type",$attr); # if ($type eq 'figure') # { # warn "1=$1\n"; # warn "2=$2\n"; # warn "3=$3\n"; # } my $title=$index{$name}{title}; my $number=$index{$name}{number}; my $dots = $number =~ s/\././g || 0; my $level = 2 + $dots; # warn "$name $title $number $level"; my $out; $out = "$number $title"; $out = "

$number $title " if $type eq 'faq'; $out = "

Figure $number: $title
" if $type eq 'figure'; $out = "

$number $title " if $type eq 'def'; # $out = "

$number $name - " $out = "

$number - " if $type eq 'point'; return $out; }; // && do { my $attr=$1; my $name=getval("name",$attr); return "

[$name]"; }; my $sref; /\[\#(.*?)\]/ && ($sref=$1); /\[\!(.*?)\]/ && ($sref=$1); //ms && ($sref=$1); if ($sref) { my $attr=$sref; # warn "$attr"; my $verbose=0; # verbosity of generated text my $name=$attr; if ($attr =~ /=/) { $name=getval("name",$attr); $verbose=getval("verbose",$attr) || 1; } my @name=split(',',$name); my @out; # warn "$_"; # warn "@name"; for my $name (@name) { # warn $name; # $DB::single=1 if $name eq 'turing'; unless (exists $index{$name}) { warn "section not found: $name\n"; push @out, "[#$name]"; next; } my $title=$index{$name}{title}; $title="" unless $title; my $number=$index{$name}{number}; my $type=$index{$name}{type}; my $out; if ($verbose) { $out = "($number)"; $out = "($number), '$title'" if $title; } elsif ($type eq "figure") { $out = "(figure $number)"; } else { $out = "($number)"; } push @out, $out; } my $out =""; my $s = $#name > 0 ? "s" : ""; if ($verbose) { $out .= "See section$s " . join(", ", @out) . "."; } else { $out .= join(" ", @out); } return $out; }; # handle external references, including nested eg [foo [bar] baz] /(\[(((\[.*?\])*.)*?)\])/ms && do { my $name=$2; my $out = "[$name]"; unless ($index{$name}) { warn "reference not found: $raw\n"; $out = "[$name]"; } return $out; }; } } sub gen_dot { open(DOT,">$dotfile") || die $!; print DOT "digraph sref { fontpath=\"/home/stevegt/fonts\"; node [shape=box fontname = \"timrom\" ]; size=\"7,10\"; rankdir=LR; ratio=\"fill\"; // rotate=90; "; # print DOT "graph sref # { # node [size=\"2,2\"];\n"; my @end; for my $name (keys %section) { next unless $name =~ /^thesis/; unless (exists $section{$name}{sref}) { my $number = $section{$name}{number}; warn "axiom: $number\n"; push @end, $number; next; } my @dependents; for my $dependent (keys %section) { next unless $section{$dependent}{sref}; my @sref = @{$section{$dependent}{sref}}; push @dependents, grep /^$name$/, @sref; } my $number = $section{$name}{number}; unless (@dependents) { warn "implication: $number\n"; push @end, $number; } my @sref = @{$section{$name}{sref}}; next unless @sref; my @number; for my $sref (@sref) { my $number = $section{$sref}{number}; next unless $number; push @number, $number; } # print DOT "\"$name\" -> {" . (join (' ',map {"\"$_\" "} @sref)) . "}\n"; print DOT "\"$number\" -> {" . (join (' ',map {"\"$_\" "} @number)) . "}\n"; } for my $end (@end) { print DOT "$end [shape=circle];\n"; } print DOT "}\n"; close DOT; } # extract value from string containing attribute sub getval { my $name=shift; my $attr=shift; my $val = ""; $val = $2 if $attr =~ m|$name=(['"])(.*?)\1|s; return $val; }