#!/usr/bin/perl -w
use strict;

my %section;
my %reference;

my $dotfile="images/sref.dot";
my $bib2ref="bin/bib2ref";
my $bibdb="bib";

main();

sub main
{
  my @doc= <>;
  my $doc=join("",@doc);

  ck_runon_sentences($doc);
  ck_long_paragraphs($doc);
  get_sections($doc);
  $doc = bib2ref($doc);
  get_references($doc);

  if (1)
  {
    for my $name (keys %section)
    {
      my $title = $section{$name}{title};
      $title =~ s/\n/ /g;
      $title =~ s/\s+/ /g;
      # warn "$name $section{$name}{title} $section{$name}{number}\n";
      warn "$section{$name}{number} $name $title\n";
      # warn "$section{$name}{title}\n";
    }
    # exit;
  }

  $doc = replace($doc);

  gen_dot();

  print $doc;
}

# scan for long sentences
sub ck_runon_sentences
{
  my $doc = shift;
  # while ($doc =~ /(([^\.\:\[\<\s]+\s+){5,}\.)/msg)
  # while ($doc =~ /(([\S]+\s+){5,}\.)/msg)
  while ($doc =~ /([^\.\:\[\<]{400,}.)/sg)
  {
    my $text=$1;
    warn "run-on sentence:\n$text\n";
  }
}

sub ck_long_paragraphs
{
  my $doc = shift;
  while ($doc =~ /([^(^\s*$)]{600,})/sg)
  {
    my $text=$1;
    warn "long paragraph:\n$text\n";
  }
}

# get section names, calculate numbers
sub get_sections
{
  # $doc can't be a global -- we consume it in the while() loop below
  my $doc = shift;
  my $number=shift || "";
  $number.="." if $number;

  # warn length($doc) . " - $number\n";
  
  # get section, including any nested text and sections
  # # Programming Perl 3ed p 203
  # # Perl Cookbook p 193
  # # Mastering Regular Expressions p 228
  # # ...but this hits a SIGSEGV bug in 5.6...
  # my $re = "<section(.*?)>(((<section.*?>.*?</section>)*.)*?)</section>";
  # while ( $doc =~ m{$re}msg )
  # ... so workaround, one tag at a time ...
  my $i=1;
  my $re='^(((?!</section>).)*?)<section(.*?)>(.*)';
  while($doc =~ m{$re}sg)
  {
    my $attr=$3;
    $doc=$4;
    my $name = getval("name",$attr);
    my $title = getval("title",$attr);
    my $type = getval("type",$attr);
    my $subnumber = $number . $i++;
    # warn "$name $title $subnumber\n";
    warn "duplicate section name: $name\n" if exists $section{$name};
    $section{$name}{title}=$title;
    $section{$name}{type}=$type;
    $section{$name}{number}=$subnumber;
    # warn length($doc) . " -- $subnumber\n";
    # recurse if nested <section ...>
    # $DB::single=1 if $name eq 'requirements';
    $doc = get_sections($doc,$subnumber) if $doc =~ m{$re}s;
    # discard the rest of this section, including one </section> tag
    $doc =~ s/^(.*?)<\/section>(.*)/$2/s;
    my $sectext=$1;
    # extract all of the [# and <sref/> references in this section
    while ($sectext =~ /<sref(.*?)\/>/g)
    {
      push @{$section{$name}{sref}}, getval("name",$1);
    }
    while($sectext =~ /\[\#(.*?)\]/g)
    {
      push @{$section{$name}{sref}}, $1;
    }
  }
  return $doc;
}

sub get_references
{
  my $doc =shift;

  while ( $doc =~ /<reference(.*?)>/msg)
  {
    my $attr=$1;
    my $name = getval("name",$attr);
    warn "syntax:\n$attr\n" unless $name;
    $reference{$name}=1;
  }
}


# insert bibtex refereences 
sub bib2ref
{
  my $doc=shift;
  my $bibhtml = `$bib2ref < $bibdb`;
  $doc =~ s/<bibref\/>/$bibhtml/;
  return $doc;
}

# replace tags 
sub replace
{
  my $doc=shift;

  # <section>
  $doc =~ s/(<section.*?>)/replace_tag($1,%section)/gems;
  # </section>
  $doc =~ s|</section>|<p>|g;

  # <sref/>
  $doc =~ s/(<sref.*?\/>)/replace_tag($1,%section)/gems;

  # [#foo]
  $doc =~ s/(\[\#.*?\])/replace_tag($1,%section)/ge;
  # cut second and subsequent (section ...) verboseness in each
  # paragraph

  # [!foo]
  $doc =~ s/(\[\!.*?\])/replace_tag($1,%section)/ge;

  # [foo [bar] ]
  $doc =~ s/(\[[^\d\#](((\[.*?\])*?.)*?)\])/replace_tag($1,%reference)/gems;
  
  # <reference>
  $doc =~ s/(<reference.*?>)/replace_tag($1,%section)/ge;
  # </reference>
  $doc =~ s|</reference>|<p>|g;

  # XXX
  warn "TODO:\n$1\n" while $doc =~ /(.{0,80}XXX.{0,80})/gms;

  return $doc;
}

sub replace_tag
{
  my $raw=shift;
  my %index=@_;

  # warn "$raw";

  for($raw)
  {

    /<section(.*?)>/ms && do 
    # /<section(([^<]*?(<.*?>)*)*?)>/ms && do 
    # /<section(([^<](<.*?>)*)*?)>/m && do 
    # /<section(([^<]*(<[^>]*>)*)*?)>/ms && do 
    # /<section(([^'"]*(['"])[^\3]*\3)*?)>/ms && do 
    # /(<section.*?">)|(<section[^>]*>)/m && do 
    {
      my $attr=$1;
      my $type="";
      my $name=getval("name",$attr);
      $type=getval("type",$attr);
      # if ($type eq 'figure')
      # {
	# warn "1=$1\n";
	# warn "2=$2\n";
	# warn "3=$3\n";
	# }
      my $title=$index{$name}{title};
      my $number=$index{$name}{number};
      my $dots = $number =~ s/\././g || 0;
      my $level = 2 + $dots;
      # warn "$name $title $number $level";
      my $out;
      $out = "<a name='$name'><h$level>$number $title</h$level></a>";
      $out = "<p><a name='$name'><b>$number $title</b></a> "
      if $type eq 'faq';
      $out = "<p>
      <a name='$name'>
      <img src='images/$name'>
      <blockquote><b>Figure $number:</b> $title</blockquote></a>"
      if $type eq 'figure';
      $out = "<p><a name='$name'><b>$number $title</b></a> "
      if $type eq 'def';
      # $out = "<p><a name='$name'><b>$number $name</b></a> - "
      $out = "<p><a name='$name'><b>$number</b></a> - "
      if $type eq 'point';
      return $out;
    };


    /<reference(.*?)>/ && do 
    {
      my $attr=$1;
      my $name=getval("name",$attr);
      return "<p><a name='$name'>[$name]</a>";
    };


    my $sref;
    /\[\#(.*?)\]/ && ($sref=$1);
    /\[\!(.*?)\]/ && ($sref=$1);
    /<sref(.*?)\/>/ms && ($sref=$1);
    if ($sref)
    {
      my $attr=$sref;
      # warn "$attr";
      my $verbose=0;  # verbosity of generated text
      my $name=$attr;
      if ($attr =~ /=/)
      {
	$name=getval("name",$attr);
	$verbose=getval("verbose",$attr) || 1;
      }
      my @name=split(',',$name);
      my @out;
      # warn "$_";
      # warn "@name";
      for my $name (@name)
      {
	# warn $name;
	# $DB::single=1 if $name eq 'turing';
	unless (exists $index{$name})
	{
	  warn "section not found: $name\n";
          push @out, "[#$name]";
	  next;
	}
	my $title=$index{$name}{title};
	$title="" unless $title;
	my $number=$index{$name}{number};
	my $type=$index{$name}{type};
	my $out;
	if ($verbose)
	{
	  $out = "<a href='#$name'>($number)</a>";
	  $out = "($number), <a href='#$name'>'$title'</a>" if $title;
	}
	elsif ($type eq "figure")
	{
	  $out = "<a href='#$name'><b>(figure $number)</b></a>";
	}
	else
	{
	  $out = "<a href='#$name'>($number)</a>";
	}
        push @out, $out;
      }
      my $out ="";
      my $s = $#name > 0 ? "s" : "";
      if ($verbose)
      {
	$out .= "See section$s " . join(", ", @out) . ".";
      }
      else
      {
	$out .= join(" ", @out);
      }
      return $out;
    };

    # handle external references, including nested eg [foo [bar] baz]
    /(\[(((\[.*?\])*.)*?)\])/ms && do
    {
      my $name=$2;
      my $out = "<a href='#$name'>[$name]</a>";
      unless ($index{$name})
      {
	warn "reference not found: $raw\n";
	$out = "[$name]";
      }
      return $out;
    };

  }

}

sub gen_dot
{
  open(DOT,">$dotfile") || die $!;
  print DOT "digraph sref {
    fontpath=\"/home/stevegt/fonts\";
    node [shape=box fontname = \"timrom\" ];
    size=\"7,10\";
    rankdir=LR;
    ratio=\"fill\";
    // rotate=90;
    ";
    # print DOT "graph sref 
    # {
      # node [size=\"2,2\"];\n";

  my @end;
  for my $name (keys %section)
  {
    next unless $name =~ /^thesis/;
    unless (exists $section{$name}{sref})
    {
      my $number = $section{$name}{number};
      warn "axiom: $number\n";
      push @end, $number;
      next;
    }
    my @dependents;
    for my $dependent (keys %section)
    {
      next unless $section{$dependent}{sref};
      my @sref = @{$section{$dependent}{sref}};
      push @dependents, grep /^$name$/, @sref;
    }
    my $number = $section{$name}{number};
    unless (@dependents)
    {
      warn "implication: $number\n";
      push @end, $number;
    }
    my @sref = @{$section{$name}{sref}};
    next unless @sref;
    my @number;
    for my $sref (@sref)
    {
      my $number = $section{$sref}{number};
      next unless $number;
      push @number, $number;
    }
    # print DOT "\"$name\" -> {" . (join (' ',map {"\"$_\" "} @sref)) . "}\n";
    print DOT "\"$number\" -> {" . (join (' ',map {"\"$_\" "} @number)) . "}\n";
  }
  for my $end (@end)
  {
    print DOT "$end [shape=circle];\n";
  }
  print DOT "}\n";
  close DOT;

}

# extract value from string containing attribute
sub getval
{
  my $name=shift;
  my $attr=shift;
  my $val = "";
  $val = $2 if $attr =~ m|$name=(['"])(.*?)\1|s; 
  return $val;
}

