package RcsLite;

use strict;
use Algorithm::Diff;

# Simple interface to RCS.  Doesn't support:
#    branches
#    locking

# TODO:
#
#  - Only read the part of the file required
#  - Check for binary

sub new
{
   my $self = {};
   bless $self;   
   return $self;
}

sub _trace
{
   my( $text ) = @_;
   #print $text;
}


# Process an RCS file

# File format information:
#
#rcstext    ::=  admin {delta}* desc {deltatext}*
#admin      ::=  head {num};
#                { branch   {num}; }
#                access {id}*;
#                symbols {sym : num}*;
#                locks {id : num}*;  {strict  ;}
#                { comment  {string}; }
#                { expand   {string}; }
#                { newphrase }*
#delta      ::=  num
#                date num;
#                author id;
#                state {id};
#                branches {num}*;
#                next {num};
#                { newphrase }*
#desc       ::=  desc string
#deltatext  ::=  num
#                log string
#                { newphrase }*
#                text string
#num        ::=  {digit | .}+
#digit      ::=  0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
#id         ::=  {num} idchar {idchar | num }*
#sym        ::=  {digit}* idchar {idchar | digit }*
#idchar     ::=  any visible graphic character except special
#special    ::=  $ | , | . | : | ; | @
#string     ::=  @{any character, with @ doubled}*@
#newphrase  ::=  id word* ;
#word       ::=  id | num | string | :
#
# Identifiers are case sensitive. Keywords are in lower case only. The sets of keywords and 
# identifiers can overlap. In most environments RCS uses the ISO 8859/1 encoding: 
# visible graphic characters are codes 041-176 and 240-377, and white space characters are 
# codes 010-015 and 040. 
#
# Dates, which appear after the date keyword, are of the form Y.mm.dd.hh.mm.ss, 
# where Y is the year, mm the month (01-12), dd the day (01-31), hh the hour (00-23), 
# mm the minute (00-59), and ss the second (00-60). Y contains just the last two digits of 
# the year for years from 1900 through 1999, and all the digits of years thereafter. 
# Dates use the Gregorian calendar; times use UTC. 
#
# The newphrase productions in the grammar are reserved for future extensions to the format 
# of RCS files. No newphrase will begin with any keyword already in use. 

sub _readTo
{
    my( $file, $char ) = @_;
    my $buf = "";
    my $ch;
    my $space = 0;
    my $string = "";
    my $state = "";
    while( read( $file, $ch, 1 ) ) {
       if( $ch eq "@" ) {
          if( $state eq "@" ) {
             $state = "e";
             next;
          } elsif( $state eq "e" ) {
             $state = "@";
             $string .= "@";
             next;
          } else {
             $state = "@";
             next;
          }
       } else {
          if( $state eq "e" ) {
             $state = "";
             if( $char eq "@" ) {
                last;
             }
             # End of string
          } elsif ( $state eq "@" ) {
             $string .= $ch;
             next;
          }
       }
       
       if( $ch =~ /\s/ ) {
          if( length( $buf ) == 0 ) {
              next;
          } elsif( $space ) {
              next;
          } else {
              $space = 1;
              $ch = " ";
          }
       } else {
          $space = 0;
       }
       $buf .= $ch;
       if( $ch eq $char ) {
           last;
       }
    }
    return( $buf, $string );
}

sub process
{
    my( $self, $file ) = @_;
    my $where = "admin.head";
    my $fh;
    open( $fh, "$file" );
    my $lastWhere = "";
    my $going = 1;
    my $term = ";";
    my $string = "";
    my $num = "";
    my $headNum = "";
    my @author = ();
    my @log = ();
    my @text = ();
    my $dnum = "";
    while( $going ) {
       ($_, $string) = _readTo( $fh, $term );
       last if( ! $_ );
      
       my $lastWhere = $where;
       #print "\"$where -- $_\"\n";
       if( $where eq "admin.head" ) {
          if( /^head\s+([0-9]+)\.([0-9]+);$/o ) {
             die( "Only support start of version being 1" ) if( $1 ne "1" );
             $headNum = $2;
             $where = "admin.access"; # Don't support branch
          } else {
             last;
          }
       } elsif( $where eq "admin.access" ) {
          if( /^access\s*.*;$/o ) {
             $where = "admin.symbols";
          } else {
             last;
          }
       } elsif( $where eq "admin.symbols" ) {
          if( /^symbols.*;$/o ) {
             $where = "admin.locks";
          } else {
             last;
          }
       } elsif( $where eq "admin.locks" ) {
          if( /^locks.*;$/o ) {
             $where = "admin.postLocks";
          } else {
             last;
          }
       } elsif( $where eq "admin.postLocks" ) {
          if( /^strict\s*;/o ) {
             $where = "admin.postStrict";
          }
       } elsif( $where eq "admin.postStrict" ) {
          if( /^comment\s.*$/o ) {
             $where = "admin.postComment";
             $self->{comment} = $string;
          }
       } elsif( $where eq "admin.postStrict" || $where eq "admin.postComment" || 
                $where eq "delta.date") {
          if( /^([0-9]+)\.([0-9]+)\s+date\s+(\d\d\d\d(\.\d\d){5}?);$/o ) {
             $where = "delta.author";
             $num = $2;
          }
       } elsif( $where eq "delta.author" ) {
          if( /^author\s+(.*);$/o ) {
             $author[$num] = $1;
             if( $num == 1 ) {
                $where = "desc";
                $term = "@";
             } else {
                $where = "delta.date";
             }
          }
       } elsif( $where eq "desc" ) {
          if( /desc\s*$/o ) {
             $self->{"description"} = $string;
             $where = "deltatext.log";
          }
       } elsif( $where eq "deltatext.log" ) {
          if( /\d+\.(\d+)\s+log\s+$/o ) {
             $dnum = $1;
             $log[$dnum] = $string;
             $where = "deltatext.text";
          }
       } elsif( $where eq "deltatext.text" ) {
          if( /text\s*$/o ) {
             $where = "deltatext.log";
             $text[$dnum] = $string;
             if( $dnum == 1 ) {
                $where = "done";
                last;
             }
          }
       }
    }
    
    $self->{"head"} = $headNum;
    $self->{"author"} = \@author;
    $self->{"log"} = \@log;
    $self->{"delta"} = \@text;
    $self->{"status"} = $dnum;
}

sub head
{
    my( $self ) = @_;
    return $self->{"head"};
}

sub comment
{
    my( $self ) = @_;
    return $self->{"comment"};
}

sub description
{
    my( $self ) = @_;
    return $self->{"description"};
}

sub author
{
    my( $self, $version ) = @_;
    return ${$self->{"author"}}[$version];
}

sub log
{
    my( $self, $version ) = @_;
    return ${$self->{"log"}}[$version];
}

sub delta
{
    my( $self, $version ) = @_;
    return ${$self->{"delta"}}[$version];
}

sub _patch
{
   # Both params are references to arrays
   my( $text, $delta ) = @_;
   my $adj = 0;
   my $pos = 0;
   my $d;
   while( $pos <= $#${delta} ) {
       $d = $delta->[$pos];
       _trace( "patch: $pos -> $d\n" );
       if( $d =~ /^([ad])(\d+)\s(\d+)$/ ) {
          my $offset = $2;
          my $length = $3;
          if( $1 eq "d" ) {
             _trace( "patch: text = @$text\n  adj=$adj\n" );
             splice( @$text, $offset + $adj - 1, $length );
             $adj -= $length;
             $pos++;
          } elsif( $1 eq "a" ) {
             splice( @$text, $offset + $adj, 0, @${delta}[$pos+1..$pos+$length] );
             $adj += $length;
             $pos += $length + 1;
          } else {
             die( "only a and d supported, found " . $d );
          }
       } else {
          die( "wrong!" . $d );
       }
       _trace( "patch: end loop $pos $#${delta}\n" );
   }
}


sub _patchN
{
    my( $self, $text, $version, $target ) = @_;
    my $deltaText= $self->delta( $version );
    my @delta = split( /\n/, $deltaText );
    $delta[$#delta] .= "\n" if( $deltaText =~ /\n$/o );
    _patch( $text, \@delta );
    if( $version == $target ) {
        return join( "\n", @$text );
    } else {
        return $self->_patchN( $text, $version-1, $target );
    }
}

sub _diffText
{
    my( $new, $old ) = @_;
    my @lNew = split( /\n/, $$new );
    my @lOld = split( /\n/, $$old );
    return _diff( \@lNew, \@lOld );
}


sub _diff
{
    my( $new, $old ) = @_;
    # Work out diffs to change new to old, which are refs to lists
    my $diffs = Algorithm::Diff::diff( $new, $old );

    my $adj = 0;
    my @patch = ();
    my @del = ();
    my @ins = ();
    my $out = "";
    my $start = 0;
    foreach my $chunk ( @$diffs ) {
       my $chunkSign = "";
       my @lines = ();
       foreach my $line ( @$chunk ) {
           my( $sign, $pos, $what ) = @$line;
           if( $chunkSign ne $sign && $chunkSign ne "") {
               $adj += _addChunk( $chunkSign, \$out, \@lines, $start, $adj );
           }
           if( ! @lines ) {
               $start = $pos;
           }
           $chunkSign = $sign;
           push @lines, ( $what );
       }
       $adj += _addChunk( $chunkSign, \$out, \@lines, $start, $adj );
    }
    return $out;
}


sub _addChunk
{
   my( $chunkSign, $out, $lines, $start, $adj ) = @_;
   my $nLines = $#${lines} + 1;
   if( $nLines > 0 ) {
       $$out .= "\n" if( $$out );
       if( $chunkSign eq "+" ) {
          $$out .= "a";
          $$out .= $start-$adj;
          $$out .= " $nLines\n";
          $$out .= join( "\n", @$lines );
       } else {
          $$out .= "d";
          $$out .= $start+1;
          $$out .= " $nLines";
          $nLines *= -1;
       }
       @$lines = ();
   }
   return $nLines;
}


sub text
{
    my( $self, $version ) = @_;
    my $head = $self->head();
    if( $version == $head ) {
        return $self->delta( $version );
    } else {
        my @text = split( /\n/, $self->delta( $head ) );
        return $self->_patchN( \@text, $head-1, $version );
    }
}

sub validTo
{
    my( $self ) = @_;
    return $self->{"status"};
}

#process( "c:/tmp/rcs/RCS/a.txt" );

1;