#!/usr/local/bin/perl
#

# CHK_CMD_LINE
#   Verifies that at least one input file was named on the command line.
#   With no arguments, prints a usage message to STDERR and terminates the
#   script with a non-zero exit status, so calling shells and makefiles can
#   detect the bad invocation.  Returns normally when @ARGV is non-empty.
sub CHK_CMD_LINE {
    return if @ARGV;

    # A missing argument is an error: report on STDERR and exit non-zero.
    print STDERR "Usage: ", $0, " inputfiles \n";
    exit 1;
}

# INIT_VAR
#   Sets the package-global metadata values that INSERT_TOP_METADATA
#   interpolates into the header of every output file.  Takes no arguments.
sub INIT_VAR {
    # Document identification.
    ( $docno, $title, $subtitle, $type ) = (
        "IR4873",
        "NIST Interagency Report 4873: Automatic Indexing",
        "Automatic Indexing",
        "chapter",
    );

    # Page position; the main loop increments $page_number after each file.
    ( $page_chapter, $page_number ) = ( "1", "1" );

    # Author, publisher and (empty) editor.
    ( $author1_first, $author1_last ) = ( "Donna", "Harman" );
    $author_publisher = "National Institute of Standards and Technology";
    ( $author_editor1_last, $author_editor1_first ) = ( "", "" );

    # Copyright details; day and statement are intentionally left blank.
    ( $copyright_month, $copyright_day, $copyright_year ) = ( "July", "", "1992" );
    $copyright_statement = "";
    $copyright_ownership = "National Institute of Standards and Technology";
}

# INSERT_TOP_METADATA
#   Writes the document header to the OUTPUT filehandle: the opening <DOC>
#   tag, the metadata elements filled in from the package globals set by
#   INIT_VAR, and the opening <BODY> tag.  OUTPUT must already be open for
#   writing.  The trailing padding inside each line is emitted as-is.
sub INSERT_TOP_METADATA {
    my @header_lines = (
        "<DOC> \n",
        "<DOCNO> $docno </DOCNO>         \n",
        "<TITLE> $title </TITLE>         \n",
        "<SUBTITLE> $subtitle </SUBTITLE>         \n",
        "<TYPE> $type </TYPE>         \n",
        "<PAGE CHAPTER=\"$page_chapter\" NUMBER=\"$page_number\">                   \n",
        "<AUTHOR> $author1_first $author1_last </AUTHOR>  \n",
        "<PUBLISHER> $author_publisher </PUBLISHER> \n",
        "<EDITOR> $author_editor1_first $author_editor1_last </EDITOR> \n",
        "<COPYRIGHT MTH=\"$copyright_month\" DAY=\"$copyright_day\" YEAR=\"$copyright_year\" BY=\"$copyright_ownership\">   \n",
        "$copyright_statement \n",
        "</COPYRIGHT> \n",
        "<BODY> \n",
    );

    # One print per line, in document order.
    for my $header_line (@header_lines) {
        print OUTPUT $header_line;
    }
}

# INSERT_BOTTOM_METADATA
#   Writes the closing </BODY>, </PAGE> and </DOC> tags to the OUTPUT
#   filehandle, which must already be open for writing.
sub INSERT_BOTTOM_METADATA {
    my @footer_lines = (
        "</BODY>                  \n",
        "</PAGE>                  \n",
        "</DOC> \n",
    );

    # Close the elements in the reverse of the order they were opened.
    for my $footer_line (@footer_lines) {
        print OUTPUT $footer_line;
    }
}

# CLEAN_DATA
#   Rewrites the current line held in $_ in place so that it can sit inside
#   the marked-up output: every "~" becomes the literal text "[OCRerr]", and
#   "&", "<" and ">" become "&amp;", "&lt;" and "&gt;".  The ampersand is
#   replaced before the angle brackets so the entities produced for "<" and
#   ">" are not escaped a second time.
#
#   NOTE(review): a line that contains no space character is left completely
#   untouched, so markup characters on such a line pass through unescaped.
sub CLEAN_DATA {
    return unless m/ /;    # only proceed if the line contains a space

    s/~/[OCRerr]/g;
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
}

# Main driver.
#   For every input file named on the command line, writes "<file>.rev"
#   containing the top metadata, the cleaned input lines, and the bottom
#   metadata.  $page_number is incremented after each file, so successive
#   outputs carry consecutive page numbers.
CHK_CMD_LINE();
INIT_VAR();
for my $arg (@ARGV) {
    my $rev_file = "$arg.rev";

    # Three-argument open: the mode is fixed here, so a file name that
    # begins with ">" or ends with "|" is treated as a plain path.
    # The bareword handles are kept because the metadata subs print to OUTPUT.
    open( INPUT, '<', $arg )
        or die "Can't open input file: $arg: $!";
    open( OUTPUT, '>', $rev_file )
        or die "can't open output file: $rev_file: $!";

    INSERT_TOP_METADATA();
    while (<INPUT>) {
        CLEAN_DATA();    # escapes the line in $_ in place
        print( OUTPUT $_ );
    }
    INSERT_BOTTOM_METADATA();

    close(INPUT);
    $page_number++;

    # Buffered write errors (e.g. a full disk) only surface at close.
    close(OUTPUT)
        or die "can't close output file: $rev_file: $!";
}
