#! /usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      mtems-expand
##  Author:
##      Tomaz Erjavec   tomaz.erjavec@ijs.si
##  Description:
##	mtems-expand is a Perl program that exapands MULTEXT-East 
##	morphosyntactic descriptions or checks their well-formedness
##      The MSDs should be input in the command line (SP is separator) or
##      in standard input, one per line
##	It works with the ASCII / LaTeX format of morphosyntactic specifications
##      for the languages of MTE:
##      these should come along with this program; 
##      later versions might be available at URL
##      http://nl.ijs.si/ME/
##---------------------------------------------------------------------------##
##  Copyright (C) 1996,97	Tomaz Erjavec, tomaz.erjavec@ijs.si
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##

# Bugs:
# very sensitive to the format of the tables
# not optimised at all
# man pages missing
# could do with a nice emacs interface to this program


######################INSTALLATION SPECIFIC SETTINGS

$DEF_MODE    = 'check';       #default mode
$DEF_TBLFILE =                #default ms-table file;
   'msd.tex'; 

######################CODE

$VERSION = "1.5";

$HELP = "mtems-expand $VERSION
Usage: mtems-expand [-bchv] [-l language] [-t file] [codes]
 -h --help\t give this help 
 -b --brief\t display short expansion of code(s) (default mode is $DEF_MODE)
 -c --check\t display only only invalid code(s)
 -v --verbose\t display verbose expansion of code(s)
 -l lng \t lng is 2-letter name of one of MTE languages
    --lang lng\t (supported values are those from the table file)
 -t tbl \t tbl is name of the database MTE morph-syn table file
    --table tbl\t (default file is \'$DEF_TBLFILE\')
 codes\t\t codes to expand/check; should be separated by SP
      \t\t if no code given, uses standard input; 
      \t\t expects one code per line\n";

# Process switches
#$argv = join(' ',@ARGV);			# Save the command line arguments
while ($ARGV[0] =~ /^(.+)$/) {
    $_ = shift;
    if (/^-b(rief)?$/) {
	(!$MODE
	 || print("Only one mode switch allowed\n")
	 && die "For help, type: mtems-expand -h\n");
	$MODE='brief'
	}
    elsif (/^-v(erbose)?$/) {
	(!$MODE
	 || print("Only one mode switch allowed")
	 && die "For help, type: mtems-expand -h\n");
	$MODE='verbose'
    }
    elsif (/^-c(heck)?$/) {
	(!$MODE
	 || print("Only one mode switch allowed")
	 && die "For help, type: mtems-expand -h\n");
	$MODE='check'
	}
    elsif (/^-t(able)?$/) {
	$_ = shift;
        ((($TBL) = /^(.+)$/)
	 || print("No filename for -t? $_\n")
	 && die "For help, type: mtems-expand -h\n");
    }
    elsif (/^-l(ang(uage)?)?$/) {
	$_ = shift;
        ((($LANG) = /^(\w\w)$/)
	 || print("No language for -l? $_\n")
	 && die "For help, type: mtems-expand -h\n");
	$LANG =~ tr/a-z/A-Z/;
    }
    elsif (/^-h(elp)?$/) {
	print($HELP); exit;
    }
    elsif (/^-(.*)$/) {
	print("Unrecognised switch: $_ \n");
        die "For help, type: mtems-expand -h\n";
    }
    elsif (/^(\w[\w\-]*)$/) {
	$CODES = "$CODES $1";
    }
    else {
	print("Strange commands: $_ \n");
        die "For help, type: mtems-expand -h\n";
    }
}

if (!$MODE) {$MODE = $DEF_MODE};
if (!$TBL) {$TBL = $DEF_TBLFILE};

####Main

open TBL or die "Can't open table file $TBL: $!\n";
close(TBL);

#if ($MODE=~'check') {print "Command line: $argv\n"}

if ($CODES) {
    while ($CODES =~ s/^ ?([^ ]+)//) {
	$CODE = $1;
	&expand_code
	}
}
else {
    while (<>) {
	if (m/^\s*([\w-]+)\s*$/) {
	    $CODE=$1;
	    &expand_code;
	}
	else {
	    chop($_);
	    print "***$_***\n";
	    if ($MODE=~'verbose') {print "\n"}
	}

    }
}

####Code expansion

sub expand_code {
    local($ind) = 0;
    local($taberr) = 0;
    local($caterr) = 0;
    local($atterr) = 0;
    local($valerr) = 0;
    local(@codes) = split(//,$CODE);

    open TBL or die "Can't open table file $TBL: $!\n";

    $code = shift(@codes);
    while (!$taberr &&                          #getting Cat name
	   $line !~ /Part-of-Speech\s+Code/) {
	$line=<TBL>;
	$taberr=eof(TBL);
	};
    if ($taberr) {
	print("Can't find Part-of-Speech in table file!\n");
	die "For help, type: mtems-split -h\n";
    }
    $line=<TBL>; $line=<TBL>;
    while (!$caterr &&                          #processing Cat
	   ($line !~ /^(\w+)\s+$code\s*/)) { 
	$caterr = ($line =~ /^[= ]+/);         #EOT & catcode not found!
	$line=<TBL>;
    }
    if ($caterr) {$cat="*$code*"}
    else {
	$line =~ /(\w+)\s+.+/; $cat=$1;
	while (!$taberr &&                      #looking for Cat table
	       ($line !~ /\($code\)/)) { 
	    $taberr=eof(TBL);
	    $line=<TBL>;
	}
    }
    if ($taberr) {
	print("Can't find section for $cat ($code) in table file!\n");
	die "For help, type: mtems-split -h\n";
    }
    if (!$caterr) {
	while ($line !~ /^=[ =]+/) {            #skip to first line of table
	    $line=<TBL>                   
	    }
	if ($LANG) {
	    if ($line !~ /\s($LANG)\s/) {
		print("Strange language for -l: $LANG\n");
		die "For help, type: mtems-expand -h\n"
		}
	    else {$line =~ /^(.+)($LANG)/; $lngoffset = length($1)}
	}
	$line=<TBL>;
	$line =~ /^(.+)VAL\s/; $valoffset = length($1);
	$line =~ /^(.+)C\s/;   $codoffset = length($1);
	if ($LANG) {
	    $caterr = ($line !~ /^.{$lngoffset}x/);
	    if ($caterr) {$cat="?$code-$cat?"}
	}
	$line=<TBL>; $line=<TBL>;
    }
    if    ($MODE=~'check') {
	if ($caterr) {print "$CODE\t$cat\n"}
    }
    elsif ($MODE=~'brief')   {print "$CODE: $cat "} 
    elsif ($MODE=~'verbose') {print ">>>>\t$CODE\nPoS:\t$cat\n"}
    
    if (!$caterr) { 
	foreach $code(@codes) {                 #processing att:val
	    $ind++;
  	    $valerr = 0;
	    while (!$atterr && ($line !~ /^$ind/)) { #processing att
		$atterr =                       #end of cat & att not found!
		    ($line =~ m/^[= ]+$/);
		$line=<TBL>
		}
	    if ($atterr) {$att="*$ind*"} 
	    else {
		$line =~ /\d+\s?(\w+)/; $att=$1 #extract att
		} 
	    if ($atterr || $code eq '-') {$val=$code}
	    else { 
		while (!$valerr &&              #processing val
		       ($line !~ /^.{$codoffset}$code/)) {
		    $valerr = 
			($line =~ /^[-*= ]+$/); #end of att & val not found!
		    $line=<TBL>
		    };
		if ($valerr) {$val="*$code*"} 
		else {
		    $line =~ /^.{$valoffset}(\w+)/; $val=$1;
		    if ($LANG) {
			$valerr = ($line !~ /^.{$lngoffset}x/);
			if ($valerr) {$val="?$code:$val?"}
		    }
		}
	    }
	    if ($MODE=~'check') {
		if ($atterr)    {print "$CODE\t$att\n"}
		elsif ($valerr) {print "$CODE\t$att:$val\n"}
	    }
	    elsif ($MODE=~'brief') {
		if ($val !~ '-') {
		    if ($val eq "no") {$val="-$att"}     # '-clitic' more 
		    elsif ($val eq 'yes') {$val="+$att"} # informative than 'no'
		    $val =~ tr/A-Z/a-z/;                 # '-Clitic' is ugly
		    print "$val "
		    }
	    }
	    elsif ($MODE=~'verbose') {print "$att:\t$val\n"}
	}
    }
    if (!($MODE=~'check')) {print "\n"};
}
close TBL;

#LSD - yo!
