#! /usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      mtems-split
##  Author:
##      Tomaz Erjavec   tomaz.erjavec@ijs.si
##  Description:
##	mtems-split is a Perl program that splits the MULTEXT-East 
##	morphosyntactic descriptions for all languages into 
##	language-specific tables. 
##      It works with the ASCII / LaTeX format of morphosyntactic tables 
##      for all languages from the MTE deliverable D1.1 
##      (available at http://nl.ijs.si/ME/Resources/MorphSyn/)
##---------------------------------------------------------------------------##
##  Copyright (C) 1996	Tomaz Erjavec, tomaz.erjavec@ijs.si
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##


######################CODE

# Program version, interpolated into the usage banner below.
$VERSION = "1.0";

# Usage text printed for -h.  Built from one entry per output line so that
# the embedded tabs and the trailing blank on the "-h" line stay visible;
# the empty entries produce the blank line and the final newline.
$HELP = join("\n",
    "mtems-split $VERSION",
    "Usage: mtems-split [-h] -l language [file]",
    " -h --help\t give this help ",
    " -l lng \t lng is 2-letter name of one of MTE languages",
    "    --lang lng\t (supported values: bu cz es hu ro sl)",
    " file\t input MTE common table file",
    "",
    " Language tables are written to standard output",
    "",
);

# Process switches
#
# Consumes the leading "-..." arguments from @ARGV and sets the global
# $LANG to the upper-cased two-character language code.  Parsing stops at
# the first argument that does not start with "-"; whatever remains in
# @ARGV is left untouched.
#
# Accepted spellings:
#   -l LNG   -lang LNG   -language LNG   --lang LNG   --language LNG
#   -h       -help       --help
# The double-dash forms are the ones shown in the usage text; previously
# they were rejected as unrecognised.  The single-dash long forms are kept
# for backward compatibility.
#
# Prints the usage text and exits on the help switch; prints a diagnostic
# and dies on an unknown switch or on a missing/malformed language value.
sub process_switches {
    while (@ARGV && $ARGV[0] =~ /^-/) {
        my $switch = shift @ARGV;
        if ($switch =~ /^(?:-l|--?lang(?:uage)?)$/) {
            my $value = shift @ARGV;
            $value = '' unless defined $value;   # switch was the last argument
            if ($value =~ /^(\w\w)$/) {
                $LANG = $1;
            }
            else {
                print("No language for -l? $value\n");
                die "For help, type: mtems-split -h\n";
            }
            $LANG =~ tr/a-z/A-Z/;                # table headers use upper case
        }
        elsif ($switch =~ /^(?:-h|--?help)$/) {
            print($HELP); exit;
        }
        else {
            print("Unrecognised switch: $switch \n");
            die "For help, type: mtems-split -h\n";
        }
    }
}
process_switches();

####Main

# A target language is mandatory: refuse to continue when $LANG is still
# unset (or false) at this point.
unless ($LANG) {
    print "No language specified\n";
    die "For help, type: mtems-split -h\n";
}

#Get POSs
#
# Overall flow:
#   1. Find the "Part-of-Speech ... Code" list and collect (name, code) pairs.
#   2. For each pair, find the table headed "<name>[s] (<code>)", locate the
#      column of $LANG in the table's top ruler, and print only the rows
#      that are marked with an "x" in that column.
# Input is read through <>, output goes to standard output.

# Return the next input line.  Dies with a message naming what was being
# looked for when the input ends, instead of looping forever on undef (or
# silently switching to STDIN once the named files are exhausted).
sub read_row {
    my ($wanted) = @_;
    my $row = <>;
    defined $row
        or die "mtems-split: unexpected end of input while looking for $wanted\n";
    return $row;
}

# Print the first $width characters of $text, followed by a newline.
sub print_cut {
    my ($text, $width) = @_;
    print substr($text, 0, $width), "\n";
}

# Skip ahead to the header of the Part-of-Speech list.
my $row = '';
while ($row !~ /Part-of-Speech\s+Code/) {
    $row = read_row('the "Part-of-Speech Code" list');
}
# Step over the line following the header and land on the first entry.
$row = read_row('the Part-of-Speech entries');
$row = read_row('the Part-of-Speech entries');

# Collect entries until the closing ruler (a line of "=" and blanks).
my @cats;
while ($row !~ /^[= ]+$/) {
    if ($row =~ /^(\w+)\s+(\w)\s*$/) {
        push @cats, [$1, $2];
    }
    elsif ($row =~ /\S/) {
        # Previously the captures of an earlier match were reused here,
        # which duplicated the preceding category.
        warn "mtems-split: skipping unparsable Part-of-Speech row: $row";
    }
    $row = read_row('the end of the Part-of-Speech list');
}

my $catno = 0;
foreach my $cat (@cats) {
    my ($catnam, $catcode) = @$cat;
    my $table = "$catnam ($catcode)";
    $catno++;

    # Advance to the heading "<name>[s] (<code>)" of this category.
    while ($row !~ /\Q$catnam\E(?:s)?\s+\(\Q$catcode\E\)/) {
        $row = read_row("the heading of table $table");
    }
    print "\n\n$catno. $catnam ($catcode)\n\n";

    while ($row !~ /^=[ =]+/) {                #skip to first line of table
        $row = read_row("the first line of table $table");
    }

    # The top ruler also carries the language codes.  The text in front of
    # the whitespace-delimited $LANG gives the column of that language.
    my $lngoffset;
    if ($row =~ /^(.*\s)\Q$LANG\E\s/) {
        $lngoffset = length($1);
    }
    else {
        print("Strange language for -l: $LANG\n");
        die "For help, type: mtems-split -h\n";
    }

    # The ruler part in front of the language codes is reused as the ruler
    # of the output table; its length is the width of every output row.
    $row =~ /^([= ]+)\s\s[A-Z]/
        or die "mtems-split: malformed top ruler in table $table\n";
    my $eqline    = $1;
    my $rowoffset = length($eqline);

    # Column header row: an "x" under $LANG means the category applies.
    $row = read_row("the column header of table $table");
    if ($row !~ /^.{$lngoffset}x/) {
        print "Not applicable.\n";
        next;
    }

    # Text in front of the VAL column title; rows cut to this width keep
    # only what precedes the value column.
    $row =~ /^(.+)VAL\s/
        or die "mtems-split: no VAL column in header of table $table\n";
    my $valoffset = length($1);

    print "$eqline\n";
    print_cut($row, $rowoffset);
    read_row("the body of table $table");      # line under the header: dropped
    print "$eqline\n";

    do {                                       #process cat table
        $row = read_row("the end of table $table");

        # Gather the rows of one attribute, up to the next separator
        # (a line starting with "-", "=" or "*").
        my $lngatt = 0;
        my @attlines;
        while ($row !~ /^[-=*]/) {             #process attribute
            $lngatt = 1 if $row =~ /^.{$lngoffset}x/;
            push @attlines, $row;
            $row = read_row("the end of table $table");
        }

        if (!$lngatt) {                        #att not approp for lang
            # Print just the attribute label, padded to the row width and
            # closed with "-".  Nothing is printed when no label can be
            # extracted (e.g. two separators in a row); the old code then
            # printed the separator character as if it were a label.
            if (@attlines && $attlines[0] =~ /^(\d+\s?\w+)/) {
                # "%-${rowoffset}s": the former "%-$rowoffset s" put a blank
                # between width and conversion, which Perl 5 rejects.
                my $line = sprintf("%-${rowoffset}s", $1);
                $line =~ s/ $/-/;
                print "$line\n";
            }
        }
        else {
            foreach my $line (@attlines) {     #processing vals
                if ($line =~ /^.{$lngoffset}x/) {
                    print_cut($line, $rowoffset);   # value used by $LANG
                }
                elsif ($line =~ /^\d+/) {
                    print_cut($line, $valoffset);   # keep the attribute label
                }
            }
        }

        print_cut($row, $rowoffset);           # the separator itself
    } until ($row =~ /^[= ]+$/);               # "=" ruler closes the table
}
#LSD - yo!
