#! /usr/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      mtems-split
##  Author:
##      Tomaz Erjavec   tomaz.erjavec@ijs.si
##  Description:
##	mtems-split is a Perl program that splits the common tables
##	of the MULTEXT-East morphosyntactic specifications into 
##	language-specific tables. 
##      It works with the ASCII / LaTeX format of the specifications
##      available at http://nl.ijs.si/ME/V3/msd/tex/msd.tex
##---------------------------------------------------------------------------##
##  Copyright (C) 2003       	Tomaz Erjavec, tomaz.erjavec@ijs.si
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##---------------------------------------------------------------------------##


######################CODE

# Release number of this script; it is interpolated into the usage banner.
$VERSION = "1.2";

# Usage banner printed for the help switch.  Each row is listed separately
# so that the tab stops and the trailing blank of the -h row stay visible;
# the final empty element supplies the terminating newline.
$HELP = join("\n",
    "mtems-split $VERSION",
    "Usage: mtems-split [-h] -l language [file]",
    " -h --help\t give this help ",
    " -l lng \t lng is 2-letter name of one of MTE languages",
    "    --lang lng\t",
    " file\t input MTE common table file",
    "",
    " Language tables are written to standard output",
    "",
);

# Process switches
#
# parse_switches() consumes every leading argument of @ARGV that starts
# with a dash and leaves the remaining arguments (the input file, if any)
# in place for the <> operator.
#
# Recognised switches, with one or two leading dashes (the usage text
# documents --help and --lang, which a single-dash-only pattern rejects):
#   -l / -lang / -language CODE   two-character language code; stored
#                                 upper-cased in $LANG and lower-cased in
#                                 $langprint
#   -h / -help                    print $HELP and exit
#
# Any other switch, or a missing / malformed language code, prints a
# message on standard output and dies with a pointer to -h.
sub parse_switches {
    while (@ARGV && $ARGV[0] =~ /^-/) {
        my $switch = shift @ARGV;

        if ($switch =~ /^--?l(?:ang(?:uage)?)?$/) {
            my $value = shift @ARGV;
            $value = '' unless defined $value;    # -l given as last argument

            my ($code) = $value =~ /^(\w\w)$/;
            if (!defined $code) {
                print("No language for -l? $value\n");
                die "For help, type: mtems-split -h\n";
            }

            $LANG      = uc $code;    # used to find the language column
            $langprint = lc $code;    # used in the generated LaTeX labels
        }
        elsif ($switch =~ /^--?h(?:elp)?$/) {
            print($HELP);
            exit;
        }
        else {
            print("Unrecognised switch: $switch \n");
            die "For help, type: mtems-split -h\n";
        }
    }
    return;
}

parse_switches();

####Main

# A target language is mandatory: without it there is no column to
# extract, so report the problem and point the user at the help switch.
unless ($LANG) {
    print "No language specified\n";
    die "For help, type: mtems-split -h\n";
}

#Get POSs
#
# read_pos_categories() scans the input (via <>) for the overview table
# whose header row contains "Part-of-Speech   Code", skips the ruler line
# below it, and collects one "Name!C" entry per data row until the closing
# ruler (a line made only of '=' and blanks) is reached.
#
# Returns the list of "Name!Code" strings in table order.
# On return $_ holds the closing ruler line; the code that follows
# continues scanning the input from there.
#
# Dies if the input ends before the table has been read completely.
# Without that check the read loops would never terminate, because <>
# keeps returning undef (or switches to STDIN) once its files are used up.
sub read_pos_categories {
    my @found;

    # $_ may already hold a line (or be undefined); test it before reading.
    until (defined($_) && /Part-of-Speech\s+Code/) {
        defined($_ = <>)
            or die "mtems-split: no Part-of-Speech table found in input\n";
    }

    # Step over the ruler under the header and fetch the first data row.
    for my $step (1 .. 2) {
        defined($_ = <>)
            or die "mtems-split: Part-of-Speech table is truncated\n";
    }

    until (/^[= ]+$/) {
        if (/^(\w+)\s+(\w)\s*/) {
            push @found, "$1!$2";
        }
        else {
            # A failed match leaves $1/$2 untouched, so recording the row
            # anyway would repeat the previous category (or worse).
            warn "mtems-split: skipping unparsable Part-of-Speech row: $_";
        }
        defined($_ = <>)
            or die "mtems-split: Part-of-Speech table is not terminated\n";
    }

    return @found;
}

push(@cats, read_pos_categories());

# Chapter preamble of the generated LaTeX.  The label carries the
# lower-case language code; XXX and YYY are placeholders left in the
# output.  The here-document is taken literally, so every backslash below
# appears in the output exactly as written.
print "\\label{msd-$langprint}\n";
print <<'END_OF_PREAMBLE';
\chapter{Application to XXX}
\markright{\telri{\hfill} \delID --- XXX{\hfill}}

Authors:\\
YYY


END_OF_PREAMBLE

# Read the next input line into $_ and die with a diagnostic when the
# input is exhausted.  $what says what was being looked for.  Without
# this check the scanning loops below never terminate on truncated input,
# because <> keeps returning undef (or falls through to STDIN).
sub _next_table_line {
    my ($what) = @_;
    $_ = <>;
    die "mtems-split: unexpected end of input while reading $what\n"
        unless defined $_;
    return;
}

# print_language_tables(@categories)
#
# For every "Name!Code" entry, locate the category's common table in the
# input and print its language-specific version as a LaTeX section with a
# verbatim block.
#
# Globals read: $LANG (upper-case code heading the language column) and
#               $langprint (lower-case code used in the labels).
# Input:        continues from the current $_ and reads on through <>.
# Output:       standard output.
#
# Dies when $LANG is not a column of a table, when an entry is malformed,
# or when the input ends in the middle of a table.
sub print_language_tables {
    my @categories = @_;

    foreach my $cat (@categories) {
        my ($catnam, $catcode) = $cat =~ /(.+)!(.+)/
            or die "mtems-split: malformed category entry '$cat'\n";
        my $what = "the table of $catnam ($catcode)";

        # Skip forward to the heading of this category, e.g. "Nouns (N)".
        # The current line is tested first; it may be undefined.
        until (defined($_) && /\Q$catnam\Es?\s+\(\Q$catcode\E\)/) {
            _next_table_line("the heading of $catnam ($catcode)");
        }

        # (The plain ASCII edition printed a numbered heading here instead.)
        print "\\label{msd-$langprint:$catcode}\n";
        print "\\section{$catnam ($catcode)}\n\n";
        print "\\begin{small}\n";
        print "\\begin{verbatim}\n";

        # Skip to the first line of the table: the top ruler, which also
        # carries the language codes heading the per-language columns.
        until (/^=[ =]+/) {
            _next_table_line($what);
        }
        if (!/\s$LANG\s/) {
            print("Strange language for -l: $LANG\n");
            die "For help, type: mtems-split -h\n";
        }

        # Column at which this language's marks sit (last occurrence of
        # the code on the ruler line).
        my ($lang_prefix) = /^(.+)$LANG/;
        my $lngoffset = length($lang_prefix);

        # The ruler proper ends two blanks before the first language code.
        # If that shape is not found, fall back to the text in front of
        # the language code: that is what the stale capture supplied
        # before this check existed.
        my $eqline    = /^([= ]+)\s\s[A-Z]/ ? $1 : $lang_prefix;
        my $rowoffset = length($eqline);

        my $marked_for_lang = qr/^.{$lngoffset}x/;

        _next_table_line($what);            # row with the column titles
        if ($_ !~ $marked_for_lang) {
            print "Not applicable.\n";
        }
        else {
            my ($val_prefix) = /^(.+)VAL\s/;
            my $valoffset = defined $val_prefix ? length($val_prefix) : 0;

            print "$eqline\n";
            print substr($_, 0, $rowoffset), "\n";
            _next_table_line($what);        # ruler below the column titles
            print "$eqline\n";

            # One pass per attribute; the table ends with a ruler line.
            do {
                _next_table_line($what);

                # Collect the lines of one attribute, up to the next
                # separator ('-', '=' or '*' in the first column).
                my @attlines;
                my $lngatt = 0;
                until (/^(-|=|\*)/) {
                    s/ \* /   /;   #get rid of * next to attrib. name (RO)
                    $lngatt = 1 if $_ =~ $marked_for_lang;
                    push @attlines, $_;
                    _next_table_line($what);
                }

                if (!$lngatt) {
                    # Attribute not appropriate for the language: print
                    # its number and name, padded so that a single '-'
                    # lands in the last column of the table.
                    my $label = '';
                    if (@attlines && $attlines[0] =~ /(^\d+\s?\w+)/) {
                        $label = $1;
                    }
                    my $pad = $rowoffset - length($label) - 1;
                    print $label, ' ' x $pad, "-\n";   # x yields '' if $pad <= 0
                }
                else {
                    foreach my $line (@attlines) {
                        if ($line =~ $marked_for_lang) {
                            # value used by the language: keep the row
                            print substr($line, 0, $rowoffset), "\n";
                        }
                        elsif ($line =~ /^\d+/) {
                            # first value unused: keep number and name only
                            print substr($line, 0, $valoffset), "\n";
                        }
                    }
                }

                # Echo the separator that closed this attribute.
                print substr($_, 0, $rowoffset), "\n";
            } until (/^[= ]+/);
        }

        print "\\end{verbatim}\n";
        print "\\end{small}\n\n";
    }
    return;
}

print_language_tables(@cats);
#LSD - yo!
#LSD - yo!