#! /usr/bin/perl -w
##---------------------------------------------------------------------------##
##  File:
##      mtems2fslib
##  Date: 
##      2004-02-25
##  Author:
##      Tomaz Erjavec   tomaz.erjavec@ijs.si
##  Description:
##      mtems2fslib is a Perl program that converts lines of the form
##          MSD <tab> lang1 lang2 ... langN
##      to TEI P4 fsLib/fs encoding.
##      The following driver takes the V3 MULTEXT-East lexica and
##      produces the lexical list of MSDs in FS format:
##
##   #!/bin/csh
##   setenv LC_ALL C
##   set ID  = "../../lex"
##   rm *.tmp
##   foreach l ('bg' 'cs' 'en' 'et' 'hu' 'ro' 'sl')
##   echo "$l"
##   echo "s/\n/\t$l\n/" > 0.tmp
##   cut -f3 $ID/wfl-$l.tbl | sort | uniq | perl -p 0.tmp >> 1.tmp
##   end
##   sort 1.tmp > 2.tmp 
##   cat 2.tmp | wfl-flatten > 3.tmp
##   cat 3.tmp | mtems2fslib > msd-fslib.tei
##
##---------------------------------------------------------------------------##
##  Copyright (C) 2003         	Tomaz Erjavec, tomaz.erjavec@ijs.si
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##---------------------------------------------------------------------------##

use strict;
use warnings;

# Maps the one-letter category code (the first character of an MSD) to the
# category name written into the type attribute of each <fsLib> element.
my %category_for = (
    'N' => 'Noun',
    'V' => 'Verb',
    'A' => 'Adjective',
    'P' => 'Pronoun',
    'D' => 'Determiner',
    'T' => 'Article',
    'R' => 'Adverb',
    'S' => 'Adposition',
    'C' => 'Conjunction',
    'M' => 'Numeral',
    'I' => 'Interjection',
    'X' => 'Residual',
    'Y' => 'Abbreviation',
    'Q' => 'Particle',
);

# xml_escape($text)
#   Returns $text with the characters that are unsafe inside a quoted XML
#   attribute value replaced by entity references.  Text without any of
#   & < > " ' is returned unchanged.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;    # must come first so later entities survive
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

# parse_line($line)
#   Splits one input line "MSD <tab> lang1 lang2 ... langN" into the list
#   ($msd, $langs).  Trailing CR/LF characters are removed first so that
#   DOS line endings do not leak into the language list.  Returns the empty
#   list when the line does not have a non-empty field on both sides of a
#   tab.  Call in list context.
sub parse_line {
    my ($line) = @_;
    $line =~ s/[\r\n]+\z//;
    # Greedy first group: with several tabs, everything up to the last tab
    # is taken as the MSD.
    return $line =~ /\A(.+)\t(.+)\z/;
}

# msd_to_feats($msd)
#   Builds the value of the feats attribute for an MSD.  The first character
#   is the category code C and always yields "C0."; the character at
#   position n (n >= 1) yields "Cn.<char>", except that a "-" at a position
#   contributes nothing.  Items are joined with single spaces.
#   Example: "Ncmsn" -> "N0. N1.c N2.m N3.s N4.n".
sub msd_to_feats {
    my ($msd) = @_;
    my ($pos, @values) = split //, $msd;
    my @feats = ("${pos}0.");
    for my $i (0 .. $#values) {
        next if $values[$i] eq '-';
        push @feats, $pos . ($i + 1) . '.' . $values[$i];
    }
    return join ' ', @feats;
}

# fs_element($msd, $langs)
#   Returns the complete <fs .../> line (newline-terminated) for one MSD and
#   its language list.
sub fs_element {
    my ($msd, $langs) = @_;
    return sprintf qq{<fs id="%s" select="%s" feats="%s"/>\n},
        xml_escape($msd), xml_escape($langs), xml_escape(msd_to_feats($msd));
}

# category_name($pos)
#   Returns the category name for a one-letter category code.  For a code
#   missing from %category_for a warning is issued and the code itself is
#   returned, so the resulting <fsLib> still has a non-empty type.
sub category_name {
    my ($pos) = @_;
    return $category_for{$pos} if exists $category_for{$pos};
    warn "mtems2fslib: unknown category code '$pos'; using it as fsLib type\n";
    return $pos;
}

# main()
#   Reads MSD lines from the files named on the command line (or STDIN) and
#   prints one <fsLib> per run of consecutive MSDs sharing a category code,
#   each containing one <fs> element per input line.
sub main {
    print '<!-- THIS FILE IS AUTOMATICALLY GENERATED FROM MULTEXT-EAST LEXICA';
    print ' - EDIT AT YOUR OWN RISK! -->';

    my $open_pos;    # category code of the currently open <fsLib>, if any
    while (my $line = <>) {
        my ($msd, $langs) = parse_line($line);
        if (!defined $msd) {
            # Blank lines are skipped silently; anything else is reported.
            warn "mtems2fslib: skipping malformed input line $.\n"
                if $line =~ /\S/;
            next;
        }

        my $pos = substr $msd, 0, 1;
        if (!defined $open_pos or $open_pos ne $pos) {
            print "</fsLib>\n" if defined $open_pos;
            $open_pos = $pos;
            print "\n<fsLib type='", xml_escape(category_name($pos)), "'>\n";
        }
        print fs_element($msd, $langs);
    }

    # Only close a library that was actually opened; for empty input just
    # terminate the header comment line.
    print defined $open_pos ? "</fsLib>\n" : "\n";
    return;
}

# Run the conversion only when executed as a script, so the helper
# subroutines can be loaded (e.g. by a test) without consuming input.
main() unless caller;

