#! /usr/local/bin/perl ##---------------------------------------------------------------------------## ## File: ## mtems-expand ## Author: ## Tomaz Erjavec tomaz.erjavec@ijs.si ## Description: ## mtems-expand is a Perl program that exapands MULTEXT-East ## morphosyntactic descriptions or checks their well-formedness ## The MSDs should be input in the command line (SP is separator) or ## in standard input, one per line ## It works with the ASCII / LaTeX format of morphosyntactic specifications ## for the languages of MTE: ## these should come along with this program; ## later versions might be available at URL ## http://nl.ijs.si/ME/ ##---------------------------------------------------------------------------## ## Copyright (C) 1996,97 Tomaz Erjavec, tomaz.erjavec@ijs.si ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ##---------------------------------------------------------------------------## # Bugs: # very sensitive to the format of the tables # not optimised at all # man pages missing # could do with a nice emacs interface to this program ######################INSTALLATION SPECIFIC SETTINGS $DEF_MODE = 'check'; #default mode $DEF_TBLFILE = #default ms-table file; 'msd.tex'; ######################CODE $VERSION = "1.5"; $HELP = "mtems-expand $VERSION Usage: mtems-expand [-bchv] [-l language] [-t file] [codes] -h --help\t give this help -b --brief\t display short expansion of code(s) (default mode is $DEF_MODE) -c --check\t display only only invalid code(s) -v --verbose\t display verbose expansion of code(s) -l lng \t lng is 2-letter name of one of MTE languages --lang lng\t (supported values are those from the table file) -t tbl \t tbl is name of the database MTE morph-syn table file --table tbl\t (default file is \'$DEF_TBLFILE\') codes\t\t codes to expand/check; should be separated by SP \t\t if no code given, uses standard input; \t\t expects one code per line\n"; # Process switches #$argv = join(' ',@ARGV); # Save the command line arguments while ($ARGV[0] =~ /^(.+)$/) { $_ = shift; if (/^-b(rief)?$/) { (!$MODE || print("Only one mode switch allowed\n") && die "For help, type: mtems-expand -h\n"); $MODE='brief' } elsif (/^-v(erbose)?$/) { (!$MODE || print("Only one mode switch allowed") && die "For help, type: mtems-expand -h\n"); $MODE='verbose' } elsif (/^-c(heck)?$/) { (!$MODE || print("Only one mode switch allowed") && die "For help, type: mtems-expand -h\n"); $MODE='check' } elsif (/^-t(able)?$/) { $_ = shift; ((($TBL) = /^(.+)$/) || print("No filename for -t? $_\n") && die "For help, type: mtems-expand -h\n"); } elsif (/^-l(ang(uage)?)?$/) { $_ = shift; ((($LANG) = /^(\w\w)$/) || print("No language for -l? $_\n") && die "For help, type: mtems-expand -h\n"); $LANG =~ tr/a-z/A-Z/; } elsif (/^-h(elp)?$/) { print($HELP); exit; } elsif (/^-(.*)$/) { print("Unrecognised switch: $_ \n"); die "For help, type: mtems-expand -h\n"; } elsif (/^(\w[\w\-]*)$/) { $CODES = "$CODES $1"; } else { print("Strange commands: $_ \n"); die "For help, type: mtems-expand -h\n"; } } if (!$MODE) {$MODE = $DEF_MODE}; if (!$TBL) {$TBL = $DEF_TBLFILE}; ####Main open TBL or die "Can't open table file $TBL: $!\n"; close(TBL); #if ($MODE=~'check') {print "Command line: $argv\n"} if ($CODES) { while ($CODES =~ s/^ ?([^ ]+)//) { $CODE = $1; &expand_code } } else { while (<>) { if (m/^\s*([\w-]+)\s*$/) { $CODE=$1; &expand_code; } else { chop($_); print "***$_***\n"; if ($MODE=~'verbose') {print "\n"} } } } ####Code expansion sub expand_code { local($ind) = 0; local($taberr) = 0; local($caterr) = 0; local($atterr) = 0; local($valerr) = 0; local(@codes) = split(//,$CODE); open TBL or die "Can't open table file $TBL: $!\n"; $code = shift(@codes); while (!$taberr && #getting Cat name $line !~ /Part-of-Speech\s+Code/) { $line=; $taberr=eof(TBL); }; if ($taberr) { print("Can't find Part-of-Speech in table file!\n"); die "For help, type: mtems-split -h\n"; } $line=; $line=; while (!$caterr && #processing Cat ($line !~ /^(\w+)\s+$code\s*/)) { $caterr = ($line =~ /^[= ]+/); #EOT & catcode not found! $line=; } if ($caterr) {$cat="*$code*"} else { $line =~ /(\w+)\s+.+/; $cat=$1; while (!$taberr && #looking for Cat table ($line !~ /\($code\)/)) { $taberr=eof(TBL); $line=; } } if ($taberr) { print("Can't find section for $cat ($code) in table file!\n"); die "For help, type: mtems-split -h\n"; } if (!$caterr) { while ($line !~ /^=[ =]+/) { #skip to first line of table $line= } if ($LANG) { if ($line !~ /\s($LANG)\s/) { print("Strange language for -l: $LANG\n"); die "For help, type: mtems-expand -h\n" } else {$line =~ /^(.+)($LANG)/; $lngoffset = length($1)} } $line=; $line =~ /^(.+)VAL\s/; $valoffset = length($1); $line =~ /^(.+)C\s/; $codoffset = length($1); if ($LANG) { $caterr = ($line !~ /^.{$lngoffset}x/); if ($caterr) {$cat="?$code-$cat?"} } $line=; $line=; } if ($MODE=~'check') { if ($caterr) {print "$CODE\t$cat\n"} } elsif ($MODE=~'brief') {print "$CODE: $cat "} elsif ($MODE=~'verbose') {print ">>>>\t$CODE\nPoS:\t$cat\n"} if (!$caterr) { foreach $code(@codes) { #processing att:val $ind++; $valerr = 0; while (!$atterr && ($line !~ /^$ind/)) { #processing att $atterr = #end of cat & att not found! ($line =~ m/^[= ]+$/); $line= } if ($atterr) {$att="*$ind*"} else { $line =~ /\d+\s?(\w+)/; $att=$1 #extract att } if ($atterr || $code eq '-') {$val=$code} else { while (!$valerr && #processing val ($line !~ /^.{$codoffset}$code/)) { $valerr = ($line =~ /^[-*= ]+$/); #end of att & val not found! $line= }; if ($valerr) {$val="*$code*"} else { $line =~ /^.{$valoffset}(\w+)/; $val=$1; if ($LANG) { $valerr = ($line !~ /^.{$lngoffset}x/); if ($valerr) {$val="?$code:$val?"} } } } if ($MODE=~'check') { if ($atterr) {print "$CODE\t$att\n"} elsif ($valerr) {print "$CODE\t$att:$val\n"} } elsif ($MODE=~'brief') { if ($val !~ '-') { if ($val eq "no") {$val="-$att"} # '-clitic' more elsif ($val eq 'yes') {$val="+$att"} # informative than 'no' $val =~ tr/A-Z/a-z/; # '-Clitic' is ugly print "$val " } } elsif ($MODE=~'verbose') {print "$att:\t$val\n"} } } if (!($MODE=~'check')) {print "\n"}; } close TBL; #LSD - yo!