#! /usr/local/bin/perl
##---------------------------------------------------------------------------##
## File:
## mtewfl-count
## Author:
## Tomaz Erjavec tomaz.erjavec@ijs.si
## Description:
## Count as many things as possible in a Multext-East Word Form Lexicon
## To Do:
## o lots
##
##---------------------------------------------------------------------------##
## Copyright (C) 1996 Tomaz Erjavec tomaz.erjavec@ijs.si
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##
###############################################################################
# Report labels for the lexical categories.  Each label ends in the
# one-letter category code in parentheses; that letter is also used to
# name the per-category temporary file (mte-<code>.tmp).
@legalcats = (
    "Nouns (N)",
    "Verbs (V)",
    "Adjectives (A)",
    "Pronouns (P)",
    "Determiners (D)",
    "Articles (T)",
    "Adverbs (R)",
    "Adpositions (S)",
    "Conjunctions (C)",
    "Numerals (M)",
    "Interjections (I)",
    "Residuals (X)",
    "Abbreviations (Y)",
    "Particles (Q)",
);

# Alphabetically sorted list of the bare category codes, derived from the
# labels above by stripping everything except the parenthesised letter.
@catcodes = sort(map {
    my $code = $_;                       # copy, so @legalcats stays intact
    $code =~ s/.+ \(([A-Z])\)/$1/;
    $code;
} @legalcats);
# Slurp the whole lexicon from standard input, one entry per line.  Every
# entry is expected to consist of three tab-separated fields (the report
# section further down names them $wf, $lm and $ms).
@wfl = <STDIN>;
foreach my $entry (@wfl) {
    # Only the last input line can lack a newline; terminate it so that it
    # does not fuse with its neighbour once the list has been re-ordered.
    $entry .= "\n" unless $entry =~ /\n\z/;
    # Rotate the third field to the front so that the plain string sort
    # below orders entries by third field first, then first, then second.
    $entry =~ s/(.+)\t(.+)\t(.+)/$3\t$1\t$2/;
}
@wfl = sort(@wfl);
# $tmpfiles = "mte-\*.tmp";
# if (-r $tmpfiles) {print "juhej"; die; exec "rm -f $tmpfiles"};
#exec "rm -f mte-\*.tmp";

# Write the complete sorted lexicon to mte-0.tmp (the file read back for
# the "TOTAL (0)" row of the report), restoring the original field order.
# The rotation is done in place, so @wfl is left in original field order
# but sorted as described above.
open(TMPOUT, '>', 'mte-0.tmp') or die "Can't open file mte-0.tmp: $!";
foreach my $entry (@wfl) {
    $entry =~ s/(.+)\t(.+)\t(.+)/$2\t$3\t$1/;
    print TMPOUT $entry;
}
# Buffered write errors only surface at close time, so check it.
close(TMPOUT) or die "Can't close file mte-0.tmp: $!";
# Split the lexicon into one temporary file per category, named
# mte-<c>.tmp where <c> is the first character of an entry's third field.
# @wfl is sorted on that field, so all entries of one category are
# adjacent and each file is opened exactly once.
{
    # Per-category files left behind by an earlier run would otherwise be
    # picked up by the report for categories absent from this input.
    # mte-0.tmp holds the full lexicon of the current run and is kept.
    my @stale = grep { $_ ne 'mte-0.tmp' } glob('mte-?.tmp');
    unlink(@stale) if @stale;

    $fresh  = 1;     # true until the first category file has been opened
    $curcat = '';
    foreach $wfl (@wfl) {
        # Entries without three tab-separated fields have no category;
        # skip them instead of reusing a stale capture.
        next unless $wfl =~ /^.+\t.+\t(.)/;
        $fcat = $1;
        # Compare as strings: the category must never act as a pattern.
        if ($fresh || $fcat ne $curcat) {
            if (!$fresh) {
                close(TMPOUT) or die "Can't close file mte-$curcat.tmp: $!";
            }
            $curcat = $fcat;
            $fresh  = 0;
            open(TMPOUT, '>', "mte-$curcat.tmp")
                or die "Can't open file mte-$curcat.tmp: $!";
        }
        print TMPOUT $wfl;
    }
    # Only close when a category file was actually opened.
    if (!$fresh) {
        close(TMPOUT) or die "Can't close file mte-$curcat.tmp: $!";
    }
}
# Return the number of distinct strings in the argument list.  Undefined
# values are treated as the empty string.  A hash is used rather than a
# "previous value" sentinel so that a value such as "0" is counted too.
sub _count_distinct {
    my %seen;
    foreach my $item (@_) {
        $seen{ defined $item ? $item : '' } = 1;
    }
    return scalar(keys %seen);
}

# Print the report: one row per category plus a final TOTAL row read from
# mte-0.tmp.  Columns are the number of entries, distinct first fields
# (WFSs), distinct second fields (Lms), entries whose second field is the
# literal "=", and distinct third fields (MSDs).
push(@legalcats, "TOTAL (0)");
printf "%18s %-6s%-6s%-6s%-6s%-6s\n",
    "Category ", "Entrs", " WFSs", " Lms", " =", " MSDs";
foreach my $cat (@legalcats) {
    printf "%18s", "$cat:";
    # The parenthesised code at the end of the label selects the file.
    $cat =~ /.+ \(([A-Z0])\)/;
    $curcat = $1;
    my $tmpin = "mte-$curcat.tmp";

    # A category with no readable file simply yields a row of zeros.
    my @cwfl = ();
    if (-r $tmpin) {
        open(TMPIN, '<', $tmpin) or die "Can't open file $tmpin: $!";
        # chomp, not chop: never eat a real character from a line that
        # happens to lack a trailing newline.
        chomp(@cwfl = <TMPIN>);
        close(TMPIN);
    }

    # Separate the three tab-delimited columns.
    my (@wf, @lm, @ms);
    foreach my $cwfl (@cwfl) {
        my ($wf, $lm, $ms) = split(/\t/, $cwfl, 3);
        push(@wf, $wf);
        push(@lm, $lm);
        push(@ms, $ms);
    }

    printf "%6d", scalar(@wf);
    printf "%6d", _count_distinct(@wf);
    printf "%6d", _count_distinct(@lm);
    printf "%6d", scalar(grep { defined $_ && $_ eq "=" } @lm);
    printf "%6d", _count_distinct(@ms);
    # $cnt = 0; $old = 0;
    # foreach $ms (@ms) {
    # if ($lm ne $old) {; $old=$lm}
    # };
    # printf "%6d", 999;
    print "\n";
}
##yo LSD
## | Top
## | Table of contents
## | Multext-East
## | LPL/CNRS