#! /usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      mtewfl-count
##  Author:
##      Tomaz Erjavec  tomaz.erjavec@ijs.si
##  Description:
##      Count as many things as possible in a Multext-East Word Form Lexicon
##  To Do:
##      o lots
##
##---------------------------------------------------------------------------##
##    Copyright (C) 1996  Tomaz Erjavec  tomaz.erjavec@ijs.si
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##

###############################################################################
##
##  Usage:
##      mtewfl-count [lexicon-file ...]     (standard input if no file given)
##
##  Input:
##      One entry per line, three TAB-separated fields:
##          word-form <TAB> lemma <TAB> MSD
##      The first character of the MSD is the category code.
##
##  Output:
##      A table on standard output with one row per category listed in
##      @legalcats plus a TOTAL row.  Columns: number of entries, distinct
##      word-forms, distinct lemmas, entries whose lemma field is "=", and
##      distinct MSDs.
##
##  By-products (current directory):
##      mte-0.tmp       all entries, sorted by MSD, then word-form, then lemma
##      mte-<code>.tmp  the entries of one category, same order
##
###############################################################################

use strict;
use warnings;

# Report rows, in output order.  The capital letter in parentheses is the
# category code, i.e. the first character of the MSDs of that category.
my @legalcats = (
    "Nouns (N)",
    "Verbs (V)",
    "Adjectives (A)",
    "Pronouns (P)",
    "Determiners (D)",
    "Articles (T)",
    "Adverbs (R)",
    "Adpositions (S)",
    "Conjunctions (C)",
    "Numerals (M)",
    "Interjections (I)",
    "Residuals (X)",
    "Abbreviations (Y)",
    "Particles (Q)",
);

# Pseudo-category used for the grand total: last report row and mte-0.tmp.
my $TOTAL_LABEL = "TOTAL (0)";
my $TOTAL_CODE  = "0";

# Run only when executed as a script, so the helpers can also be loaded
# (require/do) without reading input or touching files.
main() unless caller;

# main(): read the lexicon, write the per-category files, print the table.
sub main {
    # NOTE(review): the read operator was lost from the original text
    # ("@wfl =;").  "<>" reads the files named on the command line, or
    # standard input when there are none -- confirm this is what was meant.
    my @lines   = <>;
    my @entries = sort_entries(parse_lexicon(@lines));

    write_category_files(\@entries);
    print_report(\@entries);
    return;
}

# parse_entry($line): split one lexicon line into its three fields.
# Returns [word-form, lemma, MSD], or nothing if the line does not have
# three non-empty TAB-separated fields.
sub parse_entry {
    my ($line) = @_;

    # chomp, not chop: a last line without newline must keep its final char.
    chomp $line;
    my ($wordform, $lemma, $msd) = split /\t/, $line, 3;

    return
        unless defined $msd
        && length $wordform
        && length $lemma
        && length $msd;
    return [ $wordform, $lemma, $msd ];
}

# parse_lexicon(@lines): parse every line; blank lines are ignored and
# malformed lines are reported on STDERR and skipped, so they can no longer
# be attributed to whatever category the previous line had.
# Returns the list of [word-form, lemma, MSD] entries in input order.
sub parse_lexicon {
    my @lines = @_;

    my @entries;
    my $line_no = 0;
    for my $line (@lines) {
        $line_no++;
        next if $line =~ /\A\s*\z/;

        my $entry = parse_entry($line);
        if ($entry) {
            push @entries, $entry;
        }
        else {
            warn "mtewfl-count: input line $line_no skipped "
                . "(expected 3 TAB-separated fields)\n";
        }
    }
    return @entries;
}

# category_of($msd): the category code, i.e. the first character of the MSD.
sub category_of {
    my ($msd) = @_;
    return substr($msd, 0, 1);
}

# sort_entries(@entries): order entries by MSD, then word-form, then lemma
# (plain string comparison), so entries of one category are contiguous.
sub sort_entries {
    my @entries = @_;

    return map  { $_->[1] }
           sort { $a->[0] cmp $b->[0] }
           map  { [ join("\t", @{$_}[ 2, 0, 1 ]), $_ ] } @entries;
}

# count_distinct(@values): number of different strings in @values.
# A hash is used instead of comparing against a "previous" value that
# starts at 0, which failed to count a leading value equal to "0".
sub count_distinct {
    my %seen;
    @seen{@_} = ();
    return scalar keys %seen;
}

# summarize(\@entries): the five figures of one report row, in column order:
# entries, distinct word-forms, distinct lemmas, "=" lemmas, distinct MSDs.
sub summarize {
    my ($entries) = @_;

    my @wordforms = map { $_->[0] } @$entries;
    my @lemmas    = map { $_->[1] } @$entries;
    my @msds      = map { $_->[2] } @$entries;

    # Presumably "=" marks an entry whose lemma equals its word-form --
    # TODO confirm against the lexicon format description.
    my $equals_lemmas = grep { $_ eq "=" } @lemmas;

    return (
        scalar @$entries,
        count_distinct(@wordforms),
        count_distinct(@lemmas),
        $equals_lemmas,
        count_distinct(@msds),
    );
}

# write_category_files(\@entries): write mte-0.tmp (everything) and one
# mte-<code>.tmp per category present in the (already sorted) entries.
# Dies if a file cannot be written.
sub write_category_files {
    my ($entries) = @_;

    # Remove the files of an earlier run first (what the commented-out
    # "rm -f mte-*.tmp" of the original was after), so the files on disk
    # always describe this run only.
    for my $stale (glob "mte-?.tmp") {
        unlink $stale
            or warn "mtewfl-count: can't remove stale $stale: $!\n";
    }

    my %lines_for = ($TOTAL_CODE => []);    # mte-0.tmp exists even if empty
    my %odd_code;
    for my $entry (@$entries) {
        my $line = join("\t", @$entry) . "\n";
        my $code = category_of($entry->[2]);

        push @{ $lines_for{$TOTAL_CODE} }, $line;

        # Only letters become file names: "0" would overwrite the total
        # file and characters such as "/" are not valid in a file name.
        if ($code =~ /\A[A-Za-z]\z/) {
            push @{ $lines_for{$code} }, $line;
        }
        elsif (!$odd_code{$code}++) {
            warn "mtewfl-count: no file written for category code "
                . "'$code' (not a letter)\n";
        }
    }

    for my $code (sort keys %lines_for) {
        my $file = "mte-$code.tmp";
        open my $out, '>', $file
            or die "mtewfl-count: can't open $file for writing: $!\n";
        print {$out} @{ $lines_for{$code} };
        close $out
            or die "mtewfl-count: error writing $file: $!\n";
    }
    return;
}

# print_report(\@entries): print the header and one row per legal category,
# followed by the TOTAL row.  Categories without entries get a row of zeros;
# entries of categories not in @legalcats show up in TOTAL only.
sub print_report {
    my ($entries) = @_;

    my %by_code;
    for my $entry (@$entries) {
        push @{ $by_code{ category_of($entry->[2]) } }, $entry;
    }
    # Set last, so an MSD starting with "0" cannot distort the grand total.
    $by_code{$TOTAL_CODE} = $entries;

    printf "%18s %-6s%-6s%-6s%-6s%-6s\n",
        "Category ", "Entrs", " WFSs", " Lms", " =", " MSDs";

    for my $label (@legalcats, $TOTAL_LABEL) {
        my ($code) = $label =~ /\(([A-Z0])\)\z/;

        printf "%18s", "$label:";
        printf "%6d", $_ for summarize($by_code{$code} || []);
        # The original sketched a sixth column here (commented out, with a
        # 999 placeholder); it was never implemented.
        print "\n";
    }
    return;
}

##yo LSD