next up previous contents
Next: Hungarian Up: Morphosyntactic Tagging Previous: Czech

Estonian

 COP project 106 MULTEXT-East Deliverable D2.3 F ``1984'', Estonian

<cesHeader
  version="4.1"
  type="text"
  lang=en
  creator=HJK
  status="update"
  date.created="1997-11-28"
  date.updated="1997-12-21"
>
    <filedesc>
      <titlestmt>
        <h.title>Multext-East cesAna: Nineteen Eighty-Four, Estonian</h.title>
        <respstmt>
          <respname>Heiki-Jaan Kaalep</respname>
          <resptype>Overall Responsibility</resptype>
          <respname>Kadri Muischnek</respname>
          <resptype>Hand-tagging of part 1, chapter 1-4; 
                    part 2 chapter 9</resptype>
          <respname>Andriela R&auml;&auml;bis</respname>
          <resptype>Hand-tagging of part 1, chapter 5-7;  
                    part 3 chapter 1, 3, 4</resptype>
          <respname>Heili Orav</respname>
          <resptype>Hand-tagging of part 1, chapter 8;  
                    part 3 chapter 2, 5, 6</resptype>
          <respname>Helen Potter</respname>
          <resptype>Hand-tagging of part 2, chapter 1-7</resptype>
          <respname>K&uuml;lli Habicht</respname>
          <resptype>Hand-tagging of part 2, chapter 8</resptype>
         <respname>Vladim&iacute;r Petkevi&ccaron;</respname>
          <resptype>Conversion to cesAna DTD </resptype>
        </respstmt>
      </titlestmt>
      <editionstmt version="1.0">MTE Final Release</editionstmt>
      <extent>
        <wordCount>75433</wordCount>
        <byteCount units="MB">18.7 MB</byteCount>
        <extnote>wordCount represents the number of TOK TYPE=WORD
          elements in the text. byteCount is in megabytes.</extnote>
      </extent>
      <publicationstmt>
        <distributor>
           T&Uuml; arvutuslingvistika uurimisgrupp
        </distributor>
        <pubaddress>Tiigi 78-232, Tartu, Estonia</pubaddress>
        <eaddress type="email">hkaalep@psych.ut.ee</eaddress>
        <eaddress type="www">http://www.cl.ut.ee</eaddress>
        <availability status="free">
           Freely available
        </availability>
        <pubDate value="1998-01-01">January 1st, 1998</pubDate>
      </publicationstmt>
      <sourcedesc>
        <biblfull>
          <titlestmt>
            <h.title>Multext-East CES1: Nineteen Eighty-Four, Estonian</h.title>
          </titlestmt>
          <publicationstmt>
            <distributor>
           T&Uuml; arvutuslingvistika uurimisgrupp
            </distributor>
            <pubaddress>Tiigi 78-232, Tartu, Estonia</pubaddress>
            <eaddress type="email">hkaalep@psych.ut.ee</eaddress>
            <eaddress type="www">http://www.cl.ut.ee</eaddress>
            <availability status="free">
             Freely available
            </availability>
            <pubDate value="1997-10-01">October 1, 1997</pubDate>
          </publicationstmt>
              <sourcedesc>
                <biblstruct>
                  <monogr>
                    <h.title>1984</h.title>
                    <h.author>George Orwell</h.author>
                    <h.author>Translator: Elias Treeman</h.author>
                    <imprint>
                      <pubdate>1990</pubdate>
                      <publisher>Loomingu Raamatukogu nr. 48-51</publisher>
                      <publisher>Perioodika</publisher>
                      <pubplace>Tallinn</pubplace>
                    </imprint>
                  </monogr>
                </biblstruct>
              </sourcedesc>
            </biblfull>
          </sourcedesc>
     </filedesc>
    <encodingdesc>
      <projectdesc>
        MULTEXT-East:
        Multilingual Text Tools and Corpora for Central and Eastern
        European Languages.
        EU Copernicus Project COP106
      </projectdesc>
      <editorialdecl>
        <transduction>
          In the cesDoc to cesAna conversion, DIV, QUOTE tags and
          HEAD, POEM, LIST elements have been omitted. cesDoc P
          elements are encoded as PAR, and S as S. 
          Q tags have been encoded as punctuation symbols.
          cesDoc sub-S level tags are omitted: DATE, NAME, ABBR, etc.
        </transduction>
        <quotation>
          QUOTE tags from the cesDoc source not retained.
        </quotation>
        <segmentation>
          S segmentation same as in cesDoc source (hand-validated).
          TOK segmentation performed with mtseg and manually corrected.
         </segmentation>
      </editorialdecl>
      <tagsdecl>
        <tagusage gi=chunkList occurs=1>
          Element corresponds to TEXT of the cesDoc source
        </tagusage>
        <tagusage gi=chunk occurs=1>
          Element corresponds to BODY of the cesDoc source
        </tagusage>
        <tagusage gi=par occurs=1266>
          Elements correspond to P elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc P element.
        </tagusage>
        <tagusage gi=s occurs=6478>
          Elements correspond to S elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc S element.
        </tagusage>
        <tagusage gi=tok occurs=94906>
          Tokens are of TYPE=WORD or PUNCT, with the CLASS attribute
          giving the mtseg class of the token.
        </tagusage>
        <tagusage gi=orth   occurs=94906>
          Contains the orthography of the token, as found in the
          cesDoc source.
        </tagusage>
        <tagusage gi=disamb occurs=75433>
          Contains disambiguated lexical information.
        </tagusage>
        <tagusage gi=lex    occurs=147542>
          Contains undisambiguated lexical information.
        </tagusage>
        <tagusage gi=base   occurs=222975>
          Base or lemma of a token.
        </tagusage>
        <tagusage gi=msd    occurs=222975>
          Morphosyntactic description of a token.
        </tagusage>
        <tagusage gi=ctag   occurs=94906>
          Corpus tag.
        </tagusage>
      </tagsdecl>
    </encodingdesc>
    <profiledesc>
      <creation date="1997-11-28">
      </creation>
      <langusage>
        <![ %ONECOMPONENT [ &ISOlang; ]]>
      </langusage>
    </profiledesc>
    <revisiondesc>
      <change>
        <changedate>1997-11-28</changedate>
        <respname>Heiki-Jaan Kaalep</respname>
        <h.item>Initial header</h.item>
      </change>
       <change>
       <changedate>1997-12-21</changedate>
        <respname>Tomaz Erjavec, IJS</respname>
         <h.item>Modified EDITIONSTMT and changed ... to &hellip;</h.item>
       </change>
    </revisiondesc>
  </cesheader>


Multext-East