Next: About this document ... Up: Morphosyntactic Tagging Previous: Romanian
Slovene

COP project 106 MULTEXT-East Deliverable D2.3 F ``1984'', Slovene
<cesHeader
  version="4.1"
  type="text"
  lang="en"
  creator="ET"
  status="update"
  date.created="1997-11-04"
  date.updated="1997-12-20"
>
    <filedesc>
      <titlestmt>
        <h.title>Multext-East cesAna: Nineteen Eighty-Four, Slovene</h.title>
        <respstmt>
          <respname>Toma&zcaron; Erjavec</respname>
          <resptype>Overall Responsibility</resptype>
          <respname>Aleksandra Bizjak, Primo&zcaron; Jakopin</respname>
          <resptype>Tagging</resptype>
          <respname>Vladim&iacute;r Petkevi&ccaron;</respname>
          <resptype>Conversion to cesAna DTD </resptype>
        </respstmt>
      </titlestmt>
      <editionstmt version="1.0">MTE Final Release</editionstmt>
      <extent>
        <wordCount>90768</wordCount>
        <byteCount units="MB">22.5</byteCount>
        <extnote>wordCount represents the number of TOK TYPE=WORD
          elements in the text.</extnote>
      </extent>
      <publicationstmt>
        <distributor>
          Dept. for Intelligent Systems, Jozef &Scaron;tefan Institute
        </distributor>
        <pubaddress>Jamova 39, SI-1000 Ljubljana, Slovenia</pubaddress>
        <eaddress type="email">tomaz.erjavec@ijs.si</eaddress>
        <eaddress type="www">http://nl.ijs.si/ME</eaddress>
        <availability status="restricted">
          Available for research purposes upon receipt of signed agreement
        </availability>
        <pubDate value="1998-01-01">January 1st, 1998</pubDate>
      </publicationstmt>
      <sourcedesc>
        <biblfull>
          <titlestmt>
            <h.title>Multext-East CES1: Nineteen Eighty-Four, Slovene</h.title>
          </titlestmt>
          <publicationstmt>
            <distributor>
              Dept. for Intelligent Systems, Jozef &Scaron;tefan Institute
            </distributor>
            <pubaddress>Jamova 39, SI-1000 Ljubljana, Slovenia</pubaddress>
            <eaddress type="email">tomaz.erjavec@ijs.si</eaddress>
            <eaddress type="www">http://nl.ijs.si/ME</eaddress>
            <availability status="restricted">
              Available for research purposes upon receipt of signed agreement
            </availability>
            <pubDate value="1997-10-01">October 1, 1997</pubDate>
          </publicationstmt>
      <sourcedesc>
        <biblfull>
          <titlestmt>
            <h.title>
              The European Corpus Initiative
              Multilingual Corpus 1:
              1984 by George Orwell (Slovene)
            </h.title>
            <respstmt>
              <respname>Association for Computational Linguistics</respname>
              <resptype>Converted from OTA's DTD to ECI DTD</resptype>
            </respstmt>
          </titlestmt>
          <publicationstmt>
            <distributor>ACL</distributor>
            <pubaddress>ACL</pubaddress>
            <availability status="restricted">
              Available for research purposes upon receipt of signed
              agreement
            </availability>
            <pubdate>1994</pubdate>
          </publicationstmt>
          <sourcedesc>
            <biblfull>
              <titlestmt>
                <h.title>Orwell's 1984: electronic edition</h.title>
                <respstmt>
                  <respname>Oxford Text Archive</respname>
                  <resptype>
                    The four versions of Orwell's 1984 in the OTA
                    were all prepared by the OUCS KDEM service in
                    1985 for Dr David C Bennett of the School of
                    Oriental And African Studies at London
                    University.  The texts here have not been
                    encoded or proofread in any way since they were
                    produced (other than the English text, which was
                    converted to an SGML like encoding by John
                    Price-Wilkin, and subsequently automatically
                    converted to conform to the OTA's dtd by myself
                    and Alan Morrison. The other languages were
                    converted to TEI conformant SGML by the ECI
                    project 1993.) --LB, Nov 1992
                  </resptype>
                </respstmt>
              </titlestmt>
              <editionstmt>
                Public Domain TEI edition prepared at the Oxford Text
                Archive
              </editionstmt>
              <publicationstmt>
                <distributor>Oxford Text Archive</distributor>
                <pubaddress>
                  Oxford University Computing Service
                  13 Banbury Road
                  Oxford OX2 6NN UK
                  archive@ox.ac.uk
                </pubaddress>
                <availability status="restricted">
                  Freely available for non-commercial
                  use provided that this header is included in its
                  entirety with any copy distributed
                </availability>
                <pubdate>19 Nov 1992</pubdate>
              </publicationstmt>
              <sourcedesc>
                <biblstruct>
                  <monogr>
                    <h.title>1984</h.title>
                    <h.author>George Orwell</h.author>
                    <h.author>Translator: Alenka Puhar</h.author>
                    <imprint>
                      <pubdate>1983</pubdate>
                      <publisher>Knji&zcaron;nica Kondor</publisher>
                      <publisher>Mladinska knjiga</publisher>
                      <pubplace>Ljubljana</pubplace>
                    </imprint>
                  </monogr>
                </biblstruct>
              </sourcedesc>
            </biblfull>
          </sourcedesc>
        </biblfull>
      </sourcedesc>
      </biblfull>
      </sourcedesc>
    </filedesc>
    <encodingdesc>
      <projectdesc>
        MULTEXT-East:
        Multilingual Text Tools and Corpora for Central and Eastern
        European Languages.
        EU Copernicus Project COP106
      </projectdesc>
      <editorialdecl>
        <transduction>
          In the cesDoc to cesAna conversion, DIV, QUOTE, Q tags and
          HEAD, POEM, LIST elements have been omitted. cesDoc P
          elements are encoded as PAR, and S as S.
          cesDoc sub-S level tags are omitted: DATE, NAME, ABBR, etc.
        </transduction>
        <quotation>
          Q and QUOTE tags from the cesDoc source not retained.
        </quotation>
        <segmentation>
          S segmentation same as in cesDoc source (hand-validated).
          TOK segmentation performed with mtseg and manually corrected.
        </segmentation>
      </editorialdecl>
      <tagsdecl>
        <tagusage gi=chunklist occurs=1>
          Element corresponds to TEXT of the cesDoc source
        </tagusage>
        <tagusage gi=chunk occurs=1>
          Element corresponds to BODY of the cesDoc source
        </tagusage>
        <tagusage gi=par occurs=1288>
          Elements correspond to P elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc P element.
        </tagusage>
        <tagusage gi=s occurs=6689>
          Elements correspond to S elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc S element.
        </tagusage>
        <tagusage gi=tok occurs=107770>
          Tokens are of TYPE=WORD or PUNCT, with the CLASS attribute
          giving the mtseg class of the token (ABBR, COMP, INIT, TTL).
        </tagusage>
        <tagusage gi=orth occurs=107770>
          Contains the orthography of the token, as found in the cesDoc source
           (except for COMP, which have underscore instead of blank).
        </tagusage>
        <tagusage gi=disamb occurs=90792>
          Contains disambiguated lexical information for WORDs.
        </tagusage>
        <tagusage gi=lex occurs=187562>
          Contains undisambiguated lexical information for WORDs.
        </tagusage>
        <tagusage gi=base occurs=278354>
          Base or lemma of a WORD.
        </tagusage>
        <tagusage gi=msd occurs=278354>
          Morphosyntactic description of a WORD.
        </tagusage>
        <tagusage gi=ctag occurs=16978>
          Corpus tag of PUNCT tokens.
        </tagusage>
      </tagsdecl>
    </encodingdesc>
    <profiledesc>
      <creation date="1997-12-11"></creation>
      <langusage>
        <![ %ONECOMPONENT [ &ISOlang; ]]>
        <language id=ns-sl iso639=sl>Newspeak Slovene</language>
      </langusage>
    </profiledesc>
    <revisiondesc>
      <change>
        <changedate>1997-11-04</changedate>
        <respname>Toma&zcaron; Erjavec, IJS</respname>
        <h.item>Initial header</h.item>
      </change>
      <change>
        <changedate>1997-12-11</changedate>
        <respname>Toma&zcaron; Erjavec, IJS</respname>
        <h.item>Manually disambiguated version prepared</h.item>
      </change>
      <change>
        <changedate>1997-12-20</changedate>
        <respname>Toma&zcaron; Erjavec, IJS</respname>
        <h.item>Prepared final version</h.item>
      </change>
    </revisiondesc>
  </cesHeader>
Multext-East