next up previous contents
Next: Czech Up: Morphosyntactic Tagging Previous: English

Bulgarian

 COP project 106 MULTEXT-East Deliverable D2.3 F "1984", Bulgarian

<cesHeader
  version="4.1"
  type="text"
  lang="en"
  creator="LD"
  status="update"
  date.created="1997-11-30"
  date.updated="1997-12-21"
>
  <filedesc>
   <titlestmt>
     <h.title>Multext-East cesAna: Nineteen Eighty-Four, Bulgarian</h.title>
       <respstmt>
         <respname>Ludmila Dimitrova, Lydia Sinapova</respname>
         <resptype>Overall Responsibility</resptype>
         <respname>Ludmila Dimitrova, Kiril Simov</respname>
         <resptype>Hand-tagging of first chapter first part</resptype>
         <respname>Ludmila Dimitrova</respname>
         <resptype>Hand-tagging of second chapter first part, first
           chapter second part</resptype>
         <respname>Vladim&iacute;r Petkevi&ccaron;</respname>
         <resptype>Conversion to cesAna DTD </resptype>
      </respstmt>
      </titlestmt>
      <editionstmt version="1.0">MTE Final Release</editionstmt>
      <extent>
        <wordCount>86020</wordCount>
        <byteCount units="MB">29.9</byteCount>
        <extnote>wordCount represents the number of TOK TYPE=WORD
           elements in the text. 
        </extnote>
      </extent>
      <publicationstmt>
        <distributor>
          Institute of Mathematics and Informatics,
          Bulgarian Academy of Sciences, Sofia
        </distributor>
        <pubaddress>
          Acad G. Bonchev st. bl.8
          1113 Sofia, Bulgaria
        </pubaddress>
        <eaddress type="email">ludmila@ling.math.acad.bg</eaddress>
        <availability status="restricted">
          Available for research purposes upon receipt of signed
          agreement
        </availability>
        <pubDate value="1998-01-01">January 1st, 1998</pubDate>
      </publicationstmt>
      <sourcedesc>
        <biblfull>
          <titlestmt>
            <h.title>Multext-East CES1: Nineteen Eighty-Four, Bulgarian
            </h.title>
          </titlestmt>
          <publicationstmt>
            <distributor>
             Institute of Mathematics and Informatics,
             Bulgarian Academy of Sciences, Sofia
            </distributor>
            <pubaddress>
             Acad G. Bonchev st. bl.8
             1113 Sofia, Bulgaria
            </pubaddress>
            <eaddress type="email">ludmila@ling.math.acad.bg</eaddress>
            <availability status="restricted">
              Available for research purposes upon receipt of signed
              agreement
            </availability>
            <pubDate value="1997-10-01">October 1, 1997</pubDate>
          </publicationstmt>
      <sourcedesc>
        <biblfull>
          <titlestmt>
            <h.title> Electronic form of 1984 by George Orwell in
                      Bulgarian
            </h.title>
                <respstmt>
                  <respname>
                    Ludmila Dimitrova (BAS), Lydia Sinapova (BAS), 
                    Kiril Simov (BAS)
                  </respname>
                  <resptype>
                     Typing-in 1984.
                  </resptype>
                </respstmt>
          </titlestmt>
          <publicationstmt>
            <distributor>
             Institute of Mathematics and Informatics,
             Bulgarian Academy of Sciences, Sofia
            </distributor>
            <pubaddress>
             Acad G. Bonchev st. bl.8
             1113 Sofia, Bulgaria
            </pubaddress>
            <availability status="restricted">
              Available for research purposes upon receipt of signed
              agreement
            </availability>
            <pubdate>1997</pubdate>
          </publicationstmt>
              <sourcedesc>
                <biblstruct>
                  <monogr>
                    <h.title>1984</h.title>
                    <h.author>George Orwell</h.author>
                    <h.author>Translator: Lydia Bozhilova</h.author>
                    <imprint>
                      <pubdate>1989</pubdate>
                      <publisher>Profizdat</publisher>
                      <pubplace>Sofia, Bulgaria</pubplace>
                    </imprint>
                  </monogr>
                </biblstruct>
              </sourcedesc>
            </biblfull>
          </sourcedesc>
        </biblfull>
      </sourcedesc>
     </filedesc>
    <encodingdesc>
      <projectdesc>
        MULTEXT-East:
        Multilingual Text Tools and Corpora for Central and Eastern
        European Languages.
        EU Copernicus Project COP106
      </projectdesc>
      <editorialdecl>
        <transduction>
          In the cesDoc to cesAna conversion, DIV, QUOTE, Q tags and
          HEAD, POEM, LIST elements have been omitted. cesDoc P
          elements are encoded as PAR, and S as S.
          cesDoc sub-S level tags are omitted: DATE, NAME, ABBR, etc.
        </transduction>
        <quotation>
          Q and QUOTE tags from the cesDoc source not retained.
        </quotation>
        <segmentation>
          S segmentation same as in cesDoc source (hand-validated).
          TOK segmentation performed with mtseg and manually corrected.
        </segmentation>
      </editorialdecl>
      <tagsdecl>
        <tagusage gi=chunklist occurs=1>
          Element corresponds to TEXT of the cesDoc source
        </tagusage>
        <tagusage gi=chunk occurs=1>
          Element corresponds to BODY of the cesDoc source
        </tagusage>
       <tagusage gi=par occurs=1322>
          Elements correspond to P elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc P element.
        </tagusage>
        <tagusage gi=s occurs=6682>
          Elements correspond to S elements of the cesDoc source.
          The FROM attribute gives the reference to the ID of the
          corresponding cesDoc S element.
        </tagusage>
        <tagusage gi=tok occurs=101173>
          Tokens are of TYPE=WORD or PUNCT, with the CLASS attribute
          giving the mtseg class of the token.
        </tagusage>
        <tagusage gi=orth   occurs=101173>
          Contains the orthography of the token, as found in the
          cesDoc source.
        </tagusage>
        <tagusage gi=disamb occurs=86020>
          Contains disambiguated lexical information.
        </tagusage>
        <tagusage gi=lex    occurs=156002>
          Contains undisambiguated lexical information.
        </tagusage>
        <tagusage gi=base   occurs=242022>
          Base or lemma of a token.
        </tagusage>
       <tagusage gi=msd    occurs=156002>
          Morphosyntactic description of a token.
        </tagusage>
        <tagusage gi=ctag   occurs=257175>
          Corpus tag.
        </tagusage>
      </tagsdecl>
    </encodingdesc>
    <profiledesc>
      <creation date="1997-11-27">
      </creation>
      <langusage>
        <![ %ONECOMPONENT [ &ISOlang; ]]>
        <language id=ns-bg iso639=bg>Newspeak Bulgarian</language>
      </langusage>
    </profiledesc>
    <revisiondesc>
       <change>
       <changedate>1997-12-19</changedate>
        <respname>Vladim&iacute;r Petkevi&ccaron;, &Uacute;TKL FFUK, 
                  Prague</respname>
         <h.item>Filled in tags' usage, wordcount and bytecount</h.item>
       </change>
       <change>
       <changedate>1997-12-21</changedate>
        <respname>Tomaz Erjavec, IJS</respname>
         <h.item>Converted from ISO Cyrillic to SGML entities</h.item>
         <h.item>Changed ... to &hellip;</h.item>
         <h.item>Modified EDITIONSTMT, BYTECOUNT</h.item>
       </change>
     </revisiondesc>
  </cesHeader>


Multext-East