next up previous contents
Next: Further Reading Up: Corpus Encoding Standard Previous: The cesAlign DTD

The cesAna DTD

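The cesAna DTD is used for segmentation and grammatical annotation. As the example below illustrates, this includes paragraph and sentence segmentation, tokenization into words and punctuation, and, for each token, its orthographic form, its candidate lexical readings (base form plus morphosyntactic description), the disambiguated reading, or a corpus tag for punctuation.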

Example:

 <chunkList type=TEXT lang=sl>
  <chunk type=BODY lang=sl>
   <par from='Osl.1.2.2'>   
    <s from='Osl.1.2.2.1'>  
     <tok type=WORD>
      <orth>Bil</orth>
      <disamb><base>biti</base><msd>Vcps-sma</msd></disamb>
      <lex><base>biti</base><msd>Vcps-sma</msd></lex>
      <lex><base>biti</base><msd>Vmps-sma</msd></lex>
     </tok>
     <tok type=WORD>
      <orth>je</orth>
      <disamb><base>biti</base><msd>Vcip3s-an</msd></disamb>
      <lex><base>biti</base><msd>Vcip3s-an</msd></lex>
      <lex><base>jesti</base><msd>Vmip3s--n</msd></lex>
      <lex><base>on</base><msd>Pp3fsg--y-n</msd></lex>
     </tok>
     <tok type=WORD>
      <orth>jasen</orth>
      <disamb><base>jasen</base><msd>Afpmsn</msd></disamb>
      <lex><base>jasen</base><msd>Afpmsn</msd></lex>
      <lex><base>jasen</base><msd>Afpmsa</msd></lex>
     </tok>
     <tok type=PUNCT>
      <orth>,</orth>
      <ctag>COMMA</ctag>
     </tok>
     <tok type=WORD>
      <orth>mrzel</orth>
      <disamb><base>mrzel</base><msd>Afpmsn</msd></disamb>
      <lex><base>mrzel</base><msd>Afpmsn</msd></lex>
      <lex><base>mrzel</base><msd>Afpmsa</msd></lex>
     </tok>
...
     <tok type=PUNCT>
      <orth>.</orth>
      <ctag>PERIOD</ctag>
     </tok>
    </s>
   </par>
  </chunk>
 </chunkList>


Tomaz Erjavec
1/9/2000