# Corpus identification and descriptive metadata.
MAINTAINER "CLARIN.SI <info@clarin.si>"
NAME "ParlaMint-XX-en 4.1 (European parliaments, translation to English)"
INFO "European parliamentary corpora translated to English ParlaMint-XX-en, v4.1"
ENCODING "UTF-8"
DEFAULTLOCALE "en_GB.UTF-8"
LANGUAGE "English"
INFOHREF "http://hdl.handle.net/11356/1910"
TAGSETDOC "https://universaldependencies.org/guidelines.html"

# Data directory for this corpus.
PATH "/data/manatee-data/parlamint41_xx_en/"

# Source vertical text: the gzipped file is piped through zcat.
VERTICAL "| zcat /data/vert/ParlaMint-XX-en.4.1.vert.gz"

# The `speech` structure is declared as the document structure.
DOCSTRUCTURE speech
MAXDETAIL 10000

# Structure attributes used for subcorpus selection, short references and
# full references. All but the last four FULLREF entries belong to `speech`.
SUBCORPATTRS "speech.corpus|speech.lang,speech.subcorpus|speech.body|speech.date,speech.term|speech.session|speech.meeting,speech.sitting|speech.agenda,speech.speaker_mp|speech.speaker_minister|speech.speaker_role,speech.speaker_id|speech.speaker_name|speech.speaker_gender,speech.speaker_birth|speech.speaker_party|speech.speaker_party_name,speech.party_status|speech.party_orientation"
SHORTREF "=speech.corpus,=speech.date"
FULLREF "speech.id,speech.title,speech.date,speech.corpus,speech.lang,speech.subcorpus,speech.body,speech.term,speech.session,speech.meeting,speech.sitting,speech.agenda,speech.speaker_name,speech.speaker_gender,speech.speaker_role,speech.speaker_mp,speech.speaker_minister,speech.speaker_party,speech.speaker_party_name,speech.party_status,speech.party_orientation,p.lang,p.id,s.id,name.type"

# Alignment with the parlamint41_xx corpus, using the `s` structure.
ALIGNSTRUCT s
ALIGNED parlamint41_xx

# Simple-query template: a token matches on its lowercased word form (lc),
# its norm, or its lowercased lemma (lemma_lc).
SIMPLEQUERY '[lc="%s" | norm="%s" | lemma_lc="%s"]'

# Speech-level structure carrying the speech, sitting and speaker metadata.
# The opening tag itself is hidden (DISPLAYTAG 0); each speech is instead
# introduced by the value of its speaker_name attribute followed by ": ".
STRUCTURE speech {
    TYPE "file64"
    DISPLAYTAG 0
    DISPLAYBEGIN "%(speaker_name): "
    DISPLAYEND ""
    # Speech identifier. Exactly one TYPE is declared: two TYPE lines in the
    # same block are contradictory, and "UNIQUE" is the one meant for IDs.
    ATTRIBUTE id {
      LABEL "Speech ID"
      TYPE "UNIQUE"
    }
    ATTRIBUTE text_id {
      TYPE "MD_MGD"
      LABEL "Text ID"
    }
    ATTRIBUTE corpus {
      TYPE "MD_MGD"
    }
    ATTRIBUTE subcorpus {
      TYPE "MD_MGD"
    }
    ATTRIBUTE lang {
      TYPE "MD_MGD"
    }
    ATTRIBUTE title {
      TYPE "MD_MGD"
    }
    # Multi-valued: individual values are separated by "|".
    ATTRIBUTE body {
      TYPE "MD_MGD"
      LABEL "Parliamentary body"
      MULTIVALUE yes
      MULTISEP "|"
    }
    ATTRIBUTE term {
      TYPE "MD_MGD"
    }
    # session / meeting / sitting / agenda share a text box length of 4.
    ATTRIBUTE session {
      TYPE "MD_MGD"
      TEXTBOXLENGTH 4
    }
    ATTRIBUTE meeting {
      TYPE "MD_MGD"
      TEXTBOXLENGTH 4
    }
    ATTRIBUTE sitting {
      TYPE "MD_MGD"
      TEXTBOXLENGTH 4
    }
    ATTRIBUTE agenda {
      TYPE "MD_MGD"
      TEXTBOXLENGTH 4
    }
    ATTRIBUTE date {
      TYPE "MD_MGD"
    }
    # Speaker and party attributes all declare "-" as their default value.
    ATTRIBUTE speaker_id {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_name {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_role {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_mp {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_minister {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_party {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_party_name {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE party_status {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE party_orientation {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_gender {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
    }
    ATTRIBUTE speaker_birth {
      TYPE "MD_MGD"
      DEFAULTVALUE "-"
      TEXTBOXLENGTH 10
    }
}

# Paragraph structure. The tag itself is hidden (DISPLAYTAG 0); the end of
# each paragraph is rendered as " ¶ ".
STRUCTURE p {
    TYPE "file64"
    DISPLAYTAG 0
    DISPLAYBEGIN ""
    DISPLAYEND " ¶ "
    # Paragraph identifier. Exactly one TYPE is declared: two TYPE lines in
    # the same block are contradictory, and "UNIQUE" is the one meant for IDs.
    ATTRIBUTE id {
      TYPE "UNIQUE"
    }
    ATTRIBUTE lang {
      TYPE "MD_MGD"
    }
}

# Sentence structure; also named by the file-level ALIGNSTRUCT setting.
STRUCTURE s {
    TYPE "file64"
    # Sentence identifier. Exactly one TYPE is declared: two TYPE lines in
    # the same block are contradictory, and "UNIQUE" is the one meant for IDs.
    ATTRIBUTE id {
      LABEL "Sentence ID"
      TYPE "UNIQUE"
    }
    DISPLAYTAG 0
}

# `phr` structure with three multi-valued USAS attributes; each one uses
# its own value separator (",", " " and "|" respectively).
STRUCTURE phr {
    TYPE "file64"
    ATTRIBUTE usas_tags {
      LABEL "USAS tags"
      TYPE "MD_MGD"
      MULTISEP ","
      MULTIVALUE yes
    }
    ATTRIBUTE usas_cats {
      LABEL "USAS categories"
      TYPE "MD_MGD"
      MULTISEP " "
      MULTIVALUE yes
    }
    ATTRIBUTE usas_glosses {
      LABEL "USAS glosses"
      TYPE "MD_MGD"
      MULTISEP "|"
      MULTIVALUE yes
    }
}

# `name` structure: the tag is hidden and the span is wrapped as
# "[<type>:" ... "]", where <type> is the value of the `type` attribute.
STRUCTURE name {
    TYPE "file64"
    DISPLAYTAG 0
    DISPLAYBEGIN "[%(type):"
    DISPLAYEND "]"
    ATTRIBUTE type {
      TYPE "MD_MGD"
    }
}

# `note` structure: the tag is hidden and the note is rendered as
# "[<type>: <content>" ... "]" from its `type` and `content` attributes.
STRUCTURE note {
    TYPE "file64"
    ATTRIBUTE type {
      TYPE "MD_MGD"
    }
    ATTRIBUTE corpus {
      TYPE "MD_MGD"
    }
    ATTRIBUTE content {
      TYPE "MD_MGD"
    }
    DISPLAYTAG 0
    DISPLAYBEGIN "[%(type): %(content)"
    DISPLAYEND "]"
}
# `g` structure: tag hidden, displayed via the "_EMPTY_" marker.
# NOTE(review): presumably the glue element joining adjacent tokens - confirm.
STRUCTURE g {
    DISPLAYBEGIN "_EMPTY_"
    DISPLAYTAG 0
    TYPE "file64"
}

# Token attribute `word`; source attribute for the dynamic `lc` attribute.
ATTRIBUTE word {
    TYPE "MD_MGD"
}
# Lowercased variant of `word`: a dynamic attribute computed from `word`
# by the internal utf8lowercase function.
ATTRIBUTE lc {
    LABEL "word (lowercase)"
    FROMATTR "word"
    DYNLIB "internal"
    DYNAMIC "utf8lowercase"
    FUNTYPE "s"
    ARG1 "C"
    DYNTYPE "index"
    TRANSQUERY "yes"
}

# Token attribute labelled "syntactic word"; multi-valued, "|"-separated.
ATTRIBUTE norm {
    LABEL "syntactic word"
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}

# Token attribute `lemma`; multi-valued, "|"-separated. Source attribute
# for the dynamic `lemma_lc` attribute.
ATTRIBUTE lemma {
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}
# Lowercased variant of `lemma`: a dynamic attribute computed from `lemma`
# by the internal utf8lowercase function. The dynamic-attribute kind is
# declared with DYNTYPE, exactly as in the sibling `lc` attribute.
ATTRIBUTE lemma_lc {
   LABEL    "lemma (lowercase)"
   DYNAMIC  "utf8lowercase"
   DYNLIB   "internal"
   ARG1     "C"
   FUNTYPE  "s"
   FROMATTR "lemma"
   DYNTYPE  "index"
   TRANSQUERY "yes"
}

# Token attribute labelled "UD PoS tag"; multi-valued, "|"-separated.
ATTRIBUTE pos {
    LABEL "UD PoS tag"
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}

# Token attribute labelled "UD features"; multi-valued, "|"-separated.
ATTRIBUTE feats {
    LABEL "UD features"
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}

# Token attribute labelled "USAS tags"; multi-valued, ","-separated.
ATTRIBUTE usas_tags {
    LABEL "USAS tags"
    TYPE "MD_MGD"
    MULTISEP ","
    MULTIVALUE yes
}
# Token attribute labelled "USAS categories"; multi-valued, separated by a
# single space.
ATTRIBUTE usas_cats {
    LABEL "USAS categories"
    TYPE "MD_MGD"
    MULTISEP " "
    MULTIVALUE yes
}
# Token attribute labelled "USAS glosses"; multi-valued, "|"-separated.
ATTRIBUTE usas_glosses {
    LABEL "USAS glosses"
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}
    
# Token attribute labelled "ID of token"; multi-valued, "|"-separated.
ATTRIBUTE id {
    LABEL "ID of token"
    TYPE "MD_MGD"
    MULTISEP "|"
    MULTIVALUE yes
}
