#!/usr/local/bin/perl -w
#
# Convert SGML character entities to 8-bit ISO Latin 2 (ISO 8859-2).
# Every known "&name;" reference is replaced by the single byte listed in
# the table below; unknown references and all other text pass through
# unchanged.
#
# Reads the files named on the command line (or standard input when none
# are given) line by line and writes the result to standard output.
#
# et 22/4/96
# V.0
#

use strict;
use warnings;

# Entity name (without the surrounding "&" and ";") => Latin 2 byte.
my %latin2_byte_for = (

    # Estonian section: ISO-10 (Latin 6).
    # All other Estonian characters have the same code as in Latin 2,
    # except for o-tilde, which does not appear in Latin 2.
    # To convert &otilde; / &Otilde; as well, uncomment the two entries
    # below.  The byte written is the same either way; only its reading
    # differs: o-tilde when the output is treated as Latin 6, o with
    # double acute (o'') when it is treated as Latin 2.
    # otilde => "\xF5",
    # Otilde => "\xD5",

    # Spacing diacritics and symbols.
    unused => "\xA0",
    breve  => "\xA2",
    uml    => "\xA8",
    deg    => "\xB0",
    # ring => "\xB0",    # alternative name for the same byte, disabled
    ogon   => "\xB2",
    acute  => "\xB4",
    caron  => "\xB7",
    cedil  => "\xB8",
    dblac  => "\xBD",
    dot    => "\xFF",
    times  => "\xD7",
    divide => "\xF7",
    circro => "\xA4",    # ??? circle with bars - telephone? microphone?
    dash   => "\xAD",    # ??? thick short dash / hyphen

    # A
    aacute => "\xE1",
    Aacute => "\xC1",
    aogon  => "\xB1",
    Aogon  => "\xA1",
    acirc  => "\xE2",
    Acirc  => "\xC2",
    abreve => "\xE3",
    Abreve => "\xC3",
    auml   => "\xE4",
    Auml   => "\xC4",

    # C
    cacute => "\xE6",
    Cacute => "\xC6",
    ccaron => "\xE8",
    Ccaron => "\xC8",
    ccedil => "\xE7",
    Ccedil => "\xC7",

    # D
    dcaron => "\xEF",    # equivalent to small Dcaron, but looks like d'
    dmidot => "\xEF",    # so it could be 'middle dot', same as lmidot
    Dcaron => "\xCF",
    dstrok => "\xF0",
    Dstrok => "\xD0",

    # E
    eacute => "\xE9",
    Eacute => "\xC9",
    ecaron => "\xEC",
    Ecaron => "\xCC",
    eogon  => "\xEA",
    Eogon  => "\xCA",
    euml   => "\xEB",
    Euml   => "\xCB",

    # I
    iacute => "\xED",
    Iacute => "\xCD",
    icirc  => "\xEE",
    Icirc  => "\xCE",

    # L
    lacute => "\xE5",
    Lacute => "\xC5",
    lmidot => "\xB5",
    Lmidot => "\xA5",
    lcaron => "\xB5",    # alias of lmidot, by analogy with dcaron/dmidot
    Lcaron => "\xA5",    # alias of Lmidot
    lstrok => "\xB3",
    Lstrok => "\xA3",

    # N
    nacute => "\xF1",
    Nacute => "\xD1",
    ncaron => "\xF2",
    Ncaron => "\xD2",

    # O
    oacute => "\xF3",
    Oacute => "\xD3",
    ocirc  => "\xF4",
    Ocirc  => "\xD4",
    odblac => "\xF5",
    Odblac => "\xD5",
    ouml   => "\xF6",
    Ouml   => "\xD6",

    # R
    racute => "\xE0",
    Racute => "\xC0",
    rcaron => "\xF8",
    Rcaron => "\xD8",

    # S
    sacute => "\xB6",
    Sacute => "\xA6",
    scaron => "\xB9",
    Scaron => "\xA9",
    scedil => "\xBA",
    Scedil => "\xAA",
    szlig  => "\xDF",

    # T
    tcaron => "\xBB",    # equivalent to small Tcaron, but looks like t'
    tmidot => "\xBB",    # so it could be 'middle dot', same as lmidot
    Tcaron => "\xAB",
    tcedil => "\xFE",
    Tcedil => "\xDE",

    # U
    uring  => "\xF9",
    Uring  => "\xD9",
    uacute => "\xFA",
    Uacute => "\xDA",
    udblac => "\xFB",
    Udblac => "\xDB",
    uuml   => "\xFC",
    Uuml   => "\xDC",

    # Y
    yacute => "\xFD",
    Yacute => "\xDD",

    # Z
    zacute => "\xBC",
    Zacute => "\xAC",
    zcaron => "\xBE",
    Zcaron => "\xAE",
    zdot   => "\xBF",
    Zdot   => "\xAF",
);

# Return a copy of $text in which every "&name;" reference whose name is
# a key of %latin2_byte_for has been replaced by the corresponding byte.
# Names are case-sensitive.  References with unknown names are put back
# verbatim.  One pass suffices: the replacement bytes are all >= 0xA0, so
# they can never combine with neighbouring text into a new reference.
sub entities_to_latin2 {
    my ($text) = @_;

    $text =~ s{&([A-Za-z]+);}{
        exists $latin2_byte_for{$1} ? $latin2_byte_for{$1} : "&$1;"
    }ge;

    return $text;
}

# Filter loop: convert each input line and write it straight out.
while ( my $line = <> ) {
    print entities_to_latin2($line);
}