#! /bin/sh
#
# tagcount
#
# Count the start tags between <text> and </text> in an SGML document
# read from standard input, and write one <tagusage> element per element
# name to standard output, sorted by element name.
# 
# script from Heiki-Jaan Kaalep (hkaalep@psych.ut.ee) modified 
# and extended by Greg Priest-Dorman (priestdo@cs.vassar.edu)
#
# Requires only POSIX tr, awk and sort (no GNU extensions needed).
#

# tagcount - filter: SGML document on stdin, tag-usage report on stdout.
#
# Output: one line per element name, in the form
#     <tagusage gi=NAME occurs=COUNT></tagusage>
# ordered by NAME in byte order (independent of the caller's locale).
#
# What is counted:
#   * Start tags only.  End tags, comments (including any markup inside
#     them), markup declarations (<!...>) and processing instructions
#     (<?...>) are skipped.
#   * Only tags inside <text> ... </text>, the <text> start tag itself
#     included.  Nesting of <text> is tracked by depth, so counting begins
#     at the first <text> and nested <text> elements do not discard what
#     came before them.  If a </text> end tag is omitted, counting simply
#     continues to the end of the input.
#   * If the input contains no <text> start tag at all, every start tag in
#     the input is counted.
#   * Attributes are ignored; their values may contain "/" or "@".  A
#     trailing "/" (XML empty-element syntax such as <br/>) is not treated
#     as part of the element name.
#
# Known limitation: a "<" inside an attribute value or a marked section is
# taken as the start of a new tag.
tagcount() {
  # Stage 1: make "<" the record separator and fold real newlines into
  #          spaces, so a tag spread over several lines stays in one record.
  #          The extra echo guarantees the last record is newline-terminated.
  # Stage 2: pick the element name out of each record and count it.
  # Stage 3: order by name only, bytewise.
  # Stage 4: wrap each "NAME COUNT" pair in a <tagusage> element.
  { tr '\012<' ' \012'; echo; } \
    | awk '
        # Record 1 is whatever precedes the first "<"; it cannot hold a tag.
        NR == 1 { next }

        # Inside a comment: skip records until one holds the closing "-->"
        # (SGML allows white space between "--" and ">").
        in_comment {
          if ($0 ~ /--[ \t\r]*>/) in_comment = 0
          next
        }

        {
          # A comment may itself contain ">" or "<", so recognise it before
          # looking for the end of the tag.
          if (substr($0, 1, 3) == "!--") {
            if (substr($0, 4) !~ /--[ \t\r]*>/) in_comment = 1
            next
          }

          close_at = index($0, ">")
          if (close_at == 0) next            # stray "<" that never closes
          tag = substr($0, 1, close_at - 1)

          lead = substr(tag, 1, 1)
          if (lead == "!" || lead == "?") next   # declaration or PI

          is_end = (lead == "/")
          if (is_end) tag = substr(tag, 2)

          name = tag
          sub(/[ \t\r].*$/, "", name)        # drop the attributes
          if (substr(name, length(name)) == "/")
            name = substr(name, 1, length(name) - 1)
          if (name == "") next               # "<>", "< x >" and the like

          if (is_end) {
            if (name == "text" && depth > 0) depth--
            next
          }

          total[name]++
          if (name == "text") { depth++; saw_text = 1 }
          if (depth > 0) in_text[name]++
        }

        END {
          if (saw_text) {
            for (name in in_text) print name, in_text[name]
          } else {
            for (name in total) print name, total[name]
          }
        }
      ' \
    | LC_ALL=C sort -t ' ' -k 1,1 \
    | awk '{ printf "<tagusage gi=%s occurs=%s></tagusage>\n", $1, $2 }'
}

tagcount
# 
