Skip to content
Snippets Groups Projects
inject-metadata.sh 1023 B
Newer Older
#!/usr/bin/env bash
#
# inject-metadata.sh — copy XML files while injecting a metadata block.
#
# Usage: inject-metadata.sh METADATA SOURCE TARGET
#
#   METADATA  text file whose first line is skipped and whose remaining lines
#             are comma-separated records: file,tome,rank,head,domain
#   SOURCE    directory holding the input files, read as SOURCE/<file>.xml
#   TARGET    directory receiving the output files, written as TARGET/<file>.xml
#
# A single trailing slash on SOURCE or TARGET is removed so that the paths
# built from them never contain a doubled "/".

METADATA="${1}"
SOURCE="${2%/}"
TARGET="${3%/}"

# getClass DOMAIN
#
# Print (without trailing newline) the class associated with a domain string.
#
# DOMAIN is either a single domain name or a list of names separated by "|",
# optionally followed by spaces.
#
#   single name starting with "Géographie"      -> geography
#   any other single name                       -> other
#   list whose entries all start "Géographie"   -> geography
#   list with an entry not starting that way    -> some_geography
#
# NOTE(review): a list containing no "Géographie" entry at all is also
# reported as "some_geography"; this matches the historical behaviour, but
# confirm whether "other" was intended for that case.
getClass()
{
	# Work on a local copy so a direct call never clobbers a caller's DOMAIN.
	local DOMAIN="${1}"

	# No separator: plain single-domain classification.
	case "${DOMAIN}" in
		*"|"*) ;;
		Géographie*) printf "geography"; return;;
		*) printf "other"; return;;
	esac

	# Walk the list entry by entry; stop at the first non-geography entry.
	while true
	do
		case "${DOMAIN%%|*}" in
			Géographie*) ;;
			*) printf "some_geography"; return;;
		esac
		case "${DOMAIN}" in
			*"|"*)
				# Drop the entry just checked together with its separator,
				# then trim the spaces before the next entry. Removing the
				# "|" unconditionally guarantees the string shrinks, so the
				# loop terminates even when no space follows the separator.
				DOMAIN="${DOMAIN#*|}"
				DOMAIN="${DOMAIN#"${DOMAIN%%[! ]*}"}";;
			*) printf "geography"; return;;
		esac
	done
}

# Process every record of ${METADATA} except its first line.
#
# Each record is split on commas into file, tome, rank, head and domain (any
# further fields are ignored). For each record, ${TARGET}/<file>.xml is built
# from ${SOURCE}/<file>.xml as: line 1 of the input, then a metadata block,
# then the input from line 3 onwards (line 2 of the input is replaced by the
# block).
{
	# Discard the first line of the metadata file.
	read -r LINE

	# -r keeps backslashes in the fields literal; the "|| [ -n ... ]" clause
	# still processes a last record that lacks a terminating newline.
	while read -r LINE || [ -n "${LINE}" ]
	do
		FILE="${LINE%%,*}"
		LINE="${LINE#*,}"
		TOME="${LINE%%,*}"
		LINE="${LINE#*,}"
		RANK="${LINE%%,*}"
		LINE="${LINE#*,}"
		HEAD="${LINE%%,*}"
		LINE="${LINE#*,}"
		DOMAIN="${LINE%%,*}"
		CLASS="$(getClass "${DOMAIN}")"
		INPUT="${SOURCE}/${FILE}.xml"
		OUTPUT="${TARGET}/${FILE}.xml"

		# Without its input a record would produce a truncated output file
		# holding only the metadata block; report it and move on instead.
		if [ ! -f "${INPUT}" ]
		then
			printf 'inject-metadata: missing input file %s, skipping\n' "${INPUT}" >&2
			continue
		fi

		head -n 1 "${INPUT}" > "${OUTPUT}"

		# The here-document body must stay at column 0: it is copied verbatim
		# (after variable expansion) into the output file.
		cat >> "${OUTPUT}" << EOF
<corpus><doc><meta>
fileName	${FILE}.txt
tome	${TOME}
rank	${RANK}
head	${HEAD}
domain	${DOMAIN}
class	${CLASS}
EOF

		tail -n +3 "${INPUT}" >> "${OUTPUT}"
	done
} < "${METADATA}"