#!/bin/sh
#
# Inject per-document metadata into a corpus of XML files.
#
# Usage: <script> METADATA SOURCE TARGET
#   METADATA  comma-separated file; its first line is a header and is skipped,
#             every other row starts with: file,tome,rank,head,domain
#   SOURCE    directory holding the input <file>.xml documents
#   TARGET    directory receiving the rewritten <file>.xml documents
#             (it is not created by this script)
#
# For each row, TARGET/<file>.xml is made of line 1 of SOURCE/<file>.xml,
# a generated "<corpus><doc><meta>" block, then SOURCE/<file>.xml from its
# third line onwards (the input's second line is replaced by the block).
#
# NOTE(review): rows are split on every comma; quoted fields containing a
# comma are not supported.

# Succeeds when the domain label in $1 starts with "Géographie".
# The second spelling is the same word with its UTF-8 bytes mis-decoded as a
# single-byte code page; it is accepted too so that data which went through
# that mis-decoding keeps matching.
# NOTE(review): confirm which encoding the metadata file really uses.
isGeography() {
  case "${1}" in
    Géographie* | GĂ©ographie*) return 0 ;;
    *) return 1 ;;
  esac
}

# Print (without trailing newline) the class of a domain field.
# Arguments: $1 - one domain label, or several labels separated by "|"
#                 (optionally followed by one space).
# Outputs:
#   single label : "geography" if it starts with Géographie, else "other"
#   several      : "geography" if every label starts with Géographie,
#                  else "some_geography"
# NOTE(review): a multi-label field with no geography label at all also
# yields "some_geography"; kept as is, confirm this is intended.
getClass() {
  case "${1}" in
    *'|'*) ;;
    *)
      if isGeography "${1}"; then
        printf 'geography'
      else
        printf 'other'
      fi
      return
      ;;
  esac

  # The function's own positional parameter is used as the work variable so
  # no global (in particular the caller's DOMAIN) is modified.
  while :; do
    if ! isGeography "${1%%|*}"; then
      printf 'some_geography'
      return
    fi
    case "${1}" in
      *'|'*)
        # Drop the first label and its separator. The space after "|" is
        # optional: requiring it made the loop spin forever on "A|B".
        set -- "${1#*|}"
        set -- "${1# }"
        ;;
      *)
        printf 'geography'
        return
        ;;
    esac
  done
}

# Process every metadata row; see the header comment for the arguments.
# Returns 2 on usage error, 1 if the metadata file is unreadable or any
# document could not be written, 0 otherwise.
main() {
  if [ "$#" -lt 3 ]; then
    printf 'Usage: %s METADATA SOURCE TARGET\n' "${0##*/}" >&2
    return 2
  fi

  METADATA="${1}"
  SOURCE="${2%/}"
  TARGET="${3%/}"

  if [ ! -r "${METADATA}" ]; then
    printf '%s: cannot read metadata file: %s\n' "${0##*/}" "${METADATA}" >&2
    return 1
  fi

  FAILED=0
  {
    # Discard the header row.
    read -r LINE
    # -r keeps backslashes intact; the "|| [ -n ]" part still processes a
    # last row that has no trailing newline.
    while read -r LINE || [ -n "${LINE}" ]; do
      [ -n "${LINE}" ] || continue

      # Peel the first five comma-separated fields off the row.
      FILE="${LINE%%,*}"
      LINE="${LINE#*,}"
      TOME="${LINE%%,*}"
      LINE="${LINE#*,}"
      RANK="${LINE%%,*}"
      LINE="${LINE#*,}"
      HEAD="${LINE%%,*}"
      LINE="${LINE#*,}"
      DOMAIN="${LINE%%,*}"
      CLASS="$(getClass "${DOMAIN}")"

      INPUT="${SOURCE}/${FILE}.xml"
      OUTPUT="${TARGET}/${FILE}.xml"

      # Do not leave a truncated output behind when the input is missing.
      if [ ! -r "${INPUT}" ]; then
        printf '%s: skipping %s: cannot read %s\n' \
          "${0##*/}" "${FILE}" "${INPUT}" >&2
        FAILED=1
        continue
      fi

      # NOTE(review): key and value are separated by a single space here;
      # confirm the consumer of these files does not expect a tab.
      {
        head -n 1 "${INPUT}"
        printf '%s\n' \
          '<corpus><doc><meta>' \
          "fileName ${FILE}.txt" \
          "tome ${TOME}" \
          "rank ${RANK}" \
          "head ${HEAD}" \
          "domain ${DOMAIN}" \
          "class ${CLASS}"
        tail -n +3 "${INPUT}"
      } > "${OUTPUT}" || FAILED=1
    done
  } < "${METADATA}"

  return "${FAILED}"
}

main "$@"