#!/bin/bash
#
# Resolve a language token to a language code using the tab-separated map
# utils.map_token_lang.tsv (column 1: token, column 2: code) located next to
# this script, then run normalize-punctuation.perl with the resolved code as
# its only argument. The Perl script inherits this script's stdin/stdout.
#
# Lookup order:
#   1. the token exactly as given;
#   2. the first three characters of the token;
#   3. fall back to "en" (a notice is written to stderr).
#
# usage: <this script> lang

set -euo pipefail

# Directory containing this script; the map and the Perl script live here.
root=$(dirname -- "$0")

lang_map_path=$root/utils.map_token_lang.tsv

# Print a usage message to stderr and exit with status 1.
usage () {
    echo "usage: $0 lang" >&2
    exit 1
}

[ $# -eq 1 ] || usage

lang=$1

declare -A lang_map

# Load the token -> code map.
#  * `read -r` keeps backslashes literal; the default IFS still trims
#    leading/trailing whitespace from each line.
#  * `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
while read -r line || [ -n "$line" ]; do
    # A blank line would produce an empty key, which is an invalid
    # associative-array subscript and would abort the script under `set -e`.
    [ -n "$line" ] || continue

    # Same field semantics as `cut -f1` / `cut -f2`: a line without a tab
    # yields the whole line for both fields.
    key=${line%%$'\t'*}
    rest=${line#*$'\t'}
    val=${rest%%$'\t'*}

    [ -n "$key" ] || continue
    lang_map[$key]=$val
done < "$lang_map_path"

# Use ${array[key]+set} rather than `[ -v "array[key]" ]`: the -v form
# re-evaluates the subscript, which is unsafe for caller-supplied tokens and
# requires a newer Bash. Empty keys are guarded because they are invalid
# associative-array subscripts.
prefix=${lang:0:3}
if [ -n "$lang" ] && [ -n "${lang_map[$lang]+set}" ]; then
    lang=${lang_map[$lang]}
elif [ -n "$prefix" ] && [ -n "${lang_map[$prefix]+set}" ]; then
    lang=${lang_map[$prefix]}
else
    echo "undefined mapping: ${lang}, falling back to: en" >&2
    lang=en
fi

perl "$root/normalize-punctuation.perl" "$lang"