#!/usr/bin/env bash
#
# Script prologue: make the Apertium tools reachable and establish the
# default input, output and scratch locations used by the rest of the script.

# Directory that is searched first for the apertium-* helper programs.
APERTIUM_PATH="/usr/bin"
PATH="${APERTIUM_PATH}:${PATH}"

# Standard streams are the defaults for input and output.
INPUT_FILE="/dev/stdin"
OUTPUT_FILE="/dev/stdout"

# Use /tmp for scratch files when TMPDIR is unset or empty.
: "${TMPDIR:=/tmp}"
# Print the usage summary on stdout and terminate the script with status 1.
# Callers that want the text on stderr redirect the call themselves.
message ()
{
  local self
  self=$(basename "$0")
  printf '%s\n' \
    "USAGE: ${self} [-f format] [in [out]]" \
    " -f format one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx" \
    " in input file (stdin by default)" \
    " out output file (stdout by default)"
  exit 1
}
# Pick the first UTF-8 locale listed by `locale -a` and export it as LC_CTYPE.
# Globals:  LC_CTYPE (written and exported)
# Outputs:  an error line on stdout when no UTF-8 locale is listed
# Exits:    with status 1 when no UTF-8 locale is listed
locale_utf8 ()
{
  # Locale names spell the charset as either "utf8" or "UTF-8". The former
  # pattern "utf[.]*8" only allowed literal dots between "utf" and "8", so
  # the hyphenated spelling was never recognised.
  LC_CTYPE=$(locale -a | grep -i -E 'utf-?8' | head -n 1)
  export LC_CTYPE
  if [ -z "$LC_CTYPE" ]; then
    echo "Error: Install an UTF-8 locale in your system"
    exit 1
  fi
}
# Ensure both `zip` and `unzip` can be resolved as commands.
# Prints an error on stdout and exits with status 1 for the first one missing.
test_zip ()
{
  local tool
  for tool in zip unzip; do
    if ! command -v "$tool" &>/dev/null; then
      echo "Error: Install '${tool}' command in your system"
      exit 1
    fi
  done
}
# Ensure `gawk` can be resolved as a command.
# Prints an error on stdout and exits with status 1 when it cannot.
test_gawk ()
{
  # Guard clause: nothing to do when gawk is available.
  command -v gawk &>/dev/null && return
  echo "Error: Install 'gawk' in your system"
  exit 1
}
# Deformat a LaTeX document through the prelatex -> fixlatex -> deslatex
# pipeline.
# Globals:  FICHERO (input path; when empty, stdin is spooled to a temporary
#           file and FICHERO is set to it), SALIDA (output path),
#           BORRAFICHERO (set to "true" when the input was spooled),
#           TMPDIR (where the spool file is created)
unformat_latex()
{
  test_gawk

  # No input path: copy stdin to a scratch file and flag it for removal.
  if [ -z "$FICHERO" ]; then
    FICHERO=$(mktemp "$TMPDIR"/apertium.XXXXXXXX)
    cat > "$FICHERO"
    BORRAFICHERO="true"
  fi

  apertium-prelatex "$FICHERO" \
    | apertium-utils-fixlatex \
    | apertium-deslatex >"$SALIDA"

  # Remove the input only when it is flagged as a scratch copy.
  [ "$BORRAFICHERO" != "true" ] || rm -Rf "$FICHERO"
}
# Deformat an ODT document: unpack it, concatenate every extracted file whose
# path matches "content.xml" (one output line per file, each source line
# prefixed with a space) and feed the result to apertium-desodt.
# Globals:  FICHERO (archive to read), SALIDA (output path), TMPDIR,
#           INPUT_TMPDIR (written: scratch extraction directory)
unformat_odt ()
{
  # Check prerequisites first: both helpers exit on failure, and doing so
  # after mktemp would leave the scratch directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR"/apertium.XXXXXXXX) || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # getline returns -1 on a read error; testing "> 0" keeps an unreadable
  # path from turning the loop into an endless one. close() releases the
  # file once it has been consumed.
  find "$INPUT_TMPDIR" | grep 'content\.xml' \
    | awk '{
        part = $0
        while ((getline line < part) > 0) printf(" %s", line)
        close(part)
        printf("\n")
      }' \
    | apertium-desodt >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat a DOCX document.
#  1. Unpack the archive into a scratch directory.
#  2. Pipe every extracted file whose path ends in "xlsx" through
#     `apertium -f xlsx` and overwrite it with the result.
#  3. Concatenate the extracted files whose path contains "xml", except
#     those matching settings/theme/styles/font/rels/docProps (case
#     insensitive), one output line per file, into apertium-deswxml.
# Globals:  FICHERO (archive to read), SALIDA (output path), TMPDIR,
#           INPUT_TMPDIR and LOCALTEMP (written: scratch paths),
#           DIRECTORY, OPCIONU, PREFIJO (passed to `apertium`; not assigned
#           in the visible part of this script -- NOTE(review): confirm
#           where they are expected to come from)
unformat_docx ()
{
  # Check prerequisites first: both helpers exit on failure, and doing so
  # after mktemp would leave the scratch directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR"/apertium.XXXXXXXX) || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # Read paths line by line so that names containing spaces stay intact
  # (a `for x in $(find ...)` loop would split them into several words).
  local embedded
  while IFS= read -r embedded; do
    LOCALTEMP=$(mktemp "$TMPDIR"/apertium.XXXXXXXX)
    apertium -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$embedded" >"$LOCALTEMP"
    cp "$LOCALTEMP" "$embedded"
    rm "$LOCALTEMP"
  done < <(find "$INPUT_TMPDIR" | grep "xlsx$")

  # List paths relative to the extraction directory so the name filters
  # only see the archive's own layout; with absolute paths a TMPDIR that
  # happens to contain e.g. "font" or "xml" would skew the selection.
  # The subshell keeps the directory change away from the "$SALIDA"
  # redirection, which must still resolve against the caller's directory.
  (
    cd "$INPUT_TMPDIR" || exit 1
    # getline returns -1 on a read error; testing "> 0" avoids an endless
    # loop on an unreadable path.
    find . | grep "xml" \
      | grep -v -i -E '(settings|theme|styles|font|rels|docProps)' \
      | awk '{
          part = $0
          while ((getline line < part) > 0) printf(" %s", line)
          close(part)
          printf("\n")
        }'
  ) | apertium-deswxml >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat a PPTX document.
#  1. Unpack the archive into a scratch directory.
#  2. Pipe every extracted file whose path ends in "xlsx" through
#     `apertium -f xlsx` and overwrite it with the result.
#  3. Concatenate the extracted slides (paths matching
#     */slides/slide*.xml), one output line per file, into apertium-despptx.
# Globals:  FICHERO (archive to read), SALIDA (output path), TMPDIR,
#           INPUT_TMPDIR and LOCALTEMP (written: scratch paths),
#           DIRECTORY, OPCIONU, PREFIJO (passed to `apertium`; not assigned
#           in the visible part of this script -- NOTE(review): confirm
#           where they are expected to come from)
unformat_pptx ()
{
  # Check prerequisites first: both helpers exit on failure, and doing so
  # after mktemp would leave the scratch directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR"/apertium.XXXXXXXX) || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # Read paths line by line so that names containing spaces stay intact
  # (a `for x in $(find ...)` loop would split them into several words).
  local embedded
  while IFS= read -r embedded; do
    LOCALTEMP=$(mktemp "$TMPDIR"/apertium.XXXXXXXX)
    apertium -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$embedded" >"$LOCALTEMP"
    cp "$LOCALTEMP" "$embedded"
    rm "$LOCALTEMP"
  done < <(find "$INPUT_TMPDIR" | grep "xlsx$")

  # Search the extraction directory: the slides were unpacked there, not
  # into the caller's working directory.
  # getline returns -1 on a read error; testing "> 0" avoids an endless
  # loop on an unreadable path.
  find "$INPUT_TMPDIR" -path '**/slides/slide*.xml' \
    | awk '{
        part = $0
        while ((getline line < part) > 0) printf(" %s", line)
        close(part)
        printf("\n")
      }' \
    | apertium-despptx >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat an XLSX document: unpack it, concatenate every extracted file
# whose path matches "sharedStrings.xml" (one output line per file, each
# source line prefixed with a space) and feed the result to apertium-desxlsx.
# Globals:  FICHERO (archive to read), SALIDA (output path), TMPDIR,
#           INPUT_TMPDIR (written: scratch extraction directory)
unformat_xlsx ()
{
  # Check prerequisites first: both helpers exit on failure, and doing so
  # after mktemp would leave the scratch directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR"/apertium.XXXXXXXX) || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # getline returns -1 on a read error; testing "> 0" keeps an unreadable
  # path from turning the loop into an endless one. close() releases the
  # file once it has been consumed.
  find "$INPUT_TMPDIR" | grep "sharedStrings.xml" \
    | awk '{
        part = $0
        while ((getline line < part) > 0) printf(" %s", line)
        close(part)
        printf("\n")
      }' \
    | apertium-desxlsx >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Parse command-line options; only "-f format" is recognised.
# The leading ':' in the optstring puts getopts in silent mode. In that mode
# OPTARG carries the offending option letter, and a missing option argument
# is reported as ':' rather than '?'. Without it OPTARG is unset in the
# error arms and the ':' arm can never run.
while getopts ":f:" opt; do
  case "$opt" in
    f) FORMAT=$OPTARG ;;
    \?) echo "ERROR: Unknown option -$OPTARG" >&2; message >&2 ;;
    :) echo "ERROR: -$OPTARG requires an argument" >&2; message >&2 ;;
  esac
done
# Drop the parsed options so only the positional arguments remain.
shift "$((OPTIND-1))"
# Interpret the positional arguments: [in [out]].
# With no arguments the current INPUT_FILE/OUTPUT_FILE values are kept; more
# than two arguments print the usage text via message.
case "$#" in
  0)
    ;;
  1|2)
    INPUT_FILE=$1
    if [ "$#" -eq 2 ]; then
      OUTPUT_FILE=$2
    fi
    # The input must exist; otherwise report it and show the usage text.
    if [ ! -e "$INPUT_FILE" ]; then
      echo "Error: file '$INPUT_FILE' not found."
      message
    fi
    ;;
  *)
    message
    ;;
esac
# Plain text is the format used when none was selected.
FORMAT=${FORMAT:-txt}

# Working copies read by the deformatting code:
#   FORMATADOR - format name, FICHERO - input path, SALIDA - output path.
FORMATADOR=$FORMAT
FICHERO=$INPUT_FILE
SALIDA=$OUTPUT_FILE
# Dispatch on the requested format.
# Container and LaTeX formats are handled entirely by their unformat_*
# function, after which the script exits with status 0. The remaining
# formats only adjust the environment or the format name and then fall
# through to the generic apertium-des<format> call below.
case "$FORMATADOR" in
  latex|odt|docx|xlsx|pptx)
    "unformat_${FORMATADOR}"
    exit 0
    ;;
  html-noent)
    # Handled by the plain html deformatter.
    FORMATADOR="html"
    ;;
  wxml)
    locale_utf8
    ;;
  rtf)
    # First listed locale whose name does not contain "utf", does not start
    # with "c" and is not "POSIX" (all matched case-insensitively).
    MILOCALE=$(locale -a | grep -E -i -v -m1 'utf|^C|^POSIX$')
    if [ -z "$MILOCALE" ]; then
      echo "Error: Install a ISO-8859-1 compatible locale in your system"
      exit 1
    fi
    export LC_CTYPE=$MILOCALE
    ;;
esac

# Generic path: run the deformatter named after the format.
"apertium-des${FORMATADOR}" "$FICHERO" >"$SALIDA"