#!/bin/bash

# Prints the usage message to stderr and terminates the script.
# Takes no arguments. Never returns: the exit status is always 1,
# including when help was requested explicitly with --help.
function print_help_and_exit
{
cat >&2 << 'EOF'

'voronota-js-lt-graph' script describes a molecular structure as a graph.

Options:
    --input                   string  *  path to input protein file
    --probe                   number     probe radius, default is 1.4
    --output-dir              string  *  output directory path
    --output-naming           string     output files naming mode, default is 'BASENAME/name', other possibilities are 'BASENAME_name' and 'BASENAME/BASENAME_name'
    --help | -h                          flag to display help message and exit
    
Standard output:
    Information messages in stdout, error messages in stderr
    
Examples:
    voronota-js-lt-graph --input "./2zsk.pdb" --output-dir "./output"
    
    voronota-js-lt-graph --input "./2zsk.pdb" --output-dir "./output" --probe 1.4
EOF
exit 1
}

# How the script was invoked; used below to find the directory it lives in.
readonly ZEROARG=$0

# No arguments (or an empty first argument): show usage and stop.
if [ -z "$1" ]
then
	print_help_and_exit
fi

SELFLOCATION="$(dirname "$ZEROARG")"

# When the script was started through a path (not found via PATH lookup),
# prepend its own directory to PATH so executables next to it are found first.
if [[ "$ZEROARG" == *"/"* ]]
then
	# Resolve to an absolute path inside a subshell, so the working directory
	# of this script is never changed. cd's stdout is discarded because cd may
	# print the target directory (e.g. when CDPATH is in effect).
	# PATH is left untouched if the directory cannot be entered.
	if SELFLOCATION_RESOLVED="$(cd "$SELFLOCATION" > /dev/null 2>&1 && pwd)"
	then
		SELFLOCATION="$SELFLOCATION_RESOLVED"
		export PATH="${SELFLOCATION}:${PATH}"
	fi
fi

# Fixed locale for the rest of the script and for child processes.
export LC_ALL=C

command -v voronota-js &> /dev/null || { echo >&2 "Error: 'voronota-js' executable not in binaries path"; exit 1; }

# Option values; these defaults apply when the option is not given.
INFILE=""
PROBE="1.4"
OUTPUT_DIR=""
OUTPUT_NAMING="BASENAME/name"
HELP_MODE="false"

# Parses command-line options into the global variables INFILE, PROBE,
# OUTPUT_DIR, OUTPUT_NAMING and HELP_MODE.
# Arguments: the command-line arguments of the script.
# Exits with status 1 on an unknown option, or when an option that takes
# a value is the last argument and therefore has no value.
function parse_command_line_options
{
	while (( $# > 0 ))
	do
		OPTION="$1"
		shift
		case "$OPTION" in
		--input|--probe|--output-dir|--output-naming)
			# All of these consume the following argument as their value.
			if (( $# == 0 ))
			then
				echo >&2 "Error: missing value for command line option '$OPTION'"
				exit 1
			fi
			OPTARG="$1"
			shift
			case "$OPTION" in
			--input)
				INFILE="$OPTARG"
				;;
			--probe)
				PROBE="$OPTARG"
				;;
			--output-dir)
				OUTPUT_DIR="$OPTARG"
				;;
			--output-naming)
				OUTPUT_NAMING="$OPTARG"
				;;
			esac
			;;
		-h|--help)
			HELP_MODE="true"
			;;
		*)
			echo >&2 "Error: invalid command line option '$OPTION'"
			exit 1
			;;
		esac
	done
}

parse_command_line_options "$@"

# Help is handled only after all options parsed without error.
if [ "$HELP_MODE" == "true" ]
then
	print_help_and_exit
fi

# --input is mandatory.
if [ -z "$INFILE" ]
then
	echo >&2 "Error: no input file provided"
	exit 1
fi

# --output-dir is mandatory.
if [ -z "$OUTPUT_DIR" ]
then
	echo >&2 "Error: no output directory provided"
	exit 1
fi

# Only the three naming modes listed in the help text are accepted.
if [ "$OUTPUT_NAMING" != "BASENAME/name" ] && [ "$OUTPUT_NAMING" != "BASENAME_name" ] && [ "$OUTPUT_NAMING" != "BASENAME/BASENAME_name" ]
then
	echo >&2 "Error: invalid output naming mode"
	exit 1
fi

# The probe value is pasted verbatim into the generated voronota-js script,
# so accept only a plain decimal number (optional sign, fraction, exponent).
PROBE_PATTERN='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
if [[ ! "$PROBE" =~ $PROBE_PATTERN ]]
then
	echo >&2 "Error: invalid probe radius '$PROBE'"
	exit 1
fi

# -s is false both for a missing file and for an empty one.
if [ ! -s "$INFILE" ]
then
	echo >&2 "Error: input structure file '$INFILE' does not exist or is empty"
	exit 1
fi

# Outputs are named after the input file: directory part removed and
# a trailing '.pdb' dropped. Quoted so that paths with spaces stay intact.
BASENAME="$(basename -- "$INFILE" .pdb)"

# Prefix put in front of every output file name, selected by the naming mode.
# Any other value falls back to the 'BASENAME/name' layout.
case "$OUTPUT_NAMING" in
BASENAME_name)
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}_"
	;;
BASENAME/BASENAME_name)
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}/${BASENAME}_"
	;;
*)
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}/"
	;;
esac

# Used in the failure message when graph data could not be produced.
DATA_DESCRIPTION="input '$INFILE'"

# Scratch directory that receives the CSV tables written by voronota-js.
# Creation is checked: with an empty TMPLDIR the output prefix below would be '/'.
TMPLDIR="$(mktemp -d)" || { echo >&2 "Error: failed to create temporary directory"; exit 1; }
readonly TMPLDIR

# Remove the scratch directory on every exit path. Single quotes defer the
# expansion to exit time and keep the path quoted for rm.
trap 'rm -r -- "$TMPLDIR"' EXIT

# The input path is embedded in a single-quoted JavaScript string literal,
# so backslashes and single quotes in it are backslash-escaped first.
JS_BACKSLASH='\'
JS_QUOTE="'"
INFILE_JS=${INFILE//"$JS_BACKSLASH"/$JS_BACKSLASH$JS_BACKSLASH}
INFILE_JS=${INFILE_JS//"$JS_QUOTE"/$JS_BACKSLASH$JS_QUOTE}

# The script fed to voronota-js is made of two here-documents:
# the first is expanded by the shell to fill in the parameters,
# the second is passed through literally.
{
cat << EOF
var params={}
params.input_structure_file='$INFILE_JS';
params.probe=$PROBE;
params.output_prefix='${TMPLDIR}/';
EOF

cat << 'EOF'
voronota_auto_assert_full_success=true;

voronota_import('-file', params.input_structure_file, '-as-assembly', '-title', 'model');

voronota_construct_contacts_radically_fast('-probe', params.probe);

voronota_set_adjunct_of_atoms_by_type_number("-name", "atom_type", "-typing-mode", "protein_atom");
voronota_set_adjunct_of_atoms_by_type_number("-name", "residue_type", "-typing-mode", "protein_residue");

voronota_set_adjunct_of_atoms_by_contact_areas("-use [-solvent] -name sas_area");

voronota_auto_assert_full_success=false;
voronota_set_adjunct_of_atoms("-use [-v! sas_area] -name sas_area -value 0");
voronota_set_adjunct_of_atoms("-use [-v! volume] -name volume -value 0");
voronota_auto_assert_full_success=true;

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name sas_area -destination-name residue_sas_area -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name volume -destination-name residue_volume -pooling-mode sum');

voronota_export_adjuncts_of_atoms('-file', params.output_prefix+'graph_nodes.csv', '-use', '[]', '-no-serial', '-adjuncts', ['atom_index', 'residue_index', 'atom_type', 'residue_type', 'center_x', 'center_y', 'center_z', 'radius', 'sas_area', 'volume'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_contacts('-file', params.output_prefix+'graph_links.csv', '-atoms-use', '[]', '-contacts-use', '[-no-solvent]', '-no-serial', '-adjuncts', ['atom_index1', 'atom_index2', 'area', 'boundary', 'distance'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_atoms('-file', params.output_prefix+'coarse_graph_nodes.csv', '-use', '[]', '-no-serial', '-no-name', '-adjuncts', ['residue_type', 'residue_sas_area', 'residue_volume'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_contacts('-inter-residue', true, '-file', params.output_prefix+'coarse_graph_links.csv', '-atoms-use', '[]', '-contacts-use', '[-no-solvent -min-seq-sep 1]', '-no-serial', '-adjuncts', ['area', 'boundary', 'distance'], '-sep', ',', '-expand-ids', true);
EOF
} \
| voronota-js

# All four CSV tables must exist and be non-empty, otherwise the run failed.
if [ ! -s "${TMPLDIR}/graph_nodes.csv" ] || [ ! -s "${TMPLDIR}/graph_links.csv" ] || [ ! -s "${TMPLDIR}/coarse_graph_nodes.csv" ] || [ ! -s "${TMPLDIR}/coarse_graph_links.csv" ]
then
	echo >&2 "Failed: graph data for $DATA_DESCRIPTION"
	exit 1
fi

# A dummy file name is appended to the prefix so that dirname yields the
# directory the output files go into, whichever naming mode is in use.
# Quoted so that output paths with spaces stay intact.
mkdir -p -- "$(dirname -- "${OUTPREFIX}name")" || { echo >&2 "Error: failed to create output directory for '${OUTPREFIX}'"; exit 1; }

# Every comma in the CSV tables is turned into a tab to produce the TSV outputs.
tr ',' '\t' < "${TMPLDIR}/graph_links.csv" > "${OUTPREFIX}atom_graph_links.tsv"

tr ',' '\t' < "${TMPLDIR}/graph_nodes.csv" > "${OUTPREFIX}atom_graph_nodes.tsv"

tr ',' '\t' < "${TMPLDIR}/coarse_graph_links.csv" > "${OUTPREFIX}residue_graph_links.tsv"

# Residue nodes: keep the header line first, then the remaining rows
# sorted in version order with adjacent duplicate lines removed.
{
head -n 1 "${TMPLDIR}/coarse_graph_nodes.csv"
tail -n +2 "${TMPLDIR}/coarse_graph_nodes.csv" | sort -V | uniq
} | tr ',' '\t' > "${OUTPREFIX}residue_graph_nodes.tsv"

exit 0

