#!/bin/bash
#
# dependencies: python, pafy, ffmpeg, sox, praat
#
# run in a blank directory - be aware that it takes a long time.
# before running, change the pafy and praat vars below to point to the
# locations of those tools on your machine
#
# takes as its arguments a youtube video ID, the speaker's gender, 
# a working directory, and a starttime and endtime
#
# 1. uses pafy to download it (location specified in vars)
# 2. extracts audio and cuts it
# 3. runs a script to segment into approximate PBUs
# 4. gets audio features
# 5. converts video to pngs (the jpeg extraction is currently commented out)
# 6. gets movement amplitudes
# 7. compiles them into a dataframe file that can be easily read into R

# positional arguments, in order:
#   1: youtube video ID    2: speaker's gender    3: working directory
#   4: start time          5: end time
id="${1}"
gender="${2}"
working_dir="${3}"
starttime="${4}"
endtime="${5}"

# machine-specific settings - edit these to match your installation
PBU_size=2   # the maximum acceptable average size of the extracted PBUs
praat='/home/robvoigt/software'
pafy='/home/robvoigt/software/pafy'



# Make sure the working directory exists before anything is written to it.
# An empty working_dir would make every later path resolve against "/"
# (e.g. "/video.mp4", "/pngs/*"), so refuse to continue in that case.
if [[ -z "$working_dir" ]]; then
    echo "Working dir parameter (3rd argument) must not be empty" >&2
    exit 1
fi

if [[ ! -d "$working_dir" ]]; then
    echo "Working dir doesn't exist, creating it..."
    # -p also creates any missing parent directories
    if ! mkdir -p -- "$working_dir"; then
        echo "Could not create working dir '$working_dir'" >&2
        exit 1
    fi
    # record the invocation arguments; this is only written when the
    # directory is first created, so an existing args file is left alone
    echo "$1 $2 $3 $4 $5" > "$working_dir/args"
fi


# Validate the gender argument; it is passed on to get_features.praat later.
# A case statement copes with empty or multi-word values, which would make
# an unquoted `[ $gender == ... ]` test fail with a syntax error.
case "$gender" in
    male|female)
        echo "Hello there!"
        ;;
    *)
        echo "Gender parameter (2nd argument) must be 'male' or 'female'" >&2
        # exit non-zero so callers can tell the run was rejected
        exit 1
        ;;
esac



# download the video (skipped when a previous run already produced video.mp4)
if [[ -f "$working_dir/video.mp4" ]]; then
    echo "Video already downloaded, skipping download.."
else
    # get vid file name: the first line of `ytdl -i` output with its first
    # space-separated field removed, plus an .mp4 extension
    # (presumably the name ytdl saves the download under - confirm)
    vid_file=$("$pafy/ytdl" -i "$id" | head -1 | cut -f 2- -d ' ')
    vid_file+=".mp4"

    # find the stream number of the 640x360 mp4 stream
    echo "Getting 640x360 mp4 video..."
    stream_num=$("$pafy/ytdl" -s "$id" | grep 'mp4    \[640x360\]' | head -1 | cut -f 1 -d ' ')
    if [[ -z "$stream_num" ]]; then
        echo "Couldn't find appropriate stream number, may have to get stream manually..." >&2
        exit 1
    fi

    "$pafy/ytdl" -n "$stream_num" "$id"

    # the rest of the pipeline needs video.mp4, so stop here if the
    # downloaded file could not be moved into the working directory
    if ! mv -- "$vid_file" "$working_dir/video.mp4"; then
        echo "Could not move downloaded file '$vid_file' to $working_dir/video.mp4" >&2
        exit 1
    fi
fi


# -- ACOUSTIC --

# if we haven't yet extracted the audio, do so
if [[ -f "$working_dir/audio.wav" ]]; then
    echo "Audio already extracted, skipping ffmpeg extraction..."
else
    echo "Extracting audio..."
    # decode the video's audio track to 16-bit PCM, 2 channels
    if ! ffmpeg -i "$working_dir/video.mp4" -acodec pcm_s16le -ac 2 "$working_dir/audio.wav"; then
        echo "ffmpeg could not extract audio from $working_dir/video.mp4" >&2
        # do not leave a partial audio.wav behind: its presence makes the
        # next run skip this step
        rm -f -- "$working_dir/audio.wav"
        exit 1
    fi

    echo "Cutting audio to the right size..."
    # keep only the audio from the start of the file up to endtime
    if ! sox "$working_dir/audio.wav" "$working_dir/new_audio.wav" trim 0 "$endtime"; then
        echo "sox could not trim $working_dir/audio.wav to '$endtime'" >&2
        rm -f -- "$working_dir/audio.wav" "$working_dir/new_audio.wav"
        exit 1
    fi
    # replace the full-length audio with the trimmed version
    mv -f -- "$working_dir/new_audio.wav" "$working_dir/audio.wav"
fi



# get features
if [[ -f "$working_dir/features.txt" ]]; then
    echo "features.txt already exists, not attempting to re-extract features..."
else
    # get PBUs from praat, starting with a silence dbVal of -30 and raising
    # it by 3 until the average PBU length drops below PBU_size
    # NOTE(review): there is no upper bound on dbVal, so this loops forever
    # if the average never drops below PBU_size - consider adding a limit
    echo "Extracting PBUs..."
    dbVal=-30
    while true; do
        # start from a clean slate so the existence check below really
        # reflects the praat run that just happened
        rm -f -- "$working_dir/audio.TextGrid"
        "$praat/praat" find_pbus.praat "$working_dir/audio.wav" "$working_dir/audio.TextGrid" "$dbVal"
        if [[ ! -f "$working_dir/audio.TextGrid" ]]; then
            echo "praat did not produce $working_dir/audio.TextGrid (dbVal ${dbVal})" >&2
            exit 1
        fi

        if ! avg_PBU_len=$(python calculate_PBU_len.py "$working_dir/audio.TextGrid" "$starttime" "$endtime") \
            || [[ -z "$avg_PBU_len" ]]; then
            echo "calculate_PBU_len.py failed or printed nothing" >&2
            exit 1
        fi

        # the comparison is done in awk because the average may be
        # fractional; values are passed with -v rather than pasted into the
        # program text, and "+ 0" forces a numeric comparison
        if ! awk -v len="$avg_PBU_len" -v max="$PBU_size" \
            'BEGIN { exit !((len + 0) >= (max + 0)) }'; then
            break
        fi

        dbVal=$(( dbVal + 3 ))
        echo "PBUs were too large, trying again with silence dbVal of ${dbVal}"
    done

    # get features from PBUs
    echo "Getting features for each PBU..."
    "$praat/praat" get_features.praat "$working_dir/audio.wav" "$working_dir/audio.TextGrid" "$working_dir/features.txt" "$gender"
    if [[ ! -f "$working_dir/features.txt" ]]; then
        echo "praat did not produce $working_dir/features.txt" >&2
        exit 1
    fi
fi



# -- VISUAL --

# convert video to series of pngs
if [[ -d "$working_dir/pngs" ]]; then
    #echo "jpgs directory already exists - not attempting to re-extract jpgs"
    echo "pngs directory already exists - not attempting to re-extract pngs"
else
    echo "Extracting PNGs from video"

    # validate endtime before creating anything: once the pngs directory
    # exists, later runs skip this step
    num_re='^[0-9]+([.][0-9]+)?$'
    if [[ ! "$endtime" =~ $num_re ]]; then
        echo "End time (5th argument) must be a non-negative number, got '$endtime'" >&2
        exit 1
    fi
    # number of frames to extract: 30 per unit of endtime (assumes endtime
    # is in seconds and the video is 30 fps - confirm). awk is used because
    # shell arithmetic rejects fractional and zero-padded values
    frame_count=$(awk -v secs="$endtime" 'BEGIN { printf "%d", 30 * secs }')

    # NOTE(review): nothing in this script writes to jpgs (the jpg
    # extraction below is commented out); it is still created as before
    mkdir -p -- "$working_dir/jpgs"
    mkdir -p -- "$working_dir/pngs"
    #ffmpeg -i "$working_dir/video.mp4" -vframes "$((30 * $endtime))" -an -f image2 "$working_dir/jpgs/out_%05d.jpg"
    if ! ffmpeg -i "$working_dir/video.mp4" -vframes "$frame_count" -an -f image2 "$working_dir/pngs/out_%05d.png"; then
        echo "ffmpeg could not extract frames from $working_dir/video.mp4" >&2
        # remove the incomplete directory so the next run retries extraction
        rm -rf -- "${working_dir:?}/pngs"
        exit 1
    fi
fi

if [[ -f "$working_dir/amps" ]]; then
    echo "Movement amplitudes already extracted, skipping..."
else
    # get movement amplitude by diffing the images
    echo "Getting movement amplitudes..."
    # write to a temporary name first: a plain redirect to amps would create
    # the file even when amplitude.py fails, and its presence makes every
    # later run skip this step
    if python amplitude.py "$working_dir/pngs/" "$starttime" "$endtime" > "$working_dir/amps.partial"; then
        mv -f -- "$working_dir/amps.partial" "$working_dir/amps"
    else
        echo "amplitude.py failed; keeping the pngs so this step can be retried" >&2
        rm -f -- "$working_dir/amps.partial"
        exit 1
    fi
fi

# the frames are no longer needed once amps exists. Only the files are
# removed; the pngs directory stays, so later runs skip re-extraction.
# find is used instead of `rm dir/*` so a very large number of frames cannot
# overflow the argument list, and :? aborts rather than touching "/pngs"
# when working_dir is empty
find "${working_dir:?}/pngs" -type f -exec rm -f -- {} +

# merge the acoustic features and movement amplitudes into data.out
echo "Compiling dataframe..."
# write to a temporary name and rename on success, so a failed run does not
# leave a truncated data.out that looks like a finished result
if python compile_data.py "$working_dir/features.txt" "$working_dir/amps" > "$working_dir/data.out.partial"; then
    mv -f -- "$working_dir/data.out.partial" "$working_dir/data.out"
else
    echo "compile_data.py failed; $working_dir/data.out was not updated" >&2
    rm -f -- "$working_dir/data.out.partial"
    exit 1
fi
#rm raw_amps
