#!/bin/bash
# (C) Copyright Douglas Eadline, 2021  All rights reserved.
# Usable under Creative Commons CC BY-NC: This license allows reusers to distribute, 
# remix, adapt, and build upon the material in any medium or format for noncommercial 
# purposes only, and only so long as attribution is given to the creator. 
# set -x

# Print the usage summary for this wrapper on stdout: the supported
# COMMANDs, the expected FILE URI forms, and how to call parquet-tools
# directly through `yarn jar`.
print_help() {
  # One array element per output line; emitted verbatim by printf below.
  local -a help_lines=(
    "This script provides an easy shorthand to run the parquet-tools package."
    "Run as (with argument order):"
    "  'parquet-tools.sh COMMAND FILE'"
    "  COMMANDS are: "
    "    help Prints this message."
    "    cat  Prints out content for a given parquet file."
    "    head  Prints out the first 5 records for a given parquet file."
    "    schema  Prints out the schema for a given parquet file."
    "    meta  Prints out metadata for a given parquet file."
    "    dump  Prints out row groups and metadata for a given parquet file."
    "    merge  Merges multiple Parquet files into one Parquet file."
    "  FILE is of the form: (Note: parquet-tools.sh will work with local"
    "  and HDFS files with the appropriate file paths.)"
    "    file:///home/zeppelin/Parquet-Files/names.parquet"
    "    hdfs://localhost:9000//user/hive/warehouse/u_data_parquet"
    "  To run parquet-tools without this script:"
    "    yarn jar /opt/parquet-tools/parquet-tools-1.11.1.jar OPTIONS"
    "  More information: https://apache.googlesource.com/parquet-mr/+/apache-parquet-1.8.0/parquet-tools/README.md"
  )
  printf '%s\n' "${help_lines[@]}"
}

# Location of the parquet-tools jar that is handed to `yarn jar`.
readonly PARQUET="/opt/parquet-tools/parquet-tools-1.11.1.jar"

#######################################
# Validate the command line and dispatch to parquet-tools via `yarn jar`.
# Globals:   PARQUET (read)
# Arguments: $1   - COMMAND: help|cat|head|schema|meta|dump|merge
#            $2.. - FILE argument(s); everything after COMMAND is forwarded
#                   unchanged, so `merge IN1 IN2 OUT` and paths containing
#                   spaces work
# Outputs:   help text and parquet-tools output on stdout; error messages
#            (and the help text that accompanies them) on stderr
# Returns:   1 on a usage error or when yarn cannot be found; otherwise the
#            status of print_help (for `help`) or of the yarn invocation
#######################################
main() {
  local cmd="${1:-}"
  local yarn_bin

  if [[ -z "$cmd" ]]; then
    echo "ERROR: No command argument" >&2
    print_help >&2
    return 1
  fi
  if [[ "$cmd" != "help" && -z "${2:-}" ]]; then
    echo "ERROR: No file argument" >&2
    print_help >&2
    return 1
  fi

  case "$cmd" in
    help)
      print_help
      ;;
    cat|head|schema|meta|dump|merge)
      # Resolve yarn only when it is actually needed, so `help` and usage
      # errors still work on machines without Hadoop installed.
      yarn_bin=$(command -v yarn) || yarn_bin=""
      if [[ -z "$yarn_bin" ]]; then
        echo "ERROR: Hadoop yarn not found" >&2
        return 1
      fi
      # Forward every argument, quoted: merge needs several files, and
      # unquoted expansion would split paths that contain whitespace.
      "$yarn_bin" jar "$PARQUET" "$@"
      ;;
    *)
      echo "ERROR: Invalid Command." >&2
      print_help >&2
      return 1
      ;;
  esac
}

# Keep this as the last command: its status becomes the script's exit status.
main "$@"


