#!/bin/bash

# Location of the RSA private key that was created as part of the Whirr
# installation. Point this at your own key if it lives somewhere else.
RSA_PRIVATE_KEY="${HOME}/.ssh/id_rsa_whirr"

# DO NOT MODIFY ANYTHING FROM HERE ON DOWN
CLUSTER_NAME='myimpalacluster'
CLUSTER_USERNAME='impala'
# File under ~/.whirr that this script reads to find the cluster's nodes.
WHIRR_INSTANCES="${HOME}/.whirr/${CLUSTER_NAME}/instances"

# Name of the setup script that is copied to, and run on, the worker nodes.
SETUP_IMPALA_SCRIPT='setup-impala.sh'

# Random password used to secure the 'root' and 'hive' mysql users:
# 16 bytes read from /dev/urandom, base64-encoded.
RANDOM_PASSWORD="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | base64)"

# Use Whirr to bring up the CDH cluster.
# Stop right here if the launch fails: everything below reads the instances
# file for this cluster and would otherwise run against a missing or stale one.
if ! whirr launch-cluster --config impalacluster.properties
then
  echo "whirr launch-cluster failed; aborting." >&2
  exit 1
fi

# Fetch the list of workers from the Whirr deployment.
# The instances file must exist and be readable before we try to parse it.
if [[ ! -r "$WHIRR_INSTANCES" ]]
then
  echo "Cannot read the Whirr instances file: $WHIRR_INSTANCES" >&2
  exit 1
fi

# Workers are all instances whose line does not mention the namenode,
# jobtracker or ganglia-metad roles; field 3 of each line is the address
# used to reach the node over SSH.
WORKER_NODES=$(grep -Ev 'hadoop-namenode|hadoop-jobtracker|ganglia-metad' \
                    "$WHIRR_INSTANCES" | awk '{print $3}')
if [[ -z "$WORKER_NODES" ]]
then
  echo "No worker nodes found in $WHIRR_INSTANCES" >&2
  exit 1
fi

# Install the Hive metastore on the first worker node
# NOTE(review): the two lookups below take the FIRST LINE of the instances
# file, which is not filtered the way WORKER_NODES is. Confirm that the first
# listed instance really is a worker.
# Hive box internal IP
HIVE_MYSQL_BOX_INTERNAL=$(head -n 1 "$WHIRR_INSTANCES" | awk '{print $4}')
# Hive box external IP
HIVE_MYSQL_BOX_EXTERNAL=$(head -n 1 "$WHIRR_INSTANCES" | awk '{print $3}')

# Copy the impala setup script to every worker node and run it there.
#
# The copy is done in the foreground; the remote installation is started in
# the background so that all nodes install in parallel. The output of each
# installation is captured in its own LOCAL log file,
# /tmp/impala-install-<node>.log, so that parallel jobs do not overwrite each
# other's output. Nodes whose copy or installation fails are reported on
# stderr once all jobs have finished.
#
# NOTE(review): the generated password is passed to the remote script as a
# command-line argument, so it may be visible in the remote process list
# while the script runs.
SSH_OPTS=(-i "$RSA_PRIVATE_KEY" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no)
INSTALL_PIDS=()
INSTALL_NODES=()
FAILED_NODES=()

# WORKER_NODES is a whitespace-separated list, so word splitting is intended.
# shellcheck disable=SC2086
for WORKER_NODE in $WORKER_NODES
do
  if ! scp "${SSH_OPTS[@]}" "$SETUP_IMPALA_SCRIPT" "$CLUSTER_USERNAME@$WORKER_NODE:/tmp"
  then
    # Without the script on the node there is nothing to run; skip it.
    echo "Failed to copy $SETUP_IMPALA_SCRIPT to $WORKER_NODE; skipping this node." >&2
    FAILED_NODES+=("$WORKER_NODE")
    continue
  fi

  # Run the script in the background so the installation is in parallel.
  # -n keeps the backgrounded ssh from reading this script's stdin.
  ssh -n "${SSH_OPTS[@]}" "$CLUSTER_USERNAME@$WORKER_NODE" \
    sudo bash "/tmp/$SETUP_IMPALA_SCRIPT" "$HIVE_MYSQL_BOX_INTERNAL" "$RANDOM_PASSWORD" \
    > "/tmp/impala-install-$WORKER_NODE.log" 2>&1 &
  INSTALL_PIDS+=("$!")
  INSTALL_NODES+=("$WORKER_NODE")
done

echo "Waiting for the installation scripts to finish on all the nodes. This will take about a minute per node in the cluster."
# Wait for each job individually so that its exit status can be inspected.
for JOB_INDEX in "${!INSTALL_PIDS[@]}"
do
  if ! wait "${INSTALL_PIDS[$JOB_INDEX]}"
  then
    FAILED_NODES+=("${INSTALL_NODES[$JOB_INDEX]}")
  fi
done

if (( ${#FAILED_NODES[@]} > 0 ))
then
  echo "Installation failed on: ${FAILED_NODES[*]}" >&2
  echo "See /tmp/impala-install-<node>.log on this machine for details." >&2
fi

# Tell the user the generated MySQL password; it is not stored anywhere else
# by this script.
printf '%s\n' \
  "The password for your root and Hive account on the MySQL box is $RANDOM_PASSWORD" \
  "Please save this password somewhere safe."
