#!/bin/bash

# Positional arguments:
#   $1 - IP address of the box that hosts the Hive metastore
#   $2 - password to use for the hive user
HIVE_METASTORE_IP="${1}"
HIVE_PASSWORD="${2}"

# URL of the Cloudera yum repo definition for Impala.
IMPALA_REPO_FILE='http://beta.cloudera.com/impala/redhat/6/x86_64/impala/cloudera-impala.repo'

# Configuration directories of the individual services.
IMPALA_CONF_DIR='/etc/impala/conf'
HIVE_CONF_DIR='/etc/hive/conf'
HADOOP_CONF_DIR='/etc/hadoop/conf'

# Escape the characters that may not appear verbatim in XML character data
# (&, <, >) and print the result on stdout.
# Arguments: $1 - raw string
function _xml_escape {
  # '&' must be handled first so the entities produced for < and > are not
  # escaped a second time.
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Write a hive-site.xml that points at the MySQL-backed Hive metastore.
# Globals:   HIVE_METASTORE_IP (read) - host part of the JDBC connection URL
#            HIVE_PASSWORD (read)     - password of the "hive" database user
# Arguments: $1 - path of the file to (over)write
# Returns:   non-zero if no target was given or the file could not be written
function write_hive_site {
  local target=$1
  local metastore_host metastore_password

  if [[ -z "$target" ]]; then
    echo "write_hive_site: missing target file argument" >&2
    return 1
  fi

  # Escaped copies keep the generated document well-formed even when the
  # password (or host) contains &, < or >; plain values pass through unchanged.
  metastore_host=$(_xml_escape "$HIVE_METASTORE_IP")
  metastore_password=$(_xml_escape "$HIVE_PASSWORD")

  cat > "$target" <<HIVESITE
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://$metastore_host/metastore</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>$metastore_password</value>
</property>
<property>
  <name>datanucleus.autoCreateSchema</name>
  <value>false</value>
</property>
<property>
  <name>datanucleus.fixedDatastore</name>
  <value>true</value>
</property>
</configuration>
HIVESITE
}

# Some configuration only needs to be run on the box housing the Hive metastore.
# That box is recognised by finding the metastore IP in the ifconfig output;
# -F makes grep take the address literally (as a regex, each dot of the IP
# would match any character).
if /sbin/ifconfig -a | grep -qF -- "addr:$HIVE_METASTORE_IP "; then

  # Install all the necessary packages
  yum install -y hive mysql mysql-server mysql-connector-java
  # Start the mysql server
  /etc/init.d/mysqld start

  # The password is embedded in single-quoted SQL string literals below, so
  # escape backslashes and single quotes; otherwise such a password would end
  # the literal early or be stored differently from what hive-site.xml holds.
  hive_password_sql=$(printf '%s' "$HIVE_PASSWORD" | sed -e 's/\\/\\\\/g' -e "s/'/''/g")

  # Create the Hive metastore and hive user
  /usr/bin/mysql -u root <<SQL
-- Create the metastore database
create DATABASE metastore;
-- Use the metastore database
use metastore;
-- Import the metastore schema from hive
SOURCE /usr/lib/hive/scripts/metastore/upgrade/mysql/hive-schema-0.9.0.mysql.sql;
-- Secure the root accounts with the hive password
update mysql.user set password = PASSWORD('$hive_password_sql') where user = 'root';
-- Create a user 'hive' with the supplied password for localhost access
CREATE USER 'hive'@'localhost' IDENTIFIED BY '$hive_password_sql';
-- Grant privileges on the metastore to the 'hive' user on localhost
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'localhost' WITH GRANT OPTION;
-- Create a user 'hive' with the supplied password for access from any host
CREATE USER 'hive'@'%' IDENTIFIED BY '$hive_password_sql';
-- Grant privileges on the metastore to the 'hive' user
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'%' WITH GRANT OPTION;
-- Load the new privileges
FLUSH PRIVILEGES;
SQL

  # Write the hive-site to the Hive configuration directory
  write_hive_site "$HIVE_CONF_DIR/hive-site.xml"
  # Link the mysql connector to hive lib
  ln -s /usr/share/java/mysql-connector-java.jar /usr/lib/hive/lib

  # Load up a really basic tab-delimited table into hive for testing
  # end-to-end functionality. printf writes the tab separators explicitly so
  # the data does not depend on literal tab characters surviving in this script.
  printf '%s\t%s\n' 1 one 2 two 3 three 4 four > /tmp/numbers.txt

  # NOTE(review): these commands run as the "impala" user, but the impala
  # packages are only installed further down in this script -- confirm that
  # the user already exists on the metastore box at this point.
  sudo -E -u impala hadoop fs -mkdir /user/impala
  sudo -E -u impala hadoop fs -put /tmp/numbers.txt /user/impala/numbers.txt
  sudo -E -u impala hive -e "CREATE TABLE numbers (num INT, word STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;"
  sudo -E -u impala hive -e "LOAD DATA INPATH '/user/impala/numbers.txt' into table numbers;"
fi # /end hive metadata store specific commands


# Download the Cloudera Impala repo definition into yum's repo directory.
# The subshell keeps the directory change local; -N only re-downloads the
# file when the remote copy is newer.
(
  cd /etc/yum.repos.d/ \
    && wget -N "${IMPALA_REPO_FILE}"
)

# Pull in the Impala daemons and the interactive shell from that repo.
yum install -y impala impala-shell impala-server impala-state-store

# Make sure Impala's configuration directory exists, then give Impala the same
# metastore connection settings that Hive uses.
mkdir -p "${IMPALA_CONF_DIR}"
write_hive_site "${IMPALA_CONF_DIR}/hive-site.xml"

# Build Impala's core-site.xml from the Hadoop core-site.xml, prepending some
# properties for performance (short-circuit reads).
#
# The new file starts with its own XML declaration and <configuration> opening
# tag followed by the extra properties; the Hadoop file is then appended
# without its own "<configuration>" line and without its "<?xml ...?>"
# declaration. An XML declaration is only legal at the very start of a
# document, so copying it into the middle would make the result malformed.
CORE_SITE_XML=core-site.xml
{
  cat <<'EOF'
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.skip.checksum</name>
    <value>false</value>
  </property>
EOF
  # -F: both patterns are literal strings. '<?xml ' (with the trailing space)
  # matches the declaration but not an '<?xml-stylesheet' instruction.
  grep -vF -e '<configuration>' -e '<?xml ' "$HADOOP_CONF_DIR/$CORE_SITE_XML"
} > "$IMPALA_CONF_DIR/$CORE_SITE_XML"

# Update the hdfs-site.xml file: prepend the properties below to the existing
# Hadoop hdfs-site.xml and replace it with the result.
#
# The new content is assembled in a mktemp-created file next to the
# destination instead of a fixed name in /tmp, so this script does not write
# through a predictable path in a world-writable directory.
HDFS_SITE_XML=hdfs-site.xml
if hdfs_site_tmp=$(mktemp "$HADOOP_CONF_DIR/$HDFS_SITE_XML.XXXXXX"); then
  {
    cat <<'EOF'
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>755</value>
  </property>
  <property>
    <name>dfs.block.local-path-access.user</name>
    <value>impala</value>
  </property>
  <!-- enable data locality tracking -->
  <property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
  </property>
EOF
    # Append the existing settings minus the opening tag and the XML
    # declaration (both were already written above; a declaration in the
    # middle of the document would make it malformed).
    grep -vF -e '<configuration>' -e '<?xml ' "$HADOOP_CONF_DIR/$HDFS_SITE_XML"
  } > "$hdfs_site_tmp"
  # mktemp creates the file with mode 0600; the configuration has to stay
  # readable by the non-root service accounts.
  chmod 644 "$hdfs_site_tmp"
  mv "$hdfs_site_tmp" "$HADOOP_CONF_DIR/$HDFS_SITE_XML"
else
  echo "Could not create a temporary file in $HADOOP_CONF_DIR; $HDFS_SITE_XML left unchanged" >&2
fi
# Copy the hdfs-site.xml file into the Impala config directory
cp "$HADOOP_CONF_DIR/$HDFS_SITE_XML" "$IMPALA_CONF_DIR/$HDFS_SITE_XML"
# Copy the log4j properties from Hadoop to Impala
cp "$HADOOP_CONF_DIR/log4j.properties" "$IMPALA_CONF_DIR/log4j.properties"

# Add Impala to the HDFS group.
# -a appends to the user's supplementary groups; -G on its own would replace
# the whole list and drop impala from every other supplementary group.
/usr/sbin/usermod -a -G hdfs impala

# Restart the HDFS datanode
/etc/init.d/hadoop-hdfs-datanode restart

# Start the impala services
# NOTE: It's important to run impala as a non-root user or performance will suffer (no direct reads)
# NOTE(review): glog normally takes its verbosity from GLOG_v; GVLOG_v looks
# like a typo that leaves verbose logging off -- confirm before changing it.
sudo -E -u impala GVLOG_v=1 nohup /usr/bin/statestored < /dev/null > /tmp/statestored.out 2>&1 &

# Impalad needs to have the NN and NN port passed in on the command-line
# FIXME: Parsing XML with bash is a bit brittle.
# The line following the fs.default.name <name> element is expected to hold
# the <value> with the filesystem URI. sed extracts the authority part
# (host[:port]) and stops at the next "/" or "<", so URIs both with and
# without a trailing slash work ("hdfs://nn:8020/" and "hdfs://nn:8020").
NN=$(grep -A 1 "fs.default.name" "$HADOOP_CONF_DIR/$CORE_SITE_XML" | tail -1 \
  | sed -n 's|.*://\([^/<]*\).*|\1|p')
NN_HOST=${NN%%:*}
case $NN in
  *:*) NN_PORT=${NN#*:} ;;
  *)   NN_PORT= ;;  # no explicit port in the URI
esac

# Resolve this host's address; only the first "has address" line of the host
# output is used, so IPv6 records, additional addresses or lookup errors
# cannot end up as extra or bogus arguments to impalad.
impalad_ip=$(host "$HOSTNAME" | awk '/has address/ {print $4; exit}')

sudo -E -u impala GVLOG_v=1 nohup /usr/bin/impalad \
  "-state_store_host=$HIVE_METASTORE_IP" "-nn=$NN_HOST" "-nn_port=$NN_PORT" \
  "-ipaddress=$impalad_ip" < /dev/null > /tmp/impalad.out 2>&1 &
