#!/bin/bash
#
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Top-level harness which deploys a ready-to-use Hadoop cluster including
# starting GCE VMs, installing Hadoop binaries, configuring HDFS, installing
# GHFS libraries, and configuring GHFS.
#
# Usage: ./bdutil [ optional flags ] <command> [ args ]
# Run './bdutil --help' for detailed usage.

BDUTIL_VERSION='0.34.4'

# Prints the usage for this script and exits.
function print_usage() {
cat <<EOF
bdutil version: "${BDUTIL_VERSION}"

Usage: ./bdutil [ optional flags ] <command> [ args ]

Description:
  Utility for creating a GCE cluster and installing, configuring, and calling
  Hadoop and Hadoop compatible software on it.

Flags:
  -b, --bucket
    GCS bucket used during deployment and by the cluster.

  -d, --use_attached_pds
    If true, uses additional non-boot volumes, optionally creating them on
    deploy if they don't exist already and deleting them on cluster delete.

  -e, --env_var_files
    Comma-separated list of bash files that are sourced to configure the
    cluster and installed software. Files are sourced in order, so settings
    in later files override earlier ones. bdutil_env.sh is always sourced
    first. Flag arguments are applied after all sourced files, but before the
    evaluate_late_variable_bindings method of bdutil_env.sh. See bdutil_env.sh
    for more information.

  -F, --default_fs
    Specifies the default filesystem to set, one of [gs|hdfs].

  -f, --force
    Assume default response (y) at prompt.

  -h, --help
    Print this help message.

  -i, --image
    Specify the GCE image to use.

  -m, --machine_type
    Specify the GCE machine type to use.

  --network
    Specify a network name with which to associate new virtual machines.

  -n, --num_workers
    The number of worker nodes to create.

  -P, --prefix
    Common prefix for cluster node names.

  -p, --project
    The Google Cloud project in which to create the cluster.

  -t, --target
    Where to execute code for run_command and run_command_group.
    Must be one of [master|workers|all].
    Defaults to master for run_command and to all for run_command_group.

  -u, --upload_files
    Comma-separated list of additional files to upload to the VMs.

  -v, --verbose
    If provided, sends gcutil output to console in addition to logfiles.

  -z, --zone
    Specify the GCE zone to use.


Commands:
  create, delete, deploy, generate_config, run_command, run_command_group,
  run_command_steps, shell, socksproxy

create               Creates the VMs and optionally disks for the cluster.

delete               Deletes the VMs and optionally disks for the cluster.

deploy               Creates the VMs and optionally disks for the cluster and
                     then runs all COMMAND_STEPS specified in resolved
                     env_var_files on them.

generate_config      Generates an overrides file specified by the single
                     positional argument passed to this command which contains
                     the environment-variable settings generated from flags.

run_command          Executes given code on nodes of a cluster. Uses --target
                     flag. Positional arguments following run_command will be
                     executed.
                     Use -- to pass flags to your command (see example).

run_command_group    Executes the given command group from COMMAND_GROUPS
                     variable in resolved env_var_files on nodes of a cluster.
                     Uses --target flag. Takes one positional argument of the
                     command to run.

run_command_steps    Runs all COMMAND_STEPS specified in resolved
                     env_var_files on the cluster.

shell                Creates a new SSH connection to the master node.

socksproxy           Creates a SOCKS proxy running through the master node.


Examples:

  Deploy a 5-worker cluster with prefix 'my-cluster' and BigQuery/Datastore installed:
  ./bdutil -b foo-bucket -n 5 -P my-cluster -e bigquery_env.sh,datastore_env.sh deploy

  Upload and execute hadoop-validate-setup.sh on the cluster's master node:
  ./bdutil -P my-cluster -u hadoop-validate-setup.sh run_command -- sudo -u foo-user ./hadoop-validate-setup.sh

  Configure an existing 20-worker Hadoop cluster, installing connectors as desired:
  ./bdutil -P pre-existing-cluster -n 20 run_command_group install_connectors

  Generate an env file from flags, then deploy/delete using that file:
  ./bdutil -P prod-cluster1 -n 20 -b prod-bucket1 generate_config prod1_env.sh
  ./bdutil -e prod1_env.sh deploy
  ./bdutil -e prod1_env.sh delete

EOF
exit 0
}

# Simple wrapper around "echo" so that it's easy to add log messages with a
# date/time prefix.
function loginfo() {
  echo "$(date): ${@}"
}

# Simple wrapper around "echo" controllable with ${VERBOSE_MODE}.
function logdebug() {
  if (( ${VERBOSE_MODE} )); then
    loginfo "${@}"
  fi
}

# Simple wrapper to pass errors to stderr.
function logerror() {
  loginfo "${@}" >&2
}

# Give instructions on full usage statement and fail.
function print_help() {
  logerror "For help run './bdutil --help.'"
  exit 1
}

# Helper to consolidate the various error logs into a single debug file for
# easy review after an error occurs.
function consolidate_error_logs() {
  if [[ -a ${GCUTIL_STDOUT_FILE} ]]; then
    echo '******************* gcutil stdout *******************' \
        >> ${AGGREGATE_DEBUG_FILE}
    cat ${GCUTIL_STDOUT_FILE} >> ${AGGREGATE_DEBUG_FILE}
    echo >> ${AGGREGATE_DEBUG_FILE}
  fi
  if [[ -a ${GCUTIL_STDERR_FILE} ]]; then
    echo '******************* gcutil stderr *******************' \
        >> ${AGGREGATE_DEBUG_FILE}
    cat ${GCUTIL_STDERR_FILE} >> ${AGGREGATE_DEBUG_FILE}
    echo >> ${AGGREGATE_DEBUG_FILE}
    echo '************ ERROR logs from gcutil stderr ************' \
        >> ${AGGREGATE_DEBUG_FILE}
    grep -i 'error' ${GCUTIL_STDERR_FILE} >> ${AGGREGATE_DEBUG_FILE}
    echo >> ${AGGREGATE_DEBUG_FILE}
  fi
  if [[ -a ${VM_DEBUG_FILE} ]]; then
    echo '******************* Exit codes and VM logs *******************' \
        >> ${AGGREGATE_DEBUG_FILE}
    cat ${VM_DEBUG_FILE} >> ${AGGREGATE_DEBUG_FILE}
    echo >> ${AGGREGATE_DEBUG_FILE}
  fi
  logerror "Detailed debug info available in file: ${AGGREGATE_DEBUG_FILE}"
  logerror 'Check console output for error messages and/or retry your command.'
}

# Handler for errors occurring during the deployment to print useful info
# before exiting. The following global variables control whether
# handle_error() should actually process and consolidate a trapped error, or
# simply flip CAUGHT_ERROR to '1' without consolidating logs or exiting, in
# case the caller wants to continue on error.
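# For example, delete_cluster sets SUPPRESS_TRAPPED_ERRORS=1 so that one
# failed deleteinstance call doesn't abort deletion of the remaining VMs.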
SUPPRESS_TRAPPED_ERRORS=0
CAUGHT_ERROR=0
function handle_error() {
  # Save the error code responsible for the trap.
  local errcode=$?
  local bash_command=${BASH_COMMAND}
  local lineno=${BASH_LINENO[0]}

  CAUGHT_ERROR=1

  if (( ${SUPPRESS_TRAPPED_ERRORS} )); then
    loginfo "Continuing despite trapped error with code '${errcode}'"
    return
  fi

  # Wait for remaining async things to finish, otherwise our error message may
  # get lost among other logspam.
  wait
  logerror "Command failed: ${bash_command} on line ${lineno}."
  logerror "Exit code of failed command: ${errcode}"

  consolidate_error_logs
  exit ${errcode}
}

# Use handle_error() for any errors in deployment commands.
trap handle_error ERR

# Introduces a small delay between async calls to avoid hitting the API
# multiple times in the same second.
function sleep_for_api_ops() {
  sleep "${GCUTIL_SLEEP_TIME_BETWEEN_ASYNC_CALLS_SECONDS}"
}

# Helper for waiting on all async jobs to finish, with info logging. $1 should
# be a short description of what's being waited on.
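# Typical usage (mirroring the pattern in create_cluster below):
#   for NODE in ${WORKERS[@]}; do wait_for_ssh ${NODE} & done
#   await_async_jobs 'wait_for_ssh'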
function await_async_jobs() {
  trap handle_error ERR

  # Sleep a tiny bit to allow the async process to report its kickoff first,
  # to try to keep this "Might take a while" warning as the last message
  # before the long wait.
  sleep '0.5'
  loginfo "Waiting on async '$1' jobs to finish. Might take a while..."
  for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done
  # Newline since the async jobs may have printed dots for progress.
  echo
}

# Given $1 describing the command to confirm (deploy|delete), prints and reads
# a confirmation prompt from the console.
SKIP_PROMPT=0
function prompt_confirmation() {
  trap handle_error ERR
  local msg="$1 cluster with following settings?
      CONFIGBUCKET='${CONFIGBUCKET?}'
      PROJECT='${PROJECT?}'
      GCE_IMAGE='${GCE_IMAGE?}'
      GCE_ZONE='${GCE_ZONE?}'
      GCE_NETWORK='${GCE_NETWORK?}'
      PREFIX='${PREFIX?}'
      NUM_WORKERS=${NUM_WORKERS?}
      NAMENODE_HOSTNAME='${NAMENODE_HOSTNAME}'
      WORKERS='${WORKERS[@]}'
      BDUTIL_GCS_STAGING_DIR='${BDUTIL_GCS_STAGING_DIR}'
      "
  if (( ${USE_ATTACHED_PDS} )); then
    msg+="\
      NAMENODE_ATTACHED_PD='${NAMENODE_ATTACHED_PD}'
      WORKER_ATTACHED_PDS='${WORKER_ATTACHED_PDS[@]}'
      "
  fi
  if [[ -n "${TARGET}" ]]; then
    msg+="TARGET='${TARGET}'
      "
  fi
  if [[ -n "${COMMAND_GROUP}" ]]; then
    msg+="COMMAND_GROUP='${COMMAND_GROUP}'
      "
  fi
  msg+="(y/n) "
  if (( ${SKIP_PROMPT} )); then
    echo "${msg}" y
  else
    read -p "${msg}" PROMPT_RESPONSE
    if [[ ${PROMPT_RESPONSE} != 'y' ]]; then
      logerror "Aborting command '${BDUTIL_CMD}', exiting..."
      exit 1
    fi
  fi

  if [[ ${EUID} -eq 0 ]]; then
    msg='Are you sure you want to run the command as root? (y/n)'
    read -p "${msg}" PROMPT_RESPONSE
    if [[ ${PROMPT_RESPONSE} != 'y' ]]; then
      logerror "Aborting command '$1', exiting..."
      exit 1
    fi
  fi
}

FORCE_TTY_ALLOCATION=1
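# Echoes the full gcutil command line for command $1 with args ${@:2}, adding
# global flags (--project, --zone) and, where applicable, wait/poll and ssh
# tty flags.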
function get_gcutil_cmd() {
  local gcutil_cmd=$1
  local cmd_args=${@:2}
  local gcutil_flags=()

  gcutil_flags+=("--project=${PROJECT}")
  gcutil_flags+=("--zone=${GCE_ZONE}")

  # Only add the --permit_root_ssh flag if running as root.
  if [[ ${EUID} -eq 0 ]]; then
    gcutil_flags+=('--permit_root_ssh')
  fi

  # --max_wait_time and --sleep_between_polls are not applicable to gcutil ssh
  # or gcutil push or gcutil pull.
  if ! [[ "${gcutil_cmd}" =~ ^(pull|push|ssh)$ ]]; then
    gcutil_flags+=("--max_wait_time=${GCUTIL_TIMEOUT_SECONDS}")
    gcutil_flags+=("--sleep_between_polls=${GCUTIL_POLL_INTERVAL_SECONDS}")
  fi

  # Force Pseudo tty allocation to bypass CentOS's sudoers defaults
  if [[ "${gcutil_cmd}" == ssh ]] && (( ${FORCE_TTY_ALLOCATION} )); then
    gcutil_flags+=('--ssh_arg=-tt')
  fi

  echo gcutil ${gcutil_cmd} ${gcutil_flags[@]} ${cmd_args}
}

# Runs the gcutil command assembled by get_gcutil_cmd, logging stdout/stderr
# to files (or teeing them to the console in verbose mode) for all GCE
# operations.
INITIAL_SSH_SUCCEEDED=1
function run_gcutil_cmd() {
  local gcutil_cmd=$1
  local full_cmd=$(get_gcutil_cmd ${@})
  if (( ${VERBOSE_MODE} )); then
    loginfo "Running ${full_cmd}"
    ${full_cmd} \
        2> >(tee -a ${GCUTIL_STDERR_FILE} 1>&2) \
        1> >(tee -a ${GCUTIL_STDOUT_FILE}) \
        < /dev/null
  else
    echo -n "."
    ${full_cmd} 2>>${GCUTIL_STDERR_FILE} 1>>${GCUTIL_STDOUT_FILE} < /dev/null
  fi

  local exitcode=$?
  if (( ${exitcode} != 0 )); then
    if [[ "${@:3}" == "exit 0" ]]; then
      # This is just an sshability check; only log it to debug.
      logdebug "Exited ${exitcode} : ${full_cmd}"
    else
      logerror "Exited ${exitcode} : ${full_cmd}"
    fi
    loginfo "Exited ${exitcode} : ${full_cmd}" >> ${VM_DEBUG_FILE}
    if [[ "${gcutil_cmd}" == 'ssh' ]] && (( ${INITIAL_SSH_SUCCEEDED} )); then
      local target_node=$2
      local fetch_logs_cmd=$(
          get_gcutil_cmd ssh ${target_node} tail -vn 30 "*.stderr")
      logerror "Fetching on-VM logs with cmd: ${fetch_logs_cmd}"
      ${fetch_logs_cmd} < /dev/null \
          | sed "s/^/${target_node}:\t/" >> ${VM_DEBUG_FILE}
    fi
  else
    echo -n '.'
  fi
  return ${exitcode}
}

# Checks for obvious issues like missing "required" fields.
function run_sanity_checks() {
  trap handle_error ERR
  if [[ -z "${CONFIGBUCKET}" ]]; then
    logerror 'CONFIGBUCKET must be provided'
    print_help
  fi

  # Make sure groupings of shell scripts for running on VMs are defined.
  if (( ${#COMMAND_GROUPS[@]} <= 0 )); then
    logerror 'COMMAND_GROUPS must be non-empty.'
    print_help
  fi

  # Make sure the series of steps to run on VMs are defined.
  if (( ${#COMMAND_STEPS[@]} <= 0 )); then
    logerror 'COMMAND_STEPS must be non-empty.'
    print_help
  fi

  # Make sure the hostnames all abide by the PREFIX.
  for NODE in ${WORKERS[@]} ${NAMENODE_HOSTNAME?}; do
    if ! [[ "${NODE}" =~ ^${PREFIX}.* ]]; then
      logerror "Error: VM instance name ${NODE} doesn't start with ${PREFIX}."
      print_help
    fi
  done

  # Check for the right number of elements in WORKERS.
  if (( ${#WORKERS[@]} != ${NUM_WORKERS?} )); then
    logerror "WORKERS must contain ${NUM_WORKERS} elements; got ${#WORKERS[@]}"
    print_help
  fi

  # Check for disk names being defined if USE_ATTACHED_PDS is true.
  if (( ${USE_ATTACHED_PDS} )); then
    if (( ${#WORKER_ATTACHED_PDS[@]} != ${NUM_WORKERS?} )); then
      local actual=${#WORKER_ATTACHED_PDS[@]}
      local varname='WORKER_ATTACHED_PDS'
      logerror "${varname} has ${actual} elements, expected ${NUM_WORKERS}"
      print_help
    fi
    if [[ -z "${NAMENODE_ATTACHED_PD}" ]]; then
      logerror 'NAMENODE_ATTACHED_PD must be defined since USE_ATTACHED_PDS==1.'
      print_help
    fi
  fi

  # Make sure GCS connector is installed if it is the default FS
  if [[ "${DEFAULT_FS}" == 'gs' ]] && (( ! "${INSTALL_GCS_CONNECTOR}" )); then
    logerror 'INSTALL_GCS_CONNECTOR must be 1 if DEFAULT_FS is gs.'
    print_help
  fi

  # Make sure HDFS is enabled if it is the default FS
  if [[ "${DEFAULT_FS}" == 'hdfs' ]] && (( ! "${ENABLE_HDFS}" )); then
    logerror 'ENABLE_HDFS must be 1 if DEFAULT_FS is hdfs.'
    print_help
  fi

  local scheme=${HADOOP_TARBALL_URI%%://*}
  # Make sure HADOOP_TARBALL_URI uses supported scheme
  if [[ ! "${scheme}" =~ ^(gs|https?)$ ]] ; then
    logerror "Unsupported scheme: \"$scheme\" in" \
        "HADOOP_TARBALL_URI: ${HADOOP_TARBALL_URI}."
    print_help
  fi

  # Make sure TARGET is set correctly
  if [[ "${BDUTIL_CMD}" =~ ^run_command(_group)?$ ]]; then
    if [[ -z "${TARGET}" ]]; then
      if [[ "${BDUTIL_CMD}" == 'run_command_group' ]]; then
        TARGET='all'
      else
        TARGET='master'
      fi
      logerror "TARGET unspecified assuming ${TARGET}."
    elif ! [[ "${TARGET}" =~ ^(master|workers|all)$ ]]; then
      logerror '--target must be [master|workers|all].'
      print_help
    fi
  elif [[ -n "${TARGET}" ]]; then
    logerror "Flag --target can only be specified for run_command" \
        "or run_command_group."
    print_help
  fi

  if [[ -n "${COMMAND_GROUP}" ]]; then
    if ! grep -q ${COMMAND_GROUP}: <<< ${COMMAND_GROUPS[@]}; then
      logerror "Command group '${COMMAND_GROUP}' not found in" \
          "resolved COMMAND_GROUPS."
      print_help
    fi
  fi

  if [[ -z "${PROJECT}" ]]; then
    loginfo 'No project provided; using default gcloud project.'
    PROJECT="$(gcloud config list | grep project | cut -d'=' -f2 | tr -d ' ')"
    if [[ -n "${PROJECT}" ]]; then
      loginfo "Using project '${PROJECT}'"
      echo "PROJECT=${PROJECT}" >> ${OVERRIDES_FILE}
    else
      logerror 'Cannot find project using gcloud.'
      print_help
    fi
  fi

  # TODO(dhuo): Possibly all "late variable bindings" could be generated here
  # instead of actually requiring the evaluate_late_variable_bindings function.
  if [[ -z "${BDUTIL_GCS_STAGING_DIR}" ]]; then
    loginfo 'No staging directory defined; computing one now.'
    local staging_dir_base="gs://${CONFIGBUCKET}/bdutil-staging"
    BDUTIL_GCS_STAGING_DIR="${staging_dir_base}/${NAMENODE_HOSTNAME}"
    echo "BDUTIL_GCS_STAGING_DIR=${BDUTIL_GCS_STAGING_DIR}" >> ${OVERRIDES_FILE}
  fi
}

# Checks for more heavyweight but obvious issues like CONFIGBUCKET
# inaccessibility prior to turning on any VMs.
function validate_heavyweight_settings() {
  # Perform gsutil checks last, because they are slow.
  loginfo "Checking for existence of gs://${CONFIGBUCKET}..."
  gsutil ls -b gs://${CONFIGBUCKET}

  # Catch the exitcode so that we can provide more user-friendly error messages
  # while still propagating the return value out for consolidated error-trap
  # handling.
  local exitcode=$?
  if (( ${exitcode} != 0 )); then
    logerror "Failed to access bucket gs://${CONFIGBUCKET}."
    logerror 'Please make sure the bucket exists and is accessible with gsutil.'
    return ${exitcode}
  fi

  # Make sure HADOOP_TARBALL_URI exists if it is a GCS URI.
  local scheme=${HADOOP_TARBALL_URI%%://*}
  if [[ "${scheme}" == 'gs' ]]; then
    loginfo "Checking for existence of ${HADOOP_TARBALL_URI}..."
    if (( ${VERBOSE_MODE} )); then
      gsutil stat ${HADOOP_TARBALL_URI}
    else
      gsutil -q stat ${HADOOP_TARBALL_URI}
    fi
    local exitcode=$?
    if (( ${exitcode} != 0 )); then
      logerror "Failed to find file ${HADOOP_TARBALL_URI}."
      logerror 'Please make sure it exists and is accessible with gsutil.'
      return ${exitcode}
    fi
  fi

  # Check all the specified UPLOAD_FILES.
  if (( ${#UPLOAD_FILES[@]} > 0 )); then
    loginfo "Checking upload files..."
    for upload_file in ${UPLOAD_FILES[@]}; do
      if [[ -r "${upload_file}" ]]; then
        loginfo "Verified '${upload_file}'"
      else
        logerror "Failed to read file ${upload_file}."
        logerror 'Please make sure it exists and is accessible.'
        return 1
      fi
    done
  fi

  return 0
}

# In the case of a single-node setup, we expect $WORKERS and $NAMENODE_HOSTNAME
# to refer to the same thing, so some logic must call this function to avoid
# duplicating certain steps (e.g. addinstance/deleteinstance).
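# A single-node configuration would look like (hypothetical values):
#   NUM_WORKERS=1
#   NAMENODE_HOSTNAME='solo-cluster-nn'
#   WORKERS=('solo-cluster-nn')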
function is_single_node_setup() {
  [[ ${#WORKERS[@]} -eq 1 && "${WORKERS[0]}" == "${NAMENODE_HOSTNAME}" ]]
}

# Repeatedly try to ssh into node until success or limit is reached.
# Will fail if node takes too long.
function wait_for_ssh() {
  trap handle_error ERR
  local node=$1
  local max_attempts=5
  local is_sshable="run_gcutil_cmd ssh ${node} exit 0"
  local sleep_time=${GCUTIL_POLL_INTERVAL_SECONDS}
  for (( i=0; i < ${max_attempts}; i++ )); do
    if ${is_sshable}; then
      return 0
    else
      # Save the exit code of the failed ssh attempt.
      local errcode=$?
      loginfo "'${node}' not yet sshable (${errcode}); sleeping ${sleep_time}."
      sleep ${sleep_time}
    fi
  done
  logerror "Node '${node}' did not become ssh-able after ${max_attempts} attempts"
  return ${errcode}
}

# Creates the VMs and optionally PDs of the cluster
function create_cluster() {
  INITIAL_SSH_SUCCEEDED=0

  trap handle_error ERR
  # Optionally create the disks to be attached to the VMs.
  if (( ${USE_ATTACHED_PDS} && ${CREATE_ATTACHED_PDS_ON_DEPLOY} )); then
    if ! is_single_node_setup; then
      loginfo "Creating attached worker disks: ${WORKER_ATTACHED_PDS[@]}"
      for ((i=0; i < NUM_WORKERS; i++)); do
        if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
          await_async_jobs 'adddisk (partial)'
          loginfo 'Done. Invoking next batch...'
        fi
        DISK=${WORKER_ATTACHED_PDS[${i}]}
        run_gcutil_cmd \
            adddisk \
            --size_gb=${WORKER_ATTACHED_PDS_SIZE_GB} \
            ${DISK} &
        sleep_for_api_ops
      done
    fi
    loginfo "Creating attached master disk: ${NAMENODE_ATTACHED_PD}"
    run_gcutil_cmd \
        adddisk \
        --size_gb=${NAMENODE_ATTACHED_PD_SIZE_GB} \
        ${NAMENODE_ATTACHED_PD} &
    await_async_jobs 'adddisk'
    loginfo 'Done creating disks!'
  fi

  # Start workers and master.
  # For now, we will always auto-create a persistent boot disk and auto-delete
  # it on shutdown; truly persistent volumes will be used as a non-root mount
  # point. We can preserve the persistent boot disk once the setup is
  # idempotent.
  if ! is_single_node_setup; then
    loginfo "Creating worker instances: ${WORKERS[@]}"
    for ((i=0; i < NUM_WORKERS; i++)); do
      if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
        await_async_jobs 'addinstance (partial)'
        loginfo 'Done. Invoking next batch...'
      fi
      if (( ${USE_ATTACHED_PDS} )); then
        local optional_disk_arg="--disk=${WORKER_ATTACHED_PDS[${i}]},mode=rw"
      fi
      run_gcutil_cmd \
          addinstance \
          --machine_type=${GCE_MACHINE_TYPE} \
          --service_account=default \
          --image=${GCE_IMAGE} \
          --network=${GCE_NETWORK} \
          --service_account_scopes=${GCE_SERVICE_ACCOUNT_SCOPES} \
          --persistent_boot_disk \
          --external_ip_address=ephemeral \
          ${optional_disk_arg} ${WORKERS[${i}]} &
      sleep_for_api_ops
    done
  fi
  loginfo "Creating master instance: ${NAMENODE_HOSTNAME}"
  if (( ${USE_ATTACHED_PDS} )); then
    optional_disk_arg="--disk=${NAMENODE_ATTACHED_PD},mode=rw"
  fi
  run_gcutil_cmd \
      addinstance \
      --machine_type=${GCE_MACHINE_TYPE} \
      --service_account=default \
      --image=${GCE_IMAGE} \
      --network=${GCE_NETWORK} \
      --service_account_scopes=${GCE_SERVICE_ACCOUNT_SCOPES} \
      --persistent_boot_disk \
      --external_ip_address=ephemeral \
      ${optional_disk_arg} ${NAMENODE_HOSTNAME} &
  await_async_jobs 'addinstance'

  loginfo 'Instances all created. Entering polling loop to wait for ssh-ability'

  # This wait is necessary due to VMs not being immediately ssh-able. It may
  # still fail if a VM is particularly slow in becoming ssh-able.
  for ((i=0; i < NUM_WORKERS; i++)); do
    if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
      await_async_jobs 'wait_for_ssh (partial)'
      loginfo 'Done. Invoking next batch...'
    fi
    NODE=${WORKERS[${i}]}
    wait_for_ssh ${NODE} &
    sleep_for_api_ops
  done
  wait_for_ssh ${NAMENODE_HOSTNAME} &

  # Wait for all nodes to be ready.
  await_async_jobs 'wait_for_ssh'

  INITIAL_SSH_SUCCEEDED=1
  loginfo 'Instances all ssh-able'
}

# Delete cluster and optionally attached PDs
function delete_cluster() {
  # For deletion, we want to continue despite errors, but print a warning at
  # the end.
  SUPPRESS_TRAPPED_ERRORS=1
  trap handle_error ERR
  loginfo 'Deleting hadoop cluster...'

  if ! is_single_node_setup; then
    for ((i=0; i < NUM_WORKERS; i++)); do
      if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
        await_async_jobs 'deleteinstance (partial)'
        loginfo 'Done. Invoking next batch...'
      fi
      NODE=${WORKERS[${i}]}
      run_gcutil_cmd deleteinstance --force --delete_boot_pd ${NODE} &
      sleep_for_api_ops
    done
  fi
  run_gcutil_cmd deleteinstance --force --delete_boot_pd ${NAMENODE_HOSTNAME} &
  await_async_jobs 'deleteinstance'
  loginfo 'Done deleting VMs!'

  # Optionally delete all the attached disks as well now that the instances
  # have been deleted.
  if (( ${USE_ATTACHED_PDS} && ${DELETE_ATTACHED_PDS_ON_DELETE} )); then
    if ! is_single_node_setup; then
      loginfo "Deleting attached worker disks: ${WORKER_ATTACHED_PDS[@]}"
      for ((i=0; i < NUM_WORKERS; i++)); do
        if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
          await_async_jobs 'deletedisk (partial)'
          loginfo 'Done. Invoking next batch...'
        fi
        DISK=${WORKER_ATTACHED_PDS[${i}]}
        run_gcutil_cmd deletedisk --force ${DISK} &
        sleep_for_api_ops
      done
    fi
    loginfo "Deleting attached master disk: ${NAMENODE_ATTACHED_PD}"
    run_gcutil_cmd deletedisk --force ${NAMENODE_ATTACHED_PD} &
    await_async_jobs 'deletedisk'
    loginfo 'Done deleting disks!'
  fi

  if (( ${CAUGHT_ERROR} )); then
    logerror "Warning: Some errors occurred, please review specified logfiles."
    consolidate_error_logs
    exit 1
  fi

  SUPPRESS_TRAPPED_ERRORS=0
}

# Check whether to use custom environment-variable file(s). We always include
# bdutil_env.sh first.
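# For example (hypothetical file names), with '-e my_env.sh,prod_env.sh' the
# effective order is bdutil_env.sh, my_env.sh, prod_env.sh, then the flag
# overrides file, so later settings win for variables set in multiple places.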
function source_env_files() {
  trap handle_error ERR
  ENV_FILES=("${BDUTIL_DIR}/bdutil_env.sh" ${ENV_FILES[@]})
  if (( ${#ENV_FILES[@]} > 1 )); then
    loginfo "Using custom environment-variable file(s): ${ENV_FILES[@]}"
  else
    loginfo "Using default environment-variable file: ${ENV_FILES[@]}"
  fi

  # Pull in all the environment variables from the files, or exit if we can't
  # find one of them.
  for ENV_FILE in ${ENV_FILES[@]}; do
    if [[ -r "${ENV_FILE}" ]]; then
      loginfo "Reading environment-variable file: ${ENV_FILE}"
      source ${ENV_FILE}
    else
      logerror "Cannot find environment-variable file: ${ENV_FILE}"
      exit 1
    fi
  done

  # Evaluate command level overrides.
  if [[ -r "${OVERRIDES_FILE}" ]]; then
    source ${OVERRIDES_FILE}
  fi

  evaluate_late_variable_bindings
}

# Copies the temporary ${OVERRIDES_FILE} to ${GENERATE_CONFIG_FILENAME}.
function generate_config_file() {
  trap handle_error ERR
  if [[ -r "${GENERATE_CONFIG_FILENAME}" ]]; then
    local msg="Overwrite existing file '${GENERATE_CONFIG_FILENAME}'? (y/n)"
    read -p "${msg}" PROMPT_RESPONSE
    if [[ ${PROMPT_RESPONSE} != 'y' ]]; then
      logerror "Not generating config file, exiting..."
      exit 1
    fi
  fi
  loginfo "Creating config file '${GENERATE_CONFIG_FILENAME}' with contents:"
  cat ${OVERRIDES_FILE}

  cp ${OVERRIDES_FILE} ${GENERATE_CONFIG_FILENAME}
  loginfo "To deploy: ./bdutil deploy -e ${GENERATE_CONFIG_FILENAME}"
}

# Generate grouped scripts with resolved environment files and
# add them to UPLOAD_FILES.
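# Each COMMAND_GROUPS entry has the form 'group_name:script1 script2 ...';
# e.g. (hypothetical) COMMAND_GROUPS=('install:libexec/install_hadoop.sh')
# yields a generated install.sh that sources hadoop-env-setup.sh and then
# inlines libexec/install_hadoop.sh.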
function generate_scripts_from_command_groups() {
  trap handle_error ERR
  # Dump a temporary script which "sets" all the env variables. This will act as
  # preamble for all the other remote scripts.
  cat <<EOF > "${SCRIPT_TMPDIR}/hadoop-env-setup.sh"
#!/bin/bash
set -e -a
$(cat ${ENV_FILES[@]})
$(cat ${OVERRIDES_FILE})
evaluate_late_variable_bindings
set +a
EOF
  UPLOAD_FILES+=("${SCRIPT_TMPDIR}/hadoop-env-setup.sh")

  # Iterate over the COMMAND_GROUPS array defined by the ENV_FILES.
  loginfo "Generating ${#COMMAND_GROUPS[@]} command groups..."
  for cmd_group in "${COMMAND_GROUPS[@]}"; do
    local filegroup="${cmd_group%%:*}"
    logdebug "Generating command group '${filegroup}':"
    local genfile="${SCRIPT_TMPDIR}/${filegroup}.sh"
    cat << 'EOF' > ${genfile}
#!/usr/bin/env bash
set -e
. $(dirname $0)/hadoop-env-setup.sh
EOF
    local files=(${cmd_group#*:})
    for FILE in "${files[@]}"; do
      if [[ "${FILE}" =~ ^/.* ]]; then
        # Absolute path.
        local resolved_file="${FILE}"
      else
        # Relative path.
        local resolved_file"=${BDUTIL_DIR}/${FILE}"
      fi
      logdebug "    Appending file '${resolved_file}'..."
      cat ${resolved_file} >> ${genfile}
    done
    # Make it runnable.
    chmod 750 ${genfile}
    UPLOAD_FILES+=(${genfile})
  done

  loginfo 'Done generating remote shell scripts.'
}

# Upload generated scripts and any specified upload files.
function upload_scripts_and_files () {
  trap handle_error ERR

  if (( ${#UPLOAD_FILES[@]} == 0 )); then
    loginfo "No files to upload."
    return
  fi

  # End with a slash so that even if we're uploading only one file, gsutil
  # will treat it as a directory.
  local staging_dir="${BDUTIL_GCS_STAGING_DIR}/${INVOCATION_ID}/"
  loginfo "Staging file and script dependencies into ${staging_dir}..."

  gsutil -m cp ${UPLOAD_FILES[@]} ${staging_dir}

  local base_names=(${UPLOAD_FILES[@]##*/})
  local remote_files=(${base_names[@]/#/${staging_dir}})

  # Make the VMs download the bootstrap file.
  loginfo 'Downloading staging files onto VMs...'
  # Note that we avoid simply downloading gs://staging_dir/*, since the glob
  # listing is only "eventually consistent" so we will occasionally miss files.
  local bootstrap_cmd="gsutil -m cp ${remote_files[@]} . && chmod 755 *"
  run_remote_command "${bootstrap_cmd}" "${bootstrap_cmd}" bootstrap bootstrap

  loginfo 'Uploads of shell scripts finished, deleting staging files...'

  gsutil -m rm "${remote_files[@]}"

  loginfo 'Staging files successfully deleted.'
}

# Generates a string which simply runs the given command as root
# while redirecting its stdout and stderr to separate logfiles.
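# For example, get_remote_cmd 'echo hi' setup yields, when not in verbose
# mode:
#   sudo su -l -c "cd ${PWD} && echo hi" 2>>setup.stderr 1>>setup.stdout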
function get_remote_cmd() {
  local cmd="$1"
  local logname="$2"
  local full_cmd="sudo su -l -c \"cd \${PWD} && ${cmd}\""
  if (( ${VERBOSE_MODE} )) && [[ "${TARGET}" == master ]]; then
    full_cmd+="\
        2> >(tee -a ${logname}.stderr 1>&2) \
        1> >(tee -a ${logname}.stdout)"
  else
    full_cmd+=" 2>>${logname}.stderr 1>>${logname}.stdout"
  fi
  # Use sudo su -l instead of sudo to have /usr/local/bin in the PATH
  echo "${full_cmd}"
}

# Run command on VM using ssh.
function run_remote_command() {
  trap handle_error ERR
  local master_cmd="$1"
  local workers_cmd="$2"
  local master_logname="${3:-${master_cmd%% *}_${INVOCATION_ID}}"
  local workers_logname="${4:-${workers_cmd%% *}_${INVOCATION_ID}}"
  if [[ -n "${workers_cmd}" ]] && [[ "${TARGET}" != master ]]; then
    loginfo "Invoking on workers: ${workers_cmd}"
    for ((i=0; i < NUM_WORKERS; i++)); do
      if (( ${i} > 0 && ${i} % ${MAX_CONCURRENT_ASYNC_PROCESSES} == 0 )); then
        await_async_jobs 'ssh (partial)'
        loginfo 'Done. Invoking next batch...'
      fi
      NODE=${WORKERS[${i}]}
      # Instrumented command piping stderr/stdout to files.
      local inst_cmd=$(get_remote_cmd "${workers_cmd}" "${workers_logname}")
      run_gcutil_cmd ssh ${NODE} ${inst_cmd} &
      sleep_for_api_ops
    done
  fi
  if [[ -n "${master_cmd}" ]] && [[ "${TARGET}" != workers ]]; then
    loginfo "Invoking on master: ${master_cmd}"
    local inst_cmd=$(get_remote_cmd "${master_cmd}" "${master_logname}")
    run_gcutil_cmd ssh ${NAMENODE_HOSTNAME} ${inst_cmd} &
  fi
  # Wait for all the async stuff to finish before moving on.
  await_async_jobs 'ssh'
}

# Run a single command group specified in an environment file.
function run_command_group() {
  trap handle_error ERR
  if [[ "${BDUTIL_CMD}" == "deploy" ]]; then
    local log_suffix="deploy"
  else
    local log_suffix="$(date +%s)"
  fi

  local master_cmd_grp=${COMMAND_GROUP:-$1}
  local workers_cmd_grp=${COMMAND_GROUP:-$2}
  if [[ "${master_cmd_grp}" == "*" ]]; then
    local master_cmd=''
    local master_log=''
  else
    local master_cmd="./${master_cmd_grp}.sh"
    local master_log="${master_cmd_grp}_${log_suffix}"
  fi
  if [[ "${workers_cmd_grp}" == "*" ]]; then
    local workers_cmd=''
    local workers_log=''
  else
    local workers_cmd="./${workers_cmd_grp}.sh"
    local workers_log="${workers_cmd_grp}_${log_suffix}"
  fi

  run_remote_command "${master_cmd}" "${workers_cmd}" "${master_log}" "${workers_log}"
}

# Iterate over the deployment-specification's COMMAND_STEPS to run the setup.
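# Each COMMAND_STEPS entry is 'master_group,workers_group', where '*' skips
# that side; e.g. (hypothetical) 'deploy-master-setup,*' runs the
# deploy-master-setup group on the master only.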
function run_command_steps() {
  trap handle_error ERR
  for COMMAND_STR in ${COMMAND_STEPS[@]}; do
    local workers_cmd_grp=$(echo ${COMMAND_STR} | cut -d ',' -f 2)
    local master_cmd_grp=$(echo ${COMMAND_STR} | cut -d ',' -f 1)
    run_command_group "${master_cmd_grp}" "${workers_cmd_grp}"
    loginfo "Step '${COMMAND_STR}' done..."
  done

  loginfo 'Command steps complete.'
}

# Signal handler for SIGINT when the SOCKS proxy is running.
function socksproxy_shutdown() {
  cat <<SD_EOF
*******************************************************************************
**
** Proxy terminated.
**
*******************************************************************************
SD_EOF
}

# Create a SOCKS proxy to the namenode on local port $SOCKS_PROXY_PORT.
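# Once connected, point a SOCKS-capable client (e.g. a browser's proxy
# settings) at localhost:${SOCKS_PROXY_PORT} to reach the cluster's web UIs.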
function run_socks_proxy() {
  SUPPRESS_TRAPPED_ERRORS=1
  trap socksproxy_shutdown SIGINT
  cat <<EOF
*******************************************************************************
**
** Starting SOCKS proxy to ${NAMENODE_HOSTNAME} on port ${SOCKS_PROXY_PORT}
**
** To terminate the proxy press CTRL-C
**
*******************************************************************************
EOF
  FORCE_TTY_ALLOCATION=0
  PROXY_CMD=$(get_gcutil_cmd ssh \
      --ssh_arg=-N \
      --ssh_arg=-D \
      --ssh_arg="${SOCKS_PROXY_PORT}" \
      "${NAMENODE_HOSTNAME}")

  eval $PROXY_CMD
}

# Sets the names of the directories that hold invocation-specific files.
function configure_vm_directories() {
  local script_base_dir='/tmp/bdutil'
  local subdir=''
  case "${BDUTIL_CMD}" in
    deploy)
      subdir=deploy
      ;;
    run_command_group)
      subdir="${COMMAND_GROUP}_${INVOCATION_ID}"
      ;;
    *)
      subdir="${BDUTIL_CMD}_${INVOCATION_ID}"
      ;;
  esac
  VM_SCRIPT_DIR=${script_base_dir}/${subdir}
}

function prepend_line_to_file() {
  local line=$1
  local file=$2

  mv "${file}" "${file}.bak"
  echo "${line}" > "${file}"
  cat "${file}.bak" >> "${file}"
  rm "${file}.bak"
}

function parse_input() {
  trap handle_error ERR
  local positional_args=()

  function validate_argument() {
    local flag=$1
    local value=$2
    if [[ ${value} == -* ]] || [[ -z "${value}" ]]; then
      logerror "Improper value '${value}' for flag '${flag}'."
      print_help
    fi
  }

  # Ensure the overrides file exists for future use
  touch "${OVERRIDES_FILE}"

  while (( $# ));  do
    case "$1" in
      -b|--bucket)
        validate_argument $1 $2
        echo "CONFIGBUCKET=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -d|--use_attached_pds)
        if [[ "${2}" =~ ^(true|false|0|1)$ ]]; then
          echo "USE_ATTACHED_PDS=$2" >> ${OVERRIDES_FILE}
          shift 2
        else
          echo "USE_ATTACHED_PDS=true" >> ${OVERRIDES_FILE}
          shift
        fi
        ;;
      -e|--env_var_files)
        validate_argument $1 $2
        ENV_FILES+=(${2//,/ })
        shift 2
        ;;
      -F|--default_fs)
        validate_argument $1 $2
        echo "DEFAULT_FS=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -f|--force)
        SKIP_PROMPT=1
        shift
        ;;
      -h|--help)
        print_usage
        ;;
      -i|--image)
        validate_argument $1 $2
        echo "GCE_IMAGE=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -m|--machine_type)
        validate_argument $1 $2
        echo "GCE_MACHINE_TYPE=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      --network)
        validate_argument $1 $2
        echo "GCE_NETWORK=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -n|--num_workers)
        validate_argument $1 $2
        echo "NUM_WORKERS=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -P|--prefix)
        validate_argument $1 $2
        echo "PREFIX=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -p|--project)
        validate_argument $1 $2
        echo "PROJECT=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -t|--target)
        validate_argument $1 $2
        readonly TARGET="$2"
        shift 2
        ;;
      -u|--upload_files)
        validate_argument $1 $2
        local extra_uploads=(${2//,/ })
        echo "UPLOAD_FILES+=(${extra_uploads[@]})" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      -v|--verbose)
        if [[ "${2}" =~ ^(true|false|0|1)$ ]]; then
          echo "VERBOSE_MODE=$2" >> ${OVERRIDES_FILE}
          shift 2
        else
          echo "VERBOSE_MODE=true" >> ${OVERRIDES_FILE}
          shift
        fi
        ;;
      -z|--zone)
        validate_argument $1 $2
        echo "GCE_ZONE=$2" >> ${OVERRIDES_FILE}
        shift 2
        ;;
      --)
        shift
        break
        ;;
      -|-?|--*)
        logerror "Error! Unknown flag: '$1'."
        print_help
        ;;
      -??*)
        # Split flags
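        # e.g. '-fn 5' is rewritten to '-f -n 5' and re-parsed.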
        set -- ${1:0:2} -${1:2} ${@:2}
        ;;
      -*)
        logerror "Error! Unknown flag: '$1'."
        print_help
        ;;
      *)
        positional_args+=($1)
        shift
        ;;
    esac
  done

  set -- ${positional_args[@]} $@

  if ! (( $# )); then
    logerror "Error! No command specified."
    print_help
  else
    BDUTIL_CMD=$1
    ADDITIONAL_ARGS=(${@:2})
  fi

  case "${BDUTIL_CMD}" in
    run_command_group)
      if (( ${#ADDITIONAL_ARGS[@]} == 1 )); then
        readonly COMMAND_GROUP=${ADDITIONAL_ARGS[0]}
      else
        logerror "Error! run_command_group only takes 1 argument."
        logerror "Got arguments: ${ADDITIONAL_ARGS[@]}"
        print_help
      fi
      ;;
    run_command)
      if (( ${#ADDITIONAL_ARGS[@]} )); then
        readonly REMOTE_COMMAND="${ADDITIONAL_ARGS[@]}"
        prepend_line_to_file 'UPLOAD_FILES=()' "${OVERRIDES_FILE}"
      else
        logerror "Error! run_command requires a command"
        print_help
      fi
      ;;
    generate_config)
      if (( ${#ADDITIONAL_ARGS[@]} == 1 )); then
        readonly GENERATE_CONFIG_FILENAME=${ADDITIONAL_ARGS[0]}
      else
        logerror "Error! generate_config only takes 1 argument."
        logerror "Got arguments: ${ADDITIONAL_ARGS[@]}"
        print_help
      fi
      ;;
    create|deploy|delete|run_command_steps)
      if (( ${#ADDITIONAL_ARGS[@]} )); then
        logerror "Error! ${BDUTIL_CMD} doesn't take any arguments."
        logerror "Got arguments: ${ADDITIONAL_ARGS[@]}"
        print_help
      fi
      ;;
    shell)
      # No extra argument validation.
      ;;
    socksproxy)
      if (( ${#ADDITIONAL_ARGS[@]} )); then
        readonly SOCKS_PROXY_PORT=${ADDITIONAL_ARGS[0]}
      else
        readonly SOCKS_PROXY_PORT=1080
      fi
      ;;
    *)
      logerror "Error! unknown command '${BDUTIL_CMD}'."
      print_help
  esac
}

function main() {
  trap handle_error ERR

  # The temporary directory in which to place generated scripts for running
  # on remote VMs. Can be modified if a different directory is preferred.
  INVOCATION_ID="$(date +%Y%m%d-%H%M%S)"
  SCRIPT_TMPDIR="/tmp/bdutil-${INVOCATION_ID}"
  loginfo "Using local tmp dir for staging files: ${SCRIPT_TMPDIR}"

  # A file containing more detailed debug info from inside VMs, only created
  # on failure.
  VM_DEBUG_FILE="${SCRIPT_TMPDIR}/vmdebuginfo.txt"

  # A file containing a copy of the stdout from running gcutil commands.
  GCUTIL_STDOUT_FILE="${SCRIPT_TMPDIR}/gcutil_out.txt"

  # A file containing a copy of the stderr from running gcutil commands.
  GCUTIL_STDERR_FILE="${SCRIPT_TMPDIR}/gcutil_err.txt"

  # The aggregated file containing gcutil stdout/stderr and debug info from VMs,
  # only created on failure.
  AGGREGATE_DEBUG_FILE="${SCRIPT_TMPDIR}/debuginfo.txt"

  # File holding overrides derived from command-line flags.
  OVERRIDES_FILE="${SCRIPT_TMPDIR}/flag_overrides_env.sh"

  # Create the temporary staging directory.
  mkdir ${SCRIPT_TMPDIR}

  parse_input "$@"

  source_env_files

  run_sanity_checks

  configure_vm_directories


  # Create the files which will contain gcutil stdout/stderr.
  touch ${GCUTIL_STDOUT_FILE}
  touch ${GCUTIL_STDERR_FILE}

  case ${BDUTIL_CMD} in
    create)
      prompt_confirmation 'Create'
      create_cluster
      ;;
    delete)
      prompt_confirmation 'Delete'
      delete_cluster
      ;;
    deploy)
      prompt_confirmation 'Deploy'
      validate_heavyweight_settings
      generate_scripts_from_command_groups
      create_cluster
      upload_scripts_and_files
      run_command_steps
      ;;
    generate_config)
      prompt_confirmation "Generate config '${GENERATE_CONFIG_FILENAME}' for"
      generate_config_file
      ;;
    run_command_steps)
      prompt_confirmation 'Run command steps on'
      validate_heavyweight_settings
      generate_scripts_from_command_groups
      upload_scripts_and_files
      run_command_steps
      ;;
    run_command_group)
      prompt_confirmation "Run command group '${COMMAND_GROUP}' on"
      validate_heavyweight_settings
      generate_scripts_from_command_groups
      upload_scripts_and_files
      run_command_group ${COMMAND_GROUP}
      ;;
    run_command)
      prompt_confirmation "Run command: '${REMOTE_COMMAND}' on"
      validate_heavyweight_settings
      upload_scripts_and_files
      run_remote_command "${REMOTE_COMMAND}" "${REMOTE_COMMAND}"
      ;;
    shell)
      shell_cmd=$(get_gcutil_cmd ssh "${NAMENODE_HOSTNAME}")
      eval $shell_cmd
      ;;
    socksproxy)
      run_socks_proxy
      ;;
    *)
      logerror "Unknown command: '${BDUTIL_CMD}'. Exiting."
      print_help
      ;;
  esac

  loginfo 'Execution complete. Cleaning up temporary files...'
  rm -r ${SCRIPT_TMPDIR}

  case ${BDUTIL_CMD} in
    delete|generate_config|shell|socksproxy)
      loginfo "Cleanup complete."
      ;;
    *)
      # For the suggested login command, don't add a -tt ssh arg.
      FORCE_TTY_ALLOCATION=0
      LOGIN_CMD=$(get_gcutil_cmd ssh ${NAMENODE_HOSTNAME})
      loginfo "Cleanup complete. To log in to the master: ${LOGIN_CMD}"
      ;;
  esac
}

BDUTIL_DIR="$(dirname "$0")"

# Call main function
main "$@"
