#!/bin/bash
# basic amazon s3 operations
# Licensed under the terms of the GNU GPL v2
# Copyright 2007 Victor Lowther
#
# Recovered from the gitweb blobdiff of "s3"
# (http://git.hcoop.net/clinton/scripts.git, blob 4d070b4).
#
# Required environment:
#   S3_ACCESS_KEY_ID      - the S3 access key id
#   S3_SECRET_ACCESS_KEY  - path to a file holding the 40 byte secret key

# print a message on stderr and bail
die() {
    # $* = message words; joined with spaces into a single line.
    printf '%s\n' "$*" >&2
    exit 1
}

# check to see if the variable name passed exists and holds a value.
# Die if it does not.
check_or_die() {
    # $1 = NAME of the variable to check (looked up indirectly)
    [[ ${!1} ]] || die "Environment variable ${1} is not set."
}

# check to see if we have all the needed S3 variables defined.
# Bail if we do not.
check_s3() {
    local sak x
    for x in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY; do
        check_or_die "${x}"
    done
    [[ -f ${S3_SECRET_ACCESS_KEY} ]] || die "S3_SECRET_ACCESS_KEY must point to a file!"
    # Read from stdin so wc prints only the count (no file name); some wc
    # implementations pad the number with blanks, so keep the digits only.
    sak="$(wc -c < "${S3_SECRET_ACCESS_KEY}")"
    sak="${sak//[!0-9]/}"
    (( ${sak:-0} == 40 )) || \
        die "S3 Secret Access Key is not exactly 40 bytes long.  Please fix it."
}

# check to see if our external dependencies exist
check_dep() {
    # $@ = command names to look for.
    # Every missing command is reported before we give up.
    local res=0 dep
    for dep in "$@"; do
        # command -v is the shell builtin way to ask "is this runnable?"
        command -v "${dep}" >/dev/null 2>&1 || {
            res=1
            printf '%s not found.\n' "${dep}" >&2
        }
    done
    (( res == 0 )) || die "aborting."
}

check_deps() {
    # sed and wc are used by this script as well, so check for them too.
    check_dep openssl date hmac cat grep curl sed wc
    check_s3
}

urlenc() {
    # $1 = string to url encode
    # output is on stdout (no trailing newline)
    # we don't urlencode everything, just enough stuff.
    local src="${1}" out="" ch i
    for (( i = 0; i < ${#src}; i++ )); do
        ch="${src:i:1}"
        case "${ch}" in
            '%')   out+='%25' ;;
            ' ')   out+='%20' ;;
            '#')   out+='%23' ;;
            '$')   out+='%24' ;;
            '&')   out+='%26' ;;
            '+')   out+='%2b' ;;
            ',')   out+='%2c' ;;
            ':')   out+='%3a' ;;
            ';')   out+='%3b' ;;
            '?')   out+='%3f' ;;
            '@')   out+='%40' ;;
            $'\t') out+='%09' ;;
            *)     out+="${ch}" ;;
        esac
    done
    printf '%s' "${out}"
}

xmldec() {
    # no parameters.
    # accept input on stdin, put it on stdout.
    # patches accepted to get more stuff
    # &amp; is decoded last so that "&amp;lt;" becomes "&lt;" and not "<".
    sed -e 's/&quot;/"/g' \
        -e "s/&apos;/'/g" \
        -e 's/&lt;/</g' \
        -e 's/&gt;/>/g' \
        -e 's/&amp;/\&/g'
}

## basic S3 functionality.  x-amz-header functionality is not implemented.
# make an S3 signature string, which will be output on stdout.
s3_signature_string() {
    # $1 = HTTP verb
    # $2 = date string, must be in UTC
    # $3 = bucket name, if any
    # $4 = resource path, if any
    # $5 = content md5, if any
    # $6 = content MIME type, if any
    # $7 = canonicalized headers, if any
    # signature string will be output on stdout
    # The :? expansions abort with the given message when verb/date are empty.
    local verb="${1:?Must pass a verb to s3_signature_string!}"
    local date="${2:?Must pass a date to s3_signature_string!}"
    local bucket="${3}"
    local resource="${4}"
    local md5="${5}"
    local mime="${6}"
    local headers="${7}"
    # S3_SECRET_ACCESS_KEY is the path of the key file, handed to hmac as is.
    printf '%s\n%s\n%s\n%s\n%s%s%s' \
        "${verb}" "${md5}" "${mime}" "${date}" \
        "${headers}" "${bucket}" "${resource}" | \
        hmac sha1 "${S3_SECRET_ACCESS_KEY}" | openssl base64 -e -a
}

# cheesy, but it is the best way to have multiple headers.
curl_headers() {
    # each arg passed will be output on its own line, in the
    # 'header = "..."' form that curl accepts in a -K config file.
    local hdr
    for hdr in "$@"; do
        printf 'header = "%s"\n' "${hdr}"
    done
}

s3_curl() {
    # invoke curl to do all the heavy HTTP lifting
    # $1 = method (one of GET, PUT, DELETE or HEAD.)
    # $2 = remote bucket.
    # $3 = remote name
    # $4 = local name.
    # returns curl's exit status.
    local verb="${1}" bucket remote date sig md5
    local -a curlopts headers
    # Options and headers are kept in arrays so that file names with blanks
    # survive and no empty argument is handed to curl.
    curlopts=(--connect-timeout 10 --fail --silent)
    headers=()
    bucket="${2:+/${2}}/"      # slashify the bucket
    remote="$(urlenc "${3}")"  # if you don't, strange things may happen.
    [[ ${CURL_S3_DEBUG} == true ]] && curlopts+=(--show-error)
    case "${verb}" in
        GET)
            curlopts+=(-o "${4:--}")  # stdout if no $4
            ;;
        PUT)
            [[ ${2} ]] || die "PUT can has bucket?"
            if [[ ! ${3} ]]; then
                # no remote name: a bodyless PUT on the bucket itself
                curlopts+=(-X PUT)
                headers+=("Content-Length: 0")
            elif [[ -f ${4} ]]; then
                md5="$(openssl dgst -md5 -binary "${4}" | openssl base64 -e -a)"
                curlopts+=(-T "${4}")
                headers+=("Expect: 100-continue")
            else
                die "Cannot write non-existing file ${4}"
            fi
            ;;
        DELETE)
            curlopts+=(-X DELETE)
            ;;
        HEAD)
            curlopts+=(-I)
            ;;
        *)
            die "Unknown verb ${verb}.  It probably would not have worked anyways."
            ;;
    esac
    # LC_ALL=C keeps day and month names in English whatever the user's
    # locale is; %d gives a two digit day instead of a blank padded one.
    date="$(LC_ALL=C TZ=UTC date '+%a, %d %b %Y %H:%M:%S %z')"
    sig="$(s3_signature_string "${verb}" "${date}" "${bucket}" "${remote}" "${md5}")"
    [[ ${sig} ]] || die "Could not compute the request signature."

    headers+=("Authorization: AWS ${S3_ACCESS_KEY_ID}:${sig}")
    headers+=("Date: ${date}")
    [[ ${md5} ]] && headers+=("Content-MD5: ${md5}")
    # NOTE(review): requests go out over plain http, as in the original.
    curl "${curlopts[@]}" -K <(curl_headers "${headers[@]}") \
        "http://s3.amazonaws.com${bucket}${remote}"
}

s3_put() {
    # $1 = remote bucket to put it into
    # $2 = remote name to put
    # $3 = file to put.  Defaults to $2 when not given.
    s3_curl PUT "${1}" "${2}" "${3:-${2}}"
}

s3_get() {
    # $1 = bucket to get file from
    # $2 = remote file to get
    # $3 = local file to get into. Will be overwritten if it exists.
    #      Defaults to $2; output goes to stdout when both are empty.
    #      If this contains a path, that path must exist before calling this.
    s3_curl GET "${1}" "${2}" "${3:-${2}}"
}

s3_test() {
    # same args as s3_get, but uses the HEAD verb instead of the GET verb.
    # only the exit status is of interest, the headers are thrown away.
    s3_curl HEAD "${1}" "${2}" >/dev/null
}

# Hideously ugly, but it works well enough.
s3_buckets() {
    # print the name of every bucket, one per line: pick the <Name> elements
    # out of the service listing, strip the tags, decode the entities.
    s3_get | grep -o '<Name>[^<]*</Name>' | sed 's/<[^>]*>//g' | xmldec
}

# this will only return the first thousand entries, alas
# Maybe some kind soul can fix this without writing an XML parser in bash?
s3_list() {
    # $1 = bucket to list
    # prints one key per line; returns 1 when no bucket is given.
    [[ -n ${1} ]] || return 1
    s3_get "${1}" | grep -o '<Key>[^<]*</Key>' | sed 's/<[^>]*>//g' | xmldec
}

s3_delete() {
    # $1 = bucket to delete from
    # $2 = item to delete
    s3_curl DELETE "${1}" "${2}"
}

# because this uses s3_list, it suffers from the same flaws.
s3_rmrf() {
    # $1 = bucket to delete everything from
    local f
    # IFS= and -r keep leading blanks and backslashes in key names intact.
    s3_list "${1}" | while IFS= read -r f; do
        s3_delete "${1}" "${f}"
    done
}

check_deps
case "${1}" in
    put)     shift; s3_put "$@" ;;
    get)     shift; s3_get "$@" ;;
    rm)      shift; s3_delete "$@" ;;
    ls)      shift; s3_list "$@" ;;
    test)    shift; s3_test "$@" ;;
    buckets) s3_buckets ;;
    rmrf)    shift; s3_rmrf "$@" ;;
    *)       die "Unknown command ${1}." ;;
esac