s3: Parameterize curl command.
[clinton/scripts.git] / s3
CommitLineData
#!/bin/bash
# basic amazon s3 operations
# Licensed under the terms of the GNU GPL v2
# Copyright 2007 Victor Lowther <victor.lowther@gmail.com>

# curl binary used for every HTTP request. May be overridden from the
# environment; the historical hard-coded path remains the default.
CURL="${CURL:-/home/mwolson_admin/bin/curl}"
# HMAC helper script; looked up in the same directory as this script.
HMAC="$(dirname "$0")/s3-hmac"
a0d92e63 8
# Print a message on stderr and terminate the script with status 1.
# $@ = words of the message; they are joined with single spaces and
#      printed verbatim (no word splitting or globbing of the text).
die() {
  printf '%s\n' "$*" >&2
  exit 1
}
14
# Verify that the variable whose NAME is passed holds a non-empty value.
# $1 = name of the variable to inspect (looked up indirectly via ${!1})
# Calls die (which exits) when the name is empty or the variable is
# unset/empty; returns 0 otherwise.
check_or_die() {
  [[ -n ${1} && -n ${!1} ]] || die "Environment variable ${1} is not set."
}
20
# Check that all needed S3 variables are defined; bail out if not.
# Requires:
#   S3_ACCESS_KEY_ID      - non-empty
#   S3_SECRET_ACCESS_KEY  - path of a regular file that is exactly
#                           40 bytes long
# Calls die (which exits) on the first violated requirement.
check_s3() {
  local name sak
  for name in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY; do
    check_or_die "${name}"
  done
  [[ -f ${S3_SECRET_ACCESS_KEY} ]] || die "S3_SECRET_ACCESS_KEY must point to a file!"
  # Feed the file on stdin so wc prints only the count; parsing
  # "COUNT FILENAME" breaks when the file name begins with a digit.
  sak="$(wc -c < "${S3_SECRET_ACCESS_KEY}")" ||
    die "Cannot read ${S3_SECRET_ACCESS_KEY}."
  (( sak == 40 )) ||
    die "S3 Secret Access Key is not exactly 40 bytes long. Please fix it."
}
# Check that our external dependencies exist.
# $@ = command names to look up
# Every missing command is reported on stderr as "<name> not found.";
# if at least one is missing, die is called afterwards (which exits).
check_dep() {
  local tool missing=0
  for tool in "$@"; do
    # command -v is the portable replacement for which(1).
    if ! command -v "${tool}" > /dev/null 2>&1; then
      missing=1
      printf '%s not found.\n' "${tool}" >&2
    fi
  done
  (( missing == 0 )) || die "aborting."
}
42
# Check that the HMAC helper named by $HMAC is an executable regular file.
# Calls die (which exits) otherwise. $HMAC is quoted so that an empty
# value or a path containing spaces is rejected/handled correctly instead
# of confusing the test.
check_hmac() {
  if [[ ! -f ${HMAC} || ! -x ${HMAC} ]]; then
    die "hmac script not found or not executable."
  fi
}
48
# Run every start-up sanity check: external tools, the HMAC helper and
# the S3 credential variables. Each check exits the script on failure.
check_deps() {
  # sed and wc are used by urlenc/xmldec/check_s3, so verify them too.
  check_dep openssl date cat grep sed wc
  check_hmac
  check_s3
}
54
# URL-encode a string.
# $1 = string to url encode
# Output is on stdout, without a trailing newline added by us.
# We don't urlencode everything, just enough stuff: % space # $ & + , :
# ; ? @ and TAB. '%' is handled first so escapes produced by the later
# rules are not encoded a second time.
urlenc() {
  local tab=$'\t'
  # printf instead of echo -n: echo would swallow inputs like "-n".
  printf '%s' "${1}" |
    sed -e 's/%/%25/g' \
        -e 's/ /%20/g' \
        -e 's/#/%23/g' \
        -e 's/[$]/%24/g' \
        -e 's/&/%26/g' \
        -e 's/[+]/%2b/g' \
        -e 's/,/%2c/g' \
        -e 's/:/%3a/g' \
        -e 's/;/%3b/g' \
        -e 's/[?]/%3f/g' \
        -e 's/@/%40/g' \
        -e "s/${tab}/%09/g"
}
73
# Decode the predefined XML entities.
# No parameters; accepts input on stdin, puts it on stdout.
# Handles &quot; &lt; &gt; &apos; and &amp;. &amp; is decoded LAST so
# that text such as "&amp;lt;" becomes "&lt;" and is not decoded twice.
# Patches accepted to get more stuff.
xmldec() {
  sed -e 's/&quot;/"/g' \
      -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e "s/&apos;/'/g" \
      -e 's/&amp;/\&/g'
}
83
## basic S3 functionality. x-amz-header functionality is not implemented.
# Make an S3 signature, which will be output on stdout.
# $1 = HTTP verb (required)
# $2 = date string, must be in UTC (required)
# $3 = bucket name, if any
# $4 = resource path, if any
# $5 = content md5, if any
# $6 = content MIME type, if any
# $7 = canonicalized headers, if any
# The string "verb\nmd5\nmime\ndate\n" followed by headers, bucket and
# resource is piped through "$HMAC sha1 $S3_SECRET_ACCESS_KEY" and the
# result is base64-encoded with openssl.
# A missing verb or date aborts the (non-interactive) shell with the
# corresponding message on stderr.
s3_signature_string() {
  local verb="${1:?Must pass a verb to s3_signature_string!}"
  local date="${2:?Must pass a date to s3_signature_string!}"
  local bucket="${3}"
  local resource="${4}"
  local md5="${5}"
  local mime="${6}"
  local headers="${7}"
  printf '%s\n%s\n%s\n%s\n%s%s%s' \
    "${verb}" "${md5}" "${mime}" "${date}" \
    "${headers}" "${bucket}" "${resource}" |
    "${HMAC}" sha1 "${S3_SECRET_ACCESS_KEY}" |
    openssl base64 -e -a
}
109
# Cheesy, but it is the best way to have multiple headers: emit them in
# curl config-file syntax so they can be fed to curl via -K.
# $@ = header lines; each one is output on its own line as
#      header = "<value>"
# Prints nothing when called without arguments.
curl_headers() {
  local hdr
  for hdr in "$@"; do
    printf 'header = "%s"\n' "${hdr}"
  done
}
119
# Invoke curl to do all the heavy HTTP lifting.
# $1 = method (one of GET, PUT, DELETE or HEAD)
# $2 = remote bucket.
# $3 = remote name
# $4 = local name (GET: output file, stdout if empty; PUT: file to send).
# $5 = bandwidth limit, passed to curl --limit-rate when non-empty.
# Globals read: CURL, S3_ACCESS_KEY_ID, CURL_S3_DEBUG ("true" adds
#               --show-error).
# Returns curl's exit status; unknown verbs and invalid PUT requests call
# die (which exits).
s3_curl() {
  local bucket remote date sig md5
  # Arrays keep every option a separate word; nothing is passed to curl
  # as an empty argument and nothing leaks into the global scope.
  local -a verb_args=() headers=() stdopts=()

  bucket="${2:+/${2}}/"      # slashify the bucket
  remote="$(urlenc "${3}")"  # if you don't, strange things may happen.

  stdopts=(--connect-timeout 10 --fail --silent)
  if [[ ${CURL_S3_DEBUG} == true ]]; then
    stdopts+=(--show-error)
  fi
  if [[ -n ${5} ]]; then
    stdopts+=(--limit-rate "${5}")
  fi

  case "${1}" in
    GET)
      verb_args=(-o "${4:--}") # stdout if no $4
      ;;
    PUT)
      [[ ${2} ]] || die "PUT can has bucket?"
      if [[ ! ${3} ]]; then
        # No remote name: PUT on the bucket itself with an empty body.
        verb_args=(-X PUT)
        headers+=("Content-Length: 0")
      elif [[ -f ${4} ]]; then
        md5="$(openssl dgst -md5 -binary "${4}" | openssl base64 -e -a)"
        verb_args=(-T "${4}")
        headers+=("Expect: 100-continue")
      else
        die "Cannot write non-existing file ${4}"
      fi
      ;;
    DELETE)
      verb_args=(-X DELETE)
      ;;
    HEAD)
      verb_args=(-I)
      ;;
    *)
      die "Unknown verb ${1}. It probably would not have worked anyways."
      ;;
  esac

  # LC_ALL=C keeps day/month names in English regardless of the user's
  # locale; %d gives a zero-padded two-digit day.
  date="$(LC_ALL=C TZ=UTC date '+%a, %d %b %Y %H:%M:%S %z')"
  sig="$(s3_signature_string "${1}" "${date}" "${bucket}" "${remote}" "${md5}")"

  headers+=("Authorization: AWS ${S3_ACCESS_KEY_ID}:${sig}")
  headers+=("Date: ${date}")
  if [[ ${md5} ]]; then
    headers+=("Content-MD5: ${md5}")
  fi

  # Headers are handed over through a config "file" (process
  # substitution) read by curl -K.
  "${CURL}" "${verb_args[@]}" "${stdopts[@]}" \
    -K <(curl_headers "${headers[@]}") \
    "http://s3.amazonaws.com${bucket}${remote}"
}
164
# Upload a file (or, with no remote name, issue an empty PUT on a bucket).
# $1 = remote bucket to put it into
# $2 = remote name to put
# $3 = file to put; defaults to $2 when omitted or empty.
# $4 = bandwidth limit.
# Returns the status of s3_curl.
s3_put() {
  s3_curl PUT "${1}" "${2}" "${3:-${2}}" "${4}"
}
173
# Download an object.
# $1 = bucket to get file from
# $2 = remote file to get
# $3 = local file to get into; defaults to $2. Will be overwritten if it
#      exists. If this contains a path, that path must exist before
#      calling this.
# $4 = bandwidth limit.
# Returns the status of s3_curl.
s3_get() {
  s3_curl GET "${1}" "${2}" "${3:-${2}}" "${4}"
}
183
# Issue a HEAD request for an object and discard the output.
# $1 = bucket
# $2 = remote name
# Returns the status of s3_curl.
s3_test() {
  s3_curl HEAD "${1}" "${2}" > /dev/null
}
189
# Hideously ugly, but it works well enough.
# List bucket names, one per line on stdout: fetches the service root
# via s3_get (no arguments), extracts every <Name>...</Name> element,
# strips the tags and decodes XML entities.
# Returns the status of s3_get when the request fails (the plain pipeline
# would hide it), otherwise the status of the final xmldec stage.
s3_buckets() {
  local xml
  xml="$(s3_get)" || return
  printf '%s\n' "${xml}" |
    grep -o '<Name>[^>]*</Name>' |
    sed 's/<[^>]*>//g' |
    xmldec
}
195
# This will only return the first thousand entries, alas.
# Maybe some kind soul can fix this without writing an XML parser in bash?
# List the keys of a bucket, one per line on stdout: every
# <Key>...</Key> element of the listing, tags stripped and XML entities
# decoded.
# $1 = bucket to list
# Returns 1 when no bucket is given, the status of s3_get when the
# request fails, otherwise the status of the final xmldec stage.
s3_list() {
  [[ -n ${1} ]] || return 1
  local xml
  xml="$(s3_get "${1}")" || return
  printf '%s\n' "${xml}" |
    grep -o '<Key>[^>]*</Key>' |
    sed 's/<[^>]*>//g' |
    xmldec
}
205
# Delete a single object.
# $1 = bucket to delete from
# $2 = item to delete
# Returns the status of s3_curl.
s3_delete() {
  s3_curl DELETE "${1}" "${2}"
}
212
# Delete every key that s3_list reports for a bucket.
# Because this uses s3_list, it suffers from the same flaws.
# $1 = bucket to delete everything from
# Returns the status of s3_list if listing fails, 1 if at least one
# delete failed, 0 otherwise.
s3_rmrf() {
  local keys key rc=0
  keys="$(s3_list "${1}")" || return
  # IFS= and -r keep leading blanks and backslashes in key names intact.
  while IFS= read -r key; do
    # An empty listing still yields one empty line here; never issue a
    # delete without a key.
    [[ -n ${key} ]] || continue
    s3_delete "${1}" "${key}" < /dev/null || rc=1
  done <<< "${keys}"
  return "${rc}"
}
220
# Entry point: verify the environment, then dispatch on the first
# command-line word. The remaining arguments are handed to the matching
# s3_* function unchanged.
check_deps
case "${1}" in
  put) shift; s3_put "$@" ;;
  get) shift; s3_get "$@" ;;
  rm) shift; s3_delete "$@" ;;
  ls) shift; s3_list "$@" ;;
  test) shift; s3_test "$@" ;;
  buckets) s3_buckets ;;
  rmrf) shift; s3_rmrf "$@" ;;
  *) die "Unknown command ${1}." ;;
esac
233