s3: Wait longer after failure, pick up any failed pieces later.
[clinton/scripts.git] / s3
CommitLineData
#!/bin/bash
# basic amazon s3 operations
# Licensed under the terms of the GNU GPL v2
# Copyright 2007 Victor Lowther <victor.lowther@gmail.com>

# Path of the curl binary used for every HTTP request.
CURL=/home/mwolson_admin/bin/curl
# HMAC helper script, looked up in the same directory as this script.
# "$0" is quoted so a script path containing spaces still resolves.
HMAC="$(dirname "$0")/s3-hmac"
# Number of retries after a failed transfer, and the pause between retries
# (passed verbatim to sleep).
ATTEMPTS=7
ATTEMPT_WAIT=1m
a0d92e63 10
# Print a message on stderr and abort the script.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Exits:     always, with status 1.
die() {
  # "$*" is quoted so the message is neither re-split nor glob-expanded,
  # and it goes to stderr so it never mixes with data written to stdout.
  printf '%s\n' "$*" >&2
  exit 1
}
16
# Make sure the variable whose NAME is passed in $1 holds a non-empty value.
# Aborts through die when it does not.
check_or_die() {
  if [[ -z ${!1} ]]; then
    die "Environment variable ${1} is not set."
  fi
}
22
# Check that all the needed S3 variables are defined; bail if not.
# Globals: S3_ACCESS_KEY_ID      (read) must be non-empty
#          S3_SECRET_ACCESS_KEY  (read) must name a regular file that is
#                                exactly 40 bytes long
check_s3() {
  local sak x
  for x in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY; do
    check_or_die "${x}"
  done
  [[ -f ${S3_SECRET_ACCESS_KEY} ]] || die "S3_SECRET_ACCESS_KEY must point to a file!"
  # Feed the file on stdin so wc prints only the byte count; parsing
  # "COUNT FILENAME" breaks when the file name itself starts with digits.
  sak=$(wc -c <"${S3_SECRET_ACCESS_KEY}")
  (( sak == 40 )) ||
    die "S3 Secret Access Key is not exactly 40 bytes long. Please fix it."
}
# Check that our external dependencies exist.
# Arguments: $@ - command names to look up
# Outputs:   one "<name> not found." line on stderr per missing command
# Exits:     through die once all names were checked, if any was missing
check_dep() {
  local res=0
  while (( $# )); do
    # command -v is a shell builtin, so the lookup itself does not depend on
    # an external `which` being installed.
    if ! command -v "${1}" >/dev/null 2>&1; then
      res=1
      echo "${1} not found." >&2
    fi
    shift
  done
  (( res == 0 )) || die "aborting."
}
44
# Make sure the HMAC helper script exists and can be executed.
# Globals: HMAC (read) - path of the helper script
# Exits:   through die when the path is empty, missing, or not executable
check_hmac() {
  # [[ ]] does not word-split ${HMAC}, so an empty value or a path with
  # spaces is tested as-is instead of silently slipping through `test`.
  if [[ ! -f ${HMAC} || ! -x ${HMAC} ]]; then
    die "hmac script not found or not executable."
  fi
}
50
# Run every start-up check in order: external commands, the HMAC helper
# script, then the S3 credential variables.
check_deps() {
  local -a required=(openssl date cat grep)
  check_dep "${required[@]}"
  check_hmac
  check_s3
}
56
# URL-encode a string.
# Arguments: $1 - string to url encode
# Outputs:   the encoded string on stdout
# We don't urlencode everything, just enough stuff. '/' is left alone.
urlenc() {
  local tab=$'\t'
  # printf instead of `echo -n`, so inputs such as "-n" or "-e" are encoded
  # rather than being eaten as echo options.
  # '%' must be rewritten first: every later rule emits '%' itself.
  printf '%s' "${1}" |
    sed -e 's/%/%25/g' \
        -e 's/ /%20/g' \
        -e 's/#/%23/g' \
        -e 's/\$/%24/g' \
        -e 's/&/%26/g' \
        -e 's/+/%2b/g' \
        -e 's/,/%2c/g' \
        -e 's/:/%3a/g' \
        -e 's/;/%3b/g' \
        -e 's/?/%3f/g' \
        -e 's/@/%40/g' \
        -e "s/${tab}/%09/g"
}
75
# Decode the predefined XML entities.
# No parameters: accepts input on stdin, puts the decoded text on stdout.
# Patches accepted to get more stuff.
xmldec() {
  # &amp; is decoded last. Decoding it first would turn "&amp;lt;" (a
  # literal "&lt;") into "&lt;" and then, wrongly, into "<".
  sed -e 's/&quot;/"/g' \
      -e "s/&apos;/'/g" \
      -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e 's/&amp;/\&/g'
}
85
## basic S3 functionality. x-amz-header functionality is not implemented.
# Make an S3 signature string, which will be output on stdout.
# Arguments:
#   $1 = HTTP verb (required)
#   $2 = date string, must be in UTC (required)
#   $3 = bucket name, if any
#   $4 = resource path, if any
#   $5 = content md5, if any
#   $6 = content MIME type, if any
#   $7 = canonicalized headers, if any
# Globals: HMAC (read), S3_SECRET_ACCESS_KEY (read)
# Outputs: the string-to-sign piped through "$HMAC sha1 <key file>" and then
#          base64-encoded by openssl, on stdout
s3_signature_string() {
  local verr="Must pass a verb to s3_signature_string!"
  local derr="Must pass a date to s3_signature_string!"
  # ${N:?word} prints WORD itself, so the message variable must be expanded
  # here; a bare "verr" would print the four letters "verr".
  local verb="${1:?${verr}}"
  local date="${2:?${derr}}"
  local bucket="${3}"
  local resource="${4}"
  local md5="${5}"
  local mime="${6}"
  local headers="${7}"
  printf '%s\n%s\n%s\n%s\n%s%s%s' \
    "${verb}" "${md5}" "${mime}" "${date}" \
    "${headers}" "${bucket}" "${resource}" |
    "${HMAC}" sha1 "${S3_SECRET_ACCESS_KEY}" |
    openssl base64 -e -a
}
111
# Cheesy, but it is the best way to have multiple headers.
# Emits one curl config-file line (header = "...") per argument, each on its
# own line of stdout. Prints nothing when called without arguments.
curl_headers() {
  local hdr
  for hdr in "$@"; do
    printf 'header = "%s"\n' "${hdr}"
  done
}
121
# Invoke curl to do all the heavy HTTP lifting.
# Arguments:
#   $1 = method (one of GET, PUT, DELETE or HEAD)
#   $2 = remote bucket
#   $3 = remote name
#   $4 = local name
#   $5 = bandwidth limit (passed to curl --limit-rate), optional
# Globals: CURL, ATTEMPTS, ATTEMPT_WAIT, S3_ACCESS_KEY_ID, CURL_S3_DEBUG (read)
# Outputs: whatever curl writes to stdout; "Retrying ..." notices on stderr
# Returns: the exit status of the last curl invocation
s3_curl() {
  local bucket remote date sig md5 ret
  local tries=0
  local -a headers=() method_args=() std_opts=()

  bucket="${2:+/${2}}/"     # slashify the bucket
  remote="$(urlenc "${3}")" # if you don't, strange things may happen.

  # Option lists are arrays: nothing leaks into the global scope and no empty
  # "" argument is handed to curl for verbs that have no local file.
  std_opts=(--connect-timeout 10 --fail --silent)
  if [[ ${CURL_S3_DEBUG} == true ]]; then
    std_opts+=(--show-error)
  fi
  if [[ -n ${5} ]]; then
    std_opts+=(--limit-rate "${5}")
  fi

  case "${1}" in
    GET)
      method_args=(-o "${4:--}") # stdout if no $4
      ;;
    PUT)
      [[ -n ${2} ]] || die "PUT can has bucket?"
      if [[ -z ${3} ]]; then
        # No remote name: bodiless PUT on the bucket itself.
        method_args=(-X PUT)
        headers+=("Content-Length: 0")
      elif [[ -f ${4} ]]; then
        md5="$(openssl dgst -md5 -binary "${4}" | openssl base64 -e -a)"
        method_args=(-T "${4}")
        headers+=("Expect: 100-continue")
      else
        die "Cannot write non-existing file ${4}"
      fi
      ;;
    DELETE)
      method_args=(-X DELETE)
      ;;
    HEAD)
      method_args=(-I)
      ;;
    *)
      die "Unknown verb ${1}. It probably would not have worked anyways."
      ;;
  esac

  date="$(TZ=UTC date '+%a, %e %b %Y %H:%M:%S %z')"
  sig=$(s3_signature_string "${1}" "${date}" "${bucket}" "${remote}" "${md5}")

  headers+=("Authorization: AWS ${S3_ACCESS_KEY_ID}:${sig}")
  headers+=("Date: ${date}")
  if [[ -n ${md5} ]]; then
    headers+=("Content-MD5: ${md5}")
  fi

  # One initial attempt plus up to ATTEMPTS retries, pausing in between.
  while true; do
    "${CURL}" "${method_args[@]}" "${std_opts[@]}" \
      -K <(curl_headers "${headers[@]}") \
      "http://s3.amazonaws.com${bucket}${remote}"
    ret=$?
    (( ret == 0 )) && break
    (( tries < ATTEMPTS )) || break
    tries=$(( tries + 1 ))
    # stderr, so the notice never ends up inside data written to stdout.
    echo "Retrying ..." >&2
    sleep "${ATTEMPT_WAIT}"
  done
  return "${ret}"
}
178
# Upload through s3_curl with the PUT verb.
#   $1 = remote bucket to put it into
#   $2 = remote name to put
#   $3 = file to put; falls back to $2 when empty
#   $4 = bandwidth limit
# Returns the status of s3_curl.
s3_put() {
  local bucket="${1}" remote="${2}" limit="${4}"
  local file="${3:-${remote}}"
  s3_curl PUT "${bucket}" "${remote}" "${file}" "${limit}"
}
187
# Download through s3_curl with the GET verb.
#   $1 = bucket to get file from
#   $2 = remote file to get
#   $3 = local file to get into; falls back to $2 when empty. Will be
#        overwritten if it exists. If this contains a path, that path must
#        exist before calling this.
#   $4 = bandwidth limit
# Returns the status of s3_curl.
s3_get() {
  local bucket="${1}" remote="${2}" limit="${4}"
  local target="${3:-${remote}}"
  s3_curl GET "${bucket}" "${remote}" "${target}" "${limit}"
}
197
# Issue a HEAD request for bucket $1 / remote name $2 through s3_curl.
# Anything s3_curl prints on stdout is discarded; only its status is returned.
s3_test() {
  local bucket="${1}" remote="${2}"
  s3_curl HEAD "${bucket}" "${remote}" >/dev/null
}
203
# Hideously ugly, but it works well enough.
# Print the text of every <Name> element in the response to an argument-less
# s3_get, one per line, with XML entities decoded.
# Returns: the status of s3_get when the request failed, otherwise the status
#          of the final decoding stage.
s3_buckets() {
  local -a stage
  s3_get | grep -o '<Name>[^>]*</Name>' | sed 's/<[^>]*>//g' | xmldec
  # Snapshot PIPESTATUS at once; a plain $? would only reflect xmldec and
  # hide a failed request.
  stage=("${PIPESTATUS[@]}")
  (( stage[0] == 0 )) || return "${stage[0]}"
  return "${stage[3]}"
}
209
# This will only return the first thousand entries, alas.
# Maybe some kind soul can fix this without writing an XML parser in bash?
# Print the text of every <Key> element in the listing of a bucket, one per
# line, with XML entities decoded.
#   $1 = bucket to list
# Returns: 1 when no bucket is given; the status of s3_get when the request
#          failed; otherwise the status of the final decoding stage.
s3_list() {
  local -a stage
  [[ -n ${1} ]] || return 1
  s3_get "${1}" | grep -o '<Key>[^>]*</Key>' | sed 's/<[^>]*>//g' | xmldec
  # Snapshot PIPESTATUS at once; a plain $? would only reflect xmldec and
  # hide a failed request.
  stage=("${PIPESTATUS[@]}")
  (( stage[0] == 0 )) || return "${stage[0]}"
  return "${stage[3]}"
}
219
# Remove a single item through s3_curl with the DELETE verb.
#   $1 = bucket to delete from
#   $2 = item to delete
# Returns the status of s3_curl.
s3_delete() {
  local bucket="${1}" item="${2}"
  s3_curl DELETE "${bucket}" "${item}"
}
226
# Delete every key that s3_list reports for a bucket.
# Because this uses s3_list, it suffers from the same flaws.
#   $1 = bucket to delete everything from
s3_rmrf() {
  local f
  # IFS= and -r keep each key byte-for-byte: no trimming of leading or
  # trailing whitespace and no backslash processing, so the key that gets
  # deleted is exactly the key that was listed.
  s3_list "${1}" | while IFS= read -r f; do
    s3_delete "${1}" "${f}"
  done
}
234
# Entry point: run the start-up checks, then dispatch on the subcommand given
# as the first command-line argument. The remaining arguments are handed to
# the matching s3_* function (buckets takes none).
main() {
  local subcommand="${1}"
  check_deps
  shift
  case "${subcommand}" in
    put)     s3_put "$@" ;;
    get)     s3_get "$@" ;;
    rm)      s3_delete "$@" ;;
    ls)      s3_list "$@" ;;
    test)    s3_test "$@" ;;
    buckets) s3_buckets ;;
    rmrf)    s3_rmrf "$@" ;;
    *)       die "Unknown command ${subcommand}." ;;
  esac
}

main "$@"
247