#!/bin/bash
# basic Amazon S3 operations
# Licensed under the terms of the GNU GPL v2
# Copyright 2007 Victor Lowther <victor.lowther@gmail.com>

HMAC="$(dirname "$0")/s3-hmac"

# print a message and bail
die() {
  echo "$*"
  exit 1
}

# check to see if the variable name passed exists and holds a value.
# Die if it does not.
check_or_die() {
  [[ ${!1} ]] || die "Environment variable ${1} is not set."
}

# check to see if we have all the needed S3 variables defined.
# Bail if we do not.
check_s3() {
  local sak x
  for x in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY; do
    check_or_die "${x}"
  done
  [[ -f ${S3_SECRET_ACCESS_KEY} ]] || die "S3_SECRET_ACCESS_KEY must point to a file!"
  sak="$(wc -c < "${S3_SECRET_ACCESS_KEY}")"
  (( sak == 40 )) || \
    die "S3 Secret Access Key is not exactly 40 bytes long. Please fix it."
}

# check to see if our external dependencies exist
check_dep() {
  local res=0
  while [[ $# -ne 0 ]]; do
    which "${1}" &>/dev/null || { res=1; echo "${1} not found."; }
    shift
  done
  (( res == 0 )) || die "aborting."
}

check_hmac() {
  [[ -f ${HMAC} && -x ${HMAC} ]] || die "hmac script not found or not executable."
}

check_deps() {
  check_dep openssl date cat grep curl
  check_hmac
  check_s3
}

urlenc() {
  # $1 = string to url encode
  # output is on stdout
  # we don't urlencode everything, just enough stuff.
  # (the last rule encodes tabs; \t needs GNU sed)
  echo -n "${1}" |
  sed 's/%/%25/g
       s/ /%20/g
       s/#/%23/g
       s/\$/%24/g
       s/\&/%26/g
       s/+/%2b/g
       s/,/%2c/g
       s/:/%3a/g
       s/;/%3b/g
       s/?/%3f/g
       s/@/%40/g
       s/\t/%09/g'
}
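# For example (hypothetical filename), urlenc "my backup #1.tar"
# prints my%20backup%20%231.tar on stdout.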

xmldec() {
  # no parameters.
  # accept input on stdin, put it on stdout.
  # patches accepted to get more stuff
  # note: &amp; must be decoded last, or things like &amp;lt;
  # get double-decoded.
  sed 's/&quot;/"/g
       s/&lt;/</g
       s/&gt;/>/g
       s/&amp;/\&/g'
}

## basic S3 functionality. x-amz-header functionality is not implemented.
# make an S3 signature string, which will be output on stdout.
s3_signature_string() {
  # $1 = HTTP verb
  # $2 = date string, must be in UTC
  # $3 = bucket name, if any
  # $4 = resource path, if any
  # $5 = content md5, if any
  # $6 = content MIME type, if any
  # $7 = canonicalized headers, if any
  # signature string will be output on stdout
  local verr="Must pass a verb to s3_signature_string!"
  local verb="${1:?${verr}}"
  local bucket="${3}"
  local resource="${4}"
  local derr="Must pass a date to s3_signature_string!"
  local date="${2:?${derr}}"
  local mime="${6}"
  local md5="${5}"
  local headers="${7}"
  printf "%s\n%s\n%s\n%s\n%s%s%s" \
    "${verb}" "${md5}" "${mime}" "${date}" \
    "${headers}" "${bucket}" "${resource}" |
  "${HMAC}" sha1 "${S3_SECRET_ACCESS_KEY}" | openssl base64 -e
}
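# For reference, the printf above builds the classic (pre-Signature-V4)
# S3 string-to-sign:
#   VERB \n Content-MD5 \n Content-Type \n Date \n
#   CanonicalizedAmzHeaders + CanonicalizedResource
# e.g. (hypothetical values):
#   s3_signature_string GET "Thu, 01 Mar 2007 12:00:00 +0000" "/my-bucket/" "file.txt"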

# cheesy, but it is the best way to have multiple headers.
curl_headers() {
  # each arg passed will be output on its own line
  while (( $# )); do
    echo "header = \"${1}\""
    shift
  done
}
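# The output is curl config-file syntax; s3_curl below feeds it to curl
# via -K so each line becomes one HTTP request header. e.g.
#   curl_headers "Date: ${date}" "Content-MD5: ${md5}"
# prints one header = "..." line per argument.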

s3_curl() {
  # invoke curl to do all the heavy HTTP lifting
  # $1 = method (one of GET, PUT, DELETE, or HEAD)
  # $2 = remote bucket.
  # $3 = remote name
  # $4 = local name.
  # $5 = bandwidth limit.
  local bucket remote date sig md5 arg inout headers stdopts
  # header handling is kinda fugly, but it works.
  bucket="${2:+/${2}}/" # slashify the bucket
  remote="$(urlenc "${3}")" # if you don't, strange things may happen.
  stdopts="--connect-timeout 10 --fail --silent"
  [[ ${CURL_S3_DEBUG} == true ]] && stdopts="${stdopts} --show-error"
  [[ ${5} ]] && stdopts="${stdopts} --limit-rate ${5}"
  case "${1}" in
    GET) arg="-o" inout="${4:--}" # stdout if no $4
      ;;
    PUT) [[ ${2} ]] || die "PUT requires a bucket."
      if [[ ! ${3} ]]; then
        arg="-X PUT"
        headers+=("Content-Length: 0")
      elif [[ -f ${4} ]]; then
        md5="$(openssl dgst -md5 -binary "${4}" | openssl base64 -e)"
        arg="-T" inout="${4}"
        headers+=("Expect: 100-continue")
      else
        die "Cannot PUT nonexistent file ${4}"
      fi
      ;;
    DELETE) arg="-X DELETE"
      ;;
    HEAD) arg="-I" ;;
    *) die "Unknown verb ${1}. It probably would not have worked anyway." ;;
  esac
  date="$(TZ=UTC date '+%a, %e %b %Y %H:%M:%S %z')"
  sig=$(s3_signature_string "${1}" "${date}" "${bucket}" "${remote}" "${md5}")

  headers+=("Authorization: AWS ${S3_ACCESS_KEY_ID}:${sig}")
  headers+=("Date: ${date}")
  [[ ${md5} ]] && headers+=("Content-MD5: ${md5}")
  curl ${arg} ${inout:+"${inout}"} ${stdopts} \
    -K <(curl_headers "${headers[@]}") \
    "http://s3.amazonaws.com${bucket}${remote}"
  return $?
}
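# e.g. (hypothetical names):
#   s3_curl GET my-bucket backups/etc.tar.gz /tmp/etc.tar.gz 50k
# signs and issues GET http://s3.amazonaws.com/my-bucket/backups/etc.tar.gz,
# limited to 50k/s, writing the body to /tmp/etc.tar.gz.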

s3_put() {
  # $1 = remote bucket to put it into
  # $2 = remote name to put
  # $3 = local file to put. Defaults to $2 if not given.
  # $4 = bandwidth limit.
  s3_curl PUT "${1}" "${2}" "${3:-${2}}" "${4}"
  return $?
}
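# e.g. (hypothetical names): s3_put my-bucket backup.tar.gz /tmp/backup.tar.gz 100k
# uploads /tmp/backup.tar.gz as "backup.tar.gz", rate-limited to 100k/s.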

s3_get() {
  # $1 = bucket to get file from
  # $2 = remote file to get
  # $3 = local file to get into. Will be overwritten if it exists.
  #      If this contains a path, that path must exist before calling this.
  #      Defaults to $2 if not given.
  # $4 = bandwidth limit.
  s3_curl GET "${1}" "${2}" "${3:-${2}}" "${4}"
  return $?
}
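# e.g. (hypothetical names): s3_get my-bucket notes.txt
# downloads "notes.txt" into ./notes.txt, since the local name defaults to $2.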

s3_test() {
  # same args as s3_get, but uses the HEAD verb instead of the GET verb.
  s3_curl HEAD "${1}" "${2}" >/dev/null
  return $?
}
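# e.g. (hypothetical names):
#   s3_test my-bucket backup.tar.gz && echo "already uploaded"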

# Hideously ugly, but it works well enough.
s3_buckets() {
  s3_get | grep -o '<Name>[^>]*</Name>' | sed 's/<[^>]*>//g' | xmldec
  return $?
}

# this will only return the first thousand entries, alas
# Maybe some kind soul can fix this without writing an XML parser in bash?
# Also, xmldec above only handles a handful of XML entities.
s3_list() {
  # $1 = bucket to list
  [[ ${1} ]] || return 1
  s3_get "${1}" | grep -o '<Key>[^>]*</Key>' | sed 's/<[^>]*>//g' | xmldec
  return $?
}
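# e.g. (hypothetical names): s3_list my-bucket | grep '\.tar\.gz$'
# prints the matching keys in my-bucket, one per line.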

s3_delete() {
  # $1 = bucket to delete from
  # $2 = item to delete
  s3_curl DELETE "${1}" "${2}"
  return $?
}

# because this uses s3_list, it suffers from the same flaws.
s3_rmrf() {
  # $1 = bucket to delete everything from
  s3_list "${1}" | while read -r f; do
    s3_delete "${1}" "${f}"
  done
}

check_deps
case $1 in
  put) shift; s3_put "$@" ;;
  get) shift; s3_get "$@" ;;
  rm) shift; s3_delete "$@" ;;
  ls) shift; s3_list "$@" ;;
  test) shift; s3_test "$@" ;;
  buckets) s3_buckets ;;
  rmrf) shift; s3_rmrf "$@" ;;
  *) die "Unknown command ${1}." ;;
esac

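# Example session (hypothetical values):
#   export S3_ACCESS_KEY_ID=AKIAEXAMPLEKEY
#   export S3_SECRET_ACCESS_KEY=~/.s3-secret   # file holding the 40-byte secret key, no trailing newline
#   ./s3 put my-bucket backup.tar.gz /tmp/backup.tar.gz
#   ./s3 ls my-bucket
#   ./s3 rm my-bucket backup.tar.gz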