Rampantly initial checkin of progress on backup scripts.
[clinton/scripts.git] / s3
CommitLineData
a0d92e63 1#!/bin/bash
2# basic amazon s3 operations
3# Licensed under the terms of the GNU GPL v2
4# Copyright 2007 Victor Lowther <victor.lowther@gmail.com>
5
6
7
# print a message to stderr and bail out with status 1
die() {
  # $* = message words, joined with single spaces.
  # printf '%s' avoids echo's -n/-e pitfalls, the quoting prevents the
  # word-splitting and glob expansion the original unquoted $* suffered,
  # and diagnostics belong on stderr, not stdout.
  printf '%s\n' "$*" >&2
  exit 1
}
13
# Assert that the environment variable named by $1 holds a non-empty
# value; abort the script otherwise.
check_or_die() {
  local name=${1}
  # ${!name} is bash indirection: the value of the variable whose name
  # is stored in $name.
  if [[ -z ${!name} ]]; then
    die "Environment variable ${name} is not set."
  fi
}
19
# Verify that every S3 setting we need is present and plausible.
# Aborts (via die) on the first problem found.
check_s3() {
  local key_len var
  # both credentials must exist in the environment
  for var in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY; do
    check_or_die "${var}"
  done
  # the secret key is kept in a file; a well-formed one is 40 bytes
  [[ -f ${S3_SECRET_ACCESS_KEY} ]] || die "S3_SECRET_ACCESS_KEY must point to a file!"
  key_len="$(wc -c "${S3_SECRET_ACCESS_KEY}")"
  # wc appends the filename; keep only the leading byte count
  (( ${key_len%%[!0-9 ]*} == 40 )) || \
    die "S3 Secret Access Key is not exactly 40 bytes long. Please fix it."
}
# check that every named external dependency is on PATH.
# Prints one "<name> not found." line per missing tool, then dies if any
# were missing; returns 0 when all are present.
check_dep() {
  # $@ = program names to look for
  local missing=0
  while (( $# )); do
    # 'command -v' is the portable, POSIX way to probe PATH; the original
    # used 'which', which is an external tool with inconsistent behavior.
    command -v "${1}" >/dev/null 2>&1 || { missing=1; echo "${1} not found."; }
    shift
  done
  (( missing == 0 )) || die "aborting."
}
41
# Verify external tools and S3 configuration before doing any real work.
# NOTE(review): 'hmac' is not a standard Unix tool — presumably a local
# SHA1-HMAC helper shipped alongside this script; confirm it is installed.
check_deps() {
  check_dep openssl date hmac cat grep curl
  check_s3
}
46
urlenc() {
  # $1 = string to url encode
  # output is on stdout (no trailing newline)
  # We don't urlencode everything, just enough stuff for S3 paths.
  # Implemented with bash pattern substitution instead of sed; the original
  # sed script's final rule was meant to encode TAB as %09 but its pattern
  # had degraded to a space (already consumed by the %20 rule), making it
  # a dead no-op — the TAB encoding is restored here.
  local s=${1}
  s=${s//%/%25}        # must run first so escapes we add aren't re-encoded
  s=${s// /%20}
  s=${s//$'\t'/%09}
  s=${s//'#'/%23}
  s=${s//'$'/%24}
  s=${s//'&'/%26}
  s=${s//+/%2b}
  s=${s//,/%2c}
  s=${s//:/%3a}
  s=${s//;/%3b}
  s=${s//'?'/%3f}      # quoted: an unquoted ? would glob-match any char
  s=${s//@/%40}
  printf '%s' "${s}"
}
65
xmldec() {
  # no parameters.
  # accept input on stdin, put the decoded text on stdout.
  # patches accepted to get more stuff
  # &amp; must be decoded LAST: the original decoded it before &lt;/&gt;,
  # so a literal '&amp;lt;' in the XML double-decoded into '<' instead of
  # the correct '&lt;'.
  sed 's/&quot;/"/g
       s/&lt;/</g
       s/&gt;/>/g
       s/&amp;/\&/g'
}
75
## basic S3 functionality. x-amz-header functionality is not implemented.
# make an S3 signature string, which will be output on stdout.
s3_signature_string() {
  # $1 = HTTP verb (required)
  # $2 = date string, must be in UTC (required)
  # $3 = bucket name, if any
  # $4 = resource path, if any
  # $5 = content md5, if any
  # $6 = content MIME type, if any
  # $7 = canonicalized headers, if any
  # signature string will be output on stdout
  # Bug fix: the original wrote ${1:?verr} / ${2:?derr}, which prints the
  # literal words "verr"/"derr" (the variable reference was missing its $);
  # the messages are now inlined in the expansions.
  local verb="${1:?Must pass a verb to s3_signature_string!}"
  local date="${2:?Must pass a date to s3_signature_string!}"
  local bucket="${3}"
  local resource="${4}"
  local md5="${5}"
  local mime="${6}"
  local headers="${7}"
  # StringToSign = VERB\nMD5\nTYPE\nDATE\n<headers><bucket><resource>,
  # HMAC-SHA1-signed with the secret key file, then base64-encoded.
  printf "%s\n%s\n%s\n%s\n%s%s%s" \
    "${verb}" "${md5}" "${mime}" "${date}" \
    "${headers}" "${bucket}" "${resource}" | \
    hmac sha1 "${S3_SECRET_ACCESS_KEY}" | openssl base64 -e -a
}
101
# cheesy, but it is the best way to have multiple headers.
# Emits one curl-config 'header = "..."' line per argument, for use with
# curl -K (see s3_curl).
curl_headers() {
  # $@ = header strings, one per output line
  local h
  for h in "$@"; do
    printf 'header = "%s"\n' "${h}"
  done
}
111
s3_curl() {
  # invoke curl to do all the heavy HTTP lifting
  # $1 = method (one of GET, PUT, or DELETE. HEAD is not handled yet.)
  # $2 = remote bucket.
  # $3 = remote name
  # $4 = local name.
  local bucket remote date sig md5 arg inout headers
  # NOTE(review): stdopts is assigned below but NOT declared local, so it
  # leaks into the caller's scope — confirm nothing relies on that.
  # header handling is kinda fugly, but it works.
  bucket="${2:+/${2}}/" # slashify the bucket
  remote="$(urlenc "${3}")" # if you don't, strange things may happen.
  stdopts="--connect-timeout 10 --fail --silent"
  # NOTE(review): debug mode re-adds --fail (already in stdopts); harmless
  # duplication, but --show-error is the option that actually matters here.
  [[ $CURL_S3_DEBUG == true ]] && stdopts="${stdopts} --show-error --fail"
  case "${1}" in
   # GET writes to $4, or to stdout when $4 is absent/empty ("-" for curl -o)
   GET) arg="-o" inout="${4:--}" # stdout if no $4
    ;;
   PUT) [[ ${2} ]] || die "PUT can has bucket?"
    if [[ ! ${3} ]]; then
     # bucket creation: PUT with an empty body
     arg="-X PUT"
     headers[${#headers[@]}]="Content-Length: 0"
    elif [[ -f ${4} ]]; then
     # object upload: precompute MD5 so S3 can verify the payload
     md5="$(openssl dgst -md5 -binary "${4}"|openssl base64 -e -a)"
     arg="-T" inout="${4}"
     headers[${#headers[@]}]="Expect: 100-continue"
    else
     die "Cannot write non-existing file ${4}"
    fi
    ;;
   DELETE) arg="-X DELETE"
    ;;
   HEAD) arg="-I" ;;
   *) die "Unknown verb ${1}. It probably would not have worked anyways." ;;
  esac
  # RFC-1123-style date; TZ=UTC so the signature and Date header agree
  date="$(TZ=UTC date '+%a, %e %b %Y %H:%M:%S %z')"
  # NOTE(review): ${1} is unquoted here (harmless for the fixed verbs above,
  # but inconsistent with the quoting style elsewhere).
  sig=$(s3_signature_string ${1} "${date}" "${bucket}" "${remote}" "${md5}")

  headers[${#headers[@]}]="Authorization: AWS ${S3_ACCESS_KEY_ID}:${sig}"
  headers[${#headers[@]}]="Date: ${date}"
  [[ ${md5} ]] && headers[${#headers[@]}]="Content-MD5: ${md5}"
  # ${arg} is deliberately unquoted so multi-word values like "-X PUT" split
  # into separate options; headers are fed via a curl config file built by
  # process substitution to allow an arbitrary number of them.
  # NOTE(review): for DELETE/HEAD, ${inout} is empty, so curl receives an
  # empty "" argument before the URL — appears to rely on curl tolerating
  # that; verify against the curl version in use.
  curl ${arg} "${inout}" ${stdopts} -K <(curl_headers "${headers[@]}") \
   "http://s3.amazonaws.com${bucket}${remote}"
  return $?
}
154
s3_put() {
  # Upload an object (or create a bucket when only $1 is given).
  # $1 = remote bucket to put it into
  # $2 = remote name to put
  # $3 = file to put. This must be present if $2 is; defaults to $2.
  local bucket="${1}" name="${2}"
  local file="${3:-${2}}"
  # the function's status is s3_curl's status
  s3_curl PUT "${bucket}" "${name}" "${file}"
}
162
s3_get() {
  # Download a remote object.
  # $1 = bucket to get file from
  # $2 = remote file to get
  # $3 = local file to get into; overwritten if it exists, defaults to $2.
  #      If this contains a path, that path must exist before calling this.
  local bucket="${1}" name="${2}"
  local dest="${3:-${2}}"
  # the function's status is s3_curl's status
  s3_curl GET "${bucket}" "${name}" "${dest}"
}
171
s3_test() {
  # Existence probe: same args as s3_get, but uses the HEAD verb, discards
  # the response, and reports purely via exit status.
  local bucket="${1}" name="${2}"
  s3_curl HEAD "${bucket}" "${name}" >/dev/null
}
177
# Hideously ugly, but it works well enough.
# List every bucket the account owns (GET on the service root).
s3_buckets() {
  s3_get \
    | grep -o '<Name>[^>]*</Name>' \
    | sed 's/<[^>]*>//g' \
    | xmldec
}
183
# this will only return the first thousand entries, alas
# Mabye some kind soul can fix this without writing an XML parser in bash?
s3_list() {
  # $1 = bucket to list; returns 1 if no bucket was given.
  # The original tested with '[ "x$1" == "x" ]' — '==' inside POSIX '[' is
  # a bashism, and the x-prefix dance is unneeded under [[ ]].
  [[ -n ${1} ]] || return 1
  # strip each <Key>...</Key> down to its decoded key name
  s3_get "${1}" | grep -o '<Key>[^>]*</Key>' | sed 's/<[^>]*>//g' | xmldec
}
193
s3_delete() {
  # Remove a single object.
  # $1 = bucket to delete from
  # $2 = item to delete
  local bucket="${1}" item="${2}"
  # the function's status is s3_curl's status
  s3_curl DELETE "${bucket}" "${item}"
}
200
# because this uses s3_list, it suffers from the same flaws
# (only the first thousand keys are seen).
s3_rmrf() {
  # $1 = bucket to delete everything from
  local key
  # IFS= and -r keep keys with leading whitespace or backslashes intact;
  # the original bare 'read f' mangled both.
  s3_list "${1}" | while IFS= read -r key; do
    s3_delete "${1}" "${key}"
  done
}
208
# Entry point: verify the environment, then dispatch the first CLI
# argument to the matching s3_* operation.
check_deps
case "${1}" in
  put)     shift; s3_put "$@" ;;
  get)     shift; s3_get "$@" ;;
  rm)      shift; s3_delete "$@" ;;
  ls)      shift; s3_list "$@" ;;
  test)    shift; s3_test "$@" ;;
  buckets) s3_buckets ;;
  rmrf)    shift; s3_rmrf "$@" ;;
  *)       die "Unknown command ${1}." ;;
esac
221