Thingiverse bulk downloader
from shutil import copyfile       # used to reuse cached files from a previous download
from bs4 import BeautifulSoup     # HTML parsing of Thingiverse pages
URL_BASE = "https://www.thingiverse.com"
# AJAX endpoints used to page through the things in a collection / a user's designs.
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

# Regexes that pull paging metadata out of the JSON blob embedded in the
# collection / designs pages.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')


def strip_ws(value):
    """ Collapse each run of whitespace (and hyphens) in *value* to a
    single hyphen and return the result. """
    return NO_WHITESPACE_REGEX.sub('-', value)


def slugify(value):
    """ Normalise a string for use as a directory name.

    Converts to ASCII via NFKD normalisation (dropping accents), removes
    characters that are not alphanumerics, underscores, whitespace or
    hyphens, strips leading/trailing whitespace, and collapses runs of
    whitespace/hyphens to a single hyphen.

    NOTE(review): despite what the old docstring claimed, this does NOT
    lowercase the value.  Changing that would rename the target directories
    and break resuming of existing downloads, so behaviour is kept as-is.
    """
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
    value = re.sub(r'[^\w\s-]', '', value).strip()
    return NO_WHITESPACE_REGEX.sub('-', value)
46 """ Holds details of a group of things for download
47 This is effectively (although not actually) an abstract class
48 - use Collection or Designs instead.
56 # These should be set by child classes.
58 self
.download_dir
= None
59 self
.collection_url
= None
def _get_small_grouping(self, req):
    """ Handle a small grouping (fewer items than one page): scrape the
    thing links straight out of the page that was already fetched. """
    page = BeautifulSoup(req.text, features='lxml')
    cards = page.find_all('a', {'class':'card-img-holder'})
    thing_ids = []
    for card in cards:
        # hrefs look like "/thing:12345" -- keep the id after the colon.
        thing_ids.append(card['href'].split(':')[1])
    self.things = thing_ids
# NOTE(review): this copy of the file is missing several lines here (the
# method's 'def' line, guard statements and the construction of
# 'parameters') -- structure below follows the surviving fragments and must
# be checked against version control.
""" retrieve the things of the grouping. """
# We've already done it.
# NOTE(review): a guard such as 'if self.things: return ...' appears to have
# been dropped here -- the comment above refers to a cached result.
# Check for initialisation:
print("No URL set - object not initialised properly?")
raise ValueError("No URL set - object not initialised properly?")
# NOTE(review): the print/raise above are presumably guarded by
# 'if not self.url:' (dropped in this copy); as written they fire always.
# Get the internal details of the grouping.
print("Querying {}".format(self.url))
c_req = requests.get(self.url)
# Paging metadata is embedded as JSON in the page body.
total = TOTAL_REGEX.search(c_req.text)
# This is a small (<13) items grouping. Pull the list from this req.
return self._get_small_grouping(c_req)
# NOTE(review): the early return above is presumably guarded by
# 'if total is None:' (dropped), since 'total.groups()' below needs a match.
self.total = total.groups()[0]
self.req_id = ID_REGEX.search(c_req.text).groups()[0]
self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
# NOTE(review): 'parameters' is used below but its construction (the POST
# payload for the AJAX listing endpoint) is missing from this copy.
for current_page in range(1, self.last_page + 1):
    parameters['page'] = current_page
    req = requests.post(self.collection_url, parameters)
    soup = BeautifulSoup(req.text, features='lxml')
    links = soup.find_all('a', {'class':'card-img-holder'})
    # hrefs look like "/thing:12345" -- keep the id after the colon.
    self.things += [x['href'].split(':')[1] for x in links]
def download(self):
    """ Download all the things in this grouping into self.download_dir.

    Raises:
        ValueError: if no download_dir was set (object not initialised by a
            child class).

    An already-existing target directory is treated as a resume rather than
    an error.
    """
    if not self.download_dir:
        raise ValueError("No download_dir set - invalidly initialised object?")
    base_dir = os.getcwd()  # NOTE(review): unused in the visible code -- confirm
    # NOTE(review): the 'try:' line was missing from this copy of the file,
    # leaving an orphan 'except FileExistsError:' -- restored here.
    try:
        os.mkdir(self.download_dir)
    except FileExistsError:
        print("Target directory {} already exists. Assuming a resume."
              .format(self.download_dir))
    print("Downloading {} things.".format(self.total))
    for thing in self.things:
        Thing(thing).download(self.download_dir)
class Collection(Grouping):
    """ Holds details of a single named collection owned by a user. """
    def __init__(self, user, name, directory):
        Grouping.__init__(self)
        # NOTE(review): these two assignments were missing from this copy of
        # the file although self.user / self.name are read below -- restored.
        self.user = user
        self.name = name
        # Human-facing collection page; Grouping.get() scrapes its metadata.
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, self.user, strip_ws(self.name))
        # Downloads land in <directory>/<user>-<collection>.
        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))
        # AJAX endpoint used by Grouping.get() to page through the things.
        self.collection_url = URL_COLLECTION
class Designs(Grouping):
    """ Holds details of all of a users' designs. """
    def __init__(self, user, directory):
        Grouping.__init__(self)
        # NOTE(review): this assignment was missing from this copy of the
        # file although self.user is read below -- restored.
        self.user = user
        # Human-facing designs page; Grouping.get() scrapes its metadata.
        self.url = "{}/{}/designs".format(URL_BASE, self.user)
        # Downloads land in "<directory>/<user> designs".
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
        # AJAX endpoint used by Grouping.get() to page through the things.
        self.collection_url = USER_COLLECTION
148 """ An individual design on thingiverse. """
def __init__(self, thing_id):
    """ thing_id: the Thingiverse id of the design to fetch. """
    self.thing_id = thing_id
    self.last_time = None          # timestamp of the newest previously downloaded file
    self._needs_download = True    # cleared once we know nothing has changed
    # NOTE(review): this copy of the file is missing some initialisations;
    # self.text and self.title are read by _parse()/download() before being
    # assigned anywhere visible, so initialise them defensively here.
    self.text = None
    self.title = None
    self.download_dir = None
def _parse(self, base_dir):
    """ Work out what, if anything needs to be done. """
    # NOTE(review): several lines are missing from this copy of the file
    # (guards, the assignment that populates self.text, the try/else
    # structure) -- layout below is best-effort; check version control.
    url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
    req = requests.get(url)
    # NOTE(review): 'req' is unused below; presumably a dropped line stored
    # something like 'self.text = req.text' before the soup is built.
    soup = BeautifulSoup(self.text, features='lxml')
    # Directory name is derived from the design's <h1> title.
    self.title = slugify(soup.find_all('h1')[0].text.strip())
    self.download_dir = os.path.join(base_dir, self.title)
    if not os.path.exists(self.download_dir):
    # NOTE(review): body of the 'if' above (presumably "never downloaded,
    # needs download / return") is missing from this copy.
    timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
    if not os.path.exists(timestamp_file):
    # Old download from before
    print("Old-style download directory found. Assuming update required.")
    # NOTE(review): the 'try:' matching the 'except' below is missing.
    with open(timestamp_file, 'r') as timestamp_handle:
        self.last_time = timestamp_handle.readlines()[0]
    print("last downloaded version: {}".format(self.last_time))
    except FileNotFoundError:
    # Not run on this thing before.
    print("Old-style download directory found. Assuming update required.")
    self.last_time = None
    # OK, so we have a timestamp, lets see if there is anything new to get
    file_links = soup.find_all('a', {'class':'file-download'})
    for file_link in file_links:
        timestamp = file_link.find_all('time')[0]['datetime']
        print("Checking {} (updated {})".format(file_link["title"], timestamp))
        # NOTE(review): string comparison of timestamps -- assumes a sortable
        # (ISO-8601-like) datetime format, and that self.last_time is not
        # None on this path; confirm against the dropped guards.
        if timestamp > self.last_time:
            print("Found new/updated file {}".format(file_link["title"]))
            self._needs_download = True
    # Got here, so nope, no new files.
    print("Found no new files for {}".format(self.title))
    self._needs_download = False
def download(self, base_dir):
    """ Download all files for a given thing. """
    # NOTE(review): several lines are missing from this copy of the file
    # (guards, 'else:'/'try:' lines, and the initialisation of
    # new_file_links / old_file_links / new_last_time) -- layout below is
    # best-effort; check version control.
    self._parse(base_dir)
    if not self._needs_download:
        print("{} already downloaded - skipping.".format(self.title))
        # NOTE(review): an early 'return' appears to have been dropped here.
    # Have we already downloaded some things?
    timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
    if os.path.exists(self.download_dir):
        if not os.path.exists(timestamp_file):
            # edge case: old style dir w/out timestamp.
            print("Old style download dir found for {}".format(self.title))
            os.rename(self.download_dir, "{}_old".format(self.download_dir))
        # NOTE(review): an 'else:' appears to have been dropped before the
        # two lines below -- they should only run when a timestamp exists.
        prev_dir = "{}_{}".format(self.download_dir, self.last_time)
        os.rename(self.download_dir, prev_dir)
    # Get the list of files to download
    soup = BeautifulSoup(self.text, features='lxml')
    file_links = soup.find_all('a', {'class':'file-download'})
    # NOTE(review): initialisations of new_file_links, old_file_links and
    # new_last_time are missing from this copy.
    if not self.last_time:
        # If we don't have anything to copy from, then it is all new.
        new_file_links = file_links
        new_last_time = file_links[0].find_all('time')[0]['datetime']
        for file_link in file_links:
            timestamp = file_link.find_all('time')[0]['datetime']
            print("Found file {} from {}".format(file_link["title"], timestamp))
            if timestamp > new_last_time:
                new_last_time = timestamp
    # NOTE(review): an 'else:' appears to have been dropped -- the loop
    # below partitions files into new/old relative to self.last_time.
    for file_link in file_links:
        timestamp = file_link.find_all('time')[0]['datetime']
        print("Checking {} (updated {})".format(file_link["title"], timestamp))
        if timestamp > self.last_time:
            new_file_links.append(file_link)
        # NOTE(review): an 'else:' appears to have been dropped here.
        old_file_links.append(file_link)
        if not new_last_time or timestamp > new_last_time:
            new_last_time = timestamp
    print("new timestamp {}".format(new_last_time))
    # OK. Time to get to work.
    os.mkdir(self.download_dir)
    # First grab the cached files (if any)
    for file_link in old_file_links:
        old_file = os.path.join(prev_dir, file_link["title"])
        new_file = os.path.join(self.download_dir, file_link["title"])
        # NOTE(review): the 'try:' matching the 'except' below is missing.
        print("Copying {} to {}".format(old_file, new_file))
        copyfile(old_file, new_file)
        except FileNotFoundError:
        print("Unable to find {} in old archive, redownloading".format(file_link["title"]))
        new_file_links.append(file_link)
    # Now download the new ones
    files = [("{}{}".format(URL_BASE, x['href']), x["title"]) for x in new_file_links]
    # NOTE(review): the 'try:' matching the first 'except Exception' below
    # is missing from this copy.
    for url, name in files:
        file_name = os.path.join(self.download_dir, name)
        print("Downloading {} from {} to {}".format(name, url, file_name))
        data_req = requests.get(url)
        with open(file_name, 'wb') as handle:
            handle.write(data_req.content)
    except Exception as exception:
        print("Failed to download {} - {}".format(name, exception))
        os.rename(self.download_dir, "{}_failed".format(self.download_dir))
    # Now write the timestamp
    # NOTE(review): the 'try:' matching the 'except' below is missing.
    with open(timestamp_file, 'w') as timestamp_handle:
        timestamp_handle.write(new_last_time)
    except Exception as exception:
        print("Failed to write timestamp file - {}".format(exception))
        os.rename(self.download_dir, "{}_failed".format(self.download_dir))
    self._needs_download = False
    print("Download of {} finished".format(self.title))
# NOTE(review): the enclosing 'def main():' line is missing from this copy
# of the file, as are a few guard bodies -- check version control.
""" Entry point for script being run as a command. """
parser = argparse.ArgumentParser()
parser.add_argument("-v", "--verbose", help="Be more verbose", action="store_true")
parser.add_argument("-d", "--directory", help="Target directory to download into")
subparsers = parser.add_subparsers(help="Type of thing to download", dest="subcommand")
collection_parser = subparsers.add_parser('collection', help="Download an entire collection")
collection_parser.add_argument("owner", help="The owner of the collection to get")
collection_parser.add_argument("collection", help="The name of the collection to get")
thing_parser = subparsers.add_parser('thing', help="Download a single thing.")
thing_parser.add_argument("thing", help="Thing ID to download")
user_parser = subparsers.add_parser("user", help="Download all things by a user")
user_parser.add_argument("user", help="The user to get the designs of")
version_parser = subparsers.add_parser("version", help="Show the current version")
args = parser.parse_args()
if not args.subcommand:
# NOTE(review): the body of the 'if' above (presumably usage + exit) is
# missing from this copy.
# Default the target directory to the current working directory.
if not args.directory:
    args.directory = os.getcwd()
# NOTE(review): if this code lives inside main(), a 'global VERBOSE'
# declaration is needed for this to update the module-level flag -- confirm.
VERBOSE = args.verbose
if args.subcommand.startswith("collection"):
    collection = Collection(args.owner, args.collection, args.directory)
    print(collection.get())
    collection.download()
if args.subcommand == "thing":
    Thing(args.thing).download(args.directory)
if args.subcommand == "user":
    designs = Designs(args.user, args.directory)
    # NOTE(review): the lines that fetch and download 'designs' appear to
    # be missing from this copy.
if args.subcommand == "version":
    # NOTE(review): VERSION is not defined anywhere visible in this copy.
    print("thingy_grabber.py version {}".format(VERSION))
346 if __name__
== "__main__":