59b5ff595e9299266699872e16724d0e80a8df32
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import atexit
19 import py7zr
20 import glob
21 import shutil
22
# Compression settings used when the -c/--compress option is given.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST endpoint templates; ACCESS_QP/PAGE_QP are query-string
# fragments filled in with the API key and page number respectively.
API_BASE="https://api.thingiverse.com"
ACCESS_QP="access_token={}"
PAGE_QP="page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Appended to a file URL to obtain the actual download link.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Populated in main() from --api-key or a local 'api.key' file.
API_KEY = None

DOWNLOADER_COUNT = 1  # number of parallel Downloader worker processes
RETRY_COUNT = 3  # NOTE(review): defined but not referenced anywhere in this file

# Cap on absolute path length (headroom under the Windows MAX_PATH limit).
MAX_PATH_LENGTH = 250

VERSION = "0.10.2"

# Per-thing marker file recording the datetime of the last download.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so TCP connections are reused across requests.
SESSION = requests.Session()
57
@dataclass
class ThingLink:
    """ Identity and API location of a single thing, as returned by
    the listing endpoints (see Grouping.get).
    """
    thing_id: str  # thing identifier as used in API URLs
    name: str  # human-readable title of the thing
    api_link: str  # URL of the thing's API detail endpoint
63
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename to save the download as
    last_update: datetime.datetime  # modification stamp parsed from the API
    link: str  # download URL (has the access token appended - see Thing._parse)
69
@dataclass
class ImageLink:
    """ A single preview image belonging to a thing. """
    name: str  # slugified image name used as the local filename
    link: str  # URL of the image to fetch
74
class FileLinks:
    """ Ordered collection of FileLink objects that also tracks the most
    recent ``last_update`` stamp seen across all of its members.

    Supports iteration, indexing and len() so it can mostly be used
    like a plain list of links.
    """

    def __init__(self, initial_links=None):
        # BUG FIX: the previous signature used a mutable default
        # argument (initial_links=[]), which is shared between calls.
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping last_update as the newest stamp seen. """
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - the first link's stamp wins.
            self.last_update = link.last_update
        self.links.append(link)
97
98
class State(enum.Enum):
    """ Outcome of an attempt to download a thing. """
    OK = enum.auto()  # downloaded successfully
    FAILED = enum.auto()  # an error occurred; partial dir moved aside
    ALREADY_DOWNLOADED = enum.auto()  # nothing new since the last run
103
def sanitise_url(url):
    """Return *url* with any access_token query value masked for logging."""
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
110
def strip_time(date_obj):
    """Return *date_obj* with its time-of-day zeroed out to midnight."""
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
115
def rename_unique(dir_name, target_dir_name):
    """Move *dir_name* to *target_dir_name*, appending a numeric suffix
    ("_0", "_1", ...) if the target already exists.  Returns the name
    actually used.
    """
    candidate, counter = target_dir_name, 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, counter)
        counter += 1
    os.rename(dir_name, candidate)
    return candidate
126
127
def fail_dir(dir_name):
    """Shunt a failed download directory aside under a '_failed' name."""
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
132
133
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits within
    MAX_PATH_LENGTH.  Otherwise the base name is shortened and a
    numeric suffix appended so the result does not collide with an
    existing file.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # BUG FIX: the amount to cut was computed but never applied, so
    # over-long paths were returned at full length.  Shorten the base,
    # reserving room for a "_NN" disambiguation suffix.
    available = MAX_PATH_LENGTH - len(extension) - 4
    base = base[:available]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        # BUG FIX: the old loop recomputed new_path before incrementing,
        # wasting its first iteration on the same name.
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
148
149
def strip_ws(value):
    """ Remove whitespace from a string, replacing each run of
    whitespace with a single hyphen.
    """
    # BUG FIX: this used NO_WHITESPACE_REGEX, which is not defined
    # anywhere in this file, so any call raised NameError.
    return str(re.sub(r'\s+', '-', value))
153
154
def slugify(value):
    """
    Normalise a string for use as a filename: strips characters that
    are invalid on common filesystems (and trailing dots) and lowercases
    the result.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:\?\*\|"]', '', cleaned)
    return re.sub(r'\.*$', '', cleaned)
165
166
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one, until a None sentinel arrives.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Sentinel received - acknowledge it and stop.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
192
193
194
195
196
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ retrieve the things of the grouping.

        Returns the list of ThingLink objects found (cached after the
        first call).  Raises ValueError if no URL has been configured.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        page = 0
        # TODO:: Must be a way to refactor this cleanly
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more!
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                # BUG FIX: this branch previously referenced the undefined
                # name `current_url` (only bound in the paginated branch),
                # raising NameError instead of logging the failure.
                logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        # BUG FIX: total was never updated, so download() always logged
        # "Downloading 0 thing(s)".  Keep it in sync with the real count.
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            RC = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and RC == State.ALREADY_DOWNLOADED:
                # Date-ordered feed and we've hit a known thing - stop early.
                logging.info("Caught up, stopping.")
                return
284
class Collection(Grouping):
    """ Holds details of a collection.

    Resolves the collection id by name from the user's collection list
    at construction time; network failures leave the object without a
    url/download_dir, which Grouping.get()/download() then reject.
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            # BUG FIX: this handler used to format self.thing_id, an
            # attribute Collection never has, raising AttributeError
            # inside the except block.
            logging.error("Unable to connect for collection {}: {}".format(
                self.name, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
316
317
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        # The designs listing is paginated; store the bare endpoint and
        # let Grouping.get() append the access-token/page parameters.
        self.paginated = True
        self.url = API_USER_DESIGNS.format(user)
        target = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, target)
328
329
class Thing:
    """ An individual design on thingiverse.

    Construct with either a ThingLink (from a Grouping listing) or a
    bare thing id; call download() to fetch files, images, license and
    description into a per-thing directory under base_dir.
    """

    def __init__(self, thing_link):
        if hasattr(thing_link, 'thing_id'):
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        else:
            # BUG FIX / generalisation: Downloader.run and do_batch pass
            # bare ids, which previously crashed on thing_link.thing_id.
            # Accept them and resolve the name from the API in _parse().
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Always present so download() can test them without risking
        # AttributeError when the API response lacked the keys.
        self._license = None
        self._details = None

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Fetches thing details, file list and image list from the API,
        sets download_dir/slug, and decides _needs_download by comparing
        against any previous download.  Leaves _parsed False on error.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()

        if self.name is None:
            # Constructed from a bare id - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        # BUG FIX: use .get() so a missing key leaves the attribute as
        # None rather than unset - download() reads both unconditionally.
        self._license = thing_json.get('license')
        if self._license is None:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        self._details = thing_json.get('details')
        if self._details is None:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(API_KEY)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type']=='display' and x['size']=='large'][0]['url']
            except KeyError:
                logging.warning("Missing image for {}".format(name))
                # BUG FIX: the old code fell through here and appended a
                # link with stale (or, on the first iteration, undefined)
                # name/url values.  Skip the broken entry instead.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        # Old versions used just the slugified name, without the id prefix.
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed path, or None when there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns (location, timestamp): location is the download dir or
        the basename of a dated 7z archive, or None when nothing found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [:-3] strips the ".7z" extension before date parsing.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None - first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

        Returns a State value: OK when the thing is now downloaded,
        ALREADY_DOWNLOADED when there was nothing new to fetch, FAILED
        on any error (the partial directory is moved aside).
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            # Partition into files changed since last_time (download) and
            # unchanged ones (copy across from the renamed directory).
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        # Record the full file manifest alongside the downloads.
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    # BUG FIX: this log line used to reference
                    # file_link.link - a leftover from the file loop, and
                    # undefined when no files had been fetched.
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.__str__())
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Compress the directory into a dated 7z archive, then remove it.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
715
716
717
718
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for raw_line in handle:
            instruction = raw_line.strip()
            if not instruction:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(instruction))
            tokens = instruction.split()
            verb = tokens[0]
            if verb == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(instruction))
                Thing(tokens[1]).download(download_dir, compress)
            elif verb == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(instruction))
                Collection(tokens[1], tokens[2],
                           download_dir, quick, compress).download()
            elif verb == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(instruction))
                Designs(tokens[1], download_dir, quick, compress).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
746
747
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key (from
    --api-key or a local 'api.key' file), starts the downloader worker
    processes and dispatches to the requested subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; each handler filters to its level.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # BUG FIX: the formatter was created but only ever attached to the
    # optional file handler; attach it to console output as well.
    console_handler.setFormatter(formatter)

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            # Fall back to reading the key from a local file.
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes.  Only the 'thing' subcommand
    # feeds the queue; the other subcommands download synchronously.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)
845
846
if __name__ == "__main__":
    # freeze_support is a no-op except in frozen Windows executables,
    # where it must be called before multiprocessing is used.
    multiprocessing.freeze_support()
    main()