16f84dbd32f3382aa0636003d8e1827c9902ba61
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import atexit
19 import py7zr
20 import glob
21 import shutil
22
# Compression settings for the 7z archives produced when --compress is used.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoints and query-parameter templates.
API_BASE="https://api.thingiverse.com"
ACCESS_QP="access_token={}"
PAGE_QP="page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP

# Populated in main() from --api-key or the 'api.key' file.
API_KEY = None

# Number of parallel Downloader worker processes to start.
DOWNLOADER_COUNT = 1
# NOTE(review): declared but never referenced in this file - presumably
# intended for retry logic. TODO confirm before removing.
RETRY_COUNT = 3

# Longest absolute path we will create (headroom under the Windows limit).
MAX_PATH_LENGTH = 250

VERSION = "0.10.1"

# Per-thing marker file recording when the thing was last downloaded.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so connections are reused across API calls.
SESSION = requests.Session()
@dataclass
class ThingLink:
    """ Lightweight reference to a thing as returned by the list endpoints;
    enough to fetch full details later via the API.
    """
    thing_id: str
    name: str
    api_link: str
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str
    last_update: datetime.datetime
    link: str
@dataclass
class ImageLink:
    """ A single image belonging to a thing. """
    name: str
    link: str
class FileLinks:
    """ A list of FileLink objects that also tracks the newest
    last_update timestamp seen across everything appended.
    """

    def __init__(self, initial_links=None):
        """ Create the collection, optionally seeded from an iterable of
        FileLink-like objects.

        BUG FIX: the default used to be a mutable `[]`, which is shared
        between every call and silently accumulates state.
        """
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping self.last_update as the newest timestamp. """
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First append: self.last_update is still None.
            self.last_update = link.last_update
        self.links.append(link)
96
97
class State(enum.Enum):
    """ Outcome of an attempt to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
102
def sanitise_url(url):
    """ Return url with any access_token query value masked out,
    so API keys never end up in logs.
    """
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
109
def strip_time(date_obj):
    """ Return a copy of date_obj with the time component zeroed out. """
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
114
def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.

    If target_dir_name already exists, a numeric _N suffix is appended
    until a free name is found.  Returns the name actually used.
    """
    suffix = 0
    candidate = target_dir_name
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
125
126
def fail_dir(dir_name):
    """ When a download has failed, move its directory sideways to a
    unique '<name>_failed' location and return the new path.
    """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
131
132
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits in MAX_PATH_LENGTH,
    otherwise a shortened, collision-free variant.

    BUG FIX: the shortening amount (`to_cut`) used to be computed and
    then never applied, so over-long paths were returned over-long, and
    the uniqueness loop regenerated the same candidate on its first pass.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the stem, leaving room for a "_<n>" disambiguation suffix.
    keep = MAX_PATH_LENGTH - len(extension) - 3
    base = base[:keep]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
147
148
def strip_ws(value):
    """ Replace each run of whitespace in value with a single '-'.

    BUG FIX: this used to call NO_WHITESPACE_REGEX.sub(...), but that
    constant is not defined anywhere in this file, so any call raised
    NameError.  The pattern is now inlined.
    """
    return str(re.sub(r'\s+', '-', value))
152
153
def slugify(value):
    """ Lowercase a string and strip characters that are not valid in
    filenames (plus trailing dots, which Windows rejects).
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:\?\*\|"]', '', cleaned)
    # Windows also dislikes names that end in dots.
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned
164
165
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one until a None sentinel arrives.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ Consume the queue; None is the shutdown sentinel. """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
191
192
193
194
195
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.  Subclasses must set
    self.url, self.paginated and self.download_dir.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the ThingLinks of the grouping.

        The result is cached on self.things; later calls return the
        cache.  Raises ValueError if self.url was never set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            page = 0
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more pages.
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                # BUG FIX: this branch used to reference `current_url`,
                # which is undefined here, raising NameError on failure.
                logging.error("Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # BUG FIX: self.total was never updated, so download() always
        # reported "Downloading 0 thing(s)".
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            result = Thing(thing).download(self.download_dir, self.compress)
            # In quick mode, stop at the first thing we already have.
            if self.quick and result == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
283
class Collection(Grouping):
    """ Holds details of a collection.

    The constructor resolves the collection name to its numeric id via
    the API; on any failure it logs and returns, leaving self.url unset
    (a later get()/download() will then raise ValueError).
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            # BUG FIX: this handler used to reference self.thing_id, which
            # does not exist on Collection, so the real connection error
            # was masked by an AttributeError.
            logging.error("Unable to connect for collection {} of user {}: {}".format(
                name, user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list
                          if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))
315
316
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        dir_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, dir_name)
327
328
class Thing:
    """ An individual design on thingiverse.

    Nothing is fetched from the API until _parse()/download() runs.
    """

    def __init__(self, thing_link):
        """ Accept either a ThingLink or a bare thing id.

        BUG FIX: Downloader.run() and do_batch() enqueue raw id strings,
        which the previous version crashed on with AttributeError.  For
        a bare id the name is backfilled from the API during parsing.
        """
        try:
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        except AttributeError:
            # A bare id (from the download queue or a batch file).
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Fetches details, file list and image list from the API, then
        compares against any previous download to decide whether a
        fresh download is needed.  Sets self._parsed on success.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()
        if self.name is None:
            # Constructed from a bare id - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        # BUG FIX: _license/_details were left unset on KeyError, making
        # the later `if self._license:` raise AttributeError.
        self._license = thing_json.get('license')
        if self._license is None:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        self._details = thing_json.get('details')
        if self._details is None:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url']))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # BUG FIX: the failure previously fell through and appended
                # a stale (or unbound) name/url pair; also catch IndexError
                # from an empty 'sizes' list.
                logging.warning("Missing image for {}".format(image.get('name')))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed directory (so unchanged files can be copied
        back from it), or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns a (location, timestamp) tuple, both None when nothing
        was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None: first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

        Returns a State value: OK, ALREADY_DOWNLOADED or FAILED.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link + url_suffix)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    # BUG FIX: this log line used to report file_link.link
                    # (the last *file* downloaded) instead of the image url.
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Compress the finished directory into a dated 7z archive and
        # remove the uncompressed copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
715
716
717
718
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            command = command_arr[0]
            if command == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
            elif command == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
            elif command == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
746
747
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key, starts
    the downloader worker processes and dispatches to the selected
    subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default the target directory to the current working directory.
        args.directory = os.getcwd()

    # Root logger captures everything; the handlers do the filtering.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    # API key: command-line flag wins; otherwise read the 'api.key' file.
    global API_KEY
    if args.api_key:
        API_KEY=args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY=fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        # The file log is always at debug level, regardless of console verbosity.
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes, fed by thing_queue.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # NOTE(review): only the 'thing' subcommand uses the worker queue; the
    # other subcommands download synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)
845
846
if __name__ == "__main__":
    # freeze_support() is required before any multiprocessing use in
    # frozen Windows executables; it is a no-op elsewhere.
    multiprocessing.freeze_support()
    main()