update changelog
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21 from io import StringIO
22 from html.parser import HTMLParser
23
# 7z compression settings used when --compress is given.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# Format of dates in the API's file listings and in timestamp.txt.
# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API URL templates. Each template ends with the
# access-token query parameter, filled in via str.format().
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to reach the raw download.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Number of Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT is defined but not referenced anywhere in this file.
RETRY_COUNT = 3

# Longest absolute path this script will create (conservative for Windows
# path-length limits); see truncate_name().
MAX_PATH_LENGTH = 250

VERSION = "0.10.5"

# Marker file written into each download dir recording the newest file date.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so requests reuse connections.
SESSION = requests.Session()
57
class MLStripper(HTMLParser):
    """ Collects the text content of an HTML document, discarding all markup.
    """

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.text = StringIO()

    def error(self, message):
        # HTMLParser subclasses historically had to provide this hook.
        raise ValueError(message)

    def handle_data(self, d):
        # Every text node is accumulated into the buffer.
        self.text.write(d)

    def get_data(self):
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        """ Convenience wrapper: run *html* through a fresh stripper. """
        stripper = MLStripper()
        stripper.feed(html)
        return stripper.get_data()
83
84
@dataclass
class ThingLink:
    """ Minimal handle for a thing as returned by the list endpoints. """
    thing_id: str  # NOTE(review): annotated str, but Grouping.get passes the raw JSON 'id' — may be an int
    name: str  # human-readable name ('' when built via Thing.from_thing_id)
    api_link: str  # API URL for the thing
90
91
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename as reported by the API
    last_update: datetime.datetime  # parsed from the API's 'date' field
    link: str  # full download URL (includes the access token)
97
98
@dataclass
class ImageLink:
    """ A single image belonging to a thing. """
    name: str  # slugified image name
    link: str  # URL of the large 'display' rendition
103
104
class FileLinks:
    """ Container for file links that also tracks the most recent
    last_update stamp of anything appended to it.
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        for entry in (initial_links or []):
            self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping last_update at the newest stamp seen. """
        try:
            newest = max(self.last_update, link.last_update)
        except TypeError:
            # First append (last_update is still None): adopt the link's stamp.
            newest = link.last_update
        self.last_update = newest
        self.links.append(link)
129
130
class State(enum.Enum):
    """ Result of a Thing download attempt. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
135
136
def sanitise_url(url):
    """ Return a log-safe copy of *url* with any access token masked out. """
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
143
144
def strip_time(date_obj):
    """ Return *date_obj* with its time-of-day zeroed out (midnight). """
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
149
150
def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.

    If the preferred target exists, '_0', '_1', ... suffixes are tried
    until a free name is found.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
161
162
def fail_dir(dir_name):
    """ Park a directory whose download failed under a '<name>_failed' alias. """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
167
168
def truncate_name(file_name):
    """ Ensure an absolute path fits within MAX_PATH_LENGTH (for Windows).

    Short enough paths are returned untouched. Over-long paths have their
    stem cut down so the whole path fits; if the shortened name already
    exists on disk, a numeric suffix is appended (still within the limit)
    until a free name is found.

    Bug fix: the previous implementation never shortened anything — for an
    over-long path it returned "<base>_0<ext>", which is *longer* than the
    input — and its uniqueness loop re-tested the same candidate on its
    first pass because the counter was incremented after reformatting.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    inc = 0
    while True:
        # No suffix on the first attempt; "_0", "_1", ... thereafter.
        suffix = "_{}".format(inc) if inc else ""
        keep = MAX_PATH_LENGTH - len(extension) - len(suffix)
        new_path = base[:keep] + suffix + extension
        if not os.path.exists(new_path):
            return new_path
        inc += 1
182
183
def slugify(value):
    """
    Normalise a string for use as a filename: NFKC-fold and lowercase it,
    then strip characters that are invalid on common filesystems.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    # Trailing dots are not allowed in Windows file names.
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned.strip()
194
195
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids (or ThingLinks) off a queue and
    downloads each one, until it receives a None sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ Consume queue entries until the shutdown sentinel arrives. """
        while True:
            item = self.thing_queue.get()
            if item is None:
                # Sentinel: acknowledge it and stop this worker.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            if isinstance(item, str):
                thing = Thing.from_thing_id(item)
            elif isinstance(item, ThingLink):
                thing = Thing(item)
            else:
                thing = None
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(item))
            else:
                logging.info("Handling id {}".format(item))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
230
231
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve (and cache) the list of ThingLinks in the grouping.

        Follows the 'next' links in the API response headers until every
        page has been read. Raises ValueError if self.url was never set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # Follow next links until all items are found.
        current_url = self.url
        while current_url is not None:
            logging.info("requesting:{}".format(sanitise_url(current_url)))
            current_req = SESSION.get(current_url)
            current_url = current_req.links.get('next', {}).get('url')
            if current_req.status_code != 200:
                logging.error(
                    "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                    current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.debug(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # Bug fix: self.total was previously never updated, so download()
        # always logged "Downloading 0 thing(s)".
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the things in the grouping, one at a time.

        With quick mode enabled, stops at the first thing that is already
        up to date (assumes date-ordered listings).
        """
        if not self.things:
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
306
307
class Collection(Grouping):
    """ Holds details of a single named collection belonging to a user. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Resolve the collection name to its numeric ID via the API.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        matches = [x for x in response.json() if x['name'].casefold() == wanted]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
341
342
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        super().__init__(quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        target = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, target)
352
353
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None  # datestamp of the previous download, if any
        self._parsed = False  # has _parse() completed successfully?
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None  # datestamp recorded for this download
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: these were previously only assigned inside _parse() when
        # the matching JSON keys existed, so a thing without a license or
        # description made download() crash with AttributeError.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches the thing's details, file list and image list from the API,
        then compares the newest file date against any previous download to
        decide whether a fresh download is required. Leaves self._parsed
        False on any API failure.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))
            # Guard: a null JSON body would otherwise make the loop below
            # raise TypeError.
            link_list = []

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))
            # Same null-body guard as for the file list.
            image_list = []

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                # Bug fix: without this continue a broken entry appended an
                # ImageLink built from a stale (or unbound) url; IndexError
                # is now also caught for entries lacking a large rendition.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the new (renamed) path, or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file, then any
        dated .7z archives for this thing id. Returns (location, datetime)
        or (None, None).
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time was still None - this is the first valid stamp.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State describing whether the thing is now downloaded
        (ALREADY_DOWNLOADED counts as success without doing any work).
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            # Bug fix: a failure here previously fell into an error handler
            # that referenced an unbound loop variable and crashed with
            # NameError; now it fails the download cleanly.
            logging.error("Failed to create image directory {} - {}".format(image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED
        try:
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally roll the whole directory up into a dated 7z archive.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
765
766
def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions:
        thing <id>
        collection <user> <name>
        user <user>
    Malformed lines (unknown keyword or too few arguments) are logged and
    skipped instead of crashing the batch with an IndexError, as happened
    previously for e.g. a bare "thing" line.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing" and len(command_arr) >= 2:
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection" and len(command_arr) >= 3:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user" and len(command_arr) >= 2:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
794
795
def main():
    """ Entry point for script being run as a command.

    Parses command-line arguments, configures logging, resolves the API
    key (flag or 'api.key' file), starts the downloader worker processes
    and dispatches to the chosen subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    # One subparser per mode of operation.
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default to the current working directory.
        args.directory = os.getcwd()

    # Root logger at DEBUG; handlers filter per-destination below.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only attached to the file handler below;
    # console output uses logging's default format — confirm intentional.

    # API key from the flag, or failing that from an 'api.key' file in cwd.
    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Only 'thing' downloads go through the worker queue; the other
    # subcommands run synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes: one None sentinel per worker.
    # NOTE(review): the workers are never join()ed here — confirm that main
    # exiting before the queue drains is acceptable.
    for _ in downloaders:
        thing_queue.put(None)
889
890
if __name__ == "__main__":
    # Required for multiprocessing in frozen Windows executables; a no-op
    # in ordinary interpreter runs.
    multiprocessing.freeze_support()
    main()