#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import py7zr
import glob
import shutil
from io import StringIO
from html.parser import HTMLParser

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP
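
# The constants above are URL templates; filling one in looks like this
# (illustrative key):
#   API_THING_DETAILS.format(1234, "KEY")
#   -> "https://api.thingiverse.com/things/1234/?access_token=KEY"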

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.5"

TIMESTAMP_FILE = "timestamp.txt"

SESSION = requests.Session()


class MLStripper(HTMLParser):
    """ Turns HTML markup into plain text
    """

    def error(self, message):
        raise ValueError(message)

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.text = StringIO()

    def handle_data(self, d):
        self.text.write(d)

    def get_data(self):
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        s = MLStripper()
        s.feed(html)
        return s.get_data()
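
# A quick sketch of the intended use:
#   MLStripper.strip_tags("<p>Hi <b>there</b></p>") yields "Hi there".
# Thing._parse() uses this to flatten HTML thing descriptions into plain text.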


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None (first link); max() with None raises
            # TypeError, so just take the new link's timestamp.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)
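
# e.g. sanitise_url("https://api.thingiverse.com/things/1234/?access_token=abc123")
# -> "https://api.thingiverse.com/things/1234/?access_token=***"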


def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())


def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the base so the result fits, leaving room for a "_N" suffix.
    base = base[:MAX_PATH_LENGTH - len(extension) - 4]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
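
# e.g. a 300-character .stl path comes back as a roughly 250-character
# "<shortened base>_0.stl"; the MAX_PATH_LENGTH figure is headroom for
# Windows path-length limits.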


def slugify(value):
    """
    Normalise a string, remove characters that are invalid in filenames,
    and convert it to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value.strip()
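
# e.g. slugify('My "Cool" Thing: v2.') -> 'my cool thing v2'
# (invalid filename characters dropped, trailing dots stripped, lowercased)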


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
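
    # Shutdown is sentinel-based: main() enqueues one None per Downloader and
    # each process exits its loop when it pulls one off the queue.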


class Grouping:
    """ Holds details of a group of things for download.
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # Follow "next" links until all items are found. requests exposes the
        # parsed HTTP Link header as response.links.
        current_url = self.url
        while current_url is not None:
            requested_url = current_url
            logging.info("requesting:{}".format(sanitise_url(requested_url)))
            current_req = SESSION.get(requested_url)
            current_url = current_req.links.get('next', {}).get('url')
            if current_req.status_code != 200:
                logging.error(
                    "Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(requested_url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.debug(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            # Note: "get" is a property; accessing it triggers the fetch.
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
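
    # "quick" mode leans on the assumption that the API lists things
    # newest-first, so the first ALREADY_DOWNLOADED result means everything
    # older is already local and the loop can stop early.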


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # Case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Default these so download() can test them even when _parse() could
        # not find a license or description.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with the factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error("Unparseable date {} for {}".format(link['date'], link['name']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(
                    current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fall back to other image types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # Without both a name and a URL there is nothing to download.
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp; let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except (TypeError, ValueError):
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # Slice off the "<slug> - " prefix and the ".7z" suffix to
                # leave just the datestamp.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None, so the first candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time
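
    # Archive names follow "<thing_id> - <slug name> - <YYYY-MM-DD HH.MM.SS>.7z"
    # (see the compression step at the end of download()), which is what the
    # leading_length/-3 slicing above unpacks.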

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
            Returns a State value indicating whether the thing is now
            downloaded (not whether this call actually downloaded it!)
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    # Only include the response body at debug level - it can be large.
                    logging.error("Unexpected status code {} for {}".format(
                        data_req.status_code, sanitise_url(file_link.link)))
                    logging.debug("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        imagelink = None
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # imagelink is still None if os.mkdir() itself failed.
            logging.error("Failed to download {} - {}".format(
                imagelink.name if imagelink else image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
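
# A batch file holds one instruction per line in the shapes parsed above, e.g.:
#   thing 12345
#   collection someuser favourites
#   user someuser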


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes: one None sentinel per Downloader.
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()
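
# Typical invocations (a sketch; "APIKEY" is a placeholder):
#   python3 thingy_grabber.py -a APIKEY thing 12345
#   python3 thingy_grabber.py -a APIKEY -c user someuser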