Fix global batch
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21 from io import StringIO
22 from html.parser import HTMLParser
23
# Compression settings used when writing .7z archives.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoint templates; filled in with str.format().
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"  # query-parameter template for the API key
PAGE_QP = "page={}"  # NOTE(review): defined but not referenced in this file
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to get its download URL.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Number of parallel Downloader worker processes to start.
DOWNLOADER_COUNT = 1
RETRY_COUNT = 3  # NOTE(review): defined but not referenced in this file

# Keep generated paths under the (approximate) Windows MAX_PATH limit.
MAX_PATH_LENGTH = 250

VERSION = "0.10.4"

# Marker file written into a download dir to record its completion time.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so requests reuse connections.
SESSION = requests.Session()
56
57
class MLStripper(HTMLParser):
    """Reduce HTML markup to its plain-text content.

    Feed HTML in via the normal HTMLParser interface (or use the
    ``strip_tags`` convenience method); only text nodes are kept.
    """

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        # Turn character references (&amp; etc.) back into characters.
        self.convert_charrefs = True
        self.text = StringIO()

    def error(self, message):
        # Surface parser errors as ValueError so callers can catch them.
        raise ValueError(message)

    def handle_data(self, d):
        # Accumulate every text node into the buffer.
        self.text.write(d)

    def get_data(self):
        """Return all text collected so far."""
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        """Run *html* through a fresh stripper and return the plain text."""
        stripper = MLStripper()
        stripper.feed(html)
        return stripper.get_data()
83
84
@dataclass
class ThingLink:
    """ Minimal identification of a single thing on thingiverse. """
    thing_id: str  # thing id as used in API URLs
    name: str  # human-readable thing name (may be empty until parsed)
    api_link: str  # API URL for the thing's details
90
91
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename as reported by the API
    last_update: datetime.datetime  # upstream last-modified time
    link: str  # full download URL (access token appended by the caller)
97
98
@dataclass
class ImageLink:
    """ A single downloadable image belonging to a thing. """
    name: str  # slugified image name
    link: str  # URL of the chosen image rendition
103
104
class FileLinks:
    """List-like container of FileLink objects.

    Also tracks ``last_update``: the most recent update timestamp across
    everything appended so far (None while the container is empty).
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        if initial_links:
            for entry in initial_links:
                self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """Add *link* and fold its timestamp into ``last_update``."""
        # max() against None raises TypeError; in that case just take the
        # link's own timestamp (mirrors the no-links-yet state).
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            self.last_update = link.last_update
        self.links.append(link)
129
130
class State(enum.Enum):
    """ Result of attempting to download a thing. """
    OK = enum.auto()  # downloaded (or freshly archived) successfully
    FAILED = enum.auto()  # download attempted but failed
    ALREADY_DOWNLOADED = enum.auto()  # nothing new since the last download
135
136
def sanitise_url(url):
    """Return *url* with any access_token value masked for safe logging."""
    return re.sub(r'access_token=\w*', 'access_token=***', url)
143
144
def strip_time(date_obj):
    """Return *date_obj* normalised to midnight (time component dropped)."""
    return datetime.datetime.combine(date_obj.date(), datetime.time.min)
149
150
def rename_unique(dir_name, target_dir_name):
    """Move *dir_name* to *target_dir_name*, appending _0, _1, ... if needed.

    Returns the path the directory ended up at.
    """
    candidate = target_dir_name
    suffix = 0
    # Probe until we find a name that is not taken.
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
161
162
def fail_dir(dir_name):
    """Move a failed download directory aside to ``<name>_failed``.

    Returns the path it was moved to (uniquified if necessary).
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))
167
168
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it already fits within
    MAX_PATH_LENGTH.  Otherwise the path's stem is cut down so the whole
    path fits, and a numeric suffix is appended (incremented until unused)
    to avoid collisions between different names truncated to the same stem.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # BUGFIX: previously nothing was truncated at all - a "_0" suffix was
    # appended to the over-long path, making it even longer - and the first
    # candidate was re-evaluated once before the counter advanced.
    # Leave room for a "_<n>" collision suffix of up to 5 characters.
    base = base[:MAX_PATH_LENGTH - len(extension) - 5]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
182
183
184 def slugify(value):
185 """
186 Normalise string, removes invalid for filename charactersr
187 and converts string to lowercase.
188 """
189 logging.debug("Sluggyfying {}".format(value))
190 value = unicodedata.normalize('NFKC', value).lower().strip()
191 value = re.sub(r'[\\/<>:?*|"]', '', value)
192 value = re.sub(r'\.*$', '', value)
193 return value.strip()
194
195
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a queue and downloads them.

    A ``None`` on the queue is the shutdown sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Shutdown sentinel received.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            # Work out what kind of identifier we were handed.
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            elif isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if thing:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            else:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            self.thing_queue.task_done()
230
231
class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.

        Child classes must set ``self.url`` (the listing endpoint) and
        ``self.download_dir`` (where things are written).
    """

    def __init__(self, quick, compress, api_key):
        self.things = []  # ThingLinks discovered for this grouping
        self.total = 0  # number of things found (kept in sync by `get`)
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ retrieve the things of the grouping.

        Queries the API on first use and caches the result; raises
        ValueError when the child class never set ``self.url``.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # BUGFIX: self.total was initialised to 0 and never updated, so
        # progress logging always reported zero things.
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        # BUGFIX: log the real count rather than the never-updated self.total.
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            # In quick mode, stop as soon as we reach something we already have.
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
303
304
class Collection(Grouping):
    """ Holds details of a collection.

    Resolves the collection's numeric id from the owner's collection list
    at construction time; on any failure the object is left without a URL
    (Grouping.download will then raise).
    """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # We need to resolve the numeric ID for the collection first.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        matches = [x for x in response.json() if x['name'].casefold() == name.casefold()]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
338
339
class Designs(Grouping):
    """ Holds details of all of a users' designs.

    The listing URL and download directory are derived directly from the
    user name; no extra API lookup is needed at construction time.
    """

    def __init__(self, user, directory, quick, compress, api_key):
        super().__init__(quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
349
350
class Thing:
    """ An individual design on thingiverse.

    Construct from a ThingLink (or via ``from_thing_id``) and call
    ``download()``; that lazily calls ``_parse()`` to query the API for
    details, files and images before fetching anything new to disk.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None  # timestamp of the newest previous download
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None  # timestamp recorded for *this* download
        self._file_links = FileLinks()
        self._image_links = []
        # BUGFIX: _license/_details were previously only set conditionally
        # inside _parse(), so download() raised AttributeError for things
        # whose API response lacked a license or description.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Queries the API for details, file list and image list, then compares
        against any previous download.  Leaves self._parsed False on
        network/API failure (callers treat that as a failed parse).
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                # Fall back to the raw HTML rather than losing the text.
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except KeyError:
                logging.warning("Missing image for {}".format(name))
                # BUGFIX: skip this image; previously execution fell through
                # and appended an ImageLink with an undefined or stale url.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            # last_update is None when the thing has no files at all.
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        # Pre-0.x layout used just the slugified name, without the thing id.
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the path it was moved to, or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns (location, timestamp); both None when nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [-3:] strips the ".7z" extension from the timestamp.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None: first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State: OK when the thing is now downloaded, FAILED on any
        error (the partial directory is moved aside), or ALREADY_DOWNLOADED
        when nothing new was available.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        # BUGFIX: pre-bind so the error handler below cannot NameError when
        # os.mkdir itself raises before the loop ever binds imagelink.
        imagelink = None
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(
                imagelink.name if imagelink else image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.__str__())
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally archive the whole directory into a dated .7z and
        # remove the uncompressed copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
762
763
def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            command = command_arr[0]
            if command == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
            elif command == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
            elif command == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
791
792
def main():
    """ Entry point for script being run as a command.

    Parses the command line, configures logging, resolves the API key,
    starts the Downloader worker process(es) and dispatches to the
    requested subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    # One sub-parser per supported action.
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        # No action requested: show usage and exit non-zero.
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default to downloading into the current working directory.
        args.directory = os.getcwd()

    # Root logger captures everything; handlers filter by level.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only applied to the file handler below;
    # console output keeps logging's default format — confirm intended.

    # Command-line key wins; otherwise read it from an 'api.key' file.
    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            # NOTE(review): these errors are logged before any handler is
            # attached to the root logger, so they may not honour the
            # requested log level/format.
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Only the 'thing' subcommand uses the worker queue; the others run
    # synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes
    # NOTE(review): the queue is never join()ed here; main returns after
    # queueing the sentinels and the non-daemonic workers finish on their
    # own — confirm this shutdown behaviour is intended.
    for _ in downloaders:
        thing_queue.put(None)
886
887
888 if __name__ == "__main__":
889 multiprocessing.freeze_support()
890 main()