#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import atexit
import py7zr
import glob
import shutil

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

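# API URL templates. The "{}" placeholders are filled in with .format() at the
# point of use - the object ID first, then the access token for the templates
# that end in ACCESS_QP. API_USER_DESIGNS is paginated, so Grouping.get()
# appends the token and page number itself.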
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP

API_KEY = None

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.0"

TIMESTAMP_FILE = "timestamp.txt"

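# One shared Session gives us HTTP keep-alive against the API. Each Downloader
# worker process ends up with its own copy of this object rather than sharing
# sockets across processes.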
SESSION = requests.Session()

@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str

@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str

@dataclass
class ImageLink:
    name: str
    link: str

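# FileLinks is a small list wrapper that also tracks the newest last_update of
# everything appended to it; _parse() compares that high-water mark against the
# timestamp of the previous download.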
class FileLinks:
    def __init__(self, initial_links=None):
        # Avoid a mutable default argument; treat None as "no initial links".
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - take the first link's timestamp.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()

def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)

def strip_time(date_obj):
    """ Take a datetime object and return another with the time set to 00:00.
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())

def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Cut enough off the base name to fit within the limit, leaving room
    # for a "_N" uniqueness suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path

# Used by strip_ws below: collapse any run of whitespace.
NO_WHITESPACE_REGEX = re.compile(r'\s+')

def strip_ws(value):
    """ Remove whitespace from a string """
    return str(NO_WHITESPACE_REGEX.sub('-', value))


def slugify(value):
    """
    Normalise a string, remove characters that are invalid in filenames
    and convert the string to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value


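# Each Downloader is a worker process that pulls raw thing IDs off a shared
# JoinableQueue and downloads them; a None on the queue is the sentinel that
# tells the worker to shut down.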
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down downloader")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()


class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.paginated = False

    def get(self):
        """ retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        page = 0
        # TODO:: Must be a way to refactor this cleanly
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more!
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                logging.error("Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            state = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and state == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return

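# A Collection is resolved in two steps: fetch the user's full collection
# list, find the entry whose name matches (case-insensitively), then use its
# numeric ID to build the collection-things URL.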
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect to list collections for {}: {}".format(
                user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


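# Designs uses the paginated user-things endpoint, so only the username is
# baked into the URL here; the access token and page number are appended
# per-request in Grouping.get().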
class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


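# A Thing moves through two phases: _parse() works out what (if anything)
# needs fetching by comparing the API's file timestamps against the last
# downloaded state on disk, then download() copies unchanged files from the
# old directory and fetches only the new or updated ones.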
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        if isinstance(thing_link, ThingLink):
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        else:
            # A raw thing ID (from the download queue or a batch file);
            # the name is filled in from the API during _parse().
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self._license = None
        self._details = None
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()

        if self.name is None:
            # Constructed from a raw ID - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url']))
            except ValueError:
                logging.error("Unparseable date {} for file {}".format(link['date'], link['name']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(image.get('name', 'unknown image')))
                # Don't append a stale or undefined link for this image.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(
                self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

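    # Compressed downloads are named "<thing-id> - <slug> - <timestamp>.7z",
    # so previous archives can be found by globbing on the thing ID and
    # parsing the timestamp back out of the filename.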
    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # The slice strips the leading "<slug> - " and the trailing ".7z".
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None - the first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.
            Returns a State: OK if the thing is now downloaded (even if nothing
            new was fetched), ALREADY_DOWNLOADED if it was up to date, FAILED otherwise.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download
        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(renamed_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link + url_suffix)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # The message doesn't name the image as the mkdir above can fail
            # before the loop variable is bound.
            logging.error("Failed to download images - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')),
                          'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')),
                          'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format _find_last_download parses.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


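# Batch files contain one whitespace-separated instruction per line,
# mirroring the subcommands:
#
#   thing <thing-id>
#   collection <owner> <collection-name>
#   user <username>
#
# Blank lines are skipped; anything else is warned about and ignored.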
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress)
                   for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand == "collection":
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()