#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

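# Illustrative example invocations (see main() for the full argument list):
#   thingy_grabber.py -a <api_key> thing 1234 5678
#   thingy_grabber.py -a <api_key> user some_user
#   thingy_grabber.py -a <api_key> collection some_user "collection name"
#   thingy_grabber.py -a <api_key> batch batch.txt
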
import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import py7zr
import glob
import shutil

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

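# Illustrative example: API_THING_DETAILS.format(1234, API_KEY) yields
# "https://api.thingiverse.com/things/1234/?access_token=<key>".
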
API_KEY = None

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.2"

TIMESTAMP_FILE = "timestamp.txt"

SESSION = requests.Session()


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
    """ A collection of FileLink objects that tracks the most recent update time. """

    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First link appended - there is no previous update time to compare.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)

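# Illustrative example:
#   sanitise_url("https://api.thingiverse.com/things/1234/?access_token=abc123")
#   returns "https://api.thingiverse.com/things/1234/?access_token=***"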

def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())

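# Illustrative example:
#   strip_time(datetime.datetime(2020, 1, 2, 13, 45)) returns
#   datetime.datetime(2020, 1, 2, 0, 0)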

def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))

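# Illustrative example:
#   fail_dir("thing_dir") renames "thing_dir" to "thing_dir_failed",
#   or to "thing_dir_failed_0" (and so on) if that name is already taken.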

def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Trim the base name, leaving room for a "_<n>" disambiguating suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path

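# Illustrative example: a path longer than MAX_PATH_LENGTH comes back
#   shortened, with a "_<n>" suffix inserted before the extension so the
#   result does not collide with an existing file.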

def slugify(value):
    """
    Normalise string, remove characters that are invalid in filenames
    and convert the string to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value

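# Illustrative example:
#   slugify('My Cool Thing: V2?') returns 'my cool thing v2'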

class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down downloader")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing.from_thing_id(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()

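# A None on the queue is the shutdown sentinel: main() pushes one per
# downloader process once all the work has been queued.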

class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, API_KEY)
        self.paginated = True
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.api_link = thing_link.api_link
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """ Build a Thing from a bare ID, as used by the download queue and
        batch files; the name is filled in when the thing is parsed.
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                           current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Constructed from a bare ID - fill in the name from the API response.
            self.name = thing_json.get('name', str(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code,
                                                                        sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(API_KEY)))
            except ValueError:
                logging.error("Unparseable date: {}".format(link['date']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the time part of the stamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            if latest_time is None or candidate_time > latest_time:
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

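    # Note: compressed downloads are archived as
    # "<thing id> - <slugified name> - <SAFE_DATETIME_FORMAT timestamp>.7z",
    # which is the layout _find_last_download() relies on above.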
    def download(self, base_dir, compress):
        """ Download all files for a given thing.
        Returns a State: OK if the thing is now downloaded (even if nothing
        new was fetched), ALREADY_DOWNLOADED if it was skipped, FAILED on error.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # Don't reference imagelink here - it is unbound if mkdir failed.
            logging.error("Failed to download images for thing {} - {}".format(self.thing_id, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format we parse it back with.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")

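# An illustrative batch file (one instruction per line; note that lines are
# whitespace-split, so collection names cannot contain spaces here):
#   thing 1234
#   collection some_user some_collection
#   user some_user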

def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand == "collection":
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()