fix single thing downloads
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
22 SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]
23
24 # I don't think this is exported by datetime
25 DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
26 # Windows cannot handle : in filenames
27 SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
28
29 API_BASE = "https://api.thingiverse.com"
30 ACCESS_QP = "access_token={}"
31 PAGE_QP = "page={}"
32 API_USER_DESIGNS = API_BASE + "/users/{}/things/"
33 API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP
34
35 # Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
36 API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
37 API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP
38
39 API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
40 API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
41 API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
42 API_THING_DOWNLOAD = "/download/?" + ACCESS_QP
43
44 API_KEY = None
45
46 DOWNLOADER_COUNT = 1
47 RETRY_COUNT = 3
48
49 MAX_PATH_LENGTH = 250
50
51 VERSION = "0.10.2"
52
53 TIMESTAMP_FILE = "timestamp.txt"
54
55 SESSION = requests.Session()
56
57
@dataclass
class ThingLink:
    """ Reference to a single thing: its id, display name and API URL.

    Instances are built by Grouping.get from the API listing
    (thing['id'], thing['name'], thing['url']).
    """
    thing_id: str
    name: str
    # NOTE(review): populated from thing['url'] but never read by Thing itself.
    api_link: str
63
64
@dataclass
class FileLink:
    """ A single downloadable file attached to a thing. """
    name: str
    # Parsed from the API's 'date' field using DEFAULT_DATETIME_FORMAT.
    last_update: datetime.datetime
    # Direct download URL; includes the access token query parameter.
    link: str
70
71
@dataclass
class ImageLink:
    """ A thing image: slugified name plus the URL of its large 'display' rendition. """
    name: str
    link: str
76
77
class FileLinks:
    """ A list-like container of FileLink objects that also tracks the most
    recent `last_update` across everything appended to it.
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        for item in (initial_links if initial_links is not None else []):
            self.append(item)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, bumping last_update if this one is newer. """
        self.links.append(link)
        try:
            # max() raises TypeError while last_update is still None.
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            self.last_update = link.last_update
102
103
class State(enum.Enum):
    """ Result of attempting to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    # With --quick, hitting this stops a grouping download early.
    ALREADY_DOWNLOADED = enum.auto()
108
109
def sanitise_url(url):
    """ Mask any access token in *url* so it is safe to log. """
    return re.sub(r'access_token=\w*', 'access_token=***', url)
116
117
def strip_time(date_obj):
    """ Return a copy of *date_obj* with the time-of-day zeroed to midnight. """
    return datetime.datetime(date_obj.year, date_obj.month, date_obj.day)
122
123
def rename_unique(dir_name, target_dir_name):
    """ Move *dir_name* sideways to *target_dir_name*, appending a numeric
    suffix until an unused name is found. Returns the final path.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
134
135
def fail_dir(dir_name):
    """ Park a failed download directory under a unique "<name>_failed" path. """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
140
141
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits within MAX_PATH_LENGTH,
    otherwise returns an unused "<base>_<n><ext>" variant.

    NOTE(review): this only uniquifies the name, it does not actually shorten
    the path, so the result can still exceed MAX_PATH_LENGTH — confirm intent.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    # Bug fix: the counter must be bumped *before* regenerating the candidate;
    # the old code re-formatted with the stale counter, re-testing the same
    # name and lagging one step behind.
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
155
156
def slugify(value):
    """
    Normalise a string into a safe, lowercase filename component:
    NFKC-normalised, stripped, with filesystem-hostile characters and
    trailing dots removed.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    return re.sub(r'\.*$', '', cleaned)
167
168
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids (or ThingLinks) off a queue and
    downloads them, until it receives a None sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ Consume the queue until the None sentinel arrives. """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            # Accept either a bare id string or a full ThingLink.
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            elif isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            else:
                thing = None
            if thing:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            else:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            self.thing_queue.task_done()
203
204
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.

    Child classes must set `url` (a fully-formatted listing endpoint) and
    `download_dir` before `get`/`download` are used.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping.

        Returns (and caches) a list of ThingLink objects.
        Raises ValueError if `url` was never set by the child class.

        Bug fix: this was previously decorated with @property, which made
        download()'s `self.get()` call the *returned list* and raise
        TypeError. It is now a plain method, matching its call sites.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # self.url should already have been formatted as we don't need pagination
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        # Bug fix: keep `total` in sync so download() reports the real count
        # (it previously always logged "Downloading 0 thing(s).").
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            # Bug fix: Thing.download requires an api_key argument; it was
            # previously omitted here, raising TypeError for every thing.
            return_code = Thing(thing).download(self.download_dir, self.compress, API_KEY)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
276
277
class Collection(Grouping):
    """ Holds details of a collection.

    The constructor resolves the collection name to its numeric id via the
    API; on any failure it logs and returns, leaving `url` unset.
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        collection = next(
            (x for x in response.json() if x['name'].casefold() == wanted), None)
        if collection is None:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))
311
312
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        # e.g. "<user> designs" under the target directory.
        dir_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, dir_name)
323
324
class Thing:
    """ An individual design on thingiverse.

    Construct from a ThingLink (or bare id via `from_thing_id`), then call
    `download()`, which lazily calls `_parse()` to fetch metadata, file and
    image links from the API before fetching the files themselves.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        # datetime of the newest previous download, if one is found on disk.
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        # Timestamp written to TIMESTAMP_FILE after a successful download.
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: initialise these here. _parse() only assigns them when the
        # API response contains the keys, and download() tests them inside a
        # handler that only catches IOError - a missing attribute used to
        # raise an uncaught AttributeError.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches thing details, file list and image list from the API, then
        compares against any previous download found under base_dir. Sets
        `_parsed` on success and `_needs_download` accordingly.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        # Bug fix: was logging.error(url), which leaked the raw API key into
        # the logs at error level on every parse.
        logging.debug(sanitise_url(url))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # Bug fix: previously only KeyError was caught, IndexError
                # (no large display rendition) crashed the parse, and the
                # code then fell through and appended a stale - or on the
                # first image, undefined - url. Skip the broken image instead.
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            # last_update is None when the thing has no parseable files.
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.

        Early versions used just the slugified name (no thing id prefix).
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed path (so unchanged files can be copied back from
        it), or None when there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file, then any
        "<thing_id>*.7z" archives whose names embed a SAFE_DATETIME_FORMAT
        stamp. Returns (latest, latest_time); both None when nothing found.
        NOTE(review): for 7z hits `latest` is a basename, while for the
        directory hit it is a full path - callers only test truthiness.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [:-3] strips the ".7z" extension.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None - first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
        Returns a State: OK iff the thing is now downloaded (not iff it
        downloads the thing!), ALREADY_DOWNLOADED when nothing was new,
        FAILED otherwise (the partial directory is moved aside via fail_dir).
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            # Bug fix: a failure in mkdir used to be caught by a handler that
            # referenced the (not yet bound) loop variable, raising NameError.
            logging.error("Failed to create image dir {} - {}".format(image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED
        for imagelink in self._image_links:
            try:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
            except Exception as exception:
                logging.error("Failed to download {} - {}".format(imagelink.name, exception))
                fail_dir(self.download_dir)
                return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally pack the whole directory into a 7z archive named with a
        # filesystem-safe timestamp, then remove the directory.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
731
732
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <user> <name>",
    "user <name>". Blank lines are skipped; anything else logs a warning.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                # Bug fix: Thing.download requires an api_key argument; it was
                # previously omitted, so every batch "thing" line raised
                # TypeError.
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, API_KEY)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                # Bug fix: this debug line previously said "collection".
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
760
761
def main():
    """ Entry point for script being run as a command.

    Parses the command line, resolves the API key (flag or 'api.key' file),
    configures logging, spins up the Downloader worker process(es), then
    dispatches to the chosen subcommand. "thing" ids go through the worker
    queue; "collection"/"user"/"batch" run synchronously in this process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; the handlers apply their own levels.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only attached to the file handler below,
    # so console output carries no timestamps - confirm that is intentional.

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        # Fall back to an 'api.key' file in the current directory.
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, API_KEY) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        # Thing ids are handed to the worker processes via the queue.
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    # One None sentinel per worker; each worker exits after consuming one.
    for _ in downloaders:
        thing_queue.put(None)
852
853 # Stop the downloader processes
854 for _ in downloaders:
855 thing_queue.put(None)
856
857
if __name__ == "__main__":
    # freeze_support() makes multiprocessing work in frozen (e.g. PyInstaller
    # Windows) executables; it is a no-op when running from source.
    multiprocessing.freeze_support()
    main()