# fix individual thing grabs
# [clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
# Compression settings for the optional 7z archiving step.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoint templates.  ACCESS_QP / PAGE_QP are
# query-string fragments formatted into the URLs below.
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
# NOTE(review): PAGE_QP appears unused in this file - TODO confirm pagination.
PAGE_QP = "page={}"
# NOTE(review): unlike the other endpoints this one carries no access_token
# query parameter - verify that Designs requests are still authorised.
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to fetch its content.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Populated in main() from --api-key or a local 'api.key' file.
API_KEY = None

# Number of parallel Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT appears unused in this file - TODO confirm.
RETRY_COUNT = 3

# Hard cap on absolute path length (Windows limit is 260 characters).
MAX_PATH_LENGTH = 250

VERSION = "0.10.3"

# Per-download marker file recording the newest file timestamp.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so connections are reused across requests.
SESSION = requests.Session()
56
57
@dataclass
class ThingLink:
    """Lightweight reference to a thing, as returned by listing endpoints."""
    thing_id: str  # thing id as supplied by the API ('id' field)
    name: str  # human-readable thing name ('name' field)
    api_link: str  # API URL for the thing ('url' field)
63
64
@dataclass
class FileLink:
    """A single downloadable file belonging to a thing."""
    name: str  # file name on thingiverse
    last_update: datetime.datetime  # parsed from the API 'date' field
    link: str  # download URL (includes the access token)
70
71
@dataclass
class ImageLink:
    """A single image belonging to a thing."""
    name: str  # slugified image name
    link: str  # direct URL of the (large display) image
76
77
class FileLinks:
    """Ordered collection of FileLink entries that tracks the newest update time.

    Supports iteration, indexing and len(); ``last_update`` always holds the
    maximum ``last_update`` of the links added so far (or None when empty).
    """

    def __init__(self, initial_links=None):
        """Optionally seed the collection from an iterable of links."""
        self.links = []
        self.last_update = None
        if initial_links is None:
            initial_links = []
        for entry in initial_links:
            self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """Add *link* and fold its last_update into the running maximum."""
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First link: last_update is still None, which max() cannot compare.
            self.last_update = link.last_update
        self.links.append(link)
102
103
class State(enum.Enum):
    """Outcome of a Thing download attempt."""
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
108
109
def sanitise_url(url):
    """Mask any access_token query parameter in *url* so it is safe to log."""
    return re.sub(r'access_token=\w*', 'access_token=***', url)
116
117
def strip_time(date_obj):
    """Return *date_obj* with its time-of-day reset to midnight (00:00)."""
    return datetime.datetime.combine(date_obj.date(), datetime.time.min)
122
123
def rename_unique(dir_name, target_dir_name):
    """Move a directory sideways to a new name, ensuring it is unique.

    If *target_dir_name* already exists, "_0", "_1", ... suffixes are tried
    until a free name is found.  Returns the name actually used.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
134
135
def fail_dir(dir_name):
    """When a download has failed, move its directory sideways.

    The directory gets a ``_failed`` suffix (made unique if necessary) so a
    later retry can start from scratch.  Returns the new name.
    """
    return rename_unique(dir_name, dir_name + "_failed")
140
141
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns an absolute path whose length does not exceed MAX_PATH_LENGTH.
    If shortening the name collides with an existing file, a numeric
    "_N" suffix is added until a free name is found.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Bug fix: the old code never shortened the path - it appended "_0"
    # (making it *longer*) and also recomputed the same candidate twice per
    # loop because the counter was incremented after formatting.
    # Shorten the base, keeping room for the extension and an "_NNN" suffix.
    base = base[:MAX_PATH_LENGTH - len(extension) - 4]
    new_path = "{}{}".format(base, extension)
    inc = 0
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
155
156
def slugify(value):
    """Normalise *value* into a filesystem-safe name.

    Applies NFKC normalisation, lowercases, strips surrounding whitespace,
    removes characters that are invalid in (Windows) filenames and trims
    trailing dots.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned.strip()
167
168
class Downloader(multiprocessing.Process):
    """Worker process that pulls thing ids off a queue and downloads them.

    The queue may carry bare id strings or ThingLink objects; a ``None``
    entry is the shutdown sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue  # JoinableQueue of work items
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """Consume the queue until the None sentinel arrives."""
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            # Work out how to build a Thing from whatever we were handed.
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if thing is None:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
203
204
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping.

        Returns the cached list when already fetched; otherwise queries
        self.url and builds ThingLink entries.
        Raises ValueError when self.url was never set by the subclass.

        Bug fix: this was decorated as a @property while every caller invoked
        it as a method, so ``self.get()`` raised
        ``TypeError: 'list' object is not callable``.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        # Bug fix: self.total is never populated, so report the real count.
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            # Bug fix: Thing.download() requires the api key as its third
            # argument; it was previously omitted here (TypeError at runtime).
            return_code = Thing(thing).download(self.download_dir, self.compress, API_KEY)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
276
277
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        """Look up *name* among *user*'s collections and prepare for download.

        On any API/connection failure this logs the error and returns,
        leaving self.url unset (Grouping.get will then raise).
        """
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # The API only exposes collections by numeric id, so resolve the name.
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        matches = [candidate for candidate in collection_list
                   if candidate['name'].casefold() == wanted]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
311
312
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        """Set up a grouping covering every design published by *user*."""
        super().__init__(quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        # Everything lands under "<user> designs" inside the target directory.
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
323
324
class Thing:
    """ An individual design on thingiverse.

    Built from a ThingLink (or a bare id via from_thing_id), lazily parsed
    against the API and then downloaded into a per-thing directory, which is
    optionally compressed into a 7z archive afterwards.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        # Timestamp of the newest file in the most recent previous download.
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        # Newest file timestamp of the current download; persisted to TIMESTAMP_FILE.
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: initialise these here so download() can test them even when
        # _parse() did not find them in the API response (previously a missing
        # 'license'/'details' key led to AttributeError later on).
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches details, file list and image list from the API, then compares
        the newest file timestamp against any previous download found under
        *base_dir*.  Sets self._parsed on success; early return leaves it
        False, which download() treats as failure.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        # Bug fix: this used to be logging.error(url), which logged the raw
        # URL (including the access token) at ERROR level.
        logging.debug("Requesting details: {}".format(sanitise_url(url)))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                # Bug fix: skip this image instead of appending a stale URL
                # left over from an earlier assignment.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.

        Old downloads used the bare slugified name (no thing id prefix);
        move any such directory onto the current naming scheme.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the new name of the moved directory, or None when there was
        nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file first, then any
        "<thing_id>*.7z" archives in *base_dir*.
        Returns (location, timestamp); both None when nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except (TypeError, ValueError):
            # Bug fix: a malformed timestamp raises ValueError from strptime,
            # which previously escaped this handler and aborted the download.
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None; the first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State value: OK when the thing is now downloaded, FAILED on
        any error (the partial directory is moved aside via fail_dir), and
        ALREADY_DOWNLOADED when nothing new was found.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            # Split into files changed since last time vs files we can copy.
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        # Bug fix: this was leftover debugging emitted at ERROR level.
        logging.debug("d:{} f:{}".format(self.download_dir, filelist_file))
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Archive the finished directory and remove the plain copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
732
733
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <user> <name>",
    "user <name>".  Blank lines are skipped; unknown lines log a warning.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                # Bug fix: Thing.download() requires the api key as its third
                # argument; it was previously omitted here (TypeError at
                # runtime for every "thing" batch instruction).
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, API_KEY)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                # Bug fix: this previously logged "batch collection instruction".
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
761
762
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key, starts the
    downloader worker processes and dispatches to the chosen subcommand.
    Exits with status 1 when no subcommand is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Bug fix: "version" previously still demanded an API key and spun up
    # (and immediately shut down) worker processes.  Handle it up front.
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
        return

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    # Resolve the API key: command line wins, then a local 'api.key' file.
    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, API_KEY) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Collections and designs are downloaded in this process; individual
    # things are handed to the worker queue.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for _ in downloaders:
        thing_queue.put(None)
857
858
if __name__ == "__main__":
    # Needed so frozen Windows executables can spawn worker processes safely.
    multiprocessing.freeze_support()
    main()