#!/usr/bin/env python3
"""
Thingiverse bulk downloader
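
Example invocations (the API key is read from -a on the command line,
falling back to a file called 'api.key' in the working directory):

    thingy_grabber.py thing <thing_id> [<thing_id> ...]
    thingy_grabber.py user <user_name> [<user_name> ...]
    thingy_grabber.py collection <owner> <collection_name> [...]
    thingy_grabber.py batch <batch_file>
    thingy_grabber.py version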
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
22 SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]
23
24 # I don't think this is exported by datetime
25 DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
26 # Windows cannot handle : in filenames
27 SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
28
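# Thingiverse REST API endpoint templates; the {} placeholders are filled in
# with ids, usernames and the access token via str.format().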
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless, as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.3"

TIMESTAMP_FILE = "timestamp.txt"

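# One shared Session so HTTP connections get reused across the many API calls.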
SESSION = requests.Session()


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
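    """ A list of FileLink objects that also tracks the most recent
    last_update across everything appended to it.
    """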
    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - take the link's timestamp as-is.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
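    """ Result of a single Thing download attempt. """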
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)


def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00.
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())


def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the file path is not too long - for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the base so the whole path fits, then add a numeric suffix
    # until the name is unique.
    base = base[:MAX_PATH_LENGTH - len(extension)]
    inc = 0
    new_path = "{}{}".format(base, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path


def slugify(value):
    """
    Normalises the string, removes characters that are invalid in filenames
    and converts it to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value.strip()


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ The actual download loop. Exits when a None sentinel is pulled
        off the queue.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return


class Grouping:
    """ Holds details of a group of things for download.
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted, as we don't need pagination.
        logging.info("Requesting: {}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection. """
        if not self.things:
            # Property access populates self.things as a side effect.
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Need to figure out the ID for the collection.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        try:
            # Case insensitive to retain parity with previous behaviour.
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Filled in during _parse(); default to None so later checks are safe.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it.
        :param thing_id: the thing to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details.
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        logging.debug(sanitise_url(url))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA.
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                           current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with the factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details.
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code,
                                                                        sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error("Unparseable date: {}".format(link['date']))

        # Finally get the image links.
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded.
            self._parsed = True
            return

        logging.info("Last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp; let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the time?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time.
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory.
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. Huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files.
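        # Archive names follow '<thing_id> - <slugified name> - <timestamp>.7z',
        # the format produced by download() below.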
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None, so this is the first candidate.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
        Returns a State: OK means the thing is now downloaded,
        not necessarily that anything was fetched this run.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download.

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        logging.debug("d:{} f:{}".format(self.download_dir, filelist_file))
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any).
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types.
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones.
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses.
        logging.info("Writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("Writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format that _find_last_download parses.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each line as a set of calls to this script. """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines.
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for the script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of the collection(s) to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download one or more individual things.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes
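    # (one None sentinel per downloader; Downloader.run exits when it sees one)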
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()