fix single thing downloads
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
22 SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]
23
24 # I don't think this is exported by datetime
25 DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
26 # Windows cannot handle : in filenames
27 SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
28
29 API_BASE = "https://api.thingiverse.com"
30 ACCESS_QP = "access_token={}"
31 PAGE_QP = "page={}"
32 API_USER_DESIGNS = API_BASE + "/users/{}/things/"
33 API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP
34
35 # Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
36 API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
37 API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP
38
39 API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
40 API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
41 API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
42 API_THING_DOWNLOAD = "/download/?" + ACCESS_QP
43
44 API_KEY = None
45
46 DOWNLOADER_COUNT = 1
47 RETRY_COUNT = 3
48
49 MAX_PATH_LENGTH = 250
50
51 VERSION = "0.10.2"
52
53 TIMESTAMP_FILE = "timestamp.txt"
54
55 SESSION = requests.Session()
56
57
@dataclass
class ThingLink:
    """ Reference to a single thing: its id, display name and API URL.

    Instances are built by Grouping.get from the API listing
    (thing['id'], thing['name'], thing['url']).
    """
    thing_id: str
    name: str
    # NOTE(review): populated from thing['url'] but never read by Thing itself.
    api_link: str
63
64
@dataclass
class FileLink:
    """ A single downloadable file attached to a thing. """
    name: str
    # Parsed from the API's 'date' field using DEFAULT_DATETIME_FORMAT.
    last_update: datetime.datetime
    # Direct download URL; includes the access token query parameter.
    link: str
70
71
@dataclass
class ImageLink:
    """ A thing image: slugified name plus the URL of its large 'display' rendition. """
    name: str
    link: str
76
77
class FileLinks:
    """ A list-like container of FileLink objects that also tracks the most
    recent `last_update` across everything appended to it.
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        for item in (initial_links if initial_links is not None else []):
            self.append(item)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, bumping last_update if this one is newer. """
        self.links.append(link)
        try:
            # max() raises TypeError while last_update is still None.
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            self.last_update = link.last_update
102
103
class State(enum.Enum):
    """ Result of attempting to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    # With --quick, hitting this stops a grouping download early.
    ALREADY_DOWNLOADED = enum.auto()
108
109
def sanitise_url(url):
    """ Mask any access token in *url* so it is safe to log. """
    return re.sub(r'access_token=\w*', 'access_token=***', url)
116
117
def strip_time(date_obj):
    """ Return a copy of *date_obj* with the time-of-day zeroed to midnight. """
    return datetime.datetime(date_obj.year, date_obj.month, date_obj.day)
122
123
def rename_unique(dir_name, target_dir_name):
    """ Move *dir_name* sideways to *target_dir_name*, appending a numeric
    suffix until an unused name is found. Returns the final path.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
134
135
def fail_dir(dir_name):
    """ Park a failed download directory under a unique "<name>_failed" path. """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
140
141
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits within MAX_PATH_LENGTH,
    otherwise returns an unused "<base>_<n><ext>" variant.

    NOTE(review): this only uniquifies the name, it does not actually shorten
    the path, so the result can still exceed MAX_PATH_LENGTH — confirm intent.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    # Bug fix: the counter must be bumped *before* regenerating the candidate;
    # the old code re-formatted with the stale counter, re-testing the same
    # name and lagging one step behind.
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
155
156
def slugify(value):
    """
    Normalise a string into a safe, lowercase filename component:
    NFKC-normalised, stripped, with filesystem-hostile characters and
    trailing dots removed.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    return re.sub(r'\.*$', '', cleaned)
167
168
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids (or ThingLinks) off a queue and
    downloads them, until it receives a None sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ Consume the queue until the None sentinel arrives. """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            # Accept either a bare id string or a full ThingLink.
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            elif isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            else:
                thing = None
            if thing:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            else:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            self.thing_queue.task_done()
203
204
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.

    Child classes must set `url` (a fully-formatted listing endpoint) and
    `download_dir` before `get`/`download` are used.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping.

        Returns (and caches) a list of ThingLink objects.
        Raises ValueError if `url` was never set by the child class.

        Bug fix: this was previously decorated with @property, which made
        download()'s `self.get()` call the *returned list* and raise
        TypeError. It is now a plain method, matching its call sites.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # self.url should already have been formatted as we don't need pagination
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        # Bug fix: keep `total` in sync so download() reports the real count
        # (it previously always logged "Downloading 0 thing(s).").
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            # Bug fix: Thing.download requires an api_key argument; it was
            # previously omitted here, raising TypeError for every thing.
            return_code = Thing(thing).download(self.download_dir, self.compress, API_KEY)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
276
277
class Collection(Grouping):
    """ Holds details of a collection.

    The constructor resolves the collection name to its numeric id via the
    API; on any failure it logs and returns, leaving `url` unset.
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        collection = next(
            (x for x in response.json() if x['name'].casefold() == wanted), None)
        if collection is None:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))
311
312
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        # e.g. "<user> designs" under the target directory.
        dir_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, dir_name)
323
324
class Thing:
    """ An individual design on thingiverse.

    Construct from a ThingLink (or bare id via `from_thing_id`), then call
    `download()`, which lazily calls `_parse()` to fetch metadata, file and
    image links from the API before fetching the files themselves.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        # datetime of the newest previous download, if one is found on disk.
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        # Timestamp written to TIMESTAMP_FILE after a successful download.
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: initialise these here. _parse() only assigns them when the
        # API response contains the keys, and download() tests them inside a
        # handler that only catches IOError - a missing attribute used to
        # raise an uncaught AttributeError.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches thing details, file list and image list from the API, then
        compares against any previous download found under base_dir. Sets
        `_parsed` on success and `_needs_download` accordingly.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        # Bug fix: was logging.error(url), which leaked the raw API key into
        # the logs at error level on every parse.
        logging.debug(sanitise_url(url))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # Bug fix: previously only KeyError was caught, IndexError
                # (no large display rendition) crashed the parse, and the
                # code then fell through and appended a stale - or on the
                # first image, undefined - url. Skip the broken image instead.
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            # last_update is None when the thing has no parseable files.
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.

        Early versions used just the slugified name (no thing id prefix).
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed path (so unchanged files can be copied back from
        it), or None when there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file, then any
        "<thing_id>*.7z" archives whose names embed a SAFE_DATETIME_FORMAT
        stamp. Returns (latest, latest_time); both None when nothing found.
        NOTE(review): for 7z hits `latest` is a basename, while for the
        directory hit it is a full path - callers only test truthiness.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [:-3] strips the ".7z" extension.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None - first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
        Returns a State: OK iff the thing is now downloaded (not iff it
        downloads the thing!), ALREADY_DOWNLOADED when nothing was new,
        FAILED otherwise (the partial directory is moved aside via fail_dir).
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            # Bug fix: a failure in mkdir used to be caught by a handler that
            # referenced the (not yet bound) loop variable, raising NameError.
            logging.error("Failed to create image dir {} - {}".format(image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED
        for imagelink in self._image_links:
            try:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
            except Exception as exception:
                logging.error("Failed to download {} - {}".format(imagelink.name, exception))
                fail_dir(self.download_dir)
                return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally pack the whole directory into a 7z archive named with a
        # filesystem-safe timestamp, then remove the directory.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
731
732
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <user> <name>",
    "user <name>". Blank lines are skipped; anything else logs a warning.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                # Bug fix: Thing.download requires an api_key argument; it was
                # previously omitted, so every batch "thing" line raised
                # TypeError.
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, API_KEY)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                # Bug fix: this debug line previously said "collection".
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
760
761
def main():
    """ Entry point for script being run as a command.

    Parses the command line, resolves the API key (flag or 'api.key' file),
    configures logging, spins up the Downloader worker process(es), then
    dispatches to the chosen subcommand. "thing" ids go through the worker
    queue; "collection"/"user"/"batch" run synchronously in this process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; the handlers apply their own levels.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only attached to the file handler below,
    # so console output carries no timestamps - confirm that is intentional.

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        # Fall back to an 'api.key' file in the current directory.
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, API_KEY) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        # Thing ids are handed to the worker processes via the queue.
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    # One None sentinel per worker; each worker exits after consuming one.
    for _ in downloaders:
        thing_queue.put(None)
852
853 # Stop the downloader processes
854 for _ in downloaders:
855 thing_queue.put(None)
856
857
if __name__ == "__main__":
    # freeze_support() makes multiprocessing work in frozen (e.g. PyInstaller
    # Windows) executables; it is a no-op when running from source.
    multiprocessing.freeze_support()
    main()