# fix individual thing grabs
# [clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
# Compression settings for the optional 7z archiving step.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoint templates.  ACCESS_QP / PAGE_QP are
# query-string fragments formatted into the URLs below.
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
# NOTE(review): PAGE_QP appears unused in this file - TODO confirm pagination.
PAGE_QP = "page={}"
# NOTE(review): unlike the other endpoints this one carries no access_token
# query parameter - verify that Designs requests are still authorised.
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to fetch its content.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Populated in main() from --api-key or a local 'api.key' file.
API_KEY = None

# Number of parallel Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT appears unused in this file - TODO confirm.
RETRY_COUNT = 3

# Hard cap on absolute path length (Windows limit is 260 characters).
MAX_PATH_LENGTH = 250

VERSION = "0.10.3"

# Per-download marker file recording the newest file timestamp.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so connections are reused across requests.
SESSION = requests.Session()
56
57
@dataclass
class ThingLink:
    """Lightweight reference to a thing, as returned by listing endpoints."""
    thing_id: str  # thing id as supplied by the API ('id' field)
    name: str  # human-readable thing name ('name' field)
    api_link: str  # API URL for the thing ('url' field)
63
64
@dataclass
class FileLink:
    """A single downloadable file belonging to a thing."""
    name: str  # file name on thingiverse
    last_update: datetime.datetime  # parsed from the API 'date' field
    link: str  # download URL (includes the access token)
70
71
@dataclass
class ImageLink:
    """A single image belonging to a thing."""
    name: str  # slugified image name
    link: str  # direct URL of the (large display) image
76
77
class FileLinks:
    """Ordered collection of FileLink entries that tracks the newest update time.

    Supports iteration, indexing and len(); ``last_update`` always holds the
    maximum ``last_update`` of the links added so far (or None when empty).
    """

    def __init__(self, initial_links=None):
        """Optionally seed the collection from an iterable of links."""
        self.links = []
        self.last_update = None
        if initial_links is None:
            initial_links = []
        for entry in initial_links:
            self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """Add *link* and fold its last_update into the running maximum."""
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First link: last_update is still None, which max() cannot compare.
            self.last_update = link.last_update
        self.links.append(link)
102
103
class State(enum.Enum):
    """Outcome of a Thing download attempt."""
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
108
109
def sanitise_url(url):
    """Mask any access_token query parameter in *url* so it is safe to log."""
    return re.sub(r'access_token=\w*', 'access_token=***', url)
116
117
def strip_time(date_obj):
    """Return *date_obj* with its time-of-day reset to midnight (00:00)."""
    return datetime.datetime.combine(date_obj.date(), datetime.time.min)
122
123
def rename_unique(dir_name, target_dir_name):
    """Move a directory sideways to a new name, ensuring it is unique.

    If *target_dir_name* already exists, "_0", "_1", ... suffixes are tried
    until a free name is found.  Returns the name actually used.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
134
135
def fail_dir(dir_name):
    """When a download has failed, move its directory sideways.

    The directory gets a ``_failed`` suffix (made unique if necessary) so a
    later retry can start from scratch.  Returns the new name.
    """
    return rename_unique(dir_name, dir_name + "_failed")
140
141
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns an absolute path whose length does not exceed MAX_PATH_LENGTH.
    If shortening the name collides with an existing file, a numeric
    "_N" suffix is added until a free name is found.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Bug fix: the old code never shortened the path - it appended "_0"
    # (making it *longer*) and also recomputed the same candidate twice per
    # loop because the counter was incremented after formatting.
    # Shorten the base, keeping room for the extension and an "_NNN" suffix.
    base = base[:MAX_PATH_LENGTH - len(extension) - 4]
    new_path = "{}{}".format(base, extension)
    inc = 0
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
155
156
def slugify(value):
    """Normalise *value* into a filesystem-safe name.

    Applies NFKC normalisation, lowercases, strips surrounding whitespace,
    removes characters that are invalid in (Windows) filenames and trims
    trailing dots.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned.strip()
167
168
class Downloader(multiprocessing.Process):
    """Worker process that pulls thing ids off a queue and downloads them.

    The queue may carry bare id strings or ThingLink objects; a ``None``
    entry is the shutdown sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue  # JoinableQueue of work items
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """Consume the queue until the None sentinel arrives."""
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            # Work out how to build a Thing from whatever we were handed.
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if thing is None:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
203
204
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping.

        Returns the cached list when already fetched; otherwise queries
        self.url and builds ThingLink entries.
        Raises ValueError when self.url was never set by the subclass.

        Bug fix: this was decorated as a @property while every caller invoked
        it as a method, so ``self.get()`` raised
        ``TypeError: 'list' object is not callable``.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        # Bug fix: self.total is never populated, so report the real count.
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            # Bug fix: Thing.download() requires the api key as its third
            # argument; it was previously omitted here (TypeError at runtime).
            return_code = Thing(thing).download(self.download_dir, self.compress, API_KEY)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
276
277
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        """Look up *name* among *user*'s collections and prepare for download.

        On any API/connection failure this logs the error and returns,
        leaving self.url unset (Grouping.get will then raise).
        """
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # The API only exposes collections by numeric id, so resolve the name.
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        matches = [candidate for candidate in collection_list
                   if candidate['name'].casefold() == wanted]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
311
312
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        """Set up a grouping covering every design published by *user*."""
        super().__init__(quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        # Everything lands under "<user> designs" inside the target directory.
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
323
324
class Thing:
    """ An individual design on thingiverse.

    Built from a ThingLink (or a bare id via from_thing_id), lazily parsed
    against the API and then downloaded into a per-thing directory, which is
    optionally compressed into a 7z archive afterwards.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        # Timestamp of the newest file in the most recent previous download.
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        # Newest file timestamp of the current download; persisted to TIMESTAMP_FILE.
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: initialise these here so download() can test them even when
        # _parse() did not find them in the API response (previously a missing
        # 'license'/'details' key led to AttributeError later on).
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches details, file list and image list from the API, then compares
        the newest file timestamp against any previous download found under
        *base_dir*.  Sets self._parsed on success; early return leaves it
        False, which download() treats as failure.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        # Bug fix: this used to be logging.error(url), which logged the raw
        # URL (including the access token) at ERROR level.
        logging.debug("Requesting details: {}".format(sanitise_url(url)))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                # Bug fix: skip this image instead of appending a stale URL
                # left over from an earlier assignment.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.

        Old downloads used the bare slugified name (no thing id prefix);
        move any such directory onto the current naming scheme.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the new name of the moved directory, or None when there was
        nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file first, then any
        "<thing_id>*.7z" archives in *base_dir*.
        Returns (location, timestamp); both None when nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except (TypeError, ValueError):
            # Bug fix: a malformed timestamp raises ValueError from strptime,
            # which previously escaped this handler and aborted the download.
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None; the first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State value: OK when the thing is now downloaded, FAILED on
        any error (the partial directory is moved aside via fail_dir), and
        ALREADY_DOWNLOADED when nothing new was found.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            # Split into files changed since last time vs files we can copy.
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        # Bug fix: this was leftover debugging emitted at ERROR level.
        logging.debug("d:{} f:{}".format(self.download_dir, filelist_file))
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Archive the finished directory and remove the plain copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
732
733
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <user> <name>",
    "user <name>".  Blank lines are skipped; unknown lines log a warning.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                # Bug fix: Thing.download() requires the api key as its third
                # argument; it was previously omitted here (TypeError at
                # runtime for every "thing" batch instruction).
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, API_KEY)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                # Bug fix: this previously logged "batch collection instruction".
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
761
762
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key, starts the
    downloader worker processes and dispatches to the chosen subcommand.
    Exits with status 1 when no subcommand is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Bug fix: "version" previously still demanded an API key and spun up
    # (and immediately shut down) worker processes.  Handle it up front.
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
        return

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    # Resolve the API key: command line wins, then a local 'api.key' file.
    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, API_KEY) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Collections and designs are downloaded in this process; individual
    # things are handed to the worker queue.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for _ in downloaders:
        thing_queue.put(None)
857
858
if __name__ == "__main__":
    # Needed so frozen Windows executables can spawn worker processes safely.
    multiprocessing.freeze_support()
    main()