#!/usr/bin/env python3
"""
Thingiverse bulk downloader
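
Example invocations (the API key is read from -a on the command line,
falling back to a file called 'api.key' in the working directory):

    thingy_grabber.py thing <thing_id> [<thing_id> ...]
    thingy_grabber.py user <user_name> [<user_name> ...]
    thingy_grabber.py collection <owner> <collection_name> [...]
    thingy_grabber.py batch <batch_file>
    thingy_grabber.py version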
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21
22 SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]
23
24 # I don't think this is exported by datetime
25 DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
26 # Windows cannot handle : in filenames
27 SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
28
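# Thingiverse REST API endpoint templates; the {} placeholders are filled in
# with ids, usernames and the access token via str.format().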
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless, as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.3"

TIMESTAMP_FILE = "timestamp.txt"

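# One shared Session so HTTP connections get reused across the many API calls.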
SESSION = requests.Session()


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
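    """ A list of FileLink objects that also tracks the most recent
    last_update across everything appended to it.
    """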
    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - take the link's timestamp as-is.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
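    """ Result of a single Thing download attempt. """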
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)


def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00.
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())


def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the file path is not too long - for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the base so the whole path fits, then add a numeric suffix
    # until the name is unique.
    base = base[:MAX_PATH_LENGTH - len(extension)]
    inc = 0
    new_path = "{}{}".format(base, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path


def slugify(value):
    """
    Normalises the string, removes characters that are invalid in filenames
    and converts it to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value.strip()


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ The actual download loop. Exits when a None sentinel is pulled
        off the queue.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return


class Grouping:
    """ Holds details of a group of things for download.
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted, as we don't need pagination.
        logging.info("Requesting: {}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection. """
        if not self.things:
            # Property access populates self.things as a side effect.
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Need to figure out the ID for the collection.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        try:
            # Case insensitive to retain parity with previous behaviour.
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Filled in during _parse(); default to None so later checks are safe.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it.
        :param thing_id: the thing to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details.
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        logging.debug(sanitise_url(url))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA.
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                           current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Probably generated with the factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details.
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code,
                                                                        sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error("Unparseable date: {}".format(link['date']))

        # Finally get the image links.
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded.
            self._parsed = True
            return

        logging.info("Last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp; let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the time?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time.
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory.
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. Huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files.
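        # Archive names follow '<thing_id> - <slugified name> - <timestamp>.7z',
        # the format produced by download() below.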
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None, so this is the first candidate.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
        Returns a State: OK means the thing is now downloaded,
        not necessarily that anything was fetched this run.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download.

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        logging.debug("d:{} f:{}".format(self.download_dir, filelist_file))
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any).
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types.
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones.
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses.
        logging.info("Writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("Writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format that _find_last_download parses.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each line as a set of calls to this script. """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines.
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for the script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of the collection(s) to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download one or more individual things.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes
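    # (one None sentinel per downloader; Downloader.run exits when it sees one)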
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()