update changelog
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21 from io import StringIO
22 from html.parser import HTMLParser
23
# 7z compression settings used when --compress is given.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# Format of dates in the API's file listings and in timestamp.txt.
# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API URL templates. Each template ends with the
# access-token query parameter, filled in via str.format().
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to reach the raw download.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Number of Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT is defined but not referenced anywhere in this file.
RETRY_COUNT = 3

# Longest absolute path this script will create (conservative for Windows
# path-length limits); see truncate_name().
MAX_PATH_LENGTH = 250

VERSION = "0.10.5"

# Marker file written into each download dir recording the newest file date.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so requests reuse connections.
SESSION = requests.Session()
57
class MLStripper(HTMLParser):
    """ Collects the text content of an HTML document, discarding all markup.
    """

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.text = StringIO()

    def error(self, message):
        # HTMLParser subclasses historically had to provide this hook.
        raise ValueError(message)

    def handle_data(self, d):
        # Every text node is accumulated into the buffer.
        self.text.write(d)

    def get_data(self):
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        """ Convenience wrapper: run *html* through a fresh stripper. """
        stripper = MLStripper()
        stripper.feed(html)
        return stripper.get_data()
83
84
@dataclass
class ThingLink:
    """ Minimal handle for a thing as returned by the list endpoints. """
    thing_id: str  # NOTE(review): annotated str, but Grouping.get passes the raw JSON 'id' — may be an int
    name: str  # human-readable name ('' when built via Thing.from_thing_id)
    api_link: str  # API URL for the thing
90
91
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename as reported by the API
    last_update: datetime.datetime  # parsed from the API's 'date' field
    link: str  # full download URL (includes the access token)
97
98
@dataclass
class ImageLink:
    """ A single image belonging to a thing. """
    name: str  # slugified image name
    link: str  # URL of the large 'display' rendition
103
104
class FileLinks:
    """ Container for file links that also tracks the most recent
    last_update stamp of anything appended to it.
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        for entry in (initial_links or []):
            self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping last_update at the newest stamp seen. """
        try:
            newest = max(self.last_update, link.last_update)
        except TypeError:
            # First append (last_update is still None): adopt the link's stamp.
            newest = link.last_update
        self.last_update = newest
        self.links.append(link)
129
130
class State(enum.Enum):
    """ Result of a Thing download attempt. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
135
136
def sanitise_url(url):
    """ Return a log-safe copy of *url* with any access token masked out. """
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
143
144
def strip_time(date_obj):
    """ Return *date_obj* with its time-of-day zeroed out (midnight). """
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
149
150
def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.

    If the preferred target exists, '_0', '_1', ... suffixes are tried
    until a free name is found.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
161
162
def fail_dir(dir_name):
    """ Park a directory whose download failed under a '<name>_failed' alias. """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
167
168
def truncate_name(file_name):
    """ Ensure an absolute path fits within MAX_PATH_LENGTH (for Windows).

    Short enough paths are returned untouched. Over-long paths have their
    stem cut down so the whole path fits; if the shortened name already
    exists on disk, a numeric suffix is appended (still within the limit)
    until a free name is found.

    Bug fix: the previous implementation never shortened anything — for an
    over-long path it returned "<base>_0<ext>", which is *longer* than the
    input — and its uniqueness loop re-tested the same candidate on its
    first pass because the counter was incremented after reformatting.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    inc = 0
    while True:
        # No suffix on the first attempt; "_0", "_1", ... thereafter.
        suffix = "_{}".format(inc) if inc else ""
        keep = MAX_PATH_LENGTH - len(extension) - len(suffix)
        new_path = base[:keep] + suffix + extension
        if not os.path.exists(new_path):
            return new_path
        inc += 1
182
183
def slugify(value):
    """
    Normalise a string for use as a filename: NFKC-fold and lowercase it,
    then strip characters that are invalid on common filesystems.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    # Trailing dots are not allowed in Windows file names.
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned.strip()
194
195
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids (or ThingLinks) off a queue and
    downloads each one, until it receives a None sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ Consume queue entries until the shutdown sentinel arrives. """
        while True:
            item = self.thing_queue.get()
            if item is None:
                # Sentinel: acknowledge it and stop this worker.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            if isinstance(item, str):
                thing = Thing.from_thing_id(item)
            elif isinstance(item, ThingLink):
                thing = Thing(item)
            else:
                thing = None
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(item))
            else:
                logging.info("Handling id {}".format(item))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
230
231
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve (and cache) the list of ThingLinks in the grouping.

        Follows the 'next' links in the API response headers until every
        page has been read. Raises ValueError if self.url was never set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # Follow next links until all items are found.
        current_url = self.url
        while current_url is not None:
            logging.info("requesting:{}".format(sanitise_url(current_url)))
            current_req = SESSION.get(current_url)
            current_url = current_req.links.get('next', {}).get('url')
            if current_req.status_code != 200:
                logging.error(
                    "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                    current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.debug(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # Bug fix: self.total was previously never updated, so download()
        # always logged "Downloading 0 thing(s)".
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the things in the grouping, one at a time.

        With quick mode enabled, stops at the first thing that is already
        up to date (assumes date-ordered listings).
        """
        if not self.things:
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
306
307
class Collection(Grouping):
    """ Holds details of a single named collection belonging to a user. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Resolve the collection name to its numeric ID via the API.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        matches = [x for x in response.json() if x['name'].casefold() == wanted]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
341
342
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        super().__init__(quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        target = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, target)
352
353
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None  # datestamp of the previous download, if any
        self._parsed = False  # has _parse() completed successfully?
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None  # datestamp recorded for this download
        self._file_links = FileLinks()
        self._image_links = []
        # Bug fix: these were previously only assigned inside _parse() when
        # the matching JSON keys existed, so a thing without a license or
        # description made download() crash with AttributeError.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches the thing's details, file list and image list from the API,
        then compares the newest file date against any previous download to
        decide whether a fresh download is required. Leaves self._parsed
        False on any API failure.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))
            # Guard: a null JSON body would otherwise make the loop below
            # raise TypeError.
            link_list = []

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))
            # Same null-body guard as for the file list.
            image_list = []

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                # Bug fix: without this continue a broken entry appended an
                # ImageLink built from a stale (or unbound) url; IndexError
                # is now also caught for entries lacking a large rendition.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the new (renamed) path, or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks the normal download directory's timestamp file, then any
        dated .7z archives for this thing id. Returns (location, datetime)
        or (None, None).
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time was still None - this is the first valid stamp.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State describing whether the thing is now downloaded
        (ALREADY_DOWNLOADED counts as success without doing any work).
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            # Bug fix: a failure here previously fell into an error handler
            # that referenced an unbound loop variable and crashed with
            # NameError; now it fails the download cleanly.
            logging.error("Failed to create image directory {} - {}".format(image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED
        try:
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally roll the whole directory up into a dated 7z archive.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
765
766
def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions:
        thing <id>
        collection <user> <name>
        user <user>
    Malformed lines (unknown keyword or too few arguments) are logged and
    skipped instead of crashing the batch with an IndexError, as happened
    previously for e.g. a bare "thing" line.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing" and len(command_arr) >= 2:
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection" and len(command_arr) >= 3:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user" and len(command_arr) >= 2:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
794
795
def main():
    """ Entry point for script being run as a command.

    Parses command-line arguments, configures logging, resolves the API
    key (flag or 'api.key' file), starts the downloader worker processes
    and dispatches to the chosen subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    # One subparser per mode of operation.
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default to the current working directory.
        args.directory = os.getcwd()

    # Root logger at DEBUG; handlers filter per-destination below.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only attached to the file handler below;
    # console output uses logging's default format — confirm intentional.

    # API key from the flag, or failing that from an 'api.key' file in cwd.
    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Only 'thing' downloads go through the worker queue; the other
    # subcommands run synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes: one None sentinel per worker.
    # NOTE(review): the workers are never join()ed here — confirm that main
    # exiting before the queue drains is acceptable.
    for _ in downloaders:
        thing_queue.put(None)
889
890
if __name__ == "__main__":
    # Required for multiprocessing in frozen Windows executables; a no-op
    # in ordinary interpreter runs.
    multiprocessing.freeze_support()
    main()