# Commit: Make readmes text files
# Source: clinton/thingy_grabber.git / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21 from io import StringIO
22 from html.parser import HTMLParser
23
# Compression settings for the optional .7z archiving of downloads.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoint templates. Each takes str.format() arguments;
# the final placeholder is always the API access token.
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file URL to fetch its content.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Number of parallel Downloader worker processes.
DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

# Keep generated paths below this length (Windows path-length limits).
MAX_PATH_LENGTH = 250

VERSION = "0.10.4"

# Marker file written into each completed download directory.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so connections are reused across requests.
SESSION = requests.Session()
57
class MLStripper(HTMLParser):
    """Reduce a fragment of HTML markup to its plain-text content."""

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs= True
        # Accumulates the text seen between tags.
        self.text = StringIO()

    def error(self, message):
        # The base class declares this hook; surface parse problems loudly.
        raise ValueError(message)

    def handle_data(self, d):
        # Called by HTMLParser for each run of character data.
        self.text.write(d)

    def get_data(self):
        """Return all plain text collected so far."""
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        """Parse *html* and return only its textual content."""
        stripper = MLStripper()
        stripper.feed(html)
        return stripper.get_data()
83
@dataclass
class ThingLink:
    """ Reference to a single thing as returned by the listing APIs. """
    thing_id: str  # Thingiverse numeric id (kept as str)
    name: str      # human-readable thing name
    api_link: str  # API URL for the thing's details
89
90
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str                       # file name as published
    last_update: datetime.datetime  # last modification stamp from the API
    link: str                       # full download URL (includes access token)
96
97
@dataclass
class ImageLink:
    """ A single image belonging to a thing. """
    name: str  # slugified image name
    link: str  # URL of the chosen rendition
102
103
class FileLinks:
    """Ordered collection of FileLink objects.

    Tracks the most recent ``last_update`` stamp across everything appended,
    which is what the change-detection logic compares against.
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        # Route initial items through append() so last_update stays correct.
        for item in (initial_links if initial_links is not None else []):
            self.append(item)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """Add *link* and fold its stamp into the running maximum."""
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # max() against None (first link, or stampless link) lands here.
            self.last_update = link.last_update
        self.links.append(link)
128
129
class State(enum.Enum):
    """ Outcome of attempting to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
134
135
def sanitise_url(url):
    """Return *url* with any access-token query value masked, for safe logging."""
    return re.sub(r'access_token=\w*', 'access_token=***', url)
142
143
def strip_time(date_obj):
    """Return *date_obj* with its time-of-day zeroed out (midnight)."""
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
148
149
def rename_unique(dir_name, target_dir_name):
    """Move a directory sideways to a new name, ensuring it is unique.

    If *target_dir_name* already exists, numeric suffixes (_0, _1, ...)
    are tried until a free name is found. Returns the name actually used.
    """
    candidate = target_dir_name
    suffix = 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
160
161
def fail_dir(dir_name):
    """When a download has failed, move its directory sideways.

    Renames *dir_name* with a ``_failed`` marker (uniquified if needed) so a
    later retry starts from a clean slate. Returns the new directory name.
    """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
166
167
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it already fits within
    MAX_PATH_LENGTH. Otherwise the stem is cut down so that stem plus
    extension fits, and a numeric suffix is added if the shortened name
    collides with an existing file.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # BUGFIX: the old code never shortened the path at all - it only appended
    # a "_<n>" suffix, making an over-long path even longer. Cut the stem so
    # the result fits (always keeping at least one stem character).
    keep = MAX_PATH_LENGTH - len(extension)
    base = base[:max(keep, 1)]
    new_path = "{}{}".format(base, extension)
    # Truncation can collide with an earlier truncated file; uniquify.
    # (BUGFIX: the old loop also recomputed the same candidate before
    # incrementing, wasting an iteration.)
    inc = 0
    while os.path.exists(new_path):
        new_path = "{}_{}{}".format(base, inc, extension)
        inc += 1
    return new_path
181
182
def slugify(value):
    """
    Normalise string, removes invalid for filename charactersr
    and converts string to lowercase.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    # Drop characters that are illegal in filenames on common filesystems.
    cleaned = re.sub(r'[\\/<>:?*|"]', '', cleaned)
    # Trailing dots are not allowed on Windows.
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned.strip()
193
194
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.

    Worker process: pulls work items off a shared JoinableQueue. Items are
    either a thing-id string or a ThingLink; a None item is the shutdown
    sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue              # shared work queue (None = stop)
        self.download_directory = download_directory  # base dir for downloads
        self.compress = compress                    # whether to 7z the result
        self.api_key = api_key                      # Thingiverse API token

    def run(self):
        """ actual download loop.

        Runs until a None sentinel is pulled from the queue; every item,
        including the sentinel, is acknowledged with task_done().
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Sentinel: acknowledge it and shut this worker down.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            thing = None
            # Accept either a bare id string or a pre-built ThingLink.
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
229
230
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.

    Subclasses must set ``self.url`` (a fully-formatted listing endpoint)
    and ``self.download_dir`` in their __init__.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []        # list of ThingLink, populated by `get`
        self.total = 0          # number of things found (set by `get`)
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # BUGFIX: self.total was never updated, so download() always logged
        # "Downloading 0 thing(s)". Keep it in sync with what we found.
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            # Property access triggers the (lazy) listing fetch.
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                # Quick mode: assume date ordering, so stop at the first
                # thing we already have.
                logging.info("Caught up, stopping.")
                return
304
305
class Collection(Grouping):
    """ Holds details of a collection.

    The constructor resolves the collection name to its numeric id via the
    API. NOTE(review): on any failure below it returns early with self.url
    left as None, so a later `get` raises ValueError - confirm callers
    tolerate a half-initialised Collection.
    """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        # e.g. "<user>-<collection name>" under the target directory.
        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
339
340
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        """Initialise a grouping covering every design published by *user*."""
        super().__init__(quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        # Downloads land in "<user> designs" under the target directory.
        dir_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, dir_name)
350
351
class Thing:
    """ An individual design on thingiverse.

    Lifecycle: construct from a ThingLink (or via from_thing_id), then call
    download(), which lazily calls _parse() to fetch metadata, file and
    image listings before fetching the content itself.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None          # timestamp of the newest previous download
        self._parsed = False           # has _parse() completed successfully?
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None         # timestamp to record for this download
        self._file_links = FileLinks()
        self._image_links = []
        # BUGFIX: these were previously only created inside _parse() on its
        # happy path; reading them after a failed/partial parse raised
        # AttributeError. Give them safe defaults up front.
        self._license = None
        self._details = None
        self.slug = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Fetches thing details, file list and image list from the API and
        compares against any previous download. Leaves self._parsed False
        on failure so download() can abort.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                # Keep the raw HTML rather than losing the description.
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                # Unparseable date; skip this file but record what we saw.
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                # BUGFIX: the [0] below raises IndexError (not KeyError) when
                # no size matches, so catch both.
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                # BUGFIX: previously fell through and appended anyway, using an
                # unbound (NameError) or stale `url` from the prior iteration.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            # last_update is None when no files were parsed.
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed directory (so unchanged files can be copied back),
        or None when there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Checks both the normal download directory (via its timestamp file)
        and any compressed "<slug> - <stamp>.7z" archives in base_dir.
        Returns (location, timestamp), both None when nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [-3:] strips the ".7z" suffix off the stamp.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None: first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State: OK when the thing is now downloaded, FAILED on any
        error (the partial directory is moved aside), or ALREADY_DOWNLOADED
        when nothing new was found.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # NOTE(review): if os.mkdir itself raises, `imagelink` is unbound
            # here and this logs a NameError instead - confirm and fix upstream.
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.__str__())
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Compress the directory into "<id> - <slug> - <stamp>.7z" and remove it.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
766
767
def do_batch(batch_file, download_dir, quick, compress, api_key=None):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <user> <name>",
    "user <user>". Unknown lines are skipped with a warning.

    BUGFIX: every call below was missing the api_key argument that Thing,
    Collection and Designs now require, so batch mode raised TypeError.
    The new api_key parameter defaults to None for backward compatibility.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
795
796
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key, starts the
    downloader worker processes and dispatches to the chosen subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # BUGFIX: the formatter was created but never attached to the console
    # handler, so console output was unformatted.
    console_handler.setFormatter(formatter)
    # Attach the handler before the api-key lookup so its error messages are
    # logged through a configured handler rather than logging's last resort.
    logger.addHandler(console_handler)

    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for _ in downloaders:
        thing_queue.put(None)
    # BUGFIX: wait for the workers to drain the queue and exit cleanly rather
    # than relying on the interpreter's implicit join of non-daemon children.
    for downloader in downloaders:
        downloader.join()
891
892
if __name__ == "__main__":
    # Required for multiprocessing in frozen (e.g. PyInstaller) Windows builds.
    multiprocessing.freeze_support()
    main()