Fix global batch
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import py7zr
19 import glob
20 import shutil
21 from io import StringIO
22 from html.parser import HTMLParser
23
# Compression settings used when writing .7z archives.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoint templates; filled in with str.format().
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"  # query-parameter template for the API key
PAGE_QP = "page={}"  # NOTE(review): defined but not referenced in this file
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's public URL to get its download URL.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Number of parallel Downloader worker processes to start.
DOWNLOADER_COUNT = 1
RETRY_COUNT = 3  # NOTE(review): defined but not referenced in this file

# Keep generated paths under the (approximate) Windows MAX_PATH limit.
MAX_PATH_LENGTH = 250

VERSION = "0.10.4"

# Marker file written into a download dir to record its completion time.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so requests reuse connections.
SESSION = requests.Session()
56
57
class MLStripper(HTMLParser):
    """Reduce HTML markup to its plain-text content.

    Feed HTML in via the normal HTMLParser interface (or use the
    ``strip_tags`` convenience method); only text nodes are kept.
    """

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        # Turn character references (&amp; etc.) back into characters.
        self.convert_charrefs = True
        self.text = StringIO()

    def error(self, message):
        # Surface parser errors as ValueError so callers can catch them.
        raise ValueError(message)

    def handle_data(self, d):
        # Accumulate every text node into the buffer.
        self.text.write(d)

    def get_data(self):
        """Return all text collected so far."""
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        """Run *html* through a fresh stripper and return the plain text."""
        stripper = MLStripper()
        stripper.feed(html)
        return stripper.get_data()
83
84
@dataclass
class ThingLink:
    """ Minimal identification of a single thing on thingiverse. """
    thing_id: str  # thing id as used in API URLs
    name: str  # human-readable thing name (may be empty until parsed)
    api_link: str  # API URL for the thing's details
90
91
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename as reported by the API
    last_update: datetime.datetime  # upstream last-modified time
    link: str  # full download URL (access token appended by the caller)
97
98
@dataclass
class ImageLink:
    """ A single downloadable image belonging to a thing. """
    name: str  # slugified image name
    link: str  # URL of the chosen image rendition
103
104
class FileLinks:
    """List-like container of FileLink objects.

    Also tracks ``last_update``: the most recent update timestamp across
    everything appended so far (None while the container is empty).
    """

    def __init__(self, initial_links=None):
        self.links = []
        self.last_update = None
        if initial_links:
            for entry in initial_links:
                self.append(entry)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """Add *link* and fold its timestamp into ``last_update``."""
        # max() against None raises TypeError; in that case just take the
        # link's own timestamp (mirrors the no-links-yet state).
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            self.last_update = link.last_update
        self.links.append(link)
129
130
class State(enum.Enum):
    """ Result of attempting to download a thing. """
    OK = enum.auto()  # downloaded (or freshly archived) successfully
    FAILED = enum.auto()  # download attempted but failed
    ALREADY_DOWNLOADED = enum.auto()  # nothing new since the last download
135
136
def sanitise_url(url):
    """Return *url* with any access_token value masked for safe logging."""
    return re.sub(r'access_token=\w*', 'access_token=***', url)
143
144
def strip_time(date_obj):
    """Return *date_obj* normalised to midnight (time component dropped)."""
    return datetime.datetime.combine(date_obj.date(), datetime.time.min)
149
150
def rename_unique(dir_name, target_dir_name):
    """Move *dir_name* to *target_dir_name*, appending _0, _1, ... if needed.

    Returns the path the directory ended up at.
    """
    candidate = target_dir_name
    suffix = 0
    # Probe until we find a name that is not taken.
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
161
162
def fail_dir(dir_name):
    """Move a failed download directory aside to ``<name>_failed``.

    Returns the path it was moved to (uniquified if necessary).
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))
167
168
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it already fits within
    MAX_PATH_LENGTH.  Otherwise the path's stem is cut down so the whole
    path fits, and a numeric suffix is appended (incremented until unused)
    to avoid collisions between different names truncated to the same stem.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # BUGFIX: previously nothing was truncated at all - a "_0" suffix was
    # appended to the over-long path, making it even longer - and the first
    # candidate was re-evaluated once before the counter advanced.
    # Leave room for a "_<n>" collision suffix of up to 5 characters.
    base = base[:MAX_PATH_LENGTH - len(extension) - 5]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
182
183
184 def slugify(value):
185 """
186 Normalise string, removes invalid for filename charactersr
187 and converts string to lowercase.
188 """
189 logging.debug("Sluggyfying {}".format(value))
190 value = unicodedata.normalize('NFKC', value).lower().strip()
191 value = re.sub(r'[\\/<>:?*|"]', '', value)
192 value = re.sub(r'\.*$', '', value)
193 return value.strip()
194
195
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a queue and downloads them.

    A ``None`` on the queue is the shutdown sentinel.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Shutdown sentinel received.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            # Work out what kind of identifier we were handed.
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            elif isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if thing:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            else:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            self.thing_queue.task_done()
230
231
class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.

        Child classes must set ``self.url`` (the listing endpoint) and
        ``self.download_dir`` (where things are written).
    """

    def __init__(self, quick, compress, api_key):
        self.things = []  # ThingLinks discovered for this grouping
        self.total = 0  # number of things found (kept in sync by `get`)
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ retrieve the things of the grouping.

        Queries the API on first use and caches the result; raises
        ValueError when the child class never set ``self.url``.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # BUGFIX: self.total was initialised to 0 and never updated, so
        # progress logging always reported zero things.
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        # BUGFIX: log the real count rather than the never-updated self.total.
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            # In quick mode, stop as soon as we reach something we already have.
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
303
304
class Collection(Grouping):
    """ Holds details of a collection.

    Resolves the collection's numeric id from the owner's collection list
    at construction time; on any failure the object is left without a URL
    (Grouping.download will then raise).
    """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # We need to resolve the numeric ID for the collection first.
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            response = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if response.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(response.status_code, sanitise_url(collection_url),
                                                                response.text))
            return
        # case insensitive to retain parity with previous behaviour
        matches = [x for x in response.json() if x['name'].casefold() == name.casefold()]
        if not matches:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = matches[0]['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
338
339
class Designs(Grouping):
    """ Holds details of all of a users' designs.

    The listing URL and download directory are derived directly from the
    user name; no extra API lookup is needed at construction time.
    """

    def __init__(self, user, directory, quick, compress, api_key):
        super().__init__(quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
349
350
class Thing:
    """ An individual design on thingiverse.

    Construct from a ThingLink (or via ``from_thing_id``) and call
    ``download()``; that lazily calls ``_parse()`` to query the API for
    details, files and images before fetching anything new to disk.
    """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None  # timestamp of the newest previous download
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None  # timestamp recorded for *this* download
        self._file_links = FileLinks()
        self._image_links = []
        # BUGFIX: _license/_details were previously only set conditionally
        # inside _parse(), so download() raised AttributeError for things
        # whose API response lacked a license or description.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything needs to be done.

        Queries the API for details, file list and image list, then compares
        against any previous download.  Leaves self._parsed False on
        network/API failure (callers treat that as a failed parse).
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                          current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                # Fall back to the raw HTML rather than losing the text.
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except KeyError:
                logging.warning("Missing image for {}".format(name))
                # BUGFIX: skip this image; previously execution fell through
                # and appended an ImageLink with an undefined or stale url.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            # last_update is None when the thing has no files at all.
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        # Pre-0.x layout used just the slugified name, without the thing id.
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.

        Returns the path it was moved to, or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns (location, timestamp); both None when nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [-3:] strips the ".7z" extension from the timestamp.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None: first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.

        Returns a State: OK when the thing is now downloaded, FAILED on any
        error (the partial directory is moved aside), or ALREADY_DOWNLOADED
        when nothing new was available.
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        # BUGFIX: pre-bind so the error handler below cannot NameError when
        # os.mkdir itself raises before the loop ever binds imagelink.
        imagelink = None
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(
                imagelink.name if imagelink else image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.__str__())
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Optionally archive the whole directory into a dated .7z and
        # remove the uncompressed copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
762
763
def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            command = command_arr[0]
            if command == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
            elif command == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
            elif command == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
791
792
def main():
    """ Entry point for script being run as a command.

    Parses the command line, configures logging, resolves the API key,
    starts the Downloader worker process(es) and dispatches to the
    requested subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    # One sub-parser per supported action.
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        # No action requested: show usage and exit non-zero.
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default to downloading into the current working directory.
        args.directory = os.getcwd()

    # Root logger captures everything; handlers filter by level.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): `formatter` is only applied to the file handler below;
    # console output keeps logging's default format — confirm intended.

    # Command-line key wins; otherwise read it from an 'api.key' file.
    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            # NOTE(review): these errors are logged before any handler is
            # attached to the root logger, so they may not honour the
            # requested log level/format.
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Only the 'thing' subcommand uses the worker queue; the others run
    # synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes
    # NOTE(review): the queue is never join()ed here; main returns after
    # queueing the sentinels and the non-daemonic workers finish on their
    # own — confirm this shutdown behaviour is intended.
    for _ in downloaders:
        thing_queue.put(None)
886
887
888 if __name__ == "__main__":
889 multiprocessing.freeze_support()
890 main()