#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import py7zr
import glob
import shutil
from io import StringIO
from html.parser import HTMLParser

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP
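
# The constants above are URL templates; filling one in looks like this
# (illustrative key):
#   API_THING_DETAILS.format(1234, "KEY")
#   -> "https://api.thingiverse.com/things/1234/?access_token=KEY"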

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.5"

TIMESTAMP_FILE = "timestamp.txt"

SESSION = requests.Session()


class MLStripper(HTMLParser):
    """ Turns HTML markup into plain text
    """

    def error(self, message):
        raise ValueError(message)

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.text = StringIO()

    def handle_data(self, d):
        self.text.write(d)

    def get_data(self):
        return self.text.getvalue()

    @staticmethod
    def strip_tags(html):
        s = MLStripper()
        s.feed(html)
        return s.get_data()
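
# A quick sketch of the intended use:
#   MLStripper.strip_tags("<p>Hi <b>there</b></p>") yields "Hi there".
# Thing._parse() uses this to flatten HTML thing descriptions into plain text.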


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None (first link); max() with None raises
            # TypeError, so just take the new link's timestamp.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)
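
# e.g. sanitise_url("https://api.thingiverse.com/things/1234/?access_token=abc123")
# -> "https://api.thingiverse.com/things/1234/?access_token=***"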


def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())


def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the base so the result fits, leaving room for a "_N" suffix.
    base = base[:MAX_PATH_LENGTH - len(extension) - 4]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
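
# e.g. a 300-character .stl path comes back as a roughly 250-character
# "<shortened base>_0.stl"; the MAX_PATH_LENGTH figure is headroom for
# Windows path-length limits.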


def slugify(value):
    """
    Normalise a string, remove characters that are invalid in filenames,
    and convert it to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value.strip()
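
# e.g. slugify('My "Cool" Thing: v2.') -> 'my cool thing v2'
# (invalid filename characters dropped, trailing dots stripped, lowercased)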


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress, api_key):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress
        self.api_key = api_key

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            thing = None
            if isinstance(thing_id, str):
                thing = Thing.from_thing_id(thing_id)
            if isinstance(thing_id, ThingLink):
                thing = Thing(thing_id)
            if not thing:
                logging.error("Don't know how to handle thing_id {}".format(thing_id))
            else:
                logging.info("Handling id {}".format(thing_id))
                thing.download(self.download_directory, self.compress, self.api_key)
            self.thing_queue.task_done()
        return
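
    # Shutdown is sentinel-based: main() enqueues one None per Downloader and
    # each process exits its loop when it pulls one off the queue.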


class Grouping:
    """ Holds details of a group of things for download.
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress, api_key):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        self.api_key = api_key
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    @property
    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))

        # Follow "next" links until all items are found. requests exposes the
        # parsed HTTP Link header as response.links.
        current_url = self.url
        while current_url is not None:
            requested_url = current_url
            logging.info("requesting:{}".format(sanitise_url(requested_url)))
            current_req = SESSION.get(requested_url)
            current_url = current_req.links.get('next', {}).get('url')
            if current_req.status_code != 200:
                logging.error(
                    "Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(requested_url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.debug(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            # Note: "get" is a property; accessing it triggers the fetch.
            self.get

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress, self.api_key)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
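
    # "quick" mode leans on the assumption that the API lists things
    # newest-first, so the first ALREADY_DOWNLOADED result means everything
    # older is already local and the loop can stop early.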


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.name = name
        self.paginated = False
        # Need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, api_key)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # Case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, api_key)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress, api_key):
        Grouping.__init__(self, quick, compress, api_key)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, api_key)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Default these so download() can test them even when _parse() could
        # not find a license or description.
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """
        Factory method that looks up a thing by ID and creates a Thing object for it
        :param thing_id: to look up
        :return: Thing or None
        """
        return Thing(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir, api_key):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, api_key)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        details = None
        try:
            details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if details:
            try:
                self._details = MLStripper.strip_tags(details)
            except ValueError as e:
                logging.warning("Unable to strip HTML from readme: {}".format(e))
                self._details = details

        if not self.name:
            # Probably generated with the factory method.
            try:
                self.name = thing_json['name']
            except KeyError:
                logging.warning("No name found for thing {}?".format(self.thing_id))
                self.name = self.thing_id

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
            except ValueError:
                logging.error("Unparseable date {} for {}".format(link['date'], link['name']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, api_key)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(
                    current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fall back to other image types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # Without both a name and a URL there is nothing to download.
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp; let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except (TypeError, ValueError):
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # Slice off the "<slug> - " prefix and the ".7z" suffix to
                # leave just the datestamp.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None, so the first candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return latest, latest_time
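
    # Archive names follow "<thing_id> - <slug name> - <YYYY-MM-DD HH.MM.SS>.7z"
    # (see the compression step at the end of download()), which is what the
    # leading_length/-3 slicing above unpacks.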

    def download(self, base_dir, compress, api_key):
        """ Download all files for a given thing.
            Returns a State value indicating whether the thing is now
            downloaded (not whether this call actually downloaded it!)
        """
        if not self._parsed:
            self._parse(base_dir, api_key)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    # Only include the response body at debug level - it can be large.
                    logging.error("Unexpected status code {} for {}".format(
                        data_req.status_code, sanitise_url(file_link.link)))
                    logging.debug("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        imagelink = None
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # imagelink is still None if os.mkdir() itself failed.
            logging.error("Failed to download {} - {}".format(
                imagelink.name if imagelink else image_dir, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress, api_key):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress, api_key)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress, api_key).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress, api_key).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
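
# A batch file holds one instruction per line in the shapes parsed above, e.g.:
#   thing 12345
#   collection someuser favourites
#   user someuser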


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    if args.api_key:
        api_key = args.api_key
    else:
        try:
            with open("api.key") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress, api_key) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress, api_key).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress, api_key)

    # Stop the downloader processes: one None sentinel per Downloader.
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()
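
# Typical invocations (a sketch; "APIKEY" is a placeholder):
#   python3 thingy_grabber.py -a APIKEY thing 12345
#   python3 thingy_grabber.py -a APIKEY -c user someuser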