#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import atexit
import py7zr
import glob
import shutil

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

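# API URL templates. The "{}" placeholders are filled in with .format() at the
# point of use - the object ID first, then the access token for the templates
# that end in ACCESS_QP. API_USER_DESIGNS is paginated, so Grouping.get()
# appends the token and page number itself.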
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP

API_KEY = None

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.0"

TIMESTAMP_FILE = "timestamp.txt"

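# One shared Session gives us HTTP keep-alive against the API. Each Downloader
# worker process ends up with its own copy of this object rather than sharing
# sockets across processes.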
SESSION = requests.Session()

@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str

@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str

@dataclass
class ImageLink:
    name: str
    link: str

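# FileLinks is a small list wrapper that also tracks the newest last_update of
# everything appended to it; _parse() compares that high-water mark against the
# timestamp of the previous download.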
class FileLinks:
    def __init__(self, initial_links=None):
        # Avoid a mutable default argument; treat None as "no initial links".
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - take the first link's timestamp.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()

def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)

def strip_time(date_obj):
    """ Take a datetime object and return another with the time set to 00:00.
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())

def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))


def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Cut enough off the base name to fit within the limit, leaving room
    # for a "_N" uniqueness suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path

# Used by strip_ws below: collapse any run of whitespace.
NO_WHITESPACE_REGEX = re.compile(r'\s+')

def strip_ws(value):
    """ Remove whitespace from a string """
    return str(NO_WHITESPACE_REGEX.sub('-', value))


def slugify(value):
    """
    Normalise a string, remove characters that are invalid in filenames
    and convert the string to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value


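# Each Downloader is a worker process that pulls raw thing IDs off a shared
# JoinableQueue and downloads them; a None on the queue is the sentinel that
# tells the worker to shut down.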
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down downloader")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()


class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.paginated = False

    def get(self):
        """ retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        page = 0
        # TODO:: Must be a way to refactor this cleanly
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more!
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                logging.error("Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            state = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and state == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return

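# A Collection is resolved in two steps: fetch the user's full collection
# list, find the entry whose name matches (case-insensitively), then use its
# numeric ID to build the collection-things URL.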
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect to list collections for {}: {}".format(
                user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


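# Designs uses the paginated user-things endpoint, so only the username is
# baked into the URL here; the access token and page number are appended
# per-request in Grouping.get().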
class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


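# A Thing moves through two phases: _parse() works out what (if anything)
# needs fetching by comparing the API's file timestamps against the last
# downloaded state on disk, then download() copies unchanged files from the
# old directory and fetches only the new or updated ones.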
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        if isinstance(thing_link, ThingLink):
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        else:
            # A raw thing ID (from the download queue or a batch file);
            # the name is filled in from the API during _parse().
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self._license = None
        self._details = None
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()

        if self.name is None:
            # Constructed from a raw ID - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url']))
            except ValueError:
                logging.error("Unparseable date {} for file {}".format(link['date'], link['name']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(image.get('name', 'unknown image')))
                # Don't append a stale or undefined link for this image.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(
                self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

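    # Compressed downloads are named "<thing-id> - <slug> - <timestamp>.7z",
    # so previous archives can be found by globbing on the thing ID and
    # parsing the timestamp back out of the filename.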
    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # The slice strips the leading "<slug> - " and the trailing ".7z".
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time is still None - the first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.
            Returns a State: OK if the thing is now downloaded (even if nothing
            new was fetched), ALREADY_DOWNLOADED if it was up to date, FAILED otherwise.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download
        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(renamed_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link + url_suffix)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # The message doesn't name the image as the mkdir above can fail
            # before the loop variable is bound.
            logging.error("Failed to download images - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')),
                          'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')),
                          'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format _find_last_download parses.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


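# Batch files contain one whitespace-separated instruction per line,
# mirroring the subcommands:
#
#   thing <thing-id>
#   collection <owner> <collection-name>
#   user <username>
#
# Blank lines are skipped; anything else is warned about and ignored.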
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress)
                   for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand == "collection":
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()