16f84dbd32f3382aa0636003d8e1827c9902ba61
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import atexit
19 import py7zr
20 import glob
21 import shutil
22
# Compression settings for the 7z archives produced when --compress is used.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST API endpoints and query-parameter templates.
API_BASE="https://api.thingiverse.com"
ACCESS_QP="access_token={}"
PAGE_QP="page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP

# Populated in main() from --api-key or the 'api.key' file.
API_KEY = None

# Number of parallel Downloader worker processes to start.
DOWNLOADER_COUNT = 1
# NOTE(review): declared but never referenced in this file - presumably
# intended for retry logic. TODO confirm before removing.
RETRY_COUNT = 3

# Longest absolute path we will create (headroom under the Windows limit).
MAX_PATH_LENGTH = 250

VERSION = "0.10.1"

# Per-thing marker file recording when the thing was last downloaded.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so connections are reused across API calls.
SESSION = requests.Session()
@dataclass
class ThingLink:
    """ Lightweight reference to a thing as returned by the list endpoints;
    enough to fetch full details later via the API.
    """
    thing_id: str
    name: str
    api_link: str
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str
    last_update: datetime.datetime
    link: str
@dataclass
class ImageLink:
    """ A single image belonging to a thing. """
    name: str
    link: str
class FileLinks:
    """ A list of FileLink objects that also tracks the newest
    last_update timestamp seen across everything appended.
    """

    def __init__(self, initial_links=None):
        """ Create the collection, optionally seeded from an iterable of
        FileLink-like objects.

        BUG FIX: the default used to be a mutable `[]`, which is shared
        between every call and silently accumulates state.
        """
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping self.last_update as the newest timestamp. """
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First append: self.last_update is still None.
            self.last_update = link.last_update
        self.links.append(link)
96
97
class State(enum.Enum):
    """ Outcome of an attempt to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
102
def sanitise_url(url):
    """ Return url with any access_token query value masked out,
    so API keys never end up in logs.
    """
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
109
def strip_time(date_obj):
    """ Return a copy of date_obj with the time component zeroed out. """
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
114
def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.

    If target_dir_name already exists, a numeric _N suffix is appended
    until a free name is found.  Returns the name actually used.
    """
    suffix = 0
    candidate = target_dir_name
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
125
126
def fail_dir(dir_name):
    """ When a download has failed, move its directory sideways to a
    unique '<name>_failed' location and return the new path.
    """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
131
132
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits in MAX_PATH_LENGTH,
    otherwise a shortened, collision-free variant.

    BUG FIX: the shortening amount (`to_cut`) used to be computed and
    then never applied, so over-long paths were returned over-long, and
    the uniqueness loop regenerated the same candidate on its first pass.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the stem, leaving room for a "_<n>" disambiguation suffix.
    keep = MAX_PATH_LENGTH - len(extension) - 3
    base = base[:keep]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
147
148
def strip_ws(value):
    """ Replace each run of whitespace in value with a single '-'.

    BUG FIX: this used to call NO_WHITESPACE_REGEX.sub(...), but that
    constant is not defined anywhere in this file, so any call raised
    NameError.  The pattern is now inlined.
    """
    return str(re.sub(r'\s+', '-', value))
152
153
def slugify(value):
    """ Lowercase a string and strip characters that are not valid in
    filenames (plus trailing dots, which Windows rejects).
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:\?\*\|"]', '', cleaned)
    # Windows also dislikes names that end in dots.
    cleaned = re.sub(r'\.*$', '', cleaned)
    return cleaned
164
165
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one until a None sentinel arrives.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ Consume the queue; None is the shutdown sentinel. """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
191
192
193
194
195
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.  Subclasses must set
    self.url, self.paginated and self.download_dir.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the ThingLinks of the grouping.

        The result is cached on self.things; later calls return the
        cache.  Raises ValueError if self.url was never set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            page = 0
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(
                        current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more pages.
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                # BUG FIX: this branch used to reference `current_url`,
                # which is undefined here, raising NameError on failure.
                logging.error("Got unexpected code {} from url {}: {}".format(
                    current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        # BUG FIX: self.total was never updated, so download() always
        # reported "Downloading 0 thing(s)".
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            result = Thing(thing).download(self.download_dir, self.compress)
            # In quick mode, stop at the first thing we already have.
            if self.quick and result == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
283
class Collection(Grouping):
    """ Holds details of a collection.

    The constructor resolves the collection name to its numeric id via
    the API; on any failure it logs and returns, leaving self.url unset
    (a later get()/download() will then raise ValueError).
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            # BUG FIX: this handler used to reference self.thing_id, which
            # does not exist on Collection, so the real connection error
            # was masked by an AttributeError.
            logging.error("Unable to connect for collection {} of user {}: {}".format(
                name, user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list
                          if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(
            directory, "{}-{}".format(slugify(self.user), slugify(self.name)))
315
316
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        self.paginated = True
        dir_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, dir_name)
327
328
class Thing:
    """ An individual design on thingiverse.

    Nothing is fetched from the API until _parse()/download() runs.
    """

    def __init__(self, thing_link):
        """ Accept either a ThingLink or a bare thing id.

        BUG FIX: Downloader.run() and do_batch() enqueue raw id strings,
        which the previous version crashed on with AttributeError.  For
        a bare id the name is backfilled from the API during parsing.
        """
        try:
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        except AttributeError:
            # A bare id (from the download queue or a batch file).
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Fetches details, file list and image list from the API, then
        compares against any previous download to decide whether a
        fresh download is needed.  Sets self._parsed on success.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()
        if self.name is None:
            # Constructed from a bare id - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        # BUG FIX: _license/_details were left unset on KeyError, making
        # the later `if self._license:` raise AttributeError.
        self._license = thing_json.get('license')
        if self._license is None:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        self._details = thing_json.get('details')
        if self._details is None:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url']))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # BUG FIX: the failure previously fell through and appended
                # a stale (or unbound) name/url pair; also catch IndexError
                # from an empty 'sizes' list.
                logging.warning("Missing image for {}".format(image.get('name')))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed directory (so unchanged files can be copied
        back from it), or None if there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns a (location, timestamp) tuple, both None when nothing
        was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None: first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

        Returns a State value: OK, ALREADY_DOWNLOADED or FAILED.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link + url_suffix)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(
                        data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    # BUG FIX: this log line used to report file_link.link
                    # (the last *file* downloaded) instead of the image url.
                    logging.error("Unexpected status code {} for {}: {}".format(
                        image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Compress the finished directory into a dated 7z archive and
        # remove the uncompressed copy.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
715
716
717
718
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            command = command_arr[0]
            if command == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
            elif command == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
            elif command == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
746
747
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key, starts
    the downloader worker processes and dispatches to the selected
    subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default the target directory to the current working directory.
        args.directory = os.getcwd()

    # Root logger captures everything; the handlers do the filtering.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    # API key: command-line flag wins; otherwise read the 'api.key' file.
    global API_KEY
    if args.api_key:
        API_KEY=args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY=fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        # The file log is always at debug level, regardless of console verbosity.
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes, fed by thing_queue.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # NOTE(review): only the 'thing' subcommand uses the worker queue; the
    # other subcommands download synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)
845
846
if __name__ == "__main__":
    # freeze_support() is required before any multiprocessing use in
    # frozen Windows executables; it is a no-op elsewhere.
    multiprocessing.freeze_support()
    main()