59b5ff595e9299266699872e16724d0e80a8df32
[clinton/thingy_grabber.git] / thingy_grabber.py
1 #!/usr/bin/env python3
2 """
3 Thingiverse bulk downloader
4 """
5
6 import re
7 import sys
8 import os
9 import argparse
10 import unicodedata
11 import requests
12 import logging
13 import multiprocessing
14 import enum
15 import datetime
16 from shutil import copyfile
17 from dataclasses import dataclass
18 import atexit
19 import py7zr
20 import glob
21 import shutil
22
# Compression settings used when the -c/--compress option is given.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Thingiverse REST endpoint templates; ACCESS_QP/PAGE_QP are query-string
# fragments filled in with the API key and page number respectively.
API_BASE="https://api.thingiverse.com"
ACCESS_QP="access_token={}"
PAGE_QP="page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Appended to a file URL to obtain the actual download link.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Populated in main() from --api-key or a local 'api.key' file.
API_KEY = None

DOWNLOADER_COUNT = 1  # number of parallel Downloader worker processes
RETRY_COUNT = 3  # NOTE(review): defined but not referenced anywhere in this file

# Cap on absolute path length (headroom under the Windows MAX_PATH limit).
MAX_PATH_LENGTH = 250

VERSION = "0.10.2"

# Per-thing marker file recording the datetime of the last download.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session so TCP connections are reused across requests.
SESSION = requests.Session()
57
@dataclass
class ThingLink:
    """ Identity and API location of a single thing, as returned by
    the listing endpoints (see Grouping.get).
    """
    thing_id: str  # thing identifier as used in API URLs
    name: str  # human-readable title of the thing
    api_link: str  # URL of the thing's API detail endpoint
63
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing. """
    name: str  # filename to save the download as
    last_update: datetime.datetime  # modification stamp parsed from the API
    link: str  # download URL (has the access token appended - see Thing._parse)
69
@dataclass
class ImageLink:
    """ A single preview image belonging to a thing. """
    name: str  # slugified image name used as the local filename
    link: str  # URL of the image to fetch
74
class FileLinks:
    """ Ordered collection of FileLink objects that also tracks the most
    recent ``last_update`` stamp seen across all of its members.

    Supports iteration, indexing and len() so it can mostly be used
    like a plain list of links.
    """

    def __init__(self, initial_links=None):
        # BUG FIX: the previous signature used a mutable default
        # argument (initial_links=[]), which is shared between calls.
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link, keeping last_update as the newest stamp seen. """
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - the first link's stamp wins.
            self.last_update = link.last_update
        self.links.append(link)
97
98
class State(enum.Enum):
    """ Outcome of an attempt to download a thing. """
    OK = enum.auto()  # downloaded successfully
    FAILED = enum.auto()  # an error occurred; partial dir moved aside
    ALREADY_DOWNLOADED = enum.auto()  # nothing new since the last run
103
def sanitise_url(url):
    """Return *url* with any access_token query value masked for logging."""
    masked = re.sub(r'access_token=\w*', 'access_token=***', url)
    return masked
110
def strip_time(date_obj):
    """Return *date_obj* with its time-of-day zeroed out to midnight."""
    midnight = datetime.time()
    return datetime.datetime.combine(date_obj.date(), midnight)
115
def rename_unique(dir_name, target_dir_name):
    """Move *dir_name* to *target_dir_name*, appending a numeric suffix
    ("_0", "_1", ...) if the target already exists.  Returns the name
    actually used.
    """
    candidate, counter = target_dir_name, 0
    while os.path.exists(candidate):
        candidate = "{}_{}".format(target_dir_name, counter)
        counter += 1
    os.rename(dir_name, candidate)
    return candidate
126
127
def fail_dir(dir_name):
    """Shunt a failed download directory aside under a '_failed' name."""
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
132
133
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it fits within
    MAX_PATH_LENGTH.  Otherwise the base name is shortened and a
    numeric suffix appended so the result does not collide with an
    existing file.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # BUG FIX: the amount to cut was computed but never applied, so
    # over-long paths were returned at full length.  Shorten the base,
    # reserving room for a "_NN" disambiguation suffix.
    available = MAX_PATH_LENGTH - len(extension) - 4
    base = base[:available]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        # BUG FIX: the old loop recomputed new_path before incrementing,
        # wasting its first iteration on the same name.
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
148
149
def strip_ws(value):
    """ Remove whitespace from a string, replacing each run of
    whitespace with a single hyphen.
    """
    # BUG FIX: this used NO_WHITESPACE_REGEX, which is not defined
    # anywhere in this file, so any call raised NameError.
    return str(re.sub(r'\s+', '-', value))
153
154
def slugify(value):
    """
    Normalise a string for use as a filename: strips characters that
    are invalid on common filesystems (and trailing dots) and lowercases
    the result.
    """
    logging.debug("Sluggyfying {}".format(value))
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:\?\*\|"]', '', cleaned)
    return re.sub(r'\.*$', '', cleaned)
165
166
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one, until a None sentinel arrives.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Sentinel received - acknowledge it and stop.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
192
193
194
195
196
class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ retrieve the things of the grouping.

        Returns the list of ThingLink objects found (cached after the
        first call).  Raises ValueError if no URL has been configured.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        page = 0
        # TODO:: Must be a way to refactor this cleanly
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            while True:
                page += 1
                current_url = page_url.format(API_KEY, page)
                logging.info("requesting:{}".format(sanitise_url(current_url)))
                current_req = SESSION.get(current_url)
                if current_req.status_code != 200:
                    logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(current_url), current_req.text))
                    break
                current_json = current_req.json()
                if not current_json:
                    # No more!
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            logging.info("requesting:{}".format(sanitise_url(self.url)))
            current_req = SESSION.get(self.url)
            if current_req.status_code != 200:
                # BUG FIX: this branch previously referenced the undefined
                # name `current_url` (only bound in the paginated branch),
                # raising NameError instead of logging the failure.
                logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url), current_req.text))
            else:
                current_json = current_req.json()
                for thing in current_json:
                    logging.info(thing)
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        logging.info("Found {} things.".format(len(self.things)))
        # BUG FIX: total was never updated, so download() always logged
        # "Downloading 0 thing(s)".  Keep it in sync with the real count.
        self.total = len(self.things)
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            RC = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and RC == State.ALREADY_DOWNLOADED:
                # Date-ordered feed and we've hit a known thing - stop early.
                logging.info("Caught up, stopping.")
                return
284
class Collection(Grouping):
    """ Holds details of a collection.

    Resolves the collection id by name from the user's collection list
    at construction time; network failures leave the object without a
    url/download_dir, which Grouping.get()/download() then reject.
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            # BUG FIX: this handler used to format self.thing_id, an
            # attribute Collection never has, raising AttributeError
            # inside the except block.
            logging.error("Unable to connect for collection {}: {}".format(
                self.name, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
316
317
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        # The designs listing is paginated; store the bare endpoint and
        # let Grouping.get() append the access-token/page parameters.
        self.paginated = True
        self.url = API_USER_DESIGNS.format(user)
        target = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, target)
328
329
class Thing:
    """ An individual design on thingiverse.

    Construct with either a ThingLink (from a Grouping listing) or a
    bare thing id; call download() to fetch files, images, license and
    description into a per-thing directory under base_dir.
    """

    def __init__(self, thing_link):
        if hasattr(thing_link, 'thing_id'):
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        else:
            # BUG FIX / generalisation: Downloader.run and do_batch pass
            # bare ids, which previously crashed on thing_link.thing_id.
            # Accept them and resolve the name from the API in _parse().
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Always present so download() can test them without risking
        # AttributeError when the API response lacked the keys.
        self._license = None
        self._details = None

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Fetches thing details, file list and image list from the API,
        sets download_dir/slug, and decides _needs_download by comparing
        against any previous download.  Leaves _parsed False on error.
        """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url), current_req.text))
            return

        thing_json = current_req.json()

        if self.name is None:
            # Constructed from a bare id - pick the name up from the API.
            self.name = thing_json.get('name', str(self.thing_id))

        # BUG FIX: use .get() so a missing key leaves the attribute as
        # None rather than unset - download() reads both unconditionally.
        self._license = thing_json.get('license')
        if self._license is None:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        self._details = thing_json.get('details')
        if self._details is None:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url), current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(API_KEY)))
            except ValueError:
                logging.error(link['date'])

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url), current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type']=='display' and x['size']=='large'][0]['url']
            except KeyError:
                logging.warning("Missing image for {}".format(name))
                # BUG FIX: the old code fell through here and appended a
                # link with stale (or, on the first iteration, undefined)
                # name/url values.  Skip the broken entry instead.
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        # Old versions used just the slugified name, without the id prefix.
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

        Returns the renamed path, or None when there was nothing to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

        Returns (location, timestamp): location is the download dir or
        the basename of a dated 7z archive, or None when nothing found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except TypeError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                # [:-3] strips the ".7z" extension before date parsing.
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            try:
                if candidate_time > latest_time:
                    latest_time = candidate_time
                    latest = candidate
            except TypeError:
                # latest_time still None - first valid candidate wins.
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

        Returns a State value: OK when the thing is now downloaded,
        ALREADY_DOWNLOADED when there was nothing new to fetch, FAILED
        on any error (the partial directory is moved aside).
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            # Partition into files changed since last_time (download) and
            # unchanged ones (copy across from the renamed directory).
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        # Record the full file manifest alongside the downloads.
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code, sanitise_url(file_link.link), data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    # BUG FIX: this log line used to reference
                    # file_link.link - a leftover from the file loop, and
                    # undefined when no files had been fetched.
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code, sanitise_url(imagelink.link), image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.__str__())
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        # Compress the directory into a dated 7z archive, then remove it.
        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
715
716
717
718
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for raw_line in handle:
            instruction = raw_line.strip()
            if not instruction:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(instruction))
            tokens = instruction.split()
            verb = tokens[0]
            if verb == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(instruction))
                Thing(tokens[1]).download(download_dir, compress)
            elif verb == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(instruction))
                Collection(tokens[1], tokens[2],
                           download_dir, quick, compress).download()
            elif verb == "user":
                logging.debug(
                    "Handling batch collection instruction: {}".format(instruction))
                Designs(tokens[1], download_dir, quick, compress).download()
            else:
                logging.warning("Unable to parse current instruction. Skipping.")
746
747
def main():
    """ Entry point for script being run as a command.

    Parses arguments, configures logging, resolves the API key (from
    --api-key or a local 'api.key' file), starts the downloader worker
    processes and dispatches to the requested subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; each handler filters to its level.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # BUG FIX: the formatter was created but only ever attached to the
    # optional file handler; attach it to console output as well.
    console_handler.setFormatter(formatter)

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            # Fall back to reading the key from a local file.
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader worker processes.  Only the 'thing' subcommand
    # feeds the queue; the other subcommands download synchronously.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for downloader in downloaders:
        thing_queue.put(None)
845
846
if __name__ == "__main__":
    # freeze_support is a no-op except in frozen Windows executables,
    # where it must be called before multiprocessing is used.
    multiprocessing.freeze_support()
    main()