#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

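# Illustrative example invocations (see main() for the full argument list):
#   thingy_grabber.py -a <api_key> thing 1234 5678
#   thingy_grabber.py -a <api_key> user some_user
#   thingy_grabber.py -a <api_key> collection some_user "collection name"
#   thingy_grabber.py -a <api_key> batch batch.txt
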
import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from dataclasses import dataclass
import py7zr
import glob
import shutil

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/?" + ACCESS_QP
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

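# Illustrative example: API_THING_DETAILS.format(1234, API_KEY) yields
# "https://api.thingiverse.com/things/1234/?access_token=<key>".
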
API_KEY = None

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.10.2"

TIMESTAMP_FILE = "timestamp.txt"

SESSION = requests.Session()


@dataclass
class ThingLink:
    thing_id: str
    name: str
    api_link: str


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


@dataclass
class ImageLink:
    name: str
    link: str


class FileLinks:
    """ A collection of FileLink objects that tracks the most recent update time. """

    def __init__(self, initial_links=None):
        if initial_links is None:
            initial_links = []
        self.links = []
        self.last_update = None
        for link in initial_links:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # First link appended - there is no previous update time to compare.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def sanitise_url(url):
    """ Remove API keys from a URL.
    """
    return re.sub(r'access_token=\w*',
                  'access_token=***',
                  url)

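# Illustrative example:
#   sanitise_url("https://api.thingiverse.com/things/1234/?access_token=abc123")
#   returns "https://api.thingiverse.com/things/1234/?access_token=***"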

def strip_time(date_obj):
    """ Takes a datetime object and returns another with the time set to 00:00
    """
    return datetime.datetime.combine(date_obj.date(), datetime.time())

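# Illustrative example:
#   strip_time(datetime.datetime(2020, 1, 2, 13, 45)) returns
#   datetime.datetime(2020, 1, 2, 0, 0)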

def rename_unique(dir_name, target_dir_name):
    """ Move a directory sideways to a new name, ensuring it is unique.
    """
    target_dir = target_dir_name
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_{}".format(target_dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)
    return target_dir


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    return rename_unique(dir_name, "{}_failed".format(dir_name))

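# Illustrative example:
#   fail_dir("thing_dir") renames "thing_dir" to "thing_dir_failed",
#   or to "thing_dir_failed_0" (and so on) if that name is already taken.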

def truncate_name(file_name):
    """ Ensure the filename is not too long for, well, Windows basically.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Trim the base name, leaving room for a "_<n>" disambiguating suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path

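# Illustrative example: a path longer than MAX_PATH_LENGTH comes back
#   shortened, with a "_<n>" suffix inserted before the extension so the
#   result does not collide with an existing file.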

def slugify(value):
    """
    Normalise string, remove characters that are invalid in filenames
    and convert the string to lowercase.
    """
    logging.debug("Slugifying {}".format(value))
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:?*|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value

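# Illustrative example:
#   slugify('My Cool Thing: V2?') returns 'my cool thing v2'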

class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down downloader")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing.from_thing_id(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()

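# A None on the queue is the shutdown sentinel: main() pushes one per
# downloader process once all the work has been queued.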

class Grouping:
    """ Holds details of a group of things for download
    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None

    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        # self.url should already have been formatted as we don't need pagination
        logging.info("requesting:{}".format(sanitise_url(self.url)))
        current_req = SESSION.get(self.url)
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
                                                                current_req.text))
        else:
            current_json = current_req.json()
            for thing in current_json:
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        # need to figure out the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections for user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error(
                "Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
                                                                current_req.text))
            return
        collection_list = current_req.json()
        try:
            # case insensitive to retain parity with previous behaviour
            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
        except IndexError:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user, API_KEY)
        self.paginated = True
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))


class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_link):
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.api_link = thing_link.api_link
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        self._license = None
        self._details = None

    @classmethod
    def from_thing_id(cls, thing_id):
        """ Build a Thing from a bare ID, as used by the download queue and
        batch files; the name is filled in when the thing is parsed.
        """
        return cls(ThingLink(thing_id, "", ""))

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        # First get the broad details
        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
                                                                           current_req.text))
            return

        thing_json = current_req.json()
        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Constructed from a bare ID - fill in the name from the API response.
            self.name = thing_json.get('name', str(self.thing_id))

        # Now get the file details
        file_url = API_THING_FILES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(file_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code,
                                                                        sanitise_url(file_url),
                                                                        current_req.text))
            return

        link_list = current_req.json()

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
                self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
                self._file_links.append(
                    FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(API_KEY)))
            except ValueError:
                logging.error("Unparseable date: {}".format(link['date']))

        # Finally get the image links
        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)

        try:
            current_req = SESSION.get(image_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return

        if current_req.status_code != 200:
            logging.error(
                "Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
                                                              current_req.text))
            return

        image_list = current_req.json()

        if not image_list:
            logging.warning(
                "No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
                    self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            name = None
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                logging.warning("Missing image for {}".format(name))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, let's see if there is anything new to get.
        # First off, are we comparing against an old download that threw away the time part of the stamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        try:
            # TODO: Allow for comparison at the exact time
            files_last_update = self._file_links.last_update
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_dir = os.path.join(base_dir, slugify(self.name))
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self):
        """ Move the current download directory sideways if the thing has changed.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file):
            # Old form of download directory
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
                latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
                latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            if latest_time is None or candidate_time > latest_time:
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

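    # Note: compressed downloads are archived as
    # "<thing id> - <slugified name> - <SAFE_DATETIME_FORMAT timestamp>.7z",
    # which is the layout _find_last_download() relies on above.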
    def download(self, base_dir, compress):
        """ Download all files for a given thing.
        Returns a State: OK if the thing is now downloaded (even if nothing
        new was fetched), ALREADY_DOWNLOADED if it was skipped, FAILED on error.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error(
                "{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory()

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        if renamed_dir:
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "TypeError looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = SESSION.get(file_link.link)
                if data_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
                                                                                sanitise_url(file_link.link),
                                                                                data_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED

                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.join(image_dir, imagelink.name)
                image_req = SESSION.get(imagelink.link)
                if image_req.status_code != 200:
                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
                                                                                sanitise_url(imagelink.link),
                                                                                image_req.text))
                    fail_dir(self.download_dir)
                    return State.FAILED
                with open(truncate_name(filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # Don't reference imagelink here - it is unbound if mkdir failed.
            logging.error("Failed to download images for thing {} - {}".format(self.thing_id, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
                          encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
                          encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the same format we parse it back with.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing.from_thing_id(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")

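# An illustrative batch file (one instruction per line; note that lines are
# whitespace-split, so collection names cannot contain spaces here):
#   thing 1234
#   collection some_user some_collection
#   user some_user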

def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    global API_KEY
    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand == "collection":
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for _ in downloaders:
        thing_queue.put(None)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()