API (#14) v0.10.0
author Oliver Matthews <oliver@codersoffortune.net>
Mon, 27 Jul 2020 13:01:09 +0000 (14:01 +0100)
committer GitHub <noreply@github.com>
Mon, 27 Jul 2020 13:01:09 +0000 (14:01 +0100)
* grabbing users with API works
* read api from key file
* image links working
* collections now work
* update README
* make collection names case insensitive
* warn users on the update timestamps
* detect things using old timestamps and handle gracefully
* filter api keys from log files
* missed a couple
* Check status codes from requests.

.gitignore
README.md
requirements.yml
thingy_grabber.py

index f0ee661..9c7176b 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -105,4 +105,5 @@ venv.bak/
 
 # VS Code
 *.code-workspace
-.vscode
\ No newline at end of file
+.vscode
+api.key
index 0243605..f79df1a 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,9 +1,10 @@
 # thingy_grabber
-Script for archiving thingiverse things. Due to this being a glorified webscraper, it's going to be very fragile.
+Script for archiving thingiverse things.
 
 ## Usage:
 ````
-usage: thingy_grabber.py [-h] [-l {debug,info,warning}] [-d DIRECTORY] [-f LOG_FILE] [-q] [-c] {collection,thing,user,batch,version} ...
+usage: thingy_grabber.py [-h] [-l {debug,info,warning}] [-d DIRECTORY] [-f LOG_FILE] [-q] [-c] [-a API_KEY]
+                         {collection,thing,user,batch,version} ...
 
 positional arguments:
   {collection,thing,user,batch,version}
@@ -24,8 +25,19 @@ optional arguments:
                         Place to log debug information to
   -q, --quick           Assume date ordering on posts
   -c, --compress        Compress files
+  -a API_KEY, --api-key API_KEY
+                        API key for thingiverse
 ````
 
+## API KEYs
+Thingy_grabber v0.10.0 accesses thingiverse in a _substantially_ different way than before. The plus side is that it should be more reliable and possibly faster, and it no longer needs selenium or a firefox instance (drastically reducing memory overhead). The downside is that you are _going_ to have to do one thing to keep using the app - get yourself an API KEY.
+
+To do this, go to https://www.thingiverse.com/apps/create and create your own app, selecting "Desktop app" as the type.
+Once you have your key, either specify it on the command line or put it in a text file called `api.key` in whatever directory you run the script from - the script will load it automatically.
+
+### Why can't I use yours?
+Because API keys can be (and likely are) rate limited.
+
 ### Things
 `thingy_grabber.py thing thingid1 thingid2 ...`
 This will create a directory named after the title of the thing(s) with the given ID(s) and download the files into it.
@@ -78,7 +90,7 @@ Download all designs by jim and bob into directories under `c:\downloads`, give
 `
 
 ## Requirements
-python3, beautifulsoup4, requests, lxml, py7xr (>=0.8.2)
+python3, requests, py7zr (>=0.8.2)
 
 ## Current features:
 - can download an entire collection, creating separate subdirs for each thing in the collection
@@ -86,6 +98,8 @@ python3, beautifulsoup4, requests, lxml, py7xr (>=0.8.2)
 - If there is an updated file, the old directory will be moved to `name_timestamp` where `timestamp` is the last upload time of the old files. The code will then copy unchanged files across and download any new ones.
 
 ## Changelog
+* v0.10.0
+  - API access! New -a option to provide an API key for more stable access.
 * v0.9.0
   - Compression! New -c option will use 7z to create an archival copy of the file once downloaded. 
     Note that although it will use the presence of 7z files to determine if a file has been updated, it currently _won't_ read old files from inside the 7z for handling updates, resulting in marginally larger bandwidth usage when dealing with partially updated things. This will be fixed later.
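A minimal sketch of the key lookup this changelog entry refers to, mirroring the logic added to main() further down in this commit (the key value and `args_api_key` stand-in are placeholders):

````
# Sketch: command-line key first, then fall back to an api.key file (placeholder key).
API_KEY = None
args_api_key = None  # stand-in for args.api_key from argparse

if args_api_key:
    API_KEY = args_api_key
else:
    try:
        with open("api.key") as fh:
            API_KEY = fh.read().strip()
    except OSError as e:
        print("Either specify the API key on the command line or in a file called 'api.key'")
        print("Exception: {}".format(e))
````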
index 3620e3c..7a032d7 100644 (file)
--- a/requirements.yml
+++ b/requirements.yml
@@ -2,7 +2,5 @@ name: thingy
 channels:
   - defaults
 dependencies:
-  - beautifulsoup4
-  - lxml
   - requests
   - py7zr
index c96f1a3..62861c2 100755 (executable)
--- a/thingy_grabber.py
+++ b/thingy_grabber.py
@@ -14,14 +14,7 @@ import multiprocessing
 import enum
 import datetime
 from shutil import copyfile
-from bs4 import BeautifulSoup
 from dataclasses import dataclass
-import selenium
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.firefox.options import Options
 import atexit
 import py7zr
 import glob
@@ -34,33 +27,38 @@ DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
 # Windows cannot handle : in filenames
 SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
 
-URL_BASE = "https://www.thingiverse.com"
-URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
-USER_COLLECTION = URL_BASE + "/ajax/user/designs"
+API_BASE = "https://api.thingiverse.com"
+ACCESS_QP = "access_token={}"
+PAGE_QP = "page={}"
+API_USER_DESIGNS = API_BASE + "/users/{}/things/"
+API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP
 
-ID_REGEX = re.compile(r'"id":(\d*),')
-TOTAL_REGEX = re.compile(r'"total":(\d*),')
-LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
-# This appears to be fixed at 12, but if it changes would screw the rest up.
-PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
-NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')
+# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
+API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
+API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP
+
+API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
+API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
+API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
+
+API_KEY = None
 
 DOWNLOADER_COUNT = 1
 RETRY_COUNT = 3
 
 MAX_PATH_LENGTH = 250
 
-VERSION = "0.9.0"
+VERSION = "0.10.0"
 
 TIMESTAMP_FILE = "timestamp.txt"
 
-#BROWSER = webdriver.PhantomJS('./phantomjs')
-options = Options()
-options.add_argument("--headless")
-BROWSER = webdriver.Firefox(options=options)
-
-BROWSER.set_window_size(1980, 1080)
+SESSION = requests.Session()
 
+@dataclass
+class ThingLink:
+    thing_id: str
+    name: str
+    api_link: str
 
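For illustration, a minimal sketch of how these endpoint templates compose into a request URL (the thing id and key below are placeholders):

````
# Sketch: composing the endpoint templates defined above (placeholder id/key).
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP

url = API_THING_FILES.format(4733281, "0123456789abcdef")
print(url)
# -> https://api.thingiverse.com/things/4733281/files/?access_token=0123456789abcdef
````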
 @dataclass
 class FileLink:
@@ -68,6 +66,11 @@ class FileLink:
     last_update: datetime.datetime
     link: str
 
+@dataclass
+class ImageLink:
+    name: str
+    link: str
+
 class FileLinks:
     def __init__(self, initial_links=[]):
         self.links = []
@@ -97,6 +100,18 @@ class State(enum.Enum):
     FAILED = enum.auto()
     ALREADY_DOWNLOADED = enum.auto()
 
+def sanitise_url(url):
+    """ remove api keys from an url
+    """
+    return re.sub(r'access_token=\w*',
+                  'access_token=***',
+                  url)
+
+def strip_time(date_obj):
+    """ Takes a datetime object and returns another with the time set to 00:00
+    """
+    return datetime.datetime.combine(date_obj.date(), datetime.time())
+
 def rename_unique(dir_name, target_dir_name):
     """ Move a directory sideways to a new name, ensuring it is unique.
     """
@@ -141,59 +156,12 @@ def slugify(value):
     Normalise string, remove characters that are invalid in filenames,
     and convert string to lowercase.
     """
+    logging.debug("Sluggyfying {}".format(value))
     value = unicodedata.normalize('NFKC', value).lower().strip()
     value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
     value = re.sub(r'\.*$', '', value)
     return value
 
-class PageChecker(object):
-    def __init__(self):
-        self.log = []
-        self.title = None
-        self.file_count = None
-        self.files = None
-        self.images = None
-        self.license = None
-
-
-    def __call__(self, _):
-        try:
-            self.log.append("call")
-            if self.title is None:
-                # first find the name
-                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
-                if name is None: 
-                    return False
-                self.title = name.text
-
-            if self.file_count is None:
-                # OK. Do we know how many files we have to download?
-                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
-                self.log.append("got some metrics: {}".format(len(metrics)))
-                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
-                self.log.append(cur_count)
-                if cur_count == 0:
-                    return False
-                self.file_count = cur_count
-                
-            self.log.append("looking for {} files".format(self.file_count))
-            fileRows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
-            self.log.append("found {} files".format(len(fileRows)))
-            if len(fileRows) < self.file_count:
-                return False
-
-            self.log.append("Looking for images")
-            # By this point _should_ have loaded all the images
-            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
-            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
-            self.log.append("found {} images".format(len(self.images)))
-            self.files = fileRows
-            return True
-        except Exception:
-            return False
-
-
-
 
 class Downloader(multiprocessing.Process):
     """
@@ -243,16 +211,6 @@ class Grouping:
         # These should be set by child classes.
         self.url = None
         self.download_dir = None
-        self.collection_url = None
-
-    def _get_small_grouping(self, req):
-        """ Handle small groupings """
-        soup = BeautifulSoup(req.text, features='lxml')
-        links = soup.find_all('a', {'class': 'card-img-holder'})
-        self.things = [x['href'].split(':')[1] for x in links]
-        self.total = len(self.things)
-
-        return self.things
 
     def get(self):
         """ retrieve the things of the grouping. """
@@ -266,29 +224,38 @@ class Grouping:
             raise ValueError("No URL set - object not initialised properly?")
 
         # Get the internal details of the grouping.
-        logging.debug("Querying {}".format(self.url))
-        c_req = requests.get(self.url)
-        total = TOTAL_REGEX.search(c_req.text)
-        if total is None:
-            # This is a small (<13) items grouping. Pull the list from this req.
-            return self._get_small_grouping(c_req)
-        self.total = total.groups()[0]
-        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
-        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
-        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
-        parameters = {
-            'base_url': self.url,
-            'page': '1',
-            'per_page': '12',
-            'id': self.req_id
-        }
-        for current_page in range(1, self.last_page + 1):
-            parameters['page'] = current_page
-            req = requests.post(self.collection_url, parameters)
-            soup = BeautifulSoup(req.text, features='lxml')
-            links = soup.find_all('a', {'class': 'card-img-holder'})
-            self.things += [x['href'].split(':')[1] for x in links]
-
+        logging.debug("Querying {}".format(sanitise_url(self.url)))
+        page = 0
+        # TODO: Must be a way to refactor this cleanly
+        if self.paginated:
+            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
+            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
+            while True:
+                page += 1
+                current_url = page_url.format(API_KEY, page)
+                logging.info("requesting:{}".format(sanitise_url(current_url)))
+                current_req = SESSION.get(current_url)
+                if current_req.status_code != 200:
+                    logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(current_url), current_req.text))
+                    break
+                current_json = current_req.json()
+                if not current_json:
+                    # No more!
+                    break
+                for thing in current_json:
+                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
+        else:
+            # self.url should already have been formatted as we don't need pagination
+            logging.info("requesting:{}".format(sanitise_url(self.url)))
+            current_req = SESSION.get(self.url)
+            if current_req.status_code != 200:
+                logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(current_url), current_req.text))
+            else:
+                current_json = current_req.json()
+                for thing in current_json:
+                    logging.info(thing)
+                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
+        logging.info("Found {} things.".format(len(self.things)))
         return self.things
 
     def download(self):
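The pagination loop above, as a standalone sketch (user name and key are placeholders; the empty-page check is what terminates the loop):

````
# Sketch: the paginated fetch pattern used by Grouping.get() (placeholder user/key).
import requests

SESSION = requests.Session()
things = []
page = 0
while True:
    page += 1
    url = "https://api.thingiverse.com/users/{}/things/?access_token={}&page={}".format(
        "jim", "0123456789abcdef", page)
    resp = SESSION.get(url)
    if resp.status_code != 200:
        break  # unexpected error; give up on further pages
    batch = resp.json()
    if not batch:
        break  # an empty page means there is nothing more to fetch
    things.extend(batch)
print(len(things))
````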
@@ -314,10 +281,6 @@ class Grouping:
                 logging.info("Caught up, stopping.")
                 return
 
-
-
-
-
 class Collection(Grouping):
     """ Holds details of a collection. """
 
@@ -325,11 +288,30 @@ class Collection(Grouping):
         Grouping.__init__(self, quick, compress)
         self.user = user
         self.name = name
-        self.url = "{}/{}/collections/{}".format(
-            URL_BASE, self.user, strip_ws(self.name))
+        self.paginated = False
+        # need to figure out the ID for the collection
+        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
+        try:
+            current_req = SESSION.get(collection_url)
+        except requests.exceptions.ConnectionError as error:
+            logging.error("Unable to connect for thing {}: {}".format(
+                self.thing_id, error))
+            return
+        if current_req.status_code != 200:
+            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url), current_req.text))
+            return
+        collection_list = current_req.json()
+        try:
+            # case insensitive to retain parity with previous behaviour
+            collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
+        except IndexError:
+            logging.error("Unable to find collection {} for user {}".format(name, user))
+            return
+        self.collection_id = collection['id']
+        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)
+
         self.download_dir = os.path.join(directory,
                                          "{}-{}".format(slugify(self.user), slugify(self.name)))
-        self.collection_url = URL_COLLECTION
 
 
 class Designs(Grouping):
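A small sketch of the case-insensitive collection lookup added above (sample data; the real list comes from the API_USER_COLLECTIONS endpoint):

````
# Sketch: case-insensitive collection matching via str.casefold() (sample data).
collection_list = [{'name': 'Favourites', 'id': 101},
                   {'name': 'To Print', 'id': 102}]
name = "FAVOURITES"
matches = [x for x in collection_list if x['name'].casefold() == name.casefold()]
print(matches[0]['id'] if matches else None)  # -> 101
````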
@@ -338,80 +320,126 @@ class Designs(Grouping):
     def __init__(self, user, directory, quick, compress):
         Grouping.__init__(self, quick, compress)
         self.user = user
-        self.url = "{}/{}/designs".format(URL_BASE, self.user)
+        self.url = API_USER_DESIGNS.format(user)
+        self.paginated = True
         self.download_dir = os.path.join(
             directory, "{} designs".format(slugify(self.user)))
-        self.collection_url = USER_COLLECTION
 
 
 class Thing:
     """ An individual design on thingiverse. """
 
-    def __init__(self, thing_id):
-        self.thing_id = thing_id
+    def __init__(self, thing_link):
+        self.thing_id = thing_link.thing_id
+        self.name = thing_link.name
+        self.api_link = thing_link.api_link
         self.last_time = None
         self._parsed = False
         self._needs_download = True
         self.text = None
-        self.title = None
         self.download_dir = None
         self.time_stamp = None
         self._file_links = FileLinks()
+        self._image_links = []
 
     def _parse(self, base_dir):
         """ Work out what, if anything needs to be done. """
         if self._parsed:
             return
 
-        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
+
+        # First get the broad details
+        url = API_THING_DETAILS.format(self.thing_id, API_KEY)
         try:
-            BROWSER.get(url)
-            wait = WebDriverWait(BROWSER, 60)
-            pc = PageChecker()
-            wait.until(pc)
+            current_req = SESSION.get(url)
         except requests.exceptions.ConnectionError as error:
             logging.error("Unable to connect for thing {}: {}".format(
                 self.thing_id, error))
             return
-        except selenium.common.exceptions.TimeoutException:
-            logging.error(pc.log)
-            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
+        # Check for DMCA
+        if current_req.status_code == 403:
+            logging.error("Access to thing {} is forbidden".format(self.thing_id))
             return
+        if current_req.status_code != 200:
+            logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url), current_req.text))
+            return
+
+        thing_json = current_req.json()
+        try:
+            self._license = thing_json['license']
+        except KeyError:
+            logging.warning("No license found for thing {}?".format(self.thing_id))
+
+        # TODO: Get non-html version of this?
+        try:
+            self._details = thing_json['details']
+        except KeyError:
+            logging.warning("No description found for thing {}?".format(self.thing_id))
 
-        self.title = pc.title
-        if not pc.files:
+
+        # Now get the file details
+        file_url = API_THING_FILES.format(self.thing_id, API_KEY)
+
+        try:
+            current_req = SESSION.get(file_url)
+        except requests.exceptions.ConnectionError as error:
+            logging.error("Unable to connect for thing {}: {}".format(
+                self.thing_id, error))
+            return
+
+        if current_req.status_code != 200:
+            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url), current_req.text))
+            return
+
+        link_list = current_req.json()
+
+        if not link_list:
             logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
-        for link in pc.files:
-            logging.debug("Parsing link: {}".format(link.text))
-            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
-            if link_link.endswith("/zip"):
-                # bulk link.
-                continue
-            try:
-                link_title, link_details, _ = link.text.split("\n")
-            except ValueError:
-                # If it is a filetype that doesn't generate a picture, then we get an extra field at the start.
-                _, link_title, link_details, _ = link.text.split("\n")
-                
-            #link_details will be something like '461 kb | Updated 06-11-2019 | 373 Downloads'
-            #need to convert from M D Y to Y M D
-            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
+
+        for link in link_list:
+            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
             try:
-                self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
+                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
+                self._file_links.append(FileLink(link['name'], datestamp, link['url']))
             except ValueError:
-                logging.error(link_date)
+                logging.error(link['date'])
+
+        # Finally get the image links
+        image_url = API_THING_IMAGES.format(self.thing_id, API_KEY)
+
+        try:
+            current_req = SESSION.get(image_url)
+        except requests.exceptions.ConnectionError as error:
+            logging.error("Unable to connect for thing {}: {}".format(
+                self.thing_id, error))
+            return
+
+        if current_req.status_code != 200:
+            logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url), current_req.text))
+            return
 
-        self._image_links=[x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
-        self._license = pc.license
-        self.pc = pc
+        image_list = current_req.json()
 
+        if not image_list:
+            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))
 
-        self.slug = "{} - {}".format(self.thing_id, slugify(self.title))
+        for image in image_list:
+            logging.debug("parsing image: {}".format(image))
+            try:
+                name = slugify(image['name'])
+                # TODO: fallback to other types
+                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
+            except (KeyError, IndexError):
+                logging.warning("Missing image for thing {}".format(self.thing_id))
+                continue
+            self._image_links.append(ImageLink(name, url))
+
+        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
         self.download_dir = os.path.join(base_dir, self.slug)
 
         self._handle_old_directory(base_dir)
 
-        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))
+        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
         latest, self.last_time = self._find_last_download(base_dir)
 
         if not latest:
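For reference, a sketch of the image-size selection above (the `sizes` structure is inferred from the parsing code; URLs are placeholders):

````
# Sketch: picking the large 'display' rendition from an image's size list.
image = {'name': 'front.jpg',
         'sizes': [{'type': 'thumb', 'size': 'small', 'url': 'https://example.invalid/t.jpg'},
                   {'type': 'display', 'size': 'large', 'url': 'https://example.invalid/l.jpg'}]}
candidates = [x for x in image['sizes']
              if x['type'] == 'display' and x['size'] == 'large']
print(candidates[0]['url'] if candidates else None)
# -> https://example.invalid/l.jpg
````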
@@ -423,8 +451,17 @@ class Thing:
         logging.info("last downloaded version: {}".format(self.last_time))
 
         # OK, so we have a timestamp, lets see if there is anything new to get
+        # First off, are we comparing an old download that threw away the timestamp?
+        ignore_time = self.last_time == strip_time(self.last_time)
         try:
-            if self._file_links.last_update > self.last_time:
+            # TODO: Allow for comparison at the exact time
+            files_last_update = self._file_links.last_update
+            if ignore_time:
+                logging.info("Dropping time from comparison stamp as old-style download dir")
+                files_last_update = strip_time(files_last_update)
+
+            if files_last_update > self.last_time:
                 logging.info(
                     "Found new/updated files {}".format(self._file_links.last_update))
                 self._needs_download = True
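A worked sketch of the old-timestamp handling above: download directories written by older versions stored only a date, so when that is detected the comparison drops the time component (sample dates):

````
# Sketch: detecting and handling an old-style date-only timestamp.
import datetime

def strip_time(d):
    return datetime.datetime.combine(d.date(), datetime.time())

last_time = datetime.datetime(2020, 7, 1)            # old dirs stored the date only
ignore_time = last_time == strip_time(last_time)     # True -> date-only comparison
files_last_update = datetime.datetime(2020, 7, 1, 9, 30)
if ignore_time:
    files_last_update = strip_time(files_last_update)
print(files_last_update > last_time)  # False: same day, so no needless re-download
````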
@@ -440,7 +477,7 @@ class Thing:
     def _handle_old_directory(self, base_dir):
         """ Deal with any old directories from previous versions of the code.
         """
-        old_dir = os.path.join(base_dir, slugify(self.title))
+        old_dir = os.path.join(base_dir, slugify(self.name))
         if os.path.exists(old_dir):
             logging.warning("Found old style download_dir. Moving.")
             rename_unique(old_dir, self.download_dir)
@@ -522,11 +559,11 @@ class Thing:
             return State.FAILED
 
         if not self._needs_download:
-            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
+            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
             return State.ALREADY_DOWNLOADED
 
         if not self._file_links:
-            print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
+            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
             return State.FAILED
 
         # Have we already downloaded some things?
@@ -562,16 +599,10 @@ class Thing:
         logging.debug("Generating download_dir")
         os.mkdir(self.download_dir)
         filelist_file = os.path.join(self.download_dir, "filelist.txt")
+        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
         with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
             for fl in self._file_links:
-              base_link = fl.link
-              try:
-                fl.link=requests.get(fl.link, allow_redirects=False).headers['location']
-              except Exception:
-                # Sometimes Thingiverse just gives us the direct link the first time. Not sure why.
-                pass
-              
-              fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))
+              fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))
 
 
         # First grab the cached files (if any)
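As a sketch of the access-token suffix appended to each file link above (the key is a placeholder and the file link is hypothetical; real links come from the files endpoint):

````
# Sketch: appending the access-token query suffix to a file link (placeholder key).
ACCESS_QP = "access_token={}"
API_KEY = "0123456789abcdef"  # placeholder

url_suffix = "/?" + ACCESS_QP.format(API_KEY)
link = "https://api.thingiverse.com/download/12345"  # hypothetical file link
print(link + url_suffix)
# -> https://api.thingiverse.com/download/12345/?access_token=0123456789abcdef
````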
@@ -595,7 +626,13 @@ class Thing:
                 file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                 logging.debug("Downloading {} from {} to {}".format(
                     file_link.name, file_link.link, file_name))
-                data_req = requests.get(file_link.link)
+                data_req = SESSION.get(file_link.link + url_suffix)
+                if data_req.status_code != 200:
+                    logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code, sanitise_url(file_link.link), data_req.text))
+                    fail_dir(self.download_dir)
+                    return State.FAILED
+
                 with open(file_name, 'wb') as handle:
                     handle.write(data_req.content)
         except Exception as exception:
@@ -604,39 +641,27 @@ class Thing:
             return State.FAILED
 
 
-        # People like images. But this doesn't work yet.
+        # People like images.
         image_dir = os.path.join(self.download_dir, 'images')
         logging.info("Downloading {} images.".format(len(self._image_links)))
         try:
             os.mkdir(image_dir)
             for imagelink in self._image_links:
-                filename = os.path.basename(imagelink)
-                if filename.endswith('stl'):
-                    filename = "{}.png".format(filename)
-                image_req = requests.get(imagelink)
-                with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
+                filename = os.path.join(image_dir, imagelink.name)
+                image_req = SESSION.get(imagelink.link)
+                if image_req.status_code != 200:
+                    logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code, sanitise_url(file_link.link), image_req.text))
+                    fail_dir(self.download_dir)
+                    return State.FAILED
+                with open(truncate_name(filename), 'wb') as handle:
                     handle.write(image_req.content)
         except Exception as exception:
-            print("Failed to download {} - {}".format(filename, exception))
+            logging.error("Failed to download {} - {}".format(imagelink.name, exception))
             fail_dir(self.download_dir)
             return State.FAILED
 
-        """
-        # instructions are good too.
-        logging.info("Downloading readme")
-        try:
-            readme_txt = soup.find('meta', property='og:description')[
-                'content']
-            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
-                readme_handle.write("{}\n".format(readme_txt))
-        except (TypeError, KeyError) as exception:
-            logging.warning("No readme? {}".format(exception))
-        except IOError as exception:
-            logging.warning("Failed to write readme! {}".format(exception))
-
-        """
         # Best get some licenses
-        logging.info("Downloading license")
+        logging.info("writing license file")
         try:
             if self._license:
                 with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
@@ -644,33 +669,41 @@ class Thing:
         except IOError as exception:
             logging.warning("Failed to write license! {}".format(exception))
 
+        logging.info("writing readme")
+        try:
+            if self._details:
+                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
+                    readme_handle.write("{}\n".format(self._details))
+        except IOError as exception:
+            logging.warning("Failed to write readme! {}".format(exception))
+
         try:
             # Now write the timestamp
             with open(os.path.join(self.download_dir,TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                 timestamp_handle.write(self.time_stamp.__str__())
         except Exception as exception:
-            print("Failed to write timestamp file - {}".format(exception))
+            logging.error("Failed to write timestamp file - {}".format(exception))
             fail_dir(self.download_dir)
             return State.FAILED
         self._needs_download = False
-        logging.debug("Download of {} finished".format(self.title))
+        logging.debug("Download of {} finished".format(self.name))
         if not compress:
             return State.OK
 
 
         thing_dir = "{} - {} - {}".format(self.thing_id,
-            slugify(self.title),
+            slugify(self.name),
             self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
         file_name = os.path.join(base_dir,
             "{}.7z".format(thing_dir))
         logging.debug("Compressing {} to {}".format(
-            self.title,
+            self.name,
             file_name))
         with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
             archive.writeall(self.download_dir, thing_dir)
-        logging.debug("Compression of {} finished.".format(self.title))
+        logging.debug("Compression of {} finished.".format(self.name))
         shutil.rmtree(self.download_dir)
-        logging.debug("Removed temporary download dir of {}.".format(self.title))
+        logging.debug("Removed temporary download dir of {}.".format(self.name))
         return State.OK
 
 
@@ -718,7 +751,9 @@ def main():
                         help="Assume date ordering on posts")
     parser.add_argument("-c", "--compress", action="store_true",
                         help="Compress files")
-                        
+    parser.add_argument("-a", "--api-key",
+                        help="API key for thingiverse")
+
 
     subparsers = parser.add_subparsers(
         help="Type of thing to download", dest="subcommand")
@@ -755,6 +790,18 @@ def main():
     console_handler = logging.StreamHandler()
     console_handler.setLevel(args.log_level.upper())
 
+    global API_KEY
+    if args.api_key:
+        API_KEY=args.api_key
+    else:
+        try:
+            with open("api.key") as fh:
+                API_KEY=fh.read().strip()
+        except Exception as e:
+            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
+            logging.error("Exception: {}".format(e))
+            return
+
     logger.addHandler(console_handler)
     if args.log_file:
         file_handler = logging.FileHandler(args.log_file)
@@ -789,7 +836,6 @@ def main():
     for downloader in downloaders:
         thing_queue.put(None)
 
-atexit.register(BROWSER.quit)
 
 if __name__ == "__main__":    
     multiprocessing.freeze_support()