Add custom FileLinks class, 7z writing initial implementation
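FileLinks replaces the plain list of FileLink entries so that the newest
last_update is tracked as links are appended; Thing._parse compares that
against the stored timestamp to decide whether a re-download is needed, and
Thing.download can now optionally pack the finished directory into a 7z
archive. A rough sketch of how the two pieces behave (illustrative only:
importing this module starts a headless Firefox as a side effect, and
"thing.7z" / "some_thing_dir" are made-up names):

    import datetime
    import py7zr
    from thingy_grabber import FileLink, FileLinks

    links = FileLinks()
    links.append(FileLink("base.stl", datetime.datetime(2019, 6, 11), "https://example.invalid/a"))
    links.append(FileLink("lid.stl", datetime.datetime(2020, 1, 2), "https://example.invalid/b"))
    assert links.last_update == datetime.datetime(2020, 1, 2)  # newest wins

    # The 7z writing path is py7zr's writeall with an LZMA2 filter chain:
    with py7zr.SevenZipFile("thing.7z", 'w', filters=[{'id': py7zr.FILTER_LZMA2}]) as archive:
        archive.writeall("some_thing_dir", "some_thing_dir")
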
#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from bs4 import BeautifulSoup
from dataclasses import dataclass
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
import atexit
import py7zr

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes it would break the
# paging logic below.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.9.0"


# BROWSER = webdriver.PhantomJS('./phantomjs')
# Headless Firefox via selenium; needs geckodriver available on the PATH.
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

BROWSER.set_window_size(1980, 1080)


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


class FileLinks:
    """ A list of FileLink objects that also tracks the newest last_update
        seen across everything appended to it.
    """

    def __init__(self, initial_links=None):
        # Default to None rather than a shared mutable default list.
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - this is the first link.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    target_dir = "{}_failed".format(dir_name)
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_failed_{}".format(dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)


def truncate_name(file_name):
    """ Ensure the file path is not too long (mainly a Windows limitation).
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Trim the base name, leaving room for a "_N" disambiguation suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path


def strip_ws(value):
    """ Replace runs of whitespace (or hyphens) with a single hyphen. """
    return str(NO_WHITESPACE_REGEX.sub('-', value))


def slugify(value):
    """
    Normalise the string, remove characters that are invalid in filenames,
    and convert it to lowercase.
    """
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value


class PageChecker(object):
    """ Selenium wait condition that polls the thing page until the title,
        file rows, images and license have all rendered. Lookup failures are
        swallowed by the outer try/except, so the wait simply polls again.
    """

    def __init__(self):
        self.log = []
        self.title = None
        self.file_count = None
        self.files = None
        self.images = None
        self.license = None

    def __call__(self, _):
        try:
            self.log.append("call")
            if self.title is None:
                # First find the name.
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            file_rows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(file_rows)))
            if len(file_rows) < self.file_count:
                return False

            self.log.append("Looking for images")
            # By this point we _should_ have loaded all the images.
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = file_rows
            return True
        except Exception:
            return False


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
        return


class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.collection_url = None

    def _get_small_grouping(self, req):
        """ Handle small groupings """
        soup = BeautifulSoup(req.text, features='lxml')
        links = soup.find_all('a', {'class': 'card-img-holder'})
        self.things = [x['href'].split(':')[1] for x in links]
        self.total = len(self.things)

        return self.things

    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(self.url))
        c_req = requests.get(self.url)
        total = TOTAL_REGEX.search(c_req.text)
        if total is None:
            # This is a small (<13 item) grouping. Pull the list from this req.
            return self._get_small_grouping(c_req)
        self.total = total.groups()[0]
        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
        # Note: per_page is captured above, but the listing endpoint is
        # queried with the apparently fixed value of 12.
        parameters = {
            'base_url': self.url,
            'page': '1',
            'per_page': '12',
            'id': self.req_id
        }
        for current_page in range(1, self.last_page + 1):
            parameters['page'] = current_page
            req = requests.post(self.collection_url, parameters)
            soup = BeautifulSoup(req.text, features='lxml')
            links = soup.find_all('a', {'class': 'card-img-holder'})
            self.things += [x['href'].split(':')[1] for x in links]

        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            state = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and state == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, self.user, strip_ws(self.name))
        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
        self.collection_url = URL_COLLECTION


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, self.user)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
        self.collection_url = USER_COLLECTION


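# Typical use of the Grouping subclasses (a sketch - the user and collection
# names here are hypothetical):
#
#   Designs("some_user", os.getcwd(), quick=False, compress=False).download()
#   Collection("some_user", "some collection", os.getcwd(), False, False).download()

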
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.title = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        try:
            BROWSER.get(url)
            wait = WebDriverWait(BROWSER, 60)
            pc = PageChecker()
            wait.until(pc)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        except selenium.common.exceptions.TimeoutException:
            logging.error(pc.log)
            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
            return

        self.title = pc.title
        if not pc.files:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
            return
        for link in pc.files:
            logging.debug("Parsing link: {}".format(link.text))
            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
            if link_link.endswith("/zip"):
                # Bulk download link - skip it.
                continue
            try:
                link_title, link_details, _ = link.text.split("\n")
            except ValueError:
                # If it is a filetype that doesn't generate a picture,
                # then we get an extra field at the start.
                _, link_title, link_details, _ = link.text.split("\n")

            # link_details will be something like
            # '461 kb | Updated 06-11-2019 | 373 Downloads';
            # the date needs converting from M-D-Y to Y-M-D.
            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
            try:
                self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
            except ValueError:
                logging.error(link_date)

        self._image_links = [x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
        self._license = pc.license
        self.pc = pc

        self.old_download_dir = os.path.join(base_dir, slugify(self.title))
        self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            logging.info("Looking for old dir at {}".format(self.old_download_dir))
            if os.path.exists(self.old_download_dir):
                logging.warning("Found previous style download directory. Moving it from {} to {}".format(self.old_download_dir, self.download_dir))
                os.rename(self.old_download_dir, self.download_dir)
            else:
                # Not yet downloaded.
                self._parsed = True
                return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before timestamps were recorded.
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                # The .split(' ')[0] drops the time part of old-style timestamps.
                last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
                logging.debug(last_bits)
                if last_bits[0] == 0:
                    last_bits[0] = 1
                if last_bits[1] == 0:
                    last_bits[1] = 1
                if last_bits[2] == 0:
                    last_bits[2] = 1980
                try:
                    self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
                except ValueError:
                    # This one appears to be M D Y.
                    self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])

            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Timestamp file vanished between the check above and the open.
            logging.info("Missing timestamp file. Assuming update required.")
            self.last_time = None
            self._needs_download = True
            self._parsed = True
            return

        # OK, so we have a timestamp; let's see if there is anything new to get.
        try:
            if self._file_links.last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir, compress):
        """ Download all files for a given thing.
            Returns a State: OK or ALREADY_DOWNLOADED mean the thing is now
            on disk (not that this call necessarily downloaded anything!).
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
            return State.FAILED

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # Edge case: old style dir without a timestamp.
                logging.warning("Old style download dir found at {}".format(self.download_dir))
                prev_count = 0
                target_dir = "{}_old".format(self.download_dir)
                while os.path.exists(target_dir):
                    prev_count = prev_count + 1
                    target_dir = "{}_old_{}".format(self.download_dir, prev_count)
                os.rename(self.download_dir, target_dir)
            else:
                prev_dir = "{}_{}".format(self.download_dir, slugify(str(self.last_time)))
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                base_link = fl.link
                try:
                    fl.link = requests.get(fl.link, allow_redirects=False).headers['location']
                except Exception:
                    # Sometimes Thingiverse just gives us the direct link
                    # the first time. Not sure why.
                    pass

                fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = requests.get(file_link.link)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images. But this doesn't work yet.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.basename(imagelink)
                if filename.endswith('stl'):
                    filename = "{}.png".format(filename)
                image_req = requests.get(imagelink)
                with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            print("Failed to download {} - {}".format(filename, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        """
        # instructions are good too.
        logging.info("Downloading readme")
        try:
            readme_txt = soup.find('meta', property='og:description')[
                'content']
            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
                readme_handle.write("{}\n".format(readme_txt))
        except (TypeError, KeyError) as exception:
            logging.warning("No readme? {}".format(exception))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        """
        # Best get some licenses
        logging.info("Downloading license")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            print("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.title),
                                          self.time_stamp)
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.title,
            file_name))
        # with libarchive.file_writer(filename, 'lzma', '7z') as archive:
        # with py7zr.SevenZipFile(file_name, 'w') as archive:
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.title))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of the collection(s) to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for downloader in downloaders:
        thing_queue.put(None)

    atexit.register(BROWSER.quit)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()