#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
from shutil import copyfile
from bs4 import BeautifulSoup

URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

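# These regexes pull values out of the JSON blob embedded in a grouping
# page. The fragment they match looks roughly like this (values
# illustrative):
#   "id":1234,"total":27,"last_page":3,"per_page":12,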
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12; if it ever changes, the paging logic
# below will break.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

VERSION = "0.5.1"

def strip_ws(value):
    """ Remove whitespace from a string """
    return str(NO_WHITESPACE_REGEX.sub('-', value))

def slugify(value):
    """
    Normalises a string: strips accents, removes non-alphanumeric
    characters, and converts spaces to hyphens.
    """
    value = unicodedata.normalize('NFKD', value).encode(
        'ascii', 'ignore').decode()
    value = str(re.sub(r'[^\w\s-]', '', value).strip())
    value = str(NO_WHITESPACE_REGEX.sub('-', value))
    return value

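# Worked examples for the helpers above (values illustrative):
#   strip_ws("my cool collection")   -> "my-cool-collection"
#   slugify("Gell-Mann's Wobbler!")  -> "Gell-Manns-Wobbler"
# Note that slugify() does not lowercase its input.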

class Grouping:
    """ Holds details of a group of things for download.

    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """

    def __init__(self):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.collection_url = None

    def _get_small_grouping(self, req):
        """ Handle small groupings """
        soup = BeautifulSoup(req.text, features='lxml')
        links = soup.find_all('a', {'class': 'card-img-holder'})
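        # Each card link has an href of the form "/thing:1234" (assumed
        # format), so the numeric id is the part after the colon.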
        self.things = [x['href'].split(':')[1] for x in links]
        self.total = len(self.things)

        return self.things

    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(self.url))
        c_req = requests.get(self.url)
        total = TOTAL_REGEX.search(c_req.text)
        if total is None:
            # This is a small grouping (fewer than 13 items); pull the
            # list straight from this request.
            return self._get_small_grouping(c_req)
        self.total = total.groups()[0]
        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
        parameters = {
            'base_url': self.url,
            'page': '1',
            'per_page': '12',
            'id': self.req_id
        }
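        # Page through the ajax endpoint one page at a time; each POST
        # returns the HTML for a single page of (apparently) 12 cards.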
        for current_page in range(1, self.last_page + 1):
            parameters['page'] = current_page
            req = requests.post(self.collection_url, parameters)
            soup = BeautifulSoup(req.text, features='lxml')
            links = soup.find_all('a', {'class': 'card-img-holder'})
            self.things += [x['href'].split(':')[1] for x in links]

        return self.things

    def download(self):
        """ Download all the things in the grouping. """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {}".format(idx))
            Thing(thing).download(self.download_dir)


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory):
        Grouping.__init__(self)
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, self.user, strip_ws(self.name))
        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
        self.collection_url = URL_COLLECTION


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory):
        Grouping.__init__(self)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, self.user)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
        self.collection_url = USER_COLLECTION


class Thing:
    """ An individual design on Thingiverse. """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.title = None
        self.download_dir = None

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        req = requests.get(url)
        self.text = req.text
        soup = BeautifulSoup(self.text, features='lxml')
        self.title = slugify(soup.find_all('h1')[0].text.strip())
        self.download_dir = os.path.join(base_dir, self.title)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            # Not yet downloaded
            self._parsed = True
            return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old-style download directory without a timestamp file.
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                self.last_time = timestamp_handle.readlines()[0]
            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Not run on this thing before.
            logging.info(
                "Old-style download directory found. Assuming update required.")
            self.last_time = None
            self._parsed = True
            return

        # OK, so we have a timestamp; let's see if there is anything new to get.
        file_links = soup.find_all('a', {'class': 'file-download'})
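        # The 'datetime' attributes appear to be ISO-format strings, so
        # plain string comparison orders them chronologically.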
        for file_link in file_links:
            timestamp = file_link.find_all('time')[0]['datetime']
            logging.debug("Checking {} (updated {})".format(
                file_link["title"], timestamp))
            if timestamp > self.last_time:
                logging.info(
                    "Found new/updated file {}".format(file_link["title"]))
                self._needs_download = True
                self._parsed = True
                return
        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir):
        """ Download all files for a given thing. """
        if not self._parsed:
            self._parse(base_dir)

        if not self._needs_download:
            logging.info("{} already downloaded - skipping.".format(self.title))
            return

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # Edge case: old-style dir without a timestamp.
                logging.warning(
                    "Old style download dir found for {}".format(self.title))
                os.rename(self.download_dir,
                          "{}_old".format(self.download_dir))
            else:
                prev_dir = "{}_{}".format(self.download_dir, self.last_time)
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download
        soup = BeautifulSoup(self.text, features='lxml')
        file_links = soup.find_all('a', {'class': 'file-download'})

        new_file_links = []
        old_file_links = []
        new_last_time = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            new_file_links = file_links
            new_last_time = file_links[0].find_all('time')[0]['datetime']
            for file_link in file_links:
                timestamp = file_link.find_all('time')[0]['datetime']
                logging.debug("Found file {} from {}".format(
                    file_link["title"], timestamp))
                if timestamp > new_last_time:
                    new_last_time = timestamp
        else:
            for file_link in file_links:
                timestamp = file_link.find_all('time')[0]['datetime']
                logging.debug("Checking {} (updated {})".format(
                    file_link["title"], timestamp))
                if timestamp > self.last_time:
                    new_file_links.append(file_link)
                else:
                    old_file_links.append(file_link)
                if not new_last_time or timestamp > new_last_time:
                    new_last_time = timestamp

        logging.debug("new timestamp {}".format(new_last_time))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link["title"])
            new_file = os.path.join(self.download_dir, file_link["title"])
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link["title"]))
                new_file_links.append(file_link)

        # Now download the new ones
        files = [("{}{}".format(URL_BASE, x['href']), x["title"])
                 for x in new_file_links]
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(file_links)))
        try:
            for url, name in files:
                file_name = os.path.join(self.download_dir, name)
                logging.debug("Downloading {} from {} to {}".format(
                    name, url, file_name))
                data_req = requests.get(url)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(name, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return

        # People like images
        image_dir = os.path.join(self.download_dir, 'images')
        imagelinks = soup.find_all('span', {'class': 'gallery-slider'})[0] \
            .find_all('div', {'class': 'gallery-photo'})
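        # Each gallery photo div carries the full-resolution image URL in
        # its 'data-full' attribute.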
        logging.info("Downloading {} images.".format(len(imagelinks)))
        try:
            os.mkdir(image_dir)
            for imagelink in imagelinks:
                url = imagelink['data-full']
                filename = os.path.basename(url)
                if filename.endswith('stl'):
                    filename = "{}.png".format(filename)
                image_req = requests.get(url)
                with open(os.path.join(image_dir, filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(filename, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w') as timestamp_handle:
                timestamp_handle.write(new_last_time)
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))

def do_batch(batch_file, download_dir):
    """ Read a file line by line, parsing each line as an instruction for this script. """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")

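# A batch file contains one instruction per line, mirroring the CLI
# subcommands. For example (ids and names illustrative):
#   thing 1234567
#   collection some_user their-collection-name
#   user some_user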


def main():
    """ Entry point for the script when run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download an entire collection")
    collection_parser.add_argument(
        "owner", help="The owner of the collection to get")
    collection_parser.add_argument(
        "collection", help="The name of the collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument("thing", help="Thing ID to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by a user")
    user_parser.add_argument("user", help="The user to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()
    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    if args.subcommand.startswith("collection"):
        Collection(args.owner, args.collection, args.directory).download()
    if args.subcommand == "thing":
        Thing(args.thing).download(args.directory)
    if args.subcommand == "user":
        Designs(args.user, args.directory).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory)


if __name__ == "__main__":
    main()
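
# Example invocations (illustrative):
#   python3 thingy_grabber.py thing 1234567
#   python3 thingy_grabber.py -d ~/things collection some_user their-collection-name
#   python3 thingy_grabber.py --log-level debug user some_user
#   python3 thingy_grabber.py batch batch.txt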