Add batch support
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
3c82f75b 13from shutil import copyfile
975060c9
OM
14from bs4 import BeautifulSoup
15
16URL_BASE = "https://www.thingiverse.com"
17URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
948bd56f 18USER_COLLECTION = URL_BASE + "/ajax/user/designs"
975060c9
OM
19
20ID_REGEX = re.compile(r'"id":(\d*),')
21TOTAL_REGEX = re.compile(r'"total":(\d*),')
22LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
23# This appears to be fixed at 12, but if it changes would screw the rest up.
24PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
dd8c35f4
OM
25NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')
26
db8066ec
OM
27VERSION = "0.4.0"
28
dd8c35f4
OM
def strip_ws(value):
    """ Collapse each run of whitespace (or hyphens) in value into a single hyphen. """
    return str(re.sub(r'[-\s]+', '-', value))
975060c9
OM
32
def slugify(value):
    """
    Normalise a string for use as a file/directory name: fold it to ASCII,
    strip characters that are not word characters, whitespace or hyphens,
    then collapse whitespace runs into single hyphens.
    """
    ascii_value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
    cleaned = str(re.sub(r'[^\w\s-]', '', ascii_value).strip())
    return str(re.sub(r'[-\s]+', '-', cleaned))
43
class Grouping:
    """ Holds details of a group of things for download.

    This is effectively (although not actually) an abstract class
    - use Collection or Designs instead.
    """
    def __init__(self):
        self.things = []       # thing ids discovered by get()
        self.total = 0         # size of the grouping as reported by the site
        self.req_id = None     # internal thingiverse id used for paged requests
        self.last_page = 0     # number of result pages to walk
        self.per_page = None   # page size reported by the site (expected to be 12)
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.collection_url = None

    def _get_small_grouping(self, req):
        """ Handle small groupings (a single page with no pagination metadata). """
        soup = BeautifulSoup(req.text, features='lxml')
        links = soup.find_all('a', {'class': 'card-img-holder'})
        # hrefs look like ".../thing:12345" - keep the id after the colon.
        self.things = [x['href'].split(':')[1] for x in links]
        self.total = len(self.things)

        return self.things

    def get(self):
        """ Retrieve the thing ids of the grouping, caching the result.

        Raises ValueError if the subclass did not set self.url.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(self.url))
        c_req = requests.get(self.url)
        total = TOTAL_REGEX.search(c_req.text)
        if total is None:
            # This is a small (<13) items grouping. Pull the list from this req.
            return self._get_small_grouping(c_req)
        self.total = total.groups()[0]
        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
        parameters = {
            'base_url': self.url,
            'page': '1',
            'per_page': '12',
            'id': self.req_id
        }
        for current_page in range(1, self.last_page + 1):
            parameters['page'] = current_page
            req = requests.post(self.collection_url, parameters)
            soup = BeautifulSoup(req.text, features='lxml')
            links = soup.find_all('a', {'class': 'card-img-holder'})
            self.things += [x['href'].split(':')[1] for x in links]

        return self.things

    def download(self):
        """ Downloads all the files in a collection.

        Raises ValueError if the subclass did not set self.download_dir.
        """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError("No download_dir set - invalidly initialised object?")

        # BUG FIX: removed unused local `base_dir = os.getcwd()`.
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            # BUG FIX: report the actual thing id, not just the loop index.
            logging.info("Downloading thing {} ({} of {})".format(thing, idx + 1, self.total))
            Thing(thing).download(self.download_dir)
975060c9 124
3522a3bf
OM
class Collection(Grouping):
    """ A named collection of things belonging to one user. """

    def __init__(self, user, name, directory):
        super().__init__()
        self.user = user
        self.name = name
        # Collection pages live at /<user>/collections/<name-with-hyphens>.
        self.url = "{}/{}/collections/{}".format(URL_BASE, user, strip_ws(name))
        target = "{}-{}".format(slugify(user), slugify(name))
        self.download_dir = os.path.join(directory, target)
        self.collection_url = URL_COLLECTION
3522a3bf
OM
136
class Designs(Grouping):
    """ Every design published by a single user. """

    def __init__(self, user, directory):
        super().__init__()
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, user)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(user)))
        self.collection_url = USER_COLLECTION
975060c9 145
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse. """
    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None          # timestamp of the last completed download, if any
        self._parsed = False           # has _parse() been run yet?
        self._needs_download = True    # assume work is needed until proven otherwise
        self.text = None               # raw html of the thing's files page
        self.title = None              # slugified page title, set by _parse()
        self.download_dir = None       # target directory, set by _parse()

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done for this thing. """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        req = requests.get(url)
        self.text = req.text
        soup = BeautifulSoup(self.text, features='lxml')

        self.title = slugify(soup.find_all('h1')[0].text.strip())
        self.download_dir = os.path.join(base_dir, self.title)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            # Not yet downloaded
            self._parsed = True
            return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before timestamps were recorded.
            logging.warning("Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                self.last_time = timestamp_handle.readlines()[0]
            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Not run on this thing before.
            logging.info("Old-style download directory found. Assuming update required.")
            self.last_time = None
            self._parsed = True
            return

        # OK, so we have a timestamp, lets see if there is anything new to get
        file_links = soup.find_all('a', {'class': 'file-download'})
        for file_link in file_links:
            timestamp = file_link.find_all('time')[0]['datetime']
            logging.debug("Checking {} (updated {})".format(file_link["title"], timestamp))
            # NOTE: timestamps are compared lexically - assumes ISO-8601 format
            # from the site, where string order matches chronological order.
            if timestamp > self.last_time:
                logging.info("Found new/updated file {}".format(file_link["title"]))
                self._needs_download = True
                self._parsed = True
                return
        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir):
        """ Download all files for a given thing into base_dir/<title>. """
        if not self._parsed:
            self._parse(base_dir)

        if not self._needs_download:
            # BUG FIX: was a bare print(); use logging like the rest of the file.
            logging.info("{} already downloaded - skipping.".format(self.title))
            return

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # edge case: old style dir w/out timestamp.
                logging.warning("Old style download dir found for {}".format(self.title))
                os.rename(self.download_dir, "{}_old".format(self.download_dir))
            else:
                # Keep the previous version around so unchanged files can be copied.
                prev_dir = "{}_{}".format(self.download_dir, self.last_time)
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download
        soup = BeautifulSoup(self.text, features='lxml')
        file_links = soup.find_all('a', {'class': 'file-download'})

        new_file_links = []
        old_file_links = []
        new_last_time = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            new_file_links = file_links
            new_last_time = file_links[0].find_all('time')[0]['datetime']
            for file_link in file_links:
                timestamp = file_link.find_all('time')[0]['datetime']
                logging.debug("Found file {} from {}".format(file_link["title"], timestamp))
                if timestamp > new_last_time:
                    new_last_time = timestamp
        else:
            for file_link in file_links:
                timestamp = file_link.find_all('time')[0]['datetime']
                logging.debug("Checking {} (updated {})".format(file_link["title"], timestamp))
                if timestamp > self.last_time:
                    new_file_links.append(file_link)
                else:
                    old_file_links.append(file_link)
                if not new_last_time or timestamp > new_last_time:
                    new_last_time = timestamp

        logging.debug("new timestamp {}".format(new_last_time))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link["title"])
            new_file = os.path.join(self.download_dir, file_link["title"])
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning("Unable to find {} in old archive, redownloading".format(file_link["title"]))
                new_file_links.append(file_link)

        # Now download the new ones
        files = [("{}{}".format(URL_BASE, x['href']), x["title"]) for x in new_file_links]
        logging.info("Downloading {} new files of {}".format(len(new_file_links), len(file_links)))
        name = None  # BUG FIX: guard so the except clause can always report something
        try:
            for url, name in files:
                file_name = os.path.join(self.download_dir, name)
                logging.debug("Downloading {} from {} to {}".format(name, url, file_name))
                data_req = requests.get(url)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(name, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return

        # People like images
        image_dir = os.path.join(self.download_dir, 'images')
        imagelinks = soup.find_all('span', {'class': 'gallery-slider'})[0] \
                         .find_all('div', {'class': 'gallery-photo'})
        logging.info("Downloading {} images.".format(len(imagelinks)))
        filename = None  # BUG FIX: was unbound in the handler if os.mkdir raised
        try:
            os.mkdir(image_dir)
            for imagelink in imagelinks:
                url = imagelink['data-full']
                filename = os.path.basename(url)
                if filename.endswith('stl'):
                    # Thumbnails of STL previews are actually PNGs.
                    filename = "{}.png".format(filename)
                image_req = requests.get(url)
                with open(os.path.join(image_dir, filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            # BUG FIX: was a bare print(); use logging like the rest of the file.
            logging.error("Failed to download {} - {}".format(filename, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return

        try:
            # Now write the timestamp so a later run can detect updates.
            with open(timestamp_file, 'w') as timestamp_handle:
                timestamp_handle.write(new_last_time)
        except Exception as exception:
            # BUG FIX: was a bare print(); use logging like the rest of the file.
            logging.error("Failed to write timestamp file - {}".format(exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
975060c9 324
1ab49020
OM
def do_batch(batch_file, download_dir):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions (one per line):
        thing <id>
        collection <owner> <name>
        user <name>
    Blank lines and lines starting with '#' are skipped; anything else
    is logged as unparseable and skipped.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            # BUG FIX: a blank line used to raise IndexError on command_arr[0].
            if not line or line.startswith('#'):
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug("Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir)
                continue
            if command_arr[0] == "collection":
                logging.debug("Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2], download_dir).download()
                continue
            if command_arr[0] == "user":
                # BUG FIX: this message previously said "collection".
                logging.debug("Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
345
975060c9
OM
def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=['debug', 'info', 'warning'],
                        default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory", help="Target directory to download into")

    # One sub-command per kind of download.
    subparsers = parser.add_subparsers(help="Type of thing to download", dest="subcommand")
    sub = subparsers.add_parser('collection', help="Download an entire collection")
    sub.add_argument("owner", help="The owner of the collection to get")
    sub.add_argument("collection", help="The name of the collection to get")
    sub = subparsers.add_parser('thing', help="Download a single thing.")
    sub.add_argument("thing", help="Thing ID to download")
    sub = subparsers.add_parser("user", help="Download all things by a user")
    sub.add_argument("user", help="The user to get the designs of")
    sub = subparsers.add_parser("batch", help="Perform multiple actions written in a text file")
    sub.add_argument("batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        # No sub-command given: show usage and bail out.
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    command = args.subcommand
    if command.startswith("collection"):
        Collection(args.owner, args.collection, args.directory).download()
    elif command == "thing":
        Thing(args.thing).download(args.directory)
    elif command == "user":
        Designs(args.user, args.directory).download()
    elif command == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    elif command == "batch":
        do_batch(args.batch_file, args.directory)
382
975060c9
OM
383
384if __name__ == "__main__":
385 main()