HCoop Git - clinton/thingy_grabber.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	"""
	3	Thingiverse bulk downloader
	4	"""
	5
	6	import re
	7	import sys
	8	import os
	9	import argparse
	10	import unicodedata
	11	import requests
	12	import logging
	13	import multiprocessing
	14	import enum
	15	import datetime
	16	from shutil import copyfile
	17	from bs4 import BeautifulSoup
	18	from dataclasses import dataclass
	19	import selenium
	20	from selenium import webdriver
	21	from selenium.webdriver.common.by import By
	22	from selenium.webdriver.support.ui import WebDriverWait
	23	from selenium.webdriver.support import expected_conditions as EC
	24	from selenium.webdriver.firefox.options import Options
	25	import atexit
	26	import py7zr
	27
# Filter chain handed to py7zr.SevenZipFile when a finished download is
# archived (LZMA2 only).
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# Site root, plus the two AJAX endpoints that Grouping.get() POSTs to when
# paging through a collection / a user's designs.
URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

# Patterns used by Grouping.get() to pull paging metadata out of the text of
# a grouping page.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
# Matches a run of whitespace and/or hyphens; strip_ws() replaces each run
# with a single '-'.
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

# Number of Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): not referenced anywhere in the visible source.
RETRY_COUNT = 3

# Longest absolute path that truncate_name() returns unchanged.
MAX_PATH_LENGTH = 250

# Reported by the "version" subcommand.
VERSION = "0.9.0"


#BROWSER = webdriver.PhantomJS('./phantomjs')
# A single headless Firefox session is started here, at import time, and is
# shared (as the BROWSER global) by PageChecker and Thing._parse.
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

BROWSER.set_window_size(1980, 1080)
	55
	56
@dataclass
class FileLink:
    """ One downloadable file listed on a thing's files page. """
    # File name as displayed on the page; also used as the local file name.
    name: str
    # Date of the file's last update, as parsed from the page.
    last_update: datetime.datetime
    # URL the file is downloaded from.
    link: str
	62
	63	class FileLinks:
	64	def __init__(self, initial_links=[]):
	65	self.links = []
	66	self.last_update = None
	67	for link in initial_links:
	68	self.append(link)
	69
	70	def __iter__(self):
	71	return iter(self.links)
	72
	73	def __getitem__(self, item):
	74	return self.links[item]
	75
	76	def __len__(self):
	77	return len(self.links)
	78
	79	def append(self, link):
	80	try:
	81	self.last_update = max(self.last_update, link.last_update)
	82	except TypeError:
	83	self.last_update = link.last_update
	84	self.links.append(link)
	85
	86
	87	class State(enum.Enum):
	88	OK = enum.auto()
	89	FAILED = enum.auto()
	90	ALREADY_DOWNLOADED = enum.auto()
	91
	92
	93	def fail_dir(dir_name):
	94	""" When a download has failed, move it sideways.
	95	"""
	96	target_dir = "{}_failed".format(dir_name)
	97	inc = 0
	98	while os.path.exists(target_dir):
	99	target_dir = "{}_failed_{}".format(dir_name, inc)
	100	inc += 1
	101	os.rename(dir_name, target_dir)
	102
	103
	104	def truncate_name(file_name):
	105	""" Ensure the filename is not too long for, well windows basically.
	106	"""
	107	path = os.path.abspath(file_name)
	108	if len(path) <= MAX_PATH_LENGTH:
	109	return path
	110	to_cut = len(path) - (MAX_PATH_LENGTH + 3)
	111	base, extension = os.path.splitext(path)
	112	inc = 0
	113	new_path = "{}_{}{}".format(base, inc, extension)
	114	while os.path.exists(new_path):
	115	new_path = "{}_{}{}".format(base, inc, extension)
	116	inc += 1
	117	return new_path
	118
	119
	120	def strip_ws(value):
	121	""" Remove whitespace from a string """
	122	return str(NO_WHITESPACE_REGEX.sub('-', value))
	123
	124
	125	def slugify(value):
	126	"""
	127	Normalise string, removes invalid for filename charactersr
	128	and converts string to lowercase.
	129	"""
	130	value = unicodedata.normalize('NFKC', value).lower().strip()
	131	value = re.sub(r'[\\/<>:\?\*\\|"]', '', value)
	132	value = re.sub(r'\.*$', '', value)
	133	return value
	134
class PageChecker(object):
    """ Stateful wait condition that scrapes a thing's files page.

    An instance is handed to WebDriverWait.until(), which calls it repeatedly.
    Each call looks at the page currently loaded in the global BROWSER and
    returns True once the title, the file rows, the thumbnails and the license
    text have all been found; until then it returns False. Whatever has been
    found so far is kept on the instance between calls.
    """

    def __init__(self):
        # Breadcrumbs of what each call did; logged by the caller on timeout.
        self.log = []
        # Text of the model-name element, once found.
        self.title = None
        # Number of files the page claims to have, once known and non-zero.
        self.file_count = None
        # File row elements; only set by the call that returns True.
        self.files = None
        # Thumbnail elements.
        self.images = None
        # Text of the license element.
        self.license = None


    def __call__(self, _):
        """ Poll the page once; the driver argument is ignored in favour of
        the global BROWSER. Returns True when everything has been collected,
        False otherwise (including when any exception is raised).
        """
        # NOTE(review): EC._find_element / EC._find_elements are
        # underscore-prefixed helpers of selenium's expected_conditions
        # module - presumably private API; confirm they exist in the selenium
        # version in use.
        try:
            self.log.append("call")
            if self.title is None:
                # first find the name
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                # The wanted metric's text is "<count>\nThing Files"; if no
                # metric matches, the [0] raises and this call returns False.
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    # A count of zero is treated as "not loaded yet".
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            fileRows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(fileRows)))
            if len(fileRows) < self.file_count:
                # Not all rows are present yet; try again on the next poll.
                return False

            self.log.append("Looking for images")
            # By this point _should_ have loaded all the images
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = fileRows
            return True
        except Exception:
            # Any failure (missing element, unparsable count, ...) just means
            # "not ready"; the surrounding wait keeps polling until it times out.
            return False
	180
	181
	182
	183
	184	class Downloader(multiprocessing.Process):
	185	"""
	186	Class to handle downloading the things we have found to get.
	187	"""
	188
	189	def __init__(self, thing_queue, download_directory, compress):
	190	multiprocessing.Process.__init__(self)
	191	# TODO: add parameters
	192	self.thing_queue = thing_queue
	193	self.download_directory = download_directory
	194	self.compress = compress
	195
	196	def run(self):
	197	""" actual download loop.
	198	"""
	199	while True:
	200	thing_id = self.thing_queue.get()
	201	if thing_id is None:
	202	logging.info("Shutting download queue")
	203	self.thing_queue.task_done()
	204	break
	205	logging.info("Handling id {}".format(thing_id))
	206	Thing(thing_id).download(self.download_directory, self.compress)
	207	self.thing_queue.task_done()
	208	return
	209
	210
	211
	212
	213
	214	class Grouping:
	215	""" Holds details of a group of things for download
	216	This is effectively (although not actually) an abstract class
	217	- use Collection or Designs instead.
	218	"""
	219
	220	def __init__(self, quick, compress):
	221	self.things = []
	222	self.total = 0
	223	self.req_id = None
	224	self.last_page = 0
	225	self.per_page = None
	226	# Should we stop downloading when we hit a known datestamp?
	227	self.quick = quick
	228	self.compress = compress
	229	# These should be set by child classes.
	230	self.url = None
	231	self.download_dir = None
	232	self.collection_url = None
	233
	234	def _get_small_grouping(self, req):
	235	""" Handle small groupings """
	236	soup = BeautifulSoup(req.text, features='lxml')
	237	links = soup.find_all('a', {'class': 'card-img-holder'})
	238	self.things = [x['href'].split(':')[1] for x in links]
	239	self.total = len(self.things)
	240
	241	return self.things
	242
	243	def get(self):
	244	""" retrieve the things of the grouping. """
	245	if self.things:
	246	# We've already done it.
	247	return self.things
	248
	249	# Check for initialisation:
	250	if not self.url:
	251	logging.error("No URL set - object not initialised properly?")
	252	raise ValueError("No URL set - object not initialised properly?")
	253
	254	# Get the internal details of the grouping.
	255	logging.debug("Querying {}".format(self.url))
	256	c_req = requests.get(self.url)
	257	total = TOTAL_REGEX.search(c_req.text)
	258	if total is None:
	259	# This is a small (<13) items grouping. Pull the list from this req.
	260	return self._get_small_grouping(c_req)
	261	self.total = total.groups()[0]
	262	self.req_id = ID_REGEX.search(c_req.text).groups()[0]
	263	self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
	264	self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
	265	parameters = {
	266	'base_url': self.url,
	267	'page': '1',
	268	'per_page': '12',
	269	'id': self.req_id
	270	}
	271	for current_page in range(1, self.last_page + 1):
	272	parameters['page'] = current_page
	273	req = requests.post(self.collection_url, parameters)
	274	soup = BeautifulSoup(req.text, features='lxml')
	275	links = soup.find_all('a', {'class': 'card-img-holder'})
	276	self.things += [x['href'].split(':')[1] for x in links]
	277
	278	return self.things
	279
	280	def download(self):
	281	""" Downloads all the files in a collection """
	282	if not self.things:
	283	self.get()
	284
	285	if not self.download_dir:
	286	raise ValueError(
	287	"No download_dir set - invalidly initialised object?")
	288
	289	base_dir = os.getcwd()
	290	try:
	291	os.mkdir(self.download_dir)
	292	except FileExistsError:
	293	logging.info("Target directory {} already exists. Assuming a resume."
	294	.format(self.download_dir))
	295	logging.info("Downloading {} thing(s).".format(self.total))
	296	for idx, thing in enumerate(self.things):
	297	logging.info("Downloading thing {} - {}".format(idx, thing))
	298	RC = Thing(thing).download(self.download_dir, self.compress)
	299	if self.quick and RC==State.ALREADY_DOWNLOADED:
	300	logging.info("Caught up, stopping.")
	301	return
	302
	303
	304
	305
	306
	307	class Collection(Grouping):
	308	""" Holds details of a collection. """
	309
	310	def __init__(self, user, name, directory, quick, compress):
	311	Grouping.__init__(self, quick, compress)
	312	self.user = user
	313	self.name = name
	314	self.url = "{}/{}/collections/{}".format(
	315	URL_BASE, self.user, strip_ws(self.name))
	316	self.download_dir = os.path.join(directory,
	317	"{}-{}".format(slugify(self.user), slugify(self.name)))
	318	self.collection_url = URL_COLLECTION
	319
	320
	321	class Designs(Grouping):
	322	""" Holds details of all of a users' designs. """
	323
	324	def __init__(self, user, directory, quick, compress):
	325	Grouping.__init__(self, quick, compress)
	326	self.user = user
	327	self.url = "{}/{}/designs".format(URL_BASE, self.user)
	328	self.download_dir = os.path.join(
	329	directory, "{} designs".format(slugify(self.user)))
	330	self.collection_url = USER_COLLECTION
	331
	332
	333	class Thing:
	334	""" An individual design on thingiverse. """
	335
	336	def __init__(self, thing_id):
	337	self.thing_id = thing_id
	338	self.last_time = None
	339	self._parsed = False
	340	self._needs_download = True
	341	self.text = None
	342	self.title = None
	343	self.download_dir = None
	344	self.time_stamp = None
	345	self._file_links = FileLinks()
	346
	347	def _parse(self, base_dir):
	348	""" Work out what, if anything needs to be done. """
	349	if self._parsed:
	350	return
	351
	352	url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
	353	try:
	354	BROWSER.get(url)
	355	wait = WebDriverWait(BROWSER, 60)
	356	pc = PageChecker()
	357	wait.until(pc)
	358	except requests.exceptions.ConnectionError as error:
	359	logging.error("Unable to connect for thing {}: {}".format(
	360	self.thing_id, error))
	361	return
	362	except selenium.common.exceptions.TimeoutException:
	363	logging.error(pc.log)
	364	logging.error("Timeout trying to parse thing {}".format(self.thing_id))
	365	return
	366
	367	self.title = pc.title
	368	if not pc.files:
	369	logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
	370	for link in pc.files:
	371	logging.debug("Parsing link: {}".format(link.text))
	372	link_link = link.find_element_by_xpath(".//a").get_attribute("href")
	373	if link_link.endswith("/zip"):
	374	# bulk link.
	375	continue
	376	try:
	377	link_title, link_details, _ = link.text.split("\n")
	378	except ValueError:
	379	# If it is a filetype that doesn't generate a picture, then we get an extra field at the start.
	380	_, link_title, link_details, _ = link.text.split("\n")
	381
	382	#link_details will be something like '461 kb \| Updated 06-11-2019 \| 373 Downloads'
	383	#need to convert from M D Y to Y M D
	384	link_date = [int(x) for x in link_details.split("\|")[1].split()[-1].split("-")]
	385	try:
	386	self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
	387	except ValueError:
	388	logging.error(link_date)
	389
	390	self._image_links=[x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
	391	self._license = pc.license
	392	self.pc = pc
	393
	394
	395	self.old_download_dir = os.path.join(base_dir, slugify(self.title))
	396	self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))
	397
	398	logging.debug("Parsing {} ({})".format(self.thing_id, self.title))
	399
	400	if not os.path.exists(self.download_dir):
	401	logging.info("Looking for old dir at {}".format(self.old_download_dir))
	402	if os.path.exists(self.old_download_dir):
	403	logging.warning("Found previous style download directory. Moving it from {} to {}".format(self.old_download_dir, self.download_dir))
	404	os.rename(self.old_download_dir, self.download_dir)
	405	else:
	406	# Not yet downloaded
	407	self._parsed = True
	408	return
	409
	410	timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
	411	if not os.path.exists(timestamp_file):
	412	# Old download from before
	413	logging.warning(
	414	"Old-style download directory found. Assuming update required.")
	415	self._parsed = True
	416	return
	417
	418	try:
	419	with open(timestamp_file, 'r') as timestamp_handle:
	420	# add the .split(' ')[0] to remove the timestamp from the old style timestamps
	421	last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
	422	logging.warning(last_bits)
	423	if last_bits[0] == 0:
	424	last_bits[0] = 1
	425	if last_bits[1] == 0:
	426	last_bits[1] = 1
	427	if last_bits[2] == 0:
	428	last_bits[2] = 1980
	429	try:
	430	self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
	431	except ValueError:
	432	# This one appears to be M D Y
	433	self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])
	434
	435	logging.info("last downloaded version: {}".format(self.last_time))
	436	except FileNotFoundError:
	437	# Not run on this thing before.
	438	logging.info(
	439	"Old-style download directory found. Assuming update required.")
	440	self.last_time = None
	441	self._needs_download = True
	442	self._parsed = True
	443	return
	444
	445	# OK, so we have a timestamp, lets see if there is anything new to get
	446	try:
	447	if self._file_links.last_update > self.last_time:
	448	logging.info(
	449	"Found new/updated files {}".format(self._file_links.last_update))
	450	self._needs_download = True
	451	self._parsed = True
	452	return
	453	except TypeError:
	454	logging.warning("No files found for {}.".format(self.thing_id))
	455
	456	# Got here, so nope, no new files.
	457	self._needs_download = False
	458	self._parsed = True
	459
	460	def download(self, base_dir, compress):
	461	""" Download all files for a given thing.
	462	Returns True iff the thing is now downloaded (not iff it downloads the thing!)
	463	"""
	464	if not self._parsed:
	465	self._parse(base_dir)
	466
	467	if not self._parsed:
	468	logging.error(
	469	"Unable to parse {} - aborting download".format(self.thing_id))
	470	return State.FAILED
	471
	472	if not self._needs_download:
	473	print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
	474	return State.ALREADY_DOWNLOADED
	475
	476	if not self._file_links:
	477	print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
	478	return State.FAILED
	479
	480	# Have we already downloaded some things?
	481	timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
	482	prev_dir = None
	483	if os.path.exists(self.download_dir):
	484	if not os.path.exists(timestamp_file):
	485	# edge case: old style dir w/out timestamp.
	486	logging.warning("Old style download dir found at {}".format(self.title))
	487	prev_count = 0
	488	target_dir = "{}_old".format(self.download_dir)
	489	while os.path.exists(target_dir):
	490	prev_count = prev_count + 1
	491	target_dir = "{}_old_{}".format(self.download_dir, prev_count)
	492	os.rename(self.download_dir, target_dir)
	493	else:
	494	prev_dir = "{}_{}".format(self.download_dir, slugify(self.last_time.__str__()))
	495	os.rename(self.download_dir, prev_dir)
	496
	497	# Get the list of files to download
	498
	499	new_file_links = []
	500	old_file_links = []
	501	self.time_stamp = None
	502
	503	if not self.last_time:
	504	# If we don't have anything to copy from, then it is all new.
	505	logging.debug("No last time, downloading all files")
	506	new_file_links = self._file_links
	507	self.time_stamp = new_file_links[0].last_update
	508
	509	for file_link in new_file_links:
	510	self.time_stamp = max(self.time_stamp, file_link.last_update)
	511	logging.debug("New timestamp will be {}".format(self.time_stamp))
	512	else:
	513	self.time_stamp = self.last_time
	514	for file_link in self._file_links:
	515	if file_link.last_update > self.last_time:
	516	new_file_links.append(file_link)
	517	self.time_stamp = max(self.time_stamp, file_link.last_update)
	518	else:
	519	old_file_links.append(file_link)
	520
	521	logging.debug("new timestamp {}".format(self.time_stamp))
	522
	523	# OK. Time to get to work.
	524	logging.debug("Generating download_dir")
	525	os.mkdir(self.download_dir)
	526	filelist_file = os.path.join(self.download_dir, "filelist.txt")
	527	with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
	528	for fl in self._file_links:
	529	base_link = fl.link
	530	try:
	531	fl.link=requests.get(fl.link, allow_redirects=False).headers['location']
	532	except Exception:
	533	# Sometimes Thingiverse just gives us the direct link the first time. Not sure why.
	534	pass
	535
	536	fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))
	537
	538
	539	# First grab the cached files (if any)
	540	logging.info("Copying {} unchanged files.".format(len(old_file_links)))
	541	for file_link in old_file_links:
	542	old_file = os.path.join(prev_dir, file_link.name)
	543	new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
	544	try:
	545	logging.debug("Copying {} to {}".format(old_file, new_file))
	546	copyfile(old_file, new_file)
	547	except FileNotFoundError:
	548	logging.warning(
	549	"Unable to find {} in old archive, redownloading".format(file_link["title"]))
	550	new_file_links.append(file_link)
	551
	552	# Now download the new ones
	553	logging.info("Downloading {} new files of {}".format(
	554	len(new_file_links), len(self._file_links)))
	555	try:
	556	for file_link in new_file_links:
	557	file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
	558	logging.debug("Downloading {} from {} to {}".format(
	559	file_link.name, file_link.link, file_name))
	560	data_req = requests.get(file_link.link)
	561	with open(file_name, 'wb') as handle:
	562	handle.write(data_req.content)
	563	except Exception as exception:
	564	logging.error("Failed to download {} - {}".format(file_link.name, exception))
	565	fail_dir(self.download_dir)
	566	return State.FAILED
	567
	568
	569	# People like images. But this doesn't work yet.
	570	image_dir = os.path.join(self.download_dir, 'images')
	571	logging.info("Downloading {} images.".format(len(self._image_links)))
	572	try:
	573	os.mkdir(image_dir)
	574	for imagelink in self._image_links:
	575	filename = os.path.basename(imagelink)
	576	if filename.endswith('stl'):
	577	filename = "{}.png".format(filename)
	578	image_req = requests.get(imagelink)
	579	with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
	580	handle.write(image_req.content)
	581	except Exception as exception:
	582	print("Failed to download {} - {}".format(filename, exception))
	583	fail_dir(self.download_dir)
	584	return State.FAILED
	585
	586	"""
	587	# instructions are good too.
	588	logging.info("Downloading readme")
	589	try:
	590	readme_txt = soup.find('meta', property='og:description')[
	591	'content']
	592	with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
	593	readme_handle.write("{}\n".format(readme_txt))
	594	except (TypeError, KeyError) as exception:
	595	logging.warning("No readme? {}".format(exception))
	596	except IOError as exception:
	597	logging.warning("Failed to write readme! {}".format(exception))
	598
	599	"""
	600	# Best get some licenses
	601	logging.info("Downloading license")
	602	try:
	603	if self._license:
	604	with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
	605	license_handle.write("{}\n".format(self._license))
	606	except IOError as exception:
	607	logging.warning("Failed to write license! {}".format(exception))
	608
	609	try:
	610	# Now write the timestamp
	611	with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
	612	timestamp_handle.write(self.time_stamp.__str__())
	613	except Exception as exception:
	614	print("Failed to write timestamp file - {}".format(exception))
	615	fail_dir(self.download_dir)
	616	return State.FAILED
	617	self._needs_download = False
	618	logging.debug("Download of {} finished".format(self.title))
	619	if not compress:
	620	return State.OK
	621
	622
	623	thing_dir = "{} - {} - {}".format(self.thing_id,
	624	slugify(self.title),
	625	self.time_stamp)
	626	file_name = os.path.join(base_dir,
	627	"{}.7z".format(thing_dir))
	628	logging.debug("Compressing {} to {}".format(
	629	self.title,
	630	file_name))
	631	#with libarchive.file_writer(filename, 'lzma', '7z') as archive:
	632	with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
	633	#with py7zr.SevenZipFile(file_name, 'w' ) as archive:
	634	archive.writeall(self.download_dir, thing_dir)
	635	logging.debug("Compression of {} finished.".format(self.title))
	636	return State.OK
	637
	638
	639
	640
	641	def do_batch(batch_file, download_dir, quick, compress):
	642	""" Read a file in line by line, parsing each as a set of calls to this script."""
	643	with open(batch_file) as handle:
	644	for line in handle:
	645	line = line.strip()
	646	if not line:
	647	# Skip empty lines
	648	continue
	649	logging.info("Handling instruction {}".format(line))
	650	command_arr = line.split()
	651	if command_arr[0] == "thing":
	652	logging.debug(
	653	"Handling batch thing instruction: {}".format(line))
	654	Thing(command_arr[1]).download(download_dir, compress)
	655	continue
	656	if command_arr[0] == "collection":
	657	logging.debug(
	658	"Handling batch collection instruction: {}".format(line))
	659	Collection(command_arr[1], command_arr[2],
	660	download_dir, quick, compress).download()
	661	continue
	662	if command_arr[0] == "user":
	663	logging.debug(
	664	"Handling batch collection instruction: {}".format(line))
	665	Designs(command_arr[1], download_dir, quick, compress).download()
	666	continue
	667	logging.warning("Unable to parse current instruction. Skipping.")
	668
	669
	670	def main():
	671	""" Entry point for script being run as a command. """
	672	parser = argparse.ArgumentParser()
	673	parser.add_argument("-l", "--log-level", choices=[
	674	'debug', 'info', 'warning'], default='info', help="level of logging desired")
	675	parser.add_argument("-d", "--directory",
	676	help="Target directory to download into")
	677	parser.add_argument("-f", "--log-file",
	678	help="Place to log debug information to")
	679	parser.add_argument("-q", "--quick", action="store_true",
	680	help="Assume date ordering on posts")
	681	parser.add_argument("-c", "--compress", action="store_true",
	682	help="Compress files")
	683
	684
	685	subparsers = parser.add_subparsers(
	686	help="Type of thing to download", dest="subcommand")
	687	collection_parser = subparsers.add_parser(
	688	'collection', help="Download one or more entire collection(s)")
	689	collection_parser.add_argument(
	690	"owner", help="The owner of the collection(s) to get")
	691	collection_parser.add_argument(
	692	"collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
	693	thing_parser = subparsers.add_parser(
	694	'thing', help="Download a single thing.")
	695	thing_parser.add_argument(
	696	"things", nargs="*", help="Space seperated list of thing ID(s) to download")
	697	user_parser = subparsers.add_parser(
	698	"user", help="Download all things by one or more users")
	699	user_parser.add_argument(
	700	"users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
	701	batch_parser = subparsers.add_parser(
	702	"batch", help="Perform multiple actions written in a text file")
	703	batch_parser.add_argument(
	704	"batch_file", help="The name of the file to read.")
	705	subparsers.add_parser("version", help="Show the current version")
	706
	707	args = parser.parse_args()
	708	if not args.subcommand:
	709	parser.print_help()
	710	sys.exit(1)
	711	if not args.directory:
	712	args.directory = os.getcwd()
	713
	714	logger = logging.getLogger()
	715	formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	716	logger.setLevel(logging.DEBUG)
	717	console_handler = logging.StreamHandler()
	718	console_handler.setLevel(args.log_level.upper())
	719
	720	logger.addHandler(console_handler)
	721	if args.log_file:
	722	file_handler = logging.FileHandler(args.log_file)
	723	file_handler.setLevel(logging.DEBUG)
	724	file_handler.setFormatter(formatter)
	725	logger.addHandler(file_handler)
	726
	727
	728	# Start downloader
	729	thing_queue = multiprocessing.JoinableQueue()
	730	logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
	731	downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
	732	for downloader in downloaders:
	733	downloader.start()
	734
	735
	736	if args.subcommand.startswith("collection"):
	737	for collection in args.collections:
	738	Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
	739	if args.subcommand == "thing":
	740	for thing in args.things:
	741	thing_queue.put(thing)
	742	if args.subcommand == "user":
	743	for user in args.users:
	744	Designs(user, args.directory, args.quick, args.compress).download()
	745	if args.subcommand == "version":
	746	print("thingy_grabber.py version {}".format(VERSION))
	747	if args.subcommand == "batch":
	748	do_batch(args.batch_file, args.directory, args.quick, args.compress)
	749
	750	# Stop the downloader processes
	751	for downloader in downloaders:
	752	thing_queue.put(None)
	753
	754	atexit.register(BROWSER.quit)
	755
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    # Per the multiprocessing docs, freeze_support() is needed when the
    # program is frozen into a Windows executable.
    multiprocessing.freeze_support()
    main()