HCoop Git - clinton/thingy_grabber.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	"""
	3	Thingiverse bulk downloader
	4	"""
	5
	6	import re
	7	import sys
	8	import os
	9	import argparse
	10	import unicodedata
	11	import requests
	12	import logging
	13	import multiprocessing
	14	import enum
	15	import datetime
	16	from shutil import copyfile
	17	from dataclasses import dataclass
	18	import py7zr
	19	import glob
	20	import shutil
	21
	22	SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]
	23
	24	# I don't think this is exported by datetime
	25	DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
	26	# Windows cannot handle : in filenames
	27	SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'
	28
	29	API_BASE = "https://api.thingiverse.com"
	30	ACCESS_QP = "access_token={}"
	31	PAGE_QP = "page={}"
	32	API_USER_DESIGNS = API_BASE + "/users/{}/things/"
	33	API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP
	34
	35	# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
	36	API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
	37	API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP
	38
	39	API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
	40	API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
	41	API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
	42	API_THING_DOWNLOAD = "/download/?" + ACCESS_QP
	43
	44	API_KEY = None
	45
	46	DOWNLOADER_COUNT = 1
	47	RETRY_COUNT = 3
	48
	49	MAX_PATH_LENGTH = 250
	50
	51	VERSION = "0.10.2"
	52
	53	TIMESTAMP_FILE = "timestamp.txt"
	54
	55	SESSION = requests.Session()
	56
	57
	58	@dataclass
	59	class ThingLink:
	60	thing_id: str
	61	name: str
	62	api_link: str
	63
	64
	65	@dataclass
	66	class FileLink:
	67	name: str
	68	last_update: datetime.datetime
	69	link: str
	70
	71
	72	@dataclass
	73	class ImageLink:
	74	name: str
	75	link: str
	76
	77
	78	class FileLinks:
	79	def __init__(self, initial_links=None):
	80	if initial_links is None:
	81	initial_links = []
	82	self.links = []
	83	self.last_update = None
	84	for link in initial_links:
	85	self.append(link)
	86
	87	def __iter__(self):
	88	return iter(self.links)
	89
	90	def __getitem__(self, item):
	91	return self.links[item]
	92
	93	def __len__(self):
	94	return len(self.links)
	95
	96	def append(self, link):
	97	try:
	98	self.last_update = max(self.last_update, link.last_update)
	99	except TypeError:
	100	self.last_update = link.last_update
	101	self.links.append(link)
	102
	103
	104	class State(enum.Enum):
	105	OK = enum.auto()
	106	FAILED = enum.auto()
	107	ALREADY_DOWNLOADED = enum.auto()
	108
	109
	110	def sanitise_url(url):
	111	""" remove api keys from an url
	112	"""
	113	return re.sub(r'access_token=\w*',
	114	'access_token=***',
	115	url)
	116
	117
	118	def strip_time(date_obj):
	119	""" Takes a datetime object and returns another with the time set to 00:00
	120	"""
	121	return datetime.datetime.combine(date_obj.date(), datetime.time())
	122
	123
	124	def rename_unique(dir_name, target_dir_name):
	125	""" Move a directory sideways to a new name, ensuring it is unique.
	126	"""
	127	target_dir = target_dir_name
	128	inc = 0
	129	while os.path.exists(target_dir):
	130	target_dir = "{}_{}".format(target_dir_name, inc)
	131	inc += 1
	132	os.rename(dir_name, target_dir)
	133	return target_dir
	134
	135
	136	def fail_dir(dir_name):
	137	""" When a download has failed, move it sideways.
	138	"""
	139	return rename_unique(dir_name, "{}_failed".format(dir_name))
	140
	141
	142	def truncate_name(file_name):
	143	""" Ensure the filename is not too long for, well windows basically.
	144	"""
	145	path = os.path.abspath(file_name)
	146	if len(path) <= MAX_PATH_LENGTH:
	147	return path
	148	base, extension = os.path.splitext(path)
	149	inc = 0
	150	new_path = "{}_{}{}".format(base, inc, extension)
	151	while os.path.exists(new_path):
	152	new_path = "{}_{}{}".format(base, inc, extension)
	153	inc += 1
	154	return new_path
	155
	156
	157	def slugify(value):
	158	"""
	159	Normalise string, removes invalid for filename charactersr
	160	and converts string to lowercase.
	161	"""
	162	logging.debug("Sluggyfying {}".format(value))
	163	value = unicodedata.normalize('NFKC', value).lower().strip()
	164	value = re.sub(r'[\\/<>:?*\|"]', '', value)
	165	value = re.sub(r'\.*$', '', value)
	166	return value
	167
	168
	169	class Downloader(multiprocessing.Process):
	170	"""
	171	Class to handle downloading the things we have found to get.
	172	"""
	173
	174	def __init__(self, thing_queue, download_directory, compress, api_key):
	175	multiprocessing.Process.__init__(self)
	176	# TODO: add parameters
	177	self.thing_queue = thing_queue
	178	self.download_directory = download_directory
	179	self.compress = compress
	180	self.api_key = api_key
	181
	182	def run(self):
	183	""" actual download loop.
	184	"""
	185	while True:
	186	thing_id = self.thing_queue.get()
	187	if thing_id is None:
	188	logging.info("Shutting download queue")
	189	self.thing_queue.task_done()
	190	break
	191	thing = None
	192	if isinstance(thing_id, str):
	193	thing = Thing.from_thing_id(thing_id)
	194	if isinstance(thing_id, ThingLink):
	195	thing = Thing(thing_id)
	196	if not thing:
	197	logging.error("Don't know how to handle thing_id {}".format(thing_id))
	198	else:
	199	logging.info("Handling id {}".format(thing_id))
	200	thing.download(self.download_directory, self.compress, self.api_key)
	201	self.thing_queue.task_done()
	202	return
	203
	204
	205	class Grouping:
	206	""" Holds details of a group of things for download
	207	This is effectively (although not actually) an abstract class
	208	- use Collection or Designs instead.
	209	"""
	210
	211	def __init__(self, quick, compress):
	212	self.things = []
	213	self.total = 0
	214	self.req_id = None
	215	self.last_page = 0
	216	self.per_page = None
	217	# Should we stop downloading when we hit a known datestamp?
	218	self.quick = quick
	219	self.compress = compress
	220	# These should be set by child classes.
	221	self.url = None
	222	self.download_dir = None
	223
	224	@property
	225	def get(self):
	226	""" retrieve the things of the grouping. """
	227	if self.things:
	228	# We've already done it.
	229	return self.things
	230
	231	# Check for initialisation:
	232	if not self.url:
	233	logging.error("No URL set - object not initialised properly?")
	234	raise ValueError("No URL set - object not initialised properly?")
	235
	236	# Get the internal details of the grouping.
	237	logging.debug("Querying {}".format(sanitise_url(self.url)))
	238
	239	# self.url should already have been formatted as we don't need pagination
	240	logging.info("requesting:{}".format(sanitise_url(self.url)))
	241	current_req = SESSION.get(self.url)
	242	if current_req.status_code != 200:
	243	logging.error(
	244	"Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(self.url),
	245	current_req.text))
	246	else:
	247	current_json = current_req.json()
	248	for thing in current_json:
	249	logging.info(thing)
	250	self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
	251	logging.info("Found {} things.".format(len(self.things)))
	252	return self.things
	253
	254	def download(self):
	255	""" Downloads all the files in a collection """
	256	if not self.things:
	257	self.get()
	258
	259	if not self.download_dir:
	260	raise ValueError(
	261	"No download_dir set - invalidly initialised object?")
	262
	263	base_dir = os.getcwd()
	264	try:
	265	os.mkdir(self.download_dir)
	266	except FileExistsError:
	267	logging.info("Target directory {} already exists. Assuming a resume."
	268	.format(self.download_dir))
	269	logging.info("Downloading {} thing(s).".format(self.total))
	270	for idx, thing in enumerate(self.things):
	271	logging.info("Downloading thing {} - {}".format(idx, thing))
	272	return_code = Thing(thing).download(self.download_dir, self.compress)
	273	if self.quick and return_code == State.ALREADY_DOWNLOADED:
	274	logging.info("Caught up, stopping.")
	275	return
	276
	277
	278	class Collection(Grouping):
	279	""" Holds details of a collection. """
	280
	281	def __init__(self, user, name, directory, quick, compress):
	282	Grouping.__init__(self, quick, compress)
	283	self.user = user
	284	self.name = name
	285	self.paginated = False
	286	# need to figure out the the ID for the collection
	287	collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
	288	try:
	289	current_req = SESSION.get(collection_url)
	290	except requests.exceptions.ConnectionError as error:
	291	logging.error("Unable to connect for collections for user {}: {}".format(
	292	self.user, error))
	293	return
	294	if current_req.status_code != 200:
	295	logging.error(
	296	"Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(collection_url),
	297	current_req.text))
	298	return
	299	collection_list = current_req.json()
	300	try:
	301	# case insensitive to retain parity with previous behaviour
	302	collection = [x for x in collection_list if x['name'].casefold() == name.casefold()][0]
	303	except IndexError:
	304	logging.error("Unable to find collection {} for user {}".format(name, user))
	305	return
	306	self.collection_id = collection['id']
	307	self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)
	308
	309	self.download_dir = os.path.join(directory,
	310	"{}-{}".format(slugify(self.user), slugify(self.name)))
	311
	312
	313	class Designs(Grouping):
	314	""" Holds details of all of a users' designs. """
	315
	316	def __init__(self, user, directory, quick, compress):
	317	Grouping.__init__(self, quick, compress)
	318	self.user = user
	319	self.url = API_USER_DESIGNS.format(user)
	320	self.paginated = True
	321	self.download_dir = os.path.join(
	322	directory, "{} designs".format(slugify(self.user)))
	323
	324
	325	class Thing:
	326	""" An individual design on thingiverse. """
	327
	328	def __init__(self, thing_link):
	329	self.thing_id = thing_link.thing_id
	330	self.name = thing_link.name
	331	self.last_time = None
	332	self._parsed = False
	333	self._needs_download = True
	334	self.text = None
	335	self.download_dir = None
	336	self.time_stamp = None
	337	self._file_links = FileLinks()
	338	self._image_links = []
	339
	340	@classmethod
	341	def from_thing_id(cls, thing_id):
	342	"""
	343	Factory method that looks up a thing by ID and creates a Thing object for it
	344	:param thing_id: to look up
	345	:return: Thing or None
	346	"""
	347	return Thing(ThingLink(thing_id, "", ""))
	348
	349
	350	def _parse(self, base_dir, api_key):
	351	""" Work out what, if anything needs to be done. """
	352	if self._parsed:
	353	return
	354
	355	# First get the broad details
	356	url = API_THING_DETAILS.format(self.thing_id, api_key)
	357	logging.error(url)
	358	try:
	359	current_req = SESSION.get(url)
	360	except requests.exceptions.ConnectionError as error:
	361	logging.error("Unable to connect for thing {}: {}".format(
	362	self.thing_id, error))
	363	return
	364	# Check for DMCA
	365	if current_req.status_code == 403:
	366	logging.error("Access to thing {} is forbidden".format(self.thing_id))
	367	return
	368	if current_req.status_code != 200:
	369	logging.error("Got unexpected code {} from url {}: {}".format(current_req.status_code, sanitise_url(url),
	370	current_req.text))
	371	return
	372
	373	thing_json = current_req.json()
	374	try:
	375	self._license = thing_json['license']
	376	except KeyError:
	377	logging.warning("No license found for thing {}?".format(self.thing_id))
	378
	379	# TODO: Get non-html version of this?
	380	try:
	381	self._details = thing_json['details']
	382	except KeyError:
	383	logging.warning("No description found for thing {}?".format(self.thing_id))
	384
	385	if not self.name:
	386	# Probably generated with factory method.
	387	try:
	388	self.name = thing_json['name']
	389	except KeyError:
	390	logging.warning("No name found for thing {}?".format(self.thing_id))
	391	self.name = self.thing_id
	392
	393	# Now get the file details
	394	file_url = API_THING_FILES.format(self.thing_id, api_key)
	395
	396	try:
	397	current_req = SESSION.get(file_url)
	398	except requests.exceptions.ConnectionError as error:
	399	logging.error("Unable to connect for thing {}: {}".format(
	400	self.thing_id, error))
	401	return
	402
	403	if current_req.status_code != 200:
	404	logging.error("Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(file_url),
	405	current_req.text))
	406	return
	407
	408	link_list = current_req.json()
	409
	410	if not link_list:
	411	logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(
	412	self.thing_id))
	413
	414	for link in link_list:
	415	logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
	416	try:
	417	datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
	418	self._file_links.append(
	419	FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(api_key)))
	420	except ValueError:
	421	logging.error(link['date'])
	422
	423	# Finally get the image links
	424	image_url = API_THING_IMAGES.format(self.thing_id, api_key)
	425
	426	try:
	427	current_req = SESSION.get(image_url)
	428	except requests.exceptions.ConnectionError as error:
	429	logging.error("Unable to connect for thing {}: {}".format(
	430	self.thing_id, error))
	431	return
	432
	433	if current_req.status_code != 200:
	434	logging.error(
	435	"Unexpected status code {} for {}: {}".format(current_req.status_code, sanitise_url(image_url),
	436	current_req.text))
	437	return
	438
	439	image_list = current_req.json()
	440
	441	if not image_list:
	442	logging.warning(
	443	"No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(
	444	self.thing_id))
	445
	446	for image in image_list:
	447	logging.debug("parsing image: {}".format(image))
	448	name = None
	449	try:
	450	name = slugify(image['name'])
	451	# TODO: fallback to other types
	452	url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
	453	except KeyError:
	454	logging.warning("Missing image for {}".format(name))
	455	self._image_links.append(ImageLink(name, url))
	456
	457	self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
	458	self.download_dir = os.path.join(base_dir, self.slug)
	459
	460	self._handle_old_directory(base_dir)
	461
	462	logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
	463	latest, self.last_time = self._find_last_download(base_dir)
	464
	465	if not latest:
	466	# Not yet downloaded
	467	self._parsed = True
	468	return
	469
	470	logging.info("last downloaded version: {}".format(self.last_time))
	471
	472	# OK, so we have a timestamp, lets see if there is anything new to get
	473	# First off, are we comparing an old download that threw away the timestamp?
	474	ignore_time = self.last_time == strip_time(self.last_time)
	475	try:
	476	# TODO: Allow for comparison at the exact time
	477	files_last_update = self._file_links.last_update
	478	if ignore_time:
	479	logging.info("Dropping time from comparison stamp as old-style download dir")
	480	files_last_update = strip_time(files_last_update)
	481
	482	if files_last_update > self.last_time:
	483	logging.info(
	484	"Found new/updated files {}".format(self._file_links.last_update))
	485	self._needs_download = True
	486	self._parsed = True
	487	return
	488	except TypeError:
	489	logging.warning("No files found for {}.".format(self.thing_id))
	490
	491	# Got here, so nope, no new files.
	492	self._needs_download = False
	493	self._parsed = True
	494
	495	def _handle_old_directory(self, base_dir):
	496	""" Deal with any old directories from previous versions of the code.
	497	"""
	498	old_dir = os.path.join(base_dir, slugify(self.name))
	499	if os.path.exists(old_dir):
	500	logging.warning("Found old style download_dir. Moving.")
	501	rename_unique(old_dir, self.download_dir)
	502
	503	def _handle_outdated_directory(self):
	504	""" Move the current download directory sideways if the thing has changed.
	505	"""
	506	if not os.path.exists(self.download_dir):
	507	# No old directory to move.
	508	return None
	509	timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
	510	if not os.path.exists(timestamp_file):
	511	# Old form of download directory
	512	target_dir_name = "{} - old".format(self.download_dir)
	513	else:
	514	target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
	515	return rename_unique(self.download_dir, target_dir_name)
	516
	517	def _find_last_download(self, base_dir):
	518	""" Look for the most recent previous download (if any) of the thing.
	519	"""
	520	logging.info("Looking for old things")
	521
	522	# First the DL directory itself.
	523	timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
	524
	525	latest = None
	526	latest_time = None
	527
	528	try:
	529	logging.debug("Checking for existing download in normal place.")
	530	with open(timestamp_file) as ts_fh:
	531	timestamp_text = ts_fh.read().strip()
	532	latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
	533	latest = self.download_dir
	534	except FileNotFoundError:
	535	# No existing download directory. huh.
	536	pass
	537	except TypeError:
	538	logging.warning("Invalid timestamp file found in {}".format(self.download_dir))
	539
	540	# TODO: Maybe look for old download directories.
	541
	542	# Now look for 7z files
	543	candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
	544	# +3 to allow for ' - '
	545	leading_length = len(self.slug) + 3
	546	for path in candidates:
	547	candidate = os.path.basename(path)
	548	try:
	549	logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
	550	candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
	551	except ValueError:
	552	logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
	553	continue
	554	try:
	555	if candidate_time > latest_time:
	556	latest_time = candidate_time
	557	latest = candidate
	558	except TypeError:
	559	latest_time = candidate_time
	560	latest = candidate
	561	logging.info("Found last old thing: {} / {}".format(latest, latest_time))
	562	return latest, latest_time
	563
	564	def download(self, base_dir, compress, api_key):
	565	""" Download all files for a given thing.
	566	Returns True iff the thing is now downloaded (not iff it downloads the thing!)
	567	"""
	568	if not self._parsed:
	569	self._parse(base_dir, api_key)
	570
	571	if not self._parsed:
	572	logging.error(
	573	"Unable to parse {} - aborting download".format(self.thing_id))
	574	return State.FAILED
	575
	576	if not self._needs_download:
	577	logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
	578	return State.ALREADY_DOWNLOADED
	579
	580	if not self._file_links:
	581	logging.error(
	582	"{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
	583	return State.FAILED
	584
	585	# Have we already downloaded some things?
	586	renamed_dir = self._handle_outdated_directory()
	587
	588	# Get the list of files to download
	589
	590	new_file_links = []
	591	old_file_links = []
	592	self.time_stamp = None
	593
	594	if not self.last_time:
	595	# If we don't have anything to copy from, then it is all new.
	596	logging.debug("No last time, downloading all files")
	597	new_file_links = self._file_links
	598	self.time_stamp = new_file_links[0].last_update
	599
	600	for file_link in new_file_links:
	601	self.time_stamp = max(self.time_stamp, file_link.last_update)
	602	logging.debug("New timestamp will be {}".format(self.time_stamp))
	603	else:
	604	self.time_stamp = self.last_time
	605	for file_link in self._file_links:
	606	if file_link.last_update > self.last_time:
	607	new_file_links.append(file_link)
	608	self.time_stamp = max(self.time_stamp, file_link.last_update)
	609	else:
	610	old_file_links.append(file_link)
	611
	612	logging.debug("new timestamp {}".format(self.time_stamp))
	613
	614	# OK. Time to get to work.
	615	logging.debug("Generating download_dir")
	616	os.mkdir(self.download_dir)
	617	filelist_file = os.path.join(self.download_dir, "filelist.txt")
	618	with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
	619	for fl in self._file_links:
	620	fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))
	621
	622	# First grab the cached files (if any)
	623	logging.info("Copying {} unchanged files.".format(len(old_file_links)))
	624	if renamed_dir:
	625	for file_link in old_file_links:
	626	try:
	627	old_file = os.path.join(renamed_dir, file_link.name)
	628	new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
	629	logging.debug("Copying {} to {}".format(old_file, new_file))
	630	copyfile(old_file, new_file)
	631	except FileNotFoundError:
	632	logging.warning(
	633	"Unable to find {} in old archive, redownloading".format(file_link.name))
	634	new_file_links.append(file_link)
	635	except TypeError:
	636	# Not altogether sure how this could occur, possibly with some combination of the old file types
	637	logging.warning(
	638	"Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
	639	new_file_links.append(file_link)
	640
	641	# Now download the new ones
	642	logging.info("Downloading {} new files of {}".format(
	643	len(new_file_links), len(self._file_links)))
	644	try:
	645	for file_link in new_file_links:
	646	file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
	647	logging.debug("Downloading {} from {} to {}".format(
	648	file_link.name, file_link.link, file_name))
	649	data_req = SESSION.get(file_link.link)
	650	if data_req.status_code != 200:
	651	logging.error("Unexpected status code {} for {}: {}".format(data_req.status_code,
	652	sanitise_url(file_link.link),
	653	data_req.text))
	654	fail_dir(self.download_dir)
	655	return State.FAILED
	656
	657	with open(file_name, 'wb') as handle:
	658	handle.write(data_req.content)
	659	except Exception as exception:
	660	logging.error("Failed to download {} - {}".format(file_link.name, exception))
	661	fail_dir(self.download_dir)
	662	return State.FAILED
	663
	664	# People like images.
	665	image_dir = os.path.join(self.download_dir, 'images')
	666	logging.info("Downloading {} images.".format(len(self._image_links)))
	667	try:
	668	os.mkdir(image_dir)
	669	for imagelink in self._image_links:
	670	filename = os.path.join(image_dir, imagelink.name)
	671	image_req = SESSION.get(imagelink.link)
	672	if image_req.status_code != 200:
	673	logging.error("Unexpected status code {} for {}: {}".format(image_req.status_code,
	674	sanitise_url(imagelink.link),
	675	image_req.text))
	676	fail_dir(self.download_dir)
	677	return State.FAILED
	678	with open(truncate_name(filename), 'wb') as handle:
	679	handle.write(image_req.content)
	680	except Exception as exception:
	681	logging.error("Failed to download {} - {}".format(imagelink.name, exception))
	682	fail_dir(self.download_dir)
	683	return State.FAILED
	684
	685	# Best get some licenses
	686	logging.info("writing license file")
	687	try:
	688	if self._license:
	689	with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w',
	690	encoding="utf-8") as license_handle:
	691	license_handle.write("{}\n".format(self._license))
	692	except IOError as exception:
	693	logging.warning("Failed to write license! {}".format(exception))
	694
	695	logging.info("writing readme")
	696	try:
	697	if self._details:
	698	with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w',
	699	encoding="utf-8") as readme_handle:
	700	readme_handle.write("{}\n".format(self._details))
	701	except IOError as exception:
	702	logging.warning("Failed to write readme! {}".format(exception))
	703
	704	try:
	705	# Now write the timestamp
	706	with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
	707	timestamp_handle.write(self.time_stamp.__str__())
	708	except Exception as exception:
	709	logging.error("Failed to write timestamp file - {}".format(exception))
	710	fail_dir(self.download_dir)
	711	return State.FAILED
	712	self._needs_download = False
	713	logging.debug("Download of {} finished".format(self.name))
	714	if not compress:
	715	return State.OK
	716
	717	thing_dir = "{} - {} - {}".format(self.thing_id,
	718	slugify(self.name),
	719	self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
	720	file_name = os.path.join(base_dir,
	721	"{}.7z".format(thing_dir))
	722	logging.debug("Compressing {} to {}".format(
	723	self.name,
	724	file_name))
	725	with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
	726	archive.writeall(self.download_dir, thing_dir)
	727	logging.debug("Compression of {} finished.".format(self.name))
	728	shutil.rmtree(self.download_dir)
	729	logging.debug("Removed temporary download dir of {}.".format(self.name))
	730	return State.OK
	731
	732
	733	def do_batch(batch_file, download_dir, quick, compress):
	734	""" Read a file in line by line, parsing each as a set of calls to this script."""
	735	with open(batch_file) as handle:
	736	for line in handle:
	737	line = line.strip()
	738	if not line:
	739	# Skip empty lines
	740	continue
	741	logging.info("Handling instruction {}".format(line))
	742	command_arr = line.split()
	743	if command_arr[0] == "thing":
	744	logging.debug(
	745	"Handling batch thing instruction: {}".format(line))
	746	Thing.from_thing_id(command_arr[1]).download(download_dir, compress)
	747	continue
	748	if command_arr[0] == "collection":
	749	logging.debug(
	750	"Handling batch collection instruction: {}".format(line))
	751	Collection(command_arr[1], command_arr[2],
	752	download_dir, quick, compress).download()
	753	continue
	754	if command_arr[0] == "user":
	755	logging.debug(
	756	"Handling batch collection instruction: {}".format(line))
	757	Designs(command_arr[1], download_dir, quick, compress).download()
	758	continue
	759	logging.warning("Unable to parse current instruction. Skipping.")
	760
	761
	762	def main():
	763	""" Entry point for script being run as a command. """
	764	parser = argparse.ArgumentParser()
	765	parser.add_argument("-l", "--log-level", choices=[
	766	'debug', 'info', 'warning'], default='info', help="level of logging desired")
	767	parser.add_argument("-d", "--directory",
	768	help="Target directory to download into")
	769	parser.add_argument("-f", "--log-file",
	770	help="Place to log debug information to")
	771	parser.add_argument("-q", "--quick", action="store_true",
	772	help="Assume date ordering on posts")
	773	parser.add_argument("-c", "--compress", action="store_true",
	774	help="Compress files")
	775	parser.add_argument("-a", "--api-key",
	776	help="API key for thingiverse")
	777
	778	subparsers = parser.add_subparsers(
	779	help="Type of thing to download", dest="subcommand")
	780	collection_parser = subparsers.add_parser(
	781	'collection', help="Download one or more entire collection(s)")
	782	collection_parser.add_argument(
	783	"owner", help="The owner of the collection(s) to get")
	784	collection_parser.add_argument(
	785	"collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
	786	thing_parser = subparsers.add_parser(
	787	'thing', help="Download a single thing.")
	788	thing_parser.add_argument(
	789	"things", nargs="*", help="Space seperated list of thing ID(s) to download")
	790	user_parser = subparsers.add_parser(
	791	"user", help="Download all things by one or more users")
	792	user_parser.add_argument(
	793	"users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
	794	batch_parser = subparsers.add_parser(
	795	"batch", help="Perform multiple actions written in a text file")
	796	batch_parser.add_argument(
	797	"batch_file", help="The name of the file to read.")
	798	subparsers.add_parser("version", help="Show the current version")
	799
	800	args = parser.parse_args()
	801	if not args.subcommand:
	802	parser.print_help()
	803	sys.exit(1)
	804	if not args.directory:
	805	args.directory = os.getcwd()
	806
	807	logger = logging.getLogger()
	808	formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	809	logger.setLevel(logging.DEBUG)
	810	console_handler = logging.StreamHandler()
	811	console_handler.setLevel(args.log_level.upper())
	812
	813	global API_KEY
	814	if args.api_key:
	815	API_KEY = args.api_key
	816	else:
	817	try:
	818	with open("api.key") as fh:
	819	API_KEY = fh.read().strip()
	820	except Exception as e:
	821	logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
	822	logging.error("Exception: {}".format(e))
	823	return
	824
	825	logger.addHandler(console_handler)
	826	if args.log_file:
	827	file_handler = logging.FileHandler(args.log_file)
	828	file_handler.setLevel(logging.DEBUG)
	829	file_handler.setFormatter(formatter)
	830	logger.addHandler(file_handler)
	831
	832	# Start downloader
	833	thing_queue = multiprocessing.JoinableQueue()
	834	logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
	835	downloaders = [Downloader(thing_queue, args.directory, args.compress, API_KEY) for _ in range(DOWNLOADER_COUNT)]
	836	for downloader in downloaders:
	837	downloader.start()
	838
	839	if args.subcommand.startswith("collection"):
	840	for collection in args.collections:
	841	Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
	842	if args.subcommand == "thing":
	843	for thing in args.things:
	844	thing_queue.put(thing)
	845	if args.subcommand == "user":
	846	for user in args.users:
	847	Designs(user, args.directory, args.quick, args.compress).download()
	848	if args.subcommand == "version":
	849	print("thingy_grabber.py version {}".format(VERSION))
	850	if args.subcommand == "batch":
	851	do_batch(args.batch_file, args.directory, args.quick, args.compress)
	852
	853	# Stop the downloader processes
	854	for _ in downloaders:
	855	thing_queue.put(None)
	856
	857
	858	if __name__ == "__main__":
	859	multiprocessing.freeze_support()
	860	main()