Api (#14)
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
6a777954 13import multiprocessing
7b84ba6d 14import enum
fb28c59b 15import datetime
3c82f75b 16from shutil import copyfile
b497d705 17from dataclasses import dataclass
d194b140 18import atexit
9828dabe 19import py7zr
8ed15058
OM
20import glob
21import shutil
975060c9 22
ae598d73
OM
# Filter chain passed to py7zr.SevenZipFile when a finished download is archived.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# I don't think this is exported by datetime
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Windows cannot handle : in filenames
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# URL templates. Every "{}" is filled with str.format; ACCESS_QP takes the API key.
API_BASE="https://api.thingiverse.com"
ACCESS_QP="access_token={}"
PAGE_QP="page={}"
# No query string here: Grouping.get appends the access token and page number itself.
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP

# Set by main() from --api-key or from the file "api.key".
API_KEY = None

# Number of Downloader worker processes started by main().
DOWNLOADER_COUNT = 1
# NOTE(review): not referenced anywhere else in this file.
RETRY_COUNT = 3

# Longest absolute path truncate_name() accepts without modification.
MAX_PATH_LENGTH = 250

VERSION = "0.10.0"

# Name of the file inside a download directory that records the download's timestamp.
TIMESTAMP_FILE = "timestamp.txt"

# Shared HTTP session used for every request made by this module.
SESSION = requests.Session()
b497d705 56
e45ba963
OM
@dataclass
class ThingLink:
    """ Reference to one thing, as found in a listing.

    Grouping.get builds these from the 'id', 'name' and 'url' values of
    each listing entry.
    """
    # NOTE(review): annotated as str, but the 'id' value is stored exactly as
    # received from the listing - confirm the actual type.
    thing_id: str
    name: str
    api_link: str
b497d705
OM
62
@dataclass
class FileLink:
    """ One downloadable file of a thing.

    Thing._parse builds these from the 'name', 'date' and 'url' values of
    each entry in the thing's file listing.
    """
    name: str
    # Parsed from the entry's 'date' string using DEFAULT_DATETIME_FORMAT.
    last_update: datetime.datetime
    link: str
68
e45ba963
OM
@dataclass
class ImageLink:
    """ One image of a thing.

    Thing._parse fills these in from the thing's image listing.
    """
    # Slugified 'name' of the image entry; used as the file name on disk.
    name: str
    # URL of the entry's size variant with type 'display' and size 'large'.
    link: str
73
ae598d73
OM
class FileLinks:
    """ Ordered collection of file links that tracks the newest update time.

    Behaves like a read-only sequence (iteration, indexing, len) over the
    links that were appended. `last_update` is the largest `last_update` of
    any appended link, or None while no link with a timestamp has been seen.
    """

    def __init__(self, initial_links=None):
        """ Create the collection, optionally pre-filled from an iterable. """
        # None instead of a mutable [] default, so no list is shared between calls.
        self.links = []
        self.last_update = None
        for link in initial_links or ():
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link and fold its timestamp into `last_update`.

        A link without a timestamp is still stored, but must not wipe out a
        timestamp that is already known.
        """
        stamp = link.last_update
        if stamp is not None and (self.last_update is None or stamp > self.last_update):
            self.last_update = stamp
        self.links.append(link)
8ed15058 96
b497d705 97
7b84ba6d
OM
class State(enum.Enum):
    """ Outcome of a single Thing.download call. """
    # Values match what enum.auto() would assign in this order.
    OK = 1
    FAILED = 2
    ALREADY_DOWNLOADED = 3
102
e45ba963
OM
def sanitise_url(url):
    """ Return the url with the value of every access_token parameter masked,
        so it can be logged without leaking the api key.
    """
    token_pattern = re.compile(r'access_token=\w*')
    return token_pattern.sub('access_token=***', url)
109
def strip_time(date_obj):
    """ Build a new datetime for midnight (00:00) on the day of the given datetime.
    """
    day = date_obj.date()
    return datetime.datetime(day.year, day.month, day.day)
114
8ed15058
OM
def rename_unique(dir_name, target_dir_name):
    """ Rename dir_name to target_dir_name without clobbering anything.

        If the target already exists, "_0", "_1", ... is appended until a
        free name is found. Returns the name that was finally used.
    """
    candidate = target_dir_name
    suffix = 0
    while True:
        if not os.path.exists(candidate):
            break
        candidate = "{}_{}".format(target_dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
    return candidate
125
126
def fail_dir(dir_name):
    """ Park the directory of a failed download under "<dir_name>_failed"
        (made unique if needed) and return the new name.
    """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
65bd8b43
OM
131
132
def truncate_name(file_name, max_length=None):
    """ Ensure the filename is not too long for, well windows basically.

        file_name:  path (relative or absolute) of the file about to be written.
        max_length: longest absolute path allowed; defaults to MAX_PATH_LENGTH.

        Returns the absolute path unchanged when it fits. Otherwise the part
        before the extension is shortened and "_<n>" is appended, using the
        first n for which no file exists yet, so the result fits the limit.
    """
    if max_length is None:
        max_length = MAX_PATH_LENGTH
    path = os.path.abspath(file_name)
    if len(path) <= max_length:
        return path
    base, extension = os.path.splitext(path)
    # Never cut into the directory part: keep the separator and at least one
    # character of the file name, even if that means exceeding the limit.
    min_keep = len(os.path.dirname(base)) + 2
    inc = 0
    while True:
        suffix = "_{}".format(inc)
        keep = max(max_length - len(extension) - len(suffix), min_keep)
        new_path = "{}{}{}".format(base[:keep], suffix, extension)
        if not os.path.exists(new_path):
            return new_path
        inc += 1
147
148
dd8c35f4
OM
def strip_ws(value):
    """ Replace every run of whitespace and/or hyphens in a string with a single '-'.

        The pattern is spelled out here because no NO_WHITESPACE_REGEX
        constant is defined in this module.
    """
    # NOTE(review): '[-\s]+' is assumed to be the intended pattern - confirm.
    return str(re.sub(r'[-\s]+', '-', value))
975060c9 152
dbdb1782 153
975060c9
OM
def slugify(value):
    """
    Turn a string into something usable as a file name: unicode-normalise
    it (NFKC), lowercase it, trim surrounding whitespace, drop the
    characters \\ / < > : ? * | " and remove trailing dots.
    """
    logging.debug("Sluggyfying {}".format(value))
    normalised = unicodedata.normalize('NFKC', value)
    cleaned = normalised.lower().strip()
    # First the characters that are invalid in file names, then trailing dots.
    for pattern in (r'[\\/<>:\?\*\|"]', r'\.*$'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
975060c9 164
b497d705 165
6a777954
OM
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.

    Each instance is a worker process that takes thing ids from a joinable
    queue and downloads them until it receives None.
    """

    def __init__(self, thing_queue, download_directory, compress):
        """ thing_queue:        queue delivering thing ids; None means "stop".
            download_directory: directory the things are downloaded into.
            compress:           passed through to Thing.download.
        """
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            try:
                Thing(thing_id).download(self.download_directory, self.compress)
            except Exception:
                # One broken thing must not take the whole worker down.
                logging.exception("Download of {} failed".format(thing_id))
            finally:
                # Always acknowledge the item, otherwise a join() on the
                # queue would wait forever.
                self.thing_queue.task_done()
        return
191
7b84ba6d 192
6a777954
OM
193
194
dbdb1782 195
class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        """ quick:    stop downloading as soon as an already downloaded thing is hit.
            compress: passed through to Thing.download.
        """
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        # Whether self.url is a bare URL that has to be requested page by page.
        self.paginated = False

    def _fetch_listing(self, url):
        """ Request one listing URL and return its decoded JSON, or None on failure.

            Failures are logged here (with the api key masked).
        """
        logging.info("requesting:{}".format(sanitise_url(url)))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect to {}: {}".format(sanitise_url(url), error))
            return None
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return None
        return current_req.json()

    def get(self):
        """ retrieve the things of the grouping.

            Returns the list of ThingLink objects, fetching it on first use.
            Raises ValueError when no URL has been set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            page = 0
            while True:
                page += 1
                current_json = self._fetch_listing(page_url.format(API_KEY, page))
                if not current_json:
                    # Request failed, or no more pages.
                    break
                for thing in current_json:
                    self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        else:
            # self.url should already have been formatted as we don't need pagination
            current_json = self._fetch_listing(self.url)
            for thing in current_json or ():
                logging.info(thing)
                self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))
        self.total = len(self.things)
        logging.info("Found {} things.".format(len(self.things)))
        return self.things

    def download(self):
        """ Downloads all the files in a collection

            Raises ValueError when no download directory has been set.
        """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            return_code = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and return_code == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
975060c9 283
3522a3bf
OM
class Collection(Grouping):
    """ Holds details of a collection.

        The constructor asks the API for the user's collections to find the
        id of the collection with the given name. If that fails, the error
        is logged and url / download_dir stay None, so get() and download()
        will raise ValueError.
    """

    def __init__(self, user, name, directory, quick, compress):
        """ user:      owner of the collection.
            name:      name of the collection (matched case-insensitively).
            directory: directory the collection's own directory is created in.
            quick, compress: see Grouping.
        """
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        self.collection_id = None
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for collections of user {}: {}".format(
                self.user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        collection = next(
            (x for x in collection_list if x['name'].casefold() == wanted), None)
        if collection is None:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
3522a3bf 315
dbdb1782 316
3522a3bf
OM
class Designs(Grouping):
    """ Everything a single user has designed; the listing is fetched page by page. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.paginated = True
        self.url = API_USER_DESIGNS.format(user)
        folder_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, folder_name)
975060c9 327
dbdb1782 328
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse.

        Usage: create it from a ThingLink (or a bare thing id) and call
        download(); parsing of the API data happens on demand.
    """

    def __init__(self, thing_link):
        """ thing_link: a ThingLink, or a bare thing id as handed over by the
            command line, the batch file and the download queue. For a bare
            id the name is looked up while parsing.
        """
        if not isinstance(thing_link, ThingLink):
            thing_link = ThingLink(thing_link, "", "")
        self.thing_id = thing_link.thing_id
        self.name = thing_link.name
        self.api_link = thing_link.api_link
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.slug = None
        self.time_stamp = None
        self._file_links = FileLinks()
        self._image_links = []
        # Stay None when the API response does not contain them.
        self._license = None
        self._details = None

    def _get_json(self, url):
        """ GET an API url and return the decoded JSON, or None on any failure.

            Failures are logged here; the api key is masked in the log.
        """
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return None
        if current_req.status_code == 403:
            # Check for DMCA
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return None
        if current_req.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return None
        return current_req.json()

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

            Fetches details, file list and image list, then compares the
            newest file against the previous download found under base_dir.
            On success self._parsed is True and self._needs_download says
            whether there is anything new; on failure self._parsed stays False.
        """
        if self._parsed:
            return

        # Start from scratch so a retry after a failed parse cannot duplicate links.
        self._file_links = FileLinks()
        self._image_links = []

        # First get the broad details
        thing_json = self._get_json(API_THING_DETAILS.format(self.thing_id, API_KEY))
        if thing_json is None:
            return

        if not self.name:
            # Created from a bare id: take the name from the details.
            self.name = thing_json.get('name') or ""

        try:
            self._license = thing_json['license']
        except KeyError:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        try:
            self._details = thing_json['details']
        except KeyError:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        # Now get the file details
        link_list = self._get_json(API_THING_FILES.format(self.thing_id, API_KEY))
        if link_list is None:
            return

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
            except ValueError:
                logging.error("Unable to parse date '{}' of file {} - ignoring the file".format(
                    link['date'], link['name']))
                continue
            self._file_links.append(FileLink(link['name'], datestamp, link['url']))

        # Finally get the image links
        image_list = self._get_json(API_THING_IMAGES.format(self.thing_id, API_KEY))
        if image_list is None:
            return

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = [x for x in image['sizes'] if x['type'] == 'display' and x['size'] == 'large'][0]['url']
            except (KeyError, IndexError):
                # Either a key is missing or there is no large display variant.
                logging.warning("Missing image for {}".format(image.get('name')))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        # TODO: Allow for comparison at the exact time
        files_last_update = self._file_links.last_update
        if files_last_update is None:
            logging.warning("No files found for {}.".format(self.thing_id))
        else:
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)

            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.

            Those were named after the slugified thing name only; if one
            exists it is moved to the current download directory name.
        """
        old_name = slugify(self.name)
        if not old_name:
            # Without a name the "old directory" would be base_dir itself.
            return
        old_dir = os.path.join(base_dir, old_name)
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

            Returns the new name of the moved directory, or None when there
            was no directory to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file) or self.last_time is None:
            # Old form of download directory (or one with an unusable timestamp)
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

            Checks the timestamp file in the download directory and the
            dates in the names of matching .7z archives in base_dir.
            Returns (location, timestamp) of the newest one, or (None, None).
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file) as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            # strptime rejects text that does not match the format.
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        # +3 to allow for ' - '
        leading_length = len(self.slug) + 3
        for path in candidates:
            candidate = os.path.basename(path)
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, candidate[leading_length:-3]))
                candidate_time = datetime.datetime.strptime(candidate[leading_length:-3], SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            if latest_time is None or candidate_time > latest_time:
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def _fetch_to_file(self, url, destination, description):
        """ Download url into the file destination.

            Returns True on success. On any failure the problem is logged
            (api key masked) and False is returned.
        """
        try:
            response = SESSION.get(url)
            if response.status_code != 200:
                logging.error("Unexpected status code {} for {}: {}".format(
                    response.status_code, sanitise_url(url), response.text))
                return False
            with open(destination, 'wb') as handle:
                handle.write(response.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(
                description, sanitise_url(str(exception))))
            return False
        return True

    def _fail(self):
        """ Move the partial download out of the way and report failure. """
        fail_dir(self.download_dir)
        return State.FAILED

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

            base_dir: directory the thing's own directory (or archive) lives in.
            compress: if true, pack the finished download into a .7z archive
                      in base_dir and remove the download directory.

            Returns State.OK when the thing was downloaded,
            State.ALREADY_DOWNLOADED when there was nothing new to get and
            State.FAILED otherwise.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download
        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = list(self._file_links)
            self.time_stamp = max(file_link.last_update for file_link in new_file_links)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        if old_file_links and not renamed_dir:
            # The previous download only exists as an archive, so there is
            # no directory to copy unchanged files from.
            logging.info("No previous download directory to copy from, downloading all files")
            new_file_links.extend(old_file_links)
            old_file_links = []

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        url_suffix = "/?" + ACCESS_QP.format(API_KEY)
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                fl_handle.write("{},{},{}\n".format(fl.link, fl.name, fl.last_update))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(renamed_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        for file_link in new_file_links:
            file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
            logging.debug("Downloading {} from {} to {}".format(
                file_link.name, file_link.link, file_name))
            if not self._fetch_to_file(file_link.link + url_suffix, file_name, file_link.name):
                return self._fail()

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            logging.error("Failed to create image directory {} - {}".format(image_dir, exception))
            return self._fail()
        for imagelink in self._image_links:
            filename = truncate_name(os.path.join(image_dir, imagelink.name))
            if not self._fetch_to_file(imagelink.link, filename, imagelink.name):
                return self._fail()

        # Best get some licenses
        logging.info("writing license file")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        logging.info("writing readme")
        try:
            if self._details:
                with open(truncate_name(os.path.join(self.download_dir, 'readme.txt')), 'w', encoding="utf-8") as readme_handle:
                    readme_handle.write("{}\n".format(self._details))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        try:
            # Now write the timestamp, in the format _find_last_download reads back.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except Exception as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            return self._fail()
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
975060c9 708
dbdb1782 709
ae598d73
OM
710
711
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

        Understood instructions (whitespace separated):
            thing <thing id>
            collection <user> <collection name>
            user <user>
        Blank lines are skipped; anything else, including an instruction
        with too few arguments, is logged and skipped.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            command, arguments = command_arr[0], command_arr[1:]
            if command == "thing" and len(arguments) >= 1:
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(arguments[0]).download(download_dir, compress)
                continue
            if command == "collection" and len(arguments) >= 2:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(arguments[0], arguments[1],
                           download_dir, quick, compress).download()
                continue
            if command == "user" and len(arguments) >= 1:
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(arguments[0], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
739
dbdb1782 740
975060c9
OM
def main():
    """ Entry point for script being run as a command.

        Parses the command line, sets up logging and the api key, starts the
        downloader process(es) and runs the requested subcommand. Exits with
        status 1 when no subcommand is given.
    """
    global API_KEY

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if args.subcommand == "version":
        # Needs neither an api key nor any downloader processes.
        print("thingy_grabber.py version {}".format(VERSION))
        return
    if not args.directory:
        args.directory = os.getcwd()

    # Set logging up first so that the api key error below is reported
    # through the configured handlers.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except Exception as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    # Start downloader
    # NOTE(review): a worker started with the "spawn" method re-imports this
    # module and would see API_KEY as None - confirm on such platforms.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    elif args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    elif args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    elif args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one sentinel each, then wait for them
    # so queued things are finished before main() returns.
    for _ in downloaders:
        thing_queue.put(None)
    for downloader in downloaders:
        downloader.join()
d194b140 839
0930777e
OM
# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    # Must be called before main() starts any Downloader process; it is what
    # allows multiprocessing to work in a frozen Windows executable.
    multiprocessing.freeze_support()
    main()