Update README.md
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
6a777954 13import multiprocessing
7b84ba6d 14import enum
fb28c59b 15import datetime
3c82f75b 16from shutil import copyfile
b497d705 17from dataclasses import dataclass
d194b140 18import atexit
9828dabe 19import py7zr
8ed15058
OM
20import glob
21import shutil
975060c9 22
ae598d73
OM
# Filter chain handed to py7zr when a finished download is archived.
SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

# Timestamp layout used when parsing API dates and timestamp files
# (I don't think this is exported by datetime).
DEFAULT_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Same layout with '.' between the time fields: Windows cannot handle ':'
# in filenames.
SAFE_DATETIME_FORMAT = '%Y-%m-%d %H.%M.%S'

# Building blocks for API URLs. The '{}' placeholders are filled in with
# str.format() at the call sites.
API_BASE = "https://api.thingiverse.com"
ACCESS_QP = "access_token={}"
PAGE_QP = "page={}"
API_USER_DESIGNS = API_BASE + "/users/{}/things/"
API_USER_COLLECTIONS = API_BASE + "/users/{}/collections/all?" + ACCESS_QP

# Currently useless as it gives the same info as the matching element in
# API_USER_COLLECTIONS
API_COLLECTION = API_BASE + "/collections/{}/?" + ACCESS_QP
API_COLLECTION_THINGS = API_BASE + "/collections/{}/things/?" + ACCESS_QP

API_THING_DETAILS = API_BASE + "/things/{}/?" + ACCESS_QP
API_THING_FILES = API_BASE + "/things/{}/files/?" + ACCESS_QP
API_THING_IMAGES = API_BASE + "/things/{}/images/?" + ACCESS_QP
# Suffix appended to a file's own URL to form its download link.
API_THING_DOWNLOAD = "/download/?" + ACCESS_QP

# Set by main() from the command line or the 'api.key' file.
API_KEY = None

# Number of Downloader processes started by main().
DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

# Longest absolute path truncate_name() will leave untouched.
MAX_PATH_LENGTH = 250

VERSION = "0.10.2"

# Name of the per-download file recording the newest file timestamp.
TIMESTAMP_FILE = "timestamp.txt"

# One HTTP session shared by every request in the module.
SESSION = requests.Session()
b497d705 57
e45ba963
OM
@dataclass
class ThingLink:
    """ Reference to a thing as listed by the API: its id, name and URL. """
    # Identifier used to build the per-thing API URLs.
    thing_id: str
    # Human readable title; used for the download directory name.
    name: str
    # URL of the thing as reported by the listing it came from.
    api_link: str
b497d705
OM
63
@dataclass
class FileLink:
    """ One downloadable file belonging to a thing. """
    # File name, also used as the name of the local copy.
    name: str
    # When the file was last changed on the server.
    last_update: datetime.datetime
    # URL the file content is fetched from.
    link: str
69
e45ba963
OM
@dataclass
class ImageLink:
    """ One image belonging to a thing. """
    # Name the image is stored under locally.
    name: str
    # URL the image is fetched from.
    link: str
74
ae598d73
OM
class FileLinks:
    """ Ordered collection of file links that tracks the newest update time.

    ``last_update`` is the largest ``last_update`` value of any link added so
    far, or None while no link carrying a timestamp has been added.
    """

    def __init__(self, initial_links=None):
        """ Create the collection, optionally seeded from an iterable of links.
        """
        self.links = []
        self.last_update = None
        # None instead of a shared mutable default argument.
        for link in initial_links or ():
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        """ Add a link and fold its timestamp into ``last_update``.

        A link without a timestamp is still stored, but it no longer resets
        an already known ``last_update`` back to None.
        """
        stamp = link.last_update
        if stamp is not None and (self.last_update is None or stamp > self.last_update):
            self.last_update = stamp
        self.links.append(link)
8ed15058 97
b497d705 98
7b84ba6d
OM
class State(enum.Enum):
    """ Result of asking for a thing to be downloaded. """
    # The thing was downloaded.
    OK = 1
    # Parsing or downloading went wrong.
    FAILED = 2
    # Nothing newer than the existing download was found.
    ALREADY_DOWNLOADED = 3
103
e45ba963
OM
def sanitise_url(url):
    """ Return the url with the value of any access_token parameter masked,
        so it can be logged without exposing the API key.
    """
    token_pattern = r'access_token=\w*'
    masked_token = 'access_token=***'
    return re.sub(token_pattern, masked_token, url)
110
def strip_time(date_obj):
    """ Return a new datetime for midnight (00:00) on the day of date_obj.
    """
    day = date_obj.date()
    midnight = datetime.time()
    return datetime.datetime.combine(day, midnight)
115
8ed15058
OM
def rename_unique(dir_name, target_dir_name):
    """ Rename dir_name to target_dir_name without clobbering anything.

        If the target already exists, "_0", "_1", ... are appended until an
        unused name is found. Returns the name actually used.
    """
    destination = target_dir_name
    attempt = 0
    while True:
        if not os.path.exists(destination):
            break
        destination = "{}_{}".format(target_dir_name, attempt)
        attempt += 1
    os.rename(dir_name, destination)
    return destination
126
127
def fail_dir(dir_name):
    """ Mark a failed download by renaming its directory to "<dir_name>_failed"
        (made unique if needed). Returns the new directory name.
    """
    failed_name = "{}_failed".format(dir_name)
    return rename_unique(dir_name, failed_name)
65bd8b43
OM
132
133
def truncate_name(file_name, max_length=None):
    """ Ensure the filename is not too long for, well windows basically.

        file_name is made absolute. If the result fits in max_length
        characters it is returned unchanged. Otherwise the file's stem is
        shortened and "_<n>" is inserted before the extension, with n the
        first counter (starting at 0) that yields a path not already on disk.

        max_length defaults to MAX_PATH_LENGTH. At least one character of the
        stem is always kept, so the result can still exceed the limit when the
        directory part alone is too long.
    """
    if max_length is None:
        max_length = MAX_PATH_LENGTH
    path = os.path.abspath(file_name)
    if len(path) <= max_length:
        return path
    directory, leaf = os.path.split(path)
    stem, extension = os.path.splitext(leaf)
    inc = 0
    while True:
        suffix = "_{}{}".format(inc, extension)
        # Space left for the stem after the directory, separator and suffix.
        room = max(1, max_length - len(directory) - len(os.sep) - len(suffix))
        new_path = os.path.join(directory, stem[:room] + suffix)
        if not os.path.exists(new_path):
            return new_path
        inc += 1
148
149
dd8c35f4
OM
def strip_ws(value):
    """ Replace each run of whitespace and/or hyphens in a string with a
        single hyphen.
    """
    # The pattern is spelled out here because the module defines no
    # NO_WHITESPACE_REGEX constant; referencing it raised NameError.
    return str(re.sub(r'[-\s]+', '-', value))
975060c9 153
dbdb1782 154
975060c9
OM
def slugify(value):
    """
    Turn a string into something usable as a file name: NFKC-normalise it,
    lowercase it, trim surrounding whitespace, drop the characters
    \\ / < > : ? * | " and remove any trailing dots.
    """
    logging.debug("Sluggyfying {}".format(value))
    normalised = unicodedata.normalize('NFKC', value)
    cleaned = normalised.lower().strip()
    # First the characters that are invalid in file names, then trailing dots.
    for pattern in (r'[\\/<>:\?\*\|"]', r'\.*$'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
975060c9 165
b497d705 166
6a777954
OM
class Downloader(multiprocessing.Process):
    """
    Worker process that takes thing ids off a queue and downloads each one.
    """

    def __init__(self, thing_queue, download_directory, compress):
        super().__init__()
        # TODO: add parameters
        self.compress = compress
        self.download_directory = download_directory
        self.thing_queue = thing_queue

    def run(self):
        """ Download queued things until a None entry says to stop.
        """
        # iter() with a sentinel keeps calling get() until it returns None.
        for thing_id in iter(self.thing_queue.get, None):
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
        logging.info("Shutting download queue")
        # The None sentinel also has to be acknowledged.
        self.thing_queue.task_done()
192
7b84ba6d 193
6a777954
OM
194
195
dbdb1782 196
class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        # paginated: whether self.url must be walked page by page in get().
        self.paginated = False
        self.url = None
        self.download_dir = None

    def _request_things(self, url):
        """ Fetch one listing from the API.

            Returns the decoded JSON, or None (after logging) when the
            connection fails or the status code is not 200.
        """
        logging.info("requesting:{}".format(sanitise_url(url)))
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect to {}: {}".format(sanitise_url(url), error))
            return None
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return None
        return current_req.json()

    def _add_things(self, listing):
        """ Append a ThingLink for every entry of a decoded API listing. """
        for thing in listing:
            logging.debug(thing)
            self.things.append(ThingLink(thing['id'], thing['name'], thing['url']))

    def get(self):
        """ retrieve the things of the grouping.

            Returns the list of ThingLink objects; the result of the first
            successful call is reused by later calls.
            Raises ValueError when no URL has been set.
        """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(sanitise_url(self.url)))
        if self.paginated:
            # Slightly nasty, but afaik python lacks a clean way to do partial string formatting.
            page_url = self.url + "?" + ACCESS_QP + "&" + PAGE_QP
            page = 0
            while True:
                page += 1
                current_json = self._request_things(page_url.format(API_KEY, page))
                if not current_json:
                    # Either an error (already logged) or no more pages.
                    break
                self._add_things(current_json)
        else:
            # self.url should already have been formatted as we don't need pagination
            current_json = self._request_things(self.url)
            if current_json:
                self._add_things(current_json)
        # Keep the advertised count in step with what was actually found.
        self.total = len(self.things)
        logging.info("Found {} things.".format(self.total))
        return self.things

    def download(self):
        """ Downloads all the files in a collection

            In quick mode the loop stops at the first thing that is already
            downloaded. Raises ValueError when no download_dir has been set.
        """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(len(self.things)))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            result = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and result == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
975060c9 284
3522a3bf
OM
class Collection(Grouping):
    """ Holds details of a collection.

        The constructor asks the API for the user's collections to find the
        ID of the one called ``name`` (compared case-insensitively). If that
        lookup fails the error is logged and ``url`` / ``download_dir`` stay
        unset, so a later get() or download() raises ValueError.
    """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.paginated = False
        self.collection_id = None
        # need to figure out the the ID for the collection
        collection_url = API_USER_COLLECTIONS.format(user, API_KEY)
        try:
            current_req = SESSION.get(collection_url)
        except requests.exceptions.ConnectionError as error:
            # A collection has no thing_id; report what was being looked up.
            logging.error("Unable to connect for collections of user {}: {}".format(
                user, error))
            return
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(collection_url), current_req.text))
            return
        collection_list = current_req.json()
        # case insensitive to retain parity with previous behaviour
        wanted = name.casefold()
        collection = next(
            (entry for entry in collection_list if entry['name'].casefold() == wanted),
            None)
        if collection is None:
            logging.error("Unable to find collection {} for user {}".format(name, user))
            return
        self.collection_id = collection['id']
        self.url = API_COLLECTION_THINGS.format(self.collection_id, API_KEY)

        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
3522a3bf 316
dbdb1782 317
3522a3bf
OM
class Designs(Grouping):
    """ All of the designs published by a single user. """

    def __init__(self, user, directory, quick, compress):
        super().__init__(quick, compress)
        self.user = user
        self.url = API_USER_DESIGNS.format(user)
        # The designs listing is fetched page by page in Grouping.get().
        self.paginated = True
        folder_name = "{} designs".format(slugify(self.user))
        self.download_dir = os.path.join(directory, folder_name)
975060c9 328
dbdb1782 329
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse.

        A Thing is built either from a ThingLink (as produced by a Grouping)
        or from a bare thing ID (as given on the command line or in a batch
        file). In the latter case the name is filled in from the API when the
        thing is parsed.
    """

    def __init__(self, thing_link):
        if isinstance(thing_link, (str, int)):
            # Bare ID: the name is looked up in _parse().
            self.thing_id = thing_link
            self.name = None
            self.api_link = None
        else:
            self.thing_id = thing_link.thing_id
            self.name = thing_link.name
            self.api_link = thing_link.api_link
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.download_dir = None
        self.slug = None
        self.time_stamp = None
        # Filled in by _parse(); None when the API supplies no value.
        self._license = None
        self._details = None
        self._file_links = FileLinks()
        self._image_links = []

    def _fetch_json(self, url):
        """ GET url and return the decoded JSON body.

            Returns None after logging when the connection fails, the status
            code is not 200 or the body is not valid JSON.
        """
        try:
            current_req = SESSION.get(url)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return None
        # Check for DMCA
        if current_req.status_code == 403:
            logging.error("Access to thing {} is forbidden".format(self.thing_id))
            return None
        if current_req.status_code != 200:
            logging.error("Got unexpected code {} from url {}: {}".format(
                current_req.status_code, sanitise_url(url), current_req.text))
            return None
        try:
            return current_req.json()
        except ValueError as error:
            logging.error("Invalid JSON from {}: {}".format(sanitise_url(url), error))
            return None

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

            Fetches the details, file list and image list of the thing, works
            out the download directory and compares the newest file timestamp
            with the most recent previous download. On success _parsed is set
            and _needs_download says whether there is anything new; on any
            fetch failure the method returns with _parsed still False.
        """
        if self._parsed:
            return

        # Start clean so a retry after a failed parse cannot duplicate links.
        self._file_links = FileLinks()
        self._image_links = []

        # First get the broad details
        thing_json = self._fetch_json(API_THING_DETAILS.format(self.thing_id, API_KEY))
        if thing_json is None:
            return

        self._license = thing_json.get('license')
        if self._license is None:
            logging.warning("No license found for thing {}?".format(self.thing_id))

        # TODO: Get non-html version of this?
        self._details = thing_json.get('details')
        if self._details is None:
            logging.warning("No description found for thing {}?".format(self.thing_id))

        if not self.name:
            # Built from a bare ID; fall back to the ID if the API has no name.
            self.name = thing_json.get('name') or str(self.thing_id)

        # Now get the file details
        link_list = self._fetch_json(API_THING_FILES.format(self.thing_id, API_KEY))
        if link_list is None:
            return

        if not link_list:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))

        for link in link_list:
            logging.debug("Parsing link: {}".format(sanitise_url(link['url'])))
            try:
                datestamp = datetime.datetime.strptime(link['date'], DEFAULT_DATETIME_FORMAT)
            except ValueError:
                logging.error("Unable to parse date '{}' of file {}".format(link['date'], link['name']))
                continue
            self._file_links.append(
                FileLink(link['name'], datestamp, link['url'] + API_THING_DOWNLOAD.format(API_KEY)))

        # Finally get the image links
        image_list = self._fetch_json(API_THING_IMAGES.format(self.thing_id, API_KEY))
        if image_list is None:
            return

        if not image_list:
            logging.warning("No images found for thing {} - probably thingiverse being iffy as this seems unlikely".format(self.thing_id))

        for image in image_list:
            logging.debug("parsing image: {}".format(image))
            try:
                name = slugify(image['name'])
                # TODO: fallback to other types
                url = next(x['url'] for x in image['sizes']
                           if x['type'] == 'display' and x['size'] == 'large')
            except (KeyError, StopIteration):
                # Skip it; appending here would reuse stale name/url values.
                logging.warning("Missing image for {}".format(image.get('name')))
                continue
            self._image_links.append(ImageLink(name, url))

        self.slug = "{} - {}".format(self.thing_id, slugify(self.name))
        self.download_dir = os.path.join(base_dir, self.slug)

        self._handle_old_directory(base_dir)

        logging.debug("Parsing {} ({})".format(self.thing_id, self.name))
        latest, self.last_time = self._find_last_download(base_dir)

        if not latest:
            # Not yet downloaded
            self._parsed = True
            return

        logging.info("last downloaded version: {}".format(self.last_time))

        # OK, so we have a timestamp, lets see if there is anything new to get
        # First off, are we comparing an old download that threw away the timestamp?
        ignore_time = self.last_time == strip_time(self.last_time)
        # TODO: Allow for comparison at the exact time
        files_last_update = self._file_links.last_update
        if files_last_update is None:
            logging.warning("No files found for {}.".format(self.thing_id))
        else:
            if ignore_time:
                logging.info("Dropping time from comparison stamp as old-style download dir")
                files_last_update = strip_time(files_last_update)
            if files_last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def _handle_old_directory(self, base_dir):
        """ Deal with any old directories from previous versions of the code.
        """
        old_name = slugify(self.name)
        if not old_name:
            # An empty slug would make old_dir the base directory itself.
            return
        old_dir = os.path.join(base_dir, old_name)
        if os.path.exists(old_dir):
            logging.warning("Found old style download_dir. Moving.")
            rename_unique(old_dir, self.download_dir)

    def _handle_outdated_directory(self, base_dir):
        """ Move the current download directory sideways if the thing has changed.

            Returns the new name of the moved directory, or None when there
            was no directory to move.
        """
        if not os.path.exists(self.download_dir):
            # No old directory to move.
            return None
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)
        if not os.path.exists(timestamp_file) or self.last_time is None:
            # Old form of download directory, or its timestamp was unreadable.
            target_dir_name = "{} - old".format(self.download_dir)
        else:
            target_dir_name = "{} - {}".format(
                self.download_dir, self.last_time.strftime(SAFE_DATETIME_FORMAT))
        return rename_unique(self.download_dir, target_dir_name)

    def _find_last_download(self, base_dir):
        """ Look for the most recent previous download (if any) of the thing.

            Checks the timestamp file in the download directory and the
            timestamps embedded in the names of this thing's .7z archives in
            base_dir. Returns a (location, time) tuple, (None, None) when
            nothing was found.
        """
        logging.info("Looking for old things")

        # First the DL directory itself.
        timestamp_file = os.path.join(self.download_dir, TIMESTAMP_FILE)

        latest = None
        latest_time = None

        try:
            logging.debug("Checking for existing download in normal place.")
            with open(timestamp_file, encoding="utf-8") as ts_fh:
                timestamp_text = ts_fh.read().strip()
            latest_time = datetime.datetime.strptime(timestamp_text, DEFAULT_DATETIME_FORMAT)
            latest = self.download_dir
        except FileNotFoundError:
            # No existing download directory. huh.
            pass
        except ValueError:
            # strptime signals an unparseable timestamp with ValueError.
            logging.warning("Invalid timestamp file found in {}".format(self.download_dir))

        # TODO: Maybe look for old download directories.

        # Now look for 7z files
        candidates = glob.glob(os.path.join(base_dir, "{}*.7z".format(self.thing_id)))
        prefix = "{} - ".format(self.slug)
        for path in candidates:
            candidate = os.path.basename(path)
            if not candidate.startswith(prefix):
                # Archive of another thing whose ID merely starts the same way.
                continue
            # Strip the leading "<slug> - " and the trailing ".7z".
            stamp_text = candidate[len(prefix):-3]
            try:
                logging.debug("Examining '{}' - '{}'".format(candidate, stamp_text))
                candidate_time = datetime.datetime.strptime(stamp_text, SAFE_DATETIME_FORMAT)
            except ValueError:
                logging.warning("There was an error finding the date in {}. Ignoring.".format(candidate))
                continue
            if latest_time is None or candidate_time > latest_time:
                latest_time = candidate_time
                latest = candidate
        logging.info("Found last old thing: {} / {}".format(latest, latest_time))
        return (latest, latest_time)

    def _fetch_to_file(self, url, target):
        """ Download url into the file target.

            Returns True on success and False (after logging) on a non-200
            status. Connection and file errors are left to the caller.
        """
        response = SESSION.get(url)
        if response.status_code != 200:
            logging.error("Unexpected status code {} for {}: {}".format(
                response.status_code, sanitise_url(url), response.text))
            return False
        with open(target, 'wb') as handle:
            handle.write(response.content)
        return True

    def _write_text_file(self, leaf_name, content, description):
        """ Best-effort write of content to leaf_name in the download dir. """
        if not content:
            return
        try:
            target = truncate_name(os.path.join(self.download_dir, leaf_name))
            with open(target, 'w', encoding="utf-8") as handle:
                handle.write("{}\n".format(content))
        except IOError as exception:
            logging.warning("Failed to write {}! {}".format(description, exception))

    def _failed(self):
        """ Move the partial download aside and report failure. """
        fail_dir(self.download_dir)
        return State.FAILED

    def download(self, base_dir, compress):
        """ Download all files for a given thing.

            Returns State.OK when the thing was downloaded,
            State.ALREADY_DOWNLOADED when nothing new was found and
            State.FAILED when parsing or any download step failed. With
            compress set, the finished directory is packed into a .7z archive
            in base_dir and then removed.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            logging.info("{} - {} already downloaded - skipping.".format(self.thing_id, self.name))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            logging.error("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.name))
            return State.FAILED

        # Have we already downloaded some things?
        renamed_dir = self._handle_outdated_directory(base_dir)

        # Get the list of files to download
        new_file_links = []
        old_file_links = []

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = list(self._file_links)
            self.time_stamp = self._file_links.last_update
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                # The links carry the API key; never write that to disk.
                fl_handle.write("{},{},{}\n".format(sanitise_url(fl.link), fl.name, fl.last_update))

        # First grab the cached files (if any)
        if renamed_dir:
            logging.info("Copying {} unchanged files.".format(len(old_file_links)))
            for file_link in old_file_links:
                try:
                    old_file = os.path.join(renamed_dir, file_link.name)
                    new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
                    logging.debug("Copying {} to {}".format(old_file, new_file))
                    copyfile(old_file, new_file)
                except FileNotFoundError:
                    logging.warning(
                        "Unable to find {} in old archive, redownloading".format(file_link.name))
                    new_file_links.append(file_link)
                except TypeError:
                    # Not altogether sure how this could occur, possibly with some combination of the old file types
                    logging.warning(
                        "Typeerror looking for {} in {}".format(file_link.name, renamed_dir))
                    new_file_links.append(file_link)
        else:
            # No directory to copy from (e.g. the last download only exists
            # as an archive), so the unchanged files must be fetched too.
            new_file_links.extend(old_file_links)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        for file_link in new_file_links:
            try:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, sanitise_url(file_link.link), file_name))
                fetched = self._fetch_to_file(file_link.link, file_name)
            except (OSError, requests.exceptions.RequestException) as exception:
                logging.error("Failed to download {} - {}".format(file_link.name, exception))
                fetched = False
            if not fetched:
                return self._failed()

        # People like images.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
        except OSError as exception:
            logging.error("Failed to create {} - {}".format(image_dir, exception))
            return self._failed()
        for imagelink in self._image_links:
            try:
                filename = truncate_name(os.path.join(image_dir, imagelink.name))
                fetched = self._fetch_to_file(imagelink.link, filename)
            except (OSError, requests.exceptions.RequestException) as exception:
                logging.error("Failed to download {} - {}".format(imagelink.name, exception))
                fetched = False
            if not fetched:
                return self._failed()

        # Best get some licenses
        logging.info("writing license file")
        self._write_text_file('license.txt', self._license, "license")

        logging.info("writing readme")
        self._write_text_file('readme.txt', self._details, "readme")

        try:
            # Now write the timestamp, in the format _find_last_download reads.
            with open(os.path.join(self.download_dir, TIMESTAMP_FILE), 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(self.time_stamp.strftime(DEFAULT_DATETIME_FORMAT))
        except (OSError, AttributeError) as exception:
            logging.error("Failed to write timestamp file - {}".format(exception))
            return self._failed()
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.name))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.name),
                                          self.time_stamp.strftime(SAFE_DATETIME_FORMAT))
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.name,
            file_name))
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.name))
        shutil.rmtree(self.download_dir)
        logging.debug("Removed temporary download dir of {}.".format(self.name))
        return State.OK
975060c9 715
dbdb1782 716
ae598d73
OM
717
718
def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script.

        Recognised instructions (whitespace separated):
            thing <thing id>
            collection <user> <collection name>
            user <user>
        Blank lines are skipped. Unknown instructions and instructions with
        too few arguments are logged and skipped instead of aborting the
        batch.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command, *arguments = line.split()
            if command == "thing" and len(arguments) >= 1:
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(arguments[0]).download(download_dir, compress)
                continue
            if command == "collection" and len(arguments) >= 2:
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(arguments[0], arguments[1],
                           download_dir, quick, compress).download()
                continue
            if command == "user" and len(arguments) >= 1:
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(arguments[0], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
746
dbdb1782 747
975060c9
OM
def main():
    """ Entry point for script being run as a command.

        Parses the command line, sets up logging and the API key, starts the
        downloader process(es) and dispatches on the chosen subcommand.
        The 'version' subcommand is answered straight away: it needs neither
        an API key nor any downloader process.
    """
    global API_KEY

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")
    parser.add_argument("-a", "--api-key",
                        help="API key for thingiverse")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
        return
    if not args.directory:
        args.directory = os.getcwd()

    # Attach the handlers first so that the messages below already go
    # through the configured logging setup.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    if args.api_key:
        API_KEY = args.api_key
    else:
        try:
            with open("api.key") as fh:
                API_KEY = fh.read().strip()
        except (OSError, ValueError) as e:
            logging.error("Either specify the api-key on the command line or in a file called 'api.key'")
            logging.error("Exception: {}".format(e))
            return

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand == "collection":
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    elif args.subcommand == "thing":
        # Single things are handed to the downloader process(es).
        for thing in args.things:
            thing_queue.put(thing)
    elif args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    elif args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes: one None sentinel per worker.
    for _ in downloaders:
        thing_queue.put(None)
975060c9 845
d194b140 846
0930777e
OM
if __name__ == "__main__":
    # Has to run before any process is started when the script is frozen
    # into a Windows executable; see the multiprocessing documentation.
    multiprocessing.freeze_support()
    main()