Add custom FileLinks class, 7z writing initial implementation
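FileLinks replaces the plain list of FileLink entries so that the newest
last_update is tracked as links are appended; Thing._parse compares that
against the stored timestamp to decide whether a re-download is needed, and
Thing.download can now optionally pack the finished directory into a 7z
archive. A rough sketch of how the two pieces behave (illustrative only:
importing this module starts a headless Firefox as a side effect, and
"thing.7z" / "some_thing_dir" are made-up names):

    import datetime
    import py7zr
    from thingy_grabber import FileLink, FileLinks

    links = FileLinks()
    links.append(FileLink("base.stl", datetime.datetime(2019, 6, 11), "https://example.invalid/a"))
    links.append(FileLink("lid.stl", datetime.datetime(2020, 1, 2), "https://example.invalid/b"))
    assert links.last_update == datetime.datetime(2020, 1, 2)  # newest wins

    # The 7z writing path is py7zr's writeall with an LZMA2 filter chain:
    with py7zr.SevenZipFile("thing.7z", 'w', filters=[{'id': py7zr.FILTER_LZMA2}]) as archive:
        archive.writeall("some_thing_dir", "some_thing_dir")
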
#!/usr/bin/env python3
"""
Thingiverse bulk downloader
"""

import re
import sys
import os
import argparse
import unicodedata
import requests
import logging
import multiprocessing
import enum
import datetime
from shutil import copyfile
from bs4 import BeautifulSoup
from dataclasses import dataclass
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
import atexit
import py7zr

SEVENZIP_FILTERS = [{'id': py7zr.FILTER_LZMA2}]

URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes it would break the
# paging logic below.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

DOWNLOADER_COUNT = 1
RETRY_COUNT = 3

MAX_PATH_LENGTH = 250

VERSION = "0.9.0"


# BROWSER = webdriver.PhantomJS('./phantomjs')
# Headless Firefox via selenium; needs geckodriver available on the PATH.
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

BROWSER.set_window_size(1980, 1080)


@dataclass
class FileLink:
    name: str
    last_update: datetime.datetime
    link: str


class FileLinks:
    """ A list of FileLink objects that also tracks the newest last_update
        seen across everything appended to it.
    """

    def __init__(self, initial_links=None):
        # Default to None rather than a shared mutable default list.
        self.links = []
        self.last_update = None
        for link in initial_links or []:
            self.append(link)

    def __iter__(self):
        return iter(self.links)

    def __getitem__(self, item):
        return self.links[item]

    def __len__(self):
        return len(self.links)

    def append(self, link):
        try:
            self.last_update = max(self.last_update, link.last_update)
        except TypeError:
            # last_update is still None - this is the first link.
            self.last_update = link.last_update
        self.links.append(link)


class State(enum.Enum):
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()


def fail_dir(dir_name):
    """ When a download has failed, move it sideways.
    """
    target_dir = "{}_failed".format(dir_name)
    inc = 0
    while os.path.exists(target_dir):
        target_dir = "{}_failed_{}".format(dir_name, inc)
        inc += 1
    os.rename(dir_name, target_dir)


def truncate_name(file_name):
    """ Ensure the file path is not too long (mainly a Windows limitation).
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Trim the base name, leaving room for a "_N" disambiguation suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path


def strip_ws(value):
    """ Replace runs of whitespace (or hyphens) with a single hyphen. """
    return str(NO_WHITESPACE_REGEX.sub('-', value))


def slugify(value):
    """
    Normalise the string, remove characters that are invalid in filenames,
    and convert it to lowercase.
    """
    value = unicodedata.normalize('NFKC', value).lower().strip()
    value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
    value = re.sub(r'\.*$', '', value)
    return value


class PageChecker(object):
    """ Selenium wait condition that polls the thing page until the title,
        file rows, images and license have all rendered. Lookup failures are
        swallowed by the outer try/except, so the wait simply polls again.
    """

    def __init__(self):
        self.log = []
        self.title = None
        self.file_count = None
        self.files = None
        self.images = None
        self.license = None

    def __call__(self, _):
        try:
            self.log.append("call")
            if self.title is None:
                # First find the name.
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            file_rows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(file_rows)))
            if len(file_rows) < self.file_count:
                return False

            self.log.append("Looking for images")
            # By this point we _should_ have loaded all the images.
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = file_rows
            return True
        except Exception:
            return False


class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory, compress):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory
        self.compress = compress

    def run(self):
        """ The actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting down download queue")
                self.thing_queue.task_done()
                break
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory, self.compress)
            self.thing_queue.task_done()
        return


class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick, compress):
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        self.compress = compress
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.collection_url = None

    def _get_small_grouping(self, req):
        """ Handle small groupings """
        soup = BeautifulSoup(req.text, features='lxml')
        links = soup.find_all('a', {'class': 'card-img-holder'})
        self.things = [x['href'].split(':')[1] for x in links]
        self.total = len(self.things)

        return self.things

    def get(self):
        """ Retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(self.url))
        c_req = requests.get(self.url)
        total = TOTAL_REGEX.search(c_req.text)
        if total is None:
            # This is a small (<13 item) grouping. Pull the list from this req.
            return self._get_small_grouping(c_req)
        self.total = total.groups()[0]
        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
        # Note: per_page is captured above, but the listing endpoint is
        # queried with the apparently fixed value of 12.
        parameters = {
            'base_url': self.url,
            'page': '1',
            'per_page': '12',
            'id': self.req_id
        }
        for current_page in range(1, self.last_page + 1):
            parameters['page'] = current_page
            req = requests.post(self.collection_url, parameters)
            soup = BeautifulSoup(req.text, features='lxml')
            links = soup.find_all('a', {'class': 'card-img-holder'})
            self.things += [x['href'].split(':')[1] for x in links]

        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            state = Thing(thing).download(self.download_dir, self.compress)
            if self.quick and state == State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return


class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, self.user, strip_ws(self.name))
        self.download_dir = os.path.join(directory,
                                         "{}-{}".format(slugify(self.user), slugify(self.name)))
        self.collection_url = URL_COLLECTION


class Designs(Grouping):
    """ Holds details of all of a user's designs. """

    def __init__(self, user, directory, quick, compress):
        Grouping.__init__(self, quick, compress)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, self.user)
        self.download_dir = os.path.join(
            directory, "{} designs".format(slugify(self.user)))
        self.collection_url = USER_COLLECTION


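# Typical use of the Grouping subclasses (a sketch - the user and collection
# names here are hypothetical):
#
#   Designs("some_user", os.getcwd(), quick=False, compress=False).download()
#   Collection("some_user", "some collection", os.getcwd(), False, False).download()

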
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.title = None
        self.download_dir = None
        self.time_stamp = None
        self._file_links = FileLinks()

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done. """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        try:
            BROWSER.get(url)
            wait = WebDriverWait(BROWSER, 60)
            pc = PageChecker()
            wait.until(pc)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        except selenium.common.exceptions.TimeoutException:
            logging.error(pc.log)
            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
            return

        self.title = pc.title
        if not pc.files:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
            return
        for link in pc.files:
            logging.debug("Parsing link: {}".format(link.text))
            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
            if link_link.endswith("/zip"):
                # Bulk download link - skip it.
                continue
            try:
                link_title, link_details, _ = link.text.split("\n")
            except ValueError:
                # If it is a filetype that doesn't generate a picture,
                # then we get an extra field at the start.
                _, link_title, link_details, _ = link.text.split("\n")

            # link_details will be something like
            # '461 kb | Updated 06-11-2019 | 373 Downloads';
            # the date needs converting from M-D-Y to Y-M-D.
            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
            try:
                self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
            except ValueError:
                logging.error(link_date)

        self._image_links = [x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
        self._license = pc.license
        self.pc = pc

        self.old_download_dir = os.path.join(base_dir, slugify(self.title))
        self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            logging.info("Looking for old dir at {}".format(self.old_download_dir))
            if os.path.exists(self.old_download_dir):
                logging.warning("Found previous style download directory. Moving it from {} to {}".format(self.old_download_dir, self.download_dir))
                os.rename(self.old_download_dir, self.download_dir)
            else:
                # Not yet downloaded.
                self._parsed = True
                return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before timestamps were recorded.
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                # The .split(' ')[0] drops the time part of old-style timestamps.
                last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
                logging.debug(last_bits)
                if last_bits[0] == 0:
                    last_bits[0] = 1
                if last_bits[1] == 0:
                    last_bits[1] = 1
                if last_bits[2] == 0:
                    last_bits[2] = 1980
                try:
                    self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
                except ValueError:
                    # This one appears to be M D Y.
                    self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])

            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Timestamp file vanished between the check above and the open.
            logging.info("Missing timestamp file. Assuming update required.")
            self.last_time = None
            self._needs_download = True
            self._parsed = True
            return

        # OK, so we have a timestamp; let's see if there is anything new to get.
        try:
            if self._file_links.last_update > self.last_time:
                logging.info(
                    "Found new/updated files {}".format(self._file_links.last_update))
                self._needs_download = True
                self._parsed = True
                return
        except TypeError:
            logging.warning("No files found for {}.".format(self.thing_id))

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir, compress):
        """ Download all files for a given thing.
            Returns a State: OK or ALREADY_DOWNLOADED mean the thing is now
            on disk (not that this call necessarily downloaded anything!).
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
            return State.FAILED

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # Edge case: old style dir without a timestamp.
                logging.warning("Old style download dir found at {}".format(self.download_dir))
                prev_count = 0
                target_dir = "{}_old".format(self.download_dir)
                while os.path.exists(target_dir):
                    prev_count = prev_count + 1
                    target_dir = "{}_old_{}".format(self.download_dir, prev_count)
                os.rename(self.download_dir, target_dir)
            else:
                prev_dir = "{}_{}".format(self.download_dir, slugify(str(self.last_time)))
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        self.time_stamp = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            self.time_stamp = new_file_links[0].last_update

            for file_link in new_file_links:
                self.time_stamp = max(self.time_stamp, file_link.last_update)
            logging.debug("New timestamp will be {}".format(self.time_stamp))
        else:
            self.time_stamp = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    self.time_stamp = max(self.time_stamp, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(self.time_stamp))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                base_link = fl.link
                try:
                    fl.link = requests.get(fl.link, allow_redirects=False).headers['location']
                except Exception:
                    # Sometimes Thingiverse just gives us the direct link
                    # the first time. Not sure why.
                    pass

                fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))

        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = requests.get(file_link.link)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        # People like images. But this doesn't work yet.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.basename(imagelink)
                if filename.endswith('stl'):
                    filename = "{}.png".format(filename)
                image_req = requests.get(imagelink)
                with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            print("Failed to download {} - {}".format(filename, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        """
        # instructions are good too.
        logging.info("Downloading readme")
        try:
            readme_txt = soup.find('meta', property='og:description')[
                'content']
            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
                readme_handle.write("{}\n".format(readme_txt))
        except (TypeError, KeyError) as exception:
            logging.warning("No readme? {}".format(exception))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        """
        # Best get some licenses
        logging.info("Downloading license")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(str(self.time_stamp))
        except Exception as exception:
            print("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
        if not compress:
            return State.OK

        thing_dir = "{} - {} - {}".format(self.thing_id,
                                          slugify(self.title),
                                          self.time_stamp)
        file_name = os.path.join(base_dir,
                                 "{}.7z".format(thing_dir))
        logging.debug("Compressing {} to {}".format(
            self.title,
            file_name))
        # with libarchive.file_writer(filename, 'lzma', '7z') as archive:
        # with py7zr.SevenZipFile(file_name, 'w') as archive:
        with py7zr.SevenZipFile(file_name, 'w', filters=SEVENZIP_FILTERS) as archive:
            archive.writeall(self.download_dir, thing_dir)
        logging.debug("Compression of {} finished.".format(self.title))
        return State.OK


def do_batch(batch_file, download_dir, quick, compress):
    """ Read a file in line by line, parsing each as a set of calls to this script."""
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir, compress)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick, compress).download()
                continue
            if command_arr[0] == "user":
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick, compress).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")


def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")
    parser.add_argument("-c", "--compress", action="store_true",
                        help="Compress files")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space separated list of the name(s) of the collection(s) to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space separated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space separated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory, args.compress) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick, args.compress).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick, args.compress).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick, args.compress)

    # Stop the downloader processes
    for downloader in downloaders:
        thing_queue.put(None)

    atexit.register(BROWSER.quit)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()