Remove unicode characters from filenames - fixes #6
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
6a777954 13import multiprocessing
7b84ba6d 14import enum
fb28c59b 15import datetime
3c82f75b 16from shutil import copyfile
975060c9 17from bs4 import BeautifulSoup
b497d705
OM
18from dataclasses import dataclass
19import selenium
20from selenium import webdriver
21from selenium.webdriver.common.by import By
22from selenium.webdriver.support.ui import WebDriverWait
23from selenium.webdriver.support import expected_conditions as EC
24from selenium.webdriver.firefox.options import Options
975060c9
OM
25
# Thingiverse endpoints scraped (GET) and paged (POST) by the Grouping
# classes below.
URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

# Regexes that pull pagination metadata out of the JSON embedded in the
# collection/designs pages.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
# Runs of whitespace and/or hyphens, collapsed to "-" by strip_ws().
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

DOWNLOADER_COUNT = 1  # number of Downloader worker processes started in main()
RETRY_COUNT = 3  # NOTE(review): declared but never referenced in this file

VERSION = "0.8.2"


# NOTE: module-import side effect — importing this module launches a
# headless Firefox that stays alive for the life of the process.
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

BROWSER.set_window_size(1980, 1080)
49
50
@dataclass
class FileLink:
    """ A single downloadable file attached to a thing.

    Constructed positionally as (name, last_update, link): the sanitised
    file name, the parsed upload datetime and the download URL (see
    Thing._parse).  The previous annotations had the types of
    last_update and link swapped; this fixes the annotations only —
    field order and behaviour are unchanged.
    """
    name: str
    last_update: datetime.datetime
    link: str
b497d705
OM
56
57
7b84ba6d
OM
class State(enum.Enum):
    """ Result of a Thing.download() attempt. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
62
dbdb1782 63
dd8c35f4
OM
def strip_ws(value):
    """ Collapse each run of whitespace and/or hyphens into one hyphen. """
    # Same pattern as the module-level NO_WHITESPACE_REGEX constant.
    collapsed = re.sub(r'[-\s]+', '-', value)
    return str(collapsed)
975060c9 67
dbdb1782 68
cef8aa7a
OM
def strip_invalid_chars(value):
    """
    Reduce *value* to plain ASCII: decompose accented characters (NFKD)
    and drop anything that cannot be encoded as ASCII.
    """
    decomposed = unicodedata.normalize('NFKD', value)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return ascii_only.decode()
75
76
975060c9
OM
def slugify(value):
    """
    Normalizes string, removes non-alphanumeric characters, and converts
    whitespace runs to hyphens.

    Note: despite what earlier documentation claimed, case is preserved —
    the result is NOT lowercased (and directory names elsewhere in this
    file depend on that).
    """
    value = strip_invalid_chars(value)
    # Drop everything that is not a word character, whitespace or hyphen.
    value = str(re.sub(r'[^\w\s-]', '', value).strip())
    value = strip_ws(value)
    return value
86
b497d705
OM
class PageChecker(object):
    """ Callable wait-condition for selenium's WebDriverWait.

    Invoked repeatedly by wait.until(); returns True once the thing page
    has rendered its title, all of its file rows, its images and its
    license text.  The harvested elements are left on the instance
    (title / file_count / files / images / license) for the caller.
    `log` accumulates progress notes for post-mortem debugging on timeout.

    NOTE(review): relies on EC._find_element / EC._find_elements, which
    are private selenium helpers and may disappear in newer releases.
    """
    def __init__(self):
        self.log = []           # progress notes, dumped on timeout
        self.title = None       # thing title text, once found
        self.file_count = None  # file count the page's metrics claim
        self.files = None       # selenium elements, one per file row
        self.images = None      # selenium elements for the thumbnails
        self.license = None     # license text


    def __call__(self, _):
        # The driver argument is ignored; the global BROWSER is queried
        # directly.  Any exception (stale element, missing selector, parse
        # failure) means "not ready yet" and makes WebDriverWait retry.
        try:
            self.log.append("call")
            if self.title is None:
                # first find the name
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                # The count is the first line of the metric button that
                # ends with "Thing Files".
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            fileRows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(fileRows)))
            # Keep waiting until every advertised file row has rendered.
            if len(fileRows) < self.file_count:
                return False

            self.log.append("Looking for images")
            # By this point _should_ have loaded all the images
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = fileRows
            return True
        except Exception:
            # Deliberate broad catch: any failure just means "retry".
            return False
132
133
134
135
6a777954
OM
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one, until a None sentinel arrives.
    """

    def __init__(self, thing_queue, download_directory):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory

    def run(self):
        """ Consume the queue until the None sentinel is seen. """
        for thing_id in iter(self.thing_queue.get, None):
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory)
            self.thing_queue.task_done()
        # Sentinel received: acknowledge it and stop.
        logging.info("Shutting download queue")
        self.thing_queue.task_done()
        return
160
7b84ba6d 161
6a777954
OM
162
163
dbdb1782 164
3522a3bf 165class Grouping:
d66f1f78 166 """ Holds details of a group of things for download
3c82f75b
OM
167 This is effectively (although not actually) an abstract class
168 - use Collection or Designs instead.
169 """
dbdb1782 170
7b84ba6d 171 def __init__(self, quick):
975060c9
OM
172 self.things = []
173 self.total = 0
174 self.req_id = None
175 self.last_page = 0
176 self.per_page = None
7b84ba6d
OM
177 # Should we stop downloading when we hit a known datestamp?
178 self.quick = quick
948bd56f 179 # These should be set by child classes.
3522a3bf
OM
180 self.url = None
181 self.download_dir = None
948bd56f 182 self.collection_url = None
975060c9 183
3522a3bf
OM
184 def _get_small_grouping(self, req):
185 """ Handle small groupings """
975060c9 186 soup = BeautifulSoup(req.text, features='lxml')
dbdb1782 187 links = soup.find_all('a', {'class': 'card-img-holder'})
975060c9 188 self.things = [x['href'].split(':')[1] for x in links]
fa2f3251 189 self.total = len(self.things)
975060c9
OM
190
191 return self.things
192
3522a3bf
OM
193 def get(self):
194 """ retrieve the things of the grouping. """
975060c9
OM
195 if self.things:
196 # We've already done it.
197 return self.things
198
3522a3bf
OM
199 # Check for initialisation:
200 if not self.url:
fa2f3251 201 logging.error("No URL set - object not initialised properly?")
3522a3bf
OM
202 raise ValueError("No URL set - object not initialised properly?")
203
204 # Get the internal details of the grouping.
fa2f3251 205 logging.debug("Querying {}".format(self.url))
3522a3bf 206 c_req = requests.get(self.url)
975060c9
OM
207 total = TOTAL_REGEX.search(c_req.text)
208 if total is None:
3522a3bf
OM
209 # This is a small (<13) items grouping. Pull the list from this req.
210 return self._get_small_grouping(c_req)
975060c9
OM
211 self.total = total.groups()[0]
212 self.req_id = ID_REGEX.search(c_req.text).groups()[0]
213 self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
214 self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
215 parameters = {
dbdb1782
OM
216 'base_url': self.url,
217 'page': '1',
218 'per_page': '12',
219 'id': self.req_id
975060c9
OM
220 }
221 for current_page in range(1, self.last_page + 1):
222 parameters['page'] = current_page
948bd56f 223 req = requests.post(self.collection_url, parameters)
975060c9 224 soup = BeautifulSoup(req.text, features='lxml')
dbdb1782 225 links = soup.find_all('a', {'class': 'card-img-holder'})
975060c9
OM
226 self.things += [x['href'].split(':')[1] for x in links]
227
228 return self.things
229
230 def download(self):
231 """ Downloads all the files in a collection """
232 if not self.things:
3522a3bf
OM
233 self.get()
234
235 if not self.download_dir:
dbdb1782
OM
236 raise ValueError(
237 "No download_dir set - invalidly initialised object?")
3522a3bf 238
975060c9 239 base_dir = os.getcwd()
975060c9 240 try:
3522a3bf 241 os.mkdir(self.download_dir)
975060c9 242 except FileExistsError:
fa2f3251 243 logging.info("Target directory {} already exists. Assuming a resume."
dbdb1782 244 .format(self.download_dir))
fa2f3251 245 logging.info("Downloading {} thing(s).".format(self.total))
dbdb1782 246 for idx, thing in enumerate(self.things):
fb28c59b 247 logging.info("Downloading thing {} - {}".format(idx, thing))
7b84ba6d
OM
248 RC = Thing(thing).download(self.download_dir)
249 if self.quick and RC==State.ALREADY_DOWNLOADED:
250 logging.info("Caught up, stopping.")
251 return
975060c9 252
dbdb1782 253
3522a3bf
OM
class Collection(Grouping):
    """ Holds details of a single named collection belonging to a user. """

    def __init__(self, user, name, directory, quick):
        super().__init__(quick)
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, user, strip_ws(name))
        # e.g. "<user>-<collection>" under the target directory.
        dir_name = "{}-{}".format(slugify(user), slugify(name))
        self.download_dir = os.path.join(directory, dir_name)
        self.collection_url = URL_COLLECTION
3522a3bf 266
dbdb1782 267
3522a3bf
OM
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick):
        super().__init__(quick)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, user)
        # e.g. "<user> designs" under the target directory.
        dir_name = "{} designs".format(slugify(user))
        self.download_dir = os.path.join(directory, dir_name)
        self.collection_url = USER_COLLECTION
975060c9 278
dbdb1782 279
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None       # datetime of last completed download, if known
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.title = None
        self.download_dir = None

    def _parse(self, base_dir):
        """ Work out what, if anything, needs to be done.

        Drives the headless browser at the thing's /files page, scrapes
        title / file links / images / license, then compares scraped file
        dates against any stored timestamp to decide whether a download
        is required.  Leaves self._parsed False on failure so download()
        can abort.
        """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        # Create the checker before the try block: a TimeoutException
        # raised by BROWSER.get() previously reached the handler with
        # `pc` unbound (NameError).
        pc = PageChecker()
        try:
            BROWSER.get(url)
            wait = WebDriverWait(BROWSER, 60)
            wait.until(pc)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        except selenium.common.exceptions.TimeoutException:
            logging.error(pc.log)
            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
            return

        self.title = pc.title
        self._file_links = []
        for link in pc.files:
            logging.debug("Parsing link: {}".format(link.text))
            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
            if link_link.endswith("/zip"):
                # bulk link.
                continue
            try:
                link_title, link_details, _ = link.text.split("\n")
            except ValueError:
                # If it is a filetype that doesn't generate a picture,
                # then we get an extra field at the start.
                _, link_title, link_details, _ = link.text.split("\n")

            # link_details will be something like
            # '461 kb | Updated 06-11-2019 | 373 Downloads'
            # need to convert from M D Y to Y M D
            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
            try:
                self._file_links.append(
                    FileLink(strip_invalid_chars(link_title),
                             datetime.datetime(link_date[2], link_date[0], link_date[1]),
                             link_link))
            except ValueError:
                logging.error(link_date)

        self._image_links = [x.find_element_by_xpath(".//img").get_attribute("src")
                             for x in pc.images]
        self._license = pc.license
        self.pc = pc

        # Pre-0.8 directories were named by title only; current layout is
        # "<id> - <title>" so retitled things keep a stable location.
        self.old_download_dir = os.path.join(base_dir, slugify(self.title))
        self.download_dir = os.path.join(
            base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            logging.info("Looking for old dir at {}".format(self.old_download_dir))
            if os.path.exists(self.old_download_dir):
                logging.warning("Found previous style download directory. Moving it from {} to {}".format(
                    self.old_download_dir, self.download_dir))
                os.rename(self.old_download_dir, self.download_dir)
            else:
                # Not yet downloaded
                self._parsed = True
                return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before timestamps were recorded.
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                # the .split(' ')[0] removes the time-of-day part of
                # old style timestamps
                last_bits = [int(x) for x in
                             timestamp_handle.readlines()[0].split(' ')[0].split("-")]
                logging.warning(last_bits)
                try:
                    self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
                except ValueError:
                    # This one appears to be M D Y
                    self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])

            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # File vanished between the exists() check and the open().
            # (Previous message was a misleading copy/paste about
            # old-style directories.)
            logging.info(
                "Timestamp file disappeared. Assuming update required.")
            self.last_time = None
            self._needs_download = True
            self._parsed = True
            return

        # OK, so we have a timestamp, lets see if there is anything new to get
        for file_link in self._file_links:
            if file_link.last_update > self.last_time:
                logging.info(
                    "Found new/updated file {} - {}".format(file_link.name, file_link.last_update))
                self._needs_download = True
                self._parsed = True
                return

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir):
        """ Download all files for a given thing.

        Returns a State: OK on success, ALREADY_DOWNLOADED when there is
        nothing new, FAILED otherwise.  (The old docstring claimed a bool
        return, which was never true of this implementation.)
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
            return State.ALREADY_DOWNLOADED

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # edge case: old style dir w/out timestamp.
                logging.warning("Old style download dir found at {}".format(self.title))
                prev_count = 0
                target_dir = "{}_old".format(self.download_dir)
                while os.path.exists(target_dir):
                    prev_count = prev_count + 1
                    target_dir = "{}_old_{}".format(self.download_dir, prev_count)
                os.rename(self.download_dir, target_dir)
            else:
                # Stash the previous download so unchanged files can be
                # copied from it instead of re-fetched.
                prev_dir = "{}_{}".format(self.download_dir, slugify(self.last_time.__str__()))
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        new_last_time = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            # default=None guards the empty list, which previously raised
            # IndexError on new_file_links[0].last_update.
            new_last_time = max(
                (file_link.last_update for file_link in new_file_links),
                default=None)
            logging.debug("New timestamp will be {}".format(new_last_time))
        else:
            new_last_time = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    new_last_time = max(new_last_time, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(new_last_time))

        if new_last_time is None:
            # No file links at all - nothing useful can be downloaded and
            # no valid timestamp could be written.
            logging.error("No files found for thing {} - aborting".format(self.thing_id))
            return State.FAILED

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w') as fl_handle:
            for fl in self._file_links:
                base_link = fl.link
                try:
                    # Resolve the redirect once so the real URL is recorded.
                    fl.link = requests.get(fl.link, allow_redirects=False).headers['location']
                except Exception:
                    # Sometimes Thingiverse just gives us the direct link
                    # the first time. Not sure why.
                    pass

                fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))


        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link.name)
            new_file = os.path.join(self.download_dir, file_link.name)
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                # file_link is a FileLink dataclass; the previous
                # file_link["title"] raised TypeError on this path.
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = os.path.join(self.download_dir, file_link.name)
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = requests.get(file_link.link)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return State.FAILED


        # People like images. But this doesn't work yet.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        filename = None  # so the error path below never hits an unbound name
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.basename(imagelink)
                if filename.endswith('stl'):
                    # stl thumbnails are rendered to png server-side.
                    filename = "{}.png".format(filename)
                image_req = requests.get(imagelink)
                with open(os.path.join(image_dir, filename), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            print("Failed to download {} - {}".format(filename, exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return State.FAILED

        """
        # instructions are good too.
        logging.info("Downloading readme")
        try:
            readme_txt = soup.find('meta', property='og:description')[
                'content']
            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
                readme_handle.write("{}\n".format(readme_txt))
        except (TypeError, KeyError) as exception:
            logging.warning("No readme? {}".format(exception))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))
        """
        # Best get some licenses
        logging.info("Downloading license")
        try:
            if self._license:
                with open(os.path.join(self.download_dir, 'license.txt'), 'w') as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w') as timestamp_handle:
                timestamp_handle.write(new_last_time.__str__())
        except Exception as exception:
            print("Failed to write timestamp file - {}".format(exception))
            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
        return State.OK
975060c9 552
dbdb1782 553
7b84ba6d 554def do_batch(batch_file, download_dir, quick):
1ab49020
OM
555 """ Read a file in line by line, parsing each as a set of calls to this script."""
556 with open(batch_file) as handle:
557 for line in handle:
558 line = line.strip()
cf280385
M
559 if not line:
560 # Skip empty lines
561 continue
1ab49020
OM
562 logging.info("Handling instruction {}".format(line))
563 command_arr = line.split()
564 if command_arr[0] == "thing":
dbdb1782
OM
565 logging.debug(
566 "Handling batch thing instruction: {}".format(line))
1ab49020
OM
567 Thing(command_arr[1]).download(download_dir)
568 continue
569 if command_arr[0] == "collection":
dbdb1782
OM
570 logging.debug(
571 "Handling batch collection instruction: {}".format(line))
572 Collection(command_arr[1], command_arr[2],
7b84ba6d 573 download_dir, quick).download()
1ab49020
OM
574 continue
575 if command_arr[0] == "user":
dbdb1782
OM
576 logging.debug(
577 "Handling batch collection instruction: {}".format(line))
7b84ba6d 578 Designs(command_arr[1], download_dir, quick).download()
1ab49020
OM
579 continue
580 logging.warning("Unable to parse current instruction. Skipping.")
581
dbdb1782 582
975060c9
OM
def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level",
                        choices=['debug', 'info', 'warning'],
                        default='info',
                        help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")

    subparsers = parser.add_subparsers(help="Type of thing to download",
                                       dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+",
        help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser('thing',
                                         help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*",
        help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+",
        help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument("batch_file",
                              help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; the handlers decide what goes where.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    console = logging.StreamHandler()
    console.setLevel(args.log_level.upper())
    root_logger.addHandler(console)
    if args.log_file:
        # The debug file always gets full detail, with timestamps.
        log_formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        to_file = logging.FileHandler(args.log_file)
        to_file.setLevel(logging.DEBUG)
        to_file.setFormatter(log_formatter)
        root_logger.addHandler(to_file)

    # Start downloader worker processes.
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory)
                   for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()

    # Dispatch: collections/users are handled inline, single things are
    # queued for the worker processes.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection,
                       args.directory, args.quick).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick)

    # One sentinel per worker shuts the download queue down.
    for downloader in downloaders:
        thing_queue.put(None)
975060c9 663
0930777e
OM
if __name__ == "__main__":
    # freeze_support() keeps the multiprocessing workers functional when
    # the script is packaged as a frozen executable on Windows.
    multiprocessing.freeze_support()
    main()