Deal with thingiverse returning no files for a thing.
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
6a777954 13import multiprocessing
7b84ba6d 14import enum
fb28c59b 15import datetime
3c82f75b 16from shutil import copyfile
975060c9 17from bs4 import BeautifulSoup
b497d705
OM
18from dataclasses import dataclass
19import selenium
20from selenium import webdriver
21from selenium.webdriver.common.by import By
22from selenium.webdriver.support.ui import WebDriverWait
23from selenium.webdriver.support import expected_conditions as EC
24from selenium.webdriver.firefox.options import Options
d194b140 25import atexit
975060c9
OM
26
# Thingiverse endpoints used for scraping.
URL_BASE = "https://www.thingiverse.com"
# AJAX endpoint for paging through a collection's things.
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
# AJAX endpoint for paging through a user's designs.
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

# Regexes pulling pagination metadata out of the JSON embedded in the page.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

# Number of Downloader worker processes to start.
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT is defined but not referenced anywhere visible here.
RETRY_COUNT = 3

# Maximum absolute path length produced by truncate_name (windows limit-ish).
MAX_PATH_LENGTH = 250

VERSION = "0.8.6"


# A single shared headless Firefox instance used by all page parsing.
#BROWSER = webdriver.PhantomJS('./phantomjs')
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

# Large window so lazy-loaded page elements render.
BROWSER.set_window_size(1980, 1080)
52
53
@dataclass
class FileLink:
    """ A single downloadable file belonging to a thing.

    Fix: the annotations for last_update and link were swapped — the
    constructor is called as FileLink(title, datetime, url), so
    last_update is a datetime and link is a URL string.
    """
    name: str
    # When the file was last updated on thingiverse (parsed from the page).
    last_update: datetime.datetime
    # Download URL; may later be rewritten to the redirect target.
    link: str
b497d705
OM
59
60
7b84ba6d
OM
class State(enum.Enum):
    """ Outcome of attempting to download a thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
65
dbdb1782 66
65bd8b43
OM
def fail_dir(dir_name):
    """ When a download has failed, move it sideways.

    The directory is renamed to "<dir>_failed", or "<dir>_failed_N" if
    earlier failures already claimed that name.
    """
    suffix = 0
    candidate = "{}_failed".format(dir_name)
    while os.path.exists(candidate):
        candidate = "{}_failed_{}".format(dir_name, suffix)
        suffix += 1
    os.rename(dir_name, candidate)
76
77
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns an absolute path no longer than roughly MAX_PATH_LENGTH.
    Over-long paths have their base name shortened, leaving headroom for
    the "_N" suffix used to avoid clobbering existing files.

    Fixes: previously to_cut was computed but never applied, so long
    paths were returned unshortened; the collision loop also recomputed
    the candidate with a stale counter, wasting one iteration.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Cut enough of the base that the result plus a "_NN" suffix fits.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
92
93
dd8c35f4
OM
def strip_ws(value):
    """ Collapse runs of whitespace and hyphens in *value* into single hyphens. """
    return str(re.sub(r'[-\s]+', '-', value))
975060c9 97
dbdb1782 98
975060c9
OM
def slugify(value):
    """
    Normalise a string into a safe filename: NFKC-normalised and
    lowercased, with characters invalid in filenames and any trailing
    dots removed.
    """
    cleaned = unicodedata.normalize('NFKC', value).lower().strip()
    cleaned = re.sub(r'[\\/<>:\?\*\|"]', '', cleaned)
    return re.sub(r'\.*$', '', cleaned)
975060c9 108
b497d705
OM
class PageChecker(object):
    """ Callable wait-condition for selenium's WebDriverWait.

    Invoked repeatedly until it returns True; incrementally scrapes the
    thing's title, file count, file rows, images and license text as the
    dynamic page loads.  Progress is appended to self.log so it can be
    dumped for debugging if the wait times out.
    """
    def __init__(self):
        self.log = []           # debug trail, logged on timeout
        self.title = None       # thing title text
        self.file_count = None  # number of files the page claims to offer
        self.files = None       # file row elements, set once all are present
        self.images = None      # thumbnail elements
        self.license = None     # license text


    def __call__(self, _):
        # NOTE(review): EC._find_element/_find_elements are private selenium
        # helpers — confirm they still exist when upgrading selenium.
        try:
            self.log.append("call")
            if self.title is None:
                # first find the name
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    # Zero files reported: keep waiting (will eventually time out).
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            fileRows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(fileRows)))
            if len(fileRows) < self.file_count:
                # Not all rows rendered yet.
                return False

            self.log.append("Looking for images")
            # By this point _should_ have loaded all the images
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = fileRows
            return True
        except Exception:
            # Swallow everything: any scraping error just means "not ready yet".
            return False
154
155
156
157
6a777954
OM
class Downloader(multiprocessing.Process):
    """
    Worker process that pulls thing ids off a shared queue and downloads
    each one, until it receives a None sentinel.
    """

    def __init__(self, thing_queue, download_directory):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory

    def run(self):
        """ actual download loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                # Sentinel: acknowledge it and stop this worker.
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory)
            self.thing_queue.task_done()
182
7b84ba6d 183
6a777954
OM
184
185
dbdb1782 186
3522a3bf 187class Grouping:
d66f1f78 188 """ Holds details of a group of things for download
3c82f75b
OM
189 This is effectively (although not actually) an abstract class
190 - use Collection or Designs instead.
191 """
dbdb1782 192
7b84ba6d 193 def __init__(self, quick):
975060c9
OM
194 self.things = []
195 self.total = 0
196 self.req_id = None
197 self.last_page = 0
198 self.per_page = None
7b84ba6d
OM
199 # Should we stop downloading when we hit a known datestamp?
200 self.quick = quick
948bd56f 201 # These should be set by child classes.
3522a3bf
OM
202 self.url = None
203 self.download_dir = None
948bd56f 204 self.collection_url = None
975060c9 205
3522a3bf
OM
206 def _get_small_grouping(self, req):
207 """ Handle small groupings """
975060c9 208 soup = BeautifulSoup(req.text, features='lxml')
dbdb1782 209 links = soup.find_all('a', {'class': 'card-img-holder'})
975060c9 210 self.things = [x['href'].split(':')[1] for x in links]
fa2f3251 211 self.total = len(self.things)
975060c9
OM
212
213 return self.things
214
3522a3bf
OM
215 def get(self):
216 """ retrieve the things of the grouping. """
975060c9
OM
217 if self.things:
218 # We've already done it.
219 return self.things
220
3522a3bf
OM
221 # Check for initialisation:
222 if not self.url:
fa2f3251 223 logging.error("No URL set - object not initialised properly?")
3522a3bf
OM
224 raise ValueError("No URL set - object not initialised properly?")
225
226 # Get the internal details of the grouping.
fa2f3251 227 logging.debug("Querying {}".format(self.url))
3522a3bf 228 c_req = requests.get(self.url)
975060c9
OM
229 total = TOTAL_REGEX.search(c_req.text)
230 if total is None:
3522a3bf
OM
231 # This is a small (<13) items grouping. Pull the list from this req.
232 return self._get_small_grouping(c_req)
975060c9
OM
233 self.total = total.groups()[0]
234 self.req_id = ID_REGEX.search(c_req.text).groups()[0]
235 self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
236 self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
237 parameters = {
dbdb1782
OM
238 'base_url': self.url,
239 'page': '1',
240 'per_page': '12',
241 'id': self.req_id
975060c9
OM
242 }
243 for current_page in range(1, self.last_page + 1):
244 parameters['page'] = current_page
948bd56f 245 req = requests.post(self.collection_url, parameters)
975060c9 246 soup = BeautifulSoup(req.text, features='lxml')
dbdb1782 247 links = soup.find_all('a', {'class': 'card-img-holder'})
975060c9
OM
248 self.things += [x['href'].split(':')[1] for x in links]
249
250 return self.things
251
252 def download(self):
253 """ Downloads all the files in a collection """
254 if not self.things:
3522a3bf
OM
255 self.get()
256
257 if not self.download_dir:
dbdb1782
OM
258 raise ValueError(
259 "No download_dir set - invalidly initialised object?")
3522a3bf 260
975060c9 261 base_dir = os.getcwd()
975060c9 262 try:
3522a3bf 263 os.mkdir(self.download_dir)
975060c9 264 except FileExistsError:
fa2f3251 265 logging.info("Target directory {} already exists. Assuming a resume."
dbdb1782 266 .format(self.download_dir))
fa2f3251 267 logging.info("Downloading {} thing(s).".format(self.total))
dbdb1782 268 for idx, thing in enumerate(self.things):
fb28c59b 269 logging.info("Downloading thing {} - {}".format(idx, thing))
7b84ba6d
OM
270 RC = Thing(thing).download(self.download_dir)
271 if self.quick and RC==State.ALREADY_DOWNLOADED:
272 logging.info("Caught up, stopping.")
273 return
975060c9 274
dbdb1782 275
3522a3bf
OM
class Collection(Grouping):
    """ Holds details of a single named collection belonging to a user. """

    def __init__(self, user, name, directory, quick):
        Grouping.__init__(self, quick)
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(URL_BASE, user, strip_ws(name))
        # Local directory: "<user>-<collection>", both slugified.
        dir_name = "{}-{}".format(slugify(user), slugify(name))
        self.download_dir = os.path.join(directory, dir_name)
        self.collection_url = URL_COLLECTION
3522a3bf 288
dbdb1782 289
3522a3bf
OM
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick):
        Grouping.__init__(self, quick)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, user)
        # Local directory: "<user> designs".
        target = "{} designs".format(slugify(user))
        self.download_dir = os.path.join(directory, target)
        self.collection_url = USER_COLLECTION
975060c9 300
dbdb1782 301
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse.

    Fixes in this revision:
    - _parse: guard the file-row loop against pc.files being None/empty
      (thingiverse sometimes returns no files for a thing), instead of
      raising TypeError.
    - download: the FileNotFoundError handler subscripted the FileLink
      dataclass (file_link["title"]) which raises TypeError; use
      file_link.name.
    """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        self.last_time = None          # datetime of last completed download, if known
        self._parsed = False
        self._needs_download = True
        self.text = None               # NOTE(review): appears unused
        self.title = None
        self.download_dir = None

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Loads the thing's files page in the shared headless browser,
        scrapes file/image/license details, and compares file timestamps
        against any previously-recorded download timestamp.
        Leaves self._parsed False on connection/timeout failure.
        """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        try:
            BROWSER.get(url)
            wait = WebDriverWait(BROWSER, 60)
            pc = PageChecker()
            wait.until(pc)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        except selenium.common.exceptions.TimeoutException:
            logging.error(pc.log)
            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
            return

        self.title = pc.title
        self._file_links=[]
        if not pc.files:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
        # FIX: pc.files may be None when thingiverse returns no files;
        # iterate an empty list in that case rather than crashing.
        for link in pc.files or []:
            logging.debug("Parsing link: {}".format(link.text))
            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
            if link_link.endswith("/zip"):
                # bulk link.
                continue
            try:
                link_title, link_details, _ = link.text.split("\n")
            except ValueError:
                # If it is a filetype that doesn't generate a picture, then we get an extra field at the start.
                _, link_title, link_details, _ = link.text.split("\n")

            #link_details will be something like '461 kb | Updated 06-11-2019 | 373 Downloads'
            #need to convert from M D Y to Y M D
            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
            try:
                self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
            except ValueError:
                # Unparseable date — log it and skip this file.
                logging.error(link_date)

        self._image_links=[x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
        self._license = pc.license
        self.pc = pc


        # Pre-0.8 versions used a title-only directory name; the current
        # scheme prefixes the thing id to avoid collisions.
        self.old_download_dir = os.path.join(base_dir, slugify(self.title))
        self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            logging.info("Looking for old dir at {}".format(self.old_download_dir))
            if os.path.exists(self.old_download_dir):
                logging.warning("Found previous style download directory. Moving it from {} to {}".format(self.old_download_dir, self.download_dir))
                os.rename(self.old_download_dir, self.download_dir)
            else:
                # Not yet downloaded
                self._parsed = True
                return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                # add the .split(' ')[0] to remove the timestamp from the old style timestamps
                last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
                logging.warning(last_bits)
                try:
                    self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
                except ValueError:
                    # This one appears to be M D Y
                    self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])

            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Not run on this thing before.
            logging.info(
                "Old-style download directory found. Assuming update required.")
            self.last_time = None
            self._needs_download = True
            self._parsed = True
            return

        # OK, so we have a timestamp, lets see if there is anything new to get
        for file_link in self._file_links:
            if file_link.last_update > self.last_time:
                logging.info(
                    "Found new/updated file {} - {}".format(file_link.name, file_link.last_update))
                self._needs_download = True
                self._parsed = True
                return

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir):
        """ Download all files for a given thing.
            Returns a State: OK iff the thing is now downloaded,
            ALREADY_DOWNLOADED if nothing new, FAILED otherwise.
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
            return State.FAILED

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # edge case: old style dir w/out timestamp.
                logging.warning("Old style download dir found at {}".format(self.title))
                prev_count = 0
                target_dir = "{}_old".format(self.download_dir)
                while os.path.exists(target_dir):
                    prev_count = prev_count + 1
                    target_dir = "{}_old_{}".format(self.download_dir, prev_count)
                os.rename(self.download_dir, target_dir)
            else:
                # Keep the previous snapshot so unchanged files can be copied.
                prev_dir = "{}_{}".format(self.download_dir, slugify(self.last_time.__str__()))
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        new_last_time = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            new_last_time = new_file_links[0].last_update

            for file_link in new_file_links:
                new_last_time = max(new_last_time, file_link.last_update)
            logging.debug("New timestamp will be {}".format(new_last_time))
        else:
            new_last_time = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    new_last_time = max(new_last_time, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(new_last_time))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                base_link = fl.link
                try:
                    # Resolve the redirect once so the recorded link is direct.
                    fl.link=requests.get(fl.link, allow_redirects=False).headers['location']
                except Exception:
                    # Sometimes Thingiverse just gives us the direct link the first time. Not sure why.
                    pass

                fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))


        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                # FIX: was file_link["title"] — FileLink is a dataclass and
                # is not subscriptable; that raised TypeError here.
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = requests.get(file_link.link)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED


        # People like images. But this doesn't work yet.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.basename(imagelink)
                if filename.endswith('stl'):
                    filename = "{}.png".format(filename)
                image_req = requests.get(imagelink)
                with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            print("Failed to download {} - {}".format(filename, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        """
        # instructions are good too.
        logging.info("Downloading readme")
        try:
            readme_txt = soup.find('meta', property='og:description')[
                'content']
            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
                readme_handle.write("{}\n".format(readme_txt))
        except (TypeError, KeyError) as exception:
            logging.warning("No readme? {}".format(exception))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        """
        # Best get some licenses
        logging.info("Downloading license")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(new_last_time.__str__())
        except Exception as exception:
            print("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
        return State.OK
975060c9 580
dbdb1782 581
7b84ba6d 582def do_batch(batch_file, download_dir, quick):
1ab49020
OM
583 """ Read a file in line by line, parsing each as a set of calls to this script."""
584 with open(batch_file) as handle:
585 for line in handle:
586 line = line.strip()
cf280385
M
587 if not line:
588 # Skip empty lines
589 continue
1ab49020
OM
590 logging.info("Handling instruction {}".format(line))
591 command_arr = line.split()
592 if command_arr[0] == "thing":
dbdb1782
OM
593 logging.debug(
594 "Handling batch thing instruction: {}".format(line))
1ab49020
OM
595 Thing(command_arr[1]).download(download_dir)
596 continue
597 if command_arr[0] == "collection":
dbdb1782
OM
598 logging.debug(
599 "Handling batch collection instruction: {}".format(line))
600 Collection(command_arr[1], command_arr[2],
7b84ba6d 601 download_dir, quick).download()
1ab49020
OM
602 continue
603 if command_arr[0] == "user":
dbdb1782
OM
604 logging.debug(
605 "Handling batch collection instruction: {}".format(line))
7b84ba6d 606 Designs(command_arr[1], download_dir, quick).download()
1ab49020
OM
607 continue
608 logging.warning("Unable to parse current instruction. Skipping.")
609
dbdb1782 610
975060c9
OM
def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")

    # One subcommand per kind of download, plus batch and version.
    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        # Default to downloading into the current working directory.
        args.directory = os.getcwd()

    # Root logger at DEBUG; per-handler levels do the filtering.
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())

    # NOTE(review): the formatter is only attached to the file handler
    # below, so console output is unformatted — confirm this is intended.
    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)


    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()


    # Only the "thing" subcommand uses the worker queue; the others
    # download synchronously in this process.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick).download()
    if args.subcommand == "thing":
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick)

    # Stop the downloader processes
    # NOTE(review): the queue is never join()ed here, so main may return
    # while workers are still draining it — verify shutdown ordering.
    for downloader in downloaders:
        thing_queue.put(None)
975060c9 691
d194b140
OM
# Make sure the shared headless browser is shut down however the script exits.
atexit.register(BROWSER.quit)

if __name__ == "__main__":
    # Needed for multiprocessing when frozen (e.g. PyInstaller) on Windows.
    multiprocessing.freeze_support()
    main()