ensure timestamps are always valid
[clinton/thingy_grabber.git] / thingy_grabber.py
CommitLineData
975060c9
OM
1#!/usr/bin/env python3
2"""
3Thingiverse bulk downloader
4"""
5
6import re
4a98996b 7import sys
975060c9
OM
8import os
9import argparse
10import unicodedata
11import requests
fa2f3251 12import logging
6a777954 13import multiprocessing
7b84ba6d 14import enum
fb28c59b 15import datetime
3c82f75b 16from shutil import copyfile
975060c9 17from bs4 import BeautifulSoup
b497d705
OM
18from dataclasses import dataclass
19import selenium
20from selenium import webdriver
21from selenium.webdriver.common.by import By
22from selenium.webdriver.support.ui import WebDriverWait
23from selenium.webdriver.support import expected_conditions as EC
24from selenium.webdriver.firefox.options import Options
d194b140 25import atexit
9828dabe 26import py7zr
975060c9
OM
27
# Base URL for the thingiverse site and the ajax endpoints used to list
# the contents of collections and a user's designs.
URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
USER_COLLECTION = URL_BASE + "/ajax/user/designs"

# Regexes that pull paging metadata out of the JSON blobs embedded in
# the listing pages.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
# Runs of whitespace/hyphens, collapsed to a single '-' by strip_ws().
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

# Number of parallel downloader processes to start.
DOWNLOADER_COUNT = 1
# NOTE(review): RETRY_COUNT is defined but not referenced anywhere in
# this file - confirm whether retries were ever wired up.
RETRY_COUNT = 3

# Conservative cap so generated paths stay inside Windows path limits.
MAX_PATH_LENGTH = 250

VERSION = "0.8.7"


#BROWSER = webdriver.PhantomJS('./phantomjs')
# Single shared headless Firefox instance used for all page scraping.
options = Options()
options.add_argument("--headless")
BROWSER = webdriver.Firefox(options=options)

BROWSER.set_window_size(1980, 1080)
53
54
@dataclass
class FileLink:
    """ A single downloadable file attached to a thing.

    Note: the annotations were previously swapped (last_update: str,
    link: datetime.datetime); every construction site passes
    FileLink(name, datetime, url), so the correct types are below.
    """
    # Display name of the file as shown on the thing's files tab.
    name: str
    # Timestamp of the file's last modification on thingiverse.
    last_update: datetime.datetime
    # URL the file can be downloaded from.
    link: str
b497d705
OM
60
61
7b84ba6d
OM
class State(enum.Enum):
    """ Outcome of attempting to download a single thing. """
    OK = enum.auto()
    FAILED = enum.auto()
    ALREADY_DOWNLOADED = enum.auto()
66
dbdb1782 67
65bd8b43
OM
def fail_dir(dir_name):
    """ Move a directory containing a failed download out of the way.

    Renames it to "<dir_name>_failed", or "<dir_name>_failed_<n>" for
    the first free n if earlier failures already claimed that name.
    """
    suffix = 0
    target_dir = "{}_failed".format(dir_name)
    while os.path.exists(target_dir):
        target_dir = "{}_failed_{}".format(dir_name, suffix)
        suffix += 1
    os.rename(dir_name, target_dir)
77
78
def truncate_name(file_name):
    """ Ensure the filename is not too long for, well windows basically.

    Returns the absolute path unchanged when it already fits within
    MAX_PATH_LENGTH; otherwise returns a shortened path of the form
    "<truncated base>_<n><ext>" that does not collide with an existing
    file.

    Bug fixed: previously `to_cut` was computed but never applied, so
    over-long paths were returned at full length; the collision loop
    also re-generated the same candidate name on its first pass.
    """
    path = os.path.abspath(file_name)
    if len(path) <= MAX_PATH_LENGTH:
        return path
    base, extension = os.path.splitext(path)
    # Shorten the base enough that base + "_<n>" + extension fits,
    # reserving three characters for the de-duplication suffix.
    to_cut = len(path) - (MAX_PATH_LENGTH - 3)
    base = base[:-to_cut]
    inc = 0
    new_path = "{}_{}{}".format(base, inc, extension)
    while os.path.exists(new_path):
        inc += 1
        new_path = "{}_{}{}".format(base, inc, extension)
    return new_path
93
94
dd8c35f4
OM
def strip_ws(value):
    """ Remove whitespace from a string """
    # Collapse each run of whitespace/hyphens to a single '-'.
    collapsed = NO_WHITESPACE_REGEX.sub('-', value)
    return str(collapsed)
975060c9 98
dbdb1782 99
975060c9
OM
def slugify(value):
    """
    Normalise a string for use as a filename: NFKC-normalise,
    lowercase and strip it, drop characters that are invalid in
    filenames, then remove any trailing dots.
    """
    normalised = unicodedata.normalize('NFKC', value).lower().strip()
    # Strip characters that are illegal in (windows) filenames.
    without_invalid = re.sub(r'[\\/<>:\?\*\|"]', '', normalised)
    # Windows also dislikes names ending in dots.
    return re.sub(r'\.*$', '', without_invalid)
975060c9 109
b497d705
OM
class PageChecker(object):
    """ Callable readiness condition for selenium's WebDriverWait.

    Polled repeatedly against the thing page; returns False until the
    title, the advertised file count, every file row, the image thumbs
    and the license text have all rendered, capturing each as it
    appears for later scraping.

    NOTE(review): relies on selenium's private EC._find_element /
    EC._find_elements helpers - may break on selenium upgrades.
    """
    def __init__(self):
        # Debug trail of what each poll observed; dumped on timeout.
        self.log = []
        self.title = None
        self.file_count = None
        self.files = None
        self.images = None
        self.license = None


    def __call__(self, _):
        try:
            self.log.append("call")
            if self.title is None:
                # first find the name
                name = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # OK. Do we know how many files we have to download?
                metrics = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                # Pull the number out of the "<count>\nThing Files" metric button.
                cur_count = int([x.text.split("\n")[0] for x in metrics if x.text.endswith("\nThing Files")][0])
                self.log.append(cur_count)
                if cur_count == 0:
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            fileRows = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(fileRows)))
            # Keep waiting until every advertised file row has rendered.
            if len(fileRows) < self.file_count:
                return False

            self.log.append("Looking for images")
            # By this point _should_ have loaded all the images
            self.images = EC._find_elements(BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            self.license = EC._find_element(BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(self.images)))
            self.files = fileRows
            return True
        # Any error just means "page not ready yet"; WebDriverWait will
        # poll again until its timeout fires.
        except Exception:
            return False
155
156
157
158
6a777954
OM
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.
    """

    def __init__(self, thing_queue, download_directory):
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory

    def run(self):
        """ actual download loop.
        """
        # Pull thing ids off the shared queue until the None sentinel
        # arrives, downloading each into the target directory.
        while True:
            thing_id = self.thing_queue.get()
            if thing_id is None:
                logging.info("Shutting download queue")
                self.thing_queue.task_done()
                return
            logging.info("Handling id {}".format(thing_id))
            Thing(thing_id).download(self.download_directory)
            self.thing_queue.task_done()
183
7b84ba6d 184
6a777954
OM
185
186
dbdb1782 187
class Grouping:
    """ Holds details of a group of things for download
        This is effectively (although not actually) an abstract class
        - use Collection or Designs instead.
    """

    def __init__(self, quick):
        # Thing ids belonging to this grouping; populated by get().
        self.things = []
        self.total = 0
        self.req_id = None
        self.last_page = 0
        self.per_page = None
        # Should we stop downloading when we hit a known datestamp?
        self.quick = quick
        # These should be set by child classes.
        self.url = None
        self.download_dir = None
        self.collection_url = None

    def _get_small_grouping(self, req):
        """ Handle small groupings """
        # Groupings small enough to fit on one page list every thing
        # directly in the first response, so no ajax paging is needed.
        soup = BeautifulSoup(req.text, features='lxml')
        links = soup.find_all('a', {'class': 'card-img-holder'})
        # hrefs look like ".../thing:<id>"; keep just the id.
        self.things = [x['href'].split(':')[1] for x in links]
        self.total = len(self.things)

        return self.things

    def get(self):
        """ retrieve the things of the grouping. """
        if self.things:
            # We've already done it.
            return self.things

        # Check for initialisation:
        if not self.url:
            logging.error("No URL set - object not initialised properly?")
            raise ValueError("No URL set - object not initialised properly?")

        # Get the internal details of the grouping.
        logging.debug("Querying {}".format(self.url))
        c_req = requests.get(self.url)
        total = TOTAL_REGEX.search(c_req.text)
        if total is None:
            # This is a small (<13) items grouping. Pull the list from this req.
            return self._get_small_grouping(c_req)
        # NOTE(review): self.total is kept as the regex's string match
        # here (unlike the int conversion for last_page) - it is only
        # used for logging.
        self.total = total.groups()[0]
        self.req_id = ID_REGEX.search(c_req.text).groups()[0]
        self.last_page = int(LAST_PAGE_REGEX.search(c_req.text).groups()[0])
        self.per_page = PER_PAGE_REGEX.search(c_req.text).groups()[0]
        parameters = {
            'base_url': self.url,
            'page': '1',
            'per_page': '12',
            'id': self.req_id
        }
        # Walk every page of the ajax listing, accumulating thing ids.
        for current_page in range(1, self.last_page + 1):
            parameters['page'] = current_page
            req = requests.post(self.collection_url, parameters)
            soup = BeautifulSoup(req.text, features='lxml')
            links = soup.find_all('a', {'class': 'card-img-holder'})
            self.things += [x['href'].split(':')[1] for x in links]

        return self.things

    def download(self):
        """ Downloads all the files in a collection """
        if not self.things:
            self.get()

        if not self.download_dir:
            raise ValueError(
                "No download_dir set - invalidly initialised object?")

        # NOTE(review): base_dir is assigned but never used below.
        base_dir = os.getcwd()
        try:
            os.mkdir(self.download_dir)
        except FileExistsError:
            logging.info("Target directory {} already exists. Assuming a resume."
                         .format(self.download_dir))
        logging.info("Downloading {} thing(s).".format(self.total))
        for idx, thing in enumerate(self.things):
            logging.info("Downloading thing {} - {}".format(idx, thing))
            RC = Thing(thing).download(self.download_dir)
            # In quick mode, hitting something already downloaded means
            # everything older has been fetched before - stop early.
            if self.quick and RC==State.ALREADY_DOWNLOADED:
                logging.info("Caught up, stopping.")
                return
975060c9 275
dbdb1782 276
3522a3bf
OM
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name, directory, quick):
        Grouping.__init__(self, quick)
        self.user = user
        self.name = name
        # Collection pages live under the owning user's profile.
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, user, strip_ws(name))
        dir_name = "{}-{}".format(slugify(user), slugify(name))
        self.download_dir = os.path.join(directory, dir_name)
        self.collection_url = URL_COLLECTION
3522a3bf 289
dbdb1782 290
3522a3bf
OM
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user, directory, quick):
        Grouping.__init__(self, quick)
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, user)
        dir_name = "{} designs".format(slugify(user))
        self.download_dir = os.path.join(directory, dir_name)
        self.collection_url = USER_COLLECTION
975060c9 301
dbdb1782 302
3c82f75b
OM
class Thing:
    """ An individual design on thingiverse. """

    def __init__(self, thing_id):
        self.thing_id = thing_id
        # Timestamp of the previous download, read from timestamp.txt.
        self.last_time = None
        self._parsed = False
        self._needs_download = True
        self.text = None
        self.title = None
        self.download_dir = None

    def _parse(self, base_dir):
        """ Work out what, if anything needs to be done.

        Scrapes the thing's files page via the shared headless browser,
        building self._file_links / self._image_links / self._license,
        then compares file timestamps against the locally stored
        timestamp.txt to decide whether a download is required.
        Leaves self._parsed False on scrape failure.
        """
        if self._parsed:
            return

        url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
        try:
            BROWSER.get(url)
            wait = WebDriverWait(BROWSER, 60)
            pc = PageChecker()
            wait.until(pc)
        except requests.exceptions.ConnectionError as error:
            logging.error("Unable to connect for thing {}: {}".format(
                self.thing_id, error))
            return
        except selenium.common.exceptions.TimeoutException:
            logging.error(pc.log)
            logging.error("Timeout trying to parse thing {}".format(self.thing_id))
            return

        self.title = pc.title
        self._file_links = []
        if not pc.files:
            logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
        for link in pc.files:
            logging.debug("Parsing link: {}".format(link.text))
            link_link = link.find_element_by_xpath(".//a").get_attribute("href")
            if link_link.endswith("/zip"):
                # bulk link.
                continue
            try:
                link_title, link_details, _ = link.text.split("\n")
            except ValueError:
                # If it is a filetype that doesn't generate a picture, then we get an extra field at the start.
                _, link_title, link_details, _ = link.text.split("\n")

            # link_details will be something like '461 kb | Updated 06-11-2019 | 373 Downloads'
            # need to convert from M D Y to Y M D
            link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
            try:
                self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
            except ValueError:
                # Unparseable date - log it and skip the file.
                logging.error(link_date)

        self._image_links = [x.find_element_by_xpath(".//img").get_attribute("src") for x in pc.images]
        self._license = pc.license
        self.pc = pc

        # Old style dirs were named by title alone; new ones include the id.
        self.old_download_dir = os.path.join(base_dir, slugify(self.title))
        self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, slugify(self.title)))

        logging.debug("Parsing {} ({})".format(self.thing_id, self.title))

        if not os.path.exists(self.download_dir):
            logging.info("Looking for old dir at {}".format(self.old_download_dir))
            if os.path.exists(self.old_download_dir):
                logging.warning("Found previous style download directory. Moving it from {} to {}".format(self.old_download_dir, self.download_dir))
                os.rename(self.old_download_dir, self.download_dir)
            else:
                # Not yet downloaded
                self._parsed = True
                return

        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        if not os.path.exists(timestamp_file):
            # Old download from before
            logging.warning(
                "Old-style download directory found. Assuming update required.")
            self._parsed = True
            return

        try:
            with open(timestamp_file, 'r') as timestamp_handle:
                # add the .split(' ')[0] to remove the timestamp from the old style timestamps
                last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
                logging.warning(last_bits)
                # Guard against zeroed fields from broken old timestamps:
                # substitute the smallest valid value for each component.
                if last_bits[0] == 0:
                    last_bits[0] = 1
                if last_bits[1] == 0:
                    last_bits[1] = 1
                if last_bits[2] == 0:
                    last_bits[2] = 1980
                try:
                    self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
                except ValueError:
                    # This one appears to be M D Y
                    self.last_time = datetime.datetime(last_bits[2], last_bits[0], last_bits[1])

            logging.info("last downloaded version: {}".format(self.last_time))
        except FileNotFoundError:
            # Not run on this thing before.
            logging.info(
                "Old-style download directory found. Assuming update required.")
            self.last_time = None
            self._needs_download = True
            self._parsed = True
            return

        # OK, so we have a timestamp, lets see if there is anything new to get
        for file_link in self._file_links:
            if file_link.last_update > self.last_time:
                logging.info(
                    "Found new/updated file {} - {}".format(file_link.name, file_link.last_update))
                self._needs_download = True
                self._parsed = True
                return

        # Got here, so nope, no new files.
        self._needs_download = False
        self._parsed = True

    def download(self, base_dir):
        """ Download all files for a given thing.

        Returns a State: OK on success, ALREADY_DOWNLOADED when nothing
        new exists, FAILED on any error (in which case the partial
        directory is moved aside via fail_dir).
        """
        if not self._parsed:
            self._parse(base_dir)

        if not self._parsed:
            logging.error(
                "Unable to parse {} - aborting download".format(self.thing_id))
            return State.FAILED

        if not self._needs_download:
            print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
            return State.ALREADY_DOWNLOADED

        if not self._file_links:
            print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
            return State.FAILED

        # Have we already downloaded some things?
        timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
        prev_dir = None
        if os.path.exists(self.download_dir):
            if not os.path.exists(timestamp_file):
                # edge case: old style dir w/out timestamp.
                logging.warning("Old style download dir found at {}".format(self.title))
                prev_count = 0
                target_dir = "{}_old".format(self.download_dir)
                while os.path.exists(target_dir):
                    prev_count = prev_count + 1
                    target_dir = "{}_old_{}".format(self.download_dir, prev_count)
                os.rename(self.download_dir, target_dir)
            else:
                # Keep the previous download so unchanged files can be
                # copied from it instead of re-downloaded.
                prev_dir = "{}_{}".format(self.download_dir, slugify(self.last_time.__str__()))
                os.rename(self.download_dir, prev_dir)

        # Get the list of files to download

        new_file_links = []
        old_file_links = []
        new_last_time = None

        if not self.last_time:
            # If we don't have anything to copy from, then it is all new.
            logging.debug("No last time, downloading all files")
            new_file_links = self._file_links
            new_last_time = new_file_links[0].last_update

            for file_link in new_file_links:
                new_last_time = max(new_last_time, file_link.last_update)
            logging.debug("New timestamp will be {}".format(new_last_time))
        else:
            new_last_time = self.last_time
            for file_link in self._file_links:
                if file_link.last_update > self.last_time:
                    new_file_links.append(file_link)
                    new_last_time = max(new_last_time, file_link.last_update)
                else:
                    old_file_links.append(file_link)

        logging.debug("new timestamp {}".format(new_last_time))

        # OK. Time to get to work.
        logging.debug("Generating download_dir")
        os.mkdir(self.download_dir)
        filelist_file = os.path.join(self.download_dir, "filelist.txt")
        with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
            for fl in self._file_links:
                base_link = fl.link
                try:
                    # Resolve the redirect once so the filelist records
                    # the direct download location.
                    fl.link = requests.get(fl.link, allow_redirects=False).headers['location']
                except Exception:
                    # Sometimes Thingiverse just gives us the direct link the first time. Not sure why.
                    pass

                fl_handle.write("{},{},{}, {}\n".format(fl.link, fl.name, fl.last_update, base_link))


        # First grab the cached files (if any)
        logging.info("Copying {} unchanged files.".format(len(old_file_links)))
        for file_link in old_file_links:
            old_file = os.path.join(prev_dir, file_link.name)
            new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
            try:
                logging.debug("Copying {} to {}".format(old_file, new_file))
                copyfile(old_file, new_file)
            except FileNotFoundError:
                # Bug fixed: this used file_link["title"], but FileLink
                # is a dataclass (not subscriptable) and has no "title"
                # field - it crashed whenever a cached file was missing.
                logging.warning(
                    "Unable to find {} in old archive, redownloading".format(file_link.name))
                new_file_links.append(file_link)

        # Now download the new ones
        logging.info("Downloading {} new files of {}".format(
            len(new_file_links), len(self._file_links)))
        try:
            for file_link in new_file_links:
                file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
                logging.debug("Downloading {} from {} to {}".format(
                    file_link.name, file_link.link, file_name))
                data_req = requests.get(file_link.link)
                with open(file_name, 'wb') as handle:
                    handle.write(data_req.content)
        except Exception as exception:
            logging.error("Failed to download {} - {}".format(file_link.name, exception))
            fail_dir(self.download_dir)
            return State.FAILED


        # People like images. But this doesn't work yet.
        image_dir = os.path.join(self.download_dir, 'images')
        logging.info("Downloading {} images.".format(len(self._image_links)))
        try:
            os.mkdir(image_dir)
            for imagelink in self._image_links:
                filename = os.path.basename(imagelink)
                if filename.endswith('stl'):
                    filename = "{}.png".format(filename)
                image_req = requests.get(imagelink)
                with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
                    handle.write(image_req.content)
        except Exception as exception:
            print("Failed to download {} - {}".format(filename, exception))
            fail_dir(self.download_dir)
            return State.FAILED

        """
        # instructions are good too.
        logging.info("Downloading readme")
        try:
            readme_txt = soup.find('meta', property='og:description')[
                'content']
            with open(os.path.join(self.download_dir, 'readme.txt'), 'w') as readme_handle:
                readme_handle.write("{}\n".format(readme_txt))
        except (TypeError, KeyError) as exception:
            logging.warning("No readme? {}".format(exception))
        except IOError as exception:
            logging.warning("Failed to write readme! {}".format(exception))

        """
        # Best get some licenses
        logging.info("Downloading license")
        try:
            if self._license:
                with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
                    license_handle.write("{}\n".format(self._license))
        except IOError as exception:
            logging.warning("Failed to write license! {}".format(exception))

        try:
            # Now write the timestamp
            with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
                timestamp_handle.write(new_last_time.__str__())
        except Exception as exception:
            print("Failed to write timestamp file - {}".format(exception))
            fail_dir(self.download_dir)
            return State.FAILED
        self._needs_download = False
        logging.debug("Download of {} finished".format(self.title))
        return State.OK
975060c9 587
dbdb1782 588
def do_batch(batch_file, download_dir, quick):
    """ Read a file in line by line, parsing each as a set of calls to this script.

    Recognised instructions: "thing <id>", "collection <owner> <name>",
    "user <name>". Blank lines are skipped; anything else logs a warning.
    """
    with open(batch_file) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Skip empty lines
                continue
            logging.info("Handling instruction {}".format(line))
            command_arr = line.split()
            if command_arr[0] == "thing":
                logging.debug(
                    "Handling batch thing instruction: {}".format(line))
                Thing(command_arr[1]).download(download_dir)
                continue
            if command_arr[0] == "collection":
                logging.debug(
                    "Handling batch collection instruction: {}".format(line))
                Collection(command_arr[1], command_arr[2],
                           download_dir, quick).download()
                continue
            if command_arr[0] == "user":
                # Fixed copy-paste defect: this branch used to log
                # "Handling batch collection instruction".
                logging.debug(
                    "Handling batch user instruction: {}".format(line))
                Designs(command_arr[1], download_dir, quick).download()
                continue
            logging.warning("Unable to parse current instruction. Skipping.")
616
dbdb1782 617
975060c9
OM
def main():
    """ Entry point for script being run as a command. """
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-level", choices=[
        'debug', 'info', 'warning'], default='info', help="level of logging desired")
    parser.add_argument("-d", "--directory",
                        help="Target directory to download into")
    parser.add_argument("-f", "--log-file",
                        help="Place to log debug information to")
    parser.add_argument("-q", "--quick", action="store_true",
                        help="Assume date ordering on posts")

    subparsers = parser.add_subparsers(
        help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser(
        'collection', help="Download one or more entire collection(s)")
    collection_parser.add_argument(
        "owner", help="The owner of the collection(s) to get")
    collection_parser.add_argument(
        "collections", nargs="+", help="Space seperated list of the name(s) of collection to get")
    thing_parser = subparsers.add_parser(
        'thing', help="Download a single thing.")
    thing_parser.add_argument(
        "things", nargs="*", help="Space seperated list of thing ID(s) to download")
    user_parser = subparsers.add_parser(
        "user", help="Download all things by one or more users")
    user_parser.add_argument(
        "users", nargs="+", help="A space seperated list of the user(s) to get the designs of")
    batch_parser = subparsers.add_parser(
        "batch", help="Perform multiple actions written in a text file")
    batch_parser.add_argument(
        "batch_file", help="The name of the file to read.")
    subparsers.add_parser("version", help="Show the current version")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)
    if not args.directory:
        args.directory = os.getcwd()

    # Root logger captures everything; each handler filters for its own
    # destination (console at the requested level, file at DEBUG).
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.log_level.upper())
    # NOTE(review): the formatter is only attached to the file handler
    # below, so console output is bare messages - confirm intentional.

    logger.addHandler(console_handler)
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)


    # Start downloader
    thing_queue = multiprocessing.JoinableQueue()
    logging.debug("starting {} downloader(s)".format(DOWNLOADER_COUNT))
    downloaders = [Downloader(thing_queue, args.directory) for _ in range(DOWNLOADER_COUNT)]
    for downloader in downloaders:
        downloader.start()


    # NOTE(review): prefix match rather than equality - presumably
    # defensive; the subparser is registered as exactly 'collection'.
    if args.subcommand.startswith("collection"):
        for collection in args.collections:
            Collection(args.owner, collection, args.directory, args.quick).download()
    if args.subcommand == "thing":
        # Things are handed to the downloader processes via the queue;
        # everything else downloads synchronously in this process.
        for thing in args.things:
            thing_queue.put(thing)
    if args.subcommand == "user":
        for user in args.users:
            Designs(user, args.directory, args.quick).download()
    if args.subcommand == "version":
        print("thingy_grabber.py version {}".format(VERSION))
    if args.subcommand == "batch":
        do_batch(args.batch_file, args.directory, args.quick)

    # Stop the downloader processes
    for downloader in downloaders:
        thing_queue.put(None)
975060c9 698
d194b140
OM
# Make sure the shared headless browser is torn down however we exit.
atexit.register(BROWSER.quit)


if __name__ == "__main__":
    multiprocessing.freeze_support()
    main()