class PageChecker(object):
    """Callable that reports whether a thing page has finished loading.

    Each call inspects the module-level ``BROWSER`` and returns ``True`` once
    the title, the advertised number of file rows, the image thumbnails and
    the license text have all been found; otherwise it returns ``False``.
    NOTE(review): the ignored positional argument and boolean result suggest
    this is meant to be passed to Selenium's ``WebDriverWait.until`` -- confirm
    against the caller.

    Results are kept on the instance:
        log        -- list of diagnostic entries appended on every call
        title      -- text of the model-name element, once found
        file_count -- number of files the page says it has, once known
        files      -- the file-row elements, set only on success
        images     -- the thumbnail elements, set only on success
        license    -- text of the license element, set only on success
    """

    def __init__(self):
        """Start with an empty log and no page data collected."""
        self.log = []
        self.title = None
        self.file_count = None
        self.files = None
        self.images = None
        self.license = None

    def __call__(self, _):
        """Poll the page once.

        The argument is ignored; the module-level ``BROWSER`` is always used.

        Returns:
            True when every piece of page data has been located and stored on
            the instance, False if anything is still missing or a lookup
            raised. This method never raises an ``Exception`` subclass itself;
            failures are recorded in ``self.log``.
        """
        try:
            self.log.append("call")
            if self.title is None:
                # First find the name.
                name = EC._find_element(
                    BROWSER, (By.CSS_SELECTOR, "[class^=ThingPage__modelName]"))
                if name is None:
                    return False
                self.title = name.text

            if self.file_count is None:
                # Do we know how many files we have to download yet?
                metrics = EC._find_elements(
                    BROWSER, (By.CSS_SELECTOR, "[class^=MetricButton]"))
                self.log.append("got some metrics: {}".format(len(metrics)))
                counts = [
                    metric.text.split("\n")[0]
                    for metric in metrics
                    if metric.text.endswith("\nThing Files")
                ]
                if not counts:
                    # The "Thing Files" metric has not been rendered yet.
                    return False
                cur_count = int(counts[0])
                self.log.append(cur_count)
                if cur_count == 0:
                    return False
                self.file_count = cur_count

            self.log.append("looking for {} files".format(self.file_count))
            file_rows = EC._find_elements(
                BROWSER, (By.CSS_SELECTOR, "[class^=ThingFile__fileRow]"))
            self.log.append("found {} files".format(len(file_rows)))
            if len(file_rows) < self.file_count:
                return False

            self.log.append("Looking for images")
            # By this point the page _should_ have loaded all the images.
            images = EC._find_elements(
                BROWSER, (By.CSS_SELECTOR, "[class^=thumb]"))
            license_text = EC._find_element(
                BROWSER, (By.CSS_SELECTOR, "[class^=License__licenseText]")).text
            self.log.append("found {} images".format(len(images)))

            # Store the results only once every lookup has succeeded, so a
            # failed poll never leaves the instance half-populated.
            self.images = images
            self.license = license_text
            self.files = file_rows
            return True
        except Exception as exc:
            # A failed lookup just means "not ready yet", but keep a trace so
            # genuine bugs are not silently hidden.
            self.log.append("error: {!r}".format(exc))
            return False
+
+
+
+
class Downloader(multiprocessing.Process):
    """
    Class to handle downloading the things we have found to get.

    Thing ids are read from ``thing_queue`` until a ``None`` sentinel is
    received; each id is downloaded into ``download_directory``.
    """

    def __init__(self, thing_queue, download_directory):
        """Store the work queue and the target directory.

        Args:
            thing_queue: queue of thing ids; must provide ``get()`` and
                ``task_done()`` (for example a ``multiprocessing.JoinableQueue``).
            download_directory: directory handed to ``Thing.download``.
        """
        multiprocessing.Process.__init__(self)
        # TODO: add parameters
        self.thing_queue = thing_queue
        self.download_directory = download_directory

    def run(self):
        """Actual download loop.

        Every item taken from the queue is acknowledged with ``task_done()``,
        even when its download fails, so that anything joining the queue is
        never left waiting. A failed download is logged and the loop carries
        on with the next id; a ``None`` item ends the loop.
        """
        while True:
            thing_id = self.thing_queue.get()
            try:
                if thing_id is None:
                    logging.info("Shutting download queue")
                    break
                logging.info("Handling id {}".format(thing_id))
                Thing(thing_id).download(self.download_directory)
            except Exception:
                # One bad thing must not take the whole worker down.
                logging.exception(
                    "Failed to download thing {}".format(thing_id))
            finally:
                # Runs for the sentinel, successes and failures alike.
                self.thing_queue.task_done()
+
+
+
+