X-Git-Url: https://git.hcoop.net/clinton/thingy_grabber.git/blobdiff_plain/a7152c35bfb21e27530cc5d491d26e7a0147ceeb..e36c2a0717493840b5631d7841dccb7c14b2e815:/thingy_grabber.py diff --git a/thingy_grabber.py b/thingy_grabber.py index f2b57d8..587d47e 100755 --- a/thingy_grabber.py +++ b/thingy_grabber.py @@ -117,25 +117,57 @@ def download_thing(thing): try: os.mkdir(title) except FileExistsError: - print("Directory for {} ({}) already exists, skipping".format(thing, title)) - return + pass + print("Downloading {} ({})".format(thing, title)) os.chdir(title) + last_time = None + + try: + with open('timestamp.txt', 'r') as fh: + last_time = fh.readlines()[0] + if VERBOSE: + print("last downloaded version: {}".format(last_time)) + except FileNotFoundError: + # Not run on this thing before. + if VERBOSE: + print('Directory for thing already exists, checking for update.') + last_time = None file_links = file_soup.find_all('a', {'class':'file-download'}) - files = [("{}{}".format(URL_BASE, x['href']), x["title"]) for x in file_links] + new_last_time = last_time + new_file_links = [] + + for file_link in file_links: + timestamp = file_link.find_all('time')[0]['datetime'] + if VERBOSE: + print("Checking {} (updated {})".format(file_link["title"], timestamp)) + if not last_time or timestamp > last_time: + new_file_links.append(file_link) + if not new_last_time or timestamp > new_last_time: + new_last_time = timestamp + + if last_time and new_last_time <= last_time: + print("Thing already downloaded. Skipping.") + files = [("{}{}".format(URL_BASE, x['href']), x["title"]) for x in new_file_links] try: for url, name in files: + if VERBOSE: + print("Downloading {} from {}".format(name, url)) data_req = requests.get(url) with open(name, 'wb') as handle: handle.write(data_req.content) + # now write timestamp + with open('timestamp.txt', 'w') as fh: + fh.write(new_last_time) except Exception as exception: print("Failed to download {} - {}".format(name, exception)) os.chdir(base_dir) os.rename(title, "{}_failed".format(title)) return + os.chdir(base_dir) def main():