from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
+import atexit
+import py7zr
# Root URL of the site; URL_COLLECTION below is built from it.
URL_BASE = "https://www.thingiverse.com"
# NOTE(review): presumably the AJAX endpoint listing the things in a
# collection - confirm against the code that requests it.
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"
# NOTE(review): looks like the default number of downloader workers and the
# default retry budget; their consumers are outside this view - confirm.
DOWNLOADER_COUNT = 1
RETRY_COUNT = 3
-VERSION = "0.8.1"
# Longest absolute path (in characters) that truncate_name() returns unchanged.
+MAX_PATH_LENGTH = 250
+
+VERSION = "0.8.7"
#BROWSER = webdriver.PhantomJS('./phantomjs')
ALREADY_DOWNLOADED = enum.auto()
def fail_dir(dir_name):
    """Rename the directory of a failed download out of the way.

    The directory is renamed to ``<dir_name>_failed``.  If that name is
    already taken, ``<dir_name>_failed_0``, ``<dir_name>_failed_1``, ... are
    tried in order and the first unused one is used.
    """
    destination = "{}_failed".format(dir_name)
    attempt = 0
    while True:
        if not os.path.exists(destination):
            break
        # Name already in use: try the next numbered variant.
        destination = "{}_failed_{}".format(dir_name, attempt)
        attempt += 1
    os.rename(dir_name, destination)
+
+
def truncate_name(file_name, max_length=None):
    """Return an absolute path for *file_name* that fits the length limit.

    Long paths are mostly a problem on Windows.  A path that already fits is
    returned unchanged (as an absolute path).  Otherwise the file-name stem is
    shortened and a ``_<n>`` counter is inserted before the extension, using
    the first ``n`` whose resulting path does not exist yet.

    Args:
        file_name: Relative or absolute path of the file to be written.
        max_length: Maximum length of the returned path in characters.
            Defaults to the module-level ``MAX_PATH_LENGTH``.

    Returns:
        An absolute path in the same directory with the same extension.  It is
        at most ``max_length`` characters long unless the directory, counter
        and extension alone already exceed the limit; in that case one
        character of the stem is kept as a best effort.
    """
    if max_length is None:
        max_length = MAX_PATH_LENGTH
    path = os.path.abspath(file_name)
    if len(path) <= max_length:
        return path

    base, extension = os.path.splitext(path)
    # Only the file-name part may be cut; cutting into the directory would
    # silently move the file somewhere else.
    directory, stem = os.path.split(base)
    # Characters consumed by the directory including its trailing separator.
    prefix_len = len(os.path.join(directory, ""))

    inc = 0
    while True:
        suffix = "_{}{}".format(inc, extension)
        # Recomputed per attempt because the counter can gain digits.
        room = max(max_length - prefix_len - len(suffix), 1)
        new_path = os.path.join(directory, stem[:room] + suffix)
        if not os.path.exists(new_path):
            return new_path
        inc += 1
+
+
def strip_ws(value):
    """Replace each match of NO_WHITESPACE_REGEX in *value* with a hyphen.

    Judging by the pattern's name the matches are whitespace, so whitespace
    is replaced by ``-`` rather than dropped.
    """
    hyphenated = NO_WHITESPACE_REGEX.sub('-', value)
    return str(hyphenated)
def slugify(value):
    """Turn *value* into a lowercase string that is safe as a file name.

    The string is NFKC-normalised and lowercased, the characters that are
    invalid in Windows file names (``\\ / < > : ? * | "``) are removed, and
    surrounding whitespace plus trailing dots are trimmed.

    Args:
        value: The text (e.g. a title) to convert.

    Returns:
        The cleaned string.  It may be empty if nothing valid remains.
    """
    value = unicodedata.normalize('NFKC', value).lower()
    value = re.sub(r'[\\/<>:\?\*\|"]', '', value)
    # Trim last: removing invalid characters or trailing dots can expose new
    # trailing whitespace ("what ?" -> "what "), and Windows does not allow a
    # name to end in a space or a dot.
    value = re.sub(r'[\s.]+\Z', '', value)
    return value.lstrip()
class PageChecker(object):
self.title = pc.title
self._file_links=[]
+ if not pc.files:
+ logging.error("No files found for thing {} - probably thingiverse being broken, try again later".format(self.thing_id))
for link in pc.files:
logging.debug("Parsing link: {}".format(link.text))
link_link = link.find_element_by_xpath(".//a").get_attribute("href")
#link_details will be something like '461 kb | Updated 06-11-2019 | 373 Downloads'
#need to convert from M D Y to Y M D
link_date = [int(x) for x in link_details.split("|")[1].split()[-1].split("-")]
- logging.error(link_details)
try:
self._file_links.append(FileLink(link_title, datetime.datetime(link_date[2], link_date[0], link_date[1]), link_link))
except ValueError:
# add the .split(' ')[0] to remove the timestamp from the old style timestamps
last_bits = [int(x) for x in timestamp_handle.readlines()[0].split(' ')[0].split("-")]
logging.warning(last_bits)
+ if last_bits[0] == 0:
+ last_bits[0] = 1
+ if last_bits[1] == 0:
+ last_bits[1] = 1
+ if last_bits[2] == 0:
+ last_bits[2] = 1980
try:
self.last_time = datetime.datetime(last_bits[0], last_bits[1], last_bits[2])
except ValueError:
print("{} - {} already downloaded - skipping.".format(self.thing_id, self.title))
return State.ALREADY_DOWNLOADED
+ if not self._file_links:
+ print("{} - {} appears to have no files. Thingiverse acting up again?".format(self.thing_id, self.title))
+ return State.FAILED
+
# Have we already downloaded some things?
timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
prev_dir = None
logging.debug("Generating download_dir")
os.mkdir(self.download_dir)
filelist_file = os.path.join(self.download_dir, "filelist.txt")
- with open(filelist_file, 'w') as fl_handle:
+ with open(filelist_file, 'w', encoding="utf-8") as fl_handle:
for fl in self._file_links:
base_link = fl.link
try:
logging.info("Copying {} unchanged files.".format(len(old_file_links)))
for file_link in old_file_links:
old_file = os.path.join(prev_dir, file_link.name)
- new_file = os.path.join(self.download_dir, file_link.name)
+ new_file = truncate_name(os.path.join(self.download_dir, file_link.name))
try:
logging.debug("Copying {} to {}".format(old_file, new_file))
copyfile(old_file, new_file)
len(new_file_links), len(self._file_links)))
try:
for file_link in new_file_links:
- file_name = os.path.join(self.download_dir, file_link.name)
+ file_name = truncate_name(os.path.join(self.download_dir, file_link.name))
logging.debug("Downloading {} from {} to {}".format(
file_link.name, file_link.link, file_name))
data_req = requests.get(file_link.link)
handle.write(data_req.content)
except Exception as exception:
logging.error("Failed to download {} - {}".format(file_link.name, exception))
- os.rename(self.download_dir, "{}_failed".format(self.download_dir))
+ fail_dir(self.download_dir)
return State.FAILED
if filename.endswith('stl'):
filename = "{}.png".format(filename)
image_req = requests.get(imagelink)
- with open(os.path.join(image_dir, filename), 'wb') as handle:
+ with open(truncate_name(os.path.join(image_dir, filename)), 'wb') as handle:
handle.write(image_req.content)
except Exception as exception:
print("Failed to download {} - {}".format(filename, exception))
- os.rename(self.download_dir, "{}_failed".format(self.download_dir))
+ fail_dir(self.download_dir)
return State.FAILED
"""
logging.info("Downloading license")
try:
if self._license:
- with open(os.path.join(self.download_dir, 'license.txt'), 'w') as license_handle:
+ with open(truncate_name(os.path.join(self.download_dir, 'license.txt')), 'w', encoding="utf-8") as license_handle:
license_handle.write("{}\n".format(self._license))
except IOError as exception:
logging.warning("Failed to write license! {}".format(exception))
try:
# Now write the timestamp
- with open(timestamp_file, 'w') as timestamp_handle:
+ with open(timestamp_file, 'w', encoding="utf-8") as timestamp_handle:
timestamp_handle.write(new_last_time.__str__())
except Exception as exception:
print("Failed to write timestamp file - {}".format(exception))
- os.rename(self.download_dir, "{}_failed".format(self.download_dir))
+ fail_dir(self.download_dir)
return State.FAILED
self._needs_download = False
logging.debug("Download of {} finished".format(self.title))
for downloader in downloaders:
thing_queue.put(None)
# Ask the interpreter to call BROWSER.quit when it exits, so the browser
# object (BROWSER is defined outside this view) is shut down with the script.
+atexit.register(BROWSER.quit)
+
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
# freeze_support() is called before main() so that multiprocessing also works
# when the script has been packaged as a frozen Windows executable.
multiprocessing.freeze_support()
main()