VERSION = "0.5.1"
-
def strip_ws(value):
""" Replace every NO_WHITESPACE_REGEX match in a string with '-' and return the result as a str """
return str(NO_WHITESPACE_REGEX.sub('-', value))
return
url = "{}/thing:{}/files".format(URL_BASE, self.thing_id)
- req = requests.get(url)
+ try:
+ req = requests.get(url)
+ except requests.exceptions.ConnectionError as error:
+ logging.error("Unable to connect for thing {}: {}".format(self.thing_id, error))
+ return
+
self.text = req.text
soup = BeautifulSoup(self.text, features='lxml')
#import code
#code.interact(local=dict(globals(), **locals()))
- self.title = slugify(soup.find_all('h1')[0].text.strip())
- self.download_dir = os.path.join(base_dir, self.title)
+ try:
+ self.title = slugify(soup.find_all('h1')[0].text.strip())
+ except IndexError:
+ logging.warning("No title found for thing {}".format(self.thing_id))
+ self.title = self.thing_id
+
+ if req.status_code == 404:
+ logging.warning("404 for thing {} - DMCA or invalid number?".format(self.thing_id))
+ return
+
+ if req.status_code > 299:
+ logging.warning("bad status code {} for thing {} - try again later?".format(req.status_code, self.thing_id))
+ return
+
+ self.old_download_dir = os.path.join(base_dir, self.title)
+ self.download_dir = os.path.join(base_dir, "{} - {}".format(self.thing_id, self.title))
logging.debug("Parsing {} ({})".format(self.thing_id, self.title))
if not os.path.exists(self.download_dir):
- # Not yet downloaded
- self._parsed = True
- return
+ if os.path.exists(self.old_download_dir):
+ logging.info("Found previous style download directory. Moving it")
+ # The old location is a directory, which copyfile() cannot handle;
+ # rename it so the data really moves to the new "<id> - <title>" path.
+ os.rename(self.old_download_dir, self.download_dir)
+ else:
+ # Not yet downloaded
+ self._parsed = True
+ return
timestamp_file = os.path.join(self.download_dir, 'timestamp.txt')
if not os.path.exists(timestamp_file):
if not self._parsed:
self._parse(base_dir)
+ if not self._parsed:
+ logging.error("Unable to parse {} - aborting download".format(self.thing_id))
+ return
+
if not self._needs_download:
print("{} already downloaded - skipping.".format(self.title))
return
# edge case: old style dir w/out timestamp.
logging.warning(
"Old style download dir found for {}".format(self.title))
- os.rename(self.download_dir,
- "{}_old".format(self.download_dir))
+ prev_count = 0
+ target_dir = "{}_old".format(self.download_dir)
+ while os.path.exists(target_dir):
+ prev_count = prev_count + 1
+ target_dir = "{}_old_{}".format(self.download_dir, prev_count)
+ os.rename(self.download_dir, target_dir)
else:
prev_dir = "{}_{}".format(self.download_dir, self.last_time)
os.rename(self.download_dir, prev_dir)
if not self.last_time:
# If we don't have anything to copy from, then it is all new.
new_file_links = file_links
- new_last_time = file_links[0].find_all('time')[0]['datetime']
+ try:
+ new_last_time = file_links[0].find_all('time')[0]['datetime']
+ except (IndexError, KeyError):
+ # No file entry, no <time> tag, or no datetime attribute: report it
+ # and let the error propagate instead of opening an interactive
+ # debugging console in the middle of a download.
+ logging.error("Unable to find a timestamp for thing {}".format(self.thing_id))
+ raise
+
for file_link in file_links:
timestamp = file_link.find_all('time')[0]['datetime']
logging.debug("Found file {} from {}".format(
try:
os.mkdir(image_dir)
for imagelink in imagelinks:
- url = imagelink['data-full']
+ # Use .get() so a missing attribute yields None and the next size is
+ # tried; indexing would raise KeyError on the first absent attribute.
+ url = next(filter(None,[imagelink.get(x) for x in ['data-full',
+ 'data-large',
+ 'data-medium',
+ 'data-thumb']]), None)
+ if not url:
+ logging.warning("Unable to find any urls for {}".format(imagelink))
+ continue
+
filename = os.path.basename(url)
if filename.endswith('stl'):
filename = "{}.png".format(filename)
os.rename(self.download_dir, "{}_failed".format(self.download_dir))
return
+ # instructions are good too.
+ logging.info("Downloading readme")
+ try:
+ readme_txt = soup.find('meta', property='og:description')['content']
+ # Write as UTF-8 explicitly: the text comes from a web page and may hold
+ # characters the platform default encoding cannot represent.
+ with open(os.path.join(self.download_dir,'readme.txt'), 'w', encoding='utf-8') as readme_handle:
+ readme_handle.write("{}\n".format(readme_txt))
+ except (TypeError, KeyError) as exception:
+ # find() returned None (TypeError) or the tag has no 'content' (KeyError).
+ logging.warning("No readme? {}".format(exception))
+ except IOError as exception:
+ logging.warning("Failed to write readme! {}".format(exception))
+
+ # Best get some licenses
+ logging.info("Downloading license")
+ try:
+ license_txt = soup.find('div',{'class':'license-text'}).text
+ if license_txt:
+ # Same explicit UTF-8 encoding as the readme above.
+ with open(os.path.join(self.download_dir,'license.txt'), 'w', encoding='utf-8') as license_handle:
+ license_handle.write("{}\n".format(license_txt))
+ except AttributeError as exception:
+ # find() returned None, so there is no .text to read.
+ logging.warning("No license? {}".format(exception))
+ except IOError as exception:
+ logging.warning("Failed to write license! {}".format(exception))
+
+
try:
# Now write the timestamp
with open(timestamp_file, 'w') as timestamp_handle:
'debug', 'info', 'warning'], default='info', help="level of logging desired")
parser.add_argument("-d", "--directory",
help="Target directory to download into")
+ parser.add_argument("-f", "--log-file",
+ help="Place to log debug information to")
subparsers = parser.add_subparsers(
help="Type of thing to download", dest="subcommand")
collection_parser = subparsers.add_parser(
- 'collection', help="Download an entire collection")
+ 'collection', help="Download one or more entire collection(s)")
collection_parser.add_argument(
- "owner", help="The owner of the collection to get")
+ "owner", help="The owner of the collection(s) to get")
collection_parser.add_argument(
- "collection", help="The name of the collection to get")
+ "collections", nargs="+", help="Space separated list of the name(s) of collection to get")
thing_parser = subparsers.add_parser(
'thing', help="Download a single thing.")
- thing_parser.add_argument("thing", help="Thing ID to download")
+ # Require at least one thing ID, matching the collection and user parsers,
+ # so an empty invocation is reported instead of silently doing nothing.
+ thing_parser.add_argument("things", nargs="+", help="Space separated list of thing ID(s) to download")
user_parser = subparsers.add_parser(
- "user", help="Download all things by a user")
- user_parser.add_argument("user", help="The user to get the designs of")
+ "user", help="Download all things by one or more users")
+ user_parser.add_argument("users", nargs="+", help="A space separated list of the user(s) to get the designs of")
batch_parser = subparsers.add_parser(
"batch", help="Perform multiple actions written in a text file")
batch_parser.add_argument(
sys.exit(1)
if not args.directory:
args.directory = os.getcwd()
- logging.basicConfig(level=getattr(logging, args.log_level.upper()))
+
+ logger = logging.getLogger()
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger.setLevel(logging.DEBUG)
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(args.log_level.upper())
+
+ logger.addHandler(console_handler)
+ if args.log_file:
+ file_handler = logging.FileHandler(args.log_file)
+ file_handler.setLevel(logging.DEBUG)
+ file_handler.setFormatter(formatter)
+ logger.addHandler(file_handler)
if args.subcommand.startswith("collection"):
- Collection(args.owner, args.collection, args.directory).download()
+ for collection in args.collections:
+ Collection(args.owner, collection, args.directory).download()
if args.subcommand == "thing":
- Thing(args.thing).download(args.directory)
+ for thing in args.things:
+ Thing(thing).download(args.directory)
if args.subcommand == "user":
- Designs(args.user, args.directory).download()
+ for user in args.users:
+ Designs(user, args.directory).download()
if args.subcommand == "version":
print("thingy_grabber.py version {}".format(VERSION))
if args.subcommand == "batch":