change target download dir and get images

author Oliver Matthews <oliver@codersoffortune.net>

Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)

committer Oliver Matthews <oliver@codersoffortune.net>

Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)
author Oliver Matthews <oliver@codersoffortune.net>
Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)
committer Oliver Matthews <oliver@codersoffortune.net>
Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)
diff --git a/README.md b/README.md

index 9349b19..1e5da52 100644 (file)
--- a/README.md
+++ b/README.md
@@ -48,8 +48,15 @@ python3, beautifulsoup4, requests, lxml
  - If you run it again with the same settings, it will check for updated files and only update what has changed. This should make it suitable for syncing a collection on a cronjob
  - If there is an updated file, the old directory will be moved to `name_timestamp` where `timestamp` is the last upload time of the old files. The code will then copy unchanged files across and download any new ones.
  
+## Changelog
+* v0.4.0
+  - Added a changelog
+  - Now downloads associated images
+  - Support `-d` to specify the base download directory
  
  ## Todo features (maybe):
+- better progress support
+- better batch mode
  - less perfunctory error checking / handling
  - attempt to use -failed dirs for resuming
-- pull down images as well
+
diff --git a/thingy_grabber.py b/thingy_grabber.py

index bfa700f..b8e5f1b 100755 (executable)
--- a/thingy_grabber.py
+++ b/thingy_grabber.py
@@ -165,6 +165,9 @@ class Thing:
          self.text = req.text
          soup = BeautifulSoup(self.text, features='lxml')
  
+        print("Found no new files for {}".format(self.title))
+        #import code
+        #code.interact(local=dict(globals(), **locals()))
          self.title = slugify(soup.find_all('h1')[0].text.strip())
          self.download_dir = os.path.join(base_dir, self.title)
  
@@ -206,7 +209,7 @@ class Thing:
                  self._parsed = True
                  return
          # Got here, so nope, no new files.
-        print("Found no new files for {}".format(self.title))
+        code.interact(local=dict(globals(), **locals()))
          self._needs_download = False
          self._parsed = True
  
@@ -294,6 +297,27 @@ class Thing:
              os.rename(self.download_dir, "{}_failed".format(self.download_dir))
              return
  
+        # People like images
+        image_dir = os.path.join(self.download_dir, 'images')
+        try:
+            os.mkdir(image_dir)
+            for imagelink in soup.find_all('span', {'class':'gallery-slider'})[0] \
+                                 .find_all('div', {'class':'gallery-photo'}):
+                url = imagelink['data-full']
+                filename = os.path.basename(url)
+                if filename.endswith('stl'):
+                    filename = "{}.png".format(filename)
+                image_req = requests.get(url)
+                with open(os.path.join(image_dir, filename), 'wb') as handle:
+                    handle.write(image_req.content)
+        except Exception as exception:
+            print("Failed to download {} - {}".format(filename, exception))
+            os.rename(self.download_dir, "{}_failed".format(self.download_dir))
+            return
+
+
+
+
          try:
              # Now write the timestamp
              with open(timestamp_file, 'w') as timestamp_handle:
@@ -319,7 +343,7 @@ def main():
      thing_parser.add_argument("thing", help="Thing ID to download")
      user_parser = subparsers.add_parser("user", help="Download all things by a user")
      user_parser.add_argument("user", help="The user to get the designs of")
-    version_parser = subparsers.add_parser("version", help="Show the current version")
+    subparsers.add_parser("version", help="Show the current version")
  
      args = parser.parse_args()
      if not args.subcommand:
author	Oliver Matthews <oliver@codersoffortune.net>
	Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)
committer	Oliver Matthews <oliver@codersoffortune.net>
	Wed, 27 Nov 2019 22:30:51 +0000 (22:30 +0000)
README.md		patch \| blob \| blame \| history
thingy_grabber.py		patch \| blob \| blame \| history