"""
Thingiverse bulk downloader
"""
import argparse
import os
import re
import sys
import unicodedata

import requests
from bs4 import BeautifulSoup
URL_BASE = "https://www.thingiverse.com"
URL_COLLECTION = URL_BASE + "/ajax/thingcollection/list_collected_things"

# Regexes for pulling pagination metadata out of the JSON blob embedded in
# the collection page; each captures the integer after the key.
ID_REGEX = re.compile(r'"id":(\d*),')
TOTAL_REGEX = re.compile(r'"total":(\d*),')
LAST_PAGE_REGEX = re.compile(r'"last_page":(\d*),')
# This appears to be fixed at 12, but if it changes would screw the rest up.
PER_PAGE_REGEX = re.compile(r'"per_page":(\d*),')
# Collapses runs of whitespace and/or hyphens into a single hyphen.
NO_WHITESPACE_REGEX = re.compile(r'[-\s]+')

# Default verbosity; main() overwrites this from the -v flag.
# NOTE(review): the original initialiser for VERBOSE is not visible in this
# extract (it is read by get()/download_thing()); False is the conventional
# default -- confirm against the full file.
VERBOSE = False
def strip_ws(value):
    """ Remove whitespace from a string.

    Runs of whitespace and hyphens are collapsed to a single '-', matching
    the slug form Thingiverse uses in collection URLs.
    NOTE(review): the original `def` line is missing from this extract; the
    signature is reconstructed from the body and the call site
    `strip_ws(self.name)` -- confirm against the full file.
    """
    return str(NO_WHITESPACE_REGEX.sub('-', value))
def slugify(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    NOTE(review): despite the docstring, the visible code never lowercases;
    behaviour is preserved as-is so directory names stay compatible with
    existing downloads. The `def` line and final `return` are missing from
    this extract and are reconstructed (callers use the return value).
    """
    # Transliterate to ASCII, dropping anything that will not map.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
    # Strip everything but word chars, whitespace and hyphens.
    value = str(re.sub(r'[^\w\s-]', '', value).strip())
    # Collapse whitespace/hyphen runs into single hyphens.
    value = str(NO_WHITESPACE_REGEX.sub('-', value))
    return value
class Grouping:
    """ Holds details of a group of things.

    NOTE(review): the `class` line and most of __init__ are missing from this
    extract; the class name is grounded by `class Collection(Grouping)` below,
    and the attributes by their uses in get()/download().
    """

    def __init__(self):
        self.things = []        # thing ids collected by get()
        self.total = None       # total count reported by the ajax endpoint
        self.req_id = None      # grouping id parsed from the page
        self.last_page = None   # last page number for pagination
        self.per_page = None    # page size (observed fixed at 12)
        # These two should be set by child classes.
        self.url = None
        self.download_dir = None
53 def _get_small_grouping(self
, req
):
54 """ Handle small groupings """
55 soup
= BeautifulSoup(req
.text
, features
='lxml')
56 links
= soup
.find_all('a', {'class':'card-img-holder'})
57 self
.things
= [x
['href'].split(':')[1] for x
in links
]
62 """ retrieve the things of the grouping. """
64 # We've already done it.
67 # Check for initialisation:
69 print("No URL set - object not initialised properly?")
70 raise ValueError("No URL set - object not initialised properly?")
72 # Get the internal details of the grouping.
74 print("Querying {}".format(self
.url
))
75 c_req
= requests
.get(self
.url
)
76 total
= TOTAL_REGEX
.search(c_req
.text
)
78 # This is a small (<13) items grouping. Pull the list from this req.
79 return self
._get
_small
_grouping
(c_req
)
80 self
.total
= total
.groups()[0]
81 self
.req_id
= ID_REGEX
.search(c_req
.text
).groups()[0]
82 self
.last_page
= int(LAST_PAGE_REGEX
.search(c_req
.text
).groups()[0])
83 self
.per_page
= PER_PAGE_REGEX
.search(c_req
.text
).groups()[0]
90 for current_page
in range(1, self
.last_page
+ 1):
91 parameters
['page'] = current_page
92 req
= requests
.post(URL_COLLECTION
, parameters
)
93 soup
= BeautifulSoup(req
.text
, features
='lxml')
94 links
= soup
.find_all('a', {'class':'card-img-holder'})
95 self
.things
+= [x
['href'].split(':')[1] for x
in links
]
100 """ Downloads all the files in a collection """
104 if not self
.download_dir
:
105 raise ValueError("No download_dir set - invalidly initialised object?")
107 base_dir
= os
.getcwd()
109 os
.mkdir(self
.download_dir
)
110 except FileExistsError
:
111 print("Target directory {} already exists. Assuming a resume.".format(self
.download_dir
))
112 os
.chdir(self
.download_dir
)
113 for thing
in self
.things
:
114 download_thing(thing
)
class Collection(Grouping):
    """ Holds details of a collection. """

    def __init__(self, user, name):
        Grouping.__init__(self)
        # Reconstructed: these assignments are missing from the extract but
        # are required by the self.user/self.name reads just below.
        self.user = user
        self.name = name
        self.url = "{}/{}/collections/{}".format(
            URL_BASE, self.user, strip_ws(self.name))
        self.download_dir = os.path.join(
            os.getcwd(), "{}-{}".format(slugify(self.user), slugify(self.name)))
class Designs(Grouping):
    """ Holds details of all of a users' designs. """

    def __init__(self, user):
        Grouping.__init__(self)
        # Reconstructed: this assignment is missing from the extract but is
        # required by the self.user reads just below.
        self.user = user
        self.url = "{}/{}/designs".format(URL_BASE, self.user)
        self.download_dir = os.path.join(
            os.getcwd(), "{} designs".format(slugify(self.user)))
def download_thing(thing):
    """ Downloads all the files for a given thing.

    Creates a directory named after the thing's (slugified) title, fetches
    any files newer than the timestamp recorded from the previous run, then
    records the newest file timestamp in timestamp.txt. On failure the
    directory is renamed with a "_failed" suffix so a later run retries it.
    NOTE(review): numerous lines are missing from this extract (the mkdir
    try-body, chdir calls, VERBOSE guards, new_file_links initialiser);
    they are reconstructed from the visible control flow -- confirm against
    the full file.
    """
    file_url = "{}/thing:{}/files".format(URL_BASE, thing)
    file_req = requests.get(file_url)
    file_soup = BeautifulSoup(file_req.text, features='lxml')

    title = slugify(file_soup.find_all('h1')[0].text.strip())
    base_dir = os.getcwd()
    try:
        os.mkdir(title)
    except FileExistsError:
        # Already present - probably a resume; timestamps decide what to fetch.
        pass

    print("Downloading {} ({})".format(thing, title))
    os.chdir(title)

    # Read the timestamp of the previous download, if any.
    last_time = None
    try:
        with open('timestamp.txt', 'r') as timestamp_handle:
            last_time = timestamp_handle.readlines()[0]
        if VERBOSE:
            print("last downloaded version: {}".format(last_time))
    except FileNotFoundError:
        # Not run on this thing before.
        if VERBOSE:
            print('Directory for thing already exists, checking for update.')

    file_links = file_soup.find_all('a', {'class': 'file-download'})
    new_last_time = last_time
    new_file_links = []

    # Keep only the files newer than the last recorded download, and track
    # the newest timestamp seen so we can record it afterwards.
    for file_link in file_links:
        timestamp = file_link.find_all('time')[0]['datetime']
        if VERBOSE:
            print("Checking {} (updated {})".format(file_link["title"], timestamp))
        if not last_time or timestamp > last_time:
            new_file_links.append(file_link)
        if not new_last_time or timestamp > new_last_time:
            new_last_time = timestamp

    if last_time and new_last_time <= last_time:
        print("Thing already downloaded. Skipping.")
        # Fix: bail out here instead of falling through to rewrite an
        # unchanged timestamp; restore the caller's working directory.
        os.chdir(base_dir)
        return

    files = [("{}{}".format(URL_BASE, x['href']), x["title"]) for x in new_file_links]

    name = None  # so the except clause can report which file failed
    try:
        for url, name in files:
            if VERBOSE:
                print("Downloading {} from {}".format(name, url))
            data_req = requests.get(url)
            with open(name, 'wb') as handle:
                handle.write(data_req.content)
        # now write timestamp
        with open('timestamp.txt', 'w') as timestamp_handle:
            timestamp_handle.write(new_last_time)
    except Exception as exception:
        print("Failed to download {} - {}".format(name, exception))
        # Must leave the directory before renaming it.
        os.chdir(base_dir)
        os.rename(title, "{}_failed".format(title))
        return

    os.chdir(base_dir)
def main():
    """ Entry point for script being run as a command.

    Parses the command line (collection / thing / user subcommands) and
    dispatches to the appropriate downloader.
    NOTE(review): the `def` line, the no-subcommand handling, and the
    Designs get/download calls are missing from this extract and are
    reconstructed -- confirm against the full file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", help="Be more verbose", action="store_true")
    subparsers = parser.add_subparsers(help="Type of thing to download", dest="subcommand")
    collection_parser = subparsers.add_parser('collection', help="Download an entire collection")
    collection_parser.add_argument("owner", help="The owner of the collection to get")
    collection_parser.add_argument("collection", help="The name of the collection to get")
    thing_parser = subparsers.add_parser('thing', help="Download a single thing.")
    thing_parser.add_argument("thing", help="Thing ID to download")
    user_parser = subparsers.add_parser("user", help="Download all things by a user")
    user_parser.add_argument("user", help="The user to get the designs of")

    args = parser.parse_args()
    if not args.subcommand:
        parser.print_help()
        sys.exit(1)

    # Fix: without `global`, this assignment would create a local and the
    # module-level VERBOSE read by get()/download_thing() would never change.
    global VERBOSE
    VERBOSE = args.verbose

    if args.subcommand.startswith("collection"):
        collection = Collection(args.owner, args.collection)
        print(collection.get())
        collection.download()
    if args.subcommand == "thing":
        download_thing(args.thing)
    if args.subcommand == "user":
        designs = Designs(args.user)
        print(designs.get())
        designs.download()
if __name__ == "__main__":
    # Reconstructed: the body of the guard is missing from this extract;
    # calling main() is the conventional (and only sensible) entry point.
    main()