Commit | Line | Data |
---|---|---|
ccd58722 TZ |
1 | ;;; registry.el --- Track and remember data items by various fields |
2 | ||
ba318903 | 3 | ;; Copyright (C) 2011-2014 Free Software Foundation, Inc. |
ccd58722 TZ |
4 | |
5 | ;; Author: Teodor Zlatanov <tzz@lifelogs.com> | |
6 | ;; Keywords: data | |
7 | ||
f872186f GM |
8 | ;; This file is part of GNU Emacs. |
9 | ||
10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
ccd58722 TZ |
11 | ;; it under the terms of the GNU General Public License as published by |
12 | ;; the Free Software Foundation, either version 3 of the License, or | |
13 | ;; (at your option) any later version. | |
14 | ||
f872186f | 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
ccd58722 TZ |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
f872186f | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
ccd58722 TZ |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This library provides a general-purpose EIEIO-based registry | |
26 | ;; database with persistence, initialized with these fields: | |
27 | ||
28 | ;; version: a float, 0.1 currently (don't change it) | |
29 | ||
30 | ;; max-hard: an integer, default 5000000 | |
31 | ||
32 | ;; max-soft: an integer, default 50000 | |
33 | ||
34 | ;; precious: a list of symbols | |
35 | ||
36 | ;; tracked: a list of symbols | |
37 | ||
38 | ;; tracker: a hashtable tuned for 100 symbols to track (you should | |
39 | ;; only access this with the :lookup2-function and the | |
40 | ;; :lookup2+-function) | |
41 | ||
42 | ;; data: a hashtable with default size 10K and resize threshold 2.0 | |
43 | ;; (this reflects the expected usage so override it if you know better) | |
44 | ||
45 | ;; ...plus methods to do all the work: `registry-search', | |
46 | ;; `registry-lookup', `registry-lookup-secondary', | |
47 | ;; `registry-lookup-secondary-value', `registry-insert', | |
48 | ;; `registry-delete', `registry-prune', `registry-size' which see | |
49 | ||
50 | ;; and with the following properties: | |
51 | ||
52 | ;; Every piece of data has a unique ID and some general-purpose fields | |
53 | ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g. | |
54 | ||
55 | ;; ((F1 D1) (F2 D2) (F3 a b c)) | |
56 | ||
57 | ;; Note that whether a field has one or many pieces of data, the data | |
58 | ;; is always a list of values. | |
59 | ||
60 | ;; The user decides which fields are "precious", F2 for example. At | |
61 | ;; PRUNE TIME (when the :prune-function is called), the registry will | |
62 | ;; trim any entries without the F2 field until the size is :max-soft | |
63 | ;; or less. No entries with the F2 field will be removed at PRUNE | |
64 | ;; TIME. | |
65 | ||
66 | ;; When an entry is inserted, the registry will reject new entries | |
67 | ;; if they bring it over the max-hard limit, even if they have the F2 | |
68 | ;; field. | |
69 | ||
70 | ;; The user decides which fields are "tracked", F1 for example. Any | |
71 | ;; new entry is then indexed by all the tracked fields so it can be | |
72 | ;; quickly looked up that way. The data is always a list (see example | |
73 | ;; above) and each list element is indexed. | |
74 | ||
75 | ;; Precious and tracked field names must be symbols. All other | |
76 | ;; fields can be any other Emacs Lisp types. | |
77 | ||
78 | ;;; Code: | |
79 | ||
f8fc0578 SM |
80 | (eval-when-compile (require 'cl)) |
81 | ||
6651c015 KY |
82 | (require 'eieio) |
83 | (require 'eieio-base) | |
ccd58722 TZ |
84 | |
85 | (defclass registry-db (eieio-persistent) | |
86 | ((version :initarg :version | |
87 | :initform 0.1 | |
88 | :type float | |
89 | :custom float | |
90 | :documentation "The registry version.") | |
91 | (max-hard :initarg :max-hard | |
92 | :initform 5000000 | |
93 | :type integer | |
94 | :custom integer | |
95 | :documentation "Never accept more than this many elements.") | |
96 | (max-soft :initarg :max-soft | |
97 | :initform 50000 | |
98 | :type integer | |
99 | :custom integer | |
100 | :documentation "Prune as much as possible to get to this size.") | |
652aa465 TZ |
101 | (prune-factor |
102 | :initarg :prune-factor | |
103 | :initform 0.1 | |
104 | :type float | |
105 | :custom float | |
106 | :documentation "At the max-hard limit, prune size * this entries.") | |
ccd58722 TZ |
107 | (tracked :initarg :tracked |
108 | :initform nil | |
109 | :type t | |
110 | :documentation "The tracked (indexed) fields, a list of symbols.") | |
111 | (precious :initarg :precious | |
112 | :initform nil | |
113 | :type t | |
114 | :documentation "The precious fields, a list of symbols.") | |
115 | (tracker :initarg :tracker | |
116 | :type hash-table | |
117 | :documentation "The field tracking hashtable.") | |
118 | (data :initarg :data | |
119 | :type hash-table | |
120 | :documentation "The data hashtable."))) | |
121 | ||
f38a45fa DE |
122 | (defmethod initialize-instance :AFTER ((this registry-db) slots) |
123 | "Set value of data slot of THIS after initialization." | |
124 | (with-slots (data tracker) this | |
125 | (unless (member :data slots) | |
126 | (setq data | |
127 | (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal))) | |
128 | (unless (member :tracker slots) | |
129 | (setq tracker (make-hash-table :size 100 :rehash-size 2.0))))) | |
130 | ||
131 | (defmethod registry-lookup ((db registry-db) keys) | |
132 | "Search for KEYS in the registry-db THIS. | |
58179cce | 133 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
f38a45fa DE |
134 | (let ((data (oref db :data))) |
135 | (delq nil | |
136 | (mapcar | |
137 | (lambda (k) | |
138 | (when (gethash k data) | |
139 | (list k (gethash k data)))) | |
140 | keys)))) | |
141 | ||
142 | (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys) | |
143 | "Search for KEYS in the registry-db THIS. | |
58179cce | 144 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
f38a45fa DE |
145 | (let ((data (oref db :data))) |
146 | (delq nil | |
147 | (loop for key in keys | |
148 | when (gethash key data) | |
149 | collect (list key (gethash key data)))))) | |
150 | ||
151 | (defmethod registry-lookup-secondary ((db registry-db) tracksym | |
152 | &optional create) | |
153 | "Search for TRACKSYM in the registry-db THIS. | |
ccd58722 | 154 | When CREATE is not nil, create the secondary index hashtable if needed." |
f38a45fa DE |
155 | (let ((h (gethash tracksym (oref db :tracker)))) |
156 | (if h | |
157 | h | |
158 | (when create | |
159 | (puthash tracksym | |
160 | (make-hash-table :size 800 :rehash-size 2.0 :test 'equal) | |
161 | (oref db :tracker)) | |
162 | (gethash tracksym (oref db :tracker)))))) | |
163 | ||
164 | (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val | |
165 | &optional set) | |
166 | "Search for TRACKSYM with value VAL in the registry-db THIS. | |
ccd58722 | 167 | When SET is not nil, set it for VAL (use t for an empty list)." |
f38a45fa DE |
168 | ;; either we're asked for creation or there should be an existing index |
169 | (when (or set (registry-lookup-secondary db tracksym)) | |
170 | ;; set the entry if requested, | |
171 | (when set | |
172 | (puthash val (if (eq t set) '() set) | |
173 | (registry-lookup-secondary db tracksym t))) | |
174 | (gethash val (registry-lookup-secondary db tracksym)))) | |
ccd58722 TZ |
175 | |
176 | (defun registry--match (mode entry check-list) | |
177 | ;; for all members | |
178 | (when check-list | |
179 | (let ((key (nth 0 (nth 0 check-list))) | |
180 | (vals (cdr-safe (nth 0 check-list))) | |
181 | found) | |
182 | (while (and key vals (not found)) | |
183 | (setq found (case mode | |
184 | (:member | |
185 | (member (car-safe vals) (cdr-safe (assoc key entry)))) | |
186 | (:regex | |
187 | (string-match (car vals) | |
188 | (mapconcat | |
189 | 'prin1-to-string | |
190 | (cdr-safe (assoc key entry)) | |
191 | "\0")))) | |
192 | vals (cdr-safe vals))) | |
193 | (or found | |
194 | (registry--match mode entry (cdr-safe check-list)))))) | |
195 | ||
f38a45fa DE |
196 | (defmethod registry-search ((db registry-db) &rest spec) |
197 | "Search for SPEC across the registry-db THIS. | |
ccd58722 TZ |
198 | For example calling with :member '(a 1 2) will match entry '((a 3 1)). |
199 | Calling with :all t (any non-nil value) will match all. | |
200 | Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\"). | |
201 | The test order is to check :all first, then :member, then :regex." | |
f38a45fa DE |
202 | (when db |
203 | (let ((all (plist-get spec :all)) | |
204 | (member (plist-get spec :member)) | |
205 | (regex (plist-get spec :regex))) | |
206 | (loop for k being the hash-keys of (oref db :data) | |
207 | using (hash-values v) | |
208 | when (or | |
209 | ;; :all non-nil returns all | |
210 | all | |
211 | ;; member matching | |
212 | (and member (registry--match :member v member)) | |
213 | ;; regex matching | |
214 | (and regex (registry--match :regex v regex))) | |
215 | collect k)))) | |
216 | ||
217 | (defmethod registry-delete ((db registry-db) keys assert &rest spec) | |
218 | "Delete KEYS from the registry-db THIS. | |
ccd58722 TZ |
219 | If KEYS is nil, use SPEC to do a search. |
220 | Updates the secondary ('tracked') indices as well. | |
221 | With assert non-nil, errors out if the key does not exist already." | |
f38a45fa DE |
222 | (let* ((data (oref db :data)) |
223 | (keys (or keys | |
224 | (apply 'registry-search db spec))) | |
225 | (tracked (oref db :tracked))) | |
226 | ||
227 | (dolist (key keys) | |
228 | (let ((entry (gethash key data))) | |
229 | (when assert | |
230 | (assert entry nil | |
8abee653 | 231 | "Key %s does not exist in database" key)) |
f38a45fa DE |
232 | ;; clean entry from the secondary indices |
233 | (dolist (tr tracked) | |
234 | ;; is this tracked symbol indexed? | |
235 | (when (registry-lookup-secondary db tr) | |
236 | ;; for every value in the entry under that key... | |
237 | (dolist (val (cdr-safe (assq tr entry))) | |
238 | (let* ((value-keys (registry-lookup-secondary-value | |
239 | db tr val))) | |
240 | (when (member key value-keys) | |
241 | ;; override the previous value | |
242 | (registry-lookup-secondary-value | |
243 | db tr val | |
244 | ;; with the indexed keys MINUS the current key | |
245 | ;; (we pass t when the list is empty) | |
246 | (or (delete key value-keys) t))))))) | |
247 | (remhash key data))) | |
248 | keys)) | |
249 | ||
250 | (defmethod registry-size ((db registry-db)) | |
251 | "Returns the size of the registry-db object THIS. | |
c7641e3c | 252 | This is the key count of the :data slot." |
f38a45fa | 253 | (hash-table-count (oref db :data))) |
c7641e3c | 254 | |
f38a45fa DE |
255 | (defmethod registry-full ((db registry-db)) |
256 | "Checks if registry-db THIS is full." | |
257 | (>= (registry-size db) | |
258 | (oref db :max-hard))) | |
81d7704c | 259 | |
f38a45fa DE |
260 | (defmethod registry-insert ((db registry-db) key entry) |
261 | "Insert ENTRY under KEY into the registry-db THIS. | |
ccd58722 TZ |
262 | Updates the secondary ('tracked') indices as well. |
263 | Errors out if the key exists already." | |
264 | ||
f38a45fa DE |
265 | (assert (not (gethash key (oref db :data))) nil |
266 | "Key already exists in database") | |
267 | ||
268 | (assert (not (registry-full db)) | |
269 | nil | |
270 | "registry max-hard size limit reached") | |
271 | ||
272 | ;; store the entry | |
273 | (puthash key entry (oref db :data)) | |
274 | ||
275 | ;; store the secondary indices | |
276 | (dolist (tr (oref db :tracked)) | |
277 | ;; for every value in the entry under that key... | |
278 | (dolist (val (cdr-safe (assq tr entry))) | |
279 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
280 | (pushnew key value-keys :test 'equal) | |
281 | (registry-lookup-secondary-value db tr val value-keys)))) | |
282 | entry) | |
283 | ||
284 | (defmethod registry-reindex ((db registry-db)) | |
285 | "Rebuild the secondary indices of registry-db THIS." | |
286 | (let ((count 0) | |
287 | (expected (* (length (oref db :tracked)) (registry-size db)))) | |
cf8b0c27 | 288 | (dolist (tr (oref db :tracked)) |
f38a45fa DE |
289 | (let (values) |
290 | (maphash | |
291 | (lambda (key v) | |
292 | (incf count) | |
293 | (when (and (< 0 expected) | |
294 | (= 0 (mod count 1000))) | |
295 | (message "reindexing: %d of %d (%.2f%%)" | |
296 | count expected (/ (* 100 count) expected))) | |
297 | (dolist (val (cdr-safe (assq tr v))) | |
298 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
299 | (push key value-keys) | |
300 | (registry-lookup-secondary-value db tr val value-keys)))) | |
301 | (oref db :data)))))) | |
302 | ||
303 | (defmethod registry-prune ((db registry-db) &optional sortfun) | |
304 | "Prunes the registry-db object THIS. | |
15cc1ab1 TZ |
305 | Removes only entries without the :precious keys if it can, |
306 | then removes oldest entries first. | |
307 | Returns the number of deleted entries. | |
308 | If SORTFUN is given, tries to keep entries that sort *higher*. | |
309 | SORTFUN is passed only the two keys so it must look them up directly." | |
f38a45fa DE |
310 | (dolist (collector '(registry-prune-soft-candidates |
311 | registry-prune-hard-candidates)) | |
312 | (let* ((size (registry-size db)) | |
313 | (collected (funcall collector db)) | |
314 | (limit (nth 0 collected)) | |
315 | (candidates (nth 1 collected)) | |
316 | ;; sort the candidates if SORTFUN was given | |
317 | (candidates (if sortfun (sort candidates sortfun) candidates)) | |
318 | (candidates-count (length candidates)) | |
319 | ;; are we over max-soft? | |
320 | (prune-needed (> size limit))) | |
321 | ||
322 | ;; while we have more candidates than we need to remove... | |
323 | (while (and (> candidates-count (- size limit)) candidates) | |
324 | (decf candidates-count) | |
325 | (setq candidates (cdr candidates))) | |
326 | ||
327 | (registry-delete db candidates nil) | |
328 | (length candidates)))) | |
329 | ||
330 | (defmethod registry-prune-soft-candidates ((db registry-db)) | |
331 | "Collects pruning candidates from the registry-db object THIS. | |
15cc1ab1 | 332 | Proposes only entries without the :precious keys." |
f38a45fa DE |
333 | (let* ((precious (oref db :precious)) |
334 | (precious-p (lambda (entry-key) | |
335 | (cdr (memq (car entry-key) precious)))) | |
336 | (data (oref db :data)) | |
337 | (limit (oref db :max-soft)) | |
338 | (candidates (loop for k being the hash-keys of data | |
339 | using (hash-values v) | |
340 | when (notany precious-p v) | |
341 | collect k))) | |
342 | (list limit candidates))) | |
343 | ||
344 | (defmethod registry-prune-hard-candidates ((db registry-db)) | |
345 | "Collects pruning candidates from the registry-db object THIS. | |
652aa465 | 346 | Proposes any entries over the max-hard limit minus size * prune-factor." |
f38a45fa DE |
347 | (let* ((data (oref db :data)) |
348 | ;; prune to (size * prune-factor) below the max-hard limit so | |
349 | ;; we're not pruning all the time | |
350 | (limit (max 0 (- (oref db :max-hard) | |
351 | (* (registry-size db) (oref db :prune-factor))))) | |
352 | (candidates (loop for k being the hash-keys of data | |
353 | collect k))) | |
354 | (list limit candidates))) | |
ccd58722 | 355 | |
ccd58722 TZ |
356 | (provide 'registry) |
357 | ;;; registry.el ends here |