Commit | Line | Data |
---|---|---|
ccd58722 TZ |
1 | ;;; registry.el --- Track and remember data items by various fields |
2 | ||
f872186f | 3 | ;; Copyright (C) 2011 Free Software Foundation, Inc. |
ccd58722 TZ |
4 | |
5 | ;; Author: Teodor Zlatanov <tzz@lifelogs.com> | |
6 | ;; Keywords: data | |
7 | ||
f872186f GM |
8 | ;; This file is part of GNU Emacs. |
9 | ||
10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
ccd58722 TZ |
11 | ;; it under the terms of the GNU General Public License as published by |
12 | ;; the Free Software Foundation, either version 3 of the License, or | |
13 | ;; (at your option) any later version. | |
14 | ||
f872186f | 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
ccd58722 TZ |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
f872186f | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
ccd58722 TZ |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This library provides a general-purpose EIEIO-based registry | |
26 | ;; database with persistence, initialized with these fields: | |
27 | ||
28 | ;; version: a float, 0.1 currently (don't change it) | |
29 | ||
30 | ;; max-hard: an integer, default 5000000 | |
31 | ||
32 | ;; max-soft: an integer, default 50000 | |
33 | ||
34 | ;; precious: a list of symbols | |
35 | ||
36 | ;; tracked: a list of symbols | |
37 | ||
38 | ;; tracker: a hashtable tuned for 100 symbols to track (you should | |
39 | ;; only access this with the :lookup2-function and the | |
40 | ;; :lookup2+-function) | |
41 | ||
42 | ;; data: a hashtable with default size 10K and resize threshold 2.0 | |
43 | ;; (this reflects the expected usage so override it if you know better) | |
44 | ||
45 | ;; ...plus methods to do all the work: `registry-search', | |
46 | ;; `registry-lookup', `registry-lookup-secondary', | |
47 | ;; `registry-lookup-secondary-value', `registry-insert', | |
48 | ;; `registry-delete', `registry-prune', `registry-size' which see | |
49 | ||
50 | ;; and with the following properties: | |
51 | ||
52 | ;; Every piece of data has a unique ID and some general-purpose fields | |
53 | ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g. | |
54 | ||
55 | ;; ((F1 D1) (F2 D2) (F3 a b c)) | |
56 | ||
57 | ;; Note that whether a field has one or many pieces of data, the data | |
58 | ;; is always a list of values. | |
59 | ||
60 | ;; The user decides which fields are "precious", F2 for example. At | |
61 | ;; PRUNE TIME (when the :prune-function is called), the registry will | |
62 | ;; trim any entries without the F2 field until the size is :max-soft | |
63 | ;; or less. No entries with the F2 field will be removed at PRUNE | |
64 | ;; TIME. | |
65 | ||
66 | ;; When an entry is inserted, the registry will reject new entries | |
67 | ;; if they bring it over the max-hard limit, even if they have the F2 | |
68 | ;; field. | |
69 | ||
70 | ;; The user decides which fields are "tracked", F1 for example. Any | |
71 | ;; new entry is then indexed by all the tracked fields so it can be | |
72 | ;; quickly looked up that way. The data is always a list (see example | |
73 | ;; above) and each list element is indexed. | |
74 | ||
75 | ;; Precious and tracked field names must be symbols. All other | |
76 | ;; fields can be any other Emacs Lisp types. | |
77 | ||
78 | ;;; Code: | |
79 | ||
f8fc0578 SM |
80 | (eval-when-compile (require 'cl)) |
81 | ||
42b23765 | 82 | (eval-when-compile |
2237da9c | 83 | (when (null (ignore-errors (require 'ert))) |
42b23765 TZ |
84 | (defmacro* ert-deftest (name () &body docstring-keys-and-body)))) |
85 | ||
2237da9c G |
86 | (ignore-errors |
87 | (require 'ert)) | |
ccd58722 TZ |
88 | (eval-and-compile |
89 | (or (ignore-errors (progn | |
90 | (require 'eieio) | |
91 | (require 'eieio-base))) | |
92 | ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib | |
93 | (ignore-errors | |
94 | (let ((load-path (cons (expand-file-name | |
95 | "gnus-fallback-lib/eieio" | |
96 | (file-name-directory (locate-library "gnus"))) | |
97 | load-path))) | |
98 | (require 'eieio) | |
99 | (require 'eieio-base))) | |
100 | (error | |
101 | "eieio not found in `load-path' or gnus-fallback-lib/ directory."))) | |
102 | ||
103 | (defclass registry-db (eieio-persistent) | |
104 | ((version :initarg :version | |
105 | :initform 0.1 | |
106 | :type float | |
107 | :custom float | |
108 | :documentation "The registry version.") | |
109 | (max-hard :initarg :max-hard | |
110 | :initform 5000000 | |
111 | :type integer | |
112 | :custom integer | |
113 | :documentation "Never accept more than this many elements.") | |
114 | (max-soft :initarg :max-soft | |
115 | :initform 50000 | |
116 | :type integer | |
117 | :custom integer | |
118 | :documentation "Prune as much as possible to get to this size.") | |
652aa465 TZ |
119 | (prune-factor |
120 | :initarg :prune-factor | |
121 | :initform 0.1 | |
122 | :type float | |
123 | :custom float | |
124 | :documentation "At the max-hard limit, prune size * this entries.") | |
ccd58722 TZ |
125 | (tracked :initarg :tracked |
126 | :initform nil | |
127 | :type t | |
128 | :documentation "The tracked (indexed) fields, a list of symbols.") | |
129 | (precious :initarg :precious | |
130 | :initform nil | |
131 | :type t | |
132 | :documentation "The precious fields, a list of symbols.") | |
133 | (tracker :initarg :tracker | |
134 | :type hash-table | |
135 | :documentation "The field tracking hashtable.") | |
136 | (data :initarg :data | |
137 | :type hash-table | |
138 | :documentation "The data hashtable."))) | |
139 | ||
8d6d9c8f KY |
140 | (eval-and-compile |
141 | (defmethod initialize-instance :AFTER ((this registry-db) slots) | |
142 | "Set value of data slot of THIS after initialization." | |
143 | (with-slots (data tracker) this | |
144 | (unless (member :data slots) | |
145 | (setq data | |
146 | (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal))) | |
147 | (unless (member :tracker slots) | |
148 | (setq tracker (make-hash-table :size 100 :rehash-size 2.0))))) | |
149 | ||
150 | (defmethod registry-lookup ((db registry-db) keys) | |
151 | "Search for KEYS in the registry-db THIS. | |
58179cce | 152 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
8d6d9c8f KY |
153 | (let ((data (oref db :data))) |
154 | (delq nil | |
155 | (mapcar | |
156 | (lambda (k) | |
157 | (when (gethash k data) | |
158 | (list k (gethash k data)))) | |
159 | keys)))) | |
160 | ||
161 | (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys) | |
162 | "Search for KEYS in the registry-db THIS. | |
58179cce | 163 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
8d6d9c8f KY |
164 | (let ((data (oref db :data))) |
165 | (delq nil | |
166 | (loop for key in keys | |
167 | when (gethash key data) | |
168 | collect (list key (gethash key data)))))) | |
169 | ||
170 | (defmethod registry-lookup-secondary ((db registry-db) tracksym | |
171 | &optional create) | |
172 | "Search for TRACKSYM in the registry-db THIS. | |
ccd58722 | 173 | When CREATE is not nil, create the secondary index hashtable if needed." |
8d6d9c8f KY |
174 | (let ((h (gethash tracksym (oref db :tracker)))) |
175 | (if h | |
176 | h | |
177 | (when create | |
178 | (puthash tracksym | |
179 | (make-hash-table :size 800 :rehash-size 2.0 :test 'equal) | |
180 | (oref db :tracker)) | |
181 | (gethash tracksym (oref db :tracker)))))) | |
182 | ||
183 | (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val | |
184 | &optional set) | |
185 | "Search for TRACKSYM with value VAL in the registry-db THIS. | |
ccd58722 | 186 | When SET is not nil, set it for VAL (use t for an empty list)." |
8d6d9c8f KY |
187 | ;; either we're asked for creation or there should be an existing index |
188 | (when (or set (registry-lookup-secondary db tracksym)) | |
189 | ;; set the entry if requested, | |
190 | (when set | |
191 | (puthash val (if (eq t set) '() set) | |
192 | (registry-lookup-secondary db tracksym t))) | |
193 | (gethash val (registry-lookup-secondary db tracksym))))) | |
ccd58722 TZ |
194 | |
195 | (defun registry--match (mode entry check-list) | |
196 | ;; for all members | |
197 | (when check-list | |
198 | (let ((key (nth 0 (nth 0 check-list))) | |
199 | (vals (cdr-safe (nth 0 check-list))) | |
200 | found) | |
201 | (while (and key vals (not found)) | |
202 | (setq found (case mode | |
203 | (:member | |
204 | (member (car-safe vals) (cdr-safe (assoc key entry)))) | |
205 | (:regex | |
206 | (string-match (car vals) | |
207 | (mapconcat | |
208 | 'prin1-to-string | |
209 | (cdr-safe (assoc key entry)) | |
210 | "\0")))) | |
211 | vals (cdr-safe vals))) | |
212 | (or found | |
213 | (registry--match mode entry (cdr-safe check-list)))))) | |
214 | ||
8d6d9c8f KY |
215 | (eval-and-compile |
216 | (defmethod registry-search ((db registry-db) &rest spec) | |
217 | "Search for SPEC across the registry-db THIS. | |
ccd58722 TZ |
218 | For example calling with :member '(a 1 2) will match entry '((a 3 1)). |
219 | Calling with :all t (any non-nil value) will match all. | |
220 | Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\"). | |
221 | The test order is to check :all first, then :member, then :regex." | |
8d6d9c8f KY |
222 | (when db |
223 | (let ((all (plist-get spec :all)) | |
224 | (member (plist-get spec :member)) | |
225 | (regex (plist-get spec :regex))) | |
226 | (loop for k being the hash-keys of (oref db :data) | |
227 | using (hash-values v) | |
228 | when (or | |
229 | ;; :all non-nil returns all | |
230 | all | |
231 | ;; member matching | |
232 | (and member (registry--match :member v member)) | |
233 | ;; regex matching | |
234 | (and regex (registry--match :regex v regex))) | |
235 | collect k)))) | |
236 | ||
237 | (defmethod registry-delete ((db registry-db) keys assert &rest spec) | |
238 | "Delete KEYS from the registry-db THIS. | |
ccd58722 TZ |
239 | If KEYS is nil, use SPEC to do a search. |
240 | Updates the secondary ('tracked') indices as well. | |
241 | With assert non-nil, errors out if the key does not exist already." | |
8d6d9c8f KY |
242 | (let* ((data (oref db :data)) |
243 | (keys (or keys | |
244 | (apply 'registry-search db spec))) | |
245 | (tracked (oref db :tracked))) | |
246 | ||
247 | (dolist (key keys) | |
248 | (let ((entry (gethash key data))) | |
249 | (when assert | |
250 | (assert entry nil | |
251 | "Key %s does not exists in database" key)) | |
252 | ;; clean entry from the secondary indices | |
253 | (dolist (tr tracked) | |
254 | ;; is this tracked symbol indexed? | |
255 | (when (registry-lookup-secondary db tr) | |
256 | ;; for every value in the entry under that key... | |
257 | (dolist (val (cdr-safe (assq tr entry))) | |
258 | (let* ((value-keys (registry-lookup-secondary-value | |
259 | db tr val))) | |
260 | (when (member key value-keys) | |
261 | ;; override the previous value | |
262 | (registry-lookup-secondary-value | |
263 | db tr val | |
264 | ;; with the indexed keys MINUS the current key | |
265 | ;; (we pass t when the list is empty) | |
266 | (or (delete key value-keys) t))))))) | |
267 | (remhash key data))) | |
268 | keys)) | |
269 | ||
81d7704c TZ |
270 | (defmethod registry-full ((db registry-db)) |
271 | "Checks if registry-db THIS is full." | |
272 | (>= (registry-size db) | |
273 | (oref db :max-hard))) | |
274 | ||
8d6d9c8f KY |
275 | (defmethod registry-insert ((db registry-db) key entry) |
276 | "Insert ENTRY under KEY into the registry-db THIS. | |
ccd58722 TZ |
277 | Updates the secondary ('tracked') indices as well. |
278 | Errors out if the key exists already." | |
279 | ||
8d6d9c8f KY |
280 | (assert (not (gethash key (oref db :data))) nil |
281 | "Key already exists in database") | |
282 | ||
81d7704c | 283 | (assert (not (registry-full db)) |
8d6d9c8f | 284 | nil |
c2f51e23 | 285 | "registry max-hard size limit reached") |
8d6d9c8f KY |
286 | |
287 | ;; store the entry | |
288 | (puthash key entry (oref db :data)) | |
289 | ||
290 | ;; store the secondary indices | |
cf8b0c27 | 291 | (dolist (tr (oref db :tracked)) |
8d6d9c8f KY |
292 | ;; for every value in the entry under that key... |
293 | (dolist (val (cdr-safe (assq tr entry))) | |
294 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
295 | (pushnew key value-keys :test 'equal) | |
296 | (registry-lookup-secondary-value db tr val value-keys)))) | |
297 | entry) | |
298 | ||
299 | (defmethod registry-reindex ((db registry-db)) | |
300 | "Rebuild the secondary indices of registry-db THIS." | |
301 | (let ((count 0) | |
302 | (expected (* (length (oref db :tracked)) (registry-size db)))) | |
303 | (dolist (tr (oref db :tracked)) | |
304 | (let (values) | |
305 | (maphash | |
306 | (lambda (key v) | |
307 | (incf count) | |
308 | (when (and (< 0 expected) | |
309 | (= 0 (mod count 1000))) | |
310 | (message "reindexing: %d of %d (%.2f%%)" | |
67a2aecd | 311 | count expected (/ (* 100 count) expected))) |
8d6d9c8f KY |
312 | (dolist (val (cdr-safe (assq tr v))) |
313 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
314 | (push key value-keys) | |
315 | (registry-lookup-secondary-value db tr val value-keys)))) | |
316 | (oref db :data)))))) | |
317 | ||
318 | (defmethod registry-size ((db registry-db)) | |
319 | "Returns the size of the registry-db object THIS. | |
ccd58722 | 320 | This is the key count of the :data slot." |
8d6d9c8f | 321 | (hash-table-count (oref db :data))) |
ccd58722 | 322 | |
15cc1ab1 | 323 | (defmethod registry-prune ((db registry-db) &optional sortfun) |
8d6d9c8f | 324 | "Prunes the registry-db object THIS. |
15cc1ab1 TZ |
325 | Removes only entries without the :precious keys if it can, |
326 | then removes oldest entries first. | |
327 | Returns the number of deleted entries. | |
328 | If SORTFUN is given, tries to keep entries that sort *higher*. | |
329 | SORTFUN is passed only the two keys so it must look them up directly." | |
330 | (dolist (collector '(registry-prune-soft-candidates | |
331 | registry-prune-hard-candidates)) | |
332 | (let* ((size (registry-size db)) | |
333 | (collected (funcall collector db)) | |
334 | (limit (nth 0 collected)) | |
335 | (candidates (nth 1 collected)) | |
336 | ;; sort the candidates if SORTFUN was given | |
337 | (candidates (if sortfun (sort candidates sortfun) candidates)) | |
338 | (candidates-count (length candidates)) | |
339 | ;; are we over max-soft? | |
340 | (prune-needed (> size limit))) | |
341 | ||
342 | ;; while we have more candidates than we need to remove... | |
343 | (while (and (> candidates-count (- size limit)) candidates) | |
344 | (decf candidates-count) | |
345 | (setq candidates (cdr candidates))) | |
346 | ||
347 | (registry-delete db candidates nil) | |
348 | (length candidates)))) | |
349 | ||
350 | (defmethod registry-prune-soft-candidates ((db registry-db)) | |
351 | "Collects pruning candidates from the registry-db object THIS. | |
352 | Proposes only entries without the :precious keys." | |
8d6d9c8f KY |
353 | (let* ((precious (oref db :precious)) |
354 | (precious-p (lambda (entry-key) | |
355 | (cdr (memq (car entry-key) precious)))) | |
356 | (data (oref db :data)) | |
357 | (limit (oref db :max-soft)) | |
8d6d9c8f KY |
358 | (candidates (loop for k being the hash-keys of data |
359 | using (hash-values v) | |
360 | when (notany precious-p v) | |
15cc1ab1 TZ |
361 | collect k))) |
362 | (list limit candidates))) | |
8d6d9c8f | 363 | |
15cc1ab1 TZ |
364 | (defmethod registry-prune-hard-candidates ((db registry-db)) |
365 | "Collects pruning candidates from the registry-db object THIS. | |
652aa465 | 366 | Proposes any entries over the max-hard limit minus size * prune-factor." |
15cc1ab1 | 367 | (let* ((data (oref db :data)) |
652aa465 TZ |
368 | ;; prune to (size * prune-factor) below the max-hard limit so |
369 | ;; we're not pruning all the time | |
370 | (limit (max 0 (- (oref db :max-hard) | |
371 | (* (registry-size db) (oref db :prune-factor))))) | |
15cc1ab1 TZ |
372 | (candidates (loop for k being the hash-keys of data |
373 | collect k))) | |
374 | (list limit candidates)))) | |
ccd58722 TZ |
375 | |
376 | (ert-deftest registry-instantiation-test () | |
377 | (should (registry-db "Testing"))) | |
378 | ||
379 | (ert-deftest registry-match-test () | |
380 | (let ((entry '((hello "goodbye" "bye") (blank)))) | |
381 | ||
382 | (message "Testing :regex matching") | |
383 | (should (registry--match :regex entry '((hello "nye" "bye")))) | |
384 | (should (registry--match :regex entry '((hello "good")))) | |
385 | (should-not (registry--match :regex entry '((hello "nye")))) | |
386 | (should-not (registry--match :regex entry '((hello)))) | |
387 | ||
388 | (message "Testing :member matching") | |
389 | (should (registry--match :member entry '((hello "bye")))) | |
390 | (should (registry--match :member entry '((hello "goodbye")))) | |
391 | (should-not (registry--match :member entry '((hello "good")))) | |
392 | (should-not (registry--match :member entry '((hello "nye")))) | |
393 | (should-not (registry--match :member entry '((hello))))) | |
394 | (message "Done with matching testing.")) | |
395 | ||
396 | (defun registry-make-testable-db (n &optional name file) | |
397 | (let* ((db (registry-db | |
398 | (or name "Testing") | |
399 | :file (or file "unused") | |
400 | :max-hard n | |
401 | :max-soft 0 ; keep nothing not precious | |
402 | :precious '(extra more-extra) | |
403 | :tracked '(sender subject groups)))) | |
404 | (dotimes (i n) | |
405 | (registry-insert db i `((sender "me") | |
406 | (subject "about you") | |
407 | (more-extra) ; empty data key should be pruned | |
408 | ;; first 5 entries will NOT have this extra data | |
409 | ,@(when (< 5 i) (list (list 'extra "more data"))) | |
410 | (groups ,(number-to-string i))))) | |
411 | db)) | |
412 | ||
413 | (ert-deftest registry-usage-test () | |
414 | (let* ((n 100) | |
415 | (db (registry-make-testable-db n))) | |
416 | (message "size %d" n) | |
417 | (should (= n (registry-size db))) | |
418 | (message "max-hard test") | |
419 | (should-error (registry-insert db "new" '())) | |
420 | (message "Individual lookup") | |
421 | (should (= 58 (caadr (registry-lookup db '(1 58 99))))) | |
422 | (message "Grouped individual lookup") | |
423 | (should (= 3 (length (registry-lookup db '(1 58 99))))) | |
4523dc7f G |
424 | (when (boundp 'lexical-binding) |
425 | (message "Individual lookup (breaks before lexbind)") | |
426 | (should (= 58 | |
cf8b0c27 | 427 | (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99))))) |
4523dc7f G |
428 | (message "Grouped individual lookup (breaks before lexbind)") |
429 | (should (= 3 | |
cf8b0c27 TZ |
430 | (length (registry-lookup-breaks-before-lexbind db |
431 | '(1 58 99)))))) | |
ccd58722 TZ |
432 | (message "Search") |
433 | (should (= n (length (registry-search db :all t)))) | |
434 | (should (= n (length (registry-search db :member '((sender "me")))))) | |
435 | (message "Secondary index search") | |
436 | (should (= n (length (registry-lookup-secondary-value db 'sender "me")))) | |
437 | (should (equal '(74) (registry-lookup-secondary-value db 'groups "74"))) | |
438 | (message "Delete") | |
439 | (should (registry-delete db '(1) t)) | |
440 | (decf n) | |
441 | (message "Search after delete") | |
442 | (should (= n (length (registry-search db :all t)))) | |
443 | (message "Secondary search after delete") | |
444 | (should (= n (length (registry-lookup-secondary-value db 'sender "me")))) | |
9820ccdf TZ |
445 | ;; (message "Pruning") |
446 | ;; (let* ((tokeep (registry-search db :member '((extra "more data")))) | |
447 | ;; (count (- n (length tokeep))) | |
448 | ;; (pruned (registry-prune db)) | |
449 | ;; (prune-count (length pruned))) | |
450 | ;; (message "Expecting to prune %d entries and pruned %d" | |
451 | ;; count prune-count) | |
452 | ;; (should (and (= count 5) | |
453 | ;; (= count prune-count)))) | |
ccd58722 TZ |
454 | (message "Done with usage testing."))) |
455 | ||
456 | (ert-deftest registry-persistence-test () | |
457 | (let* ((n 100) | |
458 | (tempfile (make-temp-file "registry-persistence-")) | |
459 | (name "persistence tester") | |
460 | (db (registry-make-testable-db n name tempfile)) | |
461 | size back) | |
462 | (message "Saving to %s" tempfile) | |
463 | (eieio-persistent-save db) | |
464 | (setq size (nth 7 (file-attributes tempfile))) | |
465 | (message "Saved to %s: size %d" tempfile size) | |
466 | (should (< 0 size)) | |
467 | (with-temp-buffer | |
468 | (insert-file-contents-literally tempfile) | |
469 | (should (looking-at (concat ";; Object " | |
470 | name | |
471 | "\n;; EIEIO PERSISTENT OBJECT")))) | |
472 | (message "Reading object back") | |
473 | (setq back (eieio-persistent-read tempfile)) | |
474 | (should back) | |
475 | (message "Read object back: %d keys, expected %d==%d" | |
476 | (registry-size back) n (registry-size db)) | |
477 | (should (= (registry-size back) n)) | |
478 | (should (= (registry-size back) (registry-size db))) | |
479 | (delete-file tempfile)) | |
480 | (message "Done with persistence testing.")) | |
481 | ||
482 | (provide 'registry) | |
483 | ;;; registry.el ends here |