Commit | Line | Data |
---|---|---|
ccd58722 TZ |
1 | ;;; registry.el --- Track and remember data items by various fields |
2 | ||
f872186f | 3 | ;; Copyright (C) 2011 Free Software Foundation, Inc. |
ccd58722 TZ |
4 | |
5 | ;; Author: Teodor Zlatanov <tzz@lifelogs.com> | |
6 | ;; Keywords: data | |
7 | ||
f872186f GM |
8 | ;; This file is part of GNU Emacs. |
9 | ||
10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
ccd58722 TZ |
11 | ;; it under the terms of the GNU General Public License as published by |
12 | ;; the Free Software Foundation, either version 3 of the License, or | |
13 | ;; (at your option) any later version. | |
14 | ||
f872186f | 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
ccd58722 TZ |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
f872186f | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
ccd58722 TZ |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This library provides a general-purpose EIEIO-based registry | |
26 | ;; database with persistence, initialized with these fields: | |
27 | ||
28 | ;; version: a float, 0.1 currently (don't change it) | |
29 | ||
30 | ;; max-hard: an integer, default 5000000 | |
31 | ||
32 | ;; max-soft: an integer, default 50000 | |
33 | ||
34 | ;; precious: a list of symbols | |
35 | ||
36 | ;; tracked: a list of symbols | |
37 | ||
38 | ;; tracker: a hashtable tuned for 100 symbols to track (you should | |
39 | ;; only access this with the :lookup2-function and the | |
40 | ;; :lookup2+-function) | |
41 | ||
42 | ;; data: a hashtable with default size 10K and resize threshold 2.0 | |
43 | ;; (this reflects the expected usage so override it if you know better) | |
44 | ||
45 | ;; ...plus methods to do all the work: `registry-search', | |
46 | ;; `registry-lookup', `registry-lookup-secondary', | |
47 | ;; `registry-lookup-secondary-value', `registry-insert', | |
48 | ;; `registry-delete', `registry-prune', `registry-size' which see | |
49 | ||
50 | ;; and with the following properties: | |
51 | ||
52 | ;; Every piece of data has a unique ID and some general-purpose fields | |
53 | ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g. | |
54 | ||
55 | ;; ((F1 D1) (F2 D2) (F3 a b c)) | |
56 | ||
57 | ;; Note that whether a field has one or many pieces of data, the data | |
58 | ;; is always a list of values. | |
59 | ||
60 | ;; The user decides which fields are "precious", F2 for example. At | |
61 | ;; PRUNE TIME (when the :prune-function is called), the registry will | |
62 | ;; trim any entries without the F2 field until the size is :max-soft | |
63 | ;; or less. No entries with the F2 field will be removed at PRUNE | |
64 | ;; TIME. | |
65 | ||
66 | ;; When an entry is inserted, the registry will reject new entries | |
67 | ;; if they bring it over the max-hard limit, even if they have the F2 | |
68 | ;; field. | |
69 | ||
70 | ;; The user decides which fields are "tracked", F1 for example. Any | |
71 | ;; new entry is then indexed by all the tracked fields so it can be | |
72 | ;; quickly looked up that way. The data is always a list (see example | |
73 | ;; above) and each list element is indexed. | |
74 | ||
75 | ;; Precious and tracked field names must be symbols. All other | |
76 | ;; fields can be any other Emacs Lisp types. | |
77 | ||
78 | ;;; Code: | |
79 | ||
42b23765 | 80 | (eval-when-compile |
2237da9c | 81 | (when (null (ignore-errors (require 'ert))) |
42b23765 TZ |
82 | (defmacro* ert-deftest (name () &body docstring-keys-and-body)))) |
83 | ||
2237da9c G |
84 | (ignore-errors |
85 | (require 'ert)) | |
86 | ||
ccd58722 TZ |
87 | (eval-when-compile (require 'cl)) |
88 | (eval-and-compile | |
89 | (or (ignore-errors (progn | |
90 | (require 'eieio) | |
91 | (require 'eieio-base))) | |
92 | ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib | |
93 | (ignore-errors | |
94 | (let ((load-path (cons (expand-file-name | |
95 | "gnus-fallback-lib/eieio" | |
96 | (file-name-directory (locate-library "gnus"))) | |
97 | load-path))) | |
98 | (require 'eieio) | |
99 | (require 'eieio-base))) | |
100 | (error | |
101 | "eieio not found in `load-path' or gnus-fallback-lib/ directory."))) | |
102 | ||
103 | (defclass registry-db (eieio-persistent) | |
104 | ((version :initarg :version | |
105 | :initform 0.1 | |
106 | :type float | |
107 | :custom float | |
108 | :documentation "The registry version.") | |
109 | (max-hard :initarg :max-hard | |
110 | :initform 5000000 | |
111 | :type integer | |
112 | :custom integer | |
113 | :documentation "Never accept more than this many elements.") | |
114 | (max-soft :initarg :max-soft | |
115 | :initform 50000 | |
116 | :type integer | |
117 | :custom integer | |
118 | :documentation "Prune as much as possible to get to this size.") | |
119 | (tracked :initarg :tracked | |
120 | :initform nil | |
121 | :type t | |
122 | :documentation "The tracked (indexed) fields, a list of symbols.") | |
123 | (precious :initarg :precious | |
124 | :initform nil | |
125 | :type t | |
126 | :documentation "The precious fields, a list of symbols.") | |
127 | (tracker :initarg :tracker | |
128 | :type hash-table | |
129 | :documentation "The field tracking hashtable.") | |
130 | (data :initarg :data | |
131 | :type hash-table | |
132 | :documentation "The data hashtable."))) | |
133 | ||
2237da9c | 134 | (defmethod initialize-instance :AFTER ((this registry-db) slots) |
ccd58722 TZ |
135 | "Set value of data slot of THIS after initialization." |
136 | (with-slots (data tracker) this | |
137 | (unless (member :data slots) | |
138 | (setq data (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal))) | |
139 | (unless (member :tracker slots) | |
140 | (setq tracker (make-hash-table :size 100 :rehash-size 2.0))))) | |
141 | ||
142 | (defmethod registry-lookup ((db registry-db) keys) | |
143 | "Search for KEYS in the registry-db THIS. | |
144 | Returns a alist of the key followed by the entry in a list, not a cons cell." | |
145 | (let ((data (oref db :data))) | |
146 | (delq nil | |
147 | (mapcar | |
148 | (lambda (k) | |
149 | (when (gethash k data) | |
150 | (list k (gethash k data)))) | |
151 | keys)))) | |
152 | ||
153 | (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys) | |
154 | "Search for KEYS in the registry-db THIS. | |
155 | Returns a alist of the key followed by the entry in a list, not a cons cell." | |
156 | (let ((data (oref db :data))) | |
157 | (delq nil | |
158 | (loop for key in keys | |
159 | when (gethash key data) | |
160 | collect (list key (gethash key data)))))) | |
161 | ||
162 | (defmethod registry-lookup-secondary ((db registry-db) tracksym | |
163 | &optional create) | |
164 | "Search for TRACKSYM in the registry-db THIS. | |
165 | When CREATE is not nil, create the secondary index hashtable if needed." | |
166 | (let ((h (gethash tracksym (oref db :tracker)))) | |
167 | (if h | |
168 | h | |
169 | (when create | |
170 | (puthash tracksym | |
171 | (make-hash-table :size 800 :rehash-size 2.0 :test 'equal) | |
172 | (oref db :tracker)) | |
173 | (gethash tracksym (oref db :tracker)))))) | |
174 | ||
175 | (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val | |
176 | &optional set) | |
177 | "Search for TRACKSYM with value VAL in the registry-db THIS. | |
178 | When SET is not nil, set it for VAL (use t for an empty list)." | |
179 | ;; either we're asked for creation or there should be an existing index | |
180 | (when (or set (registry-lookup-secondary db tracksym)) | |
181 | ;; set the entry if requested, | |
182 | (when set | |
183 | (puthash val (if (eq t set) '() set) | |
184 | (registry-lookup-secondary db tracksym t))) | |
185 | (gethash val (registry-lookup-secondary db tracksym)))) | |
186 | ||
187 | (defun registry--match (mode entry check-list) | |
188 | ;; for all members | |
189 | (when check-list | |
190 | (let ((key (nth 0 (nth 0 check-list))) | |
191 | (vals (cdr-safe (nth 0 check-list))) | |
192 | found) | |
193 | (while (and key vals (not found)) | |
194 | (setq found (case mode | |
195 | (:member | |
196 | (member (car-safe vals) (cdr-safe (assoc key entry)))) | |
197 | (:regex | |
198 | (string-match (car vals) | |
199 | (mapconcat | |
200 | 'prin1-to-string | |
201 | (cdr-safe (assoc key entry)) | |
202 | "\0")))) | |
203 | vals (cdr-safe vals))) | |
204 | (or found | |
205 | (registry--match mode entry (cdr-safe check-list)))))) | |
206 | ||
207 | (defmethod registry-search ((db registry-db) &rest spec) | |
208 | "Search for SPEC across the registry-db THIS. | |
209 | For example calling with :member '(a 1 2) will match entry '((a 3 1)). | |
210 | Calling with :all t (any non-nil value) will match all. | |
211 | Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\"). | |
212 | The test order is to check :all first, then :member, then :regex." | |
213 | (when db | |
214 | (let ((all (plist-get spec :all)) | |
215 | (member (plist-get spec :member)) | |
216 | (regex (plist-get spec :regex))) | |
217 | (loop for k being the hash-keys of (oref db :data) using (hash-values v) | |
218 | when (or | |
219 | ;; :all non-nil returns all | |
220 | all | |
221 | ;; member matching | |
222 | (and member (registry--match :member v member)) | |
223 | ;; regex matching | |
224 | (and regex (registry--match :regex v regex))) | |
225 | collect k)))) | |
226 | ||
227 | (defmethod registry-delete ((db registry-db) keys assert &rest spec) | |
228 | "Delete KEYS from the registry-db THIS. | |
229 | If KEYS is nil, use SPEC to do a search. | |
230 | Updates the secondary ('tracked') indices as well. | |
231 | With assert non-nil, errors out if the key does not exist already." | |
232 | (let* ((data (oref db :data)) | |
233 | (keys (or keys | |
234 | (apply 'registry-search db spec))) | |
235 | (tracked (oref db :tracked))) | |
236 | ||
237 | (dolist (key keys) | |
238 | (let ((entry (gethash key data))) | |
239 | (when assert | |
240 | (assert entry nil | |
241 | "Key %s does not exists in database" key)) | |
242 | ;; clean entry from the secondary indices | |
243 | (dolist (tr tracked) | |
244 | ;; is this tracked symbol indexed? | |
245 | (when (registry-lookup-secondary db tr) | |
246 | ;; for every value in the entry under that key... | |
247 | (dolist (val (cdr-safe (assq tr entry))) | |
248 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
249 | (when (member key value-keys) | |
250 | ;; override the previous value | |
251 | (registry-lookup-secondary-value | |
252 | db tr val | |
253 | ;; with the indexed keys MINUS the current key | |
254 | ;; (we pass t when the list is empty) | |
255 | (or (delete key value-keys) t))))))) | |
256 | (remhash key data))) | |
257 | keys)) | |
258 | ||
259 | (defmethod registry-insert ((db registry-db) key entry) | |
260 | "Insert ENTRY under KEY into the registry-db THIS. | |
261 | Updates the secondary ('tracked') indices as well. | |
262 | Errors out if the key exists already." | |
263 | ||
264 | (assert (not (gethash key (oref db :data))) nil | |
265 | "Key already exists in database") | |
266 | ||
267 | (assert (< (registry-size db) | |
268 | (oref db :max-hard)) | |
269 | nil | |
270 | "max-hard size limit reached") | |
271 | ||
272 | ;; store the entry | |
273 | (puthash key entry (oref db :data)) | |
274 | ||
275 | ;; store the secondary indices | |
276 | (dolist (tr (oref db :tracked)) | |
277 | ;; for every value in the entry under that key... | |
278 | (dolist (val (cdr-safe (assq tr entry))) | |
279 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
280 | (pushnew key value-keys :test 'equal) | |
281 | (registry-lookup-secondary-value db tr val value-keys)))) | |
282 | entry) | |
283 | ||
284 | (defmethod registry-size ((db registry-db)) | |
285 | "Returns the size of the registry-db object THIS. | |
286 | This is the key count of the :data slot." | |
287 | (hash-table-count (oref db :data))) | |
288 | ||
289 | (defmethod registry-prune ((db registry-db)) | |
290 | "Prunes the registry-db object THIS. | |
291 | Removes only entries without the :precious keys." | |
292 | (let* ((precious (oref db :precious)) | |
293 | (precious-p (lambda (entry-key) (cdr (memq (car entry-key) precious)))) | |
294 | (data (oref db :data)) | |
295 | (limit (oref db :max-soft)) | |
296 | (size (registry-size db)) | |
297 | (candidates (loop for k being the hash-keys of data | |
298 | using (hash-values v) | |
299 | when (notany precious-p v) | |
300 | collect k)) | |
301 | (candidates-count (length candidates)) | |
302 | ;; are we over max-soft? | |
303 | (prune-needed (> size limit))) | |
304 | ||
305 | ;; while we have more candidates than we need to remove... | |
306 | (while (and (> candidates-count (- size limit)) candidates) | |
307 | (decf candidates-count) | |
308 | (setq candidates (cdr candidates))) | |
309 | ||
310 | (registry-delete db candidates nil))) | |
311 | ||
312 | (ert-deftest registry-instantiation-test () | |
313 | (should (registry-db "Testing"))) | |
314 | ||
315 | (ert-deftest registry-match-test () | |
316 | (let ((entry '((hello "goodbye" "bye") (blank)))) | |
317 | ||
318 | (message "Testing :regex matching") | |
319 | (should (registry--match :regex entry '((hello "nye" "bye")))) | |
320 | (should (registry--match :regex entry '((hello "good")))) | |
321 | (should-not (registry--match :regex entry '((hello "nye")))) | |
322 | (should-not (registry--match :regex entry '((hello)))) | |
323 | ||
324 | (message "Testing :member matching") | |
325 | (should (registry--match :member entry '((hello "bye")))) | |
326 | (should (registry--match :member entry '((hello "goodbye")))) | |
327 | (should-not (registry--match :member entry '((hello "good")))) | |
328 | (should-not (registry--match :member entry '((hello "nye")))) | |
329 | (should-not (registry--match :member entry '((hello))))) | |
330 | (message "Done with matching testing.")) | |
331 | ||
332 | (defun registry-make-testable-db (n &optional name file) | |
333 | (let* ((db (registry-db | |
334 | (or name "Testing") | |
335 | :file (or file "unused") | |
336 | :max-hard n | |
337 | :max-soft 0 ; keep nothing not precious | |
338 | :precious '(extra more-extra) | |
339 | :tracked '(sender subject groups)))) | |
340 | (dotimes (i n) | |
341 | (registry-insert db i `((sender "me") | |
342 | (subject "about you") | |
343 | (more-extra) ; empty data key should be pruned | |
344 | ;; first 5 entries will NOT have this extra data | |
345 | ,@(when (< 5 i) (list (list 'extra "more data"))) | |
346 | (groups ,(number-to-string i))))) | |
347 | db)) | |
348 | ||
349 | (ert-deftest registry-usage-test () | |
350 | (let* ((n 100) | |
351 | (db (registry-make-testable-db n))) | |
352 | (message "size %d" n) | |
353 | (should (= n (registry-size db))) | |
354 | (message "max-hard test") | |
355 | (should-error (registry-insert db "new" '())) | |
356 | (message "Individual lookup") | |
357 | (should (= 58 (caadr (registry-lookup db '(1 58 99))))) | |
358 | (message "Grouped individual lookup") | |
359 | (should (= 3 (length (registry-lookup db '(1 58 99))))) | |
4523dc7f G |
360 | (when (boundp 'lexical-binding) |
361 | (message "Individual lookup (breaks before lexbind)") | |
362 | (should (= 58 | |
363 | (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99))))) | |
364 | (message "Grouped individual lookup (breaks before lexbind)") | |
365 | (should (= 3 | |
366 | (length (registry-lookup-breaks-before-lexbind db '(1 58 99)))))) | |
ccd58722 TZ |
367 | (message "Search") |
368 | (should (= n (length (registry-search db :all t)))) | |
369 | (should (= n (length (registry-search db :member '((sender "me")))))) | |
370 | (message "Secondary index search") | |
371 | (should (= n (length (registry-lookup-secondary-value db 'sender "me")))) | |
372 | (should (equal '(74) (registry-lookup-secondary-value db 'groups "74"))) | |
373 | (message "Delete") | |
374 | (should (registry-delete db '(1) t)) | |
375 | (decf n) | |
376 | (message "Search after delete") | |
377 | (should (= n (length (registry-search db :all t)))) | |
378 | (message "Secondary search after delete") | |
379 | (should (= n (length (registry-lookup-secondary-value db 'sender "me")))) | |
380 | (message "Pruning") | |
381 | (let* ((tokeep (registry-search db :member '((extra "more data")))) | |
382 | (count (- n (length tokeep))) | |
383 | (pruned (registry-prune db)) | |
384 | (prune-count (length pruned))) | |
385 | (message "Expecting to prune %d entries and pruned %d" | |
386 | count prune-count) | |
387 | (should (and (= count 5) | |
388 | (= count prune-count)))) | |
389 | (message "Done with usage testing."))) | |
390 | ||
391 | (ert-deftest registry-persistence-test () | |
392 | (let* ((n 100) | |
393 | (tempfile (make-temp-file "registry-persistence-")) | |
394 | (name "persistence tester") | |
395 | (db (registry-make-testable-db n name tempfile)) | |
396 | size back) | |
397 | (message "Saving to %s" tempfile) | |
398 | (eieio-persistent-save db) | |
399 | (setq size (nth 7 (file-attributes tempfile))) | |
400 | (message "Saved to %s: size %d" tempfile size) | |
401 | (should (< 0 size)) | |
402 | (with-temp-buffer | |
403 | (insert-file-contents-literally tempfile) | |
404 | (should (looking-at (concat ";; Object " | |
405 | name | |
406 | "\n;; EIEIO PERSISTENT OBJECT")))) | |
407 | (message "Reading object back") | |
408 | (setq back (eieio-persistent-read tempfile)) | |
409 | (should back) | |
410 | (message "Read object back: %d keys, expected %d==%d" | |
411 | (registry-size back) n (registry-size db)) | |
412 | (should (= (registry-size back) n)) | |
413 | (should (= (registry-size back) (registry-size db))) | |
414 | (delete-file tempfile)) | |
415 | (message "Done with persistence testing.")) | |
416 | ||
417 | (provide 'registry) | |
418 | ;;; registry.el ends here |