gnus-art.el (article-treat-date): Assume that gnus-article-date-headers may be a...
[bpt/emacs.git] / lisp / gnus / registry.el
CommitLineData
ccd58722
TZ
1;;; registry.el --- Track and remember data items by various fields
2
f872186f 3;; Copyright (C) 2011 Free Software Foundation, Inc.
ccd58722
TZ
4
5;; Author: Teodor Zlatanov <tzz@lifelogs.com>
6;; Keywords: data
7
f872186f
GM
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software: you can redistribute it and/or modify
ccd58722
TZ
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation, either version 3 of the License, or
13;; (at your option) any later version.
14
f872186f 15;; GNU Emacs is distributed in the hope that it will be useful,
ccd58722
TZ
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
f872186f 21;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
ccd58722
TZ
22
23;;; Commentary:
24
25;; This library provides a general-purpose EIEIO-based registry
26;; database with persistence, initialized with these fields:
27
28;; version: a float, 0.1 currently (don't change it)
29
30;; max-hard: an integer, default 5000000
31
32;; max-soft: an integer, default 50000
33
34;; precious: a list of symbols
35
36;; tracked: a list of symbols
37
38;; tracker: a hashtable tuned for 100 symbols to track (you should
39;; only access this with the :lookup2-function and the
40;; :lookup2+-function)
41
42;; data: a hashtable with default size 10K and resize threshold 2.0
43;; (this reflects the expected usage so override it if you know better)
44
45;; ...plus methods to do all the work: `registry-search',
46;; `registry-lookup', `registry-lookup-secondary',
47;; `registry-lookup-secondary-value', `registry-insert',
48;; `registry-delete', `registry-prune', `registry-size' which see
49
50;; and with the following properties:
51
52;; Every piece of data has a unique ID and some general-purpose fields
53;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
54
55;; ((F1 D1) (F2 D2) (F3 a b c))
56
57;; Note that whether a field has one or many pieces of data, the data
58;; is always a list of values.
59
60;; The user decides which fields are "precious", F2 for example. At
61;; PRUNE TIME (when the :prune-function is called), the registry will
62;; trim any entries without the F2 field until the size is :max-soft
63;; or less. No entries with the F2 field will be removed at PRUNE
64;; TIME.
65
66;; When an entry is inserted, the registry will reject new entries
67;; if they bring it over the max-hard limit, even if they have the F2
68;; field.
69
70;; The user decides which fields are "tracked", F1 for example. Any
71;; new entry is then indexed by all the tracked fields so it can be
72;; quickly looked up that way. The data is always a list (see example
73;; above) and each list element is indexed.
74
75;; Precious and tracked field names must be symbols. All other
76;; fields can be any other Emacs Lisp types.
77
78;;; Code:
79
42b23765 80(eval-when-compile
2237da9c 81 (when (null (ignore-errors (require 'ert)))
42b23765
TZ
82 (defmacro* ert-deftest (name () &body docstring-keys-and-body))))
83
2237da9c
G
84(ignore-errors
85 (require 'ert))
86
ccd58722
TZ
87(eval-when-compile (require 'cl))
88(eval-and-compile
89 (or (ignore-errors (progn
90 (require 'eieio)
91 (require 'eieio-base)))
92 ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
93 (ignore-errors
94 (let ((load-path (cons (expand-file-name
95 "gnus-fallback-lib/eieio"
96 (file-name-directory (locate-library "gnus")))
97 load-path)))
98 (require 'eieio)
99 (require 'eieio-base)))
100 (error
101 "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
102
103(defclass registry-db (eieio-persistent)
104 ((version :initarg :version
105 :initform 0.1
106 :type float
107 :custom float
108 :documentation "The registry version.")
109 (max-hard :initarg :max-hard
110 :initform 5000000
111 :type integer
112 :custom integer
113 :documentation "Never accept more than this many elements.")
114 (max-soft :initarg :max-soft
115 :initform 50000
116 :type integer
117 :custom integer
118 :documentation "Prune as much as possible to get to this size.")
119 (tracked :initarg :tracked
120 :initform nil
121 :type t
122 :documentation "The tracked (indexed) fields, a list of symbols.")
123 (precious :initarg :precious
124 :initform nil
125 :type t
126 :documentation "The precious fields, a list of symbols.")
127 (tracker :initarg :tracker
128 :type hash-table
129 :documentation "The field tracking hashtable.")
130 (data :initarg :data
131 :type hash-table
132 :documentation "The data hashtable.")))
133
2237da9c 134(defmethod initialize-instance :AFTER ((this registry-db) slots)
ccd58722
TZ
135 "Set value of data slot of THIS after initialization."
136 (with-slots (data tracker) this
137 (unless (member :data slots)
138 (setq data (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
139 (unless (member :tracker slots)
140 (setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))
141
142(defmethod registry-lookup ((db registry-db) keys)
143 "Search for KEYS in the registry-db THIS.
144Returns a alist of the key followed by the entry in a list, not a cons cell."
145 (let ((data (oref db :data)))
146 (delq nil
147 (mapcar
148 (lambda (k)
149 (when (gethash k data)
150 (list k (gethash k data))))
151 keys))))
152
153(defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
154 "Search for KEYS in the registry-db THIS.
155Returns a alist of the key followed by the entry in a list, not a cons cell."
156 (let ((data (oref db :data)))
157 (delq nil
158 (loop for key in keys
159 when (gethash key data)
160 collect (list key (gethash key data))))))
161
162(defmethod registry-lookup-secondary ((db registry-db) tracksym
163 &optional create)
164 "Search for TRACKSYM in the registry-db THIS.
165When CREATE is not nil, create the secondary index hashtable if needed."
166 (let ((h (gethash tracksym (oref db :tracker))))
167 (if h
168 h
169 (when create
170 (puthash tracksym
171 (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
172 (oref db :tracker))
173 (gethash tracksym (oref db :tracker))))))
174
175(defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
176 &optional set)
177 "Search for TRACKSYM with value VAL in the registry-db THIS.
178When SET is not nil, set it for VAL (use t for an empty list)."
179 ;; either we're asked for creation or there should be an existing index
180 (when (or set (registry-lookup-secondary db tracksym))
181 ;; set the entry if requested,
182 (when set
183 (puthash val (if (eq t set) '() set)
184 (registry-lookup-secondary db tracksym t)))
185 (gethash val (registry-lookup-secondary db tracksym))))
186
187(defun registry--match (mode entry check-list)
188 ;; for all members
189 (when check-list
190 (let ((key (nth 0 (nth 0 check-list)))
191 (vals (cdr-safe (nth 0 check-list)))
192 found)
193 (while (and key vals (not found))
194 (setq found (case mode
195 (:member
196 (member (car-safe vals) (cdr-safe (assoc key entry))))
197 (:regex
198 (string-match (car vals)
199 (mapconcat
200 'prin1-to-string
201 (cdr-safe (assoc key entry))
202 "\0"))))
203 vals (cdr-safe vals)))
204 (or found
205 (registry--match mode entry (cdr-safe check-list))))))
206
207(defmethod registry-search ((db registry-db) &rest spec)
208 "Search for SPEC across the registry-db THIS.
209For example calling with :member '(a 1 2) will match entry '((a 3 1)).
210Calling with :all t (any non-nil value) will match all.
211Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
212The test order is to check :all first, then :member, then :regex."
213 (when db
214 (let ((all (plist-get spec :all))
215 (member (plist-get spec :member))
216 (regex (plist-get spec :regex)))
217 (loop for k being the hash-keys of (oref db :data) using (hash-values v)
218 when (or
219 ;; :all non-nil returns all
220 all
221 ;; member matching
222 (and member (registry--match :member v member))
223 ;; regex matching
224 (and regex (registry--match :regex v regex)))
225 collect k))))
226
227(defmethod registry-delete ((db registry-db) keys assert &rest spec)
228 "Delete KEYS from the registry-db THIS.
229If KEYS is nil, use SPEC to do a search.
230Updates the secondary ('tracked') indices as well.
231With assert non-nil, errors out if the key does not exist already."
232 (let* ((data (oref db :data))
233 (keys (or keys
234 (apply 'registry-search db spec)))
235 (tracked (oref db :tracked)))
236
237 (dolist (key keys)
238 (let ((entry (gethash key data)))
239 (when assert
240 (assert entry nil
241 "Key %s does not exists in database" key))
242 ;; clean entry from the secondary indices
243 (dolist (tr tracked)
244 ;; is this tracked symbol indexed?
245 (when (registry-lookup-secondary db tr)
246 ;; for every value in the entry under that key...
247 (dolist (val (cdr-safe (assq tr entry)))
248 (let* ((value-keys (registry-lookup-secondary-value db tr val)))
249 (when (member key value-keys)
250 ;; override the previous value
251 (registry-lookup-secondary-value
252 db tr val
253 ;; with the indexed keys MINUS the current key
254 ;; (we pass t when the list is empty)
255 (or (delete key value-keys) t)))))))
256 (remhash key data)))
257 keys))
258
259(defmethod registry-insert ((db registry-db) key entry)
260 "Insert ENTRY under KEY into the registry-db THIS.
261Updates the secondary ('tracked') indices as well.
262Errors out if the key exists already."
263
264 (assert (not (gethash key (oref db :data))) nil
265 "Key already exists in database")
266
267 (assert (< (registry-size db)
268 (oref db :max-hard))
269 nil
270 "max-hard size limit reached")
271
272 ;; store the entry
273 (puthash key entry (oref db :data))
274
275 ;; store the secondary indices
276 (dolist (tr (oref db :tracked))
277 ;; for every value in the entry under that key...
278 (dolist (val (cdr-safe (assq tr entry)))
279 (let* ((value-keys (registry-lookup-secondary-value db tr val)))
280 (pushnew key value-keys :test 'equal)
281 (registry-lookup-secondary-value db tr val value-keys))))
282 entry)
283
284(defmethod registry-size ((db registry-db))
285 "Returns the size of the registry-db object THIS.
286This is the key count of the :data slot."
287 (hash-table-count (oref db :data)))
288
289(defmethod registry-prune ((db registry-db))
290 "Prunes the registry-db object THIS.
291Removes only entries without the :precious keys."
292 (let* ((precious (oref db :precious))
293 (precious-p (lambda (entry-key) (cdr (memq (car entry-key) precious))))
294 (data (oref db :data))
295 (limit (oref db :max-soft))
296 (size (registry-size db))
297 (candidates (loop for k being the hash-keys of data
298 using (hash-values v)
299 when (notany precious-p v)
300 collect k))
301 (candidates-count (length candidates))
302 ;; are we over max-soft?
303 (prune-needed (> size limit)))
304
305 ;; while we have more candidates than we need to remove...
306 (while (and (> candidates-count (- size limit)) candidates)
307 (decf candidates-count)
308 (setq candidates (cdr candidates)))
309
310 (registry-delete db candidates nil)))
311
312(ert-deftest registry-instantiation-test ()
313 (should (registry-db "Testing")))
314
315(ert-deftest registry-match-test ()
316 (let ((entry '((hello "goodbye" "bye") (blank))))
317
318 (message "Testing :regex matching")
319 (should (registry--match :regex entry '((hello "nye" "bye"))))
320 (should (registry--match :regex entry '((hello "good"))))
321 (should-not (registry--match :regex entry '((hello "nye"))))
322 (should-not (registry--match :regex entry '((hello))))
323
324 (message "Testing :member matching")
325 (should (registry--match :member entry '((hello "bye"))))
326 (should (registry--match :member entry '((hello "goodbye"))))
327 (should-not (registry--match :member entry '((hello "good"))))
328 (should-not (registry--match :member entry '((hello "nye"))))
329 (should-not (registry--match :member entry '((hello)))))
330 (message "Done with matching testing."))
331
332(defun registry-make-testable-db (n &optional name file)
333 (let* ((db (registry-db
334 (or name "Testing")
335 :file (or file "unused")
336 :max-hard n
337 :max-soft 0 ; keep nothing not precious
338 :precious '(extra more-extra)
339 :tracked '(sender subject groups))))
340 (dotimes (i n)
341 (registry-insert db i `((sender "me")
342 (subject "about you")
343 (more-extra) ; empty data key should be pruned
344 ;; first 5 entries will NOT have this extra data
345 ,@(when (< 5 i) (list (list 'extra "more data")))
346 (groups ,(number-to-string i)))))
347 db))
348
349(ert-deftest registry-usage-test ()
350 (let* ((n 100)
351 (db (registry-make-testable-db n)))
352 (message "size %d" n)
353 (should (= n (registry-size db)))
354 (message "max-hard test")
355 (should-error (registry-insert db "new" '()))
356 (message "Individual lookup")
357 (should (= 58 (caadr (registry-lookup db '(1 58 99)))))
358 (message "Grouped individual lookup")
359 (should (= 3 (length (registry-lookup db '(1 58 99)))))
4523dc7f
G
360 (when (boundp 'lexical-binding)
361 (message "Individual lookup (breaks before lexbind)")
362 (should (= 58
363 (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99)))))
364 (message "Grouped individual lookup (breaks before lexbind)")
365 (should (= 3
366 (length (registry-lookup-breaks-before-lexbind db '(1 58 99))))))
ccd58722
TZ
367 (message "Search")
368 (should (= n (length (registry-search db :all t))))
369 (should (= n (length (registry-search db :member '((sender "me"))))))
370 (message "Secondary index search")
371 (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
372 (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
373 (message "Delete")
374 (should (registry-delete db '(1) t))
375 (decf n)
376 (message "Search after delete")
377 (should (= n (length (registry-search db :all t))))
378 (message "Secondary search after delete")
379 (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
380 (message "Pruning")
381 (let* ((tokeep (registry-search db :member '((extra "more data"))))
382 (count (- n (length tokeep)))
383 (pruned (registry-prune db))
384 (prune-count (length pruned)))
385 (message "Expecting to prune %d entries and pruned %d"
386 count prune-count)
387 (should (and (= count 5)
388 (= count prune-count))))
389 (message "Done with usage testing.")))
390
391(ert-deftest registry-persistence-test ()
392 (let* ((n 100)
393 (tempfile (make-temp-file "registry-persistence-"))
394 (name "persistence tester")
395 (db (registry-make-testable-db n name tempfile))
396 size back)
397 (message "Saving to %s" tempfile)
398 (eieio-persistent-save db)
399 (setq size (nth 7 (file-attributes tempfile)))
400 (message "Saved to %s: size %d" tempfile size)
401 (should (< 0 size))
402 (with-temp-buffer
403 (insert-file-contents-literally tempfile)
404 (should (looking-at (concat ";; Object "
405 name
406 "\n;; EIEIO PERSISTENT OBJECT"))))
407 (message "Reading object back")
408 (setq back (eieio-persistent-read tempfile))
409 (should back)
410 (message "Read object back: %d keys, expected %d==%d"
411 (registry-size back) n (registry-size db))
412 (should (= (registry-size back) n))
413 (should (= (registry-size back) (registry-size db)))
414 (delete-file tempfile))
415 (message "Done with persistence testing."))
416
417(provide 'registry)
418;;; registry.el ends here