Commit | Line | Data |
---|---|---|
ccd58722 TZ |
1 | ;;; registry.el --- Track and remember data items by various fields |
2 | ||
ab422c4d | 3 | ;; Copyright (C) 2011-2013 Free Software Foundation, Inc. |
ccd58722 TZ |
4 | |
5 | ;; Author: Teodor Zlatanov <tzz@lifelogs.com> | |
6 | ;; Keywords: data | |
7 | ||
f872186f GM |
8 | ;; This file is part of GNU Emacs. |
9 | ||
10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
ccd58722 TZ |
11 | ;; it under the terms of the GNU General Public License as published by |
12 | ;; the Free Software Foundation, either version 3 of the License, or | |
13 | ;; (at your option) any later version. | |
14 | ||
f872186f | 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
ccd58722 TZ |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
f872186f | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
ccd58722 TZ |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This library provides a general-purpose EIEIO-based registry | |
26 | ;; database with persistence, initialized with these fields: | |
27 | ||
28 | ;; version: a float, 0.1 currently (don't change it) | |
29 | ||
30 | ;; max-hard: an integer, default 5000000 | |
31 | ||
32 | ;; max-soft: an integer, default 50000 | |
33 | ||
34 | ;; precious: a list of symbols | |
35 | ||
36 | ;; tracked: a list of symbols | |
37 | ||
38 | ;; tracker: a hashtable tuned for 100 symbols to track (you should | |
39 | ;; only access this with the :lookup2-function and the | |
40 | ;; :lookup2+-function) | |
41 | ||
42 | ;; data: a hashtable with default size 10K and resize threshold 2.0 | |
43 | ;; (this reflects the expected usage so override it if you know better) | |
44 | ||
45 | ;; ...plus methods to do all the work: `registry-search', | |
46 | ;; `registry-lookup', `registry-lookup-secondary', | |
47 | ;; `registry-lookup-secondary-value', `registry-insert', | |
48 | ;; `registry-delete', `registry-prune', `registry-size' which see | |
49 | ||
50 | ;; and with the following properties: | |
51 | ||
52 | ;; Every piece of data has a unique ID and some general-purpose fields | |
53 | ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g. | |
54 | ||
55 | ;; ((F1 D1) (F2 D2) (F3 a b c)) | |
56 | ||
57 | ;; Note that whether a field has one or many pieces of data, the data | |
58 | ;; is always a list of values. | |
59 | ||
60 | ;; The user decides which fields are "precious", F2 for example. At | |
61 | ;; PRUNE TIME (when the :prune-function is called), the registry will | |
62 | ;; trim any entries without the F2 field until the size is :max-soft | |
63 | ;; or less. No entries with the F2 field will be removed at PRUNE | |
64 | ;; TIME. | |
65 | ||
66 | ;; When an entry is inserted, the registry will reject new entries | |
67 | ;; if they bring it over the max-hard limit, even if they have the F2 | |
68 | ;; field. | |
69 | ||
70 | ;; The user decides which fields are "tracked", F1 for example. Any | |
71 | ;; new entry is then indexed by all the tracked fields so it can be | |
72 | ;; quickly looked up that way. The data is always a list (see example | |
73 | ;; above) and each list element is indexed. | |
74 | ||
75 | ;; Precious and tracked field names must be symbols. All other | |
76 | ;; fields can be any other Emacs Lisp types. | |
77 | ||
78 | ;;; Code: | |
79 | ||
f8fc0578 SM |
80 | (eval-when-compile (require 'cl)) |
81 | ||
6651c015 KY |
82 | (require 'eieio) |
83 | (require 'eieio-base) | |
ccd58722 TZ |
84 | |
85 | (defclass registry-db (eieio-persistent) | |
86 | ((version :initarg :version | |
87 | :initform 0.1 | |
88 | :type float | |
89 | :custom float | |
90 | :documentation "The registry version.") | |
91 | (max-hard :initarg :max-hard | |
92 | :initform 5000000 | |
93 | :type integer | |
94 | :custom integer | |
95 | :documentation "Never accept more than this many elements.") | |
96 | (max-soft :initarg :max-soft | |
97 | :initform 50000 | |
98 | :type integer | |
99 | :custom integer | |
100 | :documentation "Prune as much as possible to get to this size.") | |
652aa465 TZ |
101 | (prune-factor |
102 | :initarg :prune-factor | |
103 | :initform 0.1 | |
104 | :type float | |
105 | :custom float | |
106 | :documentation "At the max-hard limit, prune size * this entries.") | |
ccd58722 TZ |
107 | (tracked :initarg :tracked |
108 | :initform nil | |
109 | :type t | |
110 | :documentation "The tracked (indexed) fields, a list of symbols.") | |
111 | (precious :initarg :precious | |
112 | :initform nil | |
113 | :type t | |
114 | :documentation "The precious fields, a list of symbols.") | |
115 | (tracker :initarg :tracker | |
116 | :type hash-table | |
117 | :documentation "The field tracking hashtable.") | |
118 | (data :initarg :data | |
119 | :type hash-table | |
120 | :documentation "The data hashtable."))) | |
121 | ||
8d6d9c8f KY |
122 | (eval-and-compile |
123 | (defmethod initialize-instance :AFTER ((this registry-db) slots) | |
124 | "Set value of data slot of THIS after initialization." | |
125 | (with-slots (data tracker) this | |
126 | (unless (member :data slots) | |
127 | (setq data | |
128 | (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal))) | |
129 | (unless (member :tracker slots) | |
130 | (setq tracker (make-hash-table :size 100 :rehash-size 2.0))))) | |
131 | ||
132 | (defmethod registry-lookup ((db registry-db) keys) | |
133 | "Search for KEYS in the registry-db THIS. | |
58179cce | 134 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
8d6d9c8f KY |
135 | (let ((data (oref db :data))) |
136 | (delq nil | |
137 | (mapcar | |
138 | (lambda (k) | |
139 | (when (gethash k data) | |
140 | (list k (gethash k data)))) | |
141 | keys)))) | |
142 | ||
143 | (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys) | |
144 | "Search for KEYS in the registry-db THIS. | |
58179cce | 145 | Returns an alist of the key followed by the entry in a list, not a cons cell." |
8d6d9c8f KY |
146 | (let ((data (oref db :data))) |
147 | (delq nil | |
148 | (loop for key in keys | |
149 | when (gethash key data) | |
150 | collect (list key (gethash key data)))))) | |
151 | ||
152 | (defmethod registry-lookup-secondary ((db registry-db) tracksym | |
153 | &optional create) | |
154 | "Search for TRACKSYM in the registry-db THIS. | |
ccd58722 | 155 | When CREATE is not nil, create the secondary index hashtable if needed." |
8d6d9c8f KY |
156 | (let ((h (gethash tracksym (oref db :tracker)))) |
157 | (if h | |
158 | h | |
159 | (when create | |
160 | (puthash tracksym | |
161 | (make-hash-table :size 800 :rehash-size 2.0 :test 'equal) | |
162 | (oref db :tracker)) | |
163 | (gethash tracksym (oref db :tracker)))))) | |
164 | ||
165 | (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val | |
166 | &optional set) | |
167 | "Search for TRACKSYM with value VAL in the registry-db THIS. | |
ccd58722 | 168 | When SET is not nil, set it for VAL (use t for an empty list)." |
8d6d9c8f KY |
169 | ;; either we're asked for creation or there should be an existing index |
170 | (when (or set (registry-lookup-secondary db tracksym)) | |
171 | ;; set the entry if requested, | |
172 | (when set | |
173 | (puthash val (if (eq t set) '() set) | |
174 | (registry-lookup-secondary db tracksym t))) | |
175 | (gethash val (registry-lookup-secondary db tracksym))))) | |
ccd58722 TZ |
176 | |
177 | (defun registry--match (mode entry check-list) | |
178 | ;; for all members | |
179 | (when check-list | |
180 | (let ((key (nth 0 (nth 0 check-list))) | |
181 | (vals (cdr-safe (nth 0 check-list))) | |
182 | found) | |
183 | (while (and key vals (not found)) | |
184 | (setq found (case mode | |
185 | (:member | |
186 | (member (car-safe vals) (cdr-safe (assoc key entry)))) | |
187 | (:regex | |
188 | (string-match (car vals) | |
189 | (mapconcat | |
190 | 'prin1-to-string | |
191 | (cdr-safe (assoc key entry)) | |
192 | "\0")))) | |
193 | vals (cdr-safe vals))) | |
194 | (or found | |
195 | (registry--match mode entry (cdr-safe check-list)))))) | |
196 | ||
8d6d9c8f KY |
197 | (eval-and-compile |
198 | (defmethod registry-search ((db registry-db) &rest spec) | |
199 | "Search for SPEC across the registry-db THIS. | |
ccd58722 TZ |
200 | For example calling with :member '(a 1 2) will match entry '((a 3 1)). |
201 | Calling with :all t (any non-nil value) will match all. | |
202 | Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\"). | |
203 | The test order is to check :all first, then :member, then :regex." | |
8d6d9c8f KY |
204 | (when db |
205 | (let ((all (plist-get spec :all)) | |
206 | (member (plist-get spec :member)) | |
207 | (regex (plist-get spec :regex))) | |
208 | (loop for k being the hash-keys of (oref db :data) | |
209 | using (hash-values v) | |
210 | when (or | |
211 | ;; :all non-nil returns all | |
212 | all | |
213 | ;; member matching | |
214 | (and member (registry--match :member v member)) | |
215 | ;; regex matching | |
216 | (and regex (registry--match :regex v regex))) | |
217 | collect k)))) | |
218 | ||
219 | (defmethod registry-delete ((db registry-db) keys assert &rest spec) | |
220 | "Delete KEYS from the registry-db THIS. | |
ccd58722 TZ |
221 | If KEYS is nil, use SPEC to do a search. |
222 | Updates the secondary ('tracked') indices as well. | |
223 | With assert non-nil, errors out if the key does not exist already." | |
8d6d9c8f KY |
224 | (let* ((data (oref db :data)) |
225 | (keys (or keys | |
226 | (apply 'registry-search db spec))) | |
227 | (tracked (oref db :tracked))) | |
228 | ||
229 | (dolist (key keys) | |
230 | (let ((entry (gethash key data))) | |
231 | (when assert | |
232 | (assert entry nil | |
233 | "Key %s does not exists in database" key)) | |
234 | ;; clean entry from the secondary indices | |
235 | (dolist (tr tracked) | |
236 | ;; is this tracked symbol indexed? | |
237 | (when (registry-lookup-secondary db tr) | |
238 | ;; for every value in the entry under that key... | |
239 | (dolist (val (cdr-safe (assq tr entry))) | |
240 | (let* ((value-keys (registry-lookup-secondary-value | |
241 | db tr val))) | |
242 | (when (member key value-keys) | |
243 | ;; override the previous value | |
244 | (registry-lookup-secondary-value | |
245 | db tr val | |
246 | ;; with the indexed keys MINUS the current key | |
247 | ;; (we pass t when the list is empty) | |
248 | (or (delete key value-keys) t))))))) | |
249 | (remhash key data))) | |
250 | keys)) | |
251 | ||
c7641e3c GM |
252 | (defmethod registry-size ((db registry-db)) |
253 | "Returns the size of the registry-db object THIS. | |
254 | This is the key count of the :data slot." | |
255 | (hash-table-count (oref db :data))) | |
256 | ||
81d7704c TZ |
257 | (defmethod registry-full ((db registry-db)) |
258 | "Checks if registry-db THIS is full." | |
259 | (>= (registry-size db) | |
260 | (oref db :max-hard))) | |
261 | ||
8d6d9c8f KY |
262 | (defmethod registry-insert ((db registry-db) key entry) |
263 | "Insert ENTRY under KEY into the registry-db THIS. | |
ccd58722 TZ |
264 | Updates the secondary ('tracked') indices as well. |
265 | Errors out if the key exists already." | |
266 | ||
8d6d9c8f KY |
267 | (assert (not (gethash key (oref db :data))) nil |
268 | "Key already exists in database") | |
269 | ||
81d7704c | 270 | (assert (not (registry-full db)) |
8d6d9c8f | 271 | nil |
c2f51e23 | 272 | "registry max-hard size limit reached") |
8d6d9c8f KY |
273 | |
274 | ;; store the entry | |
275 | (puthash key entry (oref db :data)) | |
276 | ||
277 | ;; store the secondary indices | |
cf8b0c27 | 278 | (dolist (tr (oref db :tracked)) |
8d6d9c8f KY |
279 | ;; for every value in the entry under that key... |
280 | (dolist (val (cdr-safe (assq tr entry))) | |
281 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
282 | (pushnew key value-keys :test 'equal) | |
283 | (registry-lookup-secondary-value db tr val value-keys)))) | |
284 | entry) | |
285 | ||
286 | (defmethod registry-reindex ((db registry-db)) | |
287 | "Rebuild the secondary indices of registry-db THIS." | |
288 | (let ((count 0) | |
289 | (expected (* (length (oref db :tracked)) (registry-size db)))) | |
290 | (dolist (tr (oref db :tracked)) | |
291 | (let (values) | |
292 | (maphash | |
293 | (lambda (key v) | |
294 | (incf count) | |
295 | (when (and (< 0 expected) | |
296 | (= 0 (mod count 1000))) | |
297 | (message "reindexing: %d of %d (%.2f%%)" | |
67a2aecd | 298 | count expected (/ (* 100 count) expected))) |
8d6d9c8f KY |
299 | (dolist (val (cdr-safe (assq tr v))) |
300 | (let* ((value-keys (registry-lookup-secondary-value db tr val))) | |
301 | (push key value-keys) | |
302 | (registry-lookup-secondary-value db tr val value-keys)))) | |
303 | (oref db :data)))))) | |
304 | ||
15cc1ab1 | 305 | (defmethod registry-prune ((db registry-db) &optional sortfun) |
8d6d9c8f | 306 | "Prunes the registry-db object THIS. |
15cc1ab1 TZ |
307 | Removes only entries without the :precious keys if it can, |
308 | then removes oldest entries first. | |
309 | Returns the number of deleted entries. | |
310 | If SORTFUN is given, tries to keep entries that sort *higher*. | |
311 | SORTFUN is passed only the two keys so it must look them up directly." | |
312 | (dolist (collector '(registry-prune-soft-candidates | |
313 | registry-prune-hard-candidates)) | |
314 | (let* ((size (registry-size db)) | |
315 | (collected (funcall collector db)) | |
316 | (limit (nth 0 collected)) | |
317 | (candidates (nth 1 collected)) | |
318 | ;; sort the candidates if SORTFUN was given | |
319 | (candidates (if sortfun (sort candidates sortfun) candidates)) | |
320 | (candidates-count (length candidates)) | |
321 | ;; are we over max-soft? | |
322 | (prune-needed (> size limit))) | |
323 | ||
324 | ;; while we have more candidates than we need to remove... | |
325 | (while (and (> candidates-count (- size limit)) candidates) | |
326 | (decf candidates-count) | |
327 | (setq candidates (cdr candidates))) | |
328 | ||
329 | (registry-delete db candidates nil) | |
330 | (length candidates)))) | |
331 | ||
332 | (defmethod registry-prune-soft-candidates ((db registry-db)) | |
333 | "Collects pruning candidates from the registry-db object THIS. | |
334 | Proposes only entries without the :precious keys." | |
8d6d9c8f KY |
335 | (let* ((precious (oref db :precious)) |
336 | (precious-p (lambda (entry-key) | |
337 | (cdr (memq (car entry-key) precious)))) | |
338 | (data (oref db :data)) | |
339 | (limit (oref db :max-soft)) | |
8d6d9c8f KY |
340 | (candidates (loop for k being the hash-keys of data |
341 | using (hash-values v) | |
342 | when (notany precious-p v) | |
15cc1ab1 TZ |
343 | collect k))) |
344 | (list limit candidates))) | |
8d6d9c8f | 345 | |
15cc1ab1 TZ |
346 | (defmethod registry-prune-hard-candidates ((db registry-db)) |
347 | "Collects pruning candidates from the registry-db object THIS. | |
652aa465 | 348 | Proposes any entries over the max-hard limit minus size * prune-factor." |
15cc1ab1 | 349 | (let* ((data (oref db :data)) |
652aa465 TZ |
350 | ;; prune to (size * prune-factor) below the max-hard limit so |
351 | ;; we're not pruning all the time | |
352 | (limit (max 0 (- (oref db :max-hard) | |
353 | (* (registry-size db) (oref db :prune-factor))))) | |
15cc1ab1 TZ |
354 | (candidates (loop for k being the hash-keys of data |
355 | collect k))) | |
356 | (list limit candidates)))) | |
ccd58722 | 357 | |
ccd58722 TZ |
358 | (provide 'registry) |
359 | ;;; registry.el ends here |