Commit | Line | Data |
---|---|---|
b9c5136f | 1 | /* Caching facts about regions of the buffer, for optimization. |
95df8112 | 2 | |
ab422c4d PE |
3 | Copyright (C) 1985-1989, 1993, 1995, 2001-2013 Free Software Foundation, |
4 | Inc. | |
b9c5136f KH |
5 | |
6 | This file is part of GNU Emacs. | |
7 | ||
9ec0b715 | 8 | GNU Emacs is free software: you can redistribute it and/or modify |
b9c5136f | 9 | it under the terms of the GNU General Public License as published by |
9ec0b715 GM |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. | |
b9c5136f KH |
12 | |
13 | GNU Emacs is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
9ec0b715 | 19 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
b9c5136f KH |
20 | |
21 | ||
22 | #include <config.h> | |
5890e9f7 JD |
23 | #include <stdio.h> |
24 | ||
b9c5136f | 25 | #include "lisp.h" |
e5560ff7 | 26 | #include "character.h" |
b9c5136f KH |
27 | #include "buffer.h" |
28 | #include "region-cache.h" | |
29 | ||
b9c5136f KH |
30 | \f |
31 | /* Data structures. */ | |
32 | ||
33 | /* The region cache. | |
34 | ||
35 | We want something that maps character positions in a buffer onto | |
36 | values. The representation should deal well with long runs of | |
37 | characters with the same value. | |
38 | ||
39 | The tricky part: the representation should be very cheap to | |
40 | maintain in the presence of many insertions and deletions. If the | |
41 | overhead of maintaining the cache is too high, the speedups it | |
42 | offers will be worthless. | |
43 | ||
44 | ||
45 | We represent the region cache as a sorted array of struct | |
46 | boundary's, each of which contains a buffer position and a value; | |
47 | the value applies to all the characters after the buffer position, | |
48 | until the position of the next boundary, or the end of the buffer. | |
49 | ||
50 | The cache always has a boundary whose position is BUF_BEG, so | |
51 | there's always a value associated with every character in the | |
52 | buffer. Since the cache is sorted, this is always the first | |
53 | element of the cache. | |
54 | ||
55 | To facilitate the insertion and deletion of boundaries in the | |
56 | cache, the cache has a gap, just like Emacs's text buffers do. | |
57 | ||
58 | To help boundary positions float along with insertions and | |
59 | deletions, all boundary positions before the cache gap are stored | |
60 | relative to BUF_BEG (buf) (thus they're >= 0), and all boundary | |
61 | positions after the gap are stored relative to BUF_Z (buf) (thus | |
62 | they're <= 0). Look at BOUNDARY_POS to see this in action. See | |
63 | revalidate_region_cache to see how this helps. */ | |
64 | ||
/* One entry of the region cache: a stored position together with the
   value that applies to the text following it.  */
struct boundary
{
  /* Stored position.  It is an offset, not an absolute buffer
     position: entries before the cache gap are relative to the
     cache's buffer_beg, entries after it to the cache's buffer_end
     (see BOUNDARY_POS).  */
  ptrdiff_t pos;

  /* The value in effect from this boundary onward.  */
  int value;
};
69 | ||
/* A region cache: a gapped, sorted array of boundaries plus the
   bookkeeping needed to interpret and revalidate them.  */
struct region_cache
{
  /* Sorted array of the places where the known-ness of the buffer
     changes.  */
  struct boundary *boundaries;

  /* The gap occupies boundaries[gap_start ... gap_start + gap_len - 1].  */
  ptrdiff_t gap_start;
  ptrdiff_t gap_len;

  /* Number of elements of BOUNDARIES in use, i.e. not counting the
     gap.  */
  ptrdiff_t cache_len;

  /* Number of characters at the beginning (resp. end) of the buffer
     that have not changed since invalid entries were last cleaned out
     of the cache.  The two areas overlap when the buffer is entirely
     unchanged.  */
  ptrdiff_t beg_unchanged;
  ptrdiff_t end_unchanged;

  /* The first and last buffer positions as of the last revalidation.
     Boundaries are stored relative to these, so keeping them here lets
     us interpret stored positions without passing the buffer (or its
     endpoints) around.

     buffer_beg is always 1; it exists for symmetry with buffer_end and
     with the BEG and BUF_BEG macros.  */
  ptrdiff_t buffer_beg;
  ptrdiff_t buffer_end;
};
97 | ||
/* Map cache index I of cache C to its slot in C->boundaries, i.e. skip
   over the gap for indices at or beyond gap_start.  Evaluates its
   arguments more than once.  */
#define BOUNDARY_SLOT(c, i) \
  ((i) < (c)->gap_start ? (i) : (c)->gap_len + (i))

/* Return the buffer position of boundary I in cache C.  Entries before
   the gap are offsets from buffer_beg; entries after it are offsets
   from buffer_end.  */
#define BOUNDARY_POS(c, i)                                         \
  (((i) < (c)->gap_start ? (c)->buffer_beg : (c)->buffer_end)     \
   + (c)->boundaries[BOUNDARY_SLOT (c, i)].pos)

/* Return the value for the text after boundary I in cache C.  */
#define BOUNDARY_VALUE(c, i) \
  ((c)->boundaries[BOUNDARY_SLOT (c, i)].value)

/* Set the value for the text after boundary I in cache C to V.  */
#define SET_BOUNDARY_VALUE(c, i, v) \
  ((c)->boundaries[BOUNDARY_SLOT (c, i)].value = (v))
115 | ||
116 | ||
enum
{
  /* Size, in elements, of the boundary array that new_region_cache
     allocates for a fresh cache.  */
  NEW_CACHE_GAP = 40,

  /* See invalidate_region_cache: when a new invalidation would discard
     information about more than this many characters, the cache is
     revalidated first so that the information is preserved instead of
     thrown away.  */
  PRESERVE_THRESHOLD = 500
};
125 | ||
/* Defined further down; invalidate_region_cache needs to call it.  */
static void revalidate_region_cache (struct buffer *buf,
                                     struct region_cache *c);
b9c5136f KH |
127 | |
128 | \f | |
129 | /* Interface: Allocating, initializing, and disposing of region caches. */ | |
130 | ||
131 | struct region_cache * | |
971de7fb | 132 | new_region_cache (void) |
b9c5136f | 133 | { |
38182d90 | 134 | struct region_cache *c = xmalloc (sizeof *c); |
b9c5136f KH |
135 | |
136 | c->gap_start = 0; | |
137 | c->gap_len = NEW_CACHE_GAP; | |
138 | c->cache_len = 0; | |
23f86fce DA |
139 | c->boundaries = xmalloc ((c->gap_len + c->cache_len) |
140 | * sizeof (*c->boundaries)); | |
b9c5136f KH |
141 | |
142 | c->beg_unchanged = 0; | |
143 | c->end_unchanged = 0; | |
b0ead4a8 SM |
144 | c->buffer_beg = BEG; |
145 | c->buffer_end = BEG; | |
b9c5136f KH |
146 | |
147 | /* Insert the boundary for the buffer start. */ | |
148 | c->cache_len++; | |
149 | c->gap_len--; | |
150 | c->gap_start++; | |
151 | c->boundaries[0].pos = 0; /* from buffer_beg */ | |
152 | c->boundaries[0].value = 0; | |
153 | ||
154 | return c; | |
155 | } | |
156 | ||
157 | void | |
971de7fb | 158 | free_region_cache (struct region_cache *c) |
b9c5136f KH |
159 | { |
160 | xfree (c->boundaries); | |
161 | xfree (c); | |
162 | } | |
163 | ||
164 | \f | |
165 | /* Finding positions in the cache. */ | |
166 | ||
167 | /* Return the index of the last boundary in cache C at or before POS. | |
168 | In other words, return the boundary that specifies the value for | |
169 | the region POS..(POS + 1). | |
170 | ||
171 | This operation should be logarithmic in the number of cache | |
172 | entries. It would be nice if it took advantage of locality of | |
173 | reference, too, by searching entries near the last entry found. */ | |
0065d054 PE |
174 | static ptrdiff_t |
175 | find_cache_boundary (struct region_cache *c, ptrdiff_t pos) | |
b9c5136f | 176 | { |
0065d054 | 177 | ptrdiff_t low = 0, high = c->cache_len; |
b9c5136f KH |
178 | |
179 | while (low + 1 < high) | |
180 | { | |
181 | /* mid is always a valid index, because low < high and ">> 1" | |
182 | rounds down. */ | |
0065d054 PE |
183 | ptrdiff_t mid = (low >> 1) + (high >> 1) + (low & high & 1); |
184 | ptrdiff_t boundary = BOUNDARY_POS (c, mid); | |
b9c5136f KH |
185 | |
186 | if (pos < boundary) | |
187 | high = mid; | |
188 | else | |
189 | low = mid; | |
190 | } | |
191 | ||
192 | /* Some testing. */ | |
193 | if (BOUNDARY_POS (c, low) > pos | |
194 | || (low + 1 < c->cache_len | |
195 | && BOUNDARY_POS (c, low + 1) <= pos)) | |
1088b922 | 196 | emacs_abort (); |
b9c5136f KH |
197 | |
198 | return low; | |
199 | } | |
200 | ||
201 | ||
202 | \f | |
203 | /* Moving the cache gap around, inserting, and deleting. */ | |
204 | ||
205 | ||
206 | /* Move the gap of cache C to index POS, and make sure it has space | |
207 | for at least MIN_SIZE boundaries. */ | |
208 | static void | |
0065d054 | 209 | move_cache_gap (struct region_cache *c, ptrdiff_t pos, ptrdiff_t min_size) |
b9c5136f KH |
210 | { |
211 | /* Copy these out of the cache and into registers. */ | |
0065d054 PE |
212 | ptrdiff_t gap_start = c->gap_start; |
213 | ptrdiff_t gap_len = c->gap_len; | |
214 | ptrdiff_t buffer_beg = c->buffer_beg; | |
215 | ptrdiff_t buffer_end = c->buffer_end; | |
b9c5136f KH |
216 | |
217 | if (pos < 0 | |
218 | || pos > c->cache_len) | |
1088b922 | 219 | emacs_abort (); |
b9c5136f KH |
220 | |
221 | /* We mustn't ever try to put the gap before the dummy start | |
222 | boundary. That must always be start-relative. */ | |
223 | if (pos == 0) | |
1088b922 | 224 | emacs_abort (); |
b9c5136f KH |
225 | |
226 | /* Need we move the gap right? */ | |
227 | while (gap_start < pos) | |
228 | { | |
229 | /* Copy one boundary from after to before the gap, and | |
230 | convert its position to start-relative. */ | |
231 | c->boundaries[gap_start].pos | |
232 | = (buffer_end | |
233 | + c->boundaries[gap_start + gap_len].pos | |
234 | - buffer_beg); | |
235 | c->boundaries[gap_start].value | |
236 | = c->boundaries[gap_start + gap_len].value; | |
237 | gap_start++; | |
238 | } | |
239 | ||
240 | /* To enlarge the gap, we need to re-allocate the boundary array, and | |
241 | then shift the area after the gap to the new end. Since the cost | |
242 | is proportional to the amount of stuff after the gap, we do the | |
243 | enlargement here, after a right shift but before a left shift, | |
244 | when the portion after the gap is smallest. */ | |
245 | if (gap_len < min_size) | |
246 | { | |
eb424fe3 | 247 | ptrdiff_t i, nboundaries = c->cache_len; |
b9c5136f KH |
248 | |
249 | c->boundaries = | |
eb424fe3 | 250 | xpalloc (c->boundaries, &nboundaries, min_size - gap_len, -1, |
0065d054 | 251 | sizeof *c->boundaries); |
b9c5136f KH |
252 | |
253 | /* Some systems don't provide a version of the copy routine that | |
254 | can be trusted to shift memory upward into an overlapping | |
255 | region. memmove isn't widely available. */ | |
eb424fe3 | 256 | min_size = nboundaries - c->cache_len - gap_len; |
b9c5136f KH |
257 | for (i = c->cache_len - 1; i >= gap_start; i--) |
258 | { | |
259 | c->boundaries[i + min_size].pos = c->boundaries[i + gap_len].pos; | |
260 | c->boundaries[i + min_size].value = c->boundaries[i + gap_len].value; | |
261 | } | |
262 | ||
263 | gap_len = min_size; | |
264 | } | |
265 | ||
266 | /* Need we move the gap left? */ | |
267 | while (pos < gap_start) | |
268 | { | |
269 | gap_start--; | |
270 | ||
271 | /* Copy one region from before to after the gap, and | |
272 | convert its position to end-relative. */ | |
273 | c->boundaries[gap_start + gap_len].pos | |
274 | = c->boundaries[gap_start].pos + buffer_beg - buffer_end; | |
275 | c->boundaries[gap_start + gap_len].value | |
276 | = c->boundaries[gap_start].value; | |
277 | } | |
278 | ||
279 | /* Assign these back into the cache. */ | |
280 | c->gap_start = gap_start; | |
281 | c->gap_len = gap_len; | |
282 | } | |
283 | ||
284 | ||
c4fc4e30 | 285 | /* Insert a new boundary in cache C; it will have cache index I, |
b9c5136f KH |
286 | and have the specified POS and VALUE. */ |
287 | static void | |
0065d054 | 288 | insert_cache_boundary (struct region_cache *c, ptrdiff_t i, ptrdiff_t pos, |
c098fdb8 | 289 | int value) |
b9c5136f | 290 | { |
c4fc4e30 PE |
291 | /* i must be a valid cache index. */ |
292 | if (i < 0 || i > c->cache_len) | |
1088b922 | 293 | emacs_abort (); |
b9c5136f KH |
294 | |
295 | /* We must never want to insert something before the dummy first | |
296 | boundary. */ | |
c4fc4e30 | 297 | if (i == 0) |
1088b922 | 298 | emacs_abort (); |
b9c5136f KH |
299 | |
300 | /* We must only be inserting things in order. */ | |
c4fc4e30 PE |
301 | if (! (BOUNDARY_POS (c, i - 1) < pos |
302 | && (i == c->cache_len | |
303 | || pos < BOUNDARY_POS (c, i)))) | |
1088b922 | 304 | emacs_abort (); |
b9c5136f KH |
305 | |
306 | /* The value must be different from the ones around it. However, we | |
307 | temporarily create boundaries that establish the same value as | |
308 | the subsequent boundary, so we're not going to flag that case. */ | |
c4fc4e30 | 309 | if (BOUNDARY_VALUE (c, i - 1) == value) |
1088b922 | 310 | emacs_abort (); |
b9c5136f | 311 | |
c4fc4e30 | 312 | move_cache_gap (c, i, 1); |
b9c5136f | 313 | |
c4fc4e30 PE |
314 | c->boundaries[i].pos = pos - c->buffer_beg; |
315 | c->boundaries[i].value = value; | |
b9c5136f KH |
316 | c->gap_start++; |
317 | c->gap_len--; | |
318 | c->cache_len++; | |
319 | } | |
320 | ||
321 | ||
322 | /* Delete the i'th entry from cache C if START <= i < END. */ | |
323 | ||
324 | static void | |
c098fdb8 | 325 | delete_cache_boundaries (struct region_cache *c, |
0065d054 | 326 | ptrdiff_t start, ptrdiff_t end) |
b9c5136f | 327 | { |
0065d054 | 328 | ptrdiff_t len = end - start; |
b9c5136f KH |
329 | |
330 | /* Gotta be in range. */ | |
331 | if (start < 0 | |
332 | || end > c->cache_len) | |
1088b922 | 333 | emacs_abort (); |
b9c5136f KH |
334 | |
335 | /* Gotta be in order. */ | |
336 | if (start > end) | |
1088b922 | 337 | emacs_abort (); |
b9c5136f KH |
338 | |
339 | /* Can't delete the dummy entry. */ | |
340 | if (start == 0 | |
341 | && end >= 1) | |
1088b922 | 342 | emacs_abort (); |
b9c5136f KH |
343 | |
344 | /* Minimize gap motion. If we're deleting nothing, do nothing. */ | |
345 | if (len == 0) | |
346 | ; | |
347 | /* If the gap is before the region to delete, delete from the start | |
348 | forward. */ | |
349 | else if (c->gap_start <= start) | |
350 | { | |
351 | move_cache_gap (c, start, 0); | |
352 | c->gap_len += len; | |
353 | } | |
354 | /* If the gap is after the region to delete, delete from the end | |
355 | backward. */ | |
356 | else if (end <= c->gap_start) | |
357 | { | |
358 | move_cache_gap (c, end, 0); | |
359 | c->gap_start -= len; | |
360 | c->gap_len += len; | |
361 | } | |
362 | /* If the gap is in the region to delete, just expand it. */ | |
363 | else | |
364 | { | |
365 | c->gap_start = start; | |
366 | c->gap_len += len; | |
367 | } | |
368 | ||
369 | c->cache_len -= len; | |
370 | } | |
177c0ea7 | 371 | |
b9c5136f KH |
372 | |
373 | \f | |
374 | /* Set the value for a region. */ | |
375 | ||
376 | /* Set the value in cache C for the region START..END to VALUE. */ | |
377 | static void | |
c098fdb8 | 378 | set_cache_region (struct region_cache *c, |
0065d054 | 379 | ptrdiff_t start, ptrdiff_t end, int value) |
b9c5136f KH |
380 | { |
381 | if (start > end) | |
1088b922 | 382 | emacs_abort (); |
b9c5136f KH |
383 | if (start < c->buffer_beg |
384 | || end > c->buffer_end) | |
1088b922 | 385 | emacs_abort (); |
b9c5136f KH |
386 | |
387 | /* Eliminate this case; then we can assume that start and end-1 are | |
388 | both the locations of real characters in the buffer. */ | |
389 | if (start == end) | |
390 | return; | |
177c0ea7 | 391 | |
b9c5136f KH |
392 | { |
393 | /* We need to make sure that there are no boundaries in the area | |
394 | between start to end; the whole area will have the same value, | |
395 | so those boundaries will not be necessary. | |
177c0ea7 | 396 | |
b9c5136f KH |
397 | Let start_ix be the cache index of the boundary governing the |
398 | first character of start..end, and let end_ix be the cache | |
399 | index of the earliest boundary after the last character in | |
400 | start..end. (This tortured terminology is intended to answer | |
401 | all the "< or <=?" sort of questions.) */ | |
0065d054 PE |
402 | ptrdiff_t start_ix = find_cache_boundary (c, start); |
403 | ptrdiff_t end_ix = find_cache_boundary (c, end - 1) + 1; | |
b9c5136f KH |
404 | |
405 | /* We must remember the value established by the last boundary | |
406 | before end; if that boundary's domain stretches beyond end, | |
407 | we'll need to create a new boundary at end, and that boundary | |
408 | must have that remembered value. */ | |
409 | int value_at_end = BOUNDARY_VALUE (c, end_ix - 1); | |
410 | ||
411 | /* Delete all boundaries strictly within start..end; this means | |
412 | those whose indices are between start_ix (exclusive) and end_ix | |
413 | (exclusive). */ | |
414 | delete_cache_boundaries (c, start_ix + 1, end_ix); | |
415 | ||
416 | /* Make sure we have the right value established going in to | |
417 | start..end from the left, and no unnecessary boundaries. */ | |
418 | if (BOUNDARY_POS (c, start_ix) == start) | |
419 | { | |
420 | /* Is this boundary necessary? If no, remove it; if yes, set | |
421 | its value. */ | |
422 | if (start_ix > 0 | |
423 | && BOUNDARY_VALUE (c, start_ix - 1) == value) | |
424 | { | |
425 | delete_cache_boundaries (c, start_ix, start_ix + 1); | |
426 | start_ix--; | |
427 | } | |
428 | else | |
429 | SET_BOUNDARY_VALUE (c, start_ix, value); | |
430 | } | |
431 | else | |
432 | { | |
433 | /* Do we need to add a new boundary here? */ | |
434 | if (BOUNDARY_VALUE (c, start_ix) != value) | |
435 | { | |
436 | insert_cache_boundary (c, start_ix + 1, start, value); | |
437 | start_ix++; | |
438 | } | |
439 | } | |
177c0ea7 | 440 | |
b9c5136f KH |
441 | /* This is equivalent to letting end_ix float (like a buffer |
442 | marker does) with the insertions and deletions we may have | |
443 | done. */ | |
444 | end_ix = start_ix + 1; | |
445 | ||
446 | /* Make sure we have the correct value established as we leave | |
447 | start..end to the right. */ | |
448 | if (end == c->buffer_end) | |
449 | /* There is no text after start..end; nothing to do. */ | |
450 | ; | |
451 | else if (end_ix >= c->cache_len | |
452 | || end < BOUNDARY_POS (c, end_ix)) | |
453 | { | |
454 | /* There is no boundary at end, but we may need one. */ | |
455 | if (value_at_end != value) | |
456 | insert_cache_boundary (c, end_ix, end, value_at_end); | |
457 | } | |
458 | else | |
459 | { | |
460 | /* There is a boundary at end; should it be there? */ | |
461 | if (value == BOUNDARY_VALUE (c, end_ix)) | |
462 | delete_cache_boundaries (c, end_ix, end_ix + 1); | |
463 | } | |
464 | } | |
465 | } | |
466 | ||
467 | ||
468 | \f | |
469 | /* Interface: Invalidating the cache. Private: Re-validating the cache. */ | |
470 | ||
471 | /* Indicate that a section of BUF has changed, to invalidate CACHE. | |
472 | HEAD is the number of chars unchanged at the beginning of the buffer. | |
473 | TAIL is the number of chars unchanged at the end of the buffer. | |
474 | NOTE: this is *not* the same as the ending position of modified | |
475 | region. | |
476 | (This way of specifying regions makes more sense than absolute | |
477 | buffer positions in the presence of insertions and deletions; the | |
478 | args to pass are the same before and after such an operation.) */ | |
479 | void | |
c098fdb8 | 480 | invalidate_region_cache (struct buffer *buf, struct region_cache *c, |
0065d054 | 481 | ptrdiff_t head, ptrdiff_t tail) |
b9c5136f KH |
482 | { |
483 | /* Let chead = c->beg_unchanged, and | |
484 | ctail = c->end_unchanged. | |
485 | If z-tail < beg+chead by a large amount, or | |
486 | z-ctail < beg+head by a large amount, | |
487 | ||
488 | then cutting back chead and ctail to head and tail would lose a | |
489 | lot of information that we could preserve by revalidating the | |
490 | cache before processing this invalidation. Losing that | |
491 | information may be more costly than revalidating the cache now. | |
492 | So go ahead and call revalidate_region_cache if it seems that it | |
493 | might be worthwhile. */ | |
494 | if (((BUF_BEG (buf) + c->beg_unchanged) - (BUF_Z (buf) - tail) | |
495 | > PRESERVE_THRESHOLD) | |
496 | || ((BUF_BEG (buf) + head) - (BUF_Z (buf) - c->end_unchanged) | |
497 | > PRESERVE_THRESHOLD)) | |
498 | revalidate_region_cache (buf, c); | |
499 | ||
500 | ||
501 | if (head < c->beg_unchanged) | |
502 | c->beg_unchanged = head; | |
503 | if (tail < c->end_unchanged) | |
504 | c->end_unchanged = tail; | |
505 | ||
506 | /* We now know nothing about the region between the unchanged head | |
507 | and the unchanged tail (call it the "modified region"), not even | |
508 | its length. | |
509 | ||
510 | If the modified region has shrunk in size (deletions do this), | |
511 | then the cache may now contain boundaries originally located in | |
512 | text that doesn't exist any more. | |
513 | ||
514 | If the modified region has increased in size (insertions do | |
515 | this), then there may now be boundaries in the modified region | |
516 | whose positions are wrong. | |
517 | ||
518 | Even calling BOUNDARY_POS on boundaries still in the unchanged | |
519 | head or tail may well give incorrect answers now, since | |
520 | c->buffer_beg and c->buffer_end may well be wrong now. (Well, | |
521 | okay, c->buffer_beg never changes, so boundaries in the unchanged | |
522 | head will still be okay. But it's the principle of the thing.) | |
523 | ||
524 | So things are generally a mess. | |
525 | ||
526 | But we don't clean up this mess here; that would be expensive, | |
527 | and this function gets called every time any buffer modification | |
528 | occurs. Rather, we can clean up everything in one swell foop, | |
529 | accounting for all the modifications at once, by calling | |
530 | revalidate_region_cache before we try to consult the cache the | |
531 | next time. */ | |
532 | } | |
533 | ||
534 | ||
177c0ea7 | 535 | /* Clean out any cache entries applying to the modified region, and |
b9c5136f KH |
536 | make the positions of the remaining entries accurate again. |
537 | ||
538 | After calling this function, the mess described in the comment in | |
539 | invalidate_region_cache is cleaned up. | |
540 | ||
541 | This function operates by simply throwing away everything it knows | |
542 | about the modified region. It doesn't care exactly which | |
543 | insertions and deletions took place; it just tosses it all. | |
544 | ||
545 | For example, if you insert a single character at the beginning of | |
546 | the buffer, and a single character at the end of the buffer (for | |
547 | example), without calling this function in between the two | |
548 | insertions, then the entire cache will be freed of useful | |
549 | information. On the other hand, if you do manage to call this | |
550 | function in between the two insertions, then the modified regions | |
551 | will be small in both cases, no information will be tossed, and the | |
552 | cache will know that it doesn't have knowledge of the first and | |
553 | last characters any more. | |
554 | ||
555 | Calling this function may be expensive; it does binary searches in | |
556 | the cache, and causes cache gap motion. */ | |
557 | ||
558 | static void | |
971de7fb | 559 | revalidate_region_cache (struct buffer *buf, struct region_cache *c) |
b9c5136f KH |
560 | { |
561 | /* The boundaries now in the cache are expressed relative to the | |
562 | buffer_beg and buffer_end values stored in the cache. Now, | |
563 | buffer_beg and buffer_end may not be the same as BUF_BEG (buf) | |
564 | and BUF_Z (buf), so we have two different "bases" to deal with | |
565 | --- the cache's, and the buffer's. */ | |
566 | ||
567 | /* If the entire buffer is still valid, don't waste time. Yes, this | |
568 | should be a >, not a >=; think about what beg_unchanged and | |
569 | end_unchanged get set to when the only change has been an | |
570 | insertion. */ | |
571 | if (c->buffer_beg + c->beg_unchanged | |
572 | > c->buffer_end - c->end_unchanged) | |
573 | return; | |
574 | ||
575 | /* If all the text we knew about as of the last cache revalidation | |
576 | is still there, then all of the information in the cache is still | |
577 | valid. Because c->buffer_beg and c->buffer_end are out-of-date, | |
578 | the modified region appears from the cache's point of view to be | |
579 | a null region located someplace in the buffer. | |
580 | ||
581 | Now, invalidating that empty string will have no actual affect on | |
582 | the cache; instead, we need to update the cache's basis first | |
583 | (which will give the modified region the same size in the cache | |
584 | as it has in the buffer), and then invalidate the modified | |
585 | region. */ | |
177c0ea7 | 586 | if (c->buffer_beg + c->beg_unchanged |
b9c5136f KH |
587 | == c->buffer_end - c->end_unchanged) |
588 | { | |
589 | /* Move the gap so that all the boundaries in the unchanged head | |
590 | are expressed beg-relative, and all the boundaries in the | |
591 | unchanged tail are expressed end-relative. That done, we can | |
592 | plug in the new buffer beg and end, and all the positions | |
593 | will be accurate. | |
594 | ||
595 | The boundary which has jurisdiction over the modified region | |
596 | should be left before the gap. */ | |
597 | move_cache_gap (c, | |
598 | (find_cache_boundary (c, (c->buffer_beg | |
599 | + c->beg_unchanged)) | |
600 | + 1), | |
601 | 0); | |
602 | ||
603 | c->buffer_beg = BUF_BEG (buf); | |
604 | c->buffer_end = BUF_Z (buf); | |
605 | ||
606 | /* Now that the cache's basis has been changed, the modified | |
607 | region actually takes up some space in the cache, so we can | |
608 | invalidate it. */ | |
609 | set_cache_region (c, | |
610 | c->buffer_beg + c->beg_unchanged, | |
611 | c->buffer_end - c->end_unchanged, | |
612 | 0); | |
613 | } | |
614 | ||
615 | /* Otherwise, there is a non-empty region in the cache which | |
616 | corresponds to the modified region of the buffer. */ | |
617 | else | |
618 | { | |
0065d054 | 619 | ptrdiff_t modified_ix; |
b9c5136f KH |
620 | |
621 | /* These positions are correct, relative to both the cache basis | |
622 | and the buffer basis. */ | |
623 | set_cache_region (c, | |
624 | c->buffer_beg + c->beg_unchanged, | |
625 | c->buffer_end - c->end_unchanged, | |
626 | 0); | |
627 | ||
628 | /* Now the cache contains only boundaries that are in the | |
629 | unchanged head and tail; we've disposed of any boundaries | |
630 | whose positions we can't be sure of given the information | |
631 | we've saved. | |
632 | ||
633 | If we put the cache gap between the unchanged head and the | |
634 | unchanged tail, we can adjust all the boundary positions at | |
635 | once, simply by setting buffer_beg and buffer_end. | |
636 | ||
637 | The boundary which has jurisdiction over the modified region | |
638 | should be left before the gap. */ | |
639 | modified_ix = | |
640 | find_cache_boundary (c, (c->buffer_beg + c->beg_unchanged)) + 1; | |
641 | move_cache_gap (c, modified_ix, 0); | |
642 | ||
643 | c->buffer_beg = BUF_BEG (buf); | |
644 | c->buffer_end = BUF_Z (buf); | |
645 | ||
646 | /* Now, we may have shrunk the buffer when we changed the basis, | |
647 | and brought the boundaries we created for the start and end | |
648 | of the modified region together, giving them the same | |
649 | position. If that's the case, we should collapse them into | |
650 | one boundary. Or we may even delete them both, if the values | |
651 | before and after them are the same. */ | |
652 | if (modified_ix < c->cache_len | |
653 | && (BOUNDARY_POS (c, modified_ix - 1) | |
654 | == BOUNDARY_POS (c, modified_ix))) | |
655 | { | |
656 | int value_after = BOUNDARY_VALUE (c, modified_ix); | |
657 | ||
658 | /* Should we remove both of the boundaries? Yes, if the | |
659 | latter boundary is now establishing the same value that | |
660 | the former boundary's predecessor does. */ | |
661 | if (modified_ix - 1 > 0 | |
662 | && value_after == BOUNDARY_VALUE (c, modified_ix - 2)) | |
663 | delete_cache_boundaries (c, modified_ix - 1, modified_ix + 1); | |
664 | else | |
665 | { | |
666 | /* We do need a boundary here; collapse the two | |
667 | boundaries into one. */ | |
668 | SET_BOUNDARY_VALUE (c, modified_ix - 1, value_after); | |
669 | delete_cache_boundaries (c, modified_ix, modified_ix + 1); | |
670 | } | |
671 | } | |
672 | } | |
673 | ||
674 | /* Now the entire cache is valid. */ | |
675 | c->beg_unchanged | |
676 | = c->end_unchanged | |
677 | = c->buffer_end - c->buffer_beg; | |
678 | } | |
679 | ||
680 | \f | |
681 | /* Interface: Adding information to the cache. */ | |
682 | ||
/* Record that the text of BUF between START and END (absolute buffer
   positions) is "known" for the purposes of cache C -- e.g. "has no
   newlines", in the case of the line cache.  */
void
know_region_cache (struct buffer *buf, struct region_cache *c,
                   ptrdiff_t start, ptrdiff_t end)
{
  /* Bring the cache up to date with BUF before recording anything, so
     that START and END are interpreted in the current basis.  */
  revalidate_region_cache (buf, c);

  /* 1 is the "known" value.  */
  set_cache_region (c, start, end, 1);
}
694 | ||
695 | \f | |
696 | /* Interface: using the cache. */ | |
697 | ||
698 | /* Return true if the text immediately after POS in BUF is known, for | |
177c0ea7 | 699 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest |
333f9019 | 700 | position after POS where the knowledge changes. */ |
b9c5136f | 701 | int |
c098fdb8 | 702 | region_cache_forward (struct buffer *buf, struct region_cache *c, |
0065d054 | 703 | ptrdiff_t pos, ptrdiff_t *next) |
b9c5136f KH |
704 | { |
705 | revalidate_region_cache (buf, c); | |
706 | ||
707 | { | |
0065d054 | 708 | ptrdiff_t i = find_cache_boundary (c, pos); |
b9c5136f | 709 | int i_value = BOUNDARY_VALUE (c, i); |
0065d054 | 710 | ptrdiff_t j; |
b9c5136f KH |
711 | |
712 | /* Beyond the end of the buffer is unknown, by definition. */ | |
713 | if (pos >= BUF_Z (buf)) | |
714 | { | |
715 | if (next) *next = BUF_Z (buf); | |
716 | i_value = 0; | |
717 | } | |
718 | else if (next) | |
719 | { | |
720 | /* Scan forward from i to find the next differing position. */ | |
721 | for (j = i + 1; j < c->cache_len; j++) | |
722 | if (BOUNDARY_VALUE (c, j) != i_value) | |
723 | break; | |
724 | ||
725 | if (j < c->cache_len) | |
726 | *next = BOUNDARY_POS (c, j); | |
727 | else | |
728 | *next = BUF_Z (buf); | |
729 | } | |
730 | ||
731 | return i_value; | |
732 | } | |
733 | } | |
734 | ||
/* Return nonzero if the text immediately before POS in BUF is known,
   for the purposes of cache C.  If NEXT is non-null, set *NEXT to the
   nearest position before POS where the knowledge changes, or to
   BUF_BEG (BUF) if it does not change before the start of the
   buffer.  */
int
region_cache_backward (struct buffer *buf, struct region_cache *c,
                       ptrdiff_t pos, ptrdiff_t *next)
{
  ptrdiff_t ix, scan;
  int known;

  revalidate_region_cache (buf, c);

  /* Before the beginning of the buffer is unknown, by definition.  */
  if (pos <= BUF_BEG (buf))
    {
      if (next)
        *next = BUF_BEG (buf);
      return 0;
    }

  ix = find_cache_boundary (c, pos - 1);
  known = BOUNDARY_VALUE (c, ix);

  if (next)
    {
      /* Skip the preceding boundaries that keep the same value.  */
      scan = ix - 1;
      while (scan >= 0 && BOUNDARY_VALUE (c, scan) == known)
        scan--;

      /* The change happens at the boundary just after the first one
         found with a different value.  */
      *next = scan >= 0 ? BOUNDARY_POS (c, scan + 1) : BUF_BEG (buf);
    }

  return known;
}
772 | ||
773 | \f | |
774 | /* Debugging: pretty-print a cache to the standard error output. */ | |
775 | ||
e3b27b31 | 776 | void pp_cache (struct region_cache *) EXTERNALLY_VISIBLE; |
b9c5136f | 777 | void |
971de7fb | 778 | pp_cache (struct region_cache *c) |
b9c5136f | 779 | { |
0065d054 PE |
780 | ptrdiff_t i; |
781 | ptrdiff_t beg_u = c->buffer_beg + c->beg_unchanged; | |
782 | ptrdiff_t end_u = c->buffer_end - c->end_unchanged; | |
b9c5136f KH |
783 | |
784 | fprintf (stderr, | |
0065d054 | 785 | "basis: %"pD"d..%"pD"d modified: %"pD"d..%"pD"d\n", |
c2982e87 PE |
786 | c->buffer_beg, c->buffer_end, |
787 | beg_u, end_u); | |
b9c5136f KH |
788 | |
789 | for (i = 0; i < c->cache_len; i++) | |
790 | { | |
0065d054 | 791 | ptrdiff_t pos = BOUNDARY_POS (c, i); |
b9c5136f KH |
792 | |
793 | putc (((pos < beg_u) ? 'v' | |
794 | : (pos == beg_u) ? '-' | |
795 | : ' '), | |
796 | stderr); | |
797 | putc (((pos > end_u) ? '^' | |
798 | : (pos == end_u) ? '-' | |
799 | : ' '), | |
800 | stderr); | |
0065d054 | 801 | fprintf (stderr, "%"pD"d : %d\n", pos, BOUNDARY_VALUE (c, i)); |
b9c5136f KH |
802 | } |
803 | } |