| 1 | /* Caching facts about regions of the buffer, for optimization. |
| 2 | |
| 3 | Copyright (C) 1985-1989, 1993, 1995, 2001-2011 |
| 4 | Free Software Foundation, Inc. |
| 5 | |
| 6 | This file is part of GNU Emacs. |
| 7 | |
| 8 | GNU Emacs is free software: you can redistribute it and/or modify |
| 9 | it under the terms of the GNU General Public License as published by |
| 10 | the Free Software Foundation, either version 3 of the License, or |
| 11 | (at your option) any later version. |
| 12 | |
| 13 | GNU Emacs is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | |
| 22 | #include <config.h> |
| 23 | #include <stdio.h> |
| 24 | #include <setjmp.h> |
| 25 | |
| 26 | #include "lisp.h" |
| 27 | #include "buffer.h" |
| 28 | #include "region-cache.h" |
| 29 | |
| 30 | \f |
| 31 | /* Data structures. */ |
| 32 | |
| 33 | /* The region cache. |
| 34 | |
| 35 | We want something that maps character positions in a buffer onto |
| 36 | values. The representation should deal well with long runs of |
| 37 | characters with the same value. |
| 38 | |
| 39 | The tricky part: the representation should be very cheap to |
| 40 | maintain in the presence of many insertions and deletions. If the |
| 41 | overhead of maintaining the cache is too high, the speedups it |
| 42 | offers will be worthless. |
| 43 | |
| 44 | |
| 45 | We represent the region cache as a sorted array of struct |
| 46 | boundary's, each of which contains a buffer position and a value; |
| 47 | the value applies to all the characters after the buffer position, |
| 48 | until the position of the next boundary, or the end of the buffer. |
| 49 | |
| 50 | The cache always has a boundary whose position is BUF_BEG, so |
| 51 | there's always a value associated with every character in the |
| 52 | buffer. Since the cache is sorted, this is always the first |
| 53 | element of the cache. |
| 54 | |
| 55 | To facilitate the insertion and deletion of boundaries in the |
| 56 | cache, the cache has a gap, just like Emacs's text buffers do. |
| 57 | |
| 58 | To help boundary positions float along with insertions and |
| 59 | deletions, all boundary positions before the cache gap are stored |
| 60 | relative to BUF_BEG (buf) (thus they're >= 0), and all boundary |
| 61 | positions after the gap are stored relative to BUF_Z (buf) (thus |
| 62 | they're <= 0). Look at BOUNDARY_POS to see this in action. See |
| 63 | revalidate_region_cache to see how this helps. */ |
| 64 | |
| 65 | struct boundary { |
| 66 | EMACS_INT pos; |
| 67 | int value; |
| 68 | }; |
| 69 | |
| 70 | struct region_cache { |
| 71 | /* A sorted array of locations where the known-ness of the buffer |
| 72 | changes. */ |
| 73 | struct boundary *boundaries; |
| 74 | |
| 75 | /* boundaries[gap_start ... gap_start + gap_len - 1] is the gap. */ |
| 76 | EMACS_INT gap_start, gap_len; |
| 77 | |
| 78 | /* The number of elements allocated to boundaries, not including the |
| 79 | gap. */ |
| 80 | EMACS_INT cache_len; |
| 81 | |
| 82 | /* The areas that haven't changed since the last time we cleaned out |
| 83 | invalid entries from the cache. These overlap when the buffer is |
| 84 | entirely unchanged. */ |
| 85 | EMACS_INT beg_unchanged, end_unchanged; |
| 86 | |
| 87 | /* The first and last positions in the buffer. Because boundaries |
| 88 | store their positions relative to the start (BEG) and end (Z) of |
| 89 | the buffer, knowing these positions allows us to accurately |
| 90 | interpret positions without having to pass the buffer structure |
| 91 | or its endpoints around all the time. |
| 92 | |
| 93 | Yes, buffer_beg is always 1. It's there for symmetry with |
| 94 | buffer_end and the BEG and BUF_BEG macros. */ |
| 95 | EMACS_INT buffer_beg, buffer_end; |
| 96 | }; |
| 97 | |
/* Translate cache index I of cache C into an index into
   C->boundaries, stepping over the gap for entries that lie after it.
   Like the accessors below, this evaluates its arguments more than
   once.  */
#define BOUNDARY_SLOT(c, i) \
  ((i) < (c)->gap_start ? (i) : (c)->gap_len + (i))

/* Absolute buffer position of boundary I in cache C.  Entries before
   the gap are offsets from buffer_beg; entries after it are offsets
   from buffer_end.  */
#define BOUNDARY_POS(c, i)					\
  (((i) < (c)->gap_start ? (c)->buffer_beg : (c)->buffer_end)	\
   + (c)->boundaries[BOUNDARY_SLOT (c, i)].pos)

/* Value that applies to the text following boundary I in cache C.  */
#define BOUNDARY_VALUE(c, i) \
  ((c)->boundaries[BOUNDARY_SLOT (c, i)].value)

/* Store V as the value for the text following boundary I in cache C;
   the expression yields the stored value.  */
#define SET_BOUNDARY_VALUE(c, i, v) \
  ((c)->boundaries[BOUNDARY_SLOT (c, i)].value = (v))
| 115 | |
| 116 | |
/* Gap size of a freshly created cache, and the smallest gap size
   move_cache_gap asks for whenever it has to enlarge the boundary
   array.  */
#define NEW_CACHE_GAP 40

/* Used by invalidate_region_cache: when a new invalidation would
   discard what the cache knows about more than this many characters,
   revalidate_region_cache is called first so that the information
   survives.  */
#define PRESERVE_THRESHOLD 500

static void revalidate_region_cache (struct buffer *buf,
				     struct region_cache *c);
| 127 | |
| 128 | \f |
| 129 | /* Interface: Allocating, initializing, and disposing of region caches. */ |
| 130 | |
| 131 | struct region_cache * |
| 132 | new_region_cache (void) |
| 133 | { |
| 134 | struct region_cache *c |
| 135 | = (struct region_cache *) xmalloc (sizeof (struct region_cache)); |
| 136 | |
| 137 | c->gap_start = 0; |
| 138 | c->gap_len = NEW_CACHE_GAP; |
| 139 | c->cache_len = 0; |
| 140 | c->boundaries = |
| 141 | (struct boundary *) xmalloc ((c->gap_len + c->cache_len) |
| 142 | * sizeof (*c->boundaries)); |
| 143 | |
| 144 | c->beg_unchanged = 0; |
| 145 | c->end_unchanged = 0; |
| 146 | c->buffer_beg = BEG; |
| 147 | c->buffer_end = BEG; |
| 148 | |
| 149 | /* Insert the boundary for the buffer start. */ |
| 150 | c->cache_len++; |
| 151 | c->gap_len--; |
| 152 | c->gap_start++; |
| 153 | c->boundaries[0].pos = 0; /* from buffer_beg */ |
| 154 | c->boundaries[0].value = 0; |
| 155 | |
| 156 | return c; |
| 157 | } |
| 158 | |
| 159 | void |
| 160 | free_region_cache (struct region_cache *c) |
| 161 | { |
| 162 | xfree (c->boundaries); |
| 163 | xfree (c); |
| 164 | } |
| 165 | |
| 166 | \f |
| 167 | /* Finding positions in the cache. */ |
| 168 | |
| 169 | /* Return the index of the last boundary in cache C at or before POS. |
| 170 | In other words, return the boundary that specifies the value for |
| 171 | the region POS..(POS + 1). |
| 172 | |
| 173 | This operation should be logarithmic in the number of cache |
| 174 | entries. It would be nice if it took advantage of locality of |
| 175 | reference, too, by searching entries near the last entry found. */ |
| 176 | static EMACS_INT |
| 177 | find_cache_boundary (struct region_cache *c, EMACS_INT pos) |
| 178 | { |
| 179 | EMACS_INT low = 0, high = c->cache_len; |
| 180 | |
| 181 | while (low + 1 < high) |
| 182 | { |
| 183 | /* mid is always a valid index, because low < high and ">> 1" |
| 184 | rounds down. */ |
| 185 | EMACS_INT mid = (low + high) >> 1; |
| 186 | EMACS_INT boundary = BOUNDARY_POS (c, mid); |
| 187 | |
| 188 | if (pos < boundary) |
| 189 | high = mid; |
| 190 | else |
| 191 | low = mid; |
| 192 | } |
| 193 | |
| 194 | /* Some testing. */ |
| 195 | if (BOUNDARY_POS (c, low) > pos |
| 196 | || (low + 1 < c->cache_len |
| 197 | && BOUNDARY_POS (c, low + 1) <= pos)) |
| 198 | abort (); |
| 199 | |
| 200 | return low; |
| 201 | } |
| 202 | |
| 203 | |
| 204 | \f |
| 205 | /* Moving the cache gap around, inserting, and deleting. */ |
| 206 | |
| 207 | |
| 208 | /* Move the gap of cache C to index POS, and make sure it has space |
| 209 | for at least MIN_SIZE boundaries. */ |
| 210 | static void |
| 211 | move_cache_gap (struct region_cache *c, EMACS_INT pos, EMACS_INT min_size) |
| 212 | { |
| 213 | /* Copy these out of the cache and into registers. */ |
| 214 | EMACS_INT gap_start = c->gap_start; |
| 215 | EMACS_INT gap_len = c->gap_len; |
| 216 | EMACS_INT buffer_beg = c->buffer_beg; |
| 217 | EMACS_INT buffer_end = c->buffer_end; |
| 218 | |
| 219 | if (pos < 0 |
| 220 | || pos > c->cache_len) |
| 221 | abort (); |
| 222 | |
| 223 | /* We mustn't ever try to put the gap before the dummy start |
| 224 | boundary. That must always be start-relative. */ |
| 225 | if (pos == 0) |
| 226 | abort (); |
| 227 | |
| 228 | /* Need we move the gap right? */ |
| 229 | while (gap_start < pos) |
| 230 | { |
| 231 | /* Copy one boundary from after to before the gap, and |
| 232 | convert its position to start-relative. */ |
| 233 | c->boundaries[gap_start].pos |
| 234 | = (buffer_end |
| 235 | + c->boundaries[gap_start + gap_len].pos |
| 236 | - buffer_beg); |
| 237 | c->boundaries[gap_start].value |
| 238 | = c->boundaries[gap_start + gap_len].value; |
| 239 | gap_start++; |
| 240 | } |
| 241 | |
| 242 | /* To enlarge the gap, we need to re-allocate the boundary array, and |
| 243 | then shift the area after the gap to the new end. Since the cost |
| 244 | is proportional to the amount of stuff after the gap, we do the |
| 245 | enlargement here, after a right shift but before a left shift, |
| 246 | when the portion after the gap is smallest. */ |
| 247 | if (gap_len < min_size) |
| 248 | { |
| 249 | EMACS_INT i; |
| 250 | |
| 251 | /* Always make at least NEW_CACHE_GAP elements, as long as we're |
| 252 | expanding anyway. */ |
| 253 | if (min_size < NEW_CACHE_GAP) |
| 254 | min_size = NEW_CACHE_GAP; |
| 255 | |
| 256 | c->boundaries = |
| 257 | (struct boundary *) xrealloc (c->boundaries, |
| 258 | ((min_size + c->cache_len) |
| 259 | * sizeof (*c->boundaries))); |
| 260 | |
| 261 | /* Some systems don't provide a version of the copy routine that |
| 262 | can be trusted to shift memory upward into an overlapping |
| 263 | region. memmove isn't widely available. */ |
| 264 | min_size -= gap_len; |
| 265 | for (i = c->cache_len - 1; i >= gap_start; i--) |
| 266 | { |
| 267 | c->boundaries[i + min_size].pos = c->boundaries[i + gap_len].pos; |
| 268 | c->boundaries[i + min_size].value = c->boundaries[i + gap_len].value; |
| 269 | } |
| 270 | |
| 271 | gap_len = min_size; |
| 272 | } |
| 273 | |
| 274 | /* Need we move the gap left? */ |
| 275 | while (pos < gap_start) |
| 276 | { |
| 277 | gap_start--; |
| 278 | |
| 279 | /* Copy one region from before to after the gap, and |
| 280 | convert its position to end-relative. */ |
| 281 | c->boundaries[gap_start + gap_len].pos |
| 282 | = c->boundaries[gap_start].pos + buffer_beg - buffer_end; |
| 283 | c->boundaries[gap_start + gap_len].value |
| 284 | = c->boundaries[gap_start].value; |
| 285 | } |
| 286 | |
| 287 | /* Assign these back into the cache. */ |
| 288 | c->gap_start = gap_start; |
| 289 | c->gap_len = gap_len; |
| 290 | } |
| 291 | |
| 292 | |
| 293 | /* Insert a new boundary in cache C; it will have cache index I, |
| 294 | and have the specified POS and VALUE. */ |
| 295 | static void |
| 296 | insert_cache_boundary (struct region_cache *c, EMACS_INT i, EMACS_INT pos, |
| 297 | int value) |
| 298 | { |
| 299 | /* i must be a valid cache index. */ |
| 300 | if (i < 0 || i > c->cache_len) |
| 301 | abort (); |
| 302 | |
| 303 | /* We must never want to insert something before the dummy first |
| 304 | boundary. */ |
| 305 | if (i == 0) |
| 306 | abort (); |
| 307 | |
| 308 | /* We must only be inserting things in order. */ |
| 309 | if (! (BOUNDARY_POS (c, i - 1) < pos |
| 310 | && (i == c->cache_len |
| 311 | || pos < BOUNDARY_POS (c, i)))) |
| 312 | abort (); |
| 313 | |
| 314 | /* The value must be different from the ones around it. However, we |
| 315 | temporarily create boundaries that establish the same value as |
| 316 | the subsequent boundary, so we're not going to flag that case. */ |
| 317 | if (BOUNDARY_VALUE (c, i - 1) == value) |
| 318 | abort (); |
| 319 | |
| 320 | move_cache_gap (c, i, 1); |
| 321 | |
| 322 | c->boundaries[i].pos = pos - c->buffer_beg; |
| 323 | c->boundaries[i].value = value; |
| 324 | c->gap_start++; |
| 325 | c->gap_len--; |
| 326 | c->cache_len++; |
| 327 | } |
| 328 | |
| 329 | |
| 330 | /* Delete the i'th entry from cache C if START <= i < END. */ |
| 331 | |
| 332 | static void |
| 333 | delete_cache_boundaries (struct region_cache *c, |
| 334 | EMACS_INT start, EMACS_INT end) |
| 335 | { |
| 336 | EMACS_INT len = end - start; |
| 337 | |
| 338 | /* Gotta be in range. */ |
| 339 | if (start < 0 |
| 340 | || end > c->cache_len) |
| 341 | abort (); |
| 342 | |
| 343 | /* Gotta be in order. */ |
| 344 | if (start > end) |
| 345 | abort (); |
| 346 | |
| 347 | /* Can't delete the dummy entry. */ |
| 348 | if (start == 0 |
| 349 | && end >= 1) |
| 350 | abort (); |
| 351 | |
| 352 | /* Minimize gap motion. If we're deleting nothing, do nothing. */ |
| 353 | if (len == 0) |
| 354 | ; |
| 355 | /* If the gap is before the region to delete, delete from the start |
| 356 | forward. */ |
| 357 | else if (c->gap_start <= start) |
| 358 | { |
| 359 | move_cache_gap (c, start, 0); |
| 360 | c->gap_len += len; |
| 361 | } |
| 362 | /* If the gap is after the region to delete, delete from the end |
| 363 | backward. */ |
| 364 | else if (end <= c->gap_start) |
| 365 | { |
| 366 | move_cache_gap (c, end, 0); |
| 367 | c->gap_start -= len; |
| 368 | c->gap_len += len; |
| 369 | } |
| 370 | /* If the gap is in the region to delete, just expand it. */ |
| 371 | else |
| 372 | { |
| 373 | c->gap_start = start; |
| 374 | c->gap_len += len; |
| 375 | } |
| 376 | |
| 377 | c->cache_len -= len; |
| 378 | } |
| 379 | |
| 380 | |
| 381 | \f |
| 382 | /* Set the value for a region. */ |
| 383 | |
/* Set the value in cache C for the region START..END to VALUE.

   START and END are absolute buffer positions with
   C->buffer_beg <= START <= END <= C->buffer_end; anything else
   aborts.  An empty region (START == END) leaves the cache untouched.

   On return no boundary lies strictly inside START..END, and the
   boundaries at the two edges exist only if the value actually
   changes there.  */
static void
set_cache_region (struct region_cache *c,
		  EMACS_INT start, EMACS_INT end, int value)
{
  if (start > end)
    abort ();
  if (start < c->buffer_beg
      || end > c->buffer_end)
    abort ();

  /* Eliminate this case; then we can assume that start and end-1 are
     both the locations of real characters in the buffer.  */
  if (start == end)
    return;

  {
    /* We need to make sure that there are no boundaries in the area
       between start to end; the whole area will have the same value,
       so those boundaries will not be necessary.

       Let start_ix be the cache index of the boundary governing the
       first character of start..end, and let end_ix be the cache
       index of the earliest boundary after the last character in
       start..end.  (This tortured terminology is intended to answer
       all the "< or <=?" sort of questions.)  */
    EMACS_INT start_ix = find_cache_boundary (c, start);
    EMACS_INT end_ix   = find_cache_boundary (c, end - 1) + 1;

    /* We must remember the value established by the last boundary
       before end; if that boundary's domain stretches beyond end,
       we'll need to create a new boundary at end, and that boundary
       must have that remembered value.  This has to be read before
       the deletion below discards that boundary.  */
    int value_at_end = BOUNDARY_VALUE (c, end_ix - 1);

    /* Delete all boundaries strictly within start..end; this means
       those whose indices are between start_ix (exclusive) and end_ix
       (exclusive).  */
    delete_cache_boundaries (c, start_ix + 1, end_ix);

    /* Make sure we have the right value established going in to
       start..end from the left, and no unnecessary boundaries.  */
    if (BOUNDARY_POS (c, start_ix) == start)
      {
	/* Is this boundary necessary?  If no, remove it; if yes, set
	   its value.  The start_ix > 0 test keeps the dummy first
	   boundary from ever being deleted.  */
	if (start_ix > 0
	    && BOUNDARY_VALUE (c, start_ix - 1) == value)
	  {
	    delete_cache_boundaries (c, start_ix, start_ix + 1);
	    start_ix--;
	  }
	else
	  SET_BOUNDARY_VALUE (c, start_ix, value);
      }
    else
      {
	/* Do we need to add a new boundary here?  */
	if (BOUNDARY_VALUE (c, start_ix) != value)
	  {
	    insert_cache_boundary (c, start_ix + 1, start, value);
	    start_ix++;
	  }
      }

    /* This is equivalent to letting end_ix float (like a buffer
       marker does) with the insertions and deletions we may have
       done.  At this point start_ix indexes the boundary governing
       start..end, so the next index is the first boundary at or
       after end.  */
    end_ix = start_ix + 1;

    /* Make sure we have the correct value established as we leave
       start..end to the right.  */
    if (end == c->buffer_end)
      /* There is no text after start..end; nothing to do.  */
      ;
    else if (end_ix >= c->cache_len
	     || end < BOUNDARY_POS (c, end_ix))
      {
	/* There is no boundary at end, but we may need one.  */
	if (value_at_end != value)
	  insert_cache_boundary (c, end_ix, end, value_at_end);
      }
    else
      {
	/* There is a boundary at end; should it be there?  */
	if (value == BOUNDARY_VALUE (c, end_ix))
	  delete_cache_boundaries (c, end_ix, end_ix + 1);
      }
  }
}
| 474 | |
| 475 | |
| 476 | \f |
| 477 | /* Interface: Invalidating the cache. Private: Re-validating the cache. */ |
| 478 | |
| 479 | /* Indicate that a section of BUF has changed, to invalidate CACHE. |
| 480 | HEAD is the number of chars unchanged at the beginning of the buffer. |
| 481 | TAIL is the number of chars unchanged at the end of the buffer. |
| 482 | NOTE: this is *not* the same as the ending position of modified |
| 483 | region. |
| 484 | (This way of specifying regions makes more sense than absolute |
| 485 | buffer positions in the presence of insertions and deletions; the |
| 486 | args to pass are the same before and after such an operation.) */ |
| 487 | void |
| 488 | invalidate_region_cache (struct buffer *buf, struct region_cache *c, |
| 489 | EMACS_INT head, EMACS_INT tail) |
| 490 | { |
| 491 | /* Let chead = c->beg_unchanged, and |
| 492 | ctail = c->end_unchanged. |
| 493 | If z-tail < beg+chead by a large amount, or |
| 494 | z-ctail < beg+head by a large amount, |
| 495 | |
| 496 | then cutting back chead and ctail to head and tail would lose a |
| 497 | lot of information that we could preserve by revalidating the |
| 498 | cache before processing this invalidation. Losing that |
| 499 | information may be more costly than revalidating the cache now. |
| 500 | So go ahead and call revalidate_region_cache if it seems that it |
| 501 | might be worthwhile. */ |
| 502 | if (((BUF_BEG (buf) + c->beg_unchanged) - (BUF_Z (buf) - tail) |
| 503 | > PRESERVE_THRESHOLD) |
| 504 | || ((BUF_BEG (buf) + head) - (BUF_Z (buf) - c->end_unchanged) |
| 505 | > PRESERVE_THRESHOLD)) |
| 506 | revalidate_region_cache (buf, c); |
| 507 | |
| 508 | |
| 509 | if (head < c->beg_unchanged) |
| 510 | c->beg_unchanged = head; |
| 511 | if (tail < c->end_unchanged) |
| 512 | c->end_unchanged = tail; |
| 513 | |
| 514 | /* We now know nothing about the region between the unchanged head |
| 515 | and the unchanged tail (call it the "modified region"), not even |
| 516 | its length. |
| 517 | |
| 518 | If the modified region has shrunk in size (deletions do this), |
| 519 | then the cache may now contain boundaries originally located in |
| 520 | text that doesn't exist any more. |
| 521 | |
| 522 | If the modified region has increased in size (insertions do |
| 523 | this), then there may now be boundaries in the modified region |
| 524 | whose positions are wrong. |
| 525 | |
| 526 | Even calling BOUNDARY_POS on boundaries still in the unchanged |
| 527 | head or tail may well give incorrect answers now, since |
| 528 | c->buffer_beg and c->buffer_end may well be wrong now. (Well, |
| 529 | okay, c->buffer_beg never changes, so boundaries in the unchanged |
| 530 | head will still be okay. But it's the principle of the thing.) |
| 531 | |
| 532 | So things are generally a mess. |
| 533 | |
| 534 | But we don't clean up this mess here; that would be expensive, |
| 535 | and this function gets called every time any buffer modification |
| 536 | occurs. Rather, we can clean up everything in one swell foop, |
| 537 | accounting for all the modifications at once, by calling |
| 538 | revalidate_region_cache before we try to consult the cache the |
| 539 | next time. */ |
| 540 | } |
| 541 | |
| 542 | |
/* Clean out any cache entries applying to the modified region, and
   make the positions of the remaining entries accurate again.

   After calling this function, the mess described in the comment in
   invalidate_region_cache is cleaned up: C's basis (buffer_beg and
   buffer_end) matches BUF again, and the whole cache is marked as
   unchanged.

   This function operates by simply throwing away everything it knows
   about the modified region.  It doesn't care exactly which
   insertions and deletions took place; it just tosses it all.

   For example, if you insert a single character at the beginning of
   the buffer, and a single character at the end of the buffer,
   without calling this function in between the two insertions, then
   the entire cache will be freed of useful information.  On the other
   hand, if you do manage to call this function in between the two
   insertions, then the modified regions will be small in both cases,
   no information will be tossed, and the cache will know that it
   doesn't have knowledge of the first and last characters any more.

   Calling this function may be expensive; it does binary searches in
   the cache, and causes cache gap motion.  */

static void
revalidate_region_cache (struct buffer *buf, struct region_cache *c)
{
  /* The boundaries now in the cache are expressed relative to the
     buffer_beg and buffer_end values stored in the cache.  Now,
     buffer_beg and buffer_end may not be the same as BUF_BEG (buf)
     and BUF_Z (buf), so we have two different "bases" to deal with
     --- the cache's, and the buffer's.  */

  /* If the entire buffer is still valid, don't waste time.  Yes, this
     should be a >, not a >=; think about what beg_unchanged and
     end_unchanged get set to when the only change has been an
     insertion.  */
  if (c->buffer_beg + c->beg_unchanged
      > c->buffer_end - c->end_unchanged)
    return;

  /* If all the text we knew about as of the last cache revalidation
     is still there, then all of the information in the cache is still
     valid.  Because c->buffer_beg and c->buffer_end are out-of-date,
     the modified region appears from the cache's point of view to be
     a null region located someplace in the buffer.

     Now, invalidating that empty string will have no actual effect on
     the cache; instead, we need to update the cache's basis first
     (which will give the modified region the same size in the cache
     as it has in the buffer), and then invalidate the modified
     region.  */
  if (c->buffer_beg + c->beg_unchanged
      == c->buffer_end - c->end_unchanged)
    {
      /* Move the gap so that all the boundaries in the unchanged head
	 are expressed beg-relative, and all the boundaries in the
	 unchanged tail are expressed end-relative.  That done, we can
	 plug in the new buffer beg and end, and all the positions
	 will be accurate.

	 The boundary which has jurisdiction over the modified region
	 should be left before the gap.  */
      move_cache_gap (c,
		      (find_cache_boundary (c, (c->buffer_beg
						+ c->beg_unchanged))
		       + 1),
		      0);

      c->buffer_beg = BUF_BEG (buf);
      c->buffer_end = BUF_Z (buf);

      /* Now that the cache's basis has been changed, the modified
	 region actually takes up some space in the cache, so we can
	 invalidate it.  */
      set_cache_region (c,
			c->buffer_beg + c->beg_unchanged,
			c->buffer_end - c->end_unchanged,
			0);
    }

  /* Otherwise, there is a non-empty region in the cache which
     corresponds to the modified region of the buffer.  */
  else
    {
      EMACS_INT modified_ix;

      /* These positions are correct, relative to both the cache basis
	 and the buffer basis.  */
      set_cache_region (c,
			c->buffer_beg + c->beg_unchanged,
			c->buffer_end - c->end_unchanged,
			0);

      /* Now the cache contains only boundaries that are in the
	 unchanged head and tail; we've disposed of any boundaries
	 whose positions we can't be sure of given the information
	 we've saved.

	 If we put the cache gap between the unchanged head and the
	 unchanged tail, we can adjust all the boundary positions at
	 once, simply by setting buffer_beg and buffer_end.

	 The boundary which has jurisdiction over the modified region
	 should be left before the gap.  */
      modified_ix =
	find_cache_boundary (c, (c->buffer_beg + c->beg_unchanged)) + 1;
      move_cache_gap (c, modified_ix, 0);

      c->buffer_beg = BUF_BEG (buf);
      c->buffer_end = BUF_Z (buf);

      /* Now, we may have shrunk the buffer when we changed the basis,
	 and brought the boundaries we created for the start and end
	 of the modified region together, giving them the same
	 position.  If that's the case, we should collapse them into
	 one boundary.  Or we may even delete them both, if the values
	 before and after them are the same.  */
      if (modified_ix < c->cache_len
	  && (BOUNDARY_POS (c, modified_ix - 1)
	      == BOUNDARY_POS (c, modified_ix)))
	{
	  int value_after = BOUNDARY_VALUE (c, modified_ix);

	  /* Should we remove both of the boundaries?  Yes, if the
	     latter boundary is now establishing the same value that
	     the former boundary's predecessor does.  The
	     modified_ix - 1 > 0 test keeps the dummy first boundary
	     out of the deleted range.  */
	  if (modified_ix - 1 > 0
	      && value_after == BOUNDARY_VALUE (c, modified_ix - 2))
	    delete_cache_boundaries (c, modified_ix - 1, modified_ix + 1);
	  else
	    {
	      /* We do need a boundary here; collapse the two
		 boundaries into one.  */
	      SET_BOUNDARY_VALUE (c, modified_ix - 1, value_after);
	      delete_cache_boundaries (c, modified_ix, modified_ix + 1);
	    }
	}
    }

  /* Now the entire cache is valid.  Both counts are set to the whole
     buffer length, so the unchanged head and tail overlap and the
     early-return test at the top succeeds until the next
     invalidation.  */
  c->beg_unchanged
    = c->end_unchanged
    = c->buffer_end - c->buffer_beg;
}
| 687 | |
| 688 | \f |
| 689 | /* Interface: Adding information to the cache. */ |
| 690 | |
| 691 | /* Assert that the region of BUF between START and END (absolute |
| 692 | buffer positions) is "known," for the purposes of CACHE (e.g. "has |
| 693 | no newlines", in the case of the line cache). */ |
| 694 | void |
| 695 | know_region_cache (struct buffer *buf, struct region_cache *c, |
| 696 | EMACS_INT start, EMACS_INT end) |
| 697 | { |
| 698 | revalidate_region_cache (buf, c); |
| 699 | |
| 700 | set_cache_region (c, start, end, 1); |
| 701 | } |
| 702 | |
| 703 | \f |
| 704 | /* Interface: using the cache. */ |
| 705 | |
| 706 | /* Return true if the text immediately after POS in BUF is known, for |
| 707 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest |
| 708 | position after POS where the knownness changes. */ |
| 709 | int |
| 710 | region_cache_forward (struct buffer *buf, struct region_cache *c, |
| 711 | EMACS_INT pos, EMACS_INT *next) |
| 712 | { |
| 713 | revalidate_region_cache (buf, c); |
| 714 | |
| 715 | { |
| 716 | EMACS_INT i = find_cache_boundary (c, pos); |
| 717 | int i_value = BOUNDARY_VALUE (c, i); |
| 718 | EMACS_INT j; |
| 719 | |
| 720 | /* Beyond the end of the buffer is unknown, by definition. */ |
| 721 | if (pos >= BUF_Z (buf)) |
| 722 | { |
| 723 | if (next) *next = BUF_Z (buf); |
| 724 | i_value = 0; |
| 725 | } |
| 726 | else if (next) |
| 727 | { |
| 728 | /* Scan forward from i to find the next differing position. */ |
| 729 | for (j = i + 1; j < c->cache_len; j++) |
| 730 | if (BOUNDARY_VALUE (c, j) != i_value) |
| 731 | break; |
| 732 | |
| 733 | if (j < c->cache_len) |
| 734 | *next = BOUNDARY_POS (c, j); |
| 735 | else |
| 736 | *next = BUF_Z (buf); |
| 737 | } |
| 738 | |
| 739 | return i_value; |
| 740 | } |
| 741 | } |
| 742 | |
| 743 | /* Return true if the text immediately before POS in BUF is known, for |
| 744 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest |
| 745 | position before POS where the knownness changes. */ |
| 746 | int region_cache_backward (struct buffer *buf, struct region_cache *c, |
| 747 | EMACS_INT pos, EMACS_INT *next) |
| 748 | { |
| 749 | revalidate_region_cache (buf, c); |
| 750 | |
| 751 | /* Before the beginning of the buffer is unknown, by |
| 752 | definition. */ |
| 753 | if (pos <= BUF_BEG (buf)) |
| 754 | { |
| 755 | if (next) *next = BUF_BEG (buf); |
| 756 | return 0; |
| 757 | } |
| 758 | |
| 759 | { |
| 760 | EMACS_INT i = find_cache_boundary (c, pos - 1); |
| 761 | int i_value = BOUNDARY_VALUE (c, i); |
| 762 | EMACS_INT j; |
| 763 | |
| 764 | if (next) |
| 765 | { |
| 766 | /* Scan backward from i to find the next differing position. */ |
| 767 | for (j = i - 1; j >= 0; j--) |
| 768 | if (BOUNDARY_VALUE (c, j) != i_value) |
| 769 | break; |
| 770 | |
| 771 | if (j >= 0) |
| 772 | *next = BOUNDARY_POS (c, j + 1); |
| 773 | else |
| 774 | *next = BUF_BEG (buf); |
| 775 | } |
| 776 | |
| 777 | return i_value; |
| 778 | } |
| 779 | } |
| 780 | |
| 781 | \f |
| 782 | /* Debugging: pretty-print a cache to the standard error output. */ |
| 783 | |
| 784 | void pp_cache (struct region_cache *) EXTERNALLY_VISIBLE; |
| 785 | void |
| 786 | pp_cache (struct region_cache *c) |
| 787 | { |
| 788 | int i; |
| 789 | EMACS_INT beg_u = c->buffer_beg + c->beg_unchanged; |
| 790 | EMACS_INT end_u = c->buffer_end - c->end_unchanged; |
| 791 | |
| 792 | fprintf (stderr, |
| 793 | "basis: %ld..%ld modified: %ld..%ld\n", |
| 794 | (long)c->buffer_beg, (long)c->buffer_end, |
| 795 | (long)beg_u, (long)end_u); |
| 796 | |
| 797 | for (i = 0; i < c->cache_len; i++) |
| 798 | { |
| 799 | EMACS_INT pos = BOUNDARY_POS (c, i); |
| 800 | |
| 801 | putc (((pos < beg_u) ? 'v' |
| 802 | : (pos == beg_u) ? '-' |
| 803 | : ' '), |
| 804 | stderr); |
| 805 | putc (((pos > end_u) ? '^' |
| 806 | : (pos == end_u) ? '-' |
| 807 | : ' '), |
| 808 | stderr); |
| 809 | fprintf (stderr, "%ld : %d\n", (long)pos, BOUNDARY_VALUE (c, i)); |
| 810 | } |
| 811 | } |