| 1 | /* Header file: Caching facts about regions of the buffer, for optimization. |
| 2 | Copyright (C) 1985, 1986, 1993, 1995, 2002, 2003, 2004, |
| 3 | 2005 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of GNU Emacs. |
| 6 | |
| 7 | GNU Emacs is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 2, or (at your option) |
| 10 | any later version. |
| 11 | |
| 12 | GNU Emacs is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with GNU Emacs; see the file COPYING. If not, write to |
| 19 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 20 | Boston, MA 02110-1301, USA. */ |
| 21 | |
| 22 | |
| 23 | /* This code was written by Jim Blandy <jimb@cs.oberlin.edu> to help |
| 24 | GNU Emacs better support the gene editor written for the University |
| 25 | of Illinois at Urbana-Champaign's Ribosome Database Project (RDP). |
| 26 | |
| 27 | Emacs implements line operations (finding the beginning/end of the |
| 28 | line, vertical motion, all the redisplay stuff) by searching for |
| 29 | newlines in the buffer. Usually, this is a good design; it's very |
| 30 | clean to just represent the buffer as an unstructured string of |
| 31 | characters, and the lines in most files are very short (less than |
| 32 | eighty characters), meaning that scanning usually costs about the |
| 33 | same as the overhead of maintaining some more complicated data |
| 34 | structure. |
| 35 | |
| 36 | However, some applications, like gene editing, make use of very |
| 37 | long lines --- on the order of tens of kilobytes. In such cases, |
| 38 | it may well be worthwhile to try to avoid scanning, because the |
| 39 | scans have become two orders of magnitude more expensive. It would |
| 40 | be nice if this speedup could preserve the simplicity of the |
| 41 | existing data structure, and disturb as little of the existing code |
| 42 | as possible. |
| 43 | |
| 44 | So here's the tack. We add some caching to the scan_buffer |
| 45 | function, so that when it searches for a newline, it notes that the |
| 46 | region between the start and end of the search contained no |
| 47 | newlines; then, the next time around, it consults this cache to see |
| 48 | if there are regions of text it can skip over completely. The |
| 49 | buffer modification primitives invalidate this cache. |
| 50 | |
| 51 | (Note: Since the redisplay code needs similar information on |
| 52 | modified regions of the buffer, we can use the code that helps out |
| 53 | redisplay as a guide to where we need to add our own code to |
| 54 | invalidate our cache. prepare_to_modify_buffer seems to be the |
| 55 | central spot.) |
| 56 | |
| 57 | Note that the cache code itself never mentions newlines |
| 58 | specifically, so if you wanted to cache other properties of regions |
| 59 | of the buffer, you could use this code pretty much unchanged. So |
| 60 | this cache really holds "known/unknown" information --- "I know |
| 61 | this region has property P" vs. "I don't know if this region has |
| 62 | property P or not." */ |
| 63 | |
| 64 | |
| 65 | /* Allocate, initialize and return a new, empty region cache (see free_region_cache for freeing one). */ |
| 66 | extern struct region_cache *new_region_cache P_ ((void)); |
| 67 | |
| 68 | /* Free the region cache CACHE. */ |
| 69 | extern void free_region_cache P_ ((struct region_cache *CACHE)); |
| 70 | |
| 71 | /* Record in CACHE that the region of BUF between START and END |
| 72 | (absolute buffer positions) is "known" (e.g. "has no newlines", in |
| 73 | the case of the line cache), so that later scans may skip over it. */ |
| 74 | extern void know_region_cache P_ ((struct buffer *BUF, |
| 75 | struct region_cache *CACHE, |
| 76 | int START, int END)); |
| 77 | |
| 78 | /* Indicate that a section of BUF has changed, to invalidate CACHE. |
| 79 | HEAD is the number of characters unchanged at the beginning of the buffer. |
| 80 | TAIL is the number of characters unchanged at the end of the buffer. |
| 81 | NOTE: TAIL is *not* the same as the ending position of the modified |
| 82 | region. |
| 83 | (This way of specifying regions makes more sense than absolute |
| 84 | buffer positions in the presence of insertions and deletions; the |
| 85 | args to pass are the same before and after such an operation.) */ |
| 86 | extern void invalidate_region_cache P_ ((struct buffer *BUF, |
| 87 | struct region_cache *CACHE, |
| 88 | int HEAD, int TAIL)); |
| 89 | |
| 90 | /* The scanning functions. |
| 91 | |
| 92 | Basically, if you're scanning forward/backward from position POS, |
| 93 | and region_cache_forward/backward returns true, you can skip all |
| 94 | the text between POS and *NEXT. And if the function returns false, |
| 95 | you should examine all the text from POS to *NEXT, and call |
| 96 | know_region_cache depending on what you find there; this way, you |
| 97 | might be able to avoid scanning it again. */ |
| 98 | |
| 99 | /* Return non-zero if the text immediately after POS in BUF is known, |
| 100 | for the purposes of CACHE. If NEXT is non-null, set *NEXT to the |
| 101 | nearest position after POS where the knownness changes. */ |
| 102 | extern int region_cache_forward P_ ((struct buffer *BUF, |
| 103 | struct region_cache *CACHE, |
| 104 | int POS, |
| 105 | int *NEXT)); |
| 106 | |
| 107 | /* Return non-zero if the text immediately before POS in BUF is known, |
| 108 | for the purposes of CACHE. If NEXT is non-null, set *NEXT to the |
| 109 | nearest position before POS where the knownness changes. */ |
| 110 | extern int region_cache_backward P_ ((struct buffer *BUF, |
| 111 | struct region_cache *CACHE, |
| 112 | int POS, |
| 113 | int *NEXT)); |
| 114 | |
| 115 | /* arch-tag: 70f79125-ef22-4f58-9aec-a48ca2791435 |
| 116 | (do not change this comment) */ |