| 1 | /* Header file: Caching facts about regions of the buffer, for optimization. |
| 2 | Copyright (C) 1985, 1986, 1993, 1995 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of GNU Emacs. |
| 5 | |
| 6 | GNU Emacs is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 2, or (at your option) |
| 9 | any later version. |
| 10 | |
| 11 | GNU Emacs is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with GNU Emacs; see the file COPYING. If not, write to |
| 18 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 19 | Boston, MA 02111-1307, USA. */ |
| 20 | |
| 21 | |
| 22 | /* This code was written by Jim Blandy <jimb@cs.oberlin.edu> to help |
| 23 | GNU Emacs better support the gene editor written for the University |
| 24 | of Illinois at Urbana-Champaign's Ribosome Database Project (RDP). |
| 25 | |
| 26 | Emacs implements line operations (finding the beginning/end of the |
| 27 | line, vertical motion, all the redisplay stuff) by searching for |
| 28 | newlines in the buffer. Usually, this is a good design; it's very |
| 29 | clean to just represent the buffer as an unstructured string of |
| 30 | characters, and the lines in most files are very short (less than |
| 31 | eighty characters), meaning that scanning usually costs about the |
| 32 | same as the overhead of maintaining some more complicated data |
| 33 | structure. |
| 34 | |
| 35 | However, some applications, like gene editing, make use of very |
| 36 | long lines --- on the order of tens of kilobytes. In such cases, |
| 37 | it may well be worthwhile to try to avoid scanning, because the |
| 38 | scans have become two orders of magnitude more expensive. It would |
| 39 | be nice if this speedup could preserve the simplicity of the |
| 40 | existing data structure, and disturb as little of the existing code |
| 41 | as possible. |
| 42 | |
| 43 | So here's the tack. We add some caching to the scan_buffer |
| 44 | function, so that when it searches for a newline, it notes that the |
| 45 | region between the start and end of the search contained no |
| 46 | newlines; then, the next time around, it consults this cache to see |
| 47 | if there are regions of text it can skip over completely. The |
| 48 | buffer modification primitives invalidate this cache. |
| 49 | |
| 50 | (Note: Since the redisplay code needs similar information on |
| 51 | modified regions of the buffer, we can use the code that helps out |
| 52 | redisplay as a guide to where we need to add our own code to |
| 53 | invalidate our cache. prepare_to_modify_buffer seems to be the |
| 54 | central spot.) |
| 55 | |
| 56 | Note that the cache code itself never mentions newlines |
| 57 | specifically, so if you wanted to cache other properties of regions |
| 58 | of the buffer, you could use this code pretty much unchanged. So |
| 59 | this cache really holds "known/unknown" information --- "I know |
| 60 | this region has property P" vs. "I don't know if this region has |
| 61 | property P or not." */ |
| 62 | |
| 63 | |
| 64 | /* Allocate, initialize and return a new, empty region cache. */ |
| 65 | struct region_cache *new_region_cache P_ ((void)); |
| 66 | |
| 67 | /* Free a region cache. */ |
| 68 | void free_region_cache P_ ((struct region_cache *)); |
| 69 | |
| 70 | /* Assert that the region of BUF between START and END (absolute |
| 71 | buffer positions) is "known," for the purposes of CACHE (e.g. "has |
| 72 | no newlines", in the case of the line cache). */ |
| 73 | extern void know_region_cache P_ ((struct buffer *BUF, |
| 74 | struct region_cache *CACHE, |
| 75 | int START, int END)); |
| 76 | |
| 77 | /* Indicate that a section of BUF has changed, to invalidate CACHE. |
| 78 | HEAD is the number of chars unchanged at the beginning of the buffer. |
| 79 | TAIL is the number of chars unchanged at the end of the buffer. |
| 80 | NOTE: this is *not* the same as the ending position of modified |
| 81 | region. |
| 82 | (This way of specifying regions makes more sense than absolute |
| 83 | buffer positions in the presence of insertions and deletions; the |
| 84 | args to pass are the same before and after such an operation.) */ |
| 85 | extern void invalidate_region_cache P_ ((struct buffer *BUF, |
| 86 | struct region_cache *CACHE, |
| 87 | int HEAD, int TAIL)); |
| 88 | |
| 89 | /* The scanning functions. |
| 90 | |
| 91 | Basically, if you're scanning forward/backward from position POS, |
| 92 | and region_cache_forward/backward returns true, you can skip all |
| 93 | the text between POS and *NEXT. And if the function returns false, |
| 94 | you should examine all the text from POS to *NEXT, and call |
| 95 | know_region_cache depending on what you find there; this way, you |
| 96 | might be able to avoid scanning it again. */ |
| 97 | |
| 98 | /* Return true if the text immediately after POS in BUF is known, for |
| 99 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest |
| 100 | position after POS where the knownness changes. */ |
| 101 | extern int region_cache_forward P_ ((struct buffer *BUF, |
| 102 | struct region_cache *CACHE, |
| 103 | int POS, |
| 104 | int *NEXT)); |
| 105 | |
| 106 | /* Return true if the text immediately before POS in BUF is known, for |
| 107 | the purposes of CACHE. If NEXT is non-zero, set *NEXT to the nearest |
| 108 | position before POS where the knownness changes. */ |
| 109 | extern int region_cache_backward P_ ((struct buffer *BUF, |
| 110 | struct region_cache *CACHE, |
| 111 | int POS, |
| 112 | int *NEXT)); |