| 1 | /* classes: h_files */ |
| 2 | |
| 3 | #ifndef SCM_STRINGS_H |
| 4 | #define SCM_STRINGS_H |
| 5 | |
| 6 | /* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. |
| 7 | * |
| 8 | * This library is free software; you can redistribute it and/or |
| 9 | * modify it under the terms of the GNU Lesser General Public License |
| 10 | * as published by the Free Software Foundation; either version 3 of |
| 11 | * the License, or (at your option) any later version. |
| 12 | * |
| 13 | * This library is distributed in the hope that it will be useful, but |
| 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | * Lesser General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU Lesser General Public |
| 19 | * License along with this library; if not, write to the Free Software |
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 21 | * 02110-1301 USA |
| 22 | */ |
| 23 | |
| 24 | \f |
| 25 | |
| 26 | #include "libguile/__scm.h" |
| 27 | |
| 28 | \f |
| 29 | |
| 30 | /* String representation. |
| 31 | |
| 32 | A string is a piece of a stringbuf. A stringbuf can be used by |
| 33 | more than one string. When a string is written to and the |
| 34 | stringbuf of that string is used by more than one string, a new |
| 35 | stringbuf is created. That is, strings are copy-on-write. This |
| 36 | behavior can be used to make the substring operation quite |
| 37 | efficient. |
| 38 | |
| 39 | The implementation is tuned so that mutating a string is costly, |
| 40 | but just reading it is cheap and lock-free. |
| 41 | |
| 42 | There are also mutation-sharing strings. They refer to a part of |
| 43 | an ordinary string. Writing to a mutation-sharing string just |
| 44 | writes to the ordinary string. |
| 45 | |
| 46 | |
| 47 | Internal, low level interface to the character arrays |
| 48 | |
| 49 | - Use scm_i_is_narrow_string to determine whether the string is narrow or |
| 50 | wide. |
| 51 | |
| 52 | - Use scm_i_string_chars or scm_i_string_wide_chars to get a |
| 53 | pointer to the byte or scm_t_wchar array of a string for reading. |
| 54 | Use scm_i_string_length to get the number of characters in that |
| 55 | array. The array is not null-terminated. |
| 56 | |
| 57 | - The array is valid as long as the corresponding SCM object is |
| 58 | protected but only until the next SCM_TICK. During such a 'safe |
| 59 | point', strings might change their representation. |
| 60 | |
| 61 | - Use scm_i_string_start_writing to get a version of the string |
| 62 | ready for reading and writing. This is a potentially costly |
| 63 | operation since it implements the copy-on-write behavior. When |
| 64 | done with the writing, call scm_i_string_stop_writing. You must |
| 65 | do this before the next SCM_TICK. (This means, before calling |
| 66 | almost any other scm_ function and you can't allow throws, of |
| 67 | course.) |
| 68 | |
| 69 | - New strings can be created with scm_i_make_string or |
| 70 | scm_i_make_wide_string. This gives access to a writable pointer |
| 71 | that remains valid as long as nobody else makes a copy-on-write |
| 72 | substring of the string. Do not call scm_i_string_stop_writing |
| 73 | for this pointer. |
| 74 | |
| 75 | - Alternately, scm_i_string_ref and scm_i_string_set_x can be used |
| 76 | to read and write strings without worrying about whether the |
| 77 | string is narrow or wide. scm_i_string_set_x still needs to be |
| 78 | bracketed by scm_i_string_start_writing and |
| 79 | scm_i_string_stop_writing. |
| 80 | |
| 81 | Legacy interface |
| 82 | |
| 83 | - SCM_STRINGP is just scm_is_string. |
| 84 | |
| 85 | - SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately |
| 86 | calls scm_i_string_stop_writing, hoping for the best. SCM_STRING_LENGTH |
| 87 | is the same as scm_i_string_length. SCM_STRING_CHARS will throw |
| 88 | an error for strings that are not null-terminated. There is |
| 89 | no wide version of this interface. |
| 90 | */ |
| 91 | |
| 92 | /* A type indicating what strategy to take when string locale conversion is unsuccessful. It is the type of the 'handler' argument of scm_from_stringn and scm_to_stringn declared in this header. |
| 93 | Each enumerator is defined as the SCM_ICONVEH_* constant with the same suffix; those constants are not declared in this header. */ |
| 94 | typedef enum |
| 95 | { |
| 96 | SCM_FAILED_CONVERSION_ERROR = SCM_ICONVEH_ERROR, |
| 97 | SCM_FAILED_CONVERSION_QUESTION_MARK = SCM_ICONVEH_QUESTION_MARK, |
| 98 | SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE = SCM_ICONVEH_ESCAPE_SEQUENCE |
| 99 | } scm_t_string_failed_conversion_handler; |
| 100 | |
| 101 | SCM_INTERNAL SCM scm_nullstr; /* Global SCM value shared through this header. Judging by the name it is presumably the empty string -- TODO confirm against the definition. */ |
| 102 | /* String operations with Scheme-level signatures: every argument and every result is a SCM value. The scm_c_* declarations that follow take size_t lengths and positions instead. */ |
| 103 | SCM_API SCM scm_string_p (SCM x); |
| 104 | SCM_API SCM scm_string (SCM chrs); |
| 105 | SCM_API SCM scm_make_string (SCM k, SCM chr); |
| 106 | SCM_API SCM scm_string_length (SCM str); |
| 107 | SCM_API SCM scm_string_bytes_per_char (SCM str); |
| 108 | SCM_API SCM scm_string_ref (SCM str, SCM k); |
| 109 | SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); |
| 110 | SCM_API SCM scm_substring (SCM str, SCM start, SCM end); |
| 111 | SCM_API SCM scm_substring_read_only (SCM str, SCM start, SCM end); |
| 112 | SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end); |
| 113 | SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end); |
| 114 | SCM_API SCM scm_string_append (SCM args); /* A single SCM carries all the arguments; presumably a list of strings -- TODO confirm. */ |
| 115 | /* C-level counterparts: lengths, positions and substring bounds are passed as size_t rather than as SCM values. scm_from_stringn additionally takes an encoding argument (const char *) and a failed-conversion handler. */ |
| 116 | SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding, |
| 117 | scm_t_string_failed_conversion_handler handler); |
| 118 | SCM_API SCM scm_c_make_string (size_t len, SCM chr); |
| 119 | SCM_API size_t scm_c_string_length (SCM str); |
| 120 | SCM_API size_t scm_c_symbol_length (SCM sym); |
| 121 | SCM_API SCM scm_c_string_ref (SCM str, size_t pos); |
| 122 | SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr); |
| 123 | SCM_API SCM scm_c_substring (SCM str, size_t start, size_t end); |
| 124 | SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end); |
| 125 | SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end); |
| 126 | SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end); |
| 127 | |
| 128 | /* Choosing a conversion family: use locale encoding for user input, user |
| 129 | output, or interacting with the C library. Use latin1 for ASCII, and for |
| 130 | literals in source code. Use utf8 for interaction with modern libraries which |
| 131 | deal in UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a specific |
| 132 | encoding. In every family the stringn variant carries an explicit length: a size_t len parameter for the scm_from and scm_take functions, a size_t pointer named lenp for the scm_to functions. */ |
| 133 | /* Locale family. The scm_take variants differ from the scm_from variants in taking a non-const char pointer; presumably the new string takes over the buffer -- TODO confirm ownership. Ownership of the char pointers returned by the scm_to functions is not stated in this header. */ |
| 134 | SCM_API SCM scm_from_locale_string (const char *str); |
| 135 | SCM_API SCM scm_from_locale_stringn (const char *str, size_t len); |
| 136 | SCM_API SCM scm_take_locale_string (char *str); |
| 137 | SCM_API SCM scm_take_locale_stringn (char *str, size_t len); |
| 138 | SCM_API char *scm_to_locale_string (SCM str); |
| 139 | SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); |
| 140 | /* Latin-1 family: same shapes as the locale family, without scm_take variants. */ |
| 141 | SCM_API SCM scm_from_latin1_string (const char *str); |
| 142 | SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len); |
| 143 | SCM_API char *scm_to_latin1_string (SCM str); |
| 144 | SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp); |
| 145 | /* UTF-8 family. */ |
| 146 | SCM_API char *scm_to_utf8_string (SCM str); |
| 147 | SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp); |
| 148 | SCM_API SCM scm_from_utf8_string (const char *str); |
| 149 | SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len); |
| 150 | /* UTF-32 family: the C side is an array of scm_t_wchar rather than char. */ |
| 151 | SCM_API scm_t_wchar *scm_to_utf32_string (SCM str); |
| 152 | SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp); |
| 153 | SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str); |
| 154 | SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len); |
| 155 | /* Port-relative family: same shapes with an extra SCM port argument; presumably the port selects the encoding -- TODO confirm. */ |
| 156 | SCM_API char *scm_to_port_string (SCM str, SCM port); |
| 157 | SCM_API char *scm_to_port_stringn (SCM str, size_t *lenp, SCM port); |
| 158 | SCM_API SCM scm_from_port_string (const char *str, SCM port); |
| 159 | SCM_API SCM scm_from_port_stringn (const char *str, size_t len, SCM port); |
| 160 | /* Generic conversion with a caller-chosen encoding and failure handler, followed by a locale variant that takes a caller-supplied buffer and a max_len bound. */ |
| 161 | SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, |
| 162 | scm_t_string_failed_conversion_handler handler); |
| 163 | SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); |
| 164 | /* Normalization entry points, one per form named in the suffix (NFD, NFKD, NFC, NFKC). Each takes one SCM and returns a SCM; whether the argument is modified or a new string is returned is not visible from this header. */ |
| 165 | SCM_API SCM scm_string_normalize_nfd (SCM str); |
| 166 | SCM_API SCM scm_string_normalize_nfkd (SCM str); |
| 167 | SCM_API SCM scm_string_normalize_nfc (SCM str); |
| 168 | SCM_API SCM scm_string_normalize_nfkc (SCM str); |
| 169 | /* Takes an argc/argv-style pair and returns a SCM. Presumably builds a Scheme list of strings from a C string vector -- TODO confirm against the definition. */ |
| 170 | SCM_API SCM scm_makfromstrs (int argc, char **argv); |
| 171 | |
| 172 | \f |
| 173 | /* Internal constants. */ |
| 174 | |
| 175 | /* Type tag for read-only strings: scm_tc7_string plus 0x200. scm_tc7_string itself is not defined in this header. */ |
| 176 | #define scm_tc7_ro_string (scm_tc7_string + 0x200) |
| 177 | |
| 178 | /* Flags for shared and wide strings, named after stringbufs. The two values are distinct single bits (0x100 and 0x400); the word they are stored in is not visible from this header. */ |
| 179 | #define SCM_I_STRINGBUF_F_SHARED 0x100 |
| 180 | #define SCM_I_STRINGBUF_F_WIDE 0x400 |
| 181 | /* Printer for stringbuf objects, going by its name: takes the object (exp), a port and a print state, and returns nothing. */ |
| 182 | SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port, |
| 183 | scm_print_state *pstate); |
| 184 | |
| 185 | /* Internal accessor functions. Arguments must be valid. See the 'Internal, low level interface' notes near the top of this header for how long character pointers stay usable. Per those notes scm_i_make_string and scm_i_make_wide_string give access to a writable pointer, presumably through datap -- TODO confirm. */ |
| 186 | |
| 187 | SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap, |
| 188 | int read_only_p); |
| 189 | SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap, |
| 190 | int read_only_p); |
| 191 | SCM_INTERNAL SCM scm_i_set_string_read_only_x (SCM str); |
| 192 | SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end); |
| 193 | SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end); |
| 194 | SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end); |
| 195 | SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end); |
| 196 | SCM_INTERNAL size_t scm_i_string_length (SCM str); |
| 197 | SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str); |
| 198 | SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str); |
| 199 | SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str); |
| 200 | SCM_INTERNAL const void *scm_i_string_data (SCM str); |
| 201 | /* Writing must be bracketed by scm_i_string_start_writing and scm_i_string_stop_writing, including calls to scm_i_string_set_x (see the notes near the top of this header). */ |
| 202 | SCM_INTERNAL SCM scm_i_string_start_writing (SCM str); |
| 203 | SCM_INTERNAL void scm_i_string_stop_writing (void); |
| 204 | SCM_INTERNAL int scm_i_is_narrow_string (SCM str); |
| 205 | SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); |
| 206 | SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c); |
| 207 | SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr); |
| 208 | SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); |
| 209 | /* Internal functions related to symbols. Three declarations in this group are, by their names and parameters, not symbol-specific: scm_i_try_narrow_string (takes a string) and scm_encoding_error / scm_decoding_error (take a subr name, an error number, a message and a port). */ |
| 210 | |
| 211 | SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags, |
| 212 | unsigned long hash, SCM props); |
| 213 | SCM_INTERNAL SCM |
| 214 | scm_i_c_make_symbol (const char *name, size_t len, |
| 215 | scm_t_bits flags, unsigned long hash, SCM props); |
| 216 | SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym); |
| 217 | SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym); |
| 218 | SCM_INTERNAL size_t scm_i_symbol_length (SCM sym); |
| 219 | SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str); |
| 220 | SCM_INTERNAL int scm_i_try_narrow_string (SCM str); |
| 221 | SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end); |
| 222 | SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x); |
| 223 | SCM_INTERNAL void scm_encoding_error (const char *subr, int err, |
| 224 | const char *message, SCM port, SCM chr); |
| 225 | SCM_INTERNAL void scm_decoding_error (const char *subr, int err, |
| 226 | const char *message, SCM port); |
| 227 | |
| 228 | /* Internal utility functions. scm_i_allocate_string_pointers takes a SCM list and returns a char pointer array. scm_i_get_substring_spec takes a length plus SCM start and end values together with size_t pointers cstart and cend, presumably the outputs for the converted bounds -- TODO confirm. */ |
| 229 | |
| 230 | SCM_INTERNAL char **scm_i_allocate_string_pointers (SCM list); |
| 231 | SCM_INTERNAL void scm_i_get_substring_spec (size_t len, |
| 232 | SCM start, size_t *cstart, |
| 233 | SCM end, size_t *cend); |
| 234 | |
| 235 | /* Debugging functions. The two dump functions take one SCM (a string and a symbol respectively, going by their names) and return a SCM. scm_sys_stringbuf_hist takes no arguments and is only declared when SCM_STRING_LENGTH_HISTOGRAM is defined. */ |
| 236 | |
| 237 | SCM_API SCM scm_sys_string_dump (SCM str); |
| 238 | SCM_API SCM scm_sys_symbol_dump (SCM sym); |
| 239 | #ifdef SCM_STRING_LENGTH_HISTOGRAM |
| 240 | SCM_API SCM scm_sys_stringbuf_hist (void); |
| 241 | #endif |
| 242 | |
| 243 | |
| 244 | /* Initialisation entry point for the string code, going by its name; takes no arguments and returns nothing. Presumably called once during libguile start-up -- TODO confirm. */ |
| 245 | SCM_INTERNAL void scm_init_strings (void); |
| 246 | |
| 247 | #endif /* SCM_STRINGS_H */ |
| 248 | |
| 249 | /* |
| 250 | Local Variables: |
| 251 | c-file-style: "gnu" |
| 252 | End: |
| 253 | */ |