Commit | Line | Data |
---|---|---|
0f2d19dd JB |
1 | /* classes: h_files */ |
2 | ||
36284627 DH |
3 | #ifndef SCM_STRINGS_H |
4 | #define SCM_STRINGS_H | |
8c494e99 | 5 | |
08467a7e | 6 | /* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. |
8c494e99 | 7 | * |
73be1d9e | 8 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
9 | * modify it under the terms of the GNU Lesser General Public License |
10 | * as published by the Free Software Foundation; either version 3 of | |
11 | * the License, or (at your option) any later version. | |
8c494e99 | 12 | * |
53befeb7 NJ |
13 | * This library is distributed in the hope that it will be useful, but |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. | |
8c494e99 | 17 | * |
73be1d9e MV |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
21 | * 02110-1301 USA | |
73be1d9e | 22 | */ |
d3a6bc94 | 23 | |
0f2d19dd JB |
24 | \f |
25 | ||
b4309c3c | 26 | #include "libguile/__scm.h" |
0f2d19dd JB |
27 | |
28 | \f | |
29 | ||
3ee86942 | 30 | /* String representation. |
c829a427 | 31 | |
3ee86942 MV |
32 | A string is a piece of a stringbuf. A stringbuf can be used by |
33 | more than one string. When a string is written to and the | |
34 | stringbuf of that string is used by more than one string, a new | |
35 | stringbuf is created. That is, strings are copy-on-write. This | |
36 | behavior can be used to make the substring operation quite | |
37 | efficient. | |
c829a427 | 38 | |
3ee86942 MV |
39 | The implementation is tuned so that mutating a string is costly, |
40 | but just reading it is cheap and lock-free. | |
0f2d19dd | 41 | |
3ee86942 MV |
42 | There are also mutation-sharing strings. They refer to a part of |
43 | an ordinary string. Writing to a mutation-sharing string just | |
44 | writes to the ordinary string. | |
45 | ||
46 | ||
47 | Internal, low level interface to the character arrays | |
48 | ||
9c44cd45 MG |
49 | - Use scm_i_is_narrow_string to determine if the string is narrow or |
50 | wide. | |
51 | ||
52 | - Use scm_i_string_chars or scm_i_string_wide_chars to get a | |
53 | pointer to the byte or scm_t_wchar array of a string for reading. | |
54 | Use scm_i_string_length to get the number of characters in that | |
55 | array. The array is not null-terminated. | |
3ee86942 MV |
56 | |
57 | - The array is valid as long as the corresponding SCM object is | |
58 | protected but only until the next SCM_TICK. During such a 'safe | |
59 | point', strings might change their representation. | |
60 | ||
9c44cd45 MG |
61 | - Use scm_i_string_start_writing to get a version of the string |
62 | ready for reading and writing. This is a potentially costly | |
63 | operation since it implements the copy-on-write behavior. When | |
64 | done with the writing, call scm_i_string_stop_writing. You must | |
65 | do this before the next SCM_TICK. (This means before calling |
66 | almost any other scm_ function, and you can't allow throws, of |
67 | course.) | |
68 | ||
69 | - New strings can be created with scm_i_make_string or | |
70 | scm_i_make_wide_string. This gives access to a writable pointer | |
71 | that remains valid as long as nobody else makes a copy-on-write | |
72 | substring of the string. Do not call scm_i_string_stop_writing | |
73 | for this pointer. | |
74 | ||
75 | - Alternately, scm_i_string_ref and scm_i_string_set_x can be used | |
76 | to read and write strings without worrying about whether the | |
77 | string is narrow or wide. scm_i_string_set_x still needs to be | |
78 | bracketed by scm_i_string_start_writing and | |
79 | scm_i_string_stop_writing. | |
3ee86942 MV |
80 | |
81 | Legacy interface | |
82 | ||
274acbda | 83 | - SCM_STRINGP is just scm_is_string. |
3ee86942 MV |
84 | |
85 | - SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately | |
86 | calls scm_i_string_stop_writing, hoping for the best. SCM_STRING_LENGTH |
274acbda | 87 | is the same as scm_i_string_length. SCM_STRING_CHARS will throw |
b3da54d1 | 88 | an error for strings that are not null-terminated. There is |
9c44cd45 | 89 | no wide version of this interface. |
3ee86942 | 90 | */ |
0f2d19dd | 91 | |
eca29b02 MG |
92 | /* A type indicating what strategy to take when string locale |
93 | conversion is unsuccessful. */ | |
94 | typedef enum | |
95 | { | |
96 | SCM_FAILED_CONVERSION_ERROR = SCM_ICONVEH_ERROR, | |
97 | SCM_FAILED_CONVERSION_QUESTION_MARK = SCM_ICONVEH_QUESTION_MARK, | |
98 | SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE = SCM_ICONVEH_ESCAPE_SEQUENCE | |
99 | } scm_t_string_failed_conversion_handler; | |
100 | ||
e7efe8e7 AW |
101 | SCM_INTERNAL SCM scm_nullstr; |
102 | ||
33b001fd MV |
103 | SCM_API SCM scm_string_p (SCM x); |
104 | SCM_API SCM scm_string (SCM chrs); | |
c829a427 MV |
105 | SCM_API SCM scm_make_string (SCM k, SCM chr); |
106 | SCM_API SCM scm_string_length (SCM str); | |
f8ba2bb9 | 107 | SCM_API SCM scm_string_bytes_per_char (SCM str); |
c829a427 MV |
108 | SCM_API SCM scm_string_ref (SCM str, SCM k); |
109 | SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); | |
110 | SCM_API SCM scm_substring (SCM str, SCM start, SCM end); | |
ed35de72 | 111 | SCM_API SCM scm_substring_read_only (SCM str, SCM start, SCM end); |
3ee86942 MV |
112 | SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end); |
113 | SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end); | |
c829a427 MV |
114 | SCM_API SCM scm_string_append (SCM args); |
115 | ||
cf313a94 MG |
116 | SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding, |
117 | scm_t_string_failed_conversion_handler handler); | |
3ee86942 MV |
118 | SCM_API SCM scm_c_make_string (size_t len, SCM chr); |
119 | SCM_API size_t scm_c_string_length (SCM str); | |
071bb6a8 | 120 | SCM_API size_t scm_c_symbol_length (SCM sym); |
3ee86942 MV |
121 | SCM_API SCM scm_c_string_ref (SCM str, size_t pos); |
122 | SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr); | |
123 | SCM_API SCM scm_c_substring (SCM str, size_t start, size_t end); | |
ed35de72 | 124 | SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end); |
3ee86942 MV |
125 | SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end); |
126 | SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end); | |
0f2d19dd | 127 | |
d40e1ca8 AW |
128 | /* Use locale encoding for user input, user output, or interacting with |
129 | the C library. Use latin1 for ASCII, and for literals in source | |
130 | code. Use utf8 for interaction with modern libraries which deal in | |
131 | UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a | |
132 | specific encoding. */ | |
133 | ||
c829a427 MV |
134 | SCM_API SCM scm_from_locale_string (const char *str); |
135 | SCM_API SCM scm_from_locale_stringn (const char *str, size_t len); | |
136 | SCM_API SCM scm_take_locale_string (char *str); | |
137 | SCM_API SCM scm_take_locale_stringn (char *str, size_t len); | |
138 | SCM_API char *scm_to_locale_string (SCM str); | |
139 | SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); | |
d40e1ca8 AW |
140 | |
141 | SCM_API SCM scm_from_latin1_string (const char *str); | |
142 | SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len); | |
143 | SCM_API char *scm_to_latin1_string (SCM str); | |
144 | SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp); | |
145 | ||
146 | SCM_API char *scm_to_utf8_string (SCM str); | |
147 | SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp); | |
148 | SCM_API SCM scm_from_utf8_string (const char *str); | |
149 | SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len); | |
150 | ||
647dc1ac LC |
151 | SCM_API scm_t_wchar *scm_to_utf32_string (SCM str); |
152 | SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp); | |
153 | SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str); | |
154 | SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len); | |
155 | ||
08467a7e AW |
156 | SCM_API char *scm_to_port_string (SCM str, SCM port); |
157 | SCM_API char *scm_to_port_stringn (SCM str, size_t *lenp, SCM port); | |
158 | SCM_API SCM scm_from_port_string (const char *str, SCM port); | |
159 | SCM_API SCM scm_from_port_stringn (const char *str, size_t len, SCM port); | |
160 | ||
cf313a94 MG |
161 | SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, |
162 | scm_t_string_failed_conversion_handler handler); | |
c829a427 | 163 | SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); |
6ba93e5e | 164 | |
edb7bb47 JG |
165 | SCM_API SCM scm_string_normalize_nfd (SCM str); |
166 | SCM_API SCM scm_string_normalize_nfkd (SCM str); | |
167 | SCM_API SCM scm_string_normalize_nfc (SCM str); | |
168 | SCM_API SCM scm_string_normalize_nfkc (SCM str); | |
169 | ||
3ee86942 MV |
170 | SCM_API SCM scm_makfromstrs (int argc, char **argv); |
171 | ||
35920c00 LC |
172 | \f |
173 | /* internal constants */ | |
174 | ||
175 | /* Type tag for read-only strings. */ | |
176 | #define scm_tc7_ro_string (scm_tc7_string + 0x200) | |
177 | ||
5f236208 | 178 | /* Flags for shared and wide strings. */ |
35920c00 | 179 | #define SCM_I_STRINGBUF_F_SHARED 0x100 |
5f236208 | 180 | #define SCM_I_STRINGBUF_F_WIDE 0x400 |
35920c00 | 181 | |
db071766 AW |
182 | SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port, |
183 | scm_print_state *pstate); | |
35920c00 | 184 | |
3ee86942 MV |
185 | /* internal accessor functions. Arguments must be valid. */ |
186 | ||
190d4b0d LC |
187 | SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap, |
188 | int read_only_p); | |
189 | SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap, | |
190 | int read_only_p); | |
191 | SCM_INTERNAL SCM scm_i_set_string_read_only_x (SCM str); | |
102dbb6f LC |
192 | SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end); |
193 | SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end); | |
194 | SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end); | |
195 | SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end); | |
196 | SCM_INTERNAL size_t scm_i_string_length (SCM str); | |
197 | SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str); | |
198 | SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str); | |
32be5735 | 199 | SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str); |
100e20c7 LC |
200 | SCM_INTERNAL const void *scm_i_string_data (SCM str); |
201 | ||
9c44cd45 | 202 | SCM_INTERNAL SCM scm_i_string_start_writing (SCM str); |
102dbb6f | 203 | SCM_INTERNAL void scm_i_string_stop_writing (void); |
9c44cd45 MG |
204 | SCM_INTERNAL int scm_i_is_narrow_string (SCM str); |
205 | SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); | |
889975e5 | 206 | SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c); |
3f47e526 | 207 | SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr); |
9c44cd45 | 208 | SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); |
3ee86942 MV |
209 | /* internal functions related to symbols. */ |
210 | ||
102dbb6f LC |
211 | SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags, |
212 | unsigned long hash, SCM props); | |
213 | SCM_INTERNAL SCM | |
fd0a5bbc HWN |
214 | scm_i_c_make_symbol (const char *name, size_t len, |
215 | scm_t_bits flags, unsigned long hash, SCM props); | |
102dbb6f | 216 | SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym); |
9c44cd45 | 217 | SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym); |
102dbb6f | 218 | SCM_INTERNAL size_t scm_i_symbol_length (SCM sym); |
9c44cd45 | 219 | SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str); |
587a3355 | 220 | SCM_INTERNAL int scm_i_try_narrow_string (SCM str); |
102dbb6f | 221 | SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end); |
9c44cd45 | 222 | SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x); |
d14418a5 | 223 | SCM_INTERNAL void scm_encoding_error (const char *subr, int err, |
6851d3be | 224 | const char *message, SCM port, SCM chr); |
c62da8f8 LC |
225 | SCM_INTERNAL void scm_decoding_error (const char *subr, int err, |
226 | const char *message, SCM port); | |
3ee86942 | 227 | |
c829a427 | 228 | /* internal utility functions. */ |
6ba93e5e | 229 | |
102dbb6f | 230 | SCM_INTERNAL char **scm_i_allocate_string_pointers (SCM list); |
102dbb6f LC |
231 | SCM_INTERNAL void scm_i_get_substring_spec (size_t len, |
232 | SCM start, size_t *cstart, | |
233 | SCM end, size_t *cend); | |
6ba93e5e | 234 | |
6ce6923b MG |
235 | /* Debugging functions */ |
236 | ||
237 | SCM_API SCM scm_sys_string_dump (SCM); | |
238 | SCM_API SCM scm_sys_symbol_dump (SCM); | |
56a3dcd4 | 239 | #ifdef SCM_STRING_LENGTH_HISTOGRAM |
6ce6923b MG |
240 | SCM_API SCM scm_sys_stringbuf_hist (void); |
241 | #endif | |
242 | ||
cf313a94 MG |
243 | |
244 | ||
102dbb6f | 245 | SCM_INTERNAL void scm_init_strings (void); |
6ba93e5e | 246 | |
36284627 | 247 | #endif /* SCM_STRINGS_H */ |
89e00824 ML |
248 | |
249 | /* | |
250 | Local Variables: | |
251 | c-file-style: "gnu" | |
252 | End: | |
253 | */ |