Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | /* |
2 | ** Copyright 2011 Double Precision, Inc. | |
3 | ** See COPYING for distribution information. | |
4 | ** | |
5 | */ | |
6 | ||
7 | #include "unicode_config.h" | |
8 | #include "unicode.h" | |
9 | ||
10 | #include <unistd.h> | |
11 | #include <stdint.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | #include <errno.h> | |
15 | ||
16 | #include "linebreaktab_internal.h" | |
17 | ||
18 | #include "linebreaktab.h" | |
19 | ||
/*
** Pseudo linebreak class meaning "start of text". unicode_lb_reset() stores
** it as the previous class, and next_def_nolb25() tests for it to recognize
** the first character that is fed in.
*/
#define UNICODE_LB_SOT 0xFF
21 | ||
/*
** State of one linebreaking pass.
*/
struct unicode_lb_info {
	/* Callback that receives each linebreak value, together with cb_arg */
	int (*cb_func)(int, void *);
	void *cb_arg;

	/* UNICODE_LB_OPT_* flags, stored by unicode_lb_set_opts() */
	int opts;

	/*
	** LB25 lookahead: the class (OP or HY) whose result has been
	** deferred, and the number of CMs seen since.
	*/
	uint8_t savedclass;
	size_t savedcmcnt;

	/*
	** Linebreak class of the previous character, and of the most recent
	** character that was not SP.
	*/
	uint8_t prevclass;
	uint8_t prevclass_nsp;

	/* Current state: handler for the next class, and for end of input */
	int (*next_handler)(struct unicode_lb_info *, uint8_t);
	int (*end_handler)(struct unicode_lb_info *);
};
37 | ||
38 | ||
/* http://www.unicode.org/reports/tr14/#Algorithm */

/* Default state: handlers installed by unicode_lb_reset() */
static int next_def(unicode_lb_info_t, uint8_t);
static int end_def(unicode_lb_info_t);

/* LB25: seen (PR|PO)(OP|HY), the result for the second class is deferred */
static int next_lb25_seenophy(unicode_lb_info_t, uint8_t);
static int end_lb25_seenophy(unicode_lb_info_t);

/* LB25: seen a NU */
static int next_lb25_seennu(unicode_lb_info_t, uint8_t);

/* LB25: seen CL or CP after a NU */
static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t);
50 | ||
51 | static void unicode_lb_reset(unicode_lb_info_t i) | |
52 | { | |
53 | i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT; | |
54 | i->next_handler=next_def; | |
55 | i->end_handler=end_def; | |
56 | } | |
57 | ||
58 | unicode_lb_info_t unicode_lb_init(int (*cb_func)(int, void *), | |
59 | void *cb_arg) | |
60 | { | |
61 | unicode_lb_info_t i=calloc(1, sizeof(struct unicode_lb_info)); | |
62 | ||
63 | i->cb_func=cb_func; | |
64 | i->cb_arg=cb_arg; | |
65 | ||
66 | unicode_lb_reset(i); | |
67 | return i; | |
68 | } | |
69 | ||
70 | int unicode_lb_end(unicode_lb_info_t i) | |
71 | { | |
72 | int rc=(*i->end_handler)(i); | |
73 | ||
74 | free(i); | |
75 | return rc; | |
76 | } | |
77 | ||
78 | void unicode_lb_set_opts(unicode_lb_info_t i, int opts) | |
79 | { | |
80 | i->opts=opts; | |
81 | } | |
82 | ||
83 | /* Default end handler has nothing to do */ | |
84 | ||
85 | static int end_def(unicode_lb_info_t i) | |
86 | { | |
87 | /* LB3 N/A */ | |
88 | return 0; | |
89 | } | |
/*
** Report linebreak value x through the callback. Expects a handle named "i"
** to be in scope; evaluates to the callback's return value.
*/
#define RESULT(x) (*i->cb_func)((x), i->cb_arg)
91 | ||
92 | int unicode_lb_next_cnt(unicode_lb_info_t i, | |
93 | const unicode_char *chars, | |
94 | size_t cnt) | |
95 | { | |
96 | while (cnt) | |
97 | { | |
98 | int rc=unicode_lb_next(i, *chars); | |
99 | ||
100 | if (rc) | |
101 | return rc; | |
102 | ||
103 | ++chars; | |
104 | --cnt; | |
105 | } | |
106 | return 0; | |
107 | } | |
108 | ||
109 | int unicode_lb_lookup(unicode_char ch) | |
110 | { | |
111 | return unicode_tab_lookup(ch, | |
112 | unicode_indextab, | |
113 | sizeof(unicode_indextab) | |
114 | / sizeof(unicode_indextab[0]), | |
115 | unicode_rangetab, | |
116 | unicode_classtab, | |
117 | UNICODE_LB_AL /* XX, LB1 */); | |
118 | } | |
119 | ||
120 | int unicode_lb_next(unicode_lb_info_t i, | |
121 | unicode_char ch) | |
122 | { | |
123 | return (*i->next_handler)(i, (i->opts & UNICODE_LB_OPT_DASHWJ) && | |
124 | (ch == 0x2012 || ch == 0x2013) | |
125 | ? UNICODE_LB_WJ:unicode_lb_lookup(ch)); | |
126 | } | |
127 | ||
128 | static int next_def_nolb25(unicode_lb_info_t i, | |
129 | uint8_t uclass, | |
130 | int nolb25); | |
131 | ||
132 | /* | |
133 | ** Default logic for next unicode char. | |
134 | */ | |
135 | static int next_def(unicode_lb_info_t i, | |
136 | uint8_t uclass) | |
137 | { | |
138 | return next_def_nolb25(i, uclass, 0); | |
139 | } | |
140 | ||
/*
** Core rule cascade. Given the linebreak class of the next character, and
** the previous classes saved in the handle, decide the linebreak value for
** the position before this character and report it through RESULT().
**
** The rules are tested in order and the first match wins, so the order of
** the statements below matters.
**
** Returns the callback's return value. The one exception is the start of
** the LB25 lookahead, (PR|PO)(OP|HY): nothing is reported, the handlers are
** switched to the "seenophy" state, and 0 is returned.
*/
static int next_def_nolb25(unicode_lb_info_t i,
			   uint8_t uclass,

			   /* Flag -- recursively invoked after discarding LB25 */
			   int nolb25)
{

	/* Retrieve the previous unicode character's linebreak class. */

	uint8_t prevclass=i->prevclass;
	uint8_t prevclass_nsp=i->prevclass_nsp;

	/* Save this unicode char's linebreak class, for the next goaround */
	i->prevclass=uclass;

	if (uclass != UNICODE_LB_SP)
		i->prevclass_nsp=uclass;

	/* A NU always switches to the "seen NU" state for what follows */
	if (uclass == UNICODE_LB_NU)
		i->next_handler=next_lb25_seennu; /* LB25 */

	if (prevclass == UNICODE_LB_SOT)
	{
		/* A leading CM is treated as AL from here on */
		if (uclass == UNICODE_LB_CM) /* LB9 */
			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;

		return RESULT(UNICODE_LB_NONE); /* LB2 */
	}

	if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF)
		return RESULT(UNICODE_LB_NONE); /* LB5 */

	switch (prevclass) {
	case UNICODE_LB_BK:
	case UNICODE_LB_CR:
	case UNICODE_LB_LF:
	case UNICODE_LB_NL:

		/* A CM with nothing to attach to is remembered as AL */
		if (uclass == UNICODE_LB_CM)
		{
			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
			/* LB9 */
		}

		return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */

	case UNICODE_LB_SP:
	case UNICODE_LB_ZW:
		/* CM after SP or ZW: handle it as AL in the rules below */
		if (uclass == UNICODE_LB_CM)
			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
		/* LB10 */
		break;
	default:
		break;
	}

	/* No break before a hard line break class, SP or ZW */
	switch (uclass) {

		/* LB6: */
	case UNICODE_LB_BK:
	case UNICODE_LB_CR:
	case UNICODE_LB_LF:
	case UNICODE_LB_NL:

		/* LB7: */
	case UNICODE_LB_SP:
	case UNICODE_LB_ZW:

		return RESULT(UNICODE_LB_NONE);
	default:
		break;
	}

	/* Break allowed after ZW, even with intervening SPs */
	if (prevclass_nsp == UNICODE_LB_ZW)
		return RESULT(UNICODE_LB_ALLOWED); /* LB8 */

	if (uclass == UNICODE_LB_CM)
	{
		/*
		** The CM is absorbed: restore the saved classes, so that the
		** next character sees the class that preceded the CM.
		*/
		i->prevclass=prevclass;
		i->prevclass_nsp=prevclass_nsp;
		return RESULT(UNICODE_LB_NONE); /* LB9 */
	}

	if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ)
		return RESULT(UNICODE_LB_NONE); /* LB11 */

	if (prevclass == UNICODE_LB_GL)
		return RESULT(UNICODE_LB_NONE); /* LB12 */

	/* No break before GL, unless it follows SP, BA or HY */
	if (uclass == UNICODE_LB_GL &&
	    prevclass != UNICODE_LB_SP &&
	    prevclass != UNICODE_LB_BA &&
	    prevclass != UNICODE_LB_HY)
		return RESULT(UNICODE_LB_NONE); /* LB12a */


	switch (uclass) {
	case UNICODE_LB_SY:
		/* Option: allow a break between SP and SY */
		if (i->opts & UNICODE_LB_OPT_SYBREAK)
		{
			if (prevclass == UNICODE_LB_SP)
				return RESULT(UNICODE_LB_ALLOWED);
		}

		/* FALLTHROUGH: otherwise SY is handled like CL, CP, EX, IS */
	case UNICODE_LB_CL:
	case UNICODE_LB_CP:
	case UNICODE_LB_EX:
	case UNICODE_LB_IS:
		return RESULT(UNICODE_LB_NONE); /* LB13 */
	default:
		break;
	}

	/* Option: no break between SY and a following EX, AL or ID */
	if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY)
		switch (uclass) {
		case UNICODE_LB_EX:
		case UNICODE_LB_AL:
		case UNICODE_LB_ID:
			return RESULT(UNICODE_LB_NONE);
		}

	/*
	** LB14 through LB17 test prevclass_nsp, so SPs between the two
	** classes are skipped over.
	*/
	if (prevclass_nsp == UNICODE_LB_OP)
		return RESULT(UNICODE_LB_NONE); /* LB14 */

	if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP)
		return RESULT(UNICODE_LB_NONE); /* LB15 */

	if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP)
	    && uclass == UNICODE_LB_NS)
		return RESULT(UNICODE_LB_NONE); /* LB16 */

	if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2)
		return RESULT(UNICODE_LB_NONE); /* LB17 */

	if (prevclass == UNICODE_LB_SP)
		return RESULT(UNICODE_LB_ALLOWED); /* LB18 */

	if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU)
		return RESULT(UNICODE_LB_NONE); /* LB19 */

	if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB)
		return RESULT(UNICODE_LB_ALLOWED); /* LB20 */

	/* LB21: */

	switch (uclass) {
	case UNICODE_LB_BA:
	case UNICODE_LB_HY:
	case UNICODE_LB_NS:
		return RESULT(UNICODE_LB_NONE);
	default:
		break;
	}

	if (prevclass == UNICODE_LB_BB)
		return RESULT(UNICODE_LB_NONE);

	if (uclass == UNICODE_LB_IN)
		switch (prevclass) {
		case UNICODE_LB_AL:
		case UNICODE_LB_ID:
		case UNICODE_LB_IN:
		case UNICODE_LB_NU:
			return RESULT(UNICODE_LB_NONE); /* LB22 */
		default:
			break;
		}


	if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO)
		return RESULT(UNICODE_LB_NONE); /* LB23 */
	if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)
		return RESULT(UNICODE_LB_NONE); /* LB23 */

	if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)
		return RESULT(UNICODE_LB_NONE); /* LB23 */


	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)
		return RESULT(UNICODE_LB_NONE); /* LB24 */
	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)
		return RESULT(UNICODE_LB_NONE); /* LB24 */
	if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)
		return RESULT(UNICODE_LB_NONE); /* LB24 */

	/* Option: no break before PR when it follows PR, AL or ID */
	if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)
		switch (prevclass) {
		case UNICODE_LB_PR:
		case UNICODE_LB_AL:
		case UNICODE_LB_ID:
			return RESULT(UNICODE_LB_NONE);
		}

	if (!nolb25 &&
	    (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO))
	{
		if (uclass == UNICODE_LB_NU)
			return RESULT(UNICODE_LB_NONE); /* LB25 */

		if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)
		{
			/*
			** (PR|PO)(OP|HY): the result depends on whether a NU
			** follows. Undo the state update made above, save
			** this class, and defer: nothing is reported until
			** the "seenophy" handlers have seen what comes next.
			*/
			i->prevclass=prevclass;
			i->prevclass_nsp=prevclass_nsp;

			i->savedclass=uclass;
			i->savedcmcnt=0;
			i->next_handler=next_lb25_seenophy;
			i->end_handler=end_lb25_seenophy;
			return 0;
		}
	}

	if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) &&
	    uclass == UNICODE_LB_NU)
		return RESULT(UNICODE_LB_NONE); /* LB25 */

	/*****/

	if (prevclass == UNICODE_LB_JL)
		switch (uclass) {
		case UNICODE_LB_JL:
		case UNICODE_LB_JV:
		case UNICODE_LB_H2:
		case UNICODE_LB_H3:
			return RESULT(UNICODE_LB_NONE); /* LB26 */
		default:
			break;
		}

	if ((prevclass == UNICODE_LB_JV ||
	     prevclass == UNICODE_LB_H2) &&
	    (uclass == UNICODE_LB_JV ||
	     uclass == UNICODE_LB_JT))
		return RESULT(UNICODE_LB_NONE); /* LB26 */

	if ((prevclass == UNICODE_LB_JT ||
	     prevclass == UNICODE_LB_H3) &&
	    uclass == UNICODE_LB_JT)
		return RESULT(UNICODE_LB_NONE); /* LB26 */


	switch (prevclass) {
	case UNICODE_LB_JL:
	case UNICODE_LB_JV:
	case UNICODE_LB_JT:
	case UNICODE_LB_H2:
	case UNICODE_LB_H3:
		if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO)
			return RESULT(UNICODE_LB_NONE); /* LB27 */
		/* FALLTHROUGH: no match, carry on with the next rule */
	default:
		break;
	}

	switch (uclass) {
	case UNICODE_LB_JL:
	case UNICODE_LB_JV:
	case UNICODE_LB_JT:
	case UNICODE_LB_H2:
	case UNICODE_LB_H3:
		if (prevclass == UNICODE_LB_PR)
			return RESULT(UNICODE_LB_NONE); /* LB27 */
		/* FALLTHROUGH: no match, carry on with the next rule */
	default:
		break;
	}

	if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_AL)
		return RESULT(UNICODE_LB_NONE); /* LB28 */

	if (prevclass == UNICODE_LB_IS && uclass == UNICODE_LB_AL)
		return RESULT(UNICODE_LB_NONE); /* LB29 */

	if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_NU) &&
	    uclass == UNICODE_LB_OP)
		return RESULT(UNICODE_LB_NONE); /* LB30 */

	if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_NU) &&
	    prevclass == UNICODE_LB_CP)
		return RESULT(UNICODE_LB_NONE); /* LB30 */

	/* No rule matched: a break is allowed */
	return RESULT(UNICODE_LB_ALLOWED); /* LB31 */
}
422 | ||
423 | /* | |
424 | ** Seen (PR|PO)(OP|HY), without returning the linebreak property for the second | |
425 | ** character, but NU did not follow. Backtrack. | |
426 | */ | |
427 | ||
428 | static int unwind_lb25_seenophy(unicode_lb_info_t i) | |
429 | { | |
430 | int rc; | |
431 | ||
432 | /*uint8_t class=i->savedclass;*/ | |
433 | int nolb25_flag=1; | |
434 | ||
435 | i->next_handler=next_def; | |
436 | i->end_handler=end_def; | |
437 | ||
438 | do | |
439 | { | |
440 | rc=next_def_nolb25(i, i->savedclass, nolb25_flag); | |
441 | ||
442 | if (rc) | |
443 | return rc; | |
444 | ||
445 | /*class=UNICODE_LB_CM;*/ | |
446 | nolb25_flag=0; | |
447 | } while (i->savedcmcnt--); | |
448 | return 0; | |
449 | } | |
450 | ||
451 | /* | |
452 | ** Seen (PR|PO)(OP|HY), without returning the linebreak property for the second | |
453 | ** character. If there's now a NU, we found the modified LB25 regexp. | |
454 | */ | |
455 | ||
456 | static int next_lb25_seenophy(unicode_lb_info_t i, | |
457 | uint8_t uclass) | |
458 | { | |
459 | int rc; | |
460 | ||
461 | if (uclass == UNICODE_LB_CM) | |
462 | { | |
463 | ++i->savedcmcnt; /* Keep track of CMs, and try again */ | |
464 | return 0; | |
465 | } | |
466 | ||
467 | if (uclass != UNICODE_LB_NU) | |
468 | { | |
469 | rc=unwind_lb25_seenophy(i); | |
470 | ||
471 | if (rc) | |
472 | return rc; | |
473 | ||
474 | return next_def_nolb25(i, uclass, 0); | |
475 | } | |
476 | ||
477 | do | |
478 | { | |
479 | rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */ | |
480 | ||
481 | if (rc) | |
482 | return rc; | |
483 | } while (i->savedcmcnt--); | |
484 | ||
485 | i->next_handler=next_lb25_seennu; | |
486 | i->end_handler=end_def; | |
487 | i->prevclass=i->prevclass_nsp=uclass; | |
488 | return RESULT(UNICODE_LB_NONE); | |
489 | } | |
490 | ||
491 | /* | |
492 | ** Seen (PR|PO)(OP|HY), and now The End. Unwind, and give up. | |
493 | */ | |
494 | ||
495 | static int end_lb25_seenophy(unicode_lb_info_t i) | |
496 | { | |
497 | int rc=unwind_lb25_seenophy(i); | |
498 | ||
499 | if (rc == 0) | |
500 | rc=end_def(i); | |
501 | return rc; | |
502 | } | |
503 | ||
504 | /* | |
505 | ** Seen an NU, modified LB25 regexp. | |
506 | */ | |
507 | static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass) | |
508 | { | |
509 | if (uclass == UNICODE_LB_NU || uclass == UNICODE_LB_SY || | |
510 | uclass == UNICODE_LB_IS) | |
511 | { | |
512 | i->prevclass=i->prevclass_nsp=uclass; | |
513 | return RESULT(UNICODE_LB_NONE); | |
514 | } | |
515 | ||
516 | if (uclass == UNICODE_LB_CM) | |
517 | return RESULT(UNICODE_LB_NONE); /* LB9 */ | |
518 | ||
519 | if (uclass == UNICODE_LB_CL || uclass == UNICODE_LB_CP) | |
520 | { | |
521 | i->prevclass=i->prevclass_nsp=uclass; | |
522 | i->next_handler=next_lb25_seennuclcp; | |
523 | i->end_handler=end_def; | |
524 | return RESULT(UNICODE_LB_NONE); | |
525 | } | |
526 | ||
527 | i->next_handler=next_def; | |
528 | i->end_handler=end_def; | |
529 | ||
530 | if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) | |
531 | { | |
532 | i->prevclass=i->prevclass_nsp=uclass; | |
533 | return RESULT(UNICODE_LB_NONE); | |
534 | } | |
535 | ||
536 | return next_def(i, uclass); /* Not a prefix, process normally */ | |
537 | } | |
538 | ||
539 | /* | |
540 | ** Seen CL|CP, in the modified LB25 regexp. | |
541 | */ | |
542 | static int next_lb25_seennuclcp(unicode_lb_info_t i, uint8_t uclass) | |
543 | { | |
544 | if (uclass == UNICODE_LB_CM) | |
545 | return RESULT(UNICODE_LB_NONE); /* LB9 */ | |
546 | ||
547 | i->next_handler=next_def; | |
548 | i->end_handler=end_def; | |
549 | ||
550 | if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) | |
551 | { | |
552 | i->prevclass=i->prevclass_nsp=uclass; | |
553 | ||
554 | return RESULT(UNICODE_LB_NONE); | |
555 | } | |
556 | ||
557 | return next_def(i, uclass); | |
558 | } | |
559 | ||
560 | /******************/ | |
561 | ||
/*
** Wrapper around a linebreaking handle that reports each linebreak value
** together with the character it belongs to.
*/
struct unicode_lbc_info {
	/* The underlying handle, created by unicode_lb_init() */
	unicode_lb_info_t handle;

	/* Characters passed to unicode_lbc_next() */
	struct unicode_buf buf;

	/* Index in buf of the next character to report to cb_func */
	size_t buf_ptr;

	/* Callback receiving the linebreak value, the character, and cb_arg */
	int (*cb_func)(int, unicode_char, void *);
	void *cb_arg;
};
572 | ||
573 | static int unicode_lbc_callback(int value, void *ptr) | |
574 | { | |
575 | unicode_lbc_info_t h=(unicode_lbc_info_t)ptr; | |
576 | ||
577 | if (h->buf_ptr >= unicode_buf_len(&h->buf)) | |
578 | { | |
579 | errno=EINVAL; | |
580 | return -1; /* Shouldn't happen */ | |
581 | } | |
582 | ||
583 | return (*h->cb_func)(value, unicode_buf_ptr(&h->buf)[h->buf_ptr++], | |
584 | h->cb_arg); | |
585 | } | |
586 | ||
587 | unicode_lbc_info_t unicode_lbc_init(int (*cb_func)(int, unicode_char, void *), | |
588 | void *cb_arg) | |
589 | { | |
590 | unicode_lbc_info_t h= | |
591 | (unicode_lbc_info_t)calloc(1, sizeof(struct unicode_lbc_info)); | |
592 | ||
593 | if (!h) | |
594 | return NULL; | |
595 | ||
596 | h->cb_func=cb_func; | |
597 | h->cb_arg=cb_arg; | |
598 | ||
599 | if ((h->handle=unicode_lb_init(unicode_lbc_callback, h)) == NULL) | |
600 | { | |
601 | free(h); | |
602 | return NULL; | |
603 | } | |
604 | unicode_buf_init(&h->buf, (size_t)-1); | |
605 | return h; | |
606 | } | |
607 | ||
608 | void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts) | |
609 | { | |
610 | unicode_lb_set_opts(i->handle, opts); | |
611 | } | |
612 | ||
613 | int unicode_lbc_next(unicode_lbc_info_t i, unicode_char ch) | |
614 | { | |
615 | if (i->buf_ptr >= unicode_buf_len(&i->buf)) | |
616 | { | |
617 | i->buf_ptr=0; | |
618 | unicode_buf_clear(&i->buf); | |
619 | } | |
620 | ||
621 | unicode_buf_append(&i->buf, &ch, 1); | |
622 | return unicode_lb_next(i->handle, ch); | |
623 | } | |
624 | ||
625 | int unicode_lbc_end(unicode_lbc_info_t i) | |
626 | { | |
627 | int rc=unicode_lb_end(i->handle); | |
628 | ||
629 | unicode_buf_deinit(&i->buf); | |
630 | free(i); | |
631 | return rc; | |
632 | } |