X-Git-Url: https://git.hcoop.net/hcoop/debian/courier-authlib.git/blobdiff_plain/b0322a8536f3cab76471c98122fe1df75bb7c387..d50284c4aa46bdb558ab92276d4e3ebd856fe9a6:/libs/unicode/unicode_wordbreak.c diff --git a/libs/unicode/unicode_wordbreak.c b/libs/unicode/unicode_wordbreak.c deleted file mode 100644 index dee4b52..0000000 --- a/libs/unicode/unicode_wordbreak.c +++ /dev/null @@ -1,448 +0,0 @@ -/* -** Copyright 2011 Double Precision, Inc. -** See COPYING for distribution information. -** -*/ - -#include "unicode_config.h" -#include "unicode.h" - -#include -#include -#include -#include -#include - -#include "wordbreaktab_internal.h" -#include "wordbreaktab.h" - -struct unicode_wb_info { - int (*cb_func)(int, void *); - void *cb_arg; - - uint8_t prevclass; - size_t wb4_cnt; - - size_t wb4_extra_cnt; - - int (*next_handler)(unicode_wb_info_t, uint8_t); - int (*end_handler)(unicode_wb_info_t); -}; - -static int sot(unicode_wb_info_t i, uint8_t cl); -static int wb4(unicode_wb_info_t i); -static int wb1and2_done(unicode_wb_info_t i, uint8_t cl); - -static int seen_wb67_handler(unicode_wb_info_t i, uint8_t cl); -static int seen_wb67_end_handler(unicode_wb_info_t i); -static int wb67_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl); - -static int seen_wb1112_handler(unicode_wb_info_t i, uint8_t cl); -static int seen_wb1112_end_handler(unicode_wb_info_t i); -static int wb1112_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl); - -unicode_wb_info_t unicode_wb_init(int (*cb_func)(int, void *), - void *cb_arg) -{ - unicode_wb_info_t i=calloc(1, sizeof(struct unicode_wb_info)); - - if (!i) - return NULL; - - i->next_handler=sot; - i->cb_func=cb_func; - i->cb_arg=cb_arg; - return i; -} - -int unicode_wb_end(unicode_wb_info_t i) -{ - int rc; - - if (i->end_handler) - rc=(*i->end_handler)(i); - else - rc=wb4(i); - - free(i); - return rc; -} - -int unicode_wb_next_cnt(unicode_wb_info_t i, - const unicode_char *chars, - size_t cnt) -{ - int rc; - - while (cnt) - { - rc=unicode_wb_next(i, *chars++); - --cnt; - if (rc) - return rc; - } - return 0; -} - -int unicode_wb_next(unicode_wb_info_t i, unicode_char ch) -{ - return (*i->next_handler) - (i, unicode_tab_lookup(ch, - unicode_indextab, - sizeof(unicode_indextab) - / sizeof(unicode_indextab[0]), - unicode_rangetab, - unicode_classtab, - UNICODE_WB_OTHER)); -} - -static int wb4(unicode_wb_info_t i) -{ - int rc=0; - - while (i->wb4_cnt > 0) - { - --i->wb4_cnt; - - if (rc == 0) - rc=(*i->cb_func)(0, i->cb_arg); - } - return rc; -} - -static int result(unicode_wb_info_t i, int flag) -{ - int rc=wb4(i); - - if (rc == 0) - rc=(*i->cb_func)(flag, i->cb_arg); - - return rc; -} - -#define SET_HANDLER(next,end) (i->next_handler=next, i->end_handler=end) - -static int sot(unicode_wb_info_t i, uint8_t cl) -{ - i->prevclass=cl; - SET_HANDLER(wb1and2_done, NULL); - - return result(i, 1); /* WB1 */ -} - -static int wb1and2_done(unicode_wb_info_t i, uint8_t cl) -{ - uint8_t prevclass=i->prevclass; - - i->prevclass=cl; - - if (prevclass == UNICODE_WB_CR && cl == UNICODE_WB_LF) - return result(i, 0); /* WB3 */ - - switch (prevclass) { - case UNICODE_WB_CR: - case UNICODE_WB_LF: - case UNICODE_WB_Newline: - return result(i, 1); /* WB3a */ - } - - switch (cl) { - case UNICODE_WB_CR: - case UNICODE_WB_LF: - case UNICODE_WB_Newline: - return result(i, 1); /* WB3b */ - } - - if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format) - { - i->prevclass=prevclass; - ++i->wb4_cnt; - return 0; /* WB4 */ - } - - if (prevclass == UNICODE_WB_ALetter && cl == UNICODE_WB_ALetter) - { - return result(i, 0); /* WB5 */ - } - - if (prevclass == UNICODE_WB_ALetter && - (cl == UNICODE_WB_MidLetter || cl == UNICODE_WB_MidNumLet)) - { - i->wb4_extra_cnt=0; - SET_HANDLER(seen_wb67_handler, seen_wb67_end_handler); - return 0; - } - - return wb67_done(i, prevclass, cl); -} - -/* -** ALetter (MidLetter | MidNumLet ) ? -** -** prevclass cl -** -** Seen ALetter (MidLetter | MidNumLet), with the second character's status -** not returned yet. -*/ - -static int seen_wb67_handler(unicode_wb_info_t i, uint8_t cl) -{ - int rc; - uint8_t prevclass; - size_t extra_cnt; - - if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format) - { - ++i->wb4_extra_cnt; - return 0; - } - - extra_cnt=i->wb4_extra_cnt; - - /* - ** Reset the handler to the default, then check WB6 - */ - - SET_HANDLER(wb1and2_done, NULL); - - if (cl == UNICODE_WB_ALetter) - { - rc=result(i, 0); /* WB6 */ - i->wb4_cnt=extra_cnt; - - if (rc == 0) - rc=result(i, 0); /* WB7 */ - - i->prevclass=cl; - - return rc; - } - - prevclass=i->prevclass; /* This was the second character */ - - /* - ** Process the second character, starting with WB7 - */ - - rc=wb67_done(i, UNICODE_WB_ALetter, prevclass); - - i->prevclass=prevclass; - i->wb4_cnt=extra_cnt; - - if (rc == 0) - rc=(*i->next_handler)(i, cl); - /* Process the current char now */ - - return rc; -} - -/* -** Seen ALetter (MidLetter | MidNumLet), with the second character's status -** not returned yet, and now sot. -*/ - -static int seen_wb67_end_handler(unicode_wb_info_t i) -{ - int rc; - size_t extra_cnt=i->wb4_extra_cnt; - - /* - ** Process the second character, starting with WB7. - */ - - rc=wb67_done(i, UNICODE_WB_ALetter, i->prevclass); - i->wb4_cnt=extra_cnt; - if (rc == 0) - rc=wb4(i); - return rc; -} - - -static int wb67_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl) -{ - if (prevclass == UNICODE_WB_Numeric && cl == UNICODE_WB_Numeric) - return result(i, 0); /* WB8 */ - - if (prevclass == UNICODE_WB_ALetter && cl == UNICODE_WB_Numeric) - return result(i, 0); /* WB9 */ - - if (prevclass == UNICODE_WB_Numeric && cl == UNICODE_WB_ALetter) - return result(i, 0); /* WB10 */ - - - if (prevclass == UNICODE_WB_Numeric && - (cl == UNICODE_WB_MidNum || cl == UNICODE_WB_MidNumLet)) - { - i->wb4_extra_cnt=0; - SET_HANDLER(seen_wb1112_handler, seen_wb1112_end_handler); - return 0; - } - - return wb1112_done(i, prevclass, cl); -} - -/* -** Numeric (MidNum | MidNumLet ) ? -** -** prevclass cl -** -** Seen Numeric (MidNum | MidNumLet), with the second character's status -** not returned yet. -*/ - -static int seen_wb1112_handler(unicode_wb_info_t i, uint8_t cl) -{ - int rc; - uint8_t prevclass; - size_t extra_cnt; - - if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format) - { - ++i->wb4_extra_cnt; - return 0; - } - - extra_cnt=i->wb4_extra_cnt; - - /* - ** Reset the handler to the default, then check WB6 - */ - - SET_HANDLER(wb1and2_done, NULL); - - if (cl == UNICODE_WB_Numeric) - { - rc=result(i, 0); /* WB11 */ - i->wb4_cnt=extra_cnt; - - if (rc == 0) - rc=result(i, 0); /* WB12 */ - - i->prevclass=cl; - - return rc; - } - - prevclass=i->prevclass; /* This was the second character */ - - /* - ** Process the second character, starting with WB7 - */ - - rc=wb1112_done(i, UNICODE_WB_Numeric, prevclass); - - i->prevclass=prevclass; - i->wb4_cnt=extra_cnt; - - if (rc == 0) - rc=(*i->next_handler)(i, cl); - /* Process the current char now */ - - return rc; -} - -/* -** Seen Numeric (MidNum | MidNumLet), with the second character's status -** not returned yet, and now sot. -*/ - -static int seen_wb1112_end_handler(unicode_wb_info_t i) -{ - int rc; - size_t extra_cnt=i->wb4_extra_cnt; - - /* - ** Process the second character, starting with WB11. - */ - - rc=wb1112_done(i, UNICODE_WB_Numeric, i->prevclass); - i->wb4_cnt=extra_cnt; - if (rc == 0) - rc=wb4(i); - return rc; -} - -static int wb1112_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl) -{ - if (prevclass == UNICODE_WB_Katakana && - cl == UNICODE_WB_Katakana) - return result(i, 0); /* WB13 */ - - switch (prevclass) { - case UNICODE_WB_ALetter: - case UNICODE_WB_Numeric: - case UNICODE_WB_Katakana: - case UNICODE_WB_ExtendNumLet: - if (cl == UNICODE_WB_ExtendNumLet) - return result(i, 0); /* WB13a */ - } - - if (prevclass == UNICODE_WB_ExtendNumLet) - switch (cl) { - case UNICODE_WB_ALetter: - case UNICODE_WB_Numeric: - case UNICODE_WB_Katakana: - return result(i, 0); /* WB13b */ - } - - return result(i, 1); /* WB14 */ -} - -/* --------------------------------------------------------------------- */ - -struct unicode_wbscan_info { - unicode_wb_info_t wb_handle; - - int found; - size_t cnt; -}; - -static int unicode_wbscan_callback(int, void *); - -unicode_wbscan_info_t unicode_wbscan_init() -{ - unicode_wbscan_info_t i=calloc(1, sizeof(struct unicode_wbscan_info)); - - if (!i) - return NULL; - - if ((i->wb_handle=unicode_wb_init(unicode_wbscan_callback, i)) == NULL) - { - free(i); - return NULL; - } - - return i; -} - -int unicode_wbscan_next(unicode_wbscan_info_t i, unicode_char ch) -{ - if (!i->found) - unicode_wb_next(i->wb_handle, ch); - - return i->found; -} - -size_t unicode_wbscan_end(unicode_wbscan_info_t i) -{ - size_t n; - - unicode_wb_end(i->wb_handle); - - n=i->cnt; - free(i); - return n; -} - -static int unicode_wbscan_callback(int flag, void *arg) -{ - unicode_wbscan_info_t i=(unicode_wbscan_info_t)arg; - - if (flag && i->cnt > 0) - i->found=1; - - if (!i->found) - ++i->cnt; - return 0; -} -