Merge remote-tracking branch 'origin/debian'
[hcoop/debian/courier-authlib.git] / libs / unicode / unicode_wordbreak.c
diff --git a/libs/unicode/unicode_wordbreak.c b/libs/unicode/unicode_wordbreak.c
deleted file mode 100644 (file)
index dee4b52..0000000
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
-** Copyright 2011 Double Precision, Inc.
-** See COPYING for distribution information.
-**
-*/
-
-#include       "unicode_config.h"
-#include       "unicode.h"
-
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include "wordbreaktab_internal.h"
-#include "wordbreaktab.h"
-
-/*
-** State of one word-break scan. Allocated by unicode_wb_init() and freed
-** by unicode_wb_end().
-*/
-struct unicode_wb_info {
-       /* Callback that receives each break flag, and its opaque argument */
-       int (*cb_func)(int, void *);
-       void *cb_arg;
-
-       /* Class of the most recent character that was not skipped by WB4 */
-       uint8_t prevclass;
-       /* Skipped Extend/Format characters still owed a 0 flag, see wb4() */
-       size_t wb4_cnt;
-
-       /*
-       ** Extend/Format characters counted while a WB6/WB7 or WB11/WB12
-       ** lookahead is pending.
-       */
-       size_t wb4_extra_cnt;
-
-       /* Current state: receives the class of the next character */
-       int (*next_handler)(unicode_wb_info_t, uint8_t);
-       /* Called by unicode_wb_end(); when NULL, wb4() is called instead */
-       int (*end_handler)(unicode_wb_info_t);
-};
-
-/* Initial state, pending-flag flush, and the default per-character state */
-static int sot(unicode_wb_info_t i, uint8_t cl);
-static int wb4(unicode_wb_info_t i);
-static int wb1and2_done(unicode_wb_info_t i, uint8_t cl);
-
-/* Lookahead states for WB6/WB7, and the rules that follow them */
-static int seen_wb67_handler(unicode_wb_info_t i, uint8_t cl);
-static int seen_wb67_end_handler(unicode_wb_info_t i);
-static int wb67_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl);
-
-/* Lookahead states for WB11/WB12, and the rules that follow them */
-static int seen_wb1112_handler(unicode_wb_info_t i, uint8_t cl);
-static int seen_wb1112_end_handler(unicode_wb_info_t i);
-static int wb1112_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl);
-
-/*
-** Allocate a word-break scanning handle. Break flags are later reported
-** by calling cb_func(flag, cb_arg). The handle starts in the sot state.
-**
-** Returns NULL if the allocation fails.
-*/
-unicode_wb_info_t unicode_wb_init(int (*cb_func)(int, void *),
-                                 void *cb_arg)
-{
-       unicode_wb_info_t handle=calloc(1, sizeof(*handle));
-
-       if (handle != NULL)
-       {
-               handle->cb_func=cb_func;
-               handle->cb_arg=cb_arg;
-               handle->next_handler=sot;
-       }
-       return handle;
-}
-
-/*
-** Finish a scan: run the current end handler if one is installed,
-** otherwise flush the pending WB4 flags. The handle is freed in either
-** case. Returns the resulting status code.
-*/
-int unicode_wb_end(unicode_wb_info_t i)
-{
-       int rc=i->end_handler ? (*i->end_handler)(i) : wb4(i);
-
-       free(i);
-       return rc;
-}
-
-/*
-** Feed cnt characters from chars to unicode_wb_next(), one at a time.
-** Stops at, and returns, the first non-zero status; returns 0 otherwise.
-*/
-int unicode_wb_next_cnt(unicode_wb_info_t i,
-                       const unicode_char *chars,
-                       size_t cnt)
-{
-       size_t n;
-
-       for (n=0; n < cnt; ++n)
-       {
-               int rc=unicode_wb_next(i, chars[n]);
-
-               if (rc)
-                       return rc;
-       }
-       return 0;
-}
-
-/*
-** Look up the word-break class of ch (UNICODE_WB_OTHER is passed as the
-** default) and hand it to the current state handler. Returns the
-** handler's status code.
-*/
-int unicode_wb_next(unicode_wb_info_t i, unicode_char ch)
-{
-       uint8_t cl=unicode_tab_lookup(ch,
-                                     unicode_indextab,
-                                     sizeof(unicode_indextab)
-                                     / sizeof(unicode_indextab[0]),
-                                     unicode_rangetab,
-                                     unicode_classtab,
-                                     UNICODE_WB_OTHER);
-
-       return (*i->next_handler)(i, cl);
-}
-
-/*
-** Report a 0 flag for every pending Extend/Format character counted in
-** wb4_cnt. The counter is always drained to zero; once the callback
-** returns non-zero it is not called again, and that status is returned.
-*/
-static int wb4(unicode_wb_info_t i)
-{
-       int status=0;
-
-       for (; i->wb4_cnt > 0; )
-       {
-               i->wb4_cnt -= 1;
-
-               if (status != 0)
-                       continue;
-
-               status=(*i->cb_func)(0, i->cb_arg);
-       }
-       return status;
-}
-
-/*
-** Flush the pending WB4 flags, then report flag for the current
-** character. The callback is skipped if the flush returned non-zero.
-*/
-static int result(unicode_wb_info_t i, int flag)
-{
-       int flushed=wb4(i);
-
-       if (flushed != 0)
-               return flushed;
-
-       return (*i->cb_func)(flag, i->cb_arg);
-}
-
-/*
-** Install the per-character and end-of-input handlers. The macro refers
-** to a variable named "i" in the scope where it is expanded.
-*/
-#define SET_HANDLER(next,end) (i->next_handler=next, i->end_handler=end)
-
-/*
-** Initial state: the first character of the text. Records its class,
-** switches to the default handler with no end handler, and reports a
-** break.
-*/
-static int sot(unicode_wb_info_t i, uint8_t cl)
-{
-       i->next_handler=wb1and2_done;
-       i->end_handler=NULL;
-       i->prevclass=cl;
-
-       return result(i, 1);    /* WB1 */
-}
-
-/*
-** Default state: apply rules WB3 through WB5 to the previous and current
-** class. An ALetter followed by MidLetter or MidNumLet switches to the
-** WB6/WB7 lookahead state without reporting anything yet. Everything else
-** goes to wb67_done().
-*/
-static int wb1and2_done(unicode_wb_info_t i, uint8_t cl)
-{
-       const uint8_t before=i->prevclass;
-
-       i->prevclass=cl;
-
-       if (before == UNICODE_WB_CR && cl == UNICODE_WB_LF)
-               return result(i, 0); /* WB3 */
-
-       if (before == UNICODE_WB_CR || before == UNICODE_WB_LF ||
-           before == UNICODE_WB_Newline)
-               return result(i, 1); /* WB3a */
-
-       if (cl == UNICODE_WB_CR || cl == UNICODE_WB_LF ||
-           cl == UNICODE_WB_Newline)
-               return result(i, 1); /* WB3b */
-
-       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
-       {
-               /* WB4: skip it, keep the earlier class as the previous one */
-               i->prevclass=before;
-               i->wb4_cnt += 1;
-               return 0;
-       }
-
-       if (before == UNICODE_WB_ALetter)
-       {
-               if (cl == UNICODE_WB_ALetter)
-                       return result(i, 0); /* WB5 */
-
-               if (cl == UNICODE_WB_MidLetter || cl == UNICODE_WB_MidNumLet)
-               {
-                       /* Defer this character until the next one is seen */
-                       i->wb4_extra_cnt=0;
-                       SET_HANDLER(seen_wb67_handler,
-                                   seen_wb67_end_handler);
-                       return 0;
-               }
-       }
-
-       return wb67_done(i, before, cl);
-}
-
-/*
-**              ALetter     (MidLetter | MidNumLet )     ?
-**
-**                                  prevclass            cl
-**
-** Seen ALetter (MidLetter | MidNumLet), with the second character's status
-** not returned yet.
-*/
-
-/*
-** Lookahead state for WB6/WB7. cl is the class of the character after the
-** deferred MidLetter/MidNumLet, whose class is held in i->prevclass.
-** Returns 0, or the first non-zero status returned by the callback.
-*/
-static int seen_wb67_handler(unicode_wb_info_t i, uint8_t cl)
-{
-       int rc;
-       uint8_t prevclass;
-       size_t extra_cnt;
-
-       /*
-       ** Extend/Format characters after the deferred character are only
-       ** counted here; nothing is reported for them yet.
-       */
-       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
-       {
-               ++i->wb4_extra_cnt;
-               return 0;
-       }
-
-       extra_cnt=i->wb4_extra_cnt;
-
-       /*
-       ** Reset the handler to the default, then check WB6
-       */
-
-       SET_HANDLER(wb1and2_done, NULL);
-
-       if (cl == UNICODE_WB_ALetter)
-       {
-               /*
-               ** No break before the deferred character. result() first
-               ** flushes the flags counted in wb4_cnt.
-               */
-               rc=result(i, 0); /* WB6 */
-               /* The counted Extend/Format characters are flushed next */
-               i->wb4_cnt=extra_cnt;
-
-               if (rc == 0)
-                       rc=result(i, 0); /* WB7 */
-
-               i->prevclass=cl;
-                       
-               return rc;
-       }
-
-       prevclass=i->prevclass; /* This was the second character */
-
-       /*
-       ** WB6/WB7 did not apply. Process the second character with the
-       ** rules that follow them, in wb67_done().
-       */
-
-       rc=wb67_done(i, UNICODE_WB_ALetter, prevclass);
-
-       /*
-       ** Restore the state as it was after the second character, with the
-       ** counted Extend/Format characters now pending in wb4_cnt.
-       */
-       i->prevclass=prevclass;
-       i->wb4_cnt=extra_cnt;
-
-       if (rc == 0)
-               rc=(*i->next_handler)(i, cl);
-       /* Process the current char now */
-
-       return rc;
-}
-
-/*
-** Seen ALetter (MidLetter | MidNumLet), with the second character's status
-** not returned yet, and now the end of the text has been reached.
-*/
-
-static int seen_wb67_end_handler(unicode_wb_info_t i)
-{
-       const size_t pending=i->wb4_extra_cnt;
-       int status;
-
-       /*
-       ** The deferred second character can no longer match WB6/WB7, so
-       ** process it with the rules that follow them.
-       */
-       status=wb67_done(i, UNICODE_WB_ALetter, i->prevclass);
-
-       /* The counted Extend/Format characters become the pending flags */
-       i->wb4_cnt=pending;
-
-       return status ? status : wb4(i);
-}
-
-
-/*
-** Apply rules WB8 through WB10. A Numeric followed by MidNum or MidNumLet
-** switches to the WB11/WB12 lookahead state without reporting anything
-** yet. Everything else goes to wb1112_done().
-*/
-static int wb67_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl)
-{
-       if (cl == UNICODE_WB_Numeric &&
-           (prevclass == UNICODE_WB_Numeric ||
-            prevclass == UNICODE_WB_ALetter))
-               return result(i, 0); /* WB8, WB9 */
-
-       if (prevclass == UNICODE_WB_Numeric)
-       {
-               if (cl == UNICODE_WB_ALetter)
-                       return result(i, 0); /* WB10 */
-
-               if (cl == UNICODE_WB_MidNum || cl == UNICODE_WB_MidNumLet)
-               {
-                       /* Defer this character until the next one is seen */
-                       i->wb4_extra_cnt=0;
-                       SET_HANDLER(seen_wb1112_handler,
-                                   seen_wb1112_end_handler);
-                       return 0;
-               }
-       }
-
-       return wb1112_done(i, prevclass, cl);
-}
-
-/*
-**              Numeric     (MidNum | MidNumLet )     ?
-**
-**                               prevclass            cl
-**
-** Seen Numeric (MidNum | MidNumLet), with the second character's status
-** not returned yet.
-*/
-
-/*
-** Lookahead state for WB11/WB12. cl is the class of the character after
-** the deferred MidNum/MidNumLet, whose class is held in i->prevclass.
-** Returns 0, or the first non-zero status returned by the callback.
-*/
-static int seen_wb1112_handler(unicode_wb_info_t i, uint8_t cl)
-{
-       int rc;
-       uint8_t prevclass;
-       size_t extra_cnt;
-
-       /*
-       ** Extend/Format characters after the deferred character are only
-       ** counted here; nothing is reported for them yet.
-       */
-       if (cl == UNICODE_WB_Extend || cl == UNICODE_WB_Format)
-       {
-               ++i->wb4_extra_cnt;
-               return 0;
-       }
-
-       extra_cnt=i->wb4_extra_cnt;
-
-       /*
-       ** Reset the handler to the default, then check WB11
-       */
-
-       SET_HANDLER(wb1and2_done, NULL);
-
-       if (cl == UNICODE_WB_Numeric)
-       {
-               /*
-               ** No break before the deferred character. result() first
-               ** flushes the flags counted in wb4_cnt.
-               */
-               rc=result(i, 0); /* WB11 */
-               /* The counted Extend/Format characters are flushed next */
-               i->wb4_cnt=extra_cnt;
-
-               if (rc == 0)
-                       rc=result(i, 0); /* WB12 */
-
-               i->prevclass=cl;
-                       
-               return rc;
-       }
-
-       prevclass=i->prevclass; /* This was the second character */
-
-       /*
-       ** WB11/WB12 did not apply. Process the second character with the
-       ** rules that follow them, in wb1112_done().
-       */
-
-       rc=wb1112_done(i, UNICODE_WB_Numeric, prevclass);
-
-       /*
-       ** Restore the state as it was after the second character, with the
-       ** counted Extend/Format characters now pending in wb4_cnt.
-       */
-       i->prevclass=prevclass;
-       i->wb4_cnt=extra_cnt;
-
-       if (rc == 0)
-               rc=(*i->next_handler)(i, cl);
-       /* Process the current char now */
-
-       return rc;
-}
-
-/*
-** Seen Numeric (MidNum | MidNumLet), with the second character's status
-** not returned yet, and now the end of the text has been reached.
-*/
-
-static int seen_wb1112_end_handler(unicode_wb_info_t i)
-{
-       const size_t pending=i->wb4_extra_cnt;
-       int status;
-
-       /*
-       ** The deferred second character can no longer match WB11/WB12, so
-       ** process it with the rules that follow them.
-       */
-       status=wb1112_done(i, UNICODE_WB_Numeric, i->prevclass);
-
-       /* The counted Extend/Format characters become the pending flags */
-       i->wb4_cnt=pending;
-
-       return status ? status : wb4(i);
-}
-
-/*
-** Apply rules WB13, WB13a and WB13b. If none of them matches, report a
-** break (WB14).
-*/
-static int wb1112_done(unicode_wb_info_t i, uint8_t prevclass, uint8_t cl)
-{
-       int joined=0;
-
-       if (prevclass == UNICODE_WB_Katakana && cl == UNICODE_WB_Katakana)
-       {
-               joined=1; /* WB13 */
-       }
-       else if (cl == UNICODE_WB_ExtendNumLet)
-       {
-               joined=(prevclass == UNICODE_WB_ALetter ||
-                       prevclass == UNICODE_WB_Numeric ||
-                       prevclass == UNICODE_WB_Katakana ||
-                       prevclass == UNICODE_WB_ExtendNumLet); /* WB13a */
-       }
-       else if (prevclass == UNICODE_WB_ExtendNumLet)
-       {
-               joined=(cl == UNICODE_WB_ALetter ||
-                       cl == UNICODE_WB_Numeric ||
-                       cl == UNICODE_WB_Katakana); /* WB13b */
-       }
-
-       return result(i, joined ? 0 : 1); /* WB14 when nothing matched */
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
-** State of a unicode_wbscan_* scan, which uses the word-break handle
-** with unicode_wbscan_callback() as its callback.
-*/
-struct unicode_wbscan_info {
-       /* Handle created by unicode_wb_init() in unicode_wbscan_init() */
-       unicode_wb_info_t wb_handle;
-
-       /* Set to 1 by the callback on a non-zero flag once cnt is above 0 */
-       int found;
-       /* Callback invocations counted while found was still 0 */
-       size_t cnt;
-};
-
-/* Callback given to unicode_wb_init(); defined at the end of this file */
-static int unicode_wbscan_callback(int, void *);
-
-/*
-** Allocate a scan handle together with the word-break handle it uses.
-** unicode_wbscan_callback() is registered as the callback, with the new
-** scan handle as its argument.
-**
-** Returns NULL if either allocation fails; nothing is leaked in that case.
-*/
-unicode_wbscan_info_t unicode_wbscan_init(void)
-{
-       unicode_wbscan_info_t i=calloc(1, sizeof(*i));
-
-       if (!i)
-               return NULL;
-
-       i->wb_handle=unicode_wb_init(unicode_wbscan_callback, i);
-
-       if (i->wb_handle == NULL)
-       {
-               free(i);
-               return NULL;
-       }
-
-       return i;
-}
-
-/*
-** Feed one character to the scan, unless the found flag is already set.
-** Returns the found flag as it stands afterwards.
-*/
-int unicode_wbscan_next(unicode_wbscan_info_t i, unicode_char ch)
-{
-       if (i->found)
-               return i->found;
-
-       unicode_wb_next(i->wb_handle, ch);
-       return i->found;
-}
-
-/*
-** Finish the scan and free both handles. Returns the final character
-** count. The word-break handle is ended first because that may still
-** invoke the callback and change the count.
-*/
-size_t unicode_wbscan_end(unicode_wbscan_info_t i)
-{
-       size_t total;
-
-       unicode_wb_end(i->wb_handle);
-       total=i->cnt;
-
-       free(i);
-       return total;
-}
-
-/*
-** Word-break callback for the scan. While found is 0, a non-zero flag
-** with cnt above 0 sets found; any other call increments cnt. Once found
-** is set, nothing changes. Always returns 0.
-*/
-static int unicode_wbscan_callback(int flag, void *arg)
-{
-       unicode_wbscan_info_t scan=(unicode_wbscan_info_t)arg;
-
-       if (!scan->found)
-       {
-               if (flag && scan->cnt > 0)
-                       scan->found=1;
-               else
-                       ++scan->cnt;
-       }
-       return 0;
-}
-