Import Upstream version 0.66.4
[hcoop/debian/courier-authlib.git] / libs / unicode / unicode_linebreak.c
diff --git a/libs/unicode/unicode_linebreak.c b/libs/unicode/unicode_linebreak.c
deleted file mode 100644 (file)
index 1105dec..0000000
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
-** Copyright 2011 Double Precision, Inc.
-** See COPYING for distribution information.
-**
-*/
-
-#include       "unicode_config.h"
-#include       "unicode.h"
-
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include "linebreaktab_internal.h"
-
-#include "linebreaktab.h"
-
-#define UNICODE_LB_SOT 0xFF
-
-/*
-** State of one line-breaking pass. Allocated by unicode_lb_init() and
-** freed by unicode_lb_end().
-*/
-struct unicode_lb_info {
-       /* Receives each UNICODE_LB_* result; cb_arg is passed through to it */
-       int (*cb_func)(int, void *);
-       void *cb_arg;
-
-       /* UNICODE_LB_OPT_* flags, stored by unicode_lb_set_opts() */
-       int opts;
-
-       /*
-       ** LB25 lookahead: the class of the (OP|HY) character whose result is
-       ** being held back, and the number of CMs counted after it.
-       */
-       uint8_t savedclass;
-       size_t savedcmcnt;
-
-       /* Class of the previous character; UNICODE_LB_SOT at start of text */
-       uint8_t prevclass;
-       /* Like prevclass, but left unchanged when the character is SP */
-       uint8_t prevclass_nsp;
-
-       /* Current state: handler for the next class, and for end of text */
-       int (*next_handler)(struct unicode_lb_info *, uint8_t);
-       int (*end_handler)(struct unicode_lb_info *);
-};
-
-
-/* http://www.unicode.org/reports/tr14/#Algorithm */
-
-static int next_def(unicode_lb_info_t, uint8_t);
-static int end_def(unicode_lb_info_t);
-
-static int next_lb25_seenophy(unicode_lb_info_t, uint8_t);
-static int end_lb25_seenophy(unicode_lb_info_t);
-
-static int next_lb25_seennu(unicode_lb_info_t, uint8_t);
-
-static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t);
-
-/*
-** Put the handle into its start-of-text state: no previous character has
-** been seen, and the default handlers are in effect.
-*/
-static void unicode_lb_reset(unicode_lb_info_t i)
-{
-       i->next_handler=next_def;
-       i->end_handler=end_def;
-
-       i->prevclass_nsp=UNICODE_LB_SOT;
-       i->prevclass=UNICODE_LB_SOT;
-}
-
-/*
-** Allocate a new line-breaking handle.
-**
-** cb_func is invoked, with cb_arg as its second argument, for each
-** linebreak result. Returns NULL if the handle cannot be allocated;
-** otherwise the handle is released by passing it to unicode_lb_end().
-*/
-unicode_lb_info_t unicode_lb_init(int (*cb_func)(int, void *),
-                                 void *cb_arg)
-{
-       unicode_lb_info_t i=calloc(1, sizeof(struct unicode_lb_info));
-
-       if (!i)
-               return NULL; /* Out of memory: do not touch the handle */
-
-       i->cb_func=cb_func;
-       i->cb_arg=cb_arg;
-
-       unicode_lb_reset(i);
-       return i;
-}
-
-/*
-** Finish a line-breaking pass: run the current end-of-text handler, then
-** free the handle. Returns whatever the end handler returned.
-*/
-int unicode_lb_end(unicode_lb_info_t i)
-{
-       int (*finish)(struct unicode_lb_info *)=i->end_handler;
-       int rc;
-
-       rc=finish(i);
-       free(i);
-       return rc;
-}
-
-/* Replace the handle's option flags (UNICODE_LB_OPT_* bits) with opts. */
-void unicode_lb_set_opts(unicode_lb_info_t i, int opts)
-{
-       i->opts = opts;
-}
-
-/*
-** Default end-of-text handler: no result is being held back, so there is
-** nothing left to report.
-*/
-
-static int end_def(unicode_lb_info_t i)
-{
-       (void)i; /* LB3 N/A */
-
-       return 0;
-}
-#define RESULT(x) (*i->cb_func)((x), i->cb_arg)
-
-/*
-** Feed cnt characters, starting at chars, through unicode_lb_next().
-**
-** Stops at the first non-zero return value and returns it. Returns 0 if
-** every character was processed, or if cnt is 0.
-*/
-int unicode_lb_next_cnt(unicode_lb_info_t i,
-                       const unicode_char *chars,
-                       size_t cnt)
-{
-       size_t n;
-
-       for (n=0; n < cnt; ++n)
-       {
-               int rc=unicode_lb_next(i, chars[n]);
-
-               if (rc != 0)
-                       return rc;
-       }
-       return 0;
-}
-
-/*
-** Look up the linebreak class of ch in the generated tables.
-** UNICODE_LB_AL is handed to the table lookup as the fallback class.
-*/
-int unicode_lb_lookup(unicode_char ch)
-{
-       const size_t indextab_cnt=
-               sizeof(unicode_indextab) / sizeof(unicode_indextab[0]);
-
-       return unicode_tab_lookup(ch,
-                                 unicode_indextab,
-                                 indextab_cnt,
-                                 unicode_rangetab,
-                                 unicode_classtab,
-                                 UNICODE_LB_AL /* XX, LB1 */);
-}
-
-/*
-** Process one character: determine its linebreak class and pass it to the
-** handler for the current state. Returns the handler's return value.
-**
-** With UNICODE_LB_OPT_DASHWJ set, U+2012 and U+2013 are given class WJ
-** instead of their table class.
-*/
-int unicode_lb_next(unicode_lb_info_t i,
-                   unicode_char ch)
-{
-       uint8_t uclass;
-
-       if ((i->opts & UNICODE_LB_OPT_DASHWJ) &&
-           (ch == 0x2012 || ch == 0x2013))
-               uclass=UNICODE_LB_WJ;
-       else
-               uclass=unicode_lb_lookup(ch);
-
-       return (*i->next_handler)(i, uclass);
-}
-
-static int next_def_nolb25(unicode_lb_info_t i,
-                          uint8_t uclass,
-                          int nolb25);
-
-/*
-** Default handler for the next character's class: run the regular rules
-** with the LB25 lookahead enabled.
-*/
-static int next_def(unicode_lb_info_t i,
-                   uint8_t uclass)
-{
-       const int nolb25=0;
-
-       return next_def_nolb25(i, uclass, nolb25);
-}
-
-/*
-** Apply the pair rules to one character, given the class of the previous
-** one stored in the handle.
-**
-** i      - line-breaking handle; prevclass/prevclass_nsp are updated
-** uclass - linebreak class of the current character
-** nolb25 - non-zero suppresses the (PR|PO)(OP|HY) lookahead below
-**
-** The rules are tested in order and the first match decides. Normally one
-** UNICODE_LB_* value is reported through RESULT() and the callback's return
-** value is returned. When the lookahead starts, nothing is reported and 0
-** is returned.
-*/
-static int next_def_nolb25(unicode_lb_info_t i,
-                          uint8_t uclass,
-
-                          /* Flag -- recursively invoked after discarding LB25 */
-                          int nolb25)
-{
-
-       /* Retrieve the previous unicode character's linebreak class. */
-
-       uint8_t prevclass=i->prevclass;
-       uint8_t prevclass_nsp=i->prevclass_nsp;
-
-       /* Save this unicode char's linebreak class, for the next goaround */
-       i->prevclass=uclass;
-
-       if (uclass != UNICODE_LB_SP)
-               i->prevclass_nsp=uclass;
-
-       /* An NU hands the following characters to the number handler */
-       if (uclass == UNICODE_LB_NU)
-               i->next_handler=next_lb25_seennu; /* LB25 */
-
-       if (prevclass == UNICODE_LB_SOT)
-       {
-               /* A CM with no base character is treated as AL */
-               if (uclass == UNICODE_LB_CM) /* LB9 */
-                       i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
-
-               return RESULT(UNICODE_LB_NONE); /* LB2 */
-       }
-
-       if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF)
-               return RESULT(UNICODE_LB_NONE); /* LB5 */
-
-       switch (prevclass) {
-       case UNICODE_LB_BK:
-       case UNICODE_LB_CR:
-       case UNICODE_LB_LF:
-       case UNICODE_LB_NL:
-
-               if (uclass == UNICODE_LB_CM)
-               {
-                       i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
-                       /* LB9 */
-               }
-
-               return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */
-
-       case UNICODE_LB_SP:
-       case UNICODE_LB_ZW:
-               /* CM after SP or ZW: treat it as AL and keep going */
-               if (uclass == UNICODE_LB_CM)
-                       i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
-               /* LB10 */
-               break;
-       default:
-               break;
-       }
-
-       switch (uclass) {
-
-               /* LB6: */
-       case UNICODE_LB_BK:
-       case UNICODE_LB_CR:
-       case UNICODE_LB_LF:
-       case UNICODE_LB_NL:
-
-               /* LB7: */
-       case UNICODE_LB_SP:
-       case UNICODE_LB_ZW:
-
-               return RESULT(UNICODE_LB_NONE);
-       default:
-               break;
-       }
-
-       if (prevclass_nsp == UNICODE_LB_ZW)
-               return RESULT(UNICODE_LB_ALLOWED); /* LB8 */
-
-       if (uclass == UNICODE_LB_CM)
-       {
-               /* The CM is absorbed: put back the previous classes */
-               i->prevclass=prevclass;
-               i->prevclass_nsp=prevclass_nsp;
-               return RESULT(UNICODE_LB_NONE); /* LB9 */
-       }
-
-       if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ)
-               return RESULT(UNICODE_LB_NONE); /* LB11 */
-
-       if (prevclass == UNICODE_LB_GL)
-               return RESULT(UNICODE_LB_NONE); /* LB12 */
-
-       if (uclass == UNICODE_LB_GL &&
-           prevclass != UNICODE_LB_SP &&
-           prevclass != UNICODE_LB_BA &&
-           prevclass != UNICODE_LB_HY)
-               return RESULT(UNICODE_LB_NONE); /* LB12a */
-
-
-       switch (uclass) {
-       case UNICODE_LB_SY:
-               /* UNICODE_LB_OPT_SYBREAK: allow a break between SP and SY */
-               if (i->opts & UNICODE_LB_OPT_SYBREAK)
-               {
-                       if (prevclass == UNICODE_LB_SP)
-                               return RESULT(UNICODE_LB_ALLOWED);
-               }
-
-               /* Otherwise SY falls through to the LB13 classes below */
-       case UNICODE_LB_CL:
-       case UNICODE_LB_CP:
-       case UNICODE_LB_EX:
-       case UNICODE_LB_IS:
-               return RESULT(UNICODE_LB_NONE); /* LB13 */
-       default:
-               break;
-       }
-
-       /* UNICODE_LB_OPT_SYBREAK: no break between SY and EX, AL or ID */
-       if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY)
-               switch (uclass) {
-               case UNICODE_LB_EX:
-               case UNICODE_LB_AL:
-               case UNICODE_LB_ID:
-                       return RESULT(UNICODE_LB_NONE);
-               }
-
-       if (prevclass_nsp == UNICODE_LB_OP)
-               return RESULT(UNICODE_LB_NONE); /* LB14 */
-
-       if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP)
-               return RESULT(UNICODE_LB_NONE); /* LB15 */
-
-       if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP)
-           && uclass == UNICODE_LB_NS)
-               return RESULT(UNICODE_LB_NONE); /* LB16 */
-
-       if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2)
-               return RESULT(UNICODE_LB_NONE); /* LB17 */
-
-       if (prevclass == UNICODE_LB_SP)
-               return RESULT(UNICODE_LB_ALLOWED); /* LB18 */
-
-       if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU)
-               return RESULT(UNICODE_LB_NONE); /* LB19 */
-
-       if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB)
-               return RESULT(UNICODE_LB_ALLOWED); /* LB20 */
-
-       /* LB21: */
-
-       switch (uclass) {
-       case UNICODE_LB_BA:
-       case UNICODE_LB_HY:
-       case UNICODE_LB_NS:
-               return RESULT(UNICODE_LB_NONE);
-       default:
-               break;
-       }
-
-       /* Still LB21: no break after BB */
-       if (prevclass == UNICODE_LB_BB)
-               return RESULT(UNICODE_LB_NONE);
-
-       if (uclass == UNICODE_LB_IN)
-               switch (prevclass) {
-               case UNICODE_LB_AL:
-               case UNICODE_LB_ID:
-               case UNICODE_LB_IN:
-               case UNICODE_LB_NU:
-                       return RESULT(UNICODE_LB_NONE); /* LB22 */
-               default:
-                       break;
-               }
-
-
-       if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO)
-               return RESULT(UNICODE_LB_NONE); /* LB23 */
-       if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)
-               return RESULT(UNICODE_LB_NONE); /* LB23 */
-
-       if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)
-               return RESULT(UNICODE_LB_NONE); /* LB23 */
-
-
-       if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)
-               return RESULT(UNICODE_LB_NONE); /* LB24 */
-       if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)
-               return RESULT(UNICODE_LB_NONE); /* LB24 */
-       if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)
-               return RESULT(UNICODE_LB_NONE); /* LB24 */
-
-       /* UNICODE_LB_OPT_PRBREAK: no break before PR after PR, AL or ID */
-       if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)
-               switch (prevclass) {
-               case UNICODE_LB_PR:
-               case UNICODE_LB_AL:
-               case UNICODE_LB_ID:
-                       return RESULT(UNICODE_LB_NONE);
-               }
-               
-       if (!nolb25 &&
-           (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO))
-       {
-               if (uclass == UNICODE_LB_NU)
-                       return RESULT(UNICODE_LB_NONE); /* LB25 */
-
-               if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)
-               {
-                       /*
-                       ** (PR|PO)(OP|HY): the answer depends on whether an
-                       ** NU follows. Put back the previous classes, save
-                       ** this one, and switch to the lookahead handlers
-                       ** without reporting anything for this character.
-                       */
-                       i->prevclass=prevclass;
-                       i->prevclass_nsp=prevclass_nsp;
-
-                       i->savedclass=uclass;
-                       i->savedcmcnt=0;
-                       i->next_handler=next_lb25_seenophy;
-                       i->end_handler=end_lb25_seenophy;
-                       return 0;
-               }
-       }
-
-       if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) &&
-           uclass == UNICODE_LB_NU)
-               return RESULT(UNICODE_LB_NONE); /* LB25 */
-
-       /*****/
-
-       if (prevclass == UNICODE_LB_JL)
-               switch (uclass) {
-               case UNICODE_LB_JL:
-               case UNICODE_LB_JV:
-               case UNICODE_LB_H2:
-               case UNICODE_LB_H3:
-                       return RESULT(UNICODE_LB_NONE); /* LB26 */
-               default:
-                       break;
-               }
-
-       if ((prevclass == UNICODE_LB_JV ||
-            prevclass == UNICODE_LB_H2) &&
-           (uclass == UNICODE_LB_JV ||
-            uclass == UNICODE_LB_JT))
-               return RESULT(UNICODE_LB_NONE); /* LB26 */
-
-       if ((prevclass == UNICODE_LB_JT ||
-            prevclass == UNICODE_LB_H3) &&
-           uclass == UNICODE_LB_JT)
-               return RESULT(UNICODE_LB_NONE); /* LB26 */
-
-
-       switch (prevclass) {
-       case UNICODE_LB_JL:
-       case UNICODE_LB_JV:
-       case UNICODE_LB_JT:
-       case UNICODE_LB_H2:
-       case UNICODE_LB_H3:
-               if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO)
-                       return RESULT(UNICODE_LB_NONE); /* LB27 */
-               /* No match: falls into default, which only breaks */
-       default:
-               break;
-       }
-
-       switch (uclass) {
-       case UNICODE_LB_JL:
-       case UNICODE_LB_JV:
-       case UNICODE_LB_JT:
-       case UNICODE_LB_H2:
-       case UNICODE_LB_H3:
-               if (prevclass == UNICODE_LB_PR)
-                       return RESULT(UNICODE_LB_NONE); /* LB27 */
-               /* No match: falls into default, which only breaks */
-       default:
-               break;
-       }
-
-       if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_AL)
-               return RESULT(UNICODE_LB_NONE); /* LB28 */
-
-       if (prevclass == UNICODE_LB_IS && uclass == UNICODE_LB_AL)
-               return RESULT(UNICODE_LB_NONE); /* LB29 */
-
-       if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_NU) &&
-           uclass == UNICODE_LB_OP)
-               return RESULT(UNICODE_LB_NONE); /* LB30 */
-
-       if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_NU) &&
-           prevclass == UNICODE_LB_CP)
-               return RESULT(UNICODE_LB_NONE); /* LB30 */
-
-       /* No rule matched: a break is allowed */
-       return RESULT(UNICODE_LB_ALLOWED); /* LB31 */
-}
-
-/*
-** Seen (PR|PO)(OP|HY), with the second character's result held back, but
-** no NU followed. Backtrack: restore the default handlers and replay the
-** saved class through the regular rules.
-**
-** The saved class is replayed once with the LB25 lookahead suppressed, so
-** that it is not re-entered, and then once more for each CM counted in
-** savedcmcnt. Returns the first non-zero result, or 0.
-**
-** NOTE(review): every replay passes savedclass rather than UNICODE_LB_CM;
-** confirm that this is what is intended for the counted CMs.
-*/
-
-static int unwind_lb25_seenophy(unicode_lb_info_t i)
-{
-       int suppress_lb25=1;
-
-       i->end_handler=end_def;
-       i->next_handler=next_def;
-
-       for (;;)
-       {
-               int rc=next_def_nolb25(i, i->savedclass, suppress_lb25);
-
-               if (rc)
-                       return rc;
-
-               suppress_lb25=0;
-
-               if (i->savedcmcnt-- == 0)
-                       break;
-       }
-       return 0;
-}
-
-/*
-** Handler for the state after (PR|PO)(OP|HY), where the result for the
-** second character has been held back.
-**
-** CM - counted, nothing reported yet.
-** NU - the modified LB25 regexp matched: report "no break" for the
-**      held-back character and each counted CM, then for the NU itself.
-** Anything else - backtrack, then process this character normally.
-*/
-
-static int next_lb25_seenophy(unicode_lb_info_t i,
-                             uint8_t uclass)
-{
-       int rc;
-
-       switch (uclass) {
-       case UNICODE_LB_CM:
-               ++i->savedcmcnt; /* Keep track of CMs, and try again */
-               return 0;
-
-       case UNICODE_LB_NU:
-               break;
-
-       default:
-               rc=unwind_lb25_seenophy(i);
-
-               return rc ? rc:next_def_nolb25(i, uclass, 0);
-       }
-
-       for (;;)
-       {
-               rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */
-
-               if (rc)
-                       return rc;
-
-               if (i->savedcmcnt-- == 0)
-                       break;
-       }
-
-       /* Continue in the "seen NU" state, with the NU as previous class */
-       i->prevclass=i->prevclass_nsp=uclass;
-       i->end_handler=end_def;
-       i->next_handler=next_lb25_seennu;
-       return RESULT(UNICODE_LB_NONE);
-}
-
-/*
-** End of text reached after (PR|PO)(OP|HY): backtrack so that the
-** held-back results are reported, then run the default end handler.
-*/
-
-static int end_lb25_seenophy(unicode_lb_info_t i)
-{
-       int rc=unwind_lb25_seenophy(i);
-
-       return rc ? rc:end_def(i);
-}
-
-/*
-** Handler for the state after an NU, in the modified LB25 regexp.
-**
-** NU, SY, IS and CM report "no break" and stay in this state. CL or CP
-** report "no break" and move to next_lb25_seennuclcp. Anything else
-** restores the default handlers: PR or PO still reports "no break", and
-** every other class is processed by next_def().
-*/
-static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass)
-{
-       switch (uclass) {
-       case UNICODE_LB_CM:
-               return RESULT(UNICODE_LB_NONE); /* LB9 */
-
-       case UNICODE_LB_NU:
-       case UNICODE_LB_SY:
-       case UNICODE_LB_IS:
-               i->prevclass=i->prevclass_nsp=uclass;
-               return RESULT(UNICODE_LB_NONE);
-
-       case UNICODE_LB_CL:
-       case UNICODE_LB_CP:
-               i->prevclass=i->prevclass_nsp=uclass;
-               i->next_handler=next_lb25_seennuclcp;
-               i->end_handler=end_def;
-               return RESULT(UNICODE_LB_NONE);
-
-       default:
-               break;
-       }
-
-       i->next_handler=next_def;
-       i->end_handler=end_def;
-
-       if (uclass != UNICODE_LB_PR && uclass != UNICODE_LB_PO)
-               return next_def(i, uclass); /* Not a prefix, process normally */
-
-       i->prevclass=i->prevclass_nsp=uclass;
-       return RESULT(UNICODE_LB_NONE);
-}
-
-/*
-** Handler for the state after CL or CP, in the modified LB25 regexp.
-**
-** CM reports "no break" and stays in this state. Anything else restores
-** the default handlers: PR or PO reports "no break", and every other class
-** is processed by next_def().
-*/
-static int next_lb25_seennuclcp(unicode_lb_info_t i, uint8_t uclass)
-{
-       int closes_number;
-
-       if (uclass == UNICODE_LB_CM)
-               return RESULT(UNICODE_LB_NONE); /* LB9 */
-
-       i->next_handler=next_def;
-       i->end_handler=end_def;
-
-       closes_number=(uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO);
-
-       if (!closes_number)
-               return next_def(i, uclass);
-
-       i->prevclass=i->prevclass_nsp=uclass;
-       return RESULT(UNICODE_LB_NONE);
-}
-
-/******************/
-
-/*
-** Wrapper around a unicode_lb_info_t handle whose callback also receives
-** the character that each linebreak result belongs to.
-*/
-struct unicode_lbc_info {
-       /* The wrapped line-breaking handle */
-       unicode_lb_info_t handle;
-
-       /* Characters given to unicode_lbc_next(), appended in order */
-       struct unicode_buf buf;
-
-       /* Index in buf of the next character to pair with a result */
-       size_t buf_ptr;
-
-       /* Caller's callback (result, character, cb_arg) and its argument */
-       int (*cb_func)(int, unicode_char, void *);
-       void *cb_arg;
-};
-
-/*
-** Callback installed on the wrapped handle. Pairs the linebreak result
-** with the next buffered character and passes both to the caller's
-** callback, returning its return value.
-**
-** If no buffered character is left, sets errno to EINVAL and returns -1.
-*/
-static int unicode_lbc_callback(int value, void *ptr)
-{
-       unicode_lbc_info_t h=(unicode_lbc_info_t)ptr;
-       size_t pos=h->buf_ptr;
-
-       if (pos < unicode_buf_len(&h->buf))
-       {
-               h->buf_ptr=pos+1;
-
-               return (*h->cb_func)(value, unicode_buf_ptr(&h->buf)[pos],
-                                    h->cb_arg);
-       }
-
-       errno=EINVAL;
-       return -1; /* Shouldn't happen */
-}
-
-/*
-** Allocate a character-reporting line-breaking handle.
-**
-** cb_func is invoked with each linebreak result, the character it belongs
-** to, and cb_arg. Returns NULL if either the wrapper or the wrapped handle
-** cannot be allocated.
-*/
-unicode_lbc_info_t unicode_lbc_init(int (*cb_func)(int, unicode_char, void *),
-                                   void *cb_arg)
-{
-       unicode_lbc_info_t h=calloc(1, sizeof(struct unicode_lbc_info));
-
-       if (h == NULL)
-               return NULL;
-
-       h->cb_arg=cb_arg;
-       h->cb_func=cb_func;
-
-       h->handle=unicode_lb_init(unicode_lbc_callback, h);
-
-       if (h->handle != NULL)
-       {
-               unicode_buf_init(&h->buf, (size_t)-1);
-               return h;
-       }
-
-       free(h);
-       return NULL;
-}
-
-/* Pass the option flags through to the wrapped line-breaking handle. */
-void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts)
-{
-       unicode_lb_info_t wrapped=i->handle;
-
-       unicode_lb_set_opts(wrapped, opts);
-}
-       
-/*
-** Process one character: buffer it so that the callback can report it
-** together with its result, then pass it to unicode_lb_next() on the
-** wrapped handle. Returns that call's return value.
-*/
-int unicode_lbc_next(unicode_lbc_info_t i, unicode_char ch)
-{
-       /* Every buffered character has been reported: reuse the buffer */
-       if (unicode_buf_len(&i->buf) <= i->buf_ptr)
-       {
-               unicode_buf_clear(&i->buf);
-               i->buf_ptr=0;
-       }
-
-       unicode_buf_append(&i->buf, &ch, 1);
-
-       return unicode_lb_next(i->handle, ch);
-}
-
-/*
-** Finish the pass: end the wrapped handle, which also frees it, then
-** release the character buffer and the wrapper itself. Returns the
-** return value of unicode_lb_end().
-*/
-int unicode_lbc_end(unicode_lbc_info_t i)
-{
-       unicode_lb_info_t wrapped=i->handle;
-       int rc;
-
-       rc=unicode_lb_end(wrapped);
-
-       unicode_buf_deinit(&i->buf);
-       free(i);
-       return rc;
-}