unicode/unicode.h

   1 #ifndef unicode_h
   2 #define unicode_h
   3
   4 /*
   5 ** Copyright 2000-2001 Double Precision, Inc.
   6 ** See COPYING for distribution information.
   7 **
   8 ** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $
   9 */
  10
  11 #ifdef  __cplusplus
  12 extern "C" {
  13 #endif
  14
  15 #include        "../unicode/unicode_config.h" /* VPATH build */
  16
  17 #include        <stdlib.h>
  18
  19 #include        <stdio.h>
  20 #if HAVE_WCHAR_H
  21 #include        <wchar.h>
  22 #endif
  23
  24 #if HAVE_STDDEF_H
  25 #include        <stddef.h>
  26 #endif
  27
  28 typedef wchar_t unicode_char;   /* Character type used by the unicode_* declarations below; an alias for the platform's wchar_t */
  29
  30 struct unicode_info {           /* Describes one character set: its name, property flags and conversion callbacks */
  31         const char *chset;              /* Official character set name */
  32         int flags;                      /* Flags, built from the UNICODE_* values defined below */
  33
  34 #define UNICODE_UTF     1               /* Direct UTF mapping */
  35 #define UNICODE_MB      2               /* Multibyte characters present */
  36 #define UNICODE_SISO    4       /*
  37                                 ** Composite mapping, using shift in/out
  38                                 ** (verbatim text comparison may not work,
  39                                 ** must convert to UTF, or something).
  40                                 ** (replaces search_chset).
  41                                 */
  42
  43 #define UNICODE_USASCII 8       /* Character set is a US-ASCII superset */
  44 #define UNICODE_REPLACEABLE     16      /*
  45                                  * Conversion errors can be replaced by
  46                                  * adequate placeholders (replacement
  47                                  * characters).
  48                                  */
  49 #define UNICODE_HEADER_QUOPRI   32      /*
  50                                 ** Quoted-printable (Q) encoding is preferred
  51                                 ** for MIME message headers.
  52                                 */
  53 #define UNICODE_HEADER_BASE64   64      /*
  54                                 ** Base64 (B) encoding is preferred
  55                                 ** for MIME message headers.
  56                                 */
  57 #define UNICODE_BODY_QUOPRI     128     /*
  58                                 ** Quoted-printable (Q) encoding is preferred
  59                                 ** for MIME message body.
  60                                 */
  61 #define UNICODE_BODY_BASE64     256     /*
  62                                 ** Base64 (B) encoding is preferred
  63                                 ** for MIME message body.
  64                                 */
  65
  66         unicode_char *(*c2u)(const struct unicode_info *, const char *, int *);
  67                 /* Convert character string in this charset to unicode */
  68                 /* NOTE(review): the int * argument of these callbacks is not documented here; presumably an error indicator -- confirm against an implementation */
  69         char *(*u2c)(const struct unicode_info *, const unicode_char *, int *);
  70                 /* Convert unicode to character string in this charset */
  71
  72         /* Convert the string in this character set to upper/lower/titlecase */
  73
  74         char *(*toupper_func)(const struct unicode_info *,
  75                               const char *, int *);
  76         char *(*tolower_func)(const struct unicode_info *,
  77                               const char *, int *);
  78         char *(*totitle_func)(const struct unicode_info *,
  79                               const char *, int *);
  80
  81         const struct unicode_info *search_chset;        /* NOTE(review): purpose not documented here; the UNICODE_SISO comment above says that flag replaces it */
  82         } ;
  83
  84 extern const struct unicode_info unicode_ISO8859_1;     /* Charset descriptors that are declared here and defined outside this header */
  85 extern const struct unicode_info unicode_UTF8;
  86 extern const struct unicode_info unicode_IMAP_MODUTF7;  /* NOTE(review): presumably the modified-UTF7 encoding handled by the modutf7 functions declared further down -- confirm */
  87
  88 extern char *unicode_iso8859_u2c(const unicode_char *, int *,
  89         const unicode_char *);   /* unicode -> charset text, following the u2c/c2u naming of struct unicode_info */
  90         /* NOTE(review): parameters in this group are unnamed; the int * is presumably an error indicator and the trailing pointer a per-charset table -- confirm against the implementations */
  91 extern char *unicode_windows874_u2c(const unicode_char *, int *,
  92         const unicode_char *);
  93
  94 /* ISO8859 charsets all share the same functions */
  95
  96 extern unicode_char *unicode_iso8859_c2u(const char *, int *,
  97                                         const unicode_char *);
  98
  99 extern char *unicode_iso8859_convert(const char *, int *,
 100                                         const char *);   /* NOTE(review): char -> char conversion; meaning of the trailing const char * is not visible here */
 101
 102 /* IBM864 charset has some funkiness, so it gets its own c2u/u2c functions */
 103
 104 unicode_char *unicode_ibm864_c2u(const char *, int *,
 105                                  const unicode_char *);
 106
 107 char *unicode_ibm864_u2c(const unicode_char *, int *,
 108                          const unicode_char *);
 109
 110
 111 struct unicode_chsetlist {      /* Pairs a character set name with its descriptor */
 112         const char *chsetname;          /* Character set name */
 113         const struct unicode_info *ptr; /* Descriptor for that character set */
 114         } ;
 115
 116 extern const struct unicode_chsetlist unicode_chsetlist[];      /* Table of name/descriptor pairs; NOTE(review): how the end of the array is marked is not visible here */
 117 extern const char *unicode_default_chset(void);                 /* (void) makes this a real prototype so calls with arguments are rejected; NOTE(review): presumably returns the default charset name -- confirm */
 118 extern const struct unicode_info *unicode_find(const char *);   /* Find a charset descriptor; NOTE(review): argument is presumably the charset name, result on no match not visible here */
 119
 120 /*
 121 ** UTF8 functions
 122 */
 123
 124         /* Convert Unicode to/from UTF-8 (NOTE(review): ownership of the returned buffers is not stated here -- confirm who frees them) */
 125
 126 extern char *unicode_toutf8(const unicode_char *);       /* unicode_char string -> UTF-8 char string */
 127 extern unicode_char *unicode_fromutf8(const char *);     /* UTF-8 char string -> unicode_char string */
 128
 129         /* Unicode upper/lower/title case conversion functions; each maps a single unicode_char to a unicode_char */
 130
 131 extern unicode_char unicode_uc(unicode_char);    /* NOTE(review): presumably uc/lc/tc = upper/lower/title, in the order of the comment above */
 132 extern unicode_char unicode_lc(unicode_char);
 133 extern unicode_char unicode_tc(unicode_char);
 134
 135         /* Convert charsets to/from UTF-8; the unicode_info argument selects the charset */
 136
 137 extern char *unicode_ctoutf8(const struct unicode_info *, const char *,
 138                              int *);    /* NOTE(review): the int * is undocumented here; presumably an error indicator -- confirm */
 139 extern char *unicode_cfromutf8(const struct unicode_info *, const char *,
 140                                int *);
 141
 142
 143         /* Return width of unicode character (NOTE(review): presumably display columns, as with wcwidth() -- confirm) */
 144
 145 extern int unicode_wcwidth(unicode_char c);
 146
 147         /* Internal functions: */
 148
 149 extern unicode_char *unicode_utf8_tou(const char *, int *);     /* UTF-8 text -> unicode_char string */
 150 extern char *unicode_utf8_fromu(const unicode_char *, int *);   /* unicode_char string -> UTF-8 text */
 151         /* NOTE(review): the int * arguments above are undocumented here; presumably error indicators -- confirm */
 152 size_t unicode_utf8_fromu_pass(const unicode_char *, char *);   /* NOTE(review): presumably one encoding pass writing into the char * buffer and returning a byte count -- confirm */
 153
 154 #define UNICODE_UTF8_MAXLEN     6       /* NOTE(review): presumably the maximum number of bytes one character takes in UTF-8 -- confirm */
 155
 156 extern char *unicode_convert(const char *txt,
 157                              const struct unicode_info *from,
 158                              const struct unicode_info *to);
 159         /* Convert txt from character set 'from' to character set 'to'; errno=EINVAL if conversion could not be performed */
 160
 161 extern char *unicode_xconvert(const char *txt,
 162                               const struct unicode_info *from,
 163                               const struct unicode_info *to);
 164         /* Like unicode_convert(), except unconvertible chars are replaced
 165         ** by periods (or something similar), instead of aborting with EINVAL
 166         */
 167
 168
 169 extern char *unicode_convert_fromchset(const char *txt,
 170                                     const char *from,
 171                                     const struct unicode_info *to);
 172         /* Like unicode_convert(), except that 'from' is a character set name
 173         ** that we search for in the list of chsets we support.
 174         ** errno=EINVAL if that character set does not exist (NOTE(review): this comment used to say 'to', but 'from' is the one given by name -- confirm).
 175         */
 176
 177         /*
 178         ** Convert between unicode and modified-UTF7 encoding used for
 179         ** IMAP folder names.
 180         */
 181
 182 unicode_char *unicode_modutf7touc(const char *s, int *err);     /* modified-UTF7 text -> unicode_char string */
 183
 184         /* *err < 0 if out of memory, else it locates the first illegal modutf7-char (NOTE(review): presumably an offset into s, since err is an int * -- confirm) */
 185         /* This can be used to test if string is a valid mod-utf7 string */
 186
 187 char *unicode_uctomodutf7(const unicode_char *);        /* unicode_char string -> modified-UTF7 text */
 188         /* NOTE(review): the second argument of the x variant is undocumented here; presumably a set of extra characters to treat specially -- confirm */
 189 char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *);
 190
 191 #ifdef  __cplusplus
 192 }
 193 #endif
 194
 195 #endif