+++ /dev/null
-
-/*
-** Copyright 2000-2002 Double Precision, Inc.
-** See COPYING for distribution information.
-**
-** $Id: utf8.c,v 1.4 2002/11/18 00:54:22 mrsam Exp $
-*/
-
-#include "unicode.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-unicode_char *unicode_utf8_tou(const char *cp, int *ip)
-{
-size_t l;
-size_t n=1;
-unicode_char *p, uc;
-
- for (l=0; cp[l]; ++n)
- {
- if ((cp[l] & 0x80) == 0)
- {
- ++l;
- continue;
- }
-
- if ((cp[l] & 0xE0) == 0xC0)
- {
- if ((cp[l+1] & 0xC0) == 0x80)
- {
- l += 2;
- continue;
- }
- }
-
- if ((cp[l] & 0xF0) == 0xE0)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80)
- {
- l += 3;
- continue;
- }
- }
-
- if ((cp[l] & 0xF8) == 0xF0)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80)
- {
- l += 4;
- continue;
- }
- }
-
- if ((cp[l] & 0xFC) == 0xF8)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80 &&
- (cp[l+4] & 0xC0) == 0x80)
- {
- l += 5;
- continue;
- }
- }
-
- if ((cp[l] & 0xFE) == 0xFC)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80 &&
- (cp[l+4] & 0xC0) == 0x80 &&
- (cp[l+5] & 0xC0) == 0x80)
- {
- l += 6;
- continue;
- }
- }
-
- if (ip)
- {
- *ip= l;
- return (0);
- }
- ++l;
- }
- if (ip)
- *ip = -1;
- if ((p=malloc(n*sizeof(unicode_char))) == 0)
- return (0);
- n=0;
-
- for (l=0; cp[l]; p[n++]=uc)
- {
- if ((cp[l] & 0x80) == 0)
- {
- uc=cp[l];
- ++l;
- continue;
- }
-
- if ((cp[l] & 0xE0) == 0xC0)
- {
- if ((cp[l+1] & 0xC0) == 0x80)
- {
- uc=cp[l] & 0x1F;
- uc <<= 6; uc |= cp[l+1] & 0x3F;
- l += 2;
- continue;
- }
- }
-
- if ((cp[l] & 0xF0) == 0xE0)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80)
- {
- uc=cp[l] & 0x0F;
- uc <<= 6; uc |= cp[l+1] & 0x3F;
- uc <<= 6; uc |= cp[l+2] & 0x3F;
- l += 3;
- continue;
- }
- }
-
- if ((cp[l] & 0xF8) == 0xF0)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80)
- {
- uc=cp[l] & 0x07;
- uc <<= 6; uc |= cp[l+1] & 0x3F;
- uc <<= 6; uc |= cp[l+2] & 0x3F;
- uc <<= 6; uc |= cp[l+3] & 0x3F;
- l += 4;
- continue;
- }
- }
-
- if ((cp[l] & 0xFC) == 0xF8)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80 &&
- (cp[l+4] & 0xC0) == 0x80)
- {
- uc=cp[l] & 0x03;
- uc <<= 6; uc |= cp[l+1] & 0x3F;
- uc <<= 6; uc |= cp[l+2] & 0x3F;
- uc <<= 6; uc |= cp[l+3] & 0x3F;
- uc <<= 6; uc |= cp[l+4] & 0x3F;
- l += 5;
- continue;
- }
- }
-
- if ((cp[l] & 0xFE) == 0xFC)
- {
- if ((cp[l+1] & 0xC0) == 0x80 &&
- (cp[l+2] & 0xC0) == 0x80 &&
- (cp[l+3] & 0xC0) == 0x80 &&
- (cp[l+4] & 0xC0) == 0x80 &&
- (cp[l+5] & 0xC0) == 0x80)
- {
- uc=cp[l] & 0x01;
- uc <<= 6; uc |= cp[l+1] & 0x3F;
- uc <<= 6; uc |= cp[l+2] & 0x3F;
- uc <<= 6; uc |= cp[l+3] & 0x3F;
- uc <<= 6; uc |= cp[l+4] & 0x3F;
- uc <<= 6; uc |= cp[l+5] & 0x3F;
- l += 6;
- continue;
- }
- }
- uc=cp[l];
- ++l;
- }
- p[n]=0;
- return (p);
-}
-
-char *unicode_utf8_fromu(const unicode_char *cp, int *ip)
-{
- char *p=0;
- int pass;
- size_t l=0;
-
- for (pass=0; pass<2; pass++)
- {
- if (pass)
- {
- p=malloc(l+1);
- if (!p)
- {
- if (ip) *ip= -1;
- return (0);
- }
- }
-
- l=unicode_utf8_fromu_pass(cp, p);
- if (pass)
- p[l]=0;
- }
- return (p);
-}
-
-
-size_t unicode_utf8_fromu_pass(const unicode_char *cp, char *p)
-{
- size_t l=0;
- unicode_char uc;
-
- l=0;
-
- while (cp && *cp)
- {
- uc= *cp++;
-
- if ((unicode_char)uc ==
- (unicode_char)(uc & 0x007F))
- {
- if (p)
- {
- p[l]= (char)uc;
- }
- ++l;
- continue;
- }
-
- if ((unicode_char)uc ==
- (unicode_char)(uc & 0x07FF))
- {
- if (p)
- {
- p[l+1]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l]= (char)(uc & 0x1F) | 0xC0;
- }
- l += 2;
- continue;
- }
-
- if ((unicode_char)uc ==
- (unicode_char)(uc & 0x00FFFF))
- {
- if (p)
- {
- p[l+2]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+1]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l]= (char)(uc & 0x0F) | 0xE0;
- }
- l += 3;
- continue;
- }
-
- if ((unicode_char)uc ==
- (unicode_char)(uc & 0x001FFFFF))
- {
- if (p)
- {
- p[l+3]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+2]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+1]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l]= (char)(uc & 0x07) | 0xF0;
- }
- l += 4;
- continue;
- }
-
- if ((unicode_char)uc ==
- (unicode_char)(uc & 0x03FFFFFF))
- {
- if (p)
- {
- p[l+4]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+3]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+2]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+1]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l]= (char)(uc & 0x03) | 0xF8;
- }
- l += 5;
- continue;
- }
-
- if (p)
- {
- p[l+5]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+4]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+3]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+2]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l+1]=(char)(uc & 0x3F) | 0x80;
- uc >>= 6;
- p[l]= (char)(uc & 0x01) | 0xFC;
- }
- l += 6;
- }
- return l;
-}