d9898ee8 |
1 | /* |
2 | ** Copyright 2003-2004 Double Precision, Inc. See COPYING for |
3 | ** distribution information. |
4 | */ |
5 | |
6 | /* |
7 | ** $Id: encodeautodetect.c,v 1.1 2004/10/21 00:10:51 mrsam Exp $ |
8 | */ |
9 | #include "encode.h" |
10 | #include <string.h> |
11 | #include <stdlib.h> |
12 | #if HAVE_LIBUNICODE |
13 | #include "../unicode/unicode.h" |
14 | #endif |
15 | |
/*
** Scan a byte stream and choose a MIME Content-Transfer-Encoding for it.
**
** charset - name of the character set the content is written in.  NULL or
**           an empty string means no character set is known.
** func    - callback that returns the next byte, or EOF at end of input.
** arg     - opaque pointer passed to func on every call.
**
** Returns one of the string literals "7bit", "8bit", "quoted-printable" or
** "base64".  The result is never NULL and must not be freed.
**
** Decision rules, in order of precedence:
**
** 1. A NUL byte ends the scan at once with "base64".
** 2. If any line (bytes between '\n' characters) is longer than 990 bytes,
**    the result is "quoted-printable" when the character set allows it and
**    "base64" otherwise.
** 3. Otherwise bytes >= 0x80, and control characters other than TAB, CR
**    and LF, upgrade the initial "7bit" as coded below.
**
** With libunicode the per-charset flags (UNICODE_BODY_QUOPRI /
** UNICODE_BODY_BASE64, presumably the charset's preferred body encoding)
** drive the choice.  Without it, a non-empty charset is taken to mean that
** quoted-printable is acceptable.
*/
static const char *libmail_encode_autodetect(const char *charset,
					     int (*func)(void *), void *arg)
{
	const char *encoding="7bit";
	int	l=0;		/* Bytes seen so far on the current line */
	int	longline=0;	/* Set once some line exceeds 990 bytes */
	int	c;
#if HAVE_LIBUNICODE
	const struct unicode_info *ci = unicode_find(charset);
#else
	int	qp_forced=0;	/* A control character already required QP */
#endif

	while ((c = (*func)(arg)) != EOF)
	{
		unsigned char ch= (unsigned char)c;

		if (ch >= 0x80)
		{
#if HAVE_LIBUNICODE
			if (!charset || !*charset)
				encoding="8bit";
			else if (ci && (ci->flags & UNICODE_BODY_QUOPRI))
				encoding="quoted-printable";
			else if (!ci || (ci->flags & UNICODE_BODY_BASE64))
				encoding="base64";
			else
				encoding="8bit";
#else
			/*
			** Do not demote quoted-printable back to 8bit: the
			** answer must not depend on whether the control
			** character came before or after the 8-bit byte.
			*/
			if (!qp_forced)
				encoding="8bit";
#endif
		}

		if (ch < 0x20 && ch != '\t' && ch != '\r' && ch != '\n')
		{
#if HAVE_LIBUNICODE
			/* Without a charset, control characters are ignored */
			if (charset && *charset)
			{
				if (ci && (ci->flags & UNICODE_BODY_QUOPRI))
					encoding="quoted-printable";
				else if (!ci ||
					 (ci->flags & UNICODE_BODY_BASE64))
					encoding="base64";
			}
#else
			if (charset && *charset)
			{
				qp_forced=1;
				encoding="quoted-printable";
			}
#endif
		}

		if (ch == 0)
			return "base64";

		/*
		** Stop counting once a long line has been seen: the flag is
		** all that matters afterwards, and the counter can then never
		** overflow on a huge input without newlines.
		*/
		if (ch == '\n')
			l=0;
		else if (!longline && ++l > 990)
			longline=1;
	}

	/* A long line overrides whatever the individual bytes selected */
	if (longline)
	{
#if HAVE_LIBUNICODE
		if (ci && (ci->flags & UNICODE_BODY_QUOPRI))
			encoding="quoted-printable";
		else
			encoding="base64";
#else
		if (charset && *charset)
			encoding="quoted-printable";
		else
			encoding="base64";
#endif
	}
	return encoding;
}
98 | |
/*
** State for reading a region of a stdio stream one byte at a time.
*/
struct file_info {
	FILE *fp;	/* Stream the bytes are read from */
	off_t pos;	/* Offset tracked for the next read; bumped per read */
	off_t end;	/* Reading stops once pos exceeds this; negative: no limit */
};

/*
** Byte-source callback over a struct file_info.
**
** arg points to a struct file_info.  Returns the next byte from the stream,
** or EOF when the stream is exhausted or when a non-negative end offset has
** been passed.  The tracked offset advances on every call that reaches
** getc(), including the one on which getc() itself returns EOF.
*/
static int read_file(void *arg)
{
	struct file_info *state = (struct file_info *)arg;

	if (state->end < 0 || state->pos <= state->end)
	{
		int byte = getc(state->fp);

		state->pos += 1;
		return byte;
	}
	return EOF;
}
115 | |
/*
** Byte-source callback over a NUL-terminated string.
**
** arg points to a pointer into the string.  Each call returns the byte under
** that pointer, as a non-negative int, and advances the pointer by one.  At
** the terminating NUL it returns EOF and leaves the pointer where it is.
*/
static int read_string(void *arg)
{
	unsigned char **cursor = (unsigned char **)arg;
	unsigned char byte = **cursor;

	if (byte == 0)
		return EOF;

	++*cursor;
	return (int)byte;
}
126 | |
/*
** Choose a transfer encoding for the whole contents of a file.
**
** fp   - stream to examine; passed to libmail_encode_autodetect_fppos()
**        with a start offset of 0 and an end offset of -1.
** okQp - non-zero: scan as "ISO-8859-1" content; zero: scan with no
**        character set (NULL).
**
** Returns whatever libmail_encode_autodetect_fppos() returns.
*/
const char *libmail_encode_autodetect_fp(FILE *fp, int okQp)
{
	const char *charset = okQp ? "ISO-8859-1" : NULL;

	return libmail_encode_autodetect_fppos(fp, charset, 0, -1);
}
134 | |
135 | const char *libmail_encode_autodetect_fppos(FILE *fp, const char *charset, |
136 | off_t start_pos, off_t end_pos) |
137 | { |
138 | struct file_info fi; |
139 | off_t orig_pos = ftell(fp); |
140 | off_t pos = orig_pos; |
141 | const char *rc; |
142 | |
143 | if (start_pos >= 0) |
144 | { |
145 | if (fseek(fp, start_pos, SEEK_SET) == (off_t)-1) |
146 | return NULL; |
147 | else |
148 | pos = start_pos; |
149 | } |
150 | |
151 | fi.fp = fp; |
152 | fi.pos = pos; |
153 | fi.end = end_pos; |
154 | rc = libmail_encode_autodetect(charset, &read_file, &fi); |
155 | |
156 | if (fseek(fp, orig_pos, SEEK_SET) == (off_t)-1) |
157 | return NULL; |
158 | return rc; |
159 | } |
160 | |
161 | const char *libmail_encode_autodetect_str(const char *str, const char *charset) |
162 | { |
163 | return libmail_encode_autodetect(charset, &read_string, &str); |
164 | } |