00001
00002
00003
00004
00010 #if HAVE_CONFIG_H
00011 #include <config.h>
00012 #endif
00013
00014 #include <assert.h>
00015 #include <errno.h>
00016 #include <string.h>
00017 #include <ctype.h>
00018
00019 #include "iconv-p.h"
00020
00021 static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
00022 unsigned char *inp,
00023 size_t inbytesleft, size_t *no_read)
00024 {
00025 if (!inp || inp[0] != 0xef)
00026 {
00027 *no_read = 0;
00028 return 0;
00029 }
00030 if (inbytesleft < 3)
00031 {
00032 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
00033 return (size_t) -1;
00034 }
00035 if (inp[1] != 0xbb && inp[2] == 0xbf)
00036 *no_read = 3;
00037 else
00038 *no_read = 0;
00039 return 0;
00040 }
00041
00042 unsigned long yaz_read_UTF8_char(unsigned char *inp,
00043 size_t inbytesleft, size_t *no_read,
00044 int *error)
00045 {
00046 unsigned long x = 0;
00047
00048 *no_read = 0;
00049 if (inp[0] <= 0x7f)
00050 {
00051 x = inp[0];
00052 *no_read = 1;
00053 }
00054 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
00055 {
00056 *error = YAZ_ICONV_EILSEQ;
00057 }
00058 else if (inp[0] <= 0xdf && inbytesleft >= 2)
00059 {
00060 if ((inp[1] & 0xc0) == 0x80)
00061 {
00062 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
00063 if (x >= 0x80)
00064 *no_read = 2;
00065 else
00066 *error = YAZ_ICONV_EILSEQ;
00067 }
00068 else
00069 *error = YAZ_ICONV_EILSEQ;
00070 }
00071 else if (inp[0] <= 0xef && inbytesleft >= 3)
00072 {
00073 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
00074 {
00075 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
00076 (inp[2] & 0x3f);
00077 if (x >= 0x800)
00078 *no_read = 3;
00079 else
00080 *error = YAZ_ICONV_EILSEQ;
00081 }
00082 else
00083 *error = YAZ_ICONV_EILSEQ;
00084 }
00085 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
00086 {
00087 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
00088 && (inp[3] & 0xc0) == 0x80)
00089 {
00090 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
00091 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
00092 if (x >= 0x10000)
00093 *no_read = 4;
00094 else
00095 *error = YAZ_ICONV_EILSEQ;
00096 }
00097 else
00098 *error = YAZ_ICONV_EILSEQ;
00099 }
00100 else if (inp[0] <= 0xfb && inbytesleft >= 5)
00101 {
00102 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
00103 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
00104 {
00105 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
00106 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
00107 (inp[4] & 0x3f);
00108 if (x >= 0x200000)
00109 *no_read = 5;
00110 else
00111 *error = YAZ_ICONV_EILSEQ;
00112 }
00113 else
00114 *error = YAZ_ICONV_EILSEQ;
00115 }
00116 else if (inp[0] <= 0xfd && inbytesleft >= 6)
00117 {
00118 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
00119 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
00120 && (inp[5] & 0xc0) == 0x80)
00121 {
00122 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
00123 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
00124 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
00125 if (x >= 0x4000000)
00126 *no_read = 6;
00127 else
00128 *error = YAZ_ICONV_EILSEQ;
00129 }
00130 else
00131 *error = YAZ_ICONV_EILSEQ;
00132 }
00133 else
00134 *error = YAZ_ICONV_EINVAL;
00135
00136 return x;
00137 }
00138
00139 static unsigned long read_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
00140 unsigned char *inp,
00141 size_t inbytesleft, size_t *no_read)
00142 {
00143 int err = 0;
00144 int r = yaz_read_UTF8_char(inp, inbytesleft, no_read, &err);
00145 yaz_iconv_set_errno(cd, err);
00146 return r;
00147 }
00148
00149
00150 static size_t write_UTF8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
00151 unsigned long x,
00152 char **outbuf, size_t *outbytesleft)
00153 {
00154 int err = 0;
00155 int r = yaz_write_UTF8_char(x, outbuf, outbytesleft, &err);
00156 yaz_iconv_set_errno(cd, err);
00157 return r;
00158 }
00159
00160 size_t yaz_write_UTF8_char(unsigned long x,
00161 char **outbuf, size_t *outbytesleft,
00162 int *error)
00163 {
00164 unsigned char *outp = (unsigned char *) *outbuf;
00165
00166 if (x <= 0x7f && *outbytesleft >= 1)
00167 {
00168 *outp++ = (unsigned char) x;
00169 (*outbytesleft)--;
00170 }
00171 else if (x <= 0x7ff && *outbytesleft >= 2)
00172 {
00173 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
00174 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
00175 (*outbytesleft) -= 2;
00176 }
00177 else if (x <= 0xffff && *outbytesleft >= 3)
00178 {
00179 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
00180 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
00181 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
00182 (*outbytesleft) -= 3;
00183 }
00184 else if (x <= 0x1fffff && *outbytesleft >= 4)
00185 {
00186 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
00187 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
00188 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
00189 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
00190 (*outbytesleft) -= 4;
00191 }
00192 else if (x <= 0x3ffffff && *outbytesleft >= 5)
00193 {
00194 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
00195 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
00196 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
00197 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
00198 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
00199 (*outbytesleft) -= 5;
00200 }
00201 else if (*outbytesleft >= 6)
00202 {
00203 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
00204 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
00205 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
00206 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
00207 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
00208 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
00209 (*outbytesleft) -= 6;
00210 }
00211 else
00212 {
00213 *error = YAZ_ICONV_E2BIG;
00214 return (size_t)(-1);
00215 }
00216 *outbuf = (char *) outp;
00217 return 0;
00218 }
00219
00220 yaz_iconv_encoder_t yaz_utf8_encoder(const char *tocode,
00221 yaz_iconv_encoder_t e)
00222
00223 {
00224 if (!yaz_matchstr(tocode, "UTF8"))
00225 {
00226 e->write_handle = write_UTF8;
00227 return e;
00228 }
00229 return 0;
00230 }
00231
00232 yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode,
00233 yaz_iconv_decoder_t d)
00234 {
00235 if (!yaz_matchstr(fromcode, "UTF8"))
00236 {
00237 d->init_handle = init_utf8;
00238 d->read_handle = read_utf8;
00239 return d;
00240 }
00241 return 0;
00242 }
00243
00244
00245
00246
00247
00248
00249
00250
00251