00001
00002
00003
00004
00013 #if HAVE_CONFIG_H
00014 #include <config.h>
00015 #endif
00016
00017 #include <assert.h>
00018 #include <errno.h>
00019 #include <string.h>
00020 #include <ctype.h>
00021
00022 #include <yaz/xmalloc.h>
00023 #include "iconv-p.h"
00024
/* Per-conversion state of the MARC-8 decoder. */
struct decoder_data {
    /* Final byte of the last escape sequence designating G0 / G1.
       G0 is used for input bytes below 128, G1 for all others. */
    int g0_mode;
    int g1_mode;

    /* Queue of combining characters read ahead of their base character.
       comb_size entries are valid; comb_offset is the next one to hand out. */
    int comb_offset;
    int comb_size;
    unsigned long comb_x[8];    /* decoded value of each queued character */
    size_t comb_no_read[8];     /* input bytes each queued character used */
};
00034
00035 yaz_conv_func_t yaz_marc8_42_conv;
00036 yaz_conv_func_t yaz_marc8_45_conv;
00037 yaz_conv_func_t yaz_marc8_67_conv;
00038 yaz_conv_func_t yaz_marc8_62_conv;
00039 yaz_conv_func_t yaz_marc8_70_conv;
00040 yaz_conv_func_t yaz_marc8_32_conv;
00041 yaz_conv_func_t yaz_marc8_4E_conv;
00042 yaz_conv_func_t yaz_marc8_51_conv;
00043 yaz_conv_func_t yaz_marc8_33_conv;
00044 yaz_conv_func_t yaz_marc8_34_conv;
00045 yaz_conv_func_t yaz_marc8_53_conv;
00046 yaz_conv_func_t yaz_marc8_31_conv;
00047
00048
00049 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
00050 struct decoder_data *data,
00051 unsigned char *inp,
00052 size_t inbytesleft, size_t *no_read,
00053 int *comb);
00054
00055 static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
00056 unsigned char *inp,
00057 size_t inbytesleft, size_t *no_read)
00058 {
00059 struct decoder_data *data = (struct decoder_data *) d->data;
00060 unsigned long x;
00061 if (data->comb_offset < data->comb_size)
00062 {
00063 *no_read = data->comb_no_read[data->comb_offset];
00064 x = data->comb_x[data->comb_offset];
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078 #if 0
00079 if (x == 0x0361 || x == 0x0360)
00080 *no_read += 1;
00081 #endif
00082 data->comb_offset++;
00083 return x;
00084 }
00085
00086 data->comb_offset = 0;
00087 for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
00088 {
00089 int comb = 0;
00090
00091 if (inbytesleft == 0 && data->comb_size)
00092 {
00093 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
00094 x = 0;
00095 *no_read = 0;
00096 break;
00097 }
00098 x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
00099 if (!comb || !x)
00100 break;
00101 data->comb_x[data->comb_size] = x;
00102 data->comb_no_read[data->comb_size] = *no_read;
00103 inp += *no_read;
00104 inbytesleft = inbytesleft - *no_read;
00105 }
00106 return x;
00107 }
00108
00109 static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
00110 unsigned char *inp,
00111 size_t inbytesleft, size_t *no_read)
00112 {
00113 struct decoder_data *data = (struct decoder_data *) d->data;
00114 unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read);
00115 if (x && data->comb_size == 1)
00116 {
00117 if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
00118 {
00119 *no_read += data->comb_no_read[0];
00120 data->comb_size = 0;
00121 }
00122 }
00123 return x;
00124 }
00125
00126 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
00127 struct decoder_data *data,
00128 unsigned char *inp,
00129 size_t inbytesleft, size_t *no_read,
00130 int *comb)
00131 {
00132 *no_read = 0;
00133 while (inbytesleft > 0 && *inp == 27)
00134 {
00135 int *modep = &data->g0_mode;
00136 size_t inbytesleft0 = inbytesleft;
00137
00138 inbytesleft--;
00139 inp++;
00140 if (inbytesleft == 0)
00141 goto incomplete;
00142 if (*inp == '$')
00143 {
00144 inbytesleft--;
00145 inp++;
00146 }
00147 if (inbytesleft == 0)
00148 goto incomplete;
00149 if (*inp == '(' || *inp == ',')
00150 {
00151 inbytesleft--;
00152 inp++;
00153 }
00154 else if (*inp == ')' || *inp == '-')
00155 {
00156 inbytesleft--;
00157 inp++;
00158 modep = &data->g1_mode;
00159 }
00160 if (inbytesleft == 0)
00161 goto incomplete;
00162 if (*inp == '!')
00163 {
00164 inbytesleft--;
00165 inp++;
00166 }
00167 if (inbytesleft == 0)
00168 goto incomplete;
00169 *modep = *inp++;
00170 inbytesleft--;
00171
00172 (*no_read) += inbytesleft0 - inbytesleft;
00173 }
00174 if (inbytesleft == 0)
00175 return 0;
00176 else if (*inp == ' ')
00177 {
00178 *no_read += 1;
00179 return ' ';
00180 }
00181 else
00182 {
00183 unsigned long x;
00184 size_t no_read_sub = 0;
00185 int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
00186 *comb = 0;
00187
00188 switch(mode)
00189 {
00190 case 'B':
00191 case 's':
00192 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00193 break;
00194 case 'E':
00195 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
00196 break;
00197 case 'g':
00198 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00199 break;
00200 case 'b':
00201 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00202 break;
00203 case 'p':
00204 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00205 break;
00206 case '2':
00207 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00208 break;
00209 case 'N':
00210 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00211 break;
00212 case 'Q':
00213 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00214 break;
00215 case '3':
00216 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00217 break;
00218 case '4':
00219 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00220 break;
00221 case 'S':
00222 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00223 break;
00224 case '1':
00225 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
00226 break;
00227 default:
00228 *no_read = 0;
00229 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
00230 return 0;
00231 }
00232 *no_read += no_read_sub;
00233 return x;
00234 }
00235 incomplete:
00236 *no_read = 0;
00237 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
00238 return 0;
00239 }
00240
00241
00242 static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
00243 unsigned char *inp,
00244 size_t inbytesleft, size_t *no_read)
00245 {
00246 struct decoder_data *data = (struct decoder_data *) d->data;
00247 data->g0_mode = 'B';
00248 data->g1_mode = 'E';
00249 data->comb_offset = data->comb_size = 0;
00250 return 0;
00251 }
00252
00253 void destroy_marc8(yaz_iconv_decoder_t d)
00254 {
00255 struct decoder_data *data = (struct decoder_data *) d->data;
00256 xfree(data);
00257 }
00258
00259 yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
00260 yaz_iconv_decoder_t d)
00261 {
00262 if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
00263 d->read_handle = read_marc8;
00264 else if (!yaz_matchstr(fromcode, "MARC8s"))
00265 d->read_handle = read_marc8s;
00266 else
00267 return 0;
00268 {
00269 struct decoder_data *data = (struct decoder_data *)
00270 xmalloc(sizeof(*data));
00271 d->data = data;
00272 d->init_handle = init_marc8;
00273 d->destroy_handle = destroy_marc8;
00274 }
00275 return d;
00276 }
00277
00278
00279
00280
00281
00282
00283
00284
00285