00001
00002
00003
00004
00010 #include <string.h>
00011 #include <stdlib.h>
00012 #include <ctype.h>
00013
00014 #include "cclp.h"
00015
00016
00017
00018
00019
00020
00021
00022
00023 static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
00024 {
00025 const char **aliases;
00026 int case_sensitive = cclp->ccl_case_sensitive;
00027 int i;
00028
00029 aliases = ccl_qual_search_special(cclp->bibset, "case");
00030 if (aliases)
00031 case_sensitive = atoi(aliases[0]);
00032
00033 for (i = 0; kw[i]; i++)
00034 {
00035 if (token->len == strlen(kw[i]))
00036 {
00037 if (case_sensitive)
00038 {
00039 if (!memcmp(kw[i], token->name, token->len))
00040 return 1;
00041 }
00042 else
00043 {
00044 if (!ccl_memicmp(kw[i], token->name, token->len))
00045 return 1;
00046 }
00047 }
00048 }
00049 return 0;
00050 }
00051
00052
00053
00054
00055
00056 struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
00057 {
00058 const char **aliases;
00059 const unsigned char *cp = (const unsigned char *) command;
00060 struct ccl_token *first = NULL;
00061 struct ccl_token *last = NULL;
00062 cclp->start_pos = command;
00063
00064 while (1)
00065 {
00066 const unsigned char *cp0 = cp;
00067 while (*cp && strchr(" \t\r\n", *cp))
00068 cp++;
00069 if (!first)
00070 {
00071 first = last = (struct ccl_token *)xmalloc(sizeof(*first));
00072 ccl_assert(first);
00073 last->prev = NULL;
00074 }
00075 else
00076 {
00077 last->next = (struct ccl_token *)xmalloc(sizeof(*first));
00078 ccl_assert(last->next);
00079 last->next->prev = last;
00080 last = last->next;
00081 }
00082 last->ws_prefix_buf = (const char *) cp0;
00083 last->ws_prefix_len = cp - cp0;
00084 last->next = NULL;
00085 last->name = (const char *) cp;
00086 last->len = 1;
00087 switch (*cp++)
00088 {
00089 case '\0':
00090 last->kind = CCL_TOK_EOL;
00091 return first;
00092 case '(':
00093 last->kind = CCL_TOK_LP;
00094 break;
00095 case ')':
00096 last->kind = CCL_TOK_RP;
00097 break;
00098 case ',':
00099 last->kind = CCL_TOK_COMMA;
00100 break;
00101 case '%':
00102 case '!':
00103 last->kind = CCL_TOK_PROX;
00104 while (isdigit(*cp))
00105 {
00106 ++ last->len;
00107 cp++;
00108 }
00109 break;
00110 case '>':
00111 case '<':
00112 case '=':
00113 if (*cp == '=' || *cp == '<' || *cp == '>')
00114 {
00115 cp++;
00116 last->kind = CCL_TOK_REL;
00117 ++ last->len;
00118 }
00119 else if (cp[-1] == '=')
00120 last->kind = CCL_TOK_EQ;
00121 else
00122 last->kind = CCL_TOK_REL;
00123 break;
00124 case '\"':
00125 last->kind = CCL_TOK_TERM;
00126 last->name = (const char *) cp;
00127 last->len = 0;
00128 while (*cp && *cp != '\"')
00129 {
00130 cp++;
00131 ++ last->len;
00132 }
00133 if (*cp == '\"')
00134 cp++;
00135 break;
00136 default:
00137 if (!strchr("(),%!><= \t\n\r", cp[-1]))
00138 {
00139 while (*cp && !strchr("(),%!><= \t\n\r", *cp))
00140 {
00141 cp++;
00142 ++ last->len;
00143 }
00144 }
00145 last->kind = CCL_TOK_TERM;
00146
00147 aliases = ccl_qual_search_special(cclp->bibset, "and");
00148 if (!aliases)
00149 aliases = cclp->ccl_token_and;
00150 if (token_cmp(cclp, aliases, last))
00151 last->kind = CCL_TOK_AND;
00152
00153 aliases = ccl_qual_search_special(cclp->bibset, "or");
00154 if (!aliases)
00155 aliases = cclp->ccl_token_or;
00156 if (token_cmp(cclp, aliases, last))
00157 last->kind = CCL_TOK_OR;
00158
00159 aliases = ccl_qual_search_special(cclp->bibset, "not");
00160 if (!aliases)
00161 aliases = cclp->ccl_token_not;
00162 if (token_cmp(cclp, aliases, last))
00163 last->kind = CCL_TOK_NOT;
00164
00165 aliases = ccl_qual_search_special(cclp->bibset, "set");
00166 if (!aliases)
00167 aliases = cclp->ccl_token_set;
00168
00169 if (token_cmp(cclp, aliases, last))
00170 last->kind = CCL_TOK_SET;
00171 }
00172 }
00173 return first;
00174 }
00175
00176 struct ccl_token *ccl_token_add(struct ccl_token *at)
00177 {
00178 struct ccl_token *n = (struct ccl_token *)xmalloc(sizeof(*n));
00179 ccl_assert(n);
00180 n->next = at->next;
00181 n->prev = at;
00182 at->next = n;
00183 if (n->next)
00184 n->next->prev = n;
00185
00186 n->kind = CCL_TOK_TERM;
00187 n->name = 0;
00188 n->len = 0;
00189 n->ws_prefix_buf = 0;
00190 n->ws_prefix_len = 0;
00191 return n;
00192 }
00193
00194
00195
00196
00197 void ccl_token_del(struct ccl_token *list)
00198 {
00199 struct ccl_token *list1;
00200
00201 while (list)
00202 {
00203 list1 = list->next;
00204 xfree(list);
00205 list = list1;
00206 }
00207 }
00208
/**
 * Build a NULL-terminated array holding copies of one or two strings.
 *
 * The array has room for three pointers. Each string is duplicated with
 * xstrdup(), so the result is suitable for destroy_ar().
 *
 * @param v1  first string (always copied)
 * @param v2  optional second string; pass NULL for a one-element array
 * @return the allocated array
 */
static const char **create_ar(const char *v1, const char *v2)
{
    const char **arr = (const char **) xmalloc(3 * sizeof(*arr));
    int used = 0;

    arr[used++] = xstrdup(v1);
    if (v2)
        arr[used++] = xstrdup(v2);
    arr[used] = 0;   /* terminator directly after the last copy */
    return arr;
}
00222
/**
 * Free a NULL-terminated string array: every element, then the array.
 *
 * @param a  array to free; NULL is accepted and ignored
 */
static void destroy_ar(const char **a)
{
    const char **slot;

    if (!a)
        return;

    for (slot = a; *slot; slot++)
        xfree((char *) *slot);
    xfree((char **) a);
}
00233
00234 CCL_parser ccl_parser_create(CCL_bibset bibset)
00235 {
00236 CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
00237 if (!p)
00238 return p;
00239 p->look_token = NULL;
00240 p->error_code = 0;
00241 p->error_pos = NULL;
00242 p->bibset = bibset;
00243
00244 p->ccl_token_and = create_ar("and", 0);
00245 p->ccl_token_or = create_ar("or", 0);
00246 p->ccl_token_not = create_ar("not", "andnot");
00247 p->ccl_token_set = create_ar("set", 0);
00248 p->ccl_case_sensitive = 1;
00249
00250 return p;
00251 }
00252
00253 void ccl_parser_destroy(CCL_parser p)
00254 {
00255 if (!p)
00256 return;
00257 destroy_ar(p->ccl_token_and);
00258 destroy_ar(p->ccl_token_or);
00259 destroy_ar(p->ccl_token_not);
00260 destroy_ar(p->ccl_token_set);
00261 xfree(p);
00262 }
00263
00264 void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
00265 {
00266 if (p)
00267 p->ccl_case_sensitive = case_sensitivity_flag;
00268 }
00269
00270 int ccl_parser_get_error(CCL_parser cclp, int *pos)
00271 {
00272 if (pos && cclp->error_code)
00273 *pos = cclp->error_pos - cclp->start_pos;
00274 return cclp->error_code;
00275 }
00276
00277
00278
00279
00280
00281
00282
00283
00284