xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/Parser.java (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2001,2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  */
26 
27 //  Parser.java:      LDAP Parser for those service stores that need it.
28 //  Author:           James Kempf
29 //  Created On:       Mon Apr 27 08:11:08 1998
30 //  Last Modified By: James Kempf
31 //  Last Modified On: Mon Mar  1 08:29:36 1999
32 //  Update Count:     45
33 //
34 
35 package com.sun.slp;
36 
37 import java.util.*;
38 import java.io.*;
39 
40 /**
41  * The Parser class implements LDAP query parsing for ServiceStoreInMemory.
42  * It is an internal class because it must know about the internal
43  * structure of the hashtables.
44  *
45  * @author James Kempf
46  */
47 
48 abstract class Parser extends Object {
49 
50     final private static char NONASCII_LOWER = '\u0080';
51     final private static char NONASCII_UPPER = '\uffff';
52 
53     final static char EQUAL = '=';
54     final static char LESS = '<';
55     final static char GREATER = '>';
56     private final static char STAR = '*';
57     final static char PRESENT = STAR;
58 
59     private final static char OPAREN = '(';
60     private final static char CPAREN = ')';
61     private final static char APPROX = '~';
62     private final static char NOT = '!';
63     private final static char AND = '&';
64     private final static char OR = '|';
65     private final static char SPACE = ' ';
66 
67     /**
68      * Record for returning stuff to the service store.
69      *
70      * @author James Kempf
71      */
72 
73     static final class ParserRecord extends Object {
74 
75 	Hashtable services = new Hashtable();
76 	Hashtable signatures = new Hashtable();
77 
78     }
79 
80 
81     /**
82      * The QueryEvaluator interface evaluates a term in a query, given
83      * the attribute id, the operator, the object, and whether the
84      * term is currently under negation from a not operator. Only those
85      * ServiceStore implemenations that want to use the Parser
86      * class to perform query parsing must provide this.
87      *
88      * @author James Kempf
89      */
90 
91     interface QueryEvaluator {
92 
93 	/**
94 	 * Evaluate the query, storing away the services that match.
95 	 *
96 	 * @param tag The attribute tag for the term.
97 	 * @param op The term operator.
98 	 * @param pattern the operand of the term.
99 	 * @param invert True if the results of the comparison should be
100 	 *		     inverted due to a not operator.
101 	 * @param returns Hashtable for the returns. The returns are
102 	 *		      structured exactly like the hashtable
103 	 *		      returned from findServices().
104 	 * @return True if the term matched, false if not.
105 	 */
106 
evaluate(AttributeString tag, char op, Object pattern, boolean invert, ParserRecord returns)107 	boolean evaluate(AttributeString tag,
108 			 char op,
109 			 Object pattern,
110 			 boolean invert,
111 			 ParserRecord returns)
112 	    throws ServiceLocationException;
113 
114     }
115 
116     /**
117      * Parse a query and incrementally evaluate.
118      *
119      * @param urlLevel Hashtable of langlevel hashtables containing
120      *                 registrations for the service type and scope.
121      * @param query The query. Escapes have not yet been processed.
122      * @param ret   Vector for returned records.
123      * @param locale Locale in which to interpret query strings.
124      * @param ret ParserRecord in which to return the results.
125      */
126 
127     static void
parseAndEvaluateQuery(String query, Parser.QueryEvaluator ev, Locale locale, ParserRecord ret)128 	parseAndEvaluateQuery(String query,
129 			      Parser.QueryEvaluator ev,
130 			      Locale locale,
131 			      ParserRecord ret)
132 	throws ServiceLocationException {
133 
134 	// Create and initialize lexical analyzer.
135 
136 	StreamTokenizer tk = new StreamTokenizer(new StringReader(query));
137 
138 	tk.resetSyntax();  		 // make all chars ordinary...
139 	tk.wordChars('\177','\177');	 // treat controls as part of tokens
140 	tk.wordChars('\000', SPACE);
141 	tk.ordinaryChar(NOT);              // 'NOT' operator
142 	tk.wordChars('"', '%');
143 	tk.ordinaryChar(AND);              // 'AND' operator
144 	tk.wordChars('\'', '\'');
145 	tk.ordinaryChar(OPAREN);           // filter grouping
146 	tk.ordinaryChar(CPAREN);
147 	tk.ordinaryChar(STAR);             // present operator
148 	tk.wordChars('+', '{');
149 	tk.ordinaryChar(OR);               // 'OR' operator
150 	tk.wordChars('}', '~');
151 	tk.ordinaryChar(EQUAL);            // comparision operator
152 	tk.ordinaryChar(LESS);             // less operator
153 	tk.ordinaryChar(GREATER);          // greater operator
154 	tk.ordinaryChar(APPROX);           // approx operator
155 
156 	// Begin parsing.
157 
158 	try {
159 	    ParserRecord rec = parseFilter(tk, ev, locale, false, true);
160 
161 	    // Throw exception if anything occurs after the
162 	    //  parsed expression.
163 
164 	    if (tk.nextToken() != StreamTokenizer.TT_EOF) {
165 		throw
166 		    new ServiceLocationException(
167 				ServiceLocationException.PARSE_ERROR,
168 				"par_char_closing",
169 				new Object[] {query});
170 
171 	    }
172 
173 	    // Merge in returns. Use OR operator so all returned
174 	    //  values are merged in.
175 
176 	    mergeQueryReturns(ret, rec, OR);
177 
178 	} catch (IOException ex) {
179 	    throw
180 		new ServiceLocationException(
181 				ServiceLocationException.PARSE_ERROR,
182 				"par_syn_err",
183 				new Object[] {query});
184 
185 	}
186     }
187 
188     //
189     // Routines for dealing with parse returns record.
190     //
191 
192     // Merge source to target. The target has already
193     //  been precharged with ones that must match
194     //  if the op is AND. If it's OR, then simply
195     //  stuff them in.
196 
197     private static boolean
mergeQueryReturns(ParserRecord target, ParserRecord source, char op)198 	mergeQueryReturns(ParserRecord target,
199 			  ParserRecord source,
200 			  char op) {
201 	Hashtable targetServices = target.services;
202 	Hashtable sourceServices = source.services;
203 	boolean eval;
204 
205 	if (op == AND) {
206 	    eval = mergeTablesWithAnd(targetServices, sourceServices);
207 
208 	} else {
209 	    eval = mergeTablesWithOr(targetServices, sourceServices);
210 
211 	}
212 
213 	Hashtable targetSigs = target.signatures;
214 	Hashtable sourceSigs = source.signatures;
215 
216 	if (op == AND) {
217 	    mergeTablesWithAnd(targetSigs, sourceSigs);
218 
219 	} else {
220 	    mergeTablesWithOr(targetSigs, sourceSigs);
221 
222 	}
223 
224 	return eval;
225     }
226 
227 
228     // Merge tables by removing anything from target that isn't in source.
229 
mergeTablesWithAnd(Hashtable target, Hashtable source)230     private static boolean mergeTablesWithAnd(Hashtable target,
231 					      Hashtable source) {
232 
233 	Enumeration en = target.keys();
234 
235 	// Remove any from target that aren't in source.
236 
237 	while (en.hasMoreElements()) {
238 	    Object tkey = en.nextElement();
239 
240 	    if (source.get(tkey) == null) {
241 		target.remove(tkey);
242 
243 	    }
244 	}
245 
246 	// If there's nothing left, return false to indicate no further
247 	//  evaluation needed.
248 
249 	if (target.size() <= 0) {
250 	    return false;
251 
252 	}
253 
254 	return true;
255     }
256 
257     // Merge tables by adding everything from source into target.
258 
mergeTablesWithOr(Hashtable target, Hashtable source)259     private static boolean mergeTablesWithOr(Hashtable target,
260 					     Hashtable source) {
261 
262 	Enumeration en = source.keys();
263 
264 	while (en.hasMoreElements()) {
265 	    Object skey = en.nextElement();
266 
267 	    target.put(skey, source.get(skey));
268 
269 	}
270 
271 	return true;
272     }
273 
274     //
275     // Parsing for various productions.
276     //
277 
278 
279     // Parse the filter production.
280 
281     private static ParserRecord
parseFilter(StreamTokenizer tk, Parser.QueryEvaluator ev, Locale locale, boolean invert, boolean eval)282 	parseFilter(StreamTokenizer tk,
283 		    Parser.QueryEvaluator ev,
284 		    Locale locale,
285 		    boolean invert,
286 		    boolean eval)
287 	throws ServiceLocationException, IOException {
288 
289 	ParserRecord ret = null;
290 	int tok = tk.nextToken();
291 
292 	// Check for opening paren.
293 
294 	if (tok != OPAREN) {
295 	    throw
296 		new ServiceLocationException(
297 				ServiceLocationException.PARSE_ERROR,
298 				"par_init_par",
299 				new Object[0]);
300 
301 	}
302 
303 	// Parse inside.
304 
305 	tok = tk.nextToken();
306 
307 	// Check for a logical operator.
308 
309 	if (tok == AND || tok == OR) {
310 	    ret = parseFilterlist(tk, ev, locale, (char)tok, invert, eval);
311 
312 	} else if (tok == NOT) {
313 	    ret =  parseFilter(tk, ev, locale, !invert, eval);
314 
315 	} else if (tok == StreamTokenizer.TT_WORD) {
316 	    tk.pushBack();
317 	    ret =  parseItem(tk, ev, locale, invert, eval);
318 
319 	} else {
320 
321 	    // Since we've covered the ASCII character set, the only other
322 	    //  thing that could be here is a nonASCII character. We push it
323 	    //  back and deal with it in parseItem().
324 
325 	    tk.pushBack();
326 	    ret = parseItem(tk, ev, locale, invert, eval);
327 
328 	}
329 
330 	tok = tk.nextToken();
331 
332 	// Check for closing paren.
333 
334 	if (tok != CPAREN) {
335 	    throw
336 		new ServiceLocationException(
337 				ServiceLocationException.PARSE_ERROR,
338 				"par_final_par",
339 				new Object[0]);
340 
341 	}
342 
343 	return ret;
344     }
345 
346     // Parse a filterlist production.
347 
348     private static ParserRecord
parseFilterlist(StreamTokenizer tk, Parser.QueryEvaluator ev, Locale locale, char op, boolean invert, boolean eval)349 	parseFilterlist(StreamTokenizer tk,
350 			Parser.QueryEvaluator ev,
351 			Locale locale,
352 			char op,
353 			boolean invert,
354 			boolean eval)
355 	throws ServiceLocationException, IOException {
356 	boolean match;
357 
358 	ParserRecord mrex = null;
359 
360 	// Parse through the list of filters.
361 
362 	do {
363 	    ParserRecord prex = null;
364 
365 	    if (op == AND) {
366 
367 		prex = parseFilter(tk, ev, locale, invert, eval);
368 
369 	    } else {
370 
371 		prex = parseFilter(tk, ev, locale, invert, eval);
372 
373 	    }
374 
375 	    // We need to start off with something.
376 
377 	    if (mrex == null) {
378 		mrex = prex;
379 
380 	    } else {
381 
382 		// Merge in returns.
383 
384 		eval = mergeQueryReturns(mrex, prex, op);
385 
386 	    }
387 
388 	    // Look for ending paren.
389 
390 	    int tok = tk.nextToken();
391 	    tk.pushBack();
392 
393 	    if (tok == CPAREN) {
394 
395 		return mrex;
396 
397 	    }
398 
399 	} while (true);
400 
401     }
402 
403     // Parse item.
404 
405     private static ParserRecord
parseItem(StreamTokenizer tk, Parser.QueryEvaluator ev, Locale locale, boolean invert, boolean eval)406 	parseItem(StreamTokenizer tk,
407 		  Parser.QueryEvaluator ev,
408 		  Locale locale,
409 		  boolean invert,
410 		  boolean eval)
411 	throws ServiceLocationException, IOException {
412 
413 	ParserRecord prex = new ParserRecord();
414 	AttributeString attr = parseAttr(tk, locale);
415 	char op = parseOp(tk);
416 	Object value = null;
417 
418 	// If operator is PRESENT, then check whether
419 	//  it's not really a wildcarded value. If the next
420 	//  token isn't a closing paren, then it's
421 	//  a wildcarded value.
422 
423 	if (op == PRESENT) {
424 	    int tok = tk.nextToken();
425 
426 	    tk.pushBack();  // ...in any event...
427 
428 	    if ((char)tok != CPAREN) { // It's a wildcarded pattern...
429 		op = EQUAL;
430 		value = parseValue(tk, locale);
431 
432 		// Need to convert to a wildcarded pattern. Regardless
433 		//  of type, since wildcard makes the type be a
434 		//  string.
435 
436 		value =
437 		    new AttributePattern(PRESENT + value.toString(), locale);
438 
439 	    }
440 	} else {
441 	    value = parseValue(tk, locale);
442 
443 	}
444 
445 	// Check for inappropriate pattern.
446 
447 	if (value instanceof AttributePattern &&
448 	    ((AttributePattern)value).isWildcarded() &&
449 	    op != EQUAL) {
450 	    throw
451 		new ServiceLocationException(
452 				ServiceLocationException.PARSE_ERROR,
453 				"par_wild_op",
454 				new Object[] {new Character(op)});
455 
456 	}
457 
458 	// Check for inappropriate boolean.
459 
460 	if ((value instanceof Boolean ||
461 	    value instanceof Opaque) &&
462 	    (op == GREATER || op == LESS)) {
463 	    throw
464 		new ServiceLocationException(
465 				ServiceLocationException.PARSE_ERROR,
466 				"par_bool_op",
467 				new Object[] {new Character(op)});
468 
469 	}
470 
471 	// Check for wrong operator with keyword.
472 
473 	if ((value == null || value.toString().length() <= 0) &&
474 	    op != PRESENT) {
475 	    throw
476 		new ServiceLocationException(
477 				ServiceLocationException.PARSE_ERROR,
478 				"par_key_op",
479 				new Object[] {new Character(op)});
480 	}
481 
482 	if (eval) {
483 	    /*
484 	     * Try and evaluate the query. If the evaluation failed and the
485 	     * value was an Integer or Boolean try again after converting the
486 	     * value to a String. This is because the value in the query will
487 	     * be converted to an Integer or Boolean in preference to a String
488 	     * even though the query starts out as a String.  Hence when an
489 	     * attribute is registered with a String value that can equally be
490 	     * parsed as a valid Integer or Boolean value the String will
491 	     * almost always be parsed as an Integer or Boolean. This results
492 	     * in the failing of the initial type check when performing the
493 	     * query. By converting the value to a String there is another shot
494 	     * at fulfulling the query.
495 	     */
496 	    if (!ev.evaluate(attr, op, value, invert, prex) &&
497 		    !(value instanceof AttributeString)) {
498 		ev.evaluate(attr,
499 			    op,
500 			    new AttributeString(
501 				value.toString().trim(),
502 				locale),
503 			    invert,
504 			    prex);
505 	    }
506 
507 	}
508 
509 	return prex;
510     }
511 
512     // Parse attribute tag.
513 
parseAttr(StreamTokenizer tk, Locale locale)514     private static AttributeString parseAttr(StreamTokenizer tk, Locale locale)
515 	throws ServiceLocationException, IOException {
516 
517 	String str  = parsePotentialNonASCII(tk);
518 
519 	str =
520 	    ServiceLocationAttribute.unescapeAttributeString(str, true);
521 
522 	return new AttributeString(str, locale);
523     }
524 
525     // Parse attribute operator.
526 
parseOp(StreamTokenizer tk)527     private static char parseOp(StreamTokenizer tk)
528 	throws ServiceLocationException, IOException {
529 
530 	int tok = tk.nextToken();
531 
532 	// Identify operator
533 
534 	switch (tok) {
535 
536 	case EQUAL:
537 
538 	    // Is it present?
539 
540 	    tok = tk.nextToken();
541 
542 	    if (tok == STAR) {
543 		return PRESENT;
544 
545 	    } else {
546 		tk.pushBack();
547 		return EQUAL;
548 
549 	    }
550 
551 	case APPROX: case GREATER: case LESS:
552 
553 	    // Need equals.
554 
555 	    if (tk.nextToken() != EQUAL) {
556 		break;
557 
558 	    }
559 
560 	    if (tok == APPROX) {
561 		tok = EQUAL;
562 
563 	    }
564 
565 	    return (char)tok;
566 
567 	default:
568 	    break;
569 
570 	}
571 
572 	throw
573 	    new ServiceLocationException(
574 				ServiceLocationException.PARSE_ERROR,
575 				"par_comp_op",
576 				new Object[0]);
577 
578     }
579 
580     // Parse expression value.
581 
parseValue(StreamTokenizer tk, Locale locale)582     private static Object parseValue(StreamTokenizer tk, Locale locale)
583 	throws ServiceLocationException, IOException {
584 
585 	StringBuffer buf = new StringBuffer();
586 
587 	// Parse until the next closing paren.
588 
589 	do {
590 	    int tok = tk.nextToken();
591 
592 	    if (tok == CPAREN) {
593 		tk.pushBack();
594 
595 		Object o =
596 		    ServiceLocationAttribute.evaluate(buf.toString().trim());
597 
598 		if (o instanceof String) {
599 		    o = new AttributePattern((String)o, locale);
600 
601 		} else if (o instanceof byte[]) {
602 		    o = new Opaque((byte[])o);
603 
604 		}
605 
606 		return o;
607 
608 	    } else if (tok != StreamTokenizer.TT_EOF) {
609 
610 		if (tok == StreamTokenizer.TT_WORD) {
611 		    buf.append(tk.sval);
612 
613 		} else if (tok == StreamTokenizer.TT_NUMBER) {
614 		    Assert.slpassert(false,
615 				  "par_ntok",
616 				  new Object[0]);
617 
618 		} else {
619 		    buf.append((char)tok);
620 
621 		}
622 
623 	    } else {
624 		throw
625 		    new ServiceLocationException(
626 				ServiceLocationException.PARSE_ERROR,
627 				"par_qend",
628 				new Object[0]);
629 	    }
630 	} while (true);
631 
632     }
633 
634     // NonASCII characters may be in the string. StreamTokenizer
635     //  can't handle them as part of words, so we need to resort to
636     //  this loop to handle it.
637 
parsePotentialNonASCII(StreamTokenizer tk)638     private static String parsePotentialNonASCII(StreamTokenizer tk)
639 	throws IOException {
640 
641 	StringBuffer buf = new StringBuffer();
642 
643 	do {
644 
645 	    int tok = tk.nextToken();
646 
647 	    if (tok == StreamTokenizer.TT_WORD) {
648 		buf.append(tk.sval);
649 
650 	    } else if (((char)tok >= NONASCII_LOWER) &&
651 		       ((char)tok <= NONASCII_UPPER)) {
652 		buf.append((char)tok);
653 
654 	    } else {
655 		tk.pushBack();
656 		break;
657 
658 	    }
659 
660 	} while (true);
661 
662 	return buf.toString();
663     }
664 }
665