xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/Parser.java (revision d87d03b4c0f66bf125e607ef8b0d9c5481040d20)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2001,2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  */
26 
27 //  Parser.java:      LDAP Parser for those service stores that need it.
28 //  Author:           James Kempf
29 //  Created On:       Mon Apr 27 08:11:08 1998
30 //  Last Modified By: James Kempf
31 //  Last Modified On: Mon Mar  1 08:29:36 1999
32 //  Update Count:     45
33 //
34 
35 package com.sun.slp;
36 
37 import java.util.*;
38 import java.io.*;
39 
40 /**
41  * The Parser class implements LDAP query parsing for ServiceStoreInMemory.
42  * It is an internal class because it must know about the internal
43  * structure of the hashtables.
44  *
45  * @author James Kempf
46  */
47 
48 abstract class Parser extends Object {
49 
50     final private static char NONASCII_LOWER = '\u0080';
51     final private static char NONASCII_UPPER = '\uffff';
52 
53     final static char EQUAL = '=';
54     final static char LESS = '<';
55     final static char GREATER = '>';
56     private final static char STAR = '*';
57     final static char PRESENT = STAR;
58 
59     private final static char OPAREN = '(';
60     private final static char CPAREN = ')';
61     private final static char APPROX = '~';
62     private final static char NOT = '!';
63     private final static char AND = '&';
64     private final static char OR = '|';
65     private final static char SPACE = ' ';
66 
67     /**
68      * Record for returning stuff to the service store.
69      *
70      * @author James Kempf
71      */
72 
73     static final class ParserRecord extends Object {
74 
75 	Hashtable services = new Hashtable();
76 	Hashtable signatures = new Hashtable();
77 
78     }
79 
80 
    /**
     * The QueryEvaluator interface evaluates a term in a query, given
     * the attribute id, the operator, the object, and whether the
     * term is currently under negation from a not operator. Only those
     * ServiceStore implementations that want to use the Parser
     * class to perform query parsing must provide this.
     *
     * @author James Kempf
     */

    interface QueryEvaluator {

	/**
	 * Evaluate a single term of the query, storing away the services
	 * that match.
	 *
	 * @param tag The attribute tag for the term.
	 * @param op The term operator. The parser passes one of
	 *		 Parser.EQUAL, Parser.LESS, Parser.GREATER or
	 *		 Parser.PRESENT.
	 * @param pattern The operand of the term. The parser passes
	 *		      null when the term is a pure presence test.
	 * @param invert True if the results of the comparison should be
	 *		     inverted due to a not operator.
	 * @param returns ParserRecord for the returns. The returns are
	 *		      structured exactly like the hashtable
	 *		      returned from findServices().
	 * @return True if the term matched, false if not.
	 * @exception ServiceLocationException If the term cannot be
	 *		      evaluated.
	 */

	boolean evaluate(AttributeString tag,
			 char op,
			 Object pattern,
			 boolean invert,
			 ParserRecord returns)
	    throws ServiceLocationException;

    }
115 
116     /**
117      * Parse a query and incrementally evaluate.
118      *
119      * @param urlLevel Hashtable of langlevel hashtables containing
120      *                 registrations for the service type and scope.
121      * @param query The query. Escapes have not yet been processed.
122      * @param ret   Vector for returned records.
123      * @param locale Locale in which to interpret query strings.
124      * @param ret ParserRecord in which to return the results.
125      */
126 
127     static void
128 	parseAndEvaluateQuery(String query,
129 			      Parser.QueryEvaluator ev,
130 			      Locale locale,
131 			      ParserRecord ret)
132 	throws ServiceLocationException {
133 
134 	// Create and initialize lexical analyzer.
135 
136 	StreamTokenizer tk = new StreamTokenizer(new StringReader(query));
137 
138 	tk.resetSyntax();  		 // make all chars ordinary...
139 	tk.wordChars('\177','\177');	 // treat controls as part of tokens
140 	tk.wordChars('\000', SPACE);
141 	tk.ordinaryChar(NOT);              // 'NOT' operator
142 	tk.wordChars('"', '%');
143 	tk.ordinaryChar(AND);              // 'AND' operator
144 	tk.wordChars('\'', '\'');
145 	tk.ordinaryChar(OPAREN);           // filter grouping
146 	tk.ordinaryChar(CPAREN);
147 	tk.ordinaryChar(STAR);             // present operator
148 	tk.wordChars('+', '{');
149 	tk.ordinaryChar(OR);               // 'OR' operator
150 	tk.wordChars('}', '~');
151 	tk.ordinaryChar(EQUAL);            // comparision operator
152 	tk.ordinaryChar(LESS);             // less operator
153 	tk.ordinaryChar(GREATER);          // greater operator
154 	tk.ordinaryChar(APPROX);           // approx operator
155 
156 	// Begin parsing.
157 
158 	try {
159 	    ParserRecord rec = parseFilter(tk, ev, locale, false, true);
160 
161 	    // Throw exception if anything occurs after the
162 	    //  parsed expression.
163 
164 	    if (tk.nextToken() != StreamTokenizer.TT_EOF) {
165 		throw
166 		    new ServiceLocationException(
167 				ServiceLocationException.PARSE_ERROR,
168 				"par_char_closing",
169 				new Object[] {query});
170 
171 	    }
172 
173 	    // Merge in returns. Use OR operator so all returned
174 	    //  values are merged in.
175 
176 	    mergeQueryReturns(ret, rec, OR);
177 
178 	} catch (IOException ex) {
179 	    throw
180 		new ServiceLocationException(
181 				ServiceLocationException.PARSE_ERROR,
182 				"par_syn_err",
183 				new Object[] {query});
184 
185 	}
186     }
187 
188     //
189     // Routines for dealing with parse returns record.
190     //
191 
192     // Merge source to target. The target has already
193     //  been precharged with ones that must match
194     //  if the op is AND. If it's OR, then simply
195     //  stuff them in.
196 
197     private static boolean
198 	mergeQueryReturns(ParserRecord target,
199 			  ParserRecord source,
200 			  char op) {
201 	Hashtable targetServices = target.services;
202 	Hashtable sourceServices = source.services;
203 	boolean eval;
204 
205 	if (op == AND) {
206 	    eval = mergeTablesWithAnd(targetServices, sourceServices);
207 
208 	} else {
209 	    eval = mergeTablesWithOr(targetServices, sourceServices);
210 
211 	}
212 
213 	Hashtable targetSigs = target.signatures;
214 	Hashtable sourceSigs = source.signatures;
215 
216 	if (op == AND) {
217 	    mergeTablesWithAnd(targetSigs, sourceSigs);
218 
219 	} else {
220 	    mergeTablesWithOr(targetSigs, sourceSigs);
221 
222 	}
223 
224 	return eval;
225     }
226 
227 
228     // Merge tables by removing anything from target that isn't in source.
229 
230     private static boolean mergeTablesWithAnd(Hashtable target,
231 					      Hashtable source) {
232 
233 	Enumeration en = target.keys();
234 
235 	// Remove any from target that aren't in source.
236 
237 	while (en.hasMoreElements()) {
238 	    Object tkey = en.nextElement();
239 
240 	    if (source.get(tkey) == null) {
241 		target.remove(tkey);
242 
243 	    }
244 	}
245 
246 	// If there's nothing left, return false to indicate no further
247 	//  evaluation needed.
248 
249 	if (target.size() <= 0) {
250 	    return false;
251 
252 	}
253 
254 	return true;
255     }
256 
257     // Merge tables by adding everything from source into target.
258 
259     private static boolean mergeTablesWithOr(Hashtable target,
260 					     Hashtable source) {
261 
262 	Enumeration en = source.keys();
263 
264 	while (en.hasMoreElements()) {
265 	    Object skey = en.nextElement();
266 
267 	    target.put(skey, source.get(skey));
268 
269 	}
270 
271 	return true;
272     }
273 
274     //
275     // Parsing for various productions.
276     //
277 
278 
279     // Parse the filter production.
280 
281     private static ParserRecord
282 	parseFilter(StreamTokenizer tk,
283 		    Parser.QueryEvaluator ev,
284 		    Locale locale,
285 		    boolean invert,
286 		    boolean eval)
287 	throws ServiceLocationException, IOException {
288 
289 	ParserRecord ret = null;
290 	int tok = tk.nextToken();
291 
292 	// Check for opening paren.
293 
294 	if (tok != OPAREN) {
295 	    throw
296 		new ServiceLocationException(
297 				ServiceLocationException.PARSE_ERROR,
298 				"par_init_par",
299 				new Object[0]);
300 
301 	}
302 
303 	// Parse inside.
304 
305 	tok = tk.nextToken();
306 
307 	// Check for a logical operator.
308 
309 	if (tok == AND || tok == OR) {
310 	    ret = parseFilterlist(tk, ev, locale, (char)tok, invert, eval);
311 
312 	} else if (tok == NOT) {
313 	    ret =  parseFilter(tk, ev, locale, !invert, eval);
314 
315 	} else if (tok == StreamTokenizer.TT_WORD) {
316 	    tk.pushBack();
317 	    ret =  parseItem(tk, ev, locale, invert, eval);
318 
319 	} else {
320 
321 	    // Since we've covered the ASCII character set, the only other
322 	    //  thing that could be here is a nonASCII character. We push it
323 	    //  back and deal with it in parseItem().
324 
325 	    tk.pushBack();
326 	    ret = parseItem(tk, ev, locale, invert, eval);
327 
328 	}
329 
330 	tok = tk.nextToken();
331 
332 	// Check for closing paren.
333 
334 	if (tok != CPAREN) {
335 	    throw
336 		new ServiceLocationException(
337 				ServiceLocationException.PARSE_ERROR,
338 				"par_final_par",
339 				new Object[0]);
340 
341 	}
342 
343 	return ret;
344     }
345 
346     // Parse a filterlist production.
347 
348     private static ParserRecord
349 	parseFilterlist(StreamTokenizer tk,
350 			Parser.QueryEvaluator ev,
351 			Locale locale,
352 			char op,
353 			boolean invert,
354 			boolean eval)
355 	throws ServiceLocationException, IOException {
356 	boolean match;
357 
358 	ParserRecord mrex = null;
359 
360 	// Parse through the list of filters.
361 
362 	do {
363 	    ParserRecord prex = null;
364 
365 	    if (op == AND) {
366 
367 		prex = parseFilter(tk, ev, locale, invert, eval);
368 
369 	    } else {
370 
371 		prex = parseFilter(tk, ev, locale, invert, eval);
372 
373 	    }
374 
375 	    // We need to start off with something.
376 
377 	    if (mrex == null) {
378 		mrex = prex;
379 
380 	    } else {
381 
382 		// Merge in returns.
383 
384 		eval = mergeQueryReturns(mrex, prex, op);
385 
386 	    }
387 
388 	    // Look for ending paren.
389 
390 	    int tok = tk.nextToken();
391 	    tk.pushBack();
392 
393 	    if (tok == CPAREN) {
394 
395 		return mrex;
396 
397 	    }
398 
399 	} while (true);
400 
401     }
402 
403     // Parse item.
404 
405     private static ParserRecord
406 	parseItem(StreamTokenizer tk,
407 		  Parser.QueryEvaluator ev,
408 		  Locale locale,
409 		  boolean invert,
410 		  boolean eval)
411 	throws ServiceLocationException, IOException {
412 
413 	ParserRecord prex = new ParserRecord();
414 	AttributeString attr = parseAttr(tk, locale);
415 	char op = parseOp(tk);
416 	Object value = null;
417 
418 	// If operator is PRESENT, then check whether
419 	//  it's not really a wildcarded value. If the next
420 	//  token isn't a closing paren, then it's
421 	//  a wildcarded value.
422 
423 	if (op == PRESENT) {
424 	    int tok = tk.nextToken();
425 
426 	    tk.pushBack();  // ...in any event...
427 
428 	    if ((char)tok != CPAREN) { // It's a wildcarded pattern...
429 		op = EQUAL;
430 		value = parseValue(tk, locale);
431 
432 		// Need to convert to a wildcarded pattern. Regardless
433 		//  of type, since wildcard makes the type be a
434 		//  string.
435 
436 		value =
437 		    new AttributePattern(PRESENT + value.toString(), locale);
438 
439 	    }
440 	} else {
441 	    value = parseValue(tk, locale);
442 
443 	}
444 
445 	// Check for inappropriate pattern.
446 
447 	if (value instanceof AttributePattern &&
448 	    ((AttributePattern)value).isWildcarded() &&
449 	    op != EQUAL) {
450 	    throw
451 		new ServiceLocationException(
452 				ServiceLocationException.PARSE_ERROR,
453 				"par_wild_op",
454 				new Object[] {new Character(op)});
455 
456 	}
457 
458 	// Check for inappropriate boolean.
459 
460 	if ((value instanceof Boolean ||
461 	    value instanceof Opaque) &&
462 	    (op == GREATER || op == LESS)) {
463 	    throw
464 		new ServiceLocationException(
465 				ServiceLocationException.PARSE_ERROR,
466 				"par_bool_op",
467 				new Object[] {new Character(op)});
468 
469 	}
470 
471 	// Check for wrong operator with keyword.
472 
473 	if ((value == null || value.toString().length() <= 0) &&
474 	    op != PRESENT) {
475 	    throw
476 		new ServiceLocationException(
477 				ServiceLocationException.PARSE_ERROR,
478 				"par_key_op",
479 				new Object[] {new Character(op)});
480 	}
481 
482 	if (eval) {
483 	    /*
484 	     * Try and evaluate the query. If the evaluation failed and the
485 	     * value was an Integer or Boolean try again after converting the
486 	     * value to a String. This is because the value in the query will
487 	     * be converted to an Integer or Boolean in preference to a String
488 	     * even though the query starts out as a String.  Hence when an
489 	     * attribute is registered with a String value that can equally be
490 	     * parsed as a valid Integer or Boolean value the String will
491 	     * almost always be parsed as an Integer or Boolean. This results
492 	     * in the failing of the initial type check when performing the
493 	     * query. By converting the value to a String there is another shot
494 	     * at fulfulling the query.
495 	     */
496 	    if (!ev.evaluate(attr, op, value, invert, prex) &&
497 		    !(value instanceof AttributeString)) {
498 		ev.evaluate(attr,
499 			    op,
500 			    new AttributeString(
501 				value.toString().trim(),
502 				locale),
503 			    invert,
504 			    prex);
505 	    }
506 
507 	}
508 
509 	return prex;
510     }
511 
512     // Parse attribute tag.
513 
514     private static AttributeString parseAttr(StreamTokenizer tk, Locale locale)
515 	throws ServiceLocationException, IOException {
516 
517 	String str  = parsePotentialNonASCII(tk);
518 
519 	str =
520 	    ServiceLocationAttribute.unescapeAttributeString(str, true);
521 
522 	return new AttributeString(str, locale);
523     }
524 
525     // Parse attribute operator.
526 
527     private static char parseOp(StreamTokenizer tk)
528 	throws ServiceLocationException, IOException {
529 
530 	int tok = tk.nextToken();
531 
532 	// Identify operator
533 
534 	switch (tok) {
535 
536 	case EQUAL:
537 
538 	    // Is it present?
539 
540 	    tok = tk.nextToken();
541 
542 	    if (tok == STAR) {
543 		return PRESENT;
544 
545 	    } else {
546 		tk.pushBack();
547 		return EQUAL;
548 
549 	    }
550 
551 	case APPROX: case GREATER: case LESS:
552 
553 	    // Need equals.
554 
555 	    if (tk.nextToken() != EQUAL) {
556 		break;
557 
558 	    }
559 
560 	    if (tok == APPROX) {
561 		tok = EQUAL;
562 
563 	    }
564 
565 	    return (char)tok;
566 
567 	default:
568 	    break;
569 
570 	}
571 
572 	throw
573 	    new ServiceLocationException(
574 				ServiceLocationException.PARSE_ERROR,
575 				"par_comp_op",
576 				new Object[0]);
577 
578     }
579 
580     // Parse expression value.
581 
582     private static Object parseValue(StreamTokenizer tk, Locale locale)
583 	throws ServiceLocationException, IOException {
584 
585 	StringBuffer buf = new StringBuffer();
586 
587 	// Parse until the next closing paren.
588 
589 	do {
590 	    int tok = tk.nextToken();
591 
592 	    if (tok == CPAREN) {
593 		tk.pushBack();
594 
595 		Object o =
596 		    ServiceLocationAttribute.evaluate(buf.toString().trim());
597 
598 		if (o instanceof String) {
599 		    o = new AttributePattern((String)o, locale);
600 
601 		} else if (o instanceof byte[]) {
602 		    o = new Opaque((byte[])o);
603 
604 		}
605 
606 		return o;
607 
608 	    } else if (tok != StreamTokenizer.TT_EOF) {
609 
610 		if (tok == StreamTokenizer.TT_WORD) {
611 		    buf.append(tk.sval);
612 
613 		} else if (tok == StreamTokenizer.TT_NUMBER) {
614 		    Assert.slpassert(false,
615 				  "par_ntok",
616 				  new Object[0]);
617 
618 		} else {
619 		    buf.append((char)tok);
620 
621 		}
622 
623 	    } else {
624 		throw
625 		    new ServiceLocationException(
626 				ServiceLocationException.PARSE_ERROR,
627 				"par_qend",
628 				new Object[0]);
629 	    }
630 	} while (true);
631 
632     }
633 
634     // NonASCII characters may be in the string. StreamTokenizer
635     //  can't handle them as part of words, so we need to resort to
636     //  this loop to handle it.
637 
638     private static String parsePotentialNonASCII(StreamTokenizer tk)
639 	throws IOException {
640 
641 	StringBuffer buf = new StringBuffer();
642 
643 	do {
644 
645 	    int tok = tk.nextToken();
646 
647 	    if (tok == StreamTokenizer.TT_WORD) {
648 		buf.append(tk.sval);
649 
650 	    } else if (((char)tok >= NONASCII_LOWER) &&
651 		       ((char)tok <= NONASCII_UPPER)) {
652 		buf.append((char)tok);
653 
654 	    } else {
655 		tk.pushBack();
656 		break;
657 
658 	    }
659 
660 	} while (true);
661 
662 	return buf.toString();
663     }
664 }
665