xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/Parser.java (revision bb5e3b2f129cc39517b925419c22f69a378ec023)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * ident	"%Z%%M%	%I%	%E% SMI"
24  *
25  * Copyright 2001,2003 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  *
28  */
29 
30 //  SCCS Status:      %W%	%G%
31 //  Parser.java:      LDAP Parser for those service stores that need it.
32 //  Author:           James Kempf
33 //  Created On:       Mon Apr 27 08:11:08 1998
34 //  Last Modified By: James Kempf
35 //  Last Modified On: Mon Mar  1 08:29:36 1999
36 //  Update Count:     45
37 //
38 
39 package com.sun.slp;
40 
41 import java.util.*;
42 import java.io.*;
43 
44 /**
45  * The Parser class implements LDAP query parsing for ServiceStoreInMemory.
46  * It is an internal class because it must know about the internal
47  * structure of the hashtables.
48  *
49  * @version %R%.%L% %D%
50  * @author James Kempf
51  */
52 
53 abstract class Parser extends Object {
54 
55     final private static char NONASCII_LOWER = '\u0080';
56     final private static char NONASCII_UPPER = '\uffff';
57 
58     final static char EQUAL = '=';
59     final static char LESS = '<';
60     final static char GREATER = '>';
61     private final static char STAR = '*';
62     final static char PRESENT = STAR;
63 
64     private final static char OPAREN = '(';
65     private final static char CPAREN = ')';
66     private final static char APPROX = '~';
67     private final static char NOT = '!';
68     private final static char AND = '&';
69     private final static char OR = '|';
70     private final static char SPACE = ' ';
71 
72     /**
73      * Record for returning stuff to the service store.
74      *
75      * @version %R%.%L% %D%
76      * @author James Kempf
77      */
78 
79     static final class ParserRecord extends Object {
80 
81 	Hashtable services = new Hashtable();
82 	Hashtable signatures = new Hashtable();
83 
84     }
85 
86 
87     /**
88      * The QueryEvaluator interface evaluates a term in a query, given
89      * the attribute id, the operator, the object, and whether the
90      * term is currently under negation from a not operator. Only those
91      * ServiceStore implemenations that want to use the Parser
92      * class to perform query parsing must provide this.
93      *
94      * @version %R%.%L% %D%
95      * @author James Kempf
96      */
97 
98     interface QueryEvaluator {
99 
100 	/**
101 	 * Evaluate the query, storing away the services that match.
102 	 *
103 	 * @param tag The attribute tag for the term.
104 	 * @param op The term operator.
105 	 * @param pattern the operand of the term.
106 	 * @param invert True if the results of the comparison should be
107 	 *		     inverted due to a not operator.
108 	 * @param returns Hashtable for the returns. The returns are
109 	 *		      structured exactly like the hashtable
110 	 *		      returned from findServices().
111 	 * @return True if the term matched, false if not.
112 	 */
113 
114 	boolean evaluate(AttributeString tag,
115 			 char op,
116 			 Object pattern,
117 			 boolean invert,
118 			 ParserRecord returns)
119 	    throws ServiceLocationException;
120 
121     }
122 
123     /**
124      * Parse a query and incrementally evaluate.
125      *
126      * @param urlLevel Hashtable of langlevel hashtables containing
127      *                 registrations for the service type and scope.
128      * @param query The query. Escapes have not yet been processed.
129      * @param ret   Vector for returned records.
130      * @param locale Locale in which to interpret query strings.
131      * @param ret ParserRecord in which to return the results.
132      */
133 
134     static void
135 	parseAndEvaluateQuery(String query,
136 			      Parser.QueryEvaluator ev,
137 			      Locale locale,
138 			      ParserRecord ret)
139 	throws ServiceLocationException {
140 
141 	// Create and initialize lexical analyzer.
142 
143 	StreamTokenizer tk = new StreamTokenizer(new StringReader(query));
144 
145 	tk.resetSyntax();  		 // make all chars ordinary...
146 	tk.wordChars('\177','\177');	 // treat controls as part of tokens
147 	tk.wordChars('\000', SPACE);
148 	tk.ordinaryChar(NOT);              // 'NOT' operator
149 	tk.wordChars('"', '%');
150 	tk.ordinaryChar(AND);              // 'AND' operator
151 	tk.wordChars('\'', '\'');
152 	tk.ordinaryChar(OPAREN);           // filter grouping
153 	tk.ordinaryChar(CPAREN);
154 	tk.ordinaryChar(STAR);             // present operator
155 	tk.wordChars('+', '{');
156 	tk.ordinaryChar(OR);               // 'OR' operator
157 	tk.wordChars('}', '~');
158 	tk.ordinaryChar(EQUAL);            // comparision operator
159 	tk.ordinaryChar(LESS);             // less operator
160 	tk.ordinaryChar(GREATER);          // greater operator
161 	tk.ordinaryChar(APPROX);           // approx operator
162 
163 	// Begin parsing.
164 
165 	try {
166 	    ParserRecord rec = parseFilter(tk, ev, locale, false, true);
167 
168 	    // Throw exception if anything occurs after the
169 	    //  parsed expression.
170 
171 	    if (tk.nextToken() != StreamTokenizer.TT_EOF) {
172 		throw
173 		    new ServiceLocationException(
174 				ServiceLocationException.PARSE_ERROR,
175 				"par_char_closing",
176 				new Object[] {query});
177 
178 	    }
179 
180 	    // Merge in returns. Use OR operator so all returned
181 	    //  values are merged in.
182 
183 	    mergeQueryReturns(ret, rec, OR);
184 
185 	} catch (IOException ex) {
186 	    throw
187 		new ServiceLocationException(
188 				ServiceLocationException.PARSE_ERROR,
189 				"par_syn_err",
190 				new Object[] {query});
191 
192 	}
193     }
194 
195     //
196     // Routines for dealing with parse returns record.
197     //
198 
199     // Merge source to target. The target has already
200     //  been precharged with ones that must match
201     //  if the op is AND. If it's OR, then simply
202     //  stuff them in.
203 
204     private static boolean
205 	mergeQueryReturns(ParserRecord target,
206 			  ParserRecord source,
207 			  char op) {
208 	Hashtable targetServices = target.services;
209 	Hashtable sourceServices = source.services;
210 	boolean eval;
211 
212 	if (op == AND) {
213 	    eval = mergeTablesWithAnd(targetServices, sourceServices);
214 
215 	} else {
216 	    eval = mergeTablesWithOr(targetServices, sourceServices);
217 
218 	}
219 
220 	Hashtable targetSigs = target.signatures;
221 	Hashtable sourceSigs = source.signatures;
222 
223 	if (op == AND) {
224 	    mergeTablesWithAnd(targetSigs, sourceSigs);
225 
226 	} else {
227 	    mergeTablesWithOr(targetSigs, sourceSigs);
228 
229 	}
230 
231 	return eval;
232     }
233 
234 
235     // Merge tables by removing anything from target that isn't in source.
236 
237     private static boolean mergeTablesWithAnd(Hashtable target,
238 					      Hashtable source) {
239 
240 	Enumeration en = target.keys();
241 
242 	// Remove any from target that aren't in source.
243 
244 	while (en.hasMoreElements()) {
245 	    Object tkey = en.nextElement();
246 
247 	    if (source.get(tkey) == null) {
248 		target.remove(tkey);
249 
250 	    }
251 	}
252 
253 	// If there's nothing left, return false to indicate no further
254 	//  evaluation needed.
255 
256 	if (target.size() <= 0) {
257 	    return false;
258 
259 	}
260 
261 	return true;
262     }
263 
264     // Merge tables by adding everything from source into target.
265 
266     private static boolean mergeTablesWithOr(Hashtable target,
267 					     Hashtable source) {
268 
269 	Enumeration en = source.keys();
270 
271 	while (en.hasMoreElements()) {
272 	    Object skey = en.nextElement();
273 
274 	    target.put(skey, source.get(skey));
275 
276 	}
277 
278 	return true;
279     }
280 
281     //
282     // Parsing for various productions.
283     //
284 
285 
286     // Parse the filter production.
287 
288     private static ParserRecord
289 	parseFilter(StreamTokenizer tk,
290 		    Parser.QueryEvaluator ev,
291 		    Locale locale,
292 		    boolean invert,
293 		    boolean eval)
294 	throws ServiceLocationException, IOException {
295 
296 	ParserRecord ret = null;
297 	int tok = tk.nextToken();
298 
299 	// Check for opening paren.
300 
301 	if (tok != OPAREN) {
302 	    throw
303 		new ServiceLocationException(
304 				ServiceLocationException.PARSE_ERROR,
305 				"par_init_par",
306 				new Object[0]);
307 
308 	}
309 
310 	// Parse inside.
311 
312 	tok = tk.nextToken();
313 
314 	// Check for a logical operator.
315 
316 	if (tok == AND || tok == OR) {
317 	    ret = parseFilterlist(tk, ev, locale, (char)tok, invert, eval);
318 
319 	} else if (tok == NOT) {
320 	    ret =  parseFilter(tk, ev, locale, !invert, eval);
321 
322 	} else if (tok == StreamTokenizer.TT_WORD) {
323 	    tk.pushBack();
324 	    ret =  parseItem(tk, ev, locale, invert, eval);
325 
326 	} else {
327 
328 	    // Since we've covered the ASCII character set, the only other
329 	    //  thing that could be here is a nonASCII character. We push it
330 	    //  back and deal with it in parseItem().
331 
332 	    tk.pushBack();
333 	    ret = parseItem(tk, ev, locale, invert, eval);
334 
335 	}
336 
337 	tok = tk.nextToken();
338 
339 	// Check for closing paren.
340 
341 	if (tok != CPAREN) {
342 	    throw
343 		new ServiceLocationException(
344 				ServiceLocationException.PARSE_ERROR,
345 				"par_final_par",
346 				new Object[0]);
347 
348 	}
349 
350 	return ret;
351     }
352 
353     // Parse a filterlist production.
354 
355     private static ParserRecord
356 	parseFilterlist(StreamTokenizer tk,
357 			Parser.QueryEvaluator ev,
358 			Locale locale,
359 			char op,
360 			boolean invert,
361 			boolean eval)
362 	throws ServiceLocationException, IOException {
363 	boolean match;
364 
365 	ParserRecord mrex = null;
366 
367 	// Parse through the list of filters.
368 
369 	do {
370 	    ParserRecord prex = null;
371 
372 	    if (op == AND) {
373 
374 		prex = parseFilter(tk, ev, locale, invert, eval);
375 
376 	    } else {
377 
378 		prex = parseFilter(tk, ev, locale, invert, eval);
379 
380 	    }
381 
382 	    // We need to start off with something.
383 
384 	    if (mrex == null) {
385 		mrex = prex;
386 
387 	    } else {
388 
389 		// Merge in returns.
390 
391 		eval = mergeQueryReturns(mrex, prex, op);
392 
393 	    }
394 
395 	    // Look for ending paren.
396 
397 	    int tok = tk.nextToken();
398 	    tk.pushBack();
399 
400 	    if (tok == CPAREN) {
401 
402 		return mrex;
403 
404 	    }
405 
406 	} while (true);
407 
408     }
409 
410     // Parse item.
411 
412     private static ParserRecord
413 	parseItem(StreamTokenizer tk,
414 		  Parser.QueryEvaluator ev,
415 		  Locale locale,
416 		  boolean invert,
417 		  boolean eval)
418 	throws ServiceLocationException, IOException {
419 
420 	ParserRecord prex = new ParserRecord();
421 	AttributeString attr = parseAttr(tk, locale);
422 	char op = parseOp(tk);
423 	Object value = null;
424 
425 	// If operator is PRESENT, then check whether
426 	//  it's not really a wildcarded value. If the next
427 	//  token isn't a closing paren, then it's
428 	//  a wildcarded value.
429 
430 	if (op == PRESENT) {
431 	    int tok = tk.nextToken();
432 
433 	    tk.pushBack();  // ...in any event...
434 
435 	    if ((char)tok != CPAREN) { // It's a wildcarded pattern...
436 		op = EQUAL;
437 		value = parseValue(tk, locale);
438 
439 		// Need to convert to a wildcarded pattern. Regardless
440 		//  of type, since wildcard makes the type be a
441 		//  string.
442 
443 		value =
444 		    new AttributePattern(PRESENT + value.toString(), locale);
445 
446 	    }
447 	} else {
448 	    value = parseValue(tk, locale);
449 
450 	}
451 
452 	// Check for inappropriate pattern.
453 
454 	if (value instanceof AttributePattern &&
455 	    ((AttributePattern)value).isWildcarded() &&
456 	    op != EQUAL) {
457 	    throw
458 		new ServiceLocationException(
459 				ServiceLocationException.PARSE_ERROR,
460 				"par_wild_op",
461 				new Object[] {new Character(op)});
462 
463 	}
464 
465 	// Check for inappropriate boolean.
466 
467 	if ((value instanceof Boolean ||
468 	    value instanceof Opaque) &&
469 	    (op == GREATER || op == LESS)) {
470 	    throw
471 		new ServiceLocationException(
472 				ServiceLocationException.PARSE_ERROR,
473 				"par_bool_op",
474 				new Object[] {new Character(op)});
475 
476 	}
477 
478 	// Check for wrong operator with keyword.
479 
480 	if ((value == null || value.toString().length() <= 0) &&
481 	    op != PRESENT) {
482 	    throw
483 		new ServiceLocationException(
484 				ServiceLocationException.PARSE_ERROR,
485 				"par_key_op",
486 				new Object[] {new Character(op)});
487 	}
488 
489 	if (eval) {
490 	    /*
491 	     * Try and evaluate the query. If the evaluation failed and the
492 	     * value was an Integer or Boolean try again after converting the
493 	     * value to a String. This is because the value in the query will
494 	     * be converted to an Integer or Boolean in preference to a String
495 	     * even though the query starts out as a String.  Hence when an
496 	     * attribute is registered with a String value that can equally be
497 	     * parsed as a valid Integer or Boolean value the String will
498 	     * almost always be parsed as an Integer or Boolean. This results
499 	     * in the failing of the initial type check when performing the
500 	     * query. By converting the value to a String there is another shot
501 	     * at fulfulling the query.
502 	     */
503 	    if (!ev.evaluate(attr, op, value, invert, prex) &&
504 		    !(value instanceof AttributeString)) {
505 		ev.evaluate(attr,
506 			    op,
507 			    new AttributeString(
508 				value.toString().trim(),
509 				locale),
510 			    invert,
511 			    prex);
512 	    }
513 
514 	}
515 
516 	return prex;
517     }
518 
519     // Parse attribute tag.
520 
521     private static AttributeString parseAttr(StreamTokenizer tk, Locale locale)
522 	throws ServiceLocationException, IOException {
523 
524 	String str  = parsePotentialNonASCII(tk);
525 
526 	str =
527 	    ServiceLocationAttribute.unescapeAttributeString(str, true);
528 
529 	return new AttributeString(str, locale);
530     }
531 
532     // Parse attribute operator.
533 
534     private static char parseOp(StreamTokenizer tk)
535 	throws ServiceLocationException, IOException {
536 
537 	int tok = tk.nextToken();
538 
539 	// Identify operator
540 
541 	switch (tok) {
542 
543 	case EQUAL:
544 
545 	    // Is it present?
546 
547 	    tok = tk.nextToken();
548 
549 	    if (tok == STAR) {
550 		return PRESENT;
551 
552 	    } else {
553 		tk.pushBack();
554 		return EQUAL;
555 
556 	    }
557 
558 	case APPROX: case GREATER: case LESS:
559 
560 	    // Need equals.
561 
562 	    if (tk.nextToken() != EQUAL) {
563 		break;
564 
565 	    }
566 
567 	    if (tok == APPROX) {
568 		tok = EQUAL;
569 
570 	    }
571 
572 	    return (char)tok;
573 
574 	default:
575 	    break;
576 
577 	}
578 
579 	throw
580 	    new ServiceLocationException(
581 				ServiceLocationException.PARSE_ERROR,
582 				"par_comp_op",
583 				new Object[0]);
584 
585     }
586 
587     // Parse expression value.
588 
589     private static Object parseValue(StreamTokenizer tk, Locale locale)
590 	throws ServiceLocationException, IOException {
591 
592 	StringBuffer buf = new StringBuffer();
593 
594 	// Parse until the next closing paren.
595 
596 	do {
597 	    int tok = tk.nextToken();
598 
599 	    if (tok == CPAREN) {
600 		tk.pushBack();
601 
602 		Object o =
603 		    ServiceLocationAttribute.evaluate(buf.toString().trim());
604 
605 		if (o instanceof String) {
606 		    o = new AttributePattern((String)o, locale);
607 
608 		} else if (o instanceof byte[]) {
609 		    o = new Opaque((byte[])o);
610 
611 		}
612 
613 		return o;
614 
615 	    } else if (tok != StreamTokenizer.TT_EOF) {
616 
617 		if (tok == StreamTokenizer.TT_WORD) {
618 		    buf.append(tk.sval);
619 
620 		} else if (tok == StreamTokenizer.TT_NUMBER) {
621 		    Assert.slpassert(false,
622 				  "par_ntok",
623 				  new Object[0]);
624 
625 		} else {
626 		    buf.append((char)tok);
627 
628 		}
629 
630 	    } else {
631 		throw
632 		    new ServiceLocationException(
633 				ServiceLocationException.PARSE_ERROR,
634 				"par_qend",
635 				new Object[0]);
636 	    }
637 	} while (true);
638 
639     }
640 
641     // NonASCII characters may be in the string. StreamTokenizer
642     //  can't handle them as part of words, so we need to resort to
643     //  this loop to handle it.
644 
645     private static String parsePotentialNonASCII(StreamTokenizer tk)
646 	throws IOException {
647 
648 	StringBuffer buf = new StringBuffer();
649 
650 	do {
651 
652 	    int tok = tk.nextToken();
653 
654 	    if (tok == StreamTokenizer.TT_WORD) {
655 		buf.append(tk.sval);
656 
657 	    } else if (((char)tok >= NONASCII_LOWER) &&
658 		       ((char)tok <= NONASCII_UPPER)) {
659 		buf.append((char)tok);
660 
661 	    } else {
662 		tk.pushBack();
663 		break;
664 
665 	    }
666 
667 	} while (true);
668 
669 	return buf.toString();
670     }
671 }
672