xref: /titanic_44/usr/src/uts/common/fs/sockfs/nl7chttp.c (revision 45916cd2fec6e79bca5dee0421bd39e3c2910d1e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/sysmacros.h>
30 #include <sys/strsubr.h>
31 #include <sys/promif.h>
32 #include <fs/sockfs/nl7c.h>
33 #include <fs/sockfs/nl7curi.h>
34 
35 #include <inet/nca/ncadoorhdr.h>
36 #include <inet/nca/ncalogd.h>
37 
38 /*
39  * HTTP connection persistent headers, mblk_t's, and state values stored in
40  * (struct sonode *).so_nl7c_flags & NL7C_SCHEMEPRIV.
41  */
42 
/* Literal "Connection:" header lines, copied into the mblk_t's below */
char	Shttp_conn_cl[] = "Connection: close\r\n";
char	Shttp_conn_ka[] = "Connection: Keep-Alive\r\n";

/*
 * Template mblk_t's holding the above text, allocated and filled in by
 * nl7c_http_init() and dupb()ed by nl7c_http_persist().
 */
mblk_t	*http_conn_cl;
mblk_t	*http_conn_ka;

/*
 * Bits or'ed into so_nl7c_flags by nl7c_http_request() to select which
 * of the above headers nl7c_http_persist() returns.
 */
#define	HTTP_CONN_CL	0x00010000
#define	HTTP_CONN_KA	0x00020000
51 
52 /*
53  * HTTP scheme private state:
54  */
55 
/*
 * One http_t is allocated per request by nl7c_http_request() and hung off
 * of uri_desc_t.scheme. The str_t members are [cp, ep) pointer pairs into
 * the parsed header text (not copies); a NULL .cp means "header not seen".
 */
typedef struct http_s {
	boolean_t	parsed;		/* Response header fully parsed */
	uint32_t	major, minor;	/* Request-Line HTTP/major.minor */
	uint32_t	headlen;	/* Response HTTP header length */
	clock_t		date;		/* Date: value, -1 if none */
	clock_t		expire;		/* Response Expires: value, -1 if none */
	time_t		lastmod;	/* Response Last-Modified: */
	str_t		accept;		/* Request Accept: */
	str_t		acceptchar;	/* Request Accept-Charset: */
	str_t		acceptenco;	/* Request Accept-Encoding: */
	str_t		acceptlang;	/* Request Accept-Language: */
	str_t		etag;		/* Request/Response ETag: */
	str_t		uagent;		/* Request User-Agent: */
} http_t;
70 
71 static kmem_cache_t *http_kmc;
72 
73 /*
74  * HTTP parser action values:
75  */
76 
/*
 * Bit flags, or'ed together in token_t.act. Only QUALIFIER, NUMERIC, DATE,
 * FILTER, NOCACHE and HASH are tested by the code in this file; the meaning
 * of the remaining values is presumably defined by the token generator
 * input files - TODO confirm.
 */
typedef enum act_e {
	REQUEST		= 0x0001,
	NUMERIC		= 0x0002,	/* Header value must be all digits */
	QUALIFIER	= 0x0004,	/* Header value is examined/stored */
	PASS		= 0x0008,
	FILTER		= 0x0010,	/* Response header is overwritten */
	NOCACHE		= 0x0020,	/* Header marks the URI as no-cache */
	HASH		= 0x0040,	/* Header text is folded into the hash */
	DATE		= 0x0080,	/* Header value is an HTTP date */
	ETAG		= 0x0100,
	RESPONSE	= 0x0200,
	URIABS		= 0x0400,
	URIREL		= 0x0800
} act_t;

/* Action used when none is specified, same as PASS */
#define	UNDEF		PASS
93 
94 /*
95  * HTTP parser token:
96  */
97 
/* One entry per recognized header (or header + value) prefix */
typedef struct token_s {
	int	tokid;			/* Token ident */
	char	*text;			/* Token text, matched case insensitive */
	act_t	act;			/* Action(s) to take, act_t bits or'ed */
} token_t;
103 
104 /*
105  * The ttree_t (or token tree) is an ascending ordered binary tree
106  * built by ttree_build() from an array of tokens and subsequently
107  * used by ttree_line_parse() to parse multiline text data.
108  */
/* A tree node; tok is NULL for padding nodes past the end of the token list */
typedef struct ttree_s {
	token_t *tok;			/* Token */
	struct ttree_s *lt, *gt;	/* < and > next node */
} ttree_t;
113 
114 /*
115  * Note: req_tree[] and res_tree[] must be in ascending case insensitive
116  * order of the char[] strings used to initialize each element.
117  *
118  * See "nl7ctokreq.txt" and "nl7ctokres.txt" which are processed by
119  * "nl7ctokgen" to produce "nl7ctokgen.h" and included here.
120  */
121 
/*
 * Token initializer used by the generated header: for an ident "s" yields
 * a token_t of {tokid = s, text = S<s>, act = t}.
 */
#define	INIT(s, t) {s, S##s, t}

#include "nl7ctokgen.h"

/* Roots of the request and response token trees, see nl7c_http_init() */
static ttree_t *req_tree;
static ttree_t *res_tree;
128 
129 /*
130  * HTTP date routines:
131  */
132 
/*
 * Day-of-week names, NULL terminated. All lower case as the parser
 * lower cases each input char before comparing.
 */
static char *dow[] = {"sunday", "monday", "tuesday", "wednesday", "thursday",
	"friday", "saturday", 0};

/* Month abbreviations, NULL terminated, index is the month (0 == Jan) */
static char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
	"Aug", "Sep", "Oct", "Nov", "Dec", 0};

/* Days of a non leap year preceding the first day of month[index] */
static int dom[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
140 
141 /*
142  * http_date2time_t(const char *) - returns the time(2) value (i.e.
143  * the value 0 is Thu, 01 Jan 1970 00:00:00 GMT) for the following
144  * time formats used by HTTP request and response headers:
145  *
146  *	1) Sun, 07 Dec 1998 14:49:37 GMT	; RFC 822, updated by RFC 1123
147  *	2) Sunday, 07-Dec-98 14:49:37 GMT	; RFC 850, obsoleted by RFC 1036
148  *	3) Sun Nov  7 14:49:37 1998		; ANSI C's asctime() format
149  *	4) 60					; Time delta of N seconds
150  *
151  * On error a time_t value of -1 is returned.
152  *
153  * All dates are GMT (must be part of the date string for types
154  * 1 and 2 and not for type 1).
155  *
156  * Note, the given mstr_t pointed to by *sp will be modified.
157  */
158 
159 static time_t
160 http_date2time_t(char *cp, char *ep)
161 {
162 	char	*scp = cp;
163 	time_t	secs;
164 	char	**tpp;
165 	char	*tp;
166 	char	c, sc;
167 	ssize_t	n;
168 
169 	ssize_t	zeroleap = 1970 / 4 - 1970 / 100 + 1970 / 400;
170 	ssize_t	leap;
171 	ssize_t	year;
172 	ssize_t	month;
173 	ssize_t	day;
174 	ssize_t	hour;
175 	ssize_t	min;
176 	ssize_t	sec;
177 
178 	/* Parse and skip day-of-week (we don't use it) */
179 	tpp = dow;
180 	tp = *tpp;
181 	n = 0;
182 	while (cp < ep) {
183 		c = *cp++;
184 		if (c == ',' || c == ' ')
185 			break;
186 		c = tolower(c);
187 		if (*tp == 0 || *tp != c) {
188 			cp = scp;
189 			if ((tp = *++tpp) == NULL)
190 				break;
191 			continue;
192 		}
193 		tp++;
194 	}
195 	if (cp == NULL) {
196 		/* Not case 1-3, try 4 */
197 		while (cp < ep) {
198 			c = *cp;
199 			if (isdigit(c)) {
200 				cp++;
201 				n *= 10;
202 				n += c - '0';
203 				continue;
204 			}
205 			/* An invalid date sytax */
206 			return (-1);
207 		}
208 		/* Case 4, delta from current time */
209 		return (gethrestime_sec() + n);
210 	}
211 	if (c == ',') {
212 		/* Case 1 or 2, skip <SP> */
213 		if (cp == ep)
214 			return (-1);
215 		c = *cp++;
216 		if (c != ' ')
217 			return (-1);
218 		/* Get day of the month */
219 		if (cp == ep)
220 			return (-1);
221 		c = *cp++;
222 		if (! isdigit(c))
223 			return (-1);
224 		n = c - '0';
225 		if (cp == ep)
226 			return (-1);
227 		c = *cp++;
228 		if (! isdigit(c))
229 			return (-1);
230 		n *= 10;
231 		n += c - '0';
232 		day = n;
233 		/* Get day/month/year seperator */
234 		if (cp == ep)
235 			return (-1);
236 		sc = *cp++;
237 		if (sc != ' ' && sc != '-')
238 			return (-1);
239 		/* Parse month */
240 		tpp = months;
241 		tp = *tpp++;
242 		scp = cp;
243 		n = 0;
244 		while (cp < ep) {
245 			c = *cp;
246 			if (c == sc) {
247 				cp++;
248 				break;
249 			}
250 			c = tolower(c);
251 			if (*tp == 0 || tolower(*tp) != c) {
252 				if ((tp = *tpp++) == NULL)
253 					break;
254 				cp = scp;
255 				n++;
256 				continue;
257 			}
258 			cp++;
259 			tp++;
260 		}
261 		if (cp == NULL)
262 			return (-1);
263 		month = n;
264 		/* Get year */
265 		if (cp == ep)
266 			return (-1);
267 		c = *cp++;
268 		if (! isdigit(c))
269 			return (-1);
270 		n = c - '0';
271 		if (cp == ep)
272 			return (-1);
273 		c = *cp++;
274 		if (! isdigit(c))
275 			return (-1);
276 		n *= 10;
277 		n += c - '0';
278 		if (cp == ep)
279 			return (-1);
280 		c = *cp++;
281 		if (sc == ' ') {
282 			/* Case 1, get 2 more year digits */
283 			if (! isdigit(c))
284 				return (-1);
285 			n *= 10;
286 			n += c - '0';
287 			if (cp == ep)
288 				return (-1);
289 			c = *cp++;
290 			if (! isdigit(c))
291 				return (-1);
292 			n *= 10;
293 			n += c - '0';
294 			/* Get seperator char */
295 			if (cp == ep)
296 				return (-1);
297 			c = *cp;
298 			if (c != ' ')
299 				return (-1);
300 			cp++;
301 		} else {
302 			/*
303 			 * Case 2, 2 digit year and as this is a so-called
304 			 * Unix date format and the begining of time was
305 			 * 1970 so we can extend this obsoleted date syntax
306 			 * past the year 1999 into the year 2038 for 32 bit
307 			 * machines and through 2069 for 64 bit machines.
308 			 */
309 			if (n > 69)
310 				n += 1900;
311 			else
312 				n += 2000;
313 		}
314 		year = n;
315 		/* Get GMT time */
316 		if (c != ' ')
317 			return (-1);
318 		if (cp == ep)
319 			return (-1);
320 		c = *cp++;
321 		if (! isdigit(c))
322 			return (-1);
323 		n = c - '0';
324 		if (cp == ep)
325 			return (-1);
326 		c = *cp++;
327 		if (! isdigit(c))
328 			return (-1);
329 		n *= 10;
330 		n += c - '0';
331 		hour = n;
332 		if (cp == ep)
333 			return (-1);
334 		c = *cp++;
335 		if (c != ':')
336 			return (-1);
337 		if (cp == ep)
338 			return (-1);
339 		c = *cp++;
340 		if (! isdigit(c))
341 			return (-1);
342 		n = c - '0';
343 		if (cp == ep)
344 			return (-1);
345 		c = *cp++;
346 		if (! isdigit(c))
347 			return (-1);
348 		n *= 10;
349 		n += c - '0';
350 		min = n;
351 		if (cp == ep)
352 			return (-1);
353 		c = *cp++;
354 		if (c != ':')
355 			return (-1);
356 		if (cp == ep)
357 			return (-1);
358 		c = *cp++;
359 		if (! isdigit(c))
360 			return (-1);
361 		n = c - '0';
362 		if (cp == ep)
363 			return (-1);
364 		c = *cp++;
365 		if (! isdigit(c))
366 			return (-1);
367 		n *= 10;
368 		n += c - '0';
369 		sec = n;
370 		if (cp == ep)
371 			return (-1);
372 		c = *cp++;
373 		if (c != ' ')
374 			return (-1);
375 		if (cp == ep)
376 			return (-1);
377 		c = *cp++;
378 		if (c != 'G')
379 			return (-1);
380 		if (cp == ep)
381 			return (-1);
382 		c = *cp++;
383 		if (c != 'M')
384 			return (-1);
385 		if (cp == ep)
386 			return (-1);
387 		c = *cp++;
388 		if (c != 'T')
389 			return (-1);
390 	} else {
391 		/* case 3, parse month */
392 		sc = c;
393 		tpp = months;
394 		tp = *tpp++;
395 		scp = cp;
396 		n = 0;
397 		while (cp < ep) {
398 			c = *cp;
399 			if (c == sc) {
400 				cp++;
401 				break;
402 			}
403 			c = tolower(c);
404 			if (*tp == 0 || tolower(*tp) != c) {
405 				if ((tp = *tpp++) == NULL)
406 					break;
407 				cp = scp;
408 				n++;
409 				continue;
410 			}
411 			cp++;
412 			tp++;
413 		}
414 		if (cp == NULL)
415 			return (-1);
416 		month = n;
417 		/* Get day of the month */
418 		if (cp == ep)
419 			return (-1);
420 		c = *cp++;
421 		if (! isdigit(c))
422 			return (-1);
423 		n = c - '0';
424 		if (cp == ep)
425 			return (-1);
426 		c = *cp++;
427 		if (! isdigit(c))
428 			return (-1);
429 		n *= 10;
430 		n += c - '0';
431 		day = n;
432 		/* Skip <SP> */
433 		if (cp == ep)
434 			return (-1);
435 		c = *cp++;
436 		if (c != ' ')
437 			return (-1);
438 		/* Get time */
439 		if (cp == ep)
440 			return (-1);
441 		c = *cp++;
442 		if (! isdigit(c))
443 			return (-1);
444 		n = c - '0';
445 		if (cp == ep)
446 			return (-1);
447 		c = *cp++;
448 		if (! isdigit(c))
449 			return (-1);
450 		n *= 10;
451 		n += c - '0';
452 		hour = n;
453 		if (cp == ep)
454 			return (-1);
455 		c = *cp++;
456 		if (c != ':')
457 			return (-1);
458 		if (cp == ep)
459 			return (-1);
460 		c = *cp++;
461 		if (! isdigit(c))
462 			return (-1);
463 		n = c - '0';
464 		if (cp == ep)
465 			return (-1);
466 		c = *cp++;
467 		if (! isdigit(c))
468 			return (-1);
469 		n *= 10;
470 		n += c - '0';
471 		min = n;
472 		if (cp == ep)
473 			return (-1);
474 		c = *cp++;
475 		if (c != ':')
476 			return (-1);
477 		if (cp == ep)
478 			return (-1);
479 		c = *cp++;
480 		if (! isdigit(c))
481 			return (-1);
482 		n = c - '0';
483 		if (cp == ep)
484 			return (-1);
485 		c = *cp++;
486 		if (! isdigit(c))
487 			return (-1);
488 		n *= 10;
489 		n += c - '0';
490 		sec = n;
491 		/* Skip <SP> */
492 		if (cp == ep)
493 			return (-1);
494 		c = *cp++;
495 		if (c != ' ')
496 			return (-1);
497 		/* Get year */
498 		if (cp == ep)
499 			return (-1);
500 		c = *cp++;
501 		if (! isdigit(c))
502 			return (-1);
503 		n = c - '0';
504 		if (cp == ep)
505 			return (-1);
506 		c = *cp++;
507 		if (! isdigit(c))
508 			return (-1);
509 		n *= 10;
510 		n += c - '0';
511 		if (cp == ep)
512 			return (-1);
513 		c = *cp++;
514 		if (! isdigit(c))
515 			return (-1);
516 		n *= 10;
517 		n += c - '0';
518 		if (cp == ep)
519 			return (-1);
520 		c = *cp++;
521 		if (! isdigit(c))
522 			return (-1);
523 		n *= 10;
524 		n += c - '0';
525 		year = n;
526 	}
527 
528 	/* Last, caclulate seconds since Unix day zero */
529 	leap = year;
530 	if (month < 2)
531 		leap--;
532 	leap = leap / 4 - leap / 100 + leap / 400 - zeroleap;
533 	secs = ((((year - 1970) * 365 + dom[month] + day  - 1 + leap) * 24
534 		+ hour) * 60 + min) * 60 + sec;
535 
536 	return (secs);
537 }
538 
539 /*
540  * Given the ttree_t pointer "*t", parse the char buffer pointed to
541  * by "**cpp" of multiline text data up to the pointer "**epp", the
542  * pointer "*hash" points to the current text hash.
543  *
544  * If a match is found a pointer to the ttree_t token will be returned,
545  * "**cpp" will point to the next line, "**epp" will point to the first
546  * EOL char, "**hpp" will point to remainder of the parse data (if none,
547  * **hpp == **epp), and "*hash" will be updated.
548  *
549  * If no match, as above except "**hpp" points to the begining of the
550  * line and "*hash" wont be updated.
551  *
552  * If no EOL is found NULL is returned, "**epp" is set to NULL, no further
553  * calls can be made until additional data is ready and all arguments are
554  * reset.
555  *
556  * If EOH (i.e. an empty line) NULL is returned, "**hpp" is set to NULL,
557  * *cpp points to past EOH, no further calls can be made.
558  */
559 
560 static token_t *
561 ttree_line_parse(ttree_t *t, char **cpp, char **epp, char **hpp, unsigned *hash)
562 {
563 	char	ca, cb;			/* current line <=> parse node */
564 
565 	char	*cp = *cpp;
566 	char	*ep = *epp;
567 	unsigned hv = *hash;		/* hash value */
568 
569 	char	*tp = t->tok->text;	/* current parse text */
570 	char	*sp = cp;		/* saved *cp */
571 
572 	int	parse;			/* parse state */
573 
574 	/* Special case, check for EOH (i.e. empty line) */
575 	if (cp < ep) {
576 		ca = *cp;
577 		if (ca == '\n') {
578 			/* End of header */
579 			*cpp = ++cp;
580 			*hpp = NULL;
581 			return (NULL);
582 		} else if (ca == '\r') {
583 			cp++;
584 			if (cp < ep) {
585 				ca = *cp;
586 				if (ca == '\n') {
587 					/* End of header */
588 					*cpp = ++cp;
589 					*hpp = NULL;
590 					return (NULL);
591 				}
592 			}
593 			cp = *cpp;
594 		}
595 	}
596 	while (cp < ep) {
597 		/* Get next parse text char */
598 		cb = *tp;
599 		if (cb != 0) {
600 			/* Get next current line char */
601 			ca = *cp++;
602 			if (ca == '\r' || ca == '\n') {
603 				/* EOL, always go less than */
604 				t = t->lt;
605 			} else {
606 				/* Case insensitive */
607 				cb = tolower(cb);
608 				ca = tolower(ca);
609 				if (ca == cb) {
610 					/* Char match, next char */
611 					tp++;
612 					continue;
613 				}
614 				if (ca < cb) {
615 					/* Go less than */
616 					t = t->lt;
617 				} else {
618 					/* Go greater than */
619 					t = t->gt;
620 				}
621 			}
622 			while (t != NULL && t->tok == NULL) {
623 				/* Null node, so descend to < node */
624 				t = t->lt;
625 			}
626 			if (t != NULL) {
627 				/* Initialize for next node compare */
628 				tp = t->tok->text;
629 				cp = sp;
630 				continue;
631 			}
632 			/*
633 			 * End of tree walk, no match, return pointer
634 			 * to the start of line then below find EOL.
635 			 */
636 			*hpp = *cpp;
637 		} else {
638 			/*
639 			 * End of token text, match, return pointer to
640 			 * the rest of header text then below find EOL.
641 			 */
642 			*hpp = cp;
643 		}
644 		/*
645 		 * Find end of line. Note, the HTTP line syntax supports
646 		 * implicit multi-line if the next line starts with a <SP>
647 		 * or <HT>.
648 		 */
649 		parse = 0;
650 		while (cp < ep) {
651 			ca = *cp;
652 			if (parse == 0 && ca == '\r') {
653 				*epp = cp;
654 				parse = 1;
655 			} else if (parse == 0 && ca == '\n') {
656 				*epp = cp;
657 				parse = 2;
658 			} else if (parse == 1 && ca == '\n') {
659 				parse = 2;
660 			} else if (parse >= 2 && (ca == ' ' || ca == '\t')) {
661 				parse++;
662 			} else if (parse > 2) {
663 				parse = 0;
664 			} else if (parse == 2) {
665 				break;
666 			} else if (t != NULL && t->tok->act & HASH) {
667 				hv = hv * 33 + ca;
668 				hv &= 0xFFFFFF;
669 			}
670 			cp++;
671 		}
672 		if (parse < 2) {
673 			/* No EOL, not enough data */
674 			*epp = NULL;
675 			return (t != NULL ? t->tok : NULL);
676 		}
677 		/*
678 		 * Return updated hash value (if any), update parse current
679 		 * pointer for next call (i.e. begin of next line), and last
680 		 * return pointer to the matching token_t.
681 		 */
682 		if (t != NULL && t->tok->act & HASH)
683 			*hash = hv;
684 		*cpp = cp;
685 		return (t != NULL ? t->tok : NULL);
686 	}
687 	/*
688 	 * End of parse text, ...
689 	 */
690 	*epp = NULL;
691 	return (NULL);
692 }
693 
694 /*
695  * Given a NULL terminated array of token_t(s) ordered in ascending
696  * case insensitive order a binary tree is allocated and populated with
697  * pointers into the array and a pointer to the root node is returned.
698  *
699  * Todo, for maximum ttree parse efficiency needs to be path compressed,
700  * the function ttree_line_parse() handles the empty nodes correctly.
701  */
702 static ttree_t *
703 ttree_build(token_t *list, int sz)
704 {
705 	ttree_t *treev;
706 	int	max, lvl, inc, ix;
707 
708 	/* calc the size of the tree */
709 	for (max = 1; max < sz; max <<= 1)
710 		;
711 	/* allocate the tree */
712 	treev = kmem_alloc(sizeof (*treev) * (max - 1), KM_SLEEP);
713 
714 	/* walk the tree and populate from list vector */
715 	lvl = max;
716 	while (lvl >>= 1) {
717 		inc = lvl >> 1;
718 		for (ix = lvl; ix < max; ix += lvl << 1) {
719 			if (ix <= sz) {
720 				treev[ix - 1].tok = &list[ix - 1];
721 			} else {
722 				treev[ix - 1].tok = 0;
723 			}
724 			if (inc) {
725 				treev[ix - 1].lt = &treev[ix - inc - 1];
726 				treev[ix - 1].gt = &treev[ix + inc - 1];
727 			} else {
728 				treev[ix - 1].lt = 0;
729 				treev[ix - 1].gt = 0;
730 			}
731 		}
732 	}
733 
734 	return (&treev[(max >> 1) - 1]);
735 }
736 
737 void
738 nl7c_http_init(void)
739 {
740 	int	n;
741 
742 	http_kmc = kmem_cache_create("NL7C_http_kmc",
743 	    sizeof (http_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
744 
745 	req_tree = ttree_build(tokreq, tokreq_cnt - 1);
746 	res_tree = ttree_build(tokres, tokres_cnt - 1);
747 
748 	n = sizeof (Shttp_conn_cl) - 1;
749 	http_conn_cl = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
750 	bcopy(Shttp_conn_cl, http_conn_cl->b_rptr, n);
751 	http_conn_cl->b_wptr += n;
752 
753 	n = sizeof (Shttp_conn_ka) - 1;
754 	http_conn_ka = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
755 	bcopy(Shttp_conn_ka, http_conn_ka->b_rptr, n);
756 	http_conn_ka->b_wptr += n;
757 }
758 
759 void
760 nl7c_http_free(void *arg)
761 {
762 	http_t	*http = arg;
763 
764 	kmem_cache_free(http_kmc, http);
765 }
766 
767 #define	STR_T_NOTCMP_OPT(a, b, m) (					\
768     a->m.cp && b->m.cp &&						\
769 	((a->m.ep - a->m.cp) != (b->m.ep - b->m.cp) ||			\
770 	strncmp(a->m.cp, b->m.cp, (b->m.ep - b->m.cp))))
771 
772 #define	STR_T_NOTCMP(a, b, m) (						\
773     a->m.cp && ! b->m.cp ||						\
774     b->m.cp && ! a->m.cp ||						\
775     STR_T_NOTCMP_OPT(a, b, m))
776 
777 boolean_t
778 nl7c_http_cmp(void *arg1, void *arg2)
779 {
780 	http_t	*httpa = arg1;		/* Response */
781 	http_t	*httpb = arg2;		/* Request */
782 
783 	if (httpa->major != httpb->major ||
784 	    httpa->minor != httpb->minor ||
785 	    STR_T_NOTCMP(httpa, httpb, accept) ||
786 	    STR_T_NOTCMP(httpa, httpb, acceptchar) ||
787 	    STR_T_NOTCMP(httpa, httpb, acceptenco) ||
788 	    STR_T_NOTCMP(httpa, httpb, acceptlang) ||
789 	    STR_T_NOTCMP_OPT(httpa, httpb, etag))
790 		return (B_FALSE);
791 	return (B_TRUE);
792 }
793 
794 /*
795  * Return the appropriate HTTP connection persist header
796  * based on the request HTTP persistent header state.
797  */
798 
799 mblk_t *
800 nl7c_http_persist(struct sonode *so)
801 {
802 	uint64_t	flags = so->so_nl7c_flags & NL7C_SCHEMEPRIV;
803 	mblk_t		*mp;
804 
805 	if (flags & HTTP_CONN_CL)
806 		mp = dupb(http_conn_cl);
807 	else if (flags & HTTP_CONN_KA)
808 		mp = dupb(http_conn_ka);
809 	else
810 		mp = NULL;
811 	return (mp);
812 }
813 
814 /*
815  * Parse the buffer *p of size len and update the uri_desc_t *uri and our
816  * http_t *http with the results.
817  */
818 
819 boolean_t
820 nl7c_http_request(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
821 {
822 	http_t	*http = kmem_cache_alloc(http_kmc, KM_SLEEP);
823 	char	*cp = *cpp;
824 	char	*hp;
825 	char	*sep;
826 	unsigned hash = 0;
827 	char	*HTTP = "HTTP/";
828 	token_t	*match;
829 	boolean_t persist = B_FALSE;
830 
831 	ASSERT(cp <= ep);
832 
833 	if (cp == ep) {
834 		goto pass;
835 	}
836 	/*
837 	 * Initialize any uri_desc_t and/or http_t members.
838 	 */
839 	uri->scheme = (void *)http;
840 	uri->auth.cp = NULL;
841 	uri->auth.ep = NULL;
842 	uri->resplen = -1;
843 	uri->eoh = NULL;
844 	uri->nocache = B_FALSE;
845 	http->parsed = B_FALSE;
846 	http->accept.cp = NULL;
847 	http->acceptchar.cp = NULL;
848 	http->acceptenco.cp = NULL;
849 	http->acceptlang.cp = NULL;
850 	http->etag.cp = NULL;
851 	http->uagent.cp = NULL;
852 	http->date = -1;
853 	http->expire = -1;
854 	if (*cp == '\r') {
855 		/*
856 		 * Special case for a Request-Line without an HTTP version,
857 		 * assume it's an old style, i.e. HTTP version 0.9 request.
858 		 */
859 		http->major = 0;
860 		http->minor = 9;
861 		goto got_version;
862 	}
863 	/*
864 	 * Skip URI path delimiter, must be a <SP>.
865 	 */
866 	if (*cp++ != ' ')
867 		/* Unkown or bad Request-Line format, just punt */
868 		goto pass;
869 	/*
870 	 * The URI parser has parsed through the URI and the <SP>
871 	 * delimiter, parse the HTTP/N.N version
872 	 */
873 	while (cp < ep && *HTTP == *cp) {
874 		HTTP++;
875 		cp++;
876 	}
877 	if (*HTTP != 0) {
878 		if (cp == ep)
879 			goto more;
880 		goto pass;
881 	}
882 	if (cp == ep)
883 		goto more;
884 	if (*cp < '0' || *cp > '9')
885 		goto pass;
886 	http->major = *cp++ - '0';
887 	if (cp == ep)
888 		goto more;
889 	if (*cp++ != '.')
890 		goto pass;
891 	if (cp == ep)
892 		goto more;
893 	if (*cp < '0' || *cp > '9')
894 		goto pass;
895 	http->minor = *cp++ - '0';
896 	if (cp == ep)
897 		goto more;
898 
899 got_version:
900 
901 	if (*cp++ != '\r')
902 		goto pass;
903 	if (cp == ep)
904 		goto more;
905 	if (*cp++ != '\n')
906 		goto pass;
907 	/*
908 	 * Initialize persistent state based on HTTP version.
909 	 */
910 	if (http->major == 1) {
911 		if (http->minor >= 1) {
912 			/* 1.1 persistent by default */
913 			persist = B_TRUE;
914 		} else {
915 			/* 1.0 isn't persistent by default */
916 			persist = B_FALSE;
917 		}
918 	} else if (http->major == 0) {
919 		/* Before 1.0 no persistent connections */
920 		persist = B_FALSE;
921 	} else {
922 		/* >= 2.0 not supported (yet) */
923 		goto pass;
924 	}
925 	/*
926 	 * Parse HTTP headers through the EOH
927 	 * (End Of Header, i.e. an empty line).
928 	 */
929 	for (sep = ep; cp < ep; ep = sep) {
930 		/* Get the next line */
931 		match = ttree_line_parse(req_tree, &cp, &ep, &hp, &hash);
932 		if (match != NULL) {
933 			if (match->act & QUALIFIER) {
934 				/*
935 				 * Header field text is used to qualify this
936 				 * request/response, based on qualifier type
937 				 * optionally convert and store *http.
938 				 */
939 				char	c;
940 				int	n = 0;
941 				time_t	secs;
942 
943 				ASSERT(hp != NULL && ep != NULL);
944 
945 				if (match->act & NUMERIC) {
946 					while (hp < ep) {
947 						c = *hp++;
948 						if (! isdigit(c))
949 							goto pass;
950 						n *= 10;
951 						n += c - '0';
952 					}
953 				} else if (match->act & DATE) {
954 					secs = http_date2time_t(hp, ep);
955 				}
956 				switch (match->tokid) {
957 
958 				case Qhdr_Accept_Charset:
959 					http->acceptchar.cp = hp;
960 					http->acceptchar.ep = ep;
961 					break;
962 
963 				case Qhdr_Accept_Encoding:
964 					http->acceptenco.cp = hp;
965 					http->acceptenco.ep = ep;
966 					break;
967 
968 				case Qhdr_Accept_Language:
969 					http->acceptlang.cp = hp;
970 					http->acceptlang.ep = ep;
971 					break;
972 
973 				case Qhdr_Accept:
974 					http->accept.cp = hp;
975 					http->accept.ep = ep;
976 					break;
977 
978 				case Qhdr_Authorization:
979 					goto pass;
980 
981 				case Qhdr_Connection_close:
982 					persist = B_FALSE;
983 					break;
984 
985 				case Qhdr_Connection_Keep_Alive:
986 					persist = B_TRUE;
987 					break;
988 
989 				case Qhdr_Date:
990 					http->date = secs;
991 					break;
992 
993 				case Qhdr_ETag:
994 					http->etag.cp = hp;
995 					http->etag.ep = ep;
996 					break;
997 
998 				case Qhdr_Host:
999 					uri->auth.cp = hp;
1000 					uri->auth.ep = ep;
1001 					break;
1002 
1003 				case Qhdr_If_Modified_Since:
1004 					break;
1005 
1006 				case Qhdr_If_Unmodified_Since:
1007 					break;
1008 
1009 				case Qhdr_Keep_Alive:
1010 					persist = B_TRUE;
1011 					break;
1012 
1013 				case Qhdr_User_Agent:
1014 					http->uagent.cp = hp;
1015 					http->uagent.ep = ep;
1016 					break;
1017 
1018 				default:
1019 					break;
1020 
1021 				};
1022 			}
1023 			if (match->act & NOCACHE) {
1024 				uri->nocache = B_TRUE;
1025 			}
1026 		} else if (hp == NULL) {
1027 			goto done;
1028 		} else if (ep == NULL) {
1029 			goto more;
1030 		}
1031 	}
1032 	/* No EOH found */
1033 	goto more;
1034 
1035 done:
1036 	/*
1037 	 * Initialize socket persist state and response persist type
1038 	 * flag based on the persist state of the request headers.
1039 	 *
1040 	 */
1041 	if (persist)
1042 		so->so_nl7c_flags |= NL7C_SOPERSIST;
1043 	else
1044 		so->so_nl7c_flags &= ~NL7C_SOPERSIST;
1045 
1046 	if (http->major == 1) {
1047 		if (http->minor >= 1) {
1048 			if (! persist)
1049 				so->so_nl7c_flags |= HTTP_CONN_CL;
1050 		} else {
1051 			if (persist)
1052 				so->so_nl7c_flags |= HTTP_CONN_KA;
1053 			else
1054 				so->so_nl7c_flags |= HTTP_CONN_CL;
1055 		}
1056 	}
1057 	/*
1058 	 * Last, update parse consumed text pointer.
1059 	 */
1060 	*cpp = cp;
1061 	return (B_TRUE);
1062 
1063 pass:
1064 	*cpp = NULL;
1065 more:
1066 	return (B_FALSE);
1067 }
1068 
1069 boolean_t
1070 nl7c_http_response(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
1071 {
1072 	http_t	*http = uri->scheme;
1073 	char	*cp = *cpp;
1074 	char	*hp;
1075 	char	*scp, *sep;
1076 	unsigned hash = 0;
1077 	char	*HTTP = "HTTP/";
1078 	int	status = 0;
1079 	token_t	*match;
1080 #ifdef	NOT_YET
1081 	uint32_t major, minor;
1082 #endif
1083 	boolean_t nocache = B_FALSE;
1084 	boolean_t persist = B_FALSE;
1085 
1086 	ASSERT(http != NULL);
1087 
1088 	if (http->parsed)
1089 		return (B_TRUE);
1090 
1091 	/*
1092 	 * Parse the HTTP/N.N version. Note, there's currently no use
1093 	 * for the actual response major nor minor values as only the
1094 	 * request values are used.
1095 	 */
1096 	while (cp < ep && *HTTP == *cp) {
1097 		HTTP++;
1098 		cp++;
1099 	}
1100 	if (*HTTP != 0) {
1101 		if (cp == ep)
1102 			goto more;
1103 		goto pass;
1104 	}
1105 	if (cp == ep)
1106 		goto more;
1107 
1108 	if (*cp < '0' || *cp > '9')
1109 		goto pass;
1110 #ifdef	NOT_YET
1111 	major = *cp++ - '0';
1112 #else
1113 	cp++;
1114 #endif
1115 
1116 	if (cp == ep)
1117 		goto more;
1118 	if (*cp++ != '.')
1119 		goto pass;
1120 	if (cp == ep)
1121 		goto more;
1122 	if (*cp < '0' || *cp > '9')
1123 		goto pass;
1124 #ifdef	NOT_YET
1125 	minor = *cp++ - '0';
1126 #else
1127 	cp++;
1128 #endif
1129 
1130 	if (cp == ep)
1131 		goto more;
1132 
1133 got_version:
1134 
1135 	/*
1136 	 * Get the response code, if not 200 then pass on this response.
1137 	 */
1138 	if (*cp++ != ' ')
1139 		goto pass;
1140 	if (cp == ep)
1141 		goto more;
1142 
1143 	do {
1144 		if (*cp == ' ')
1145 			break;
1146 		if (*cp < '0' || *cp > '9')
1147 			goto pass;
1148 		if (status)
1149 			status *= 10;
1150 		status += *cp++ - '0';
1151 	} while (cp < ep);
1152 
1153 	if (status != 200)
1154 		goto pass;
1155 
1156 	/*
1157 	 * Initialize persistent state based on request HTTP version.
1158 	 */
1159 	if (http->major == 1) {
1160 		if (http->minor >= 1) {
1161 			/* 1.1 persistent by default */
1162 			persist = B_TRUE;
1163 		} else {
1164 			/* 1.0 isn't persistent by default */
1165 			persist = B_FALSE;
1166 		}
1167 	} else if (http->major == 0) {
1168 		/* Before 1.0 no persistent connections */
1169 		persist = B_FALSE;
1170 	} else {
1171 		/* >= 2.0 not supported (yet) */
1172 		goto pass;
1173 	}
1174 
1175 	/*
1176 	 * Parse HTTP headers through the EOH
1177 	 * (End Of Header, i.e. an empty line).
1178 	 */
1179 	for (sep = ep; cp < ep; ep = sep) {
1180 		/* Get the next line */
1181 		scp = cp;
1182 		match = ttree_line_parse(res_tree, &cp, &ep, &hp, &hash);
1183 		if (match != NULL) {
1184 			if (match->act & QUALIFIER) {
1185 				/*
1186 				 * Header field text is used to qualify this
1187 				 * request/response, based on qualifier type
1188 				 * optionally convert and store *http.
1189 				 */
1190 				char	c;
1191 				int	n = 0;
1192 				time_t	secs;
1193 
1194 				ASSERT(hp != NULL && ep != NULL);
1195 
1196 				if (match->act & NUMERIC) {
1197 					while (hp < ep) {
1198 						c = *hp++;
1199 						if (! isdigit(c))
1200 							goto pass;
1201 						n *= 10;
1202 						n += c - '0';
1203 					}
1204 				} else if (match->act & DATE) {
1205 					secs = http_date2time_t(hp, ep);
1206 				}
1207 				switch (match->tokid) {
1208 
1209 				case Shdr_Cache_Control_Max_Age:
1210 					break;
1211 
1212 				case Shdr_Cache_Control_No_Cache:
1213 					nocache = B_TRUE;
1214 					break;
1215 
1216 				case Shdr_Cache_Control_No_Store:
1217 					nocache = B_TRUE;
1218 					break;
1219 
1220 				case Shdr_Connection_close:
1221 					persist = B_FALSE;
1222 					break;
1223 
1224 				case Shdr_Connection_Keep_Alive:
1225 					persist = B_TRUE;
1226 					break;
1227 
1228 				case Shdr_Content_Length:
1229 					uri->resplen = n;
1230 					break;
1231 
1232 				case Shdr_Date:
1233 					http->date = secs;
1234 					break;
1235 
1236 				case Shdr_ETag:
1237 					http->etag.cp = hp;
1238 					http->etag.ep = ep;
1239 					break;
1240 
1241 				case Shdr_Expires:
1242 					http->expire = secs;
1243 					break;
1244 
1245 				case Shdr_Keep_Alive:
1246 					persist = B_TRUE;
1247 					break;
1248 
1249 				case Shdr_Last_Modified:
1250 					http->lastmod = secs;
1251 					break;
1252 
1253 				case Shdr_Set_Cookies:
1254 					nocache = B_TRUE;
1255 
1256 				default:
1257 					nocache = B_TRUE;
1258 					break;
1259 				};
1260 			}
1261 			if (match->act & FILTER) {
1262 				/*
1263 				 * Filter header, do a copyover the header
1264 				 * text, guarenteed to be at least 1 byte.
1265 				 */
1266 				char	*cop = scp;
1267 				int	n = (ep - cop) - 1;
1268 				char	filter[] = "NL7C-Filtered";
1269 
1270 				n = MIN(n, sizeof (filter) - 1);
1271 				if (n > 0)
1272 					bcopy(filter, cop, n);
1273 				cop += n;
1274 				ASSERT(cop < ep);
1275 				*cop++ = ':';
1276 				while (cop < ep)
1277 					*cop++ = ' ';
1278 			}
1279 			if (match->act & NOCACHE) {
1280 				nocache = B_TRUE;
1281 			}
1282 		} else if (hp == NULL) {
1283 			uri->eoh = scp;
1284 			goto done;
1285 		} else if (ep == NULL) {
1286 			goto more;
1287 		}
1288 	}
1289 	/* No EOH found */
1290 	goto more;
1291 
1292 done:
1293 	http->parsed = B_TRUE;
1294 
1295 	if (nocache) {
1296 		uri->nocache = B_TRUE;
1297 		goto pass;
1298 	}
1299 	if (uri->resplen == -1)
1300 		goto pass;
1301 
1302 	/* Save the HTTP header length and add to URI response length */
1303 	http->headlen = (cp - *cpp);
1304 	uri->resplen += http->headlen;
1305 
1306 	/* Set socket persist state */
1307 	if (persist)
1308 		so->so_nl7c_flags |= NL7C_SOPERSIST;
1309 	else
1310 		so->so_nl7c_flags &= ~NL7C_SOPERSIST;
1311 
1312 	if (http->expire != -1 && http->date != -1) {
1313 		if (http->expire <= http->date) {
1314 			/* No cache */
1315 			goto pass;
1316 		}
1317 		/* Have a valid expire and date so calc an lbolt expire */
1318 		uri->expire = lbolt + SEC_TO_TICK(http->expire - http->date);
1319 	} else if (nl7c_uri_ttl != -1) {
1320 		/* No valid expire speced and we have a TTL */
1321 		uri->expire = lbolt + SEC_TO_TICK(nl7c_uri_ttl);
1322 	}
1323 
1324 	*cpp = cp;
1325 	return (B_TRUE);
1326 
1327 pass:
1328 	*cpp = NULL;
1329 more:
1330 	return (B_FALSE);
1331 }
1332 
1333 boolean_t
1334 nl7c_http_log(uri_desc_t *quri, uri_desc_t *suri, nca_request_log_t *req,
1335     char **wp, char **pep, uint32_t *off)
1336 {
1337 	http_t	*qhttp = quri->scheme;
1338 	http_t	*shttp = suri->scheme;
1339 	int	sz;
1340 
1341 	if (qhttp->uagent.cp != NULL) {
1342 		sz = (qhttp->uagent.ep - qhttp->uagent.cp);
1343 		if ((*wp + sz + 1) >= *pep) goto full;
1344 		bcopy(qhttp->uagent.cp, *wp, sz);
1345 		*wp += sz;
1346 		*(*wp)++ = 0;
1347 		sz++;
1348 		req->useragent_len = sz;
1349 		req->useragent = *off;
1350 		*off += sz;
1351 	}
1352 
1353 	req->response_len -= (uint_t)shttp->headlen;
1354 
1355 	req->method = NCA_GET;
1356 
1357 	if (qhttp->major == 1) {
1358 		if (qhttp->minor == 0) {
1359 			req->version = HTTP_1_0;
1360 		} else if (qhttp->minor == 1) {
1361 			req->version = HTTP_1_1;
1362 		} else {
1363 			req->version = HTTP_0_0;
1364 		}
1365 	} else if (qhttp->major == 0) {
1366 		req->version = HTTP_0_9;
1367 	} else {
1368 		req->version = HTTP_0_0;
1369 	}
1370 
1371 	return (B_FALSE);
1372 
1373 full:
1374 	return (B_TRUE);
1375 }
1376