xref: /titanic_50/usr/src/uts/common/fs/sockfs/nl7chttp.c (revision 9e59f930aa12797575f40ccaad097ac2dd7fc4d0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/sysmacros.h>
29 #include <sys/strsubr.h>
30 #include <fs/sockfs/nl7c.h>
31 #include <fs/sockfs/nl7curi.h>
32 
33 #include <inet/nca/ncadoorhdr.h>
34 #include <inet/nca/ncalogd.h>
35 
36 
37 volatile uint64_t	nl7c_http_response_chunked = 0;
38 volatile uint64_t	nl7c_http_response_chunkparse = 0;
39 
40 volatile uint64_t	nl7c_http_response_pass1 = 0;
41 volatile uint64_t	nl7c_http_response_pass2 = 0;
42 volatile uint64_t	nl7c_http_response_304 = 0;
43 volatile uint64_t	nl7c_http_response_307 = 0;
44 volatile uint64_t	nl7c_http_response_400 = 0;
45 
46 volatile uint64_t	nl7c_http_cond_304 = 0;
47 volatile uint64_t	nl7c_http_cond_412 = 0;
48 
49 /*
50  * Some externs:
51  */
52 
53 extern uint64_t		nl7c_uri_bytes;
54 extern kmem_cache_t	*nl7c_uri_kmc;
55 extern kmem_cache_t	*nl7c_uri_rd_kmc;
56 extern void		nl7c_uri_inactive(uri_desc_t *);
57 extern uint32_t		nca_major_version;
58 extern uint32_t		nca_minor_version;
59 
60 /*
61  * HTTP connection persistent headers, mblk_t's, and state values stored in
62  * (struct sonode *).so_nl7c_flags & NL7C_SCHEMEPRIV.
63  */
64 
65 char	Shttp_conn_cl[] = "Connection: close\r\n";
66 char	Shttp_conn_ka[] = "Connection: Keep-Alive\r\n";
67 
68 mblk_t	*http_conn_cl;
69 mblk_t	*http_conn_ka;
70 
71 #define	HTTP_CONN_CL	0x00010000
72 #define	HTTP_CONN_KA	0x00020000
73 
/*
 * Hex ascii Digit to Integer accumulate, if (char)c is a valid ascii
 * hex digit then the contents of (int32_t)n will be multiplied by 16
 * (i.e. left shifted by one hex digit) and the new digit added in,
 * else n will be set to -1.
 *
 * 'W' is 'a' - 10 and '7' is 'A' - 10, so subtracting them maps the
 * letters a-f and A-F onto the values 10-15.
 *
 * Notes for callers:
 *
 *	- both arguments are evaluated more than once, so neither may
 *	  have side effects;
 *
 *	- n must be checked for -1 after every use, a later valid digit
 *	  would otherwise be accumulated on top of the error value;
 *
 *	- the expansion is a bare brace block, not a do { } while (0).
 */

#define	hd2i(c, n) {							\
	(n) *= 16;							\
	if (isdigit(c))							\
		(n) += (c) - '0';					\
	else if ((c) >= 'a' && (c) <= 'f')				\
		(n) += (c) - 'W';					\
	else if ((c) >= 'A' && (c) <= 'F')				\
		(n) += (c) - '7';					\
	else								\
		(n) = -1;						\
}
91 
92 /*
93  * HTTP parser action values:
94  */
95 
/*
 * Each action is a distinct single bit so that one token's "act" member
 * can carry several actions at once; the parsers in this file test for
 * an action with a bitwise and (e.g. "act & HASH", "act & QUALIFIER").
 */
typedef enum act_e {
	REQUEST		= 0x0001,
	NUMERIC		= 0x0002,
	QUALIFIER	= 0x0004,
	PASS		= 0x0008,
	FILTER		= 0x0010,
	NOCACHE		= 0x0020,
	HASH		= 0x0040,
	DATE		= 0x0080,
	ETAG		= 0x0100,
	RESPONSE	= 0x0200,
	URIABS		= 0x0400,
	URIREL		= 0x0800,
	HEX		= 0x1000
} act_t;

/* UNDEF is an alias for PASS, not a separate bit */
#define	UNDEF		PASS
113 
114 /*
115  * HTTP parser token:
116  */
117 
/*
 * One entry of a token table; the member order must stay in step with
 * the INIT() initializer macro defined below ({tokid, text, act}).
 */
typedef struct token_s {
	int	tokid;			/* Token ident */
	char	*text;			/* Token text */
	act_t	act;			/* Action to take */
} token_t;
123 
124 /*
125  * The ttree_t (or token tree) is an ascending ordered binary tree
126  * built by ttree_build() from an array of tokens and subsequently
127  * used by ttree_line_parse() to parse multiline text data.
128  */
/*
 * One node of the token tree. A node whose "tok" is NULL is an empty
 * filler node, ttree_line_parse() steps over it by following "lt".
 */
typedef struct ttree_s {
	token_t *tok;			/* Token */
	struct ttree_s *lt, *gt;	/* < and > next node */
} ttree_t;
133 
134 /*
135  * Note: req_tree[] and res_tree[] must be in ascending case insensitive
136  * order of the char[] strings used to initialize each element.
137  *
138  * See "nl7ctokreq.txt" and "nl7ctokres.txt" which are processed by
139  * "nl7ctokgen" to produce "nl7ctokgen.h" and included here.
140  */
141 
142 #define	INIT(s, t) {s, S##s, t}
143 
144 #include "nl7ctokgen.h"
145 static ttree_t *req_tree;
146 static ttree_t *res_tree;
147 
148 /*
149  * HTTP scheme private state:
150  */
151 
/*
 * Per uri_desc_t HTTP state, allocated from http_kmc and hung off of
 * uri_desc_t.scheme. The date members hold -1 when no valid value is
 * available and a str_t whose "cp" is NULL means "header not present".
 */
typedef struct http_s {
	boolean_t	parsed;		/* Response parsed */
	uint32_t	major, minor;	/* HTTP/major.minor */
	uint32_t	headlen;	/* HTTP header length */
	clock_t		date;		/* Response Date: */
	clock_t		expire;		/* Response Expire: */
	clock_t		moddate;	/* Request *Modified-Since date */
	act_t		modtokid;	/* Request *Modified-Since tokid */
					/* (compared against Qhdr_If_* ids) */
	time_t		lastmod;	/* Response Last-Modified: */
	str_t		accept;		/* Request Accept: */
	str_t		acceptchar;	/* Request Accept-Charset: */
	str_t		acceptenco;	/* Request Accept-Encoding: */
	str_t		acceptlang;	/* Request Accept-Language: */
	str_t		etag;		/* Request/Response ETag: */
	str_t		uagent;		/* Request User-Agent: */
} http_t;
168 
169 static kmem_cache_t *http_kmc;
170 
171 /*
172  * HTTP date routines, dow[] for day of the week, Dow[] for day of the
173  * week for the Unix epoch (i.e. day 0 is a Thu), months[] for the months
174  * of the year, and dom[] for day number of the year for the first day
175  * of each month (non leap year).
176  */
177 
178 static char *dow[] = {"sunday", "monday", "tuesday", "wednesday", "thursday",
179 	"friday", "saturday", 0};
180 
181 static char *Dow[] = {"Thu", "Fri", "Sat", "Sun", "Mon", "Tue", "Wed", 0};
182 
183 static char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
184 	"Aug", "Sep", "Oct", "Nov", "Dec", 0};
185 
186 static int dom[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
187 
188 /*
189  * http_date2time_t(const char *) - returns the time(2) value (i.e.
190  * the value 0 is Thu, 01 Jan 1970 00:00:00 GMT) for the following
191  * time formats used by HTTP request and response headers:
192  *
193  *	1) Sun, 07 Dec 1998 14:49:37 GMT	; RFC 822, updated by RFC 1123
194  *	2) Sunday, 07-Dec-98 14:49:37 GMT	; RFC 850, obsoleted by RFC 1036
195  *	3) Sun Nov  7 14:49:37 1998		; ANSI C's asctime() format
196  *	4) 60					; Time delta of N seconds
197  *
198  * On error a time_t value of -1 is returned.
199  *
200  * All dates are GMT (must be part of the date string for types
201  * 1 and 2 and not for type 1).
202  *
203  * Note, the given mstr_t pointed to by *sp will be modified.
204  */
205 
206 static time_t
207 http_date2time_t(char *cp, char *ep)
208 {
209 	char	*scp = cp;
210 	time_t	secs;
211 	char	**tpp;
212 	char	*tp;
213 	char	c, sc;
214 	ssize_t	n;
215 
216 	ssize_t	zeroleap = 1970 / 4 - 1970 / 100 + 1970 / 400;
217 	ssize_t	leap;
218 	ssize_t	year;
219 	ssize_t	month;
220 	ssize_t	day;
221 	ssize_t	hour;
222 	ssize_t	min;
223 	ssize_t	sec;
224 
225 	/* Parse and skip day-of-week (we don't use it) */
226 	tpp = dow;
227 	tp = *tpp;
228 	n = 0;
229 	while (cp < ep) {
230 		c = *cp++;
231 		if (c == ',' || c == ' ')
232 			break;
233 		c = tolower(c);
234 		if (*tp == 0 || *tp != c) {
235 			cp = scp;
236 			if ((tp = *++tpp) == NULL)
237 				break;
238 			continue;
239 		}
240 		tp++;
241 	}
242 	if (cp == NULL) {
243 		/* Not case 1-3, try 4 */
244 		while (cp < ep) {
245 			c = *cp;
246 			if (isdigit(c)) {
247 				cp++;
248 				n *= 10;
249 				n += c - '0';
250 				continue;
251 			}
252 			/* An invalid date sytax */
253 			return (-1);
254 		}
255 		/* Case 4, delta from current time */
256 		return (gethrestime_sec() + n);
257 	}
258 	if (c == ',') {
259 		/* Case 1 or 2, skip <SP> */
260 		if (cp == ep)
261 			return (-1);
262 		c = *cp++;
263 		if (c != ' ')
264 			return (-1);
265 		/* Get day of the month */
266 		if (cp == ep)
267 			return (-1);
268 		c = *cp++;
269 		if (! isdigit(c))
270 			return (-1);
271 		n = c - '0';
272 		if (cp == ep)
273 			return (-1);
274 		c = *cp++;
275 		if (! isdigit(c))
276 			return (-1);
277 		n *= 10;
278 		n += c - '0';
279 		day = n;
280 		/* Get day/month/year seperator */
281 		if (cp == ep)
282 			return (-1);
283 		sc = *cp++;
284 		if (sc != ' ' && sc != '-')
285 			return (-1);
286 		/* Parse month */
287 		tpp = months;
288 		tp = *tpp++;
289 		scp = cp;
290 		n = 0;
291 		while (cp < ep) {
292 			c = *cp;
293 			if (c == sc) {
294 				cp++;
295 				break;
296 			}
297 			c = tolower(c);
298 			if (*tp == 0 || tolower(*tp) != c) {
299 				if ((tp = *tpp++) == NULL)
300 					break;
301 				cp = scp;
302 				n++;
303 				continue;
304 			}
305 			cp++;
306 			tp++;
307 		}
308 		if (cp == NULL)
309 			return (-1);
310 		month = n;
311 		/* Get year */
312 		if (cp == ep)
313 			return (-1);
314 		c = *cp++;
315 		if (! isdigit(c))
316 			return (-1);
317 		n = c - '0';
318 		if (cp == ep)
319 			return (-1);
320 		c = *cp++;
321 		if (! isdigit(c))
322 			return (-1);
323 		n *= 10;
324 		n += c - '0';
325 		if (cp == ep)
326 			return (-1);
327 		c = *cp++;
328 		if (sc == ' ') {
329 			/* Case 1, get 2 more year digits */
330 			if (! isdigit(c))
331 				return (-1);
332 			n *= 10;
333 			n += c - '0';
334 			if (cp == ep)
335 				return (-1);
336 			c = *cp++;
337 			if (! isdigit(c))
338 				return (-1);
339 			n *= 10;
340 			n += c - '0';
341 			/* Get seperator char */
342 			if (cp == ep)
343 				return (-1);
344 			c = *cp;
345 			if (c != ' ')
346 				return (-1);
347 			cp++;
348 		} else {
349 			/*
350 			 * Case 2, 2 digit year and as this is a so-called
351 			 * Unix date format and the begining of time was
352 			 * 1970 so we can extend this obsoleted date syntax
353 			 * past the year 1999 into the year 2038 for 32 bit
354 			 * machines and through 2069 for 64 bit machines.
355 			 */
356 			if (n > 69)
357 				n += 1900;
358 			else
359 				n += 2000;
360 		}
361 		year = n;
362 		/* Get GMT time */
363 		if (c != ' ')
364 			return (-1);
365 		if (cp == ep)
366 			return (-1);
367 		c = *cp++;
368 		if (! isdigit(c))
369 			return (-1);
370 		n = c - '0';
371 		if (cp == ep)
372 			return (-1);
373 		c = *cp++;
374 		if (! isdigit(c))
375 			return (-1);
376 		n *= 10;
377 		n += c - '0';
378 		hour = n;
379 		if (cp == ep)
380 			return (-1);
381 		c = *cp++;
382 		if (c != ':')
383 			return (-1);
384 		if (cp == ep)
385 			return (-1);
386 		c = *cp++;
387 		if (! isdigit(c))
388 			return (-1);
389 		n = c - '0';
390 		if (cp == ep)
391 			return (-1);
392 		c = *cp++;
393 		if (! isdigit(c))
394 			return (-1);
395 		n *= 10;
396 		n += c - '0';
397 		min = n;
398 		if (cp == ep)
399 			return (-1);
400 		c = *cp++;
401 		if (c != ':')
402 			return (-1);
403 		if (cp == ep)
404 			return (-1);
405 		c = *cp++;
406 		if (! isdigit(c))
407 			return (-1);
408 		n = c - '0';
409 		if (cp == ep)
410 			return (-1);
411 		c = *cp++;
412 		if (! isdigit(c))
413 			return (-1);
414 		n *= 10;
415 		n += c - '0';
416 		sec = n;
417 		if (cp == ep)
418 			return (-1);
419 		c = *cp++;
420 		if (c != ' ')
421 			return (-1);
422 		if (cp == ep)
423 			return (-1);
424 		c = *cp++;
425 		if (c != 'G')
426 			return (-1);
427 		if (cp == ep)
428 			return (-1);
429 		c = *cp++;
430 		if (c != 'M')
431 			return (-1);
432 		if (cp == ep)
433 			return (-1);
434 		c = *cp++;
435 		if (c != 'T')
436 			return (-1);
437 	} else {
438 		/* case 3, parse month */
439 		sc = c;
440 		tpp = months;
441 		tp = *tpp++;
442 		scp = cp;
443 		n = 0;
444 		while (cp < ep) {
445 			c = *cp;
446 			if (c == sc) {
447 				cp++;
448 				break;
449 			}
450 			c = tolower(c);
451 			if (*tp == 0 || tolower(*tp) != c) {
452 				if ((tp = *tpp++) == NULL)
453 					break;
454 				cp = scp;
455 				n++;
456 				continue;
457 			}
458 			cp++;
459 			tp++;
460 		}
461 		if (cp == NULL)
462 			return (-1);
463 		month = n;
464 		/* Get day of the month */
465 		if (cp == ep)
466 			return (-1);
467 		c = *cp++;
468 		if (! isdigit(c))
469 			return (-1);
470 		n = c - '0';
471 		if (cp == ep)
472 			return (-1);
473 		c = *cp++;
474 		if (! isdigit(c))
475 			return (-1);
476 		n *= 10;
477 		n += c - '0';
478 		day = n;
479 		/* Skip <SP> */
480 		if (cp == ep)
481 			return (-1);
482 		c = *cp++;
483 		if (c != ' ')
484 			return (-1);
485 		/* Get time */
486 		if (cp == ep)
487 			return (-1);
488 		c = *cp++;
489 		if (! isdigit(c))
490 			return (-1);
491 		n = c - '0';
492 		if (cp == ep)
493 			return (-1);
494 		c = *cp++;
495 		if (! isdigit(c))
496 			return (-1);
497 		n *= 10;
498 		n += c - '0';
499 		hour = n;
500 		if (cp == ep)
501 			return (-1);
502 		c = *cp++;
503 		if (c != ':')
504 			return (-1);
505 		if (cp == ep)
506 			return (-1);
507 		c = *cp++;
508 		if (! isdigit(c))
509 			return (-1);
510 		n = c - '0';
511 		if (cp == ep)
512 			return (-1);
513 		c = *cp++;
514 		if (! isdigit(c))
515 			return (-1);
516 		n *= 10;
517 		n += c - '0';
518 		min = n;
519 		if (cp == ep)
520 			return (-1);
521 		c = *cp++;
522 		if (c != ':')
523 			return (-1);
524 		if (cp == ep)
525 			return (-1);
526 		c = *cp++;
527 		if (! isdigit(c))
528 			return (-1);
529 		n = c - '0';
530 		if (cp == ep)
531 			return (-1);
532 		c = *cp++;
533 		if (! isdigit(c))
534 			return (-1);
535 		n *= 10;
536 		n += c - '0';
537 		sec = n;
538 		/* Skip <SP> */
539 		if (cp == ep)
540 			return (-1);
541 		c = *cp++;
542 		if (c != ' ')
543 			return (-1);
544 		/* Get year */
545 		if (cp == ep)
546 			return (-1);
547 		c = *cp++;
548 		if (! isdigit(c))
549 			return (-1);
550 		n = c - '0';
551 		if (cp == ep)
552 			return (-1);
553 		c = *cp++;
554 		if (! isdigit(c))
555 			return (-1);
556 		n *= 10;
557 		n += c - '0';
558 		if (cp == ep)
559 			return (-1);
560 		c = *cp++;
561 		if (! isdigit(c))
562 			return (-1);
563 		n *= 10;
564 		n += c - '0';
565 		if (cp == ep)
566 			return (-1);
567 		c = *cp++;
568 		if (! isdigit(c))
569 			return (-1);
570 		n *= 10;
571 		n += c - '0';
572 		year = n;
573 	}
574 
575 	/* Last, caclulate seconds since Unix day zero */
576 	leap = year;
577 	if (month < 2)
578 		leap--;
579 	leap = leap / 4 - leap / 100 + leap / 400 - zeroleap;
580 	secs = ((((year - 1970) * 365 + dom[month] + day  - 1 + leap) * 24
581 		+ hour) * 60 + min) * 60 + sec;
582 
583 	return (secs);
584 }
585 
586 /*
587  * http_today(char *) - returns in the given char* pointer the current
588  * date in ascii with a format of (char [29]):
589  *
590  *	Sun, 07 Dec 1998 14:49:37 GMT	; RFC 822, updated by RFC 1123
591  */
592 
593 static void
594 http_today(char *cp)
595 {
596 	ssize_t	i;
597 	char	*fp;
598 
599 	ssize_t	leap;
600 	ssize_t	year;
601 	ssize_t	month;
602 	ssize_t	dow;
603 	ssize_t	day;
604 	ssize_t	hour;
605 	ssize_t	min;
606 	ssize_t	sec;
607 
608 	/* Secs since Thu, 01 Jan 1970 00:00:00 GMT */
609 	time_t	now = gethrestime_sec();
610 
611 	sec = now % 60;
612 	now /= 60;
613 	min = now % 60;
614 	now /= 60;
615 	hour = now % 24;
616 	now /= 24;
617 	dow = now % 7;
618 
619 	year = 1970;
620 	for (;;) {
621 		if (year % 4 == 0 && year % 100 != 0 || year % 400 == 0)
622 			day = 366;
623 		else
624 			day = 365;
625 		if (now < day)
626 			break;
627 		now -= day;
628 		year++;
629 	}
630 
631 	now++;
632 	if (year % 4 == 0 && year % 100 != 0 || year % 400 == 0)
633 		leap = 1;
634 	else
635 		leap = 0;
636 	month = 11;
637 	for (i = 11; i; i--) {
638 		if (i < 2)
639 			leap = 0;
640 		if (now > dom[i] + leap)
641 			break;
642 		month--;
643 	}
644 	day = now - dom[i] - leap;
645 
646 	fp = Dow[dow];
647 	*cp++ = *fp++;
648 	*cp++ = *fp++;
649 	*cp++ = *fp++;
650 	*cp++ = ',';
651 	*cp++ = ' ';
652 
653 	i = day / 10;
654 	*cp++ = '0' + i;
655 	*cp++ = '0' + (day - i * 10);
656 	*cp++ = ' ';
657 
658 	fp = months[month];
659 	*cp++ = *fp++;
660 	*cp++ = *fp++;
661 	*cp++ = *fp++;
662 	*cp++ = ' ';
663 
664 	i = year / 1000;
665 	*cp++ = '0' + i;
666 	year -= i * 1000;
667 	i = year / 100;
668 	*cp++ = '0' + i;
669 	year -= i * 100;
670 	i = year / 10;
671 	*cp++ = '0' + i;
672 	year -= i * 10;
673 	*cp++ = '0' + year;
674 	*cp++ = ' ';
675 
676 	i = hour / 10;
677 	*cp++ = '0' + i;
678 	*cp++ = '0' + (hour - i * 10);
679 	*cp++ = ':';
680 
681 	i = min / 10;
682 	*cp++ = '0' + i;
683 	*cp++ = '0' + (min - i * 10);
684 	*cp++ = ':';
685 
686 	i = sec / 10;
687 	*cp++ = '0' + i;
688 	*cp++ = '0' + (sec - i * 10);
689 	*cp++ = ' ';
690 
691 	*cp++ = 'G';
692 	*cp++ = 'M';
693 	*cp = 'T';
694 }
695 
/*
 * Given the ttree_t pointer "*t", parse the char buffer pointed to
 * by "**cpp" of multiline text data up to the pointer "**epp", the
 * pointer "*hash" points to the current text hash (or is NULL if no
 * hash is wanted).
 *
 * If a match is found a pointer to the ttree_t token will be returned,
 * "**cpp" will point to the next line, "**epp" will point to the first
 * EOL char, "**hpp" will point to remainder of the parse data (if none,
 * **hpp == **epp), and "*hash" will be updated if the token has the
 * HASH action.
 *
 * If no match, as above except NULL is returned, "**hpp" points to the
 * beginning of the line and "*hash" won't be updated.
 *
 * If no EOL is found "**epp" is set to NULL and "**cpp" is left unchanged,
 * no further calls can be made until additional data is ready and all
 * arguments are reset. Note, if the start of the line had already been
 * matched (or not) before the data ran out then the token (or NULL) is
 * still returned, so the caller must check "**epp" and not just the
 * return value.
 *
 * If EOH (i.e. an empty line) NULL is returned, "**hpp" is set to NULL,
 * *cpp points to past EOH, no further calls can be made.
 */

static token_t *
ttree_line_parse(ttree_t *t, char **cpp, char **epp, char **hpp, uint32_t *hash)
{
	char	ca, cb;			/* current line <=> parse node */

	char	*cp = *cpp;
	char	*ep = *epp;

	char	*tp = t->tok->text;	/* current parse text */
	char	*sp = cp;		/* saved *cp */

	int	parse;			/* parse state */

	uint32_t hv;			/* hash value */

	/* hv is only read below when hash != NULL */
	if (hash != NULL)
		hv = *hash;

	/* Special case, check for EOH (i.e. empty line) */
	if (cp < ep) {
		ca = *cp;
		if (ca == '\n') {
			/* End of header */
			*cpp = ++cp;
			*hpp = NULL;
			return (NULL);
		} else if (ca == '\r') {
			cp++;
			if (cp < ep) {
				ca = *cp;
				if (ca == '\n') {
					/* End of header */
					*cpp = ++cp;
					*hpp = NULL;
					return (NULL);
				}
			}
			/* A lone <CR>, rewind and parse it as line text */
			cp = *cpp;
		}
	}
	/*
	 * Walk the token tree comparing the start of the line with each
	 * visited node's text. The loop body either continues (next char
	 * or next node) or falls through to the EOL scan and returns.
	 */
	while (cp < ep) {
		/* Get next parse text char */
		cb = *tp;
		if (cb != 0) {
			/* Get next current line char */
			ca = *cp++;
			/* Case insensitive */
			cb = tolower(cb);
			ca = tolower(ca);
			if (ca == cb) {
				/*
				 * Char match, next char.
				 *
				 * Note, parse text can contain EOL chars.
				 */
				tp++;
				continue;
			}
			if (ca == '\r' || ca == '\n') {
				/* EOL, always go less than */
				t = t->lt;
			} else if (ca < cb) {
				/* Go less than */
				t = t->lt;
			} else {
				/* Go greater than */
				t = t->gt;
			}
			while (t != NULL && t->tok == NULL) {
				/* Null node, so descend to < node */
				t = t->lt;
			}
			if (t != NULL) {
				/* Initialize for next node compare */
				tp = t->tok->text;
				cp = sp;
				continue;
			}
			/*
			 * End of tree walk, no match, return pointer
			 * to the start of line then below find EOL.
			 */
			*hpp = *cpp;
		} else {
			/*
			 * End of token text, match, return pointer to
			 * the rest of header text then below find EOL.
			 */
			*hpp = cp;
		}
		/*
		 * Find end of line. Note, the HTTP line syntax supports
		 * implicit multi-line if the next line starts with a <SP>
		 * or <HT>.
		 *
		 * Parse states:
		 *
		 *	0  - in line text
		 *	1  - seen <CR>, *epp points to it
		 *	2  - seen <LF> (EOL), *epp points to the first EOL char
		 *	>2 - seen EOL followed by <SP>/<HT>, i.e. a
		 *	     continuation line, the next other char goes
		 *	     back to state 0
		 *
		 * Only state 0 chars that aren't a <CR> or <LF> are hashed,
		 * and only for a matched token with the HASH action.
		 */
		parse = 0;
		while (cp < ep) {
			ca = *cp;
			if (parse == 0 && ca == '\r') {
				*epp = cp;
				parse = 1;
			} else if (parse == 0 && ca == '\n') {
				*epp = cp;
				parse = 2;
			} else if (parse == 1 && ca == '\n') {
				parse = 2;
			} else if (parse >= 2 && (ca == ' ' || ca == '\t')) {
				parse++;
			} else if (parse > 2) {
				parse = 0;
			} else if (parse == 2) {
				/* First char of the next line, done */
				break;
			} else if (t != NULL && (t->tok->act & HASH) &&
			    hash != NULL) {
				CHASH(hv, ca);
			}
			cp++;
		}
		if (parse < 2) {
			/* No EOL, not enough data */
			*epp = NULL;
			return (t != NULL ? t->tok : NULL);
		}
		/*
		 * Return updated hash value (if any), update parse current
		 * pointer for next call (i.e. begin of next line), and last
		 * return pointer to the matching token_t.
		 */
		if (t != NULL && (t->tok->act & HASH) && hash != NULL)
			*hash = hv;
		*cpp = cp;
		return (t != NULL ? t->tok : NULL);
	}
	/*
	 * End of parse data reached while still comparing token text
	 * (or no data at all), so not enough data.
	 */
	*epp = NULL;
	return (NULL);
}
856 
857 /*
858  * Given a NULL terminated array of token_t(s) ordered in ascending
859  * case insensitive order a binary tree is allocated and populated with
860  * pointers into the array and a pointer to the root node is returned.
861  *
862  * Todo, for maximum ttree parse efficiency needs to be path compressed,
863  * the function ttree_line_parse() handles the empty nodes correctly.
864  */
865 static ttree_t *
866 ttree_build(token_t *list, int sz)
867 {
868 	ttree_t *treev;
869 	int	max, lvl, inc, ix;
870 
871 	/* calc the size of the tree */
872 	for (max = 1; max < sz; max <<= 1)
873 		;
874 	/* allocate the tree */
875 	treev = kmem_alloc(sizeof (*treev) * (max - 1), KM_SLEEP);
876 
877 	/* walk the tree and populate from list vector */
878 	lvl = max;
879 	while (lvl >>= 1) {
880 		inc = lvl >> 1;
881 		for (ix = lvl; ix < max; ix += lvl << 1) {
882 			if (ix <= sz) {
883 				treev[ix - 1].tok = &list[ix - 1];
884 			} else {
885 				treev[ix - 1].tok = 0;
886 			}
887 			if (inc) {
888 				treev[ix - 1].lt = &treev[ix - inc - 1];
889 				treev[ix - 1].gt = &treev[ix + inc - 1];
890 			} else {
891 				treev[ix - 1].lt = 0;
892 				treev[ix - 1].gt = 0;
893 			}
894 		}
895 	}
896 
897 	return (&treev[(max >> 1) - 1]);
898 }
899 
900 void
901 nl7c_http_init(void)
902 {
903 	int	n;
904 
905 	http_kmc = kmem_cache_create("NL7C_http_kmc",
906 	    sizeof (http_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
907 
908 	req_tree = ttree_build(tokreq, tokreq_cnt - 1);
909 	res_tree = ttree_build(tokres, tokres_cnt - 1);
910 
911 	n = sizeof (Shttp_conn_cl) - 1;
912 	http_conn_cl = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
913 	bcopy(Shttp_conn_cl, http_conn_cl->b_rptr, n);
914 	http_conn_cl->b_wptr += n;
915 
916 	n = sizeof (Shttp_conn_ka) - 1;
917 	http_conn_ka = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
918 	bcopy(Shttp_conn_ka, http_conn_ka->b_rptr, n);
919 	http_conn_ka->b_wptr += n;
920 }
921 
922 void
923 nl7c_http_free(void *arg)
924 {
925 	http_t	*http = arg;
926 
927 	kmem_cache_free(http_kmc, http);
928 }
929 
/*
 * str_t member compare macros for two http_t pointers "a" and "b" and
 * the member name "m", both evaluate to true for "not equal":
 *
 * STR_T_NOTCMP_OPT() - the member is optional, true only if it's present
 * in both (cp != NULL) and the lengths or the bytes differ.
 *
 * STR_T_NOTCMP() - as above but also true if the member is present in
 * only one of the two.
 *
 * Note, "a" and "b" aren't parenthesized in the expansion and are
 * evaluated more than once, so pass plain pointer variables only.
 */
#define	STR_T_NOTCMP_OPT(a, b, m) (					\
    a->m.cp && b->m.cp &&						\
	((a->m.ep - a->m.cp) != (b->m.ep - b->m.cp) ||			\
	strncmp(a->m.cp, b->m.cp, (b->m.ep - b->m.cp))))

#define	STR_T_NOTCMP(a, b, m) (						\
    a->m.cp && ! b->m.cp ||						\
    b->m.cp && ! a->m.cp ||						\
    STR_T_NOTCMP_OPT(a, b, m))
939 
940 boolean_t
941 nl7c_http_cmp(void *arg1, void *arg2)
942 {
943 	http_t	*httpa = arg1;		/* Response */
944 	http_t	*httpb = arg2;		/* Request */
945 
946 	if (httpa->major != httpb->major ||
947 	    httpa->minor != httpb->minor ||
948 	    STR_T_NOTCMP(httpa, httpb, accept) ||
949 	    STR_T_NOTCMP(httpa, httpb, acceptchar) ||
950 	    STR_T_NOTCMP(httpa, httpb, acceptenco) ||
951 	    STR_T_NOTCMP(httpa, httpb, acceptlang) ||
952 	    STR_T_NOTCMP_OPT(httpa, httpb, etag))
953 		return (B_FALSE);
954 	return (B_TRUE);
955 }
956 
957 /*
958  * In-line HTTP responses:
959  */
960 
961 static char http_resp_304[] =
962 	"HTTP/#.# 304 Not Modified\r\n"
963 	"Date: #############################\r\n"
964 	"Server: NCA/#.# (Solaris)\r\n";
965 
966 static char http_resp_412[] =
967 	"HTTP/#.# 412 Precondition Failed\r\n"
968 	"Date: #############################\r\n"
969 	"Server: NCA/#.# (Solaris)\r\n";
970 
971 static uri_desc_t *
972 http_mkresponse(uri_desc_t *req, uri_desc_t *res, char *proto, int sz)
973 {
974 	http_t		*qhttp = req->scheme;
975 	http_t		*shttp = res->scheme;
976 	uri_desc_t	*uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
977 	char		*alloc;
978 	char		*cp;
979 	char		*ep = &proto[sz];
980 	uri_rd_t	*rdp;
981 	int		cnt;
982 
983 	char		hdr_etag[] = "ETag: ";
984 
985 	/* Any optional header(s) */
986 	if (shttp->etag.cp != NULL) {
987 		/* Response has an ETag:, count it */
988 		sz += sizeof (hdr_etag) - 1 +
989 		    (shttp->etag.ep - shttp->etag.cp) + 2;
990 	}
991 	sz += 2;
992 	alloc = kmem_alloc(sz, KM_SLEEP);
993 
994 	/* Minimum temp uri initialization as needed by uri_response() */
995 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
996 	uri->hash = URI_TEMP;
997 	uri->tail = NULL;
998 	uri->scheme = NULL;
999 	uri->reqmp = NULL;
1000 	uri->count = 0;
1001 	cv_init(&uri->waiting, NULL, CV_DEFAULT, NULL);
1002 	mutex_init(&uri->proclock, NULL, MUTEX_DEFAULT, NULL);
1003 
1004 	URI_RD_ADD(uri, rdp, sz, -1);
1005 	rdp->data.kmem = alloc;
1006 	atomic_add_64(&nl7c_uri_bytes, sz);
1007 
1008 	cp = alloc;
1009 	if (qhttp->major == 1) {
1010 		/*
1011 		 * Full response format.
1012 		 *
1013 		 * Copy to first sub char '#'.
1014 		 */
1015 		while (proto < ep) {
1016 			if (*proto == '#')
1017 				break;
1018 			*cp++ = *proto++;
1019 		}
1020 
1021 		/* Process the HTTP version substitutions */
1022 		if (*proto != '#') goto bad;
1023 		*cp++ = '0' + qhttp->major;
1024 		proto++;
1025 		while (proto < ep) {
1026 			if (*proto == '#')
1027 				break;
1028 			*cp++ = *proto++;
1029 		}
1030 		if (*proto != '#') goto bad;
1031 		*cp++ = '0' + qhttp->minor;
1032 		proto++;
1033 
1034 		/* Copy to the next sub char '#' */
1035 		while (proto < ep) {
1036 			if (*proto == '#')
1037 				break;
1038 			*cp++ = *proto++;
1039 		}
1040 
1041 		/* Process the "Date: " substitution */
1042 		if (*proto != '#') goto bad;
1043 		http_today(cp);
1044 
1045 		/* Skip to the next nonsub char '#' */
1046 		while (proto < ep) {
1047 			if (*proto != '#')
1048 				break;
1049 			cp++;
1050 			proto++;
1051 		}
1052 
1053 		/* Copy to the next sub char '#' */
1054 		while (proto < ep) {
1055 			if (*proto == '#')
1056 				break;
1057 			*cp++ = *proto++;
1058 		}
1059 
1060 		/* Process the NCA version substitutions */
1061 		if (*proto != '#') goto bad;
1062 		*cp++ = '0' + nca_major_version;
1063 		proto++;
1064 		while (proto < ep) {
1065 			if (*proto == '#')
1066 				break;
1067 			*cp++ = *proto++;
1068 		}
1069 		if (*proto != '#') goto bad;
1070 		*cp++ = '0' + nca_minor_version;
1071 		proto++;
1072 
1073 		/* Copy remainder of HTTP header */
1074 		while (proto < ep) {
1075 			*cp++ = *proto++;
1076 		}
1077 	} else {
1078 		goto bad;
1079 	}
1080 	/* Any optional header(s) */
1081 	if (shttp->etag.cp != NULL) {
1082 		/* Response has an ETag:, add it */
1083 		cnt = sizeof (hdr_etag) - 1;
1084 		bcopy(hdr_etag, cp, cnt);
1085 		cp += cnt;
1086 		cnt = (shttp->etag.ep - shttp->etag.cp);
1087 		bcopy(shttp->etag.cp, cp, cnt);
1088 		cp += cnt;
1089 		*cp++ = '\r';
1090 		*cp++ = '\n';
1091 	}
1092 	/* Last, add empty line */
1093 	uri->eoh = cp;
1094 	*cp++ = '\r';
1095 	*cp = '\n';
1096 
1097 	return (uri);
1098 
1099 bad:
1100 	/*
1101 	 * Free any resources allocated here, note that while we could
1102 	 * use the uri_inactive() to free the uri by doing a REF_RELE()
1103 	 * we instead free it here as the URI may be in less then a fully
1104 	 * initialized state.
1105 	 */
1106 	kmem_free(alloc, sz);
1107 	kmem_cache_free(nl7c_uri_kmc, uri);
1108 	return (NULL);
1109 }
1110 
1111 uri_desc_t *
1112 nl7c_http_cond(uri_desc_t *req, uri_desc_t *res)
1113 {
1114 	http_t	*qhttp = req->scheme;
1115 	time_t	qdate = qhttp->moddate;
1116 	http_t	*shttp = res->scheme;
1117 	time_t	sdate = shttp->lastmod == -1 ? shttp->date : shttp->lastmod;
1118 	uri_desc_t *uri;
1119 
1120 	if (qhttp->modtokid == Qhdr_If_Modified_Since &&
1121 	    sdate != -1 && qdate != -1 && sdate <= qdate) {
1122 		/*
1123 		 * Request is If-Modified-Since: and both response
1124 		 * and request dates are valid and response is the
1125 		 * same age as request so return a 304 response uri
1126 		 * instead of the cached response.
1127 		 */
1128 		nl7c_http_cond_304++;
1129 		uri = http_mkresponse(req, res, http_resp_304,
1130 		    sizeof (http_resp_304) - 1);
1131 		if (uri != NULL) {
1132 			/* New response uri */
1133 			REF_RELE(res);
1134 			return (uri);
1135 		}
1136 		return (res);
1137 	} else if (qhttp->modtokid == Qhdr_If_Unmodified_Since &&
1138 	    sdate != -1 && qdate != -1 && sdate >= qdate) {
1139 		/*
1140 		 * Request is If-Unmodified-Since: and both response
1141 		 * and request dates are valid and response is not the
1142 		 * same age as the request so return a 412 response
1143 		 * uri instead of the cached response.
1144 		 */
1145 		nl7c_http_cond_412++;
1146 		uri = http_mkresponse(req, res, http_resp_412,
1147 		    sizeof (http_resp_412) - 1);
1148 		if (uri != NULL) {
1149 			/* New response uri */
1150 			REF_RELE(res);
1151 			return (uri);
1152 		}
1153 		return (res);
1154 	}
1155 	/*
1156 	 * No conditional response meet or unknown type or no
1157 	 * valid dates so just return the original uri response.
1158 	 */
1159 	return (res);
1160 }
1161 
1162 /*
1163  * Return the appropriate HTTP connection persist header
1164  * based on the request HTTP persistent header state.
1165  */
1166 
1167 mblk_t *
1168 nl7c_http_persist(struct sonode *so)
1169 {
1170 	uint64_t	flags = so->so_nl7c_flags & NL7C_SCHEMEPRIV;
1171 	mblk_t		*mp;
1172 
1173 	if (flags & HTTP_CONN_CL)
1174 		mp = dupb(http_conn_cl);
1175 	else if (flags & HTTP_CONN_KA)
1176 		mp = dupb(http_conn_ka);
1177 	else
1178 		mp = NULL;
1179 	return (mp);
1180 }
1181 
1182 /*
1183  * Parse the buffer *p of size len and update the uri_desc_t *uri and our
1184  * http_t *http with the results.
1185  */
1186 
1187 boolean_t
1188 nl7c_http_request(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
1189 {
1190 	http_t	*http = kmem_cache_alloc(http_kmc, KM_SLEEP);
1191 	char	*cp = *cpp;
1192 	char	*hp;
1193 	char	*scp, *sep;
1194 	char	*HTTP = "HTTP/";
1195 	token_t	*match;
1196 	boolean_t persist = B_FALSE;
1197 
1198 	ASSERT(cp <= ep);
1199 
1200 	if (cp == ep) {
1201 		goto bad;
1202 	}
1203 	/*
1204 	 * Initialize any uri_desc_t and/or http_t members.
1205 	 */
1206 	uri->scheme = (void *)http;
1207 	uri->auth.cp = NULL;
1208 	uri->auth.ep = NULL;
1209 	uri->resplen = URI_LEN_NOVALUE;
1210 	uri->respclen = URI_LEN_NOVALUE;
1211 	uri->eoh = NULL;
1212 	uri->nocache = B_FALSE;
1213 	uri->conditional = B_FALSE;
1214 	http->parsed = B_FALSE;
1215 	http->accept.cp = NULL;
1216 	http->acceptchar.cp = NULL;
1217 	http->acceptenco.cp = NULL;
1218 	http->acceptlang.cp = NULL;
1219 	http->etag.cp = NULL;
1220 	http->uagent.cp = NULL;
1221 	http->date = -1;
1222 	http->expire = -1;
1223 	http->lastmod = -1;
1224 	if (*cp == '\r') {
1225 		/*
1226 		 * Special case for a Request-Line without an HTTP version,
1227 		 * assume it's an old style, i.e. HTTP version 0.9 request.
1228 		 */
1229 		http->major = 0;
1230 		http->minor = 9;
1231 		goto got_version;
1232 	}
1233 	/*
1234 	 * Skip URI path delimiter, must be a <SP>.
1235 	 */
1236 	if (*cp++ != ' ')
1237 		/* Unkown or bad Request-Line format, just punt */
1238 		goto bad;
1239 	/*
1240 	 * The URI parser has parsed through the URI and the <SP>
1241 	 * delimiter, parse the HTTP/N.N version
1242 	 */
1243 	while (cp < ep && *HTTP == *cp) {
1244 		HTTP++;
1245 		cp++;
1246 	}
1247 	if (*HTTP != 0) {
1248 		if (cp == ep)
1249 			goto more;
1250 		goto bad;
1251 	}
1252 	if (cp == ep)
1253 		goto more;
1254 	if (*cp < '0' || *cp > '9')
1255 		goto bad;
1256 	http->major = *cp++ - '0';
1257 	if (cp == ep)
1258 		goto more;
1259 	if (*cp++ != '.')
1260 		goto bad;
1261 	if (cp == ep)
1262 		goto more;
1263 	if (*cp < '0' || *cp > '9')
1264 		goto bad;
1265 	http->minor = *cp++ - '0';
1266 	if (cp == ep)
1267 		goto more;
1268 
1269 got_version:
1270 
1271 	if (*cp++ != '\r')
1272 		goto bad;
1273 	if (cp == ep)
1274 		goto more;
1275 	if (*cp++ != '\n')
1276 		goto bad;
1277 	/*
1278 	 * Initialize persistent state based on HTTP version.
1279 	 */
1280 	if (http->major == 1) {
1281 		if (http->minor >= 1) {
1282 			/* 1.1 persistent by default */
1283 			persist = B_TRUE;
1284 		} else {
1285 			/* 1.0 isn't persistent by default */
1286 			persist = B_FALSE;
1287 		}
1288 	} else if (http->major == 0) {
1289 		/* Before 1.0 no persistent connections */
1290 		persist = B_FALSE;
1291 	} else {
1292 		/* >= 2.0 not supported (yet) */
1293 		goto bad;
1294 	}
1295 	/*
1296 	 * Parse HTTP headers through the EOH
1297 	 * (End Of Header, i.e. an empty line).
1298 	 */
1299 	for (sep = ep; cp < ep; ep = sep) {
1300 		/* Get the next line */
1301 		scp = cp;
1302 		match = ttree_line_parse(req_tree, &cp, &ep, &hp, &uri->hvalue);
1303 		if (match != NULL) {
1304 			if (match->act & QUALIFIER) {
1305 				/*
1306 				 * Header field text is used to qualify this
1307 				 * request/response, based on qualifier type
1308 				 * optionally convert and store *http.
1309 				 */
1310 				char	c;
1311 				int	n = 0;
1312 				time_t	secs;
1313 
1314 				ASSERT(hp != NULL && ep != NULL);
1315 
1316 				if (match->act & NUMERIC) {
1317 					while (hp < ep) {
1318 						c = *hp++;
1319 						if (! isdigit(c))
1320 							goto bad;
1321 						n *= 10;
1322 						n += c - '0';
1323 					}
1324 				} else if (match->act & DATE) {
1325 					secs = http_date2time_t(hp, ep);
1326 				}
1327 				switch (match->tokid) {
1328 
1329 				case Qhdr_Accept_Charset:
1330 					http->acceptchar.cp = hp;
1331 					http->acceptchar.ep = ep;
1332 					break;
1333 
1334 				case Qhdr_Accept_Encoding:
1335 					http->acceptenco.cp = hp;
1336 					http->acceptenco.ep = ep;
1337 					break;
1338 
1339 				case Qhdr_Accept_Language:
1340 					http->acceptlang.cp = hp;
1341 					http->acceptlang.ep = ep;
1342 					break;
1343 
1344 				case Qhdr_Accept:
1345 					http->accept.cp = hp;
1346 					http->accept.ep = ep;
1347 					break;
1348 
1349 				case Qhdr_Authorization:
1350 					goto pass;
1351 
1352 				case Qhdr_Connection_close:
1353 					persist = B_FALSE;
1354 					break;
1355 
1356 				case Qhdr_Connection_Keep_Alive:
1357 					persist = B_TRUE;
1358 					break;
1359 
1360 				case Qhdr_Date:
1361 					http->date = secs;
1362 					break;
1363 
1364 				case Qhdr_ETag:
1365 					http->etag.cp = hp;
1366 					http->etag.ep = ep;
1367 					break;
1368 
1369 				case Qhdr_Host:
1370 					uri->auth.cp = hp;
1371 					uri->auth.ep = ep;
1372 					break;
1373 
1374 				case Qhdr_If_Modified_Since:
1375 				case Qhdr_If_Unmodified_Since:
1376 					http->moddate = secs;
1377 					http->modtokid = match->tokid;
1378 					uri->conditional = B_TRUE;
1379 					break;
1380 
1381 				case Qhdr_Keep_Alive:
1382 					persist = B_TRUE;
1383 					break;
1384 
1385 				case Qhdr_User_Agent:
1386 					http->uagent.cp = hp;
1387 					http->uagent.ep = ep;
1388 					break;
1389 
1390 				default:
1391 					break;
1392 
1393 				};
1394 			}
1395 			if (match->act & FILTER) {
1396 				/*
1397 				 * Filter header, do a copyover the header
1398 				 * text, guarenteed to be at least 1 byte.
1399 				 */
1400 				char	*cop = scp;
1401 				int	n = (ep - cop) - 1;
1402 				char	filter[] = "NL7C-Filtered";
1403 
1404 				n = MIN(n, sizeof (filter) - 1);
1405 				if (n > 0)
1406 					bcopy(filter, cop, n);
1407 				cop += n;
1408 				ASSERT(cop < ep);
1409 				*cop++ = ':';
1410 				while (cop < ep)
1411 					*cop++ = ' ';
1412 			}
1413 			if (match->act & NOCACHE) {
1414 				uri->nocache = B_TRUE;
1415 			}
1416 		} else if (hp == NULL) {
1417 			goto done;
1418 		} else if (ep == NULL) {
1419 			goto more;
1420 		}
1421 	}
1422 	/* No EOH found */
1423 	goto more;
1424 
1425 done:
1426 	/*
1427 	 * Initialize socket persist state and response persist type
1428 	 * flag based on the persist state of the request headers.
1429 	 *
1430 	 */
1431 	if (persist)
1432 		so->so_nl7c_flags |= NL7C_SOPERSIST;
1433 	else
1434 		so->so_nl7c_flags &= ~NL7C_SOPERSIST;
1435 
1436 	if (http->major == 1) {
1437 		so->so_nl7c_flags &= ~NL7C_SCHEMEPRIV;
1438 		if (http->minor >= 1) {
1439 			if (! persist)
1440 				so->so_nl7c_flags |= HTTP_CONN_CL;
1441 		} else {
1442 			if (persist)
1443 				so->so_nl7c_flags |= HTTP_CONN_KA;
1444 			else
1445 				so->so_nl7c_flags |= HTTP_CONN_CL;
1446 		}
1447 	}
1448 	/*
1449 	 * Last, update parse consumed text pointer.
1450 	 */
1451 	*cpp = cp;
1452 	return (B_TRUE);
1453 
1454 pass:
1455 	*cpp = NULL;
1456 	return (B_TRUE);
1457 
1458 bad:
1459 	*cpp = NULL;
1460 more:
1461 	return (B_FALSE);
1462 }
1463 
1464 boolean_t
1465 nl7c_http_response(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
1466 {
1467 	http_t	*http = uri->scheme;
1468 	char	*cp = *cpp;
1469 	char	*hp;
1470 	char	*scp, *sep;
1471 	char	*HTTP = "HTTP/";
1472 	int	status = 0;
1473 	token_t	*match;
1474 #ifdef	NOT_YET
1475 	uint32_t major, minor;
1476 #endif
1477 	boolean_t nocache = B_FALSE;
1478 	boolean_t persist = B_FALSE;
1479 
1480 	ASSERT(http != NULL);
1481 
1482 	if (http->parsed) {
1483 		if (uri->respclen != URI_LEN_NOVALUE) {
1484 			/* Chunked response */
1485 			sep = ep;
1486 			goto chunked;
1487 		}
1488 		/* Already parsed, nothing todo */
1489 		return (B_TRUE);
1490 	}
1491 
1492 	/*
1493 	 * Parse the HTTP/N.N version. Note, there's currently no use
1494 	 * for the actual response major nor minor values as only the
1495 	 * request values are used.
1496 	 */
1497 	while (cp < ep && *HTTP == *cp) {
1498 		HTTP++;
1499 		cp++;
1500 	}
1501 	if (*HTTP != 0) {
1502 		if (cp == ep)
1503 			goto more;
1504 		goto bad;
1505 	}
1506 	if (cp == ep)
1507 		goto more;
1508 
1509 	if (*cp < '0' || *cp > '9')
1510 		goto bad;
1511 #ifdef	NOT_YET
1512 	major = *cp++ - '0';
1513 #else
1514 	cp++;
1515 #endif
1516 
1517 	if (cp == ep)
1518 		goto more;
1519 	if (*cp++ != '.')
1520 		goto bad;
1521 	if (cp == ep)
1522 		goto more;
1523 	if (*cp < '0' || *cp > '9')
1524 		goto bad;
1525 #ifdef	NOT_YET
1526 	minor = *cp++ - '0';
1527 #else
1528 	cp++;
1529 #endif
1530 
1531 	if (cp == ep)
1532 		goto more;
1533 
1534 got_version:
1535 
1536 	/*
1537 	 * Get the response code.
1538 	 */
1539 	if (*cp++ != ' ')
1540 		goto bad;
1541 	if (cp == ep)
1542 		goto more;
1543 
1544 	do {
1545 		if (*cp == ' ')
1546 			break;
1547 		if (*cp < '0' || *cp > '9')
1548 			goto bad;
1549 		if (status)
1550 			status *= 10;
1551 		status += *cp++ - '0';
1552 	} while (cp < ep);
1553 
1554 	switch (status) {
1555 	case 200:
1556 		/*
1557 		 * The only response status we continue to process.
1558 		 */
1559 		break;
1560 	case 304:
1561 		nl7c_http_response_304++;
1562 		nocache = B_TRUE;
1563 		uri->resplen = 0;
1564 		goto pass;
1565 	case 307:
1566 		nl7c_http_response_307++;
1567 		nocache = B_TRUE;
1568 		uri->resplen = 0;
1569 		goto pass;
1570 	case 400:
1571 		nl7c_http_response_400++;
1572 		/*
1573 		 * Special case some response status codes, just mark
1574 		 * as nocache and no response length and pass on the
1575 		 * request/connection.
1576 		 */
1577 		nocache = B_TRUE;
1578 		uri->resplen = 0;
1579 		goto pass;
1580 	default:
1581 		/*
1582 		 * All other response codes result in a parse failure.
1583 		 */
1584 		goto bad;
1585 	}
1586 
1587 	/*
1588 	 * Initialize persistent state based on request HTTP version.
1589 	 */
1590 	if (http->major == 1) {
1591 		if (http->minor >= 1) {
1592 			/* 1.1 persistent by default */
1593 			persist = B_TRUE;
1594 		} else {
1595 			/* 1.0 isn't persistent by default */
1596 			persist = B_FALSE;
1597 		}
1598 	} else if (http->major == 0) {
1599 		/* Before 1.0 no persistent connections */
1600 		persist = B_FALSE;
1601 	} else {
1602 		/* >= 2.0 not supported (yet) */
1603 		goto bad;
1604 	}
1605 
1606 	/*
1607 	 * Parse HTTP headers through the EOH
1608 	 * (End Of Header, i.e. an empty line).
1609 	 */
1610 	for (sep = ep; cp < ep; ep = sep) {
1611 		/* Get the next line */
1612 		scp = cp;
1613 		match = ttree_line_parse(res_tree, &cp, &ep, &hp, NULL);
1614 		if (match != NULL) {
1615 			if (match->act & QUALIFIER) {
1616 				/*
1617 				 * Header field text is used to qualify this
1618 				 * request/response, based on qualifier type
1619 				 * optionally convert and store *http.
1620 				 */
1621 				char	c;
1622 				int	n = 0;
1623 				time_t	secs;
1624 
1625 				ASSERT(hp != NULL && ep != NULL);
1626 
1627 				if (match->act & NUMERIC) {
1628 					while (hp < ep) {
1629 						c = *hp++;
1630 						if (match->act & HEX) {
1631 							hd2i(c, n);
1632 							if (n == -1)
1633 								goto bad;
1634 						} else {
1635 							if (! isdigit(c))
1636 								goto bad;
1637 							n *= 10;
1638 							n += c - '0';
1639 						}
1640 					}
1641 				} else if (match->act & DATE) {
1642 					secs = http_date2time_t(hp, ep);
1643 				}
1644 				switch (match->tokid) {
1645 
1646 				case Shdr_Cache_Control_Max_Age:
1647 					break;
1648 
1649 				case Shdr_Cache_Control_No_Cache:
1650 					nocache = B_TRUE;
1651 					break;
1652 
1653 				case Shdr_Cache_Control_No_Store:
1654 					nocache = B_TRUE;
1655 					break;
1656 
1657 				case Shdr_Connection_close:
1658 					persist = B_FALSE;
1659 					break;
1660 
1661 				case Shdr_Connection_Keep_Alive:
1662 					persist = B_TRUE;
1663 					break;
1664 
1665 				case Shdr_Chunked:
1666 					uri->respclen = 0;
1667 					uri->resplen = 0;
1668 					nl7c_http_response_chunked++;
1669 					break;
1670 
1671 				case Shdr_Content_Length:
1672 					if (uri->respclen == URI_LEN_NOVALUE)
1673 						uri->resplen = n;
1674 					break;
1675 
1676 				case Shdr_Date:
1677 					http->date = secs;
1678 					break;
1679 
1680 				case Shdr_ETag:
1681 					http->etag.cp = hp;
1682 					http->etag.ep = ep;
1683 					break;
1684 
1685 				case Shdr_Expires:
1686 					http->expire = secs;
1687 					break;
1688 
1689 				case Shdr_Keep_Alive:
1690 					persist = B_TRUE;
1691 					break;
1692 
1693 				case Shdr_Last_Modified:
1694 					http->lastmod = secs;
1695 					break;
1696 
1697 				case Shdr_Set_Cookie:
1698 					nocache = B_TRUE;
1699 					break;
1700 
1701 				case Shdr_Server:
1702 					break;
1703 
1704 				default:
1705 					nocache = B_TRUE;
1706 					break;
1707 				};
1708 			}
1709 			if (match->act & FILTER) {
1710 				/*
1711 				 * Filter header, do a copyover the header
1712 				 * text, guarenteed to be at least 1 byte.
1713 				 */
1714 				char	*cop = scp;
1715 				int	n = (ep - cop) - 1;
1716 				char	filter[] = "NL7C-Filtered";
1717 
1718 				n = MIN(n, sizeof (filter) - 1);
1719 				if (n > 0)
1720 					bcopy(filter, cop, n);
1721 				cop += n;
1722 				ASSERT(cop < ep);
1723 				*cop++ = ':';
1724 				while (cop < ep)
1725 					*cop++ = ' ';
1726 			}
1727 			if (match->act & NOCACHE) {
1728 				nocache = B_TRUE;
1729 			}
1730 		} else if (hp == NULL) {
1731 			uri->eoh = scp;
1732 			goto done;
1733 		} else if (ep == NULL) {
1734 			goto more;
1735 		}
1736 	}
1737 	/* No EOH found */
1738 	goto more;
1739 
1740 done:
1741 	/* Parse completed */
1742 	http->parsed = B_TRUE;
1743 	/* Save the HTTP header length */
1744 	http->headlen = (cp - *cpp);
1745 	if (uri->respclen == URI_LEN_NOVALUE) {
1746 		if (uri->resplen == URI_LEN_NOVALUE) {
1747 			nl7c_http_response_pass1++;
1748 			goto pass;
1749 		}
1750 	}
1751 	/* Add header length to URI response length */
1752 	uri->resplen += http->headlen;
1753 
1754 	/* Set socket persist state */
1755 	if (persist)
1756 		so->so_nl7c_flags |= NL7C_SOPERSIST;
1757 	else
1758 		so->so_nl7c_flags &= ~NL7C_SOPERSIST;
1759 
1760 	if (http->major == 1) {
1761 		so->so_nl7c_flags &= ~NL7C_SCHEMEPRIV;
1762 		if (http->minor >= 1) {
1763 			if (! persist)
1764 				so->so_nl7c_flags |= HTTP_CONN_CL;
1765 		} else {
1766 			if (persist)
1767 				so->so_nl7c_flags |= HTTP_CONN_KA;
1768 			else
1769 				so->so_nl7c_flags |= HTTP_CONN_CL;
1770 		}
1771 	}
1772 
1773 	if (nocache) {
1774 		/*
1775 		 * Response not to be cached, only post response
1776 		 * processing code common to both non and cached
1777 		 * cases above here and code for the cached case
1778 		 * below.
1779 		 *
1780 		 * Note, chunked transfer processing is the last
1781 		 * to be done.
1782 		 */
1783 		uri->nocache = B_TRUE;
1784 		if (uri->respclen != URI_LEN_NOVALUE) {
1785 			/* Chunked response */
1786 			goto chunked;
1787 		}
1788 		/* Nothing more todo */
1789 		goto parsed;
1790 	}
1791 
1792 	if (http->expire != -1 && http->date != -1) {
1793 		if (http->expire <= http->date) {
1794 			/* ??? just pass */
1795 			nl7c_http_response_pass2++;
1796 			goto pass;
1797 		}
1798 		/* Have a valid expire and date so calc an lbolt expire */
1799 		uri->expire = lbolt + SEC_TO_TICK(http->expire - http->date);
1800 	} else if (nl7c_uri_ttl != -1) {
1801 		/* No valid expire speced and we have a TTL */
1802 		uri->expire = lbolt + SEC_TO_TICK(nl7c_uri_ttl);
1803 	}
1804 
1805 chunked:
1806 	/*
1807 	 * Chunk transfer parser and processing, a very simple parser
1808 	 * is implemented here for the common case were one, or more,
1809 	 * complete chunk(s) are passed in (i.e. length header + body).
1810 	 *
1811 	 * All other cases are passed.
1812 	 */
1813 	scp = cp;
1814 	while (uri->respclen != URI_LEN_NOVALUE && cp < sep) {
1815 		if (uri->respclen == URI_LEN_CONSUMED) {
1816 			/* Skip trailing "\r\n" */
1817 			if (cp == sep)
1818 				goto more;
1819 			if (*cp++ != '\r')
1820 				goto bad;
1821 			if (cp == sep)
1822 				goto more;
1823 			if (*cp++ != '\n')
1824 				goto bad;
1825 			uri->respclen = 0;
1826 		}
1827 		if (uri->respclen == 0) {
1828 			/* Parse a chunklen "[0-9A-Fa-f]+" */
1829 			char	c;
1830 			int	n = 0;
1831 
1832 			if (cp == sep)
1833 				goto more;
1834 			nl7c_http_response_chunkparse++;
1835 			while (cp < sep && (c = *cp++) != '\r') {
1836 				hd2i(c, n);
1837 				if (n == -1)
1838 					goto bad;
1839 			}
1840 			if (cp == sep)
1841 				goto more;
1842 			if (*cp++ != '\n')
1843 				goto bad;
1844 			uri->respclen = n;
1845 			if (n == 0) {
1846 				/* Last chunk, skip trailing "\r\n" */
1847 				if (cp == sep)
1848 					goto more;
1849 				if (*cp++ != '\r')
1850 					goto bad;
1851 				if (cp == sep)
1852 					goto more;
1853 				if (*cp++ != '\n')
1854 					goto bad;
1855 				uri->respclen = URI_LEN_NOVALUE;
1856 				break;
1857 			}
1858 		}
1859 		if (uri->respclen > 0) {
1860 			/* Consume some bytes for the current chunk */
1861 			uint32_t sz = (sep - cp);
1862 
1863 			if (sz > uri->respclen)
1864 				sz = uri->respclen;
1865 			uri->respclen -= sz;
1866 			cp += sz;
1867 			if (uri->respclen == 0) {
1868 				/* End of chunk, skip trailing "\r\n" */
1869 				if (cp == sep) {
1870 					uri->respclen = URI_LEN_CONSUMED;
1871 					goto more;
1872 				}
1873 				if (*cp++ != '\r')
1874 					goto bad;
1875 				if (cp == sep)
1876 					goto more;
1877 				if (*cp++ != '\n')
1878 					goto bad;
1879 				if (cp == sep)
1880 					goto more;
1881 			}
1882 		}
1883 	}
1884 	uri->resplen += (cp - scp);
1885 
1886 parsed:
1887 	*cpp = cp;
1888 	return (B_TRUE);
1889 
1890 pass:
1891 	*cpp = NULL;
1892 	return (B_TRUE);
1893 
1894 bad:
1895 	*cpp = NULL;
1896 	return (B_FALSE);
1897 
1898 more:
1899 	uri->resplen += (cp - scp);
1900 	*cpp = cp;
1901 	return (B_FALSE);
1902 }
1903 
1904 boolean_t
1905 nl7c_http_log(uri_desc_t *quri, uri_desc_t *suri, nca_request_log_t *req,
1906     char **wp, char **pep, uint32_t *off)
1907 {
1908 	http_t	*qhttp = quri->scheme;
1909 	http_t	*shttp = suri->scheme;
1910 	int	sz;
1911 
1912 	if (qhttp->uagent.cp != NULL) {
1913 		sz = (qhttp->uagent.ep - qhttp->uagent.cp);
1914 		if ((*wp + sz + 1) >= *pep) goto full;
1915 		bcopy(qhttp->uagent.cp, *wp, sz);
1916 		*wp += sz;
1917 		*(*wp)++ = 0;
1918 		sz++;
1919 		req->useragent_len = sz;
1920 		req->useragent = *off;
1921 		*off += sz;
1922 	}
1923 
1924 	req->response_len -= (uint_t)shttp->headlen;
1925 
1926 	req->method = NCA_GET;
1927 
1928 	if (qhttp->major == 1) {
1929 		if (qhttp->minor == 0) {
1930 			req->version = HTTP_1_0;
1931 		} else if (qhttp->minor == 1) {
1932 			req->version = HTTP_1_1;
1933 		} else {
1934 			req->version = HTTP_0_0;
1935 		}
1936 	} else if (qhttp->major == 0) {
1937 		req->version = HTTP_0_9;
1938 	} else {
1939 		req->version = HTTP_0_0;
1940 	}
1941 
1942 	return (B_FALSE);
1943 
1944 full:
1945 	return (B_TRUE);
1946 }
1947