xref: /titanic_41/usr/src/uts/common/fs/sockfs/nl7chttp.c (revision 2c2d21e98a95cba5687ec6574c974a5c6c4a6adb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/sysmacros.h>
27 #include <sys/strsubr.h>
28 #include <fs/sockfs/nl7c.h>
29 #include <fs/sockfs/nl7curi.h>
30 #include <fs/sockfs/socktpi.h>
31 
32 #include <inet/nca/ncadoorhdr.h>
33 #include <inet/nca/ncalogd.h>
34 
35 
/*
 * Event counters.  In the code reviewed here only nl7c_http_cond_304 and
 * nl7c_http_cond_412 are updated (a plain ++ in nl7c_http_cond()); the
 * nl7c_http_response_* counters are presumably maintained by the response
 * parser - TODO confirm.
 */
volatile uint64_t	nl7c_http_response_chunked = 0;
volatile uint64_t	nl7c_http_response_chunkparse = 0;

volatile uint64_t	nl7c_http_response_pass1 = 0;
volatile uint64_t	nl7c_http_response_pass2 = 0;
volatile uint64_t	nl7c_http_response_304 = 0;
volatile uint64_t	nl7c_http_response_307 = 0;
volatile uint64_t	nl7c_http_response_400 = 0;

/* Conditional requests answered with a 304 / 412 by nl7c_http_cond() */
volatile uint64_t	nl7c_http_cond_304 = 0;
volatile uint64_t	nl7c_http_cond_412 = 0;
47 
48 /*
49  * Some externs:
50  */
51 
/* Running byte count, bumped by http_mkresponse() for each buffer it adds */
extern uint64_t		nl7c_uri_bytes;
/* kmem cache that http_mkresponse() allocates its temp uri_desc_t from */
extern kmem_cache_t	*nl7c_uri_kmc;
/* Not referenced directly below; presumably used via URI_RD_ADD() - verify */
extern kmem_cache_t	*nl7c_uri_rd_kmc;
/* Handed to REF_INIT() by http_mkresponse() for the temp uri_desc_t */
extern void		nl7c_uri_inactive(uri_desc_t *);
/* Substituted (as single digits) into the "Server: NCA/#.#" header */
extern uint32_t		nca_major_version;
extern uint32_t		nca_minor_version;
58 
59 /*
 * HTTP connection persistent headers, mblk_t's, and state values stored in
 * SOTOTPI(so)->sti_nl7c_flags & NL7C_SCHEMEPRIV.
62  */
63 
/* Header text, copied (without the trailing NUL) by nl7c_http_init() */
char	Shttp_conn_cl[] = "Connection: close\r\n";
char	Shttp_conn_ka[] = "Connection: Keep-Alive\r\n";

/* Master mblk_t's built by nl7c_http_init(), dupb()ed by nl7c_http_persist() */
mblk_t	*http_conn_cl;
mblk_t	*http_conn_ka;

/* State bits tested by nl7c_http_persist() to pick one of the above */
#define	HTTP_CONN_CL	0x00010000
#define	HTTP_CONN_KA	0x00020000
72 
73 /*
74  * Hex ascii Digit to Integer accumulate, if (char)c is a valid ascii
75  * hex digit then the contents of (int32_t)n will be left shifted and
76  * the new digit added in, else n will be set to -1.
77  */
78 
#define	hd2i(c, n) {							\
	if (isdigit(c))							\
		(n) = (n) * 16 + ((c) - '0');				\
	else if ((c) >= 'a' && (c) <= 'f')				\
		(n) = (n) * 16 + ((c) - 'a' + 10);			\
	else if ((c) >= 'A' && (c) <= 'F')				\
		(n) = (n) * 16 + ((c) - 'A' + 10);			\
	else								\
		(n) = -1;	/* not a hex digit */			\
}
90 
91 /*
92  * HTTP parser action values:
93  */
94 
/* Parser action bits, one bit per action so they can be or'ed and tested */
typedef enum act_e {
	REQUEST		= 1 << 0,
	NUMERIC		= 1 << 1,
	QUALIFIER	= 1 << 2,
	PASS		= 1 << 3,
	FILTER		= 1 << 4,
	NOCACHE		= 1 << 5,
	HASH		= 1 << 6,
	DATE		= 1 << 7,
	ETAG		= 1 << 8,
	RESPONSE	= 1 << 9,
	URIABS		= 1 << 10,
	URIREL		= 1 << 11,
	HEX		= 1 << 12
} act_t;

/* An undefined action is treated the same as PASS */
#define	UNDEF		PASS
112 
113 /*
114  * HTTP parser token:
115  */
116 
typedef struct token_s {
	int	tokid;			/* Token ident (1st INIT() argument) */
	char	*text;			/* Token text, matched ignoring case */
	act_t	act;			/* Action to take (act_t bits) */
} token_t;
122 
123 /*
124  * The ttree_t (or token tree) is an ascending ordered binary tree
125  * built by ttree_build() from an array of tokens and subsequently
126  * used by ttree_line_parse() to parse multiline text data.
127  */
typedef struct ttree_s {
	token_t *tok;			/* Token, NULL for an empty node */
	struct ttree_s *lt, *gt;	/* < and > next node, NULL at a leaf */
} ttree_t;
132 
133 /*
 * Note: the token arrays tokreq[] and tokres[] (from which req_tree and
 * res_tree are built) must be in ascending case insensitive order of the
 * char[] strings used to initialize each element.
136  *
137  * See "nl7ctokreq.txt" and "nl7ctokres.txt" which are processed by
138  * "nl7ctokgen" to produce "nl7ctokgen.h" and included here.
139  */
140 
/*
 * token_t initializer: tokid is the identifier s, text is the object
 * named S<s> (token pasting) and act is t.
 */
#define	INIT(s, t) {s, S##s, t}

#include "nl7ctokgen.h"
/* Parse trees, built once by nl7c_http_init() from tokreq[] and tokres[] */
static ttree_t *req_tree;
static ttree_t *res_tree;
146 
147 /*
148  * HTTP scheme private state:
149  */
150 
/*
 * Allocated from http_kmc by nl7c_http_request(), hung off
 * uri_desc_t.scheme and released by nl7c_http_free().  As initialized by
 * nl7c_http_request() a date/expire/lastmod value of -1 and a str_t .cp
 * of NULL mean that the corresponding header has not been seen.
 */
typedef struct http_s {
	boolean_t	parsed;		/* Response parsed */
	uint32_t	major, minor;	/* HTTP/major.minor */
	uint32_t	headlen;	/* HTTP header length */
	clock_t		date;		/* Response Date: */
	clock_t		expire;		/* Response Expire: */
	clock_t		moddate;	/* Request *Modified-Since date */
	enum tokid_e	modtokid;	/* Request *Modified-Since tokid */
	time_t		lastmod;	/* Response Last-Modified: */
	str_t		accept;		/* Request Accept: */
	str_t		acceptchar;	/* Request Accept-Charset: */
	str_t		acceptenco;	/* Request Accept-Encoding: */
	str_t		acceptlang;	/* Request Accept-Language: */
	str_t		etag;		/* Request/Response ETag: */
	str_t		uagent;		/* Request User-Agent: */
} http_t;

/* kmem cache of http_t, created by nl7c_http_init() */
static kmem_cache_t *http_kmc;
169 
170 /*
171  * HTTP date routines, dow[] for day of the week, Dow[] for day of the
172  * week for the Unix epoch (i.e. day 0 is a Thu), months[] for the months
173  * of the year, and dom[] for day number of the year for the first day
174  * of each month (non leap year).
175  */
176 
/* Lower case day names, matched by http_date2time_t() (NULL terminated) */
static char *dow[] = {"sunday", "monday", "tuesday", "wednesday", "thursday",
	"friday", "saturday", 0};

/* Day names indexed by (days since the epoch) % 7, used by http_today() */
static char *Dow[] = {"Thu", "Fri", "Sat", "Sun", "Mon", "Tue", "Wed", 0};

/* Month names, index 0 is Jan; parsed and generated (NULL terminated) */
static char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
	"Aug", "Sep", "Oct", "Nov", "Dec", 0};

/* Days in a non leap year that precede the first day of month [0..11] */
static int dom[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
186 
187 /*
188  * http_date2time_t(const char *) - returns the time(2) value (i.e.
189  * the value 0 is Thu, 01 Jan 1970 00:00:00 GMT) for the following
190  * time formats used by HTTP request and response headers:
191  *
192  *	1) Sun, 07 Dec 1998 14:49:37 GMT	; RFC 822, updated by RFC 1123
193  *	2) Sunday, 07-Dec-98 14:49:37 GMT	; RFC 850, obsoleted by RFC 1036
194  *	3) Sun Nov  7 14:49:37 1998		; ANSI C's asctime() format
195  *	4) 60					; Time delta of N seconds
196  *
197  * On error a time_t value of -1 is returned.
198  *
199  * All dates are GMT (must be part of the date string for types
200  * 1 and 2 and not for type 1).
201  *
202  * Note, the given mstr_t pointed to by *sp will be modified.
203  */
204 
205 static time_t
http_date2time_t(char * cp,char * ep)206 http_date2time_t(char *cp, char *ep)
207 {
208 	char	*scp = cp;
209 	time_t	secs;
210 	char	**tpp;
211 	char	*tp;
212 	char	c, sc;
213 	ssize_t	n;
214 
215 	ssize_t	zeroleap = 1970 / 4 - 1970 / 100 + 1970 / 400;
216 	ssize_t	leap;
217 	ssize_t	year;
218 	ssize_t	month;
219 	ssize_t	day;
220 	ssize_t	hour;
221 	ssize_t	min;
222 	ssize_t	sec;
223 
224 	/* Parse and skip day-of-week (we don't use it) */
225 	tpp = dow;
226 	tp = *tpp;
227 	n = 0;
228 	while (cp < ep) {
229 		c = *cp++;
230 		if (c == ',' || c == ' ')
231 			break;
232 		c = tolower(c);
233 		if (*tp == 0 || *tp != c) {
234 			cp = scp;
235 			if ((tp = *++tpp) == NULL)
236 				break;
237 			continue;
238 		}
239 		tp++;
240 	}
241 	if (cp == NULL) {
242 		/* Not case 1-3, try 4 */
243 		while (cp < ep) {
244 			c = *cp;
245 			if (isdigit(c)) {
246 				cp++;
247 				n *= 10;
248 				n += c - '0';
249 				continue;
250 			}
251 			/* An invalid date sytax */
252 			return (-1);
253 		}
254 		/* Case 4, delta from current time */
255 		return (gethrestime_sec() + n);
256 	}
257 	if (c == ',') {
258 		/* Case 1 or 2, skip <SP> */
259 		if (cp == ep)
260 			return (-1);
261 		c = *cp++;
262 		if (c != ' ')
263 			return (-1);
264 		/* Get day of the month */
265 		if (cp == ep)
266 			return (-1);
267 		c = *cp++;
268 		if (! isdigit(c))
269 			return (-1);
270 		n = c - '0';
271 		if (cp == ep)
272 			return (-1);
273 		c = *cp++;
274 		if (! isdigit(c))
275 			return (-1);
276 		n *= 10;
277 		n += c - '0';
278 		day = n;
279 		/* Get day/month/year seperator */
280 		if (cp == ep)
281 			return (-1);
282 		sc = *cp++;
283 		if (sc != ' ' && sc != '-')
284 			return (-1);
285 		/* Parse month */
286 		tpp = months;
287 		tp = *tpp++;
288 		scp = cp;
289 		n = 0;
290 		while (cp < ep) {
291 			c = *cp;
292 			if (c == sc) {
293 				cp++;
294 				break;
295 			}
296 			c = tolower(c);
297 			if (*tp == 0 || tolower(*tp) != c) {
298 				if ((tp = *tpp++) == NULL)
299 					break;
300 				cp = scp;
301 				n++;
302 				continue;
303 			}
304 			cp++;
305 			tp++;
306 		}
307 		if (cp == NULL)
308 			return (-1);
309 		month = n;
310 		/* Get year */
311 		if (cp == ep)
312 			return (-1);
313 		c = *cp++;
314 		if (! isdigit(c))
315 			return (-1);
316 		n = c - '0';
317 		if (cp == ep)
318 			return (-1);
319 		c = *cp++;
320 		if (! isdigit(c))
321 			return (-1);
322 		n *= 10;
323 		n += c - '0';
324 		if (cp == ep)
325 			return (-1);
326 		c = *cp++;
327 		if (sc == ' ') {
328 			/* Case 1, get 2 more year digits */
329 			if (! isdigit(c))
330 				return (-1);
331 			n *= 10;
332 			n += c - '0';
333 			if (cp == ep)
334 				return (-1);
335 			c = *cp++;
336 			if (! isdigit(c))
337 				return (-1);
338 			n *= 10;
339 			n += c - '0';
340 			/* Get seperator char */
341 			if (cp == ep)
342 				return (-1);
343 			c = *cp;
344 			if (c != ' ')
345 				return (-1);
346 			cp++;
347 		} else {
348 			/*
349 			 * Case 2, 2 digit year and as this is a so-called
350 			 * Unix date format and the begining of time was
351 			 * 1970 so we can extend this obsoleted date syntax
352 			 * past the year 1999 into the year 2038 for 32 bit
353 			 * machines and through 2069 for 64 bit machines.
354 			 */
355 			if (n > 69)
356 				n += 1900;
357 			else
358 				n += 2000;
359 		}
360 		year = n;
361 		/* Get GMT time */
362 		if (c != ' ')
363 			return (-1);
364 		if (cp == ep)
365 			return (-1);
366 		c = *cp++;
367 		if (! isdigit(c))
368 			return (-1);
369 		n = c - '0';
370 		if (cp == ep)
371 			return (-1);
372 		c = *cp++;
373 		if (! isdigit(c))
374 			return (-1);
375 		n *= 10;
376 		n += c - '0';
377 		hour = n;
378 		if (cp == ep)
379 			return (-1);
380 		c = *cp++;
381 		if (c != ':')
382 			return (-1);
383 		if (cp == ep)
384 			return (-1);
385 		c = *cp++;
386 		if (! isdigit(c))
387 			return (-1);
388 		n = c - '0';
389 		if (cp == ep)
390 			return (-1);
391 		c = *cp++;
392 		if (! isdigit(c))
393 			return (-1);
394 		n *= 10;
395 		n += c - '0';
396 		min = n;
397 		if (cp == ep)
398 			return (-1);
399 		c = *cp++;
400 		if (c != ':')
401 			return (-1);
402 		if (cp == ep)
403 			return (-1);
404 		c = *cp++;
405 		if (! isdigit(c))
406 			return (-1);
407 		n = c - '0';
408 		if (cp == ep)
409 			return (-1);
410 		c = *cp++;
411 		if (! isdigit(c))
412 			return (-1);
413 		n *= 10;
414 		n += c - '0';
415 		sec = n;
416 		if (cp == ep)
417 			return (-1);
418 		c = *cp++;
419 		if (c != ' ')
420 			return (-1);
421 		if (cp == ep)
422 			return (-1);
423 		c = *cp++;
424 		if (c != 'G')
425 			return (-1);
426 		if (cp == ep)
427 			return (-1);
428 		c = *cp++;
429 		if (c != 'M')
430 			return (-1);
431 		if (cp == ep)
432 			return (-1);
433 		c = *cp++;
434 		if (c != 'T')
435 			return (-1);
436 	} else {
437 		/* case 3, parse month */
438 		sc = c;
439 		tpp = months;
440 		tp = *tpp++;
441 		scp = cp;
442 		n = 0;
443 		while (cp < ep) {
444 			c = *cp;
445 			if (c == sc) {
446 				cp++;
447 				break;
448 			}
449 			c = tolower(c);
450 			if (*tp == 0 || tolower(*tp) != c) {
451 				if ((tp = *tpp++) == NULL)
452 					break;
453 				cp = scp;
454 				n++;
455 				continue;
456 			}
457 			cp++;
458 			tp++;
459 		}
460 		if (cp == NULL)
461 			return (-1);
462 		month = n;
463 		/* Get day of the month */
464 		if (cp == ep)
465 			return (-1);
466 		c = *cp++;
467 		if (! isdigit(c))
468 			return (-1);
469 		n = c - '0';
470 		if (cp == ep)
471 			return (-1);
472 		c = *cp++;
473 		if (! isdigit(c))
474 			return (-1);
475 		n *= 10;
476 		n += c - '0';
477 		day = n;
478 		/* Skip <SP> */
479 		if (cp == ep)
480 			return (-1);
481 		c = *cp++;
482 		if (c != ' ')
483 			return (-1);
484 		/* Get time */
485 		if (cp == ep)
486 			return (-1);
487 		c = *cp++;
488 		if (! isdigit(c))
489 			return (-1);
490 		n = c - '0';
491 		if (cp == ep)
492 			return (-1);
493 		c = *cp++;
494 		if (! isdigit(c))
495 			return (-1);
496 		n *= 10;
497 		n += c - '0';
498 		hour = n;
499 		if (cp == ep)
500 			return (-1);
501 		c = *cp++;
502 		if (c != ':')
503 			return (-1);
504 		if (cp == ep)
505 			return (-1);
506 		c = *cp++;
507 		if (! isdigit(c))
508 			return (-1);
509 		n = c - '0';
510 		if (cp == ep)
511 			return (-1);
512 		c = *cp++;
513 		if (! isdigit(c))
514 			return (-1);
515 		n *= 10;
516 		n += c - '0';
517 		min = n;
518 		if (cp == ep)
519 			return (-1);
520 		c = *cp++;
521 		if (c != ':')
522 			return (-1);
523 		if (cp == ep)
524 			return (-1);
525 		c = *cp++;
526 		if (! isdigit(c))
527 			return (-1);
528 		n = c - '0';
529 		if (cp == ep)
530 			return (-1);
531 		c = *cp++;
532 		if (! isdigit(c))
533 			return (-1);
534 		n *= 10;
535 		n += c - '0';
536 		sec = n;
537 		/* Skip <SP> */
538 		if (cp == ep)
539 			return (-1);
540 		c = *cp++;
541 		if (c != ' ')
542 			return (-1);
543 		/* Get year */
544 		if (cp == ep)
545 			return (-1);
546 		c = *cp++;
547 		if (! isdigit(c))
548 			return (-1);
549 		n = c - '0';
550 		if (cp == ep)
551 			return (-1);
552 		c = *cp++;
553 		if (! isdigit(c))
554 			return (-1);
555 		n *= 10;
556 		n += c - '0';
557 		if (cp == ep)
558 			return (-1);
559 		c = *cp++;
560 		if (! isdigit(c))
561 			return (-1);
562 		n *= 10;
563 		n += c - '0';
564 		if (cp == ep)
565 			return (-1);
566 		c = *cp++;
567 		if (! isdigit(c))
568 			return (-1);
569 		n *= 10;
570 		n += c - '0';
571 		year = n;
572 	}
573 
574 	/* Last, caclulate seconds since Unix day zero */
575 	leap = year;
576 	if (month < 2)
577 		leap--;
578 	leap = leap / 4 - leap / 100 + leap / 400 - zeroleap;
579 	secs = ((((year - 1970) * 365 + dom[month] + day  - 1 + leap) * 24
580 	    + hour) * 60 + min) * 60 + sec;
581 
582 	return (secs);
583 }
584 
585 /*
586  * http_today(char *) - returns in the given char* pointer the current
587  * date in ascii with a format of (char [29]):
588  *
589  *	Sun, 07 Dec 1998 14:49:37 GMT	; RFC 822, updated by RFC 1123
590  */
591 
592 static void
http_today(char * cp)593 http_today(char *cp)
594 {
595 	ssize_t	i;
596 	char	*fp;
597 
598 	ssize_t	leap;
599 	ssize_t	year;
600 	ssize_t	month;
601 	ssize_t	dow;
602 	ssize_t	day;
603 	ssize_t	hour;
604 	ssize_t	min;
605 	ssize_t	sec;
606 
607 	/* Secs since Thu, 01 Jan 1970 00:00:00 GMT */
608 	time_t	now = gethrestime_sec();
609 
610 	sec = now % 60;
611 	now /= 60;
612 	min = now % 60;
613 	now /= 60;
614 	hour = now % 24;
615 	now /= 24;
616 	dow = now % 7;
617 
618 	year = 1970;
619 	for (;;) {
620 		if (year % 4 == 0 && year % 100 != 0 || year % 400 == 0)
621 			day = 366;
622 		else
623 			day = 365;
624 		if (now < day)
625 			break;
626 		now -= day;
627 		year++;
628 	}
629 
630 	now++;
631 	if (year % 4 == 0 && year % 100 != 0 || year % 400 == 0)
632 		leap = 1;
633 	else
634 		leap = 0;
635 	month = 11;
636 	for (i = 11; i; i--) {
637 		if (i < 2)
638 			leap = 0;
639 		if (now > dom[i] + leap)
640 			break;
641 		month--;
642 	}
643 	day = now - dom[i] - leap;
644 
645 	fp = Dow[dow];
646 	*cp++ = *fp++;
647 	*cp++ = *fp++;
648 	*cp++ = *fp++;
649 	*cp++ = ',';
650 	*cp++ = ' ';
651 
652 	i = day / 10;
653 	*cp++ = '0' + i;
654 	*cp++ = '0' + (day - i * 10);
655 	*cp++ = ' ';
656 
657 	fp = months[month];
658 	*cp++ = *fp++;
659 	*cp++ = *fp++;
660 	*cp++ = *fp++;
661 	*cp++ = ' ';
662 
663 	i = year / 1000;
664 	*cp++ = '0' + i;
665 	year -= i * 1000;
666 	i = year / 100;
667 	*cp++ = '0' + i;
668 	year -= i * 100;
669 	i = year / 10;
670 	*cp++ = '0' + i;
671 	year -= i * 10;
672 	*cp++ = '0' + year;
673 	*cp++ = ' ';
674 
675 	i = hour / 10;
676 	*cp++ = '0' + i;
677 	*cp++ = '0' + (hour - i * 10);
678 	*cp++ = ':';
679 
680 	i = min / 10;
681 	*cp++ = '0' + i;
682 	*cp++ = '0' + (min - i * 10);
683 	*cp++ = ':';
684 
685 	i = sec / 10;
686 	*cp++ = '0' + i;
687 	*cp++ = '0' + (sec - i * 10);
688 	*cp++ = ' ';
689 
690 	*cp++ = 'G';
691 	*cp++ = 'M';
692 	*cp = 'T';
693 }
694 
695 /*
696  * Given the ttree_t pointer "*t", parse the char buffer pointed to
697  * by "**cpp" of multiline text data up to the pointer "**epp", the
698  * pointer "*hash" points to the current text hash.
699  *
700  * If a match is found a pointer to the ttree_t token will be returned,
701  * "**cpp" will point to the next line, "**epp" will point to the first
702  * EOL char, "**hpp" will point to remainder of the parse data (if none,
703  * **hpp == **epp), and "*hash" will be updated.
704  *
705  * If no match, as above except "**hpp" points to the begining of the
706  * line and "*hash" wont be updated.
707  *
708  * If no EOL is found NULL is returned, "**epp" is set to NULL, no further
709  * calls can be made until additional data is ready and all arguments are
710  * reset.
711  *
712  * If EOH (i.e. an empty line) NULL is returned, "**hpp" is set to NULL,
713  * *cpp points to past EOH, no further calls can be made.
714  */
715 
716 static token_t *
ttree_line_parse(ttree_t * t,char ** cpp,char ** epp,char ** hpp,uint32_t * hash)717 ttree_line_parse(ttree_t *t, char **cpp, char **epp, char **hpp, uint32_t *hash)
718 {
719 	char	ca, cb;			/* current line <=> parse node */
720 
721 	char	*cp = *cpp;
722 	char	*ep = *epp;
723 
724 	char	*tp = t->tok->text;	/* current parse text */
725 	char	*sp = cp;		/* saved *cp */
726 
727 	int	parse;			/* parse state */
728 
729 	uint32_t hv;			/* hash value */
730 
731 	if (hash != NULL)
732 		hv = *hash;
733 
734 	/* Special case, check for EOH (i.e. empty line) */
735 	if (cp < ep) {
736 		ca = *cp;
737 		if (ca == '\n') {
738 			/* End of header */
739 			*cpp = ++cp;
740 			*hpp = NULL;
741 			return (NULL);
742 		} else if (ca == '\r') {
743 			cp++;
744 			if (cp < ep) {
745 				ca = *cp;
746 				if (ca == '\n') {
747 					/* End of header */
748 					*cpp = ++cp;
749 					*hpp = NULL;
750 					return (NULL);
751 				}
752 			}
753 			cp = *cpp;
754 		}
755 	}
756 	while (cp < ep) {
757 		/* Get next parse text char */
758 		cb = *tp;
759 		if (cb != 0) {
760 			/* Get next current line char */
761 			ca = *cp++;
762 			/* Case insensitive */
763 			cb = tolower(cb);
764 			ca = tolower(ca);
765 			if (ca == cb) {
766 				/*
767 				 * Char match, next char.
768 				 *
769 				 * Note, parse text can contain EOL chars.
770 				 */
771 				tp++;
772 				continue;
773 			}
774 			if (ca == '\r' || ca == '\n') {
775 				/* EOL, always go less than */
776 				t = t->lt;
777 			} else if (ca < cb) {
778 				/* Go less than */
779 				t = t->lt;
780 			} else {
781 				/* Go greater than */
782 				t = t->gt;
783 			}
784 			while (t != NULL && t->tok == NULL) {
785 				/* Null node, so descend to < node */
786 				t = t->lt;
787 			}
788 			if (t != NULL) {
789 				/* Initialize for next node compare */
790 				tp = t->tok->text;
791 				cp = sp;
792 				continue;
793 			}
794 			/*
795 			 * End of tree walk, no match, return pointer
796 			 * to the start of line then below find EOL.
797 			 */
798 			*hpp = *cpp;
799 		} else {
800 			/*
801 			 * End of token text, match, return pointer to
802 			 * the rest of header text then below find EOL.
803 			 */
804 			*hpp = cp;
805 		}
806 		/*
807 		 * Find end of line. Note, the HTTP line syntax supports
808 		 * implicit multi-line if the next line starts with a <SP>
809 		 * or <HT>.
810 		 */
811 		parse = 0;
812 		while (cp < ep) {
813 			ca = *cp;
814 			if (parse == 0 && ca == '\r') {
815 				*epp = cp;
816 				parse = 1;
817 			} else if (parse == 0 && ca == '\n') {
818 				*epp = cp;
819 				parse = 2;
820 			} else if (parse == 1 && ca == '\n') {
821 				parse = 2;
822 			} else if (parse >= 2 && (ca == ' ' || ca == '\t')) {
823 				parse++;
824 			} else if (parse > 2) {
825 				parse = 0;
826 			} else if (parse == 2) {
827 				break;
828 			} else if (t != NULL && (t->tok->act & HASH) &&
829 			    hash != NULL) {
830 				CHASH(hv, ca);
831 			}
832 			cp++;
833 		}
834 		if (parse < 2) {
835 			/* No EOL, not enough data */
836 			*epp = NULL;
837 			return (t != NULL ? t->tok : NULL);
838 		}
839 		/*
840 		 * Return updated hash value (if any), update parse current
841 		 * pointer for next call (i.e. begin of next line), and last
842 		 * return pointer to the matching token_t.
843 		 */
844 		if (t != NULL && (t->tok->act & HASH) && hash != NULL)
845 			*hash = hv;
846 		*cpp = cp;
847 		return (t != NULL ? t->tok : NULL);
848 	}
849 	/*
850 	 * End of parse text, ...
851 	 */
852 	*epp = NULL;
853 	return (NULL);
854 }
855 
856 /*
857  * Given a NULL terminated array of token_t(s) ordered in ascending
858  * case insensitive order a binary tree is allocated and populated with
859  * pointers into the array and a pointer to the root node is returned.
860  *
861  * Todo, for maximum ttree parse efficiency needs to be path compressed,
862  * the function ttree_line_parse() handles the empty nodes correctly.
863  */
864 static ttree_t *
ttree_build(token_t * list,int sz)865 ttree_build(token_t *list, int sz)
866 {
867 	ttree_t *treev;
868 	int	max, lvl, inc, ix;
869 
870 	/* calc the size of the tree */
871 	for (max = 1; max < sz; max <<= 1)
872 		;
873 	/* allocate the tree */
874 	treev = kmem_alloc(sizeof (*treev) * (max - 1), KM_SLEEP);
875 
876 	/* walk the tree and populate from list vector */
877 	lvl = max;
878 	while (lvl >>= 1) {
879 		inc = lvl >> 1;
880 		for (ix = lvl; ix < max; ix += lvl << 1) {
881 			if (ix <= sz) {
882 				treev[ix - 1].tok = &list[ix - 1];
883 			} else {
884 				treev[ix - 1].tok = 0;
885 			}
886 			if (inc) {
887 				treev[ix - 1].lt = &treev[ix - inc - 1];
888 				treev[ix - 1].gt = &treev[ix + inc - 1];
889 			} else {
890 				treev[ix - 1].lt = 0;
891 				treev[ix - 1].gt = 0;
892 			}
893 		}
894 	}
895 
896 	return (&treev[(max >> 1) - 1]);
897 }
898 
899 void
nl7c_http_init(void)900 nl7c_http_init(void)
901 {
902 	int	n;
903 
904 	http_kmc = kmem_cache_create("NL7C_http_kmc",
905 	    sizeof (http_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
906 
907 	req_tree = ttree_build(tokreq, tokreq_cnt - 1);
908 	res_tree = ttree_build(tokres, tokres_cnt - 1);
909 
910 	n = sizeof (Shttp_conn_cl) - 1;
911 	http_conn_cl = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
912 	bcopy(Shttp_conn_cl, http_conn_cl->b_rptr, n);
913 	http_conn_cl->b_wptr += n;
914 
915 	n = sizeof (Shttp_conn_ka) - 1;
916 	http_conn_ka = allocb_wait(n, BPRI_HI, STR_NOSIG, NULL);
917 	bcopy(Shttp_conn_ka, http_conn_ka->b_rptr, n);
918 	http_conn_ka->b_wptr += n;
919 }
920 
921 void
nl7c_http_free(void * arg)922 nl7c_http_free(void *arg)
923 {
924 	http_t	*http = arg;
925 
926 	kmem_cache_free(http_kmc, http);
927 }
928 
/*
 * True if the str_t member m is present (i.e. a non NULL cp) in both a
 * and b and the two differ in length or in content.
 */
#define	STR_T_NOTCMP_OPT(a, b, m) (					\
	(a)->m.cp != NULL && (b)->m.cp != NULL &&			\
	(((a)->m.ep - (a)->m.cp) != ((b)->m.ep - (b)->m.cp) ||	\
	strncmp((a)->m.cp, (b)->m.cp, ((b)->m.ep - (b)->m.cp)) != 0))

/*
 * As above and also true if the member is present in only one of a or b.
 */
#define	STR_T_NOTCMP(a, b, m) (						\
	((a)->m.cp != NULL && (b)->m.cp == NULL) ||			\
	((b)->m.cp != NULL && (a)->m.cp == NULL) ||			\
	STR_T_NOTCMP_OPT(a, b, m))
938 
939 boolean_t
nl7c_http_cmp(void * arg1,void * arg2)940 nl7c_http_cmp(void *arg1, void *arg2)
941 {
942 	http_t	*httpa = arg1;		/* Response */
943 	http_t	*httpb = arg2;		/* Request */
944 
945 	if (httpa->major != httpb->major ||
946 	    httpa->minor != httpb->minor ||
947 	    STR_T_NOTCMP(httpa, httpb, accept) ||
948 	    STR_T_NOTCMP(httpa, httpb, acceptchar) ||
949 	    STR_T_NOTCMP(httpa, httpb, acceptenco) ||
950 	    STR_T_NOTCMP(httpa, httpb, acceptlang) ||
951 	    STR_T_NOTCMP_OPT(httpa, httpb, etag))
952 		return (B_FALSE);
953 	return (B_TRUE);
954 }
955 
956 /*
957  * In-line HTTP responses:
958  */
959 
/*
 * Response header templates for http_mkresponse(), each '#' is replaced
 * one for one: the first two by the request's HTTP major and minor
 * version digits, the run following "Date: " by the output of
 * http_today() (so its length must match what that function writes),
 * and the last two by the NCA major and minor version digits.
 *
 * The templates don't include the empty line which ends the header,
 * http_mkresponse() adds it after any optional header(s).
 */
static char http_resp_304[] =
	"HTTP/#.# 304 Not Modified\r\n"
	"Date: #############################\r\n"
	"Server: NCA/#.# (Solaris)\r\n";

static char http_resp_412[] =
	"HTTP/#.# 412 Precondition Failed\r\n"
	"Date: #############################\r\n"
	"Server: NCA/#.# (Solaris)\r\n";
969 
970 static uri_desc_t *
http_mkresponse(uri_desc_t * req,uri_desc_t * res,char * proto,int sz)971 http_mkresponse(uri_desc_t *req, uri_desc_t *res, char *proto, int sz)
972 {
973 	http_t		*qhttp = req->scheme;
974 	http_t		*shttp = res->scheme;
975 	uri_desc_t	*uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
976 	char		*alloc;
977 	char		*cp;
978 	char		*ep = &proto[sz];
979 	uri_rd_t	*rdp;
980 	int		cnt;
981 
982 	char		hdr_etag[] = "ETag: ";
983 
984 	/* Any optional header(s) */
985 	if (shttp->etag.cp != NULL) {
986 		/* Response has an ETag:, count it */
987 		sz += sizeof (hdr_etag) - 1 +
988 		    (shttp->etag.ep - shttp->etag.cp) + 2;
989 	}
990 	sz += 2;
991 	alloc = kmem_alloc(sz, KM_SLEEP);
992 
993 	/* Minimum temp uri initialization as needed by uri_response() */
994 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
995 	uri->hash = URI_TEMP;
996 	uri->tail = NULL;
997 	uri->scheme = NULL;
998 	uri->reqmp = NULL;
999 	uri->count = 0;
1000 	cv_init(&uri->waiting, NULL, CV_DEFAULT, NULL);
1001 	mutex_init(&uri->proclock, NULL, MUTEX_DEFAULT, NULL);
1002 
1003 	URI_RD_ADD(uri, rdp, sz, -1);
1004 	rdp->data.kmem = alloc;
1005 	atomic_add_64(&nl7c_uri_bytes, sz);
1006 
1007 	cp = alloc;
1008 	if (qhttp->major == 1) {
1009 		/*
1010 		 * Full response format.
1011 		 *
1012 		 * Copy to first sub char '#'.
1013 		 */
1014 		while (proto < ep) {
1015 			if (*proto == '#')
1016 				break;
1017 			*cp++ = *proto++;
1018 		}
1019 
1020 		/* Process the HTTP version substitutions */
1021 		if (*proto != '#') goto bad;
1022 		*cp++ = '0' + qhttp->major;
1023 		proto++;
1024 		while (proto < ep) {
1025 			if (*proto == '#')
1026 				break;
1027 			*cp++ = *proto++;
1028 		}
1029 		if (*proto != '#') goto bad;
1030 		*cp++ = '0' + qhttp->minor;
1031 		proto++;
1032 
1033 		/* Copy to the next sub char '#' */
1034 		while (proto < ep) {
1035 			if (*proto == '#')
1036 				break;
1037 			*cp++ = *proto++;
1038 		}
1039 
1040 		/* Process the "Date: " substitution */
1041 		if (*proto != '#') goto bad;
1042 		http_today(cp);
1043 
1044 		/* Skip to the next nonsub char '#' */
1045 		while (proto < ep) {
1046 			if (*proto != '#')
1047 				break;
1048 			cp++;
1049 			proto++;
1050 		}
1051 
1052 		/* Copy to the next sub char '#' */
1053 		while (proto < ep) {
1054 			if (*proto == '#')
1055 				break;
1056 			*cp++ = *proto++;
1057 		}
1058 
1059 		/* Process the NCA version substitutions */
1060 		if (*proto != '#') goto bad;
1061 		*cp++ = '0' + nca_major_version;
1062 		proto++;
1063 		while (proto < ep) {
1064 			if (*proto == '#')
1065 				break;
1066 			*cp++ = *proto++;
1067 		}
1068 		if (*proto != '#') goto bad;
1069 		*cp++ = '0' + nca_minor_version;
1070 		proto++;
1071 
1072 		/* Copy remainder of HTTP header */
1073 		while (proto < ep) {
1074 			*cp++ = *proto++;
1075 		}
1076 	} else {
1077 		goto bad;
1078 	}
1079 	/* Any optional header(s) */
1080 	if (shttp->etag.cp != NULL) {
1081 		/* Response has an ETag:, add it */
1082 		cnt = sizeof (hdr_etag) - 1;
1083 		bcopy(hdr_etag, cp, cnt);
1084 		cp += cnt;
1085 		cnt = (shttp->etag.ep - shttp->etag.cp);
1086 		bcopy(shttp->etag.cp, cp, cnt);
1087 		cp += cnt;
1088 		*cp++ = '\r';
1089 		*cp++ = '\n';
1090 	}
1091 	/* Last, add empty line */
1092 	uri->eoh = cp;
1093 	*cp++ = '\r';
1094 	*cp = '\n';
1095 
1096 	return (uri);
1097 
1098 bad:
1099 	/*
1100 	 * Free any resources allocated here, note that while we could
1101 	 * use the uri_inactive() to free the uri by doing a REF_RELE()
1102 	 * we instead free it here as the URI may be in less then a fully
1103 	 * initialized state.
1104 	 */
1105 	kmem_free(alloc, sz);
1106 	kmem_cache_free(nl7c_uri_kmc, uri);
1107 	return (NULL);
1108 }
1109 
1110 uri_desc_t *
nl7c_http_cond(uri_desc_t * req,uri_desc_t * res)1111 nl7c_http_cond(uri_desc_t *req, uri_desc_t *res)
1112 {
1113 	http_t	*qhttp = req->scheme;
1114 	time_t	qdate = qhttp->moddate;
1115 	http_t	*shttp = res->scheme;
1116 	time_t	sdate = shttp->lastmod == -1 ? shttp->date : shttp->lastmod;
1117 	uri_desc_t *uri;
1118 
1119 	if (qhttp->modtokid == Qhdr_If_Modified_Since &&
1120 	    sdate != -1 && qdate != -1 && sdate <= qdate) {
1121 		/*
1122 		 * Request is If-Modified-Since: and both response
1123 		 * and request dates are valid and response is the
1124 		 * same age as request so return a 304 response uri
1125 		 * instead of the cached response.
1126 		 */
1127 		nl7c_http_cond_304++;
1128 		uri = http_mkresponse(req, res, http_resp_304,
1129 		    sizeof (http_resp_304) - 1);
1130 		if (uri != NULL) {
1131 			/* New response uri */
1132 			REF_RELE(res);
1133 			return (uri);
1134 		}
1135 		return (res);
1136 	} else if (qhttp->modtokid == Qhdr_If_Unmodified_Since &&
1137 	    sdate != -1 && qdate != -1 && sdate >= qdate) {
1138 		/*
1139 		 * Request is If-Unmodified-Since: and both response
1140 		 * and request dates are valid and response is not the
1141 		 * same age as the request so return a 412 response
1142 		 * uri instead of the cached response.
1143 		 */
1144 		nl7c_http_cond_412++;
1145 		uri = http_mkresponse(req, res, http_resp_412,
1146 		    sizeof (http_resp_412) - 1);
1147 		if (uri != NULL) {
1148 			/* New response uri */
1149 			REF_RELE(res);
1150 			return (uri);
1151 		}
1152 		return (res);
1153 	}
1154 	/*
1155 	 * No conditional response meet or unknown type or no
1156 	 * valid dates so just return the original uri response.
1157 	 */
1158 	return (res);
1159 }
1160 
1161 /*
1162  * Return the appropriate HTTP connection persist header
1163  * based on the request HTTP persistent header state.
1164  */
1165 
1166 mblk_t *
nl7c_http_persist(struct sonode * so)1167 nl7c_http_persist(struct sonode *so)
1168 {
1169 	uint64_t	flags = SOTOTPI(so)->sti_nl7c_flags & NL7C_SCHEMEPRIV;
1170 	mblk_t		*mp;
1171 
1172 	if (flags & HTTP_CONN_CL)
1173 		mp = dupb(http_conn_cl);
1174 	else if (flags & HTTP_CONN_KA)
1175 		mp = dupb(http_conn_ka);
1176 	else
1177 		mp = NULL;
1178 	return (mp);
1179 }
1180 
1181 /*
1182  * Parse the buffer *p of size len and update the uri_desc_t *uri and our
1183  * http_t *http with the results.
1184  */
1185 
1186 boolean_t
nl7c_http_request(char ** cpp,char * ep,uri_desc_t * uri,struct sonode * so)1187 nl7c_http_request(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
1188 {
1189 	sotpi_info_t *sti = SOTOTPI(so);
1190 	http_t	*http = kmem_cache_alloc(http_kmc, KM_SLEEP);
1191 	char	*cp = *cpp;
1192 	char	*hp;
1193 	char	*scp, *sep;
1194 	char	*HTTP = "HTTP/";
1195 	token_t	*match;
1196 	boolean_t persist = B_FALSE;
1197 
1198 	ASSERT(cp <= ep);
1199 
1200 	if (cp == ep) {
1201 		goto bad;
1202 	}
1203 	/*
1204 	 * Initialize any uri_desc_t and/or http_t members.
1205 	 */
1206 	uri->scheme = (void *)http;
1207 	uri->auth.cp = NULL;
1208 	uri->auth.ep = NULL;
1209 	uri->resplen = URI_LEN_NOVALUE;
1210 	uri->respclen = URI_LEN_NOVALUE;
1211 	uri->eoh = NULL;
1212 	uri->nocache = B_FALSE;
1213 	uri->conditional = B_FALSE;
1214 	http->parsed = B_FALSE;
1215 	http->accept.cp = NULL;
1216 	http->acceptchar.cp = NULL;
1217 	http->acceptenco.cp = NULL;
1218 	http->acceptlang.cp = NULL;
1219 	http->etag.cp = NULL;
1220 	http->uagent.cp = NULL;
1221 	http->date = -1;
1222 	http->expire = -1;
1223 	http->lastmod = -1;
1224 	if (*cp == '\r') {
1225 		/*
1226 		 * Special case for a Request-Line without an HTTP version,
1227 		 * assume it's an old style, i.e. HTTP version 0.9 request.
1228 		 */
1229 		http->major = 0;
1230 		http->minor = 9;
1231 		goto got_version;
1232 	}
1233 	/*
1234 	 * Skip URI path delimiter, must be a <SP>.
1235 	 */
1236 	if (*cp++ != ' ')
1237 		/* Unkown or bad Request-Line format, just punt */
1238 		goto bad;
1239 	/*
1240 	 * The URI parser has parsed through the URI and the <SP>
1241 	 * delimiter, parse the HTTP/N.N version
1242 	 */
1243 	while (cp < ep && *HTTP == *cp) {
1244 		HTTP++;
1245 		cp++;
1246 	}
1247 	if (*HTTP != 0) {
1248 		if (cp == ep)
1249 			goto more;
1250 		goto bad;
1251 	}
1252 	if (cp == ep)
1253 		goto more;
1254 	if (*cp < '0' || *cp > '9')
1255 		goto bad;
1256 	http->major = *cp++ - '0';
1257 	if (cp == ep)
1258 		goto more;
1259 	if (*cp++ != '.')
1260 		goto bad;
1261 	if (cp == ep)
1262 		goto more;
1263 	if (*cp < '0' || *cp > '9')
1264 		goto bad;
1265 	http->minor = *cp++ - '0';
1266 	if (cp == ep)
1267 		goto more;
1268 
1269 got_version:
1270 
1271 	if (*cp++ != '\r')
1272 		goto bad;
1273 	if (cp == ep)
1274 		goto more;
1275 	if (*cp++ != '\n')
1276 		goto bad;
1277 	/*
1278 	 * Initialize persistent state based on HTTP version.
1279 	 */
1280 	if (http->major == 1) {
1281 		if (http->minor >= 1) {
1282 			/* 1.1 persistent by default */
1283 			persist = B_TRUE;
1284 		} else {
1285 			/* 1.0 isn't persistent by default */
1286 			persist = B_FALSE;
1287 		}
1288 	} else if (http->major == 0) {
1289 		/* Before 1.0 no persistent connections */
1290 		persist = B_FALSE;
1291 	} else {
1292 		/* >= 2.0 not supported (yet) */
1293 		goto bad;
1294 	}
1295 	/*
1296 	 * Parse HTTP headers through the EOH
1297 	 * (End Of Header, i.e. an empty line).
1298 	 */
1299 	for (sep = ep; cp < ep; ep = sep) {
1300 		/* Get the next line */
1301 		scp = cp;
1302 		match = ttree_line_parse(req_tree, &cp, &ep, &hp, &uri->hvalue);
1303 		if (match != NULL) {
1304 			if (match->act & QUALIFIER) {
1305 				/*
1306 				 * Header field text is used to qualify this
1307 				 * request/response, based on qualifier type
1308 				 * optionally convert and store *http.
1309 				 */
1310 				char	c;
1311 				int	n = 0;
1312 				time_t	secs;
1313 
1314 				ASSERT(hp != NULL && ep != NULL);
1315 
1316 				if (match->act & NUMERIC) {
1317 					while (hp < ep) {
1318 						c = *hp++;
1319 						if (! isdigit(c))
1320 							goto bad;
1321 						n *= 10;
1322 						n += c - '0';
1323 					}
1324 				} else if (match->act & DATE) {
1325 					secs = http_date2time_t(hp, ep);
1326 				}
1327 				switch (match->tokid) {
1328 
1329 				case Qhdr_Accept_Charset:
1330 					http->acceptchar.cp = hp;
1331 					http->acceptchar.ep = ep;
1332 					break;
1333 
1334 				case Qhdr_Accept_Encoding:
1335 					http->acceptenco.cp = hp;
1336 					http->acceptenco.ep = ep;
1337 					break;
1338 
1339 				case Qhdr_Accept_Language:
1340 					http->acceptlang.cp = hp;
1341 					http->acceptlang.ep = ep;
1342 					break;
1343 
1344 				case Qhdr_Accept:
1345 					http->accept.cp = hp;
1346 					http->accept.ep = ep;
1347 					break;
1348 
1349 				case Qhdr_Authorization:
1350 					goto pass;
1351 
1352 				case Qhdr_Connection_close:
1353 					persist = B_FALSE;
1354 					break;
1355 
1356 				case Qhdr_Connection_Keep_Alive:
1357 					persist = B_TRUE;
1358 					break;
1359 
1360 				case Qhdr_Date:
1361 					http->date = secs;
1362 					break;
1363 
1364 				case Qhdr_ETag:
1365 					http->etag.cp = hp;
1366 					http->etag.ep = ep;
1367 					break;
1368 
1369 				case Qhdr_Host:
1370 					uri->auth.cp = hp;
1371 					uri->auth.ep = ep;
1372 					break;
1373 
1374 				case Qhdr_If_Modified_Since:
1375 				case Qhdr_If_Unmodified_Since:
1376 					http->moddate = secs;
1377 					http->modtokid = match->tokid;
1378 					uri->conditional = B_TRUE;
1379 					break;
1380 
1381 				case Qhdr_Keep_Alive:
1382 					persist = B_TRUE;
1383 					break;
1384 
1385 				case Qhdr_User_Agent:
1386 					http->uagent.cp = hp;
1387 					http->uagent.ep = ep;
1388 					break;
1389 
1390 				default:
1391 					break;
1392 
1393 				};
1394 			}
1395 			if (match->act & FILTER) {
1396 				/*
1397 				 * Filter header, do a copyover the header
1398 				 * text, guarenteed to be at least 1 byte.
1399 				 */
1400 				char	*cop = scp;
1401 				int	n = (ep - cop) - 1;
1402 				char	filter[] = "NL7C-Filtered";
1403 
1404 				n = MIN(n, sizeof (filter) - 1);
1405 				if (n > 0)
1406 					bcopy(filter, cop, n);
1407 				cop += n;
1408 				ASSERT(cop < ep);
1409 				*cop++ = ':';
1410 				while (cop < ep)
1411 					*cop++ = ' ';
1412 			}
1413 			if (match->act & NOCACHE) {
1414 				uri->nocache = B_TRUE;
1415 			}
1416 		} else if (hp == NULL) {
1417 			goto done;
1418 		} else if (ep == NULL) {
1419 			goto more;
1420 		}
1421 	}
1422 	/* No EOH found */
1423 	goto more;
1424 
1425 done:
1426 	/*
1427 	 * Initialize socket persist state and response persist type
1428 	 * flag based on the persist state of the request headers.
1429 	 *
1430 	 */
1431 	if (persist)
1432 		sti->sti_nl7c_flags |= NL7C_SOPERSIST;
1433 	else
1434 		sti->sti_nl7c_flags &= ~NL7C_SOPERSIST;
1435 
1436 	if (http->major == 1) {
1437 		sti->sti_nl7c_flags &= ~NL7C_SCHEMEPRIV;
1438 		if (http->minor >= 1) {
1439 			if (! persist)
1440 				sti->sti_nl7c_flags |= HTTP_CONN_CL;
1441 		} else {
1442 			if (persist)
1443 				sti->sti_nl7c_flags |= HTTP_CONN_KA;
1444 			else
1445 				sti->sti_nl7c_flags |= HTTP_CONN_CL;
1446 		}
1447 	}
1448 	/*
1449 	 * Last, update parse consumed text pointer.
1450 	 */
1451 	*cpp = cp;
1452 	return (B_TRUE);
1453 
1454 pass:
1455 	*cpp = NULL;
1456 	return (B_TRUE);
1457 
1458 bad:
1459 	*cpp = NULL;
1460 more:
1461 	return (B_FALSE);
1462 }
1463 
1464 boolean_t
nl7c_http_response(char ** cpp,char * ep,uri_desc_t * uri,struct sonode * so)1465 nl7c_http_response(char **cpp, char *ep, uri_desc_t *uri, struct sonode *so)
1466 {
1467 	sotpi_info_t *sti = SOTOTPI(so);
1468 	http_t	*http = uri->scheme;
1469 	char	*cp = *cpp;
1470 	char	*hp;
1471 	char	*scp, *sep;
1472 	char	*HTTP = "HTTP/";
1473 	int	status = 0;
1474 	token_t	*match;
1475 #ifdef	NOT_YET
1476 	uint32_t major, minor;
1477 #endif
1478 	boolean_t nocache = B_FALSE;
1479 	boolean_t persist = B_FALSE;
1480 
1481 	ASSERT(http != NULL);
1482 
1483 	if (http->parsed) {
1484 		if (uri->respclen != URI_LEN_NOVALUE) {
1485 			/* Chunked response */
1486 			sep = ep;
1487 			goto chunked;
1488 		}
1489 		/* Already parsed, nothing todo */
1490 		return (B_TRUE);
1491 	}
1492 
1493 	/*
1494 	 * Parse the HTTP/N.N version. Note, there's currently no use
1495 	 * for the actual response major nor minor values as only the
1496 	 * request values are used.
1497 	 */
1498 	while (cp < ep && *HTTP == *cp) {
1499 		HTTP++;
1500 		cp++;
1501 	}
1502 	if (*HTTP != 0) {
1503 		if (cp == ep)
1504 			goto more;
1505 		goto bad;
1506 	}
1507 	if (cp == ep)
1508 		goto more;
1509 
1510 	if (*cp < '0' || *cp > '9')
1511 		goto bad;
1512 #ifdef	NOT_YET
1513 	major = *cp++ - '0';
1514 #else
1515 	cp++;
1516 #endif
1517 
1518 	if (cp == ep)
1519 		goto more;
1520 	if (*cp++ != '.')
1521 		goto bad;
1522 	if (cp == ep)
1523 		goto more;
1524 	if (*cp < '0' || *cp > '9')
1525 		goto bad;
1526 #ifdef	NOT_YET
1527 	minor = *cp++ - '0';
1528 #else
1529 	cp++;
1530 #endif
1531 
1532 	if (cp == ep)
1533 		goto more;
1534 
1535 got_version:
1536 
1537 	/*
1538 	 * Get the response code.
1539 	 */
1540 	if (*cp++ != ' ')
1541 		goto bad;
1542 	if (cp == ep)
1543 		goto more;
1544 
1545 	do {
1546 		if (*cp == ' ')
1547 			break;
1548 		if (*cp < '0' || *cp > '9')
1549 			goto bad;
1550 		if (status)
1551 			status *= 10;
1552 		status += *cp++ - '0';
1553 	} while (cp < ep);
1554 
1555 	switch (status) {
1556 	case 200:
1557 		/*
1558 		 * The only response status we continue to process.
1559 		 */
1560 		break;
1561 	case 304:
1562 		nl7c_http_response_304++;
1563 		nocache = B_TRUE;
1564 		uri->resplen = 0;
1565 		goto pass;
1566 	case 307:
1567 		nl7c_http_response_307++;
1568 		nocache = B_TRUE;
1569 		uri->resplen = 0;
1570 		goto pass;
1571 	case 400:
1572 		nl7c_http_response_400++;
1573 		/*
1574 		 * Special case some response status codes, just mark
1575 		 * as nocache and no response length and pass on the
1576 		 * request/connection.
1577 		 */
1578 		nocache = B_TRUE;
1579 		uri->resplen = 0;
1580 		goto pass;
1581 	default:
1582 		/*
1583 		 * All other response codes result in a parse failure.
1584 		 */
1585 		goto bad;
1586 	}
1587 
1588 	/*
1589 	 * Initialize persistent state based on request HTTP version.
1590 	 */
1591 	if (http->major == 1) {
1592 		if (http->minor >= 1) {
1593 			/* 1.1 persistent by default */
1594 			persist = B_TRUE;
1595 		} else {
1596 			/* 1.0 isn't persistent by default */
1597 			persist = B_FALSE;
1598 		}
1599 	} else if (http->major == 0) {
1600 		/* Before 1.0 no persistent connections */
1601 		persist = B_FALSE;
1602 	} else {
1603 		/* >= 2.0 not supported (yet) */
1604 		goto bad;
1605 	}
1606 
1607 	/*
1608 	 * Parse HTTP headers through the EOH
1609 	 * (End Of Header, i.e. an empty line).
1610 	 */
1611 	for (sep = ep; cp < ep; ep = sep) {
1612 		/* Get the next line */
1613 		scp = cp;
1614 		match = ttree_line_parse(res_tree, &cp, &ep, &hp, NULL);
1615 		if (match != NULL) {
1616 			if (match->act & QUALIFIER) {
1617 				/*
1618 				 * Header field text is used to qualify this
1619 				 * request/response, based on qualifier type
1620 				 * optionally convert and store *http.
1621 				 */
1622 				char	c;
1623 				int	n = 0;
1624 				time_t	secs;
1625 
1626 				ASSERT(hp != NULL && ep != NULL);
1627 
1628 				if (match->act & NUMERIC) {
1629 					while (hp < ep) {
1630 						c = *hp++;
1631 						if (match->act & HEX) {
1632 							hd2i(c, n);
1633 							if (n == -1)
1634 								goto bad;
1635 						} else {
1636 							if (! isdigit(c))
1637 								goto bad;
1638 							n *= 10;
1639 							n += c - '0';
1640 						}
1641 					}
1642 				} else if (match->act & DATE) {
1643 					secs = http_date2time_t(hp, ep);
1644 				}
1645 				switch (match->tokid) {
1646 
1647 				case Shdr_Cache_Control_Max_Age:
1648 					break;
1649 
1650 				case Shdr_Cache_Control_No_Cache:
1651 					nocache = B_TRUE;
1652 					break;
1653 
1654 				case Shdr_Cache_Control_No_Store:
1655 					nocache = B_TRUE;
1656 					break;
1657 
1658 				case Shdr_Connection_close:
1659 					persist = B_FALSE;
1660 					break;
1661 
1662 				case Shdr_Connection_Keep_Alive:
1663 					persist = B_TRUE;
1664 					break;
1665 
1666 				case Shdr_Chunked:
1667 					uri->respclen = 0;
1668 					uri->resplen = 0;
1669 					nl7c_http_response_chunked++;
1670 					break;
1671 
1672 				case Shdr_Content_Length:
1673 					if (uri->respclen == URI_LEN_NOVALUE)
1674 						uri->resplen = n;
1675 					break;
1676 
1677 				case Shdr_Date:
1678 					http->date = secs;
1679 					break;
1680 
1681 				case Shdr_ETag:
1682 					http->etag.cp = hp;
1683 					http->etag.ep = ep;
1684 					break;
1685 
1686 				case Shdr_Expires:
1687 					http->expire = secs;
1688 					break;
1689 
1690 				case Shdr_Keep_Alive:
1691 					persist = B_TRUE;
1692 					break;
1693 
1694 				case Shdr_Last_Modified:
1695 					http->lastmod = secs;
1696 					break;
1697 
1698 				case Shdr_Set_Cookie:
1699 					nocache = B_TRUE;
1700 					break;
1701 
1702 				case Shdr_Server:
1703 					break;
1704 
1705 				default:
1706 					nocache = B_TRUE;
1707 					break;
1708 				};
1709 			}
1710 			if (match->act & FILTER) {
1711 				/*
1712 				 * Filter header, do a copyover the header
1713 				 * text, guarenteed to be at least 1 byte.
1714 				 */
1715 				char	*cop = scp;
1716 				int	n = (ep - cop) - 1;
1717 				char	filter[] = "NL7C-Filtered";
1718 
1719 				n = MIN(n, sizeof (filter) - 1);
1720 				if (n > 0)
1721 					bcopy(filter, cop, n);
1722 				cop += n;
1723 				ASSERT(cop < ep);
1724 				*cop++ = ':';
1725 				while (cop < ep)
1726 					*cop++ = ' ';
1727 			}
1728 			if (match->act & NOCACHE) {
1729 				nocache = B_TRUE;
1730 			}
1731 		} else if (hp == NULL) {
1732 			uri->eoh = scp;
1733 			goto done;
1734 		} else if (ep == NULL) {
1735 			goto more;
1736 		}
1737 	}
1738 	/* No EOH found */
1739 	goto more;
1740 
1741 done:
1742 	/* Parse completed */
1743 	http->parsed = B_TRUE;
1744 	/* Save the HTTP header length */
1745 	http->headlen = (cp - *cpp);
1746 	if (uri->respclen == URI_LEN_NOVALUE) {
1747 		if (uri->resplen == URI_LEN_NOVALUE) {
1748 			nl7c_http_response_pass1++;
1749 			goto pass;
1750 		}
1751 	}
1752 	/* Add header length to URI response length */
1753 	uri->resplen += http->headlen;
1754 
1755 	/* Set socket persist state */
1756 	if (persist)
1757 		sti->sti_nl7c_flags |= NL7C_SOPERSIST;
1758 	else
1759 		sti->sti_nl7c_flags &= ~NL7C_SOPERSIST;
1760 
1761 	if (http->major == 1) {
1762 		sti->sti_nl7c_flags &= ~NL7C_SCHEMEPRIV;
1763 		if (http->minor >= 1) {
1764 			if (! persist)
1765 				sti->sti_nl7c_flags |= HTTP_CONN_CL;
1766 		} else {
1767 			if (persist)
1768 				sti->sti_nl7c_flags |= HTTP_CONN_KA;
1769 			else
1770 				sti->sti_nl7c_flags |= HTTP_CONN_CL;
1771 		}
1772 	}
1773 
1774 	if (nocache) {
1775 		/*
1776 		 * Response not to be cached, only post response
1777 		 * processing code common to both non and cached
1778 		 * cases above here and code for the cached case
1779 		 * below.
1780 		 *
1781 		 * Note, chunked transfer processing is the last
1782 		 * to be done.
1783 		 */
1784 		uri->nocache = B_TRUE;
1785 		if (uri->respclen != URI_LEN_NOVALUE) {
1786 			/* Chunked response */
1787 			goto chunked;
1788 		}
1789 		/* Nothing more todo */
1790 		goto parsed;
1791 	}
1792 
1793 	if (http->expire != -1 && http->date != -1) {
1794 		if (http->expire <= http->date) {
1795 			/* ??? just pass */
1796 			nl7c_http_response_pass2++;
1797 			goto pass;
1798 		}
1799 		/* Have a valid expire and date so calc an lbolt expire */
1800 		uri->expire = ddi_get_lbolt() + SEC_TO_TICK(http->expire -
1801 		    http->date);
1802 	} else if (nl7c_uri_ttl != -1) {
1803 		/* No valid expire speced and we have a TTL */
1804 		uri->expire = ddi_get_lbolt() + SEC_TO_TICK(nl7c_uri_ttl);
1805 	}
1806 
1807 chunked:
1808 	/*
1809 	 * Chunk transfer parser and processing, a very simple parser
1810 	 * is implemented here for the common case were one, or more,
1811 	 * complete chunk(s) are passed in (i.e. length header + body).
1812 	 *
1813 	 * All other cases are passed.
1814 	 */
1815 	scp = cp;
1816 	while (uri->respclen != URI_LEN_NOVALUE && cp < sep) {
1817 		if (uri->respclen == URI_LEN_CONSUMED) {
1818 			/* Skip trailing "\r\n" */
1819 			if (cp == sep)
1820 				goto more;
1821 			if (*cp++ != '\r')
1822 				goto bad;
1823 			if (cp == sep)
1824 				goto more;
1825 			if (*cp++ != '\n')
1826 				goto bad;
1827 			uri->respclen = 0;
1828 		}
1829 		if (uri->respclen == 0) {
1830 			/* Parse a chunklen "[0-9A-Fa-f]+" */
1831 			char	c;
1832 			int	n = 0;
1833 
1834 			if (cp == sep)
1835 				goto more;
1836 			nl7c_http_response_chunkparse++;
1837 			while (cp < sep && (c = *cp++) != '\r') {
1838 				hd2i(c, n);
1839 				if (n == -1)
1840 					goto bad;
1841 			}
1842 			if (cp == sep)
1843 				goto more;
1844 			if (*cp++ != '\n')
1845 				goto bad;
1846 			uri->respclen = n;
1847 			if (n == 0) {
1848 				/* Last chunk, skip trailing "\r\n" */
1849 				if (cp == sep)
1850 					goto more;
1851 				if (*cp++ != '\r')
1852 					goto bad;
1853 				if (cp == sep)
1854 					goto more;
1855 				if (*cp++ != '\n')
1856 					goto bad;
1857 				uri->respclen = URI_LEN_NOVALUE;
1858 				break;
1859 			}
1860 		}
1861 		if (uri->respclen > 0) {
1862 			/* Consume some bytes for the current chunk */
1863 			uint32_t sz = (sep - cp);
1864 
1865 			if (sz > uri->respclen)
1866 				sz = uri->respclen;
1867 			uri->respclen -= sz;
1868 			cp += sz;
1869 			if (uri->respclen == 0) {
1870 				/* End of chunk, skip trailing "\r\n" */
1871 				if (cp == sep) {
1872 					uri->respclen = URI_LEN_CONSUMED;
1873 					goto more;
1874 				}
1875 				if (*cp++ != '\r')
1876 					goto bad;
1877 				if (cp == sep)
1878 					goto more;
1879 				if (*cp++ != '\n')
1880 					goto bad;
1881 				if (cp == sep)
1882 					goto more;
1883 			}
1884 		}
1885 	}
1886 	uri->resplen += (cp - scp);
1887 
1888 parsed:
1889 	*cpp = cp;
1890 	return (B_TRUE);
1891 
1892 pass:
1893 	*cpp = NULL;
1894 	return (B_TRUE);
1895 
1896 bad:
1897 	*cpp = NULL;
1898 	return (B_FALSE);
1899 
1900 more:
1901 	uri->resplen += (cp - scp);
1902 	*cpp = cp;
1903 	return (B_FALSE);
1904 }
1905 
1906 boolean_t
nl7c_http_log(uri_desc_t * quri,uri_desc_t * suri,nca_request_log_t * req,char ** wp,char ** pep,uint32_t * off)1907 nl7c_http_log(uri_desc_t *quri, uri_desc_t *suri, nca_request_log_t *req,
1908     char **wp, char **pep, uint32_t *off)
1909 {
1910 	http_t	*qhttp = quri->scheme;
1911 	http_t	*shttp = suri->scheme;
1912 	int	sz;
1913 
1914 	if (qhttp->uagent.cp != NULL) {
1915 		sz = (qhttp->uagent.ep - qhttp->uagent.cp);
1916 		if ((*wp + sz + 1) >= *pep) goto full;
1917 		bcopy(qhttp->uagent.cp, *wp, sz);
1918 		*wp += sz;
1919 		*(*wp)++ = 0;
1920 		sz++;
1921 		req->useragent_len = sz;
1922 		req->useragent = *off;
1923 		*off += sz;
1924 	}
1925 
1926 	req->response_len -= (uint_t)shttp->headlen;
1927 
1928 	req->method = NCA_GET;
1929 
1930 	if (qhttp->major == 1) {
1931 		if (qhttp->minor == 0) {
1932 			req->version = HTTP_1_0;
1933 		} else if (qhttp->minor == 1) {
1934 			req->version = HTTP_1_1;
1935 		} else {
1936 			req->version = HTTP_0_0;
1937 		}
1938 	} else if (qhttp->major == 0) {
1939 		req->version = HTTP_0_9;
1940 	} else {
1941 		req->version = HTTP_0_0;
1942 	}
1943 
1944 	return (B_FALSE);
1945 
1946 full:
1947 	return (B_TRUE);
1948 }
1949