xref: /titanic_50/usr/src/uts/common/fs/sockfs/nl7curi.c (revision 628680125482a37a45c692030029fd62a600f914)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/strsubr.h>
27 #include <sys/strsun.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <vm/seg_map.h>
31 #include <vm/seg_kpm.h>
32 #include <sys/condvar_impl.h>
33 #include <sys/sendfile.h>
34 #include <fs/sockfs/nl7c.h>
35 #include <fs/sockfs/nl7curi.h>
36 #include <fs/sockfs/socktpi_impl.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/tcp.h>
42 #include <inet/led.h>
43 #include <inet/mi.h>
44 
45 #include <inet/nca/ncadoorhdr.h>
46 #include <inet/nca/ncalogd.h>
47 #include <inet/nca/ncandd.h>
48 
49 #include <sys/promif.h>
50 
51 /*
52  * Some externs:
53  */
54 
/*
 * Symbols defined outside this file. They are declared here directly
 * rather than picked up from a header.
 */
extern boolean_t	nl7c_logd_enabled;
extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
			    time_t, ipaddr_t);
extern boolean_t	nl7c_close_addr(struct sonode *);
extern struct sonode	*nl7c_addr2portso(void *);
extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
61 
62 /*
63  * Various global tuneables:
64  */
65 
clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */

boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */

uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */

/*
 * Cache size accounting. nl7c_uri_max is lowered by uri_kmc_reclaim()
 * each time the kmem subsystem asks for memory back and is enforced by
 * nl7c_uri_reclaim(); nl7c_uri_bytes is decremented in nl7c_uri_inactive()
 * as kmem backed response data is freed.
 */
uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
74 
75 /*
76  * Locals:
77  */
78 
/* Forward declarations for static helpers defined further down this file. */
static int	uri_rd_response(struct sonode *, uri_desc_t *,
		    uri_rd_t *, boolean_t);
static int	uri_response(struct sonode *, uri_desc_t *);
82 
83 /*
84  * HTTP scheme functions called from nl7chttp.c:
85  */
86 
boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
/* Used by nl7c_resp_parse() to parse application response data */
boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
/* Used by uri_lookup() to compare the scheme private parts of two URIs */
boolean_t nl7c_http_cmp(void *, void *);
mblk_t *nl7c_http_persist(struct sonode *);
/* Used by nl7c_uri_inactive() to release a URI's scheme private data */
void nl7c_http_free(void *arg);
/* Called once from nl7c_uri_init() */
void nl7c_http_init(void);
93 
94 /*
95  * Counters that need to move to kstat and/or be removed:
96  */
97 
/*
 * Event counters. They are bumped with a plain ++ (no atomics), so under
 * concurrency the values are approximate.
 */
volatile uint64_t nl7c_uri_request = 0;
volatile uint64_t nl7c_uri_hit = 0;
volatile uint64_t nl7c_uri_pass = 0;
volatile uint64_t nl7c_uri_miss = 0;
volatile uint64_t nl7c_uri_temp = 0;
volatile uint64_t nl7c_uri_more = 0;
volatile uint64_t nl7c_uri_data = 0;
volatile uint64_t nl7c_uri_sendfilev = 0;
volatile uint64_t nl7c_uri_reclaim_calls = 0;	/* nl7c_uri_reclaim() calls */
volatile uint64_t nl7c_uri_reclaim_cnt = 0;	/* URIs reclaimed */
volatile uint64_t nl7c_uri_pass_urifail = 0;
volatile uint64_t nl7c_uri_pass_dupbfail = 0;
volatile uint64_t nl7c_uri_more_get = 0;
volatile uint64_t nl7c_uri_pass_method = 0;
volatile uint64_t nl7c_uri_pass_option = 0;
volatile uint64_t nl7c_uri_more_eol = 0;
volatile uint64_t nl7c_uri_more_http = 0;
volatile uint64_t nl7c_uri_pass_http = 0;
volatile uint64_t nl7c_uri_pass_addfail = 0;
volatile uint64_t nl7c_uri_pass_temp = 0;
volatile uint64_t nl7c_uri_expire = 0;	/* uri_lookup() dropped expired URI */
volatile uint64_t nl7c_uri_purge = 0;	/* uri_lookup() dropped on nocache */
volatile uint64_t nl7c_uri_NULL1 = 0;
volatile uint64_t nl7c_uri_NULL2 = 0;
volatile uint64_t nl7c_uri_close = 0;
volatile uint64_t nl7c_uri_temp_close = 0;
volatile uint64_t nl7c_uri_free = 0;	/* nl7c_urifree() of a hashed URI */
volatile uint64_t nl7c_uri_temp_free = 0; /* nl7c_urifree() of a URI_TEMP */
volatile uint64_t nl7c_uri_temp_mk = 0;	/* nl7c_resp_parse() made URI_TEMP */
volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
128 
129 /*
130  * Various kmem_cache_t's:
131  */
132 
/*
 * All four caches are created in nl7c_uri_init(). Only nl7c_uri_kmc
 * (uri_desc_t objects) registers a reclaim callback, uri_kmc_reclaim().
 */
kmem_cache_t *nl7c_uri_kmc;		/* uri_desc_t */
kmem_cache_t *nl7c_uri_rd_kmc;		/* uri_rd_t */
static kmem_cache_t *uri_desb_kmc;	/* uri_desb_t */
static kmem_cache_t *uri_segmap_kmc;	/* uri_segmap_t */

/* kmem reclaim callback for nl7c_uri_kmc, lowers nl7c_uri_max */
static void uri_kmc_reclaim(void *);

/* Evict URIs from the hash until nl7c_uri_bytes <= nl7c_uri_max */
static void nl7c_uri_reclaim(void);
141 
142 /*
143  * The URI hash is a dynamically sized A/B bucket hash, when the current
144  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
145  * the next P2Ps[] size is created.
146  *
 * All lookups are done in the current hash then the new hash (if any),
 * if there is a new hash then when a current hash bucket chain is examined
 * any uri_desc_t members will be migrated to the new hash and when the
 * last uri_desc_t has been migrated then the new hash will become the
 * current and the previous current hash will be freed leaving a single
 * hash.
153  *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
 * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
 * is placed on all uri_desc_t uri_hash_t list members.
159  *
160  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
161  * access and WRITER for write access. Note, WRITER is only required for
162  * hash geometry changes.
163  *
164  * uri_hash_which - which uri_hash_ab[] is the current hash.
165  *
166  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
167  *
168  * uri_hash_sz[] - the size for each uri_hash_ab[].
169  *
170  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
171  *
172  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
173  * a new uri_hash_ab[] needs to be created.
174  *
175  * uri_hash_ab[] - the uri_hash_t entries.
176  *
177  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
178  */
179 
/* One hash bucket: head of a singly linked chain of uri_desc_t. */
typedef struct uri_hash_s {
	struct uri_desc_s	*list;		/* List of uri_t(s) */
	kmutex_t		lock;		/* Guards list and its links */
} uri_hash_t;

#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */

/*
 * A/B hash state, each array is indexed by 0 or 1; uri_hash_which selects
 * the current hash and the other slot is the "new" hash (NULL when no
 * resize is in progress).
 */
static krwlock_t	uri_hash_access;	/* Guards everything below */
static uint32_t		uri_hash_which = 0;	/* Index of current hash */
static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0}; /* P2Ps[] index */
static uint32_t		uri_hash_sz[2] = {0, 0};	/* Bucket count */
static uint32_t		uri_hash_cnt[2] = {0, 0};	/* Hashed URI count */
static uint32_t		uri_hash_overflow[2] = {0, 0};	/* Resize threshold */
static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};	/* Bucket arrays */
static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL}; /* Reclaim cursor */
196 
197 /*
 * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2))
 * these primes have been found to be useful for prime sized hash tables.
200  */
201 
/*
 * Indexed by N (see uri_hash_n[]); slots 0 - 2 are unused and hold 0, the
 * trailing 0 follows the entry for the largest supported N.
 */
static const int P2Ps[] = {
	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
	6143, 12281, 24571, 49139, 98299, 196597, 393209,
	786431, 1572853, 3145721, 6291449, 12582893, 0};
206 
207 /*
208  * Hash macros:
209  *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return in c, *ep points to past end of (char *), on return *cp will
 *    point to the last char consumed.
214  *
215  *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
216  *    *cp to *ep to the unsigned hix, cp nor ep are modified.
217  *
218  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
219  *    a hash index 0 - (uri_hash_sz[which] - 1).
220  *
221  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
222  *    uri_desc_t members from hash from to hash to.
223  *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking
 *    check for cur hash empty, if so make new cur. Note, as this macro
 *    can change a hash chain it needs to be run under hash_access as
 *    RW_WRITER, further as it can change the new hash to cur any access
 *    to the hash state must be done after either dropping locks and
 *    starting over or making sure the global state is consistent after
 *    as before.
233  */
234 
/*
 * H2A(cp, ep, c) - decode the (up to) two hex digits following a '%'.
 *
 *	cp	on entry points at the '%', on return points at the last
 *		char consumed (advanced in place, so must be an lvalue).
 *	ep	points one past the end of the string, never dereferenced.
 *	c	receives the decoded value.
 *
 * Both upper and lower case hex digits are accepted. A char that isn't
 * a hex digit contributes 0. If the string ends before two digits have
 * been seen, c receives whatever was accumulated so far.
 */
#define	H2A(cp, ep, c) {						\
	int	_h = 2;							\
	int	_n = 0;							\
	char	_hc;							\
									\
	while (_h > 0 && ++(cp) < (ep)) {				\
		if (_h == 1)						\
			_n *= 0x10;					\
		_hc = *(cp);						\
		if (_hc >= '0' && _hc <= '9')				\
			_n += _hc - '0';				\
		else if (_hc >= 'a' && _hc <= 'f')			\
			_n += _hc - 'a' + 10;				\
		else if (_hc >= 'A' && _hc <= 'F')			\
			_n += _hc - 'A' + 10;				\
		_h--;							\
	}								\
	(c) = _n;							\
}
254 
/*
 * URI_HASH(hv, cp, ep) - fold the chars in [cp, ep) into hv with CHASH(),
 * a "%HH" escape is first decoded by H2A() and hashed as a single char.
 * Works on a private cursor, neither cp nor ep is modified.
 */
#define	URI_HASH(hv, cp, ep) {						\
	char	*_up;							\
	char	_uc;							\
									\
	for (_up = (cp); _up < (ep); _up++) {				\
		_uc = *_up;						\
		if (_uc == '%') {					\
			H2A(_up, (ep), _uc);				\
		}							\
		CHASH(hv, _uc);						\
	}								\
}
267 
/*
 * URI_HASH_IX(hix, which) - reduce the hash value hix, in place, to a
 * bucket index for uri_hash_ab[which].
 */
#define	URI_HASH_IX(hix, which) ((hix) %= uri_hash_sz[(which)])
269 
/*
 * URI_HASH_MIGRATE(from, hp, to) - empty the chain of bucket hp (which
 * belongs to uri_hash_ab[from]) by moving every member to the bucket it
 * hashes to in uri_hash_ab[to]. The per hash counts are adjusted and
 * each moved member has its hit count reset to 0. The source bucket lock
 * is held for the whole move, each destination bucket lock only while
 * that bucket's chain is being changed.
 */
#define	URI_HASH_MIGRATE(from, hp, to) {				\
	uri_desc_t	*_mv;						\
	uri_hash_t	*_dst;						\
	uint32_t	_ix;						\
									\
	mutex_enter(&(hp)->lock);					\
	for (_mv = (hp)->list; _mv != NULL; _mv = (hp)->list) {		\
		/* Pop the head of the source chain */			\
		(hp)->list = _mv->hash;					\
		atomic_add_32(&uri_hash_cnt[(from)], -1);		\
		atomic_add_32(&uri_hash_cnt[(to)], 1);			\
		/* Push it on the head of its destination chain */	\
		_ix = _mv->hvalue;					\
		_ix %= uri_hash_sz[(to)];				\
		_dst = &uri_hash_ab[(to)][_ix];				\
		mutex_enter(&_dst->lock);				\
		_mv->hash = _dst->list;					\
		_dst->list = _mv;					\
		_mv->hit = 0;						\
		mutex_exit(&_dst->lock);				\
	}								\
	mutex_exit(&(hp)->lock);					\
}
291 
/*
 * URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink uri from the chain of
 * bucket hp, a member of uri_hash_ab[cur]; puri is uri's predecessor on
 * the chain or NULL if uri is the chain head.
 *
 * If that leaves hash cur empty while a distinct new hash exists, cur's
 * bucket array is freed and new becomes the current hash, otherwise the
 * lru scan pointer for cur is set to hp.
 *
 * Callers that didn't find the URI in the current hash retry in the new
 * hash and then invoke this macro with cur == new. In that case the hash
 * being unlinked from must never be freed or promoted (the other hash is
 * still the current one), hence the (cur) != (new) test.
 *
 * Must be called with uri_hash_access held as RW_WRITER. The caller still
 * owns the hash's reference on uri and must REF_RELE() it.
 */
#define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
	if ((puri) != NULL) {						\
		(puri)->hash = (uri)->hash;				\
	} else {							\
		(hp)->list = (uri)->hash;				\
	}								\
	if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 &&		\
	    (cur) != (new) && uri_hash_ab[(new)] != NULL) {		\
		kmem_free(uri_hash_ab[(cur)],				\
		    sizeof (uri_hash_t) * uri_hash_sz[(cur)]);		\
		uri_hash_ab[(cur)] = NULL;				\
		uri_hash_lru[(cur)] = NULL;				\
		uri_hash_which = (new);					\
	} else {							\
		uri_hash_lru[(cur)] = (hp);				\
	}								\
}
309 
310 void
311 nl7c_uri_init(void)
312 {
313 	uint32_t	cur = uri_hash_which;
314 
315 	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
316 
317 	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
318 	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
319 	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
320 	    KM_SLEEP);
321 	uri_hash_lru[cur] = uri_hash_ab[cur];
322 
323 	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
324 	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
325 
326 	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
327 	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
328 
329 	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
330 	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
331 
332 	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
333 	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
334 
335 	nl7c_http_init();
336 }
337 
338 #define	CV_SZ	16
339 
340 void
341 nl7c_mi_report_hash(mblk_t *mp)
342 {
343 	uri_hash_t	*hp, *pend;
344 	uri_desc_t	*uri;
345 	uint32_t	cur;
346 	uint32_t	new;
347 	int		n, nz, tot;
348 	uint32_t	cv[CV_SZ + 1];
349 
350 	rw_enter(&uri_hash_access, RW_READER);
351 	cur = uri_hash_which;
352 	new = cur ? 0 : 1;
353 next:
354 	for (n = 0; n <= CV_SZ; n++)
355 		cv[n] = 0;
356 	nz = 0;
357 	tot = 0;
358 	hp = &uri_hash_ab[cur][0];
359 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
360 	while (hp < pend) {
361 		n = 0;
362 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
363 			n++;
364 		}
365 		tot += n;
366 		if (n > 0)
367 			nz++;
368 		if (n > CV_SZ)
369 			n = CV_SZ;
370 		cv[n]++;
371 		hp++;
372 	}
373 
374 	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
375 	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
376 	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
377 	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
378 	for (n = 1; n < CV_SZ; n++) {
379 		int	pn = 0;
380 		char	pv[5];
381 		char	*pp = pv;
382 
383 		for (pn = n; pn < 1000; pn *= 10)
384 			*pp++ = ' ';
385 		*pp = 0;
386 		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
387 	}
388 	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
389 
390 	if (cur != new && uri_hash_ab[new] != NULL) {
391 		cur = new;
392 		goto next;
393 	}
394 	rw_exit(&uri_hash_access);
395 }
396 
397 void
398 nl7c_mi_report_uri(mblk_t *mp)
399 {
400 	uri_hash_t	*hp;
401 	uri_desc_t	*uri;
402 	uint32_t	cur;
403 	uint32_t	new;
404 	int		ix;
405 	int		ret;
406 	char		sc;
407 
408 	rw_enter(&uri_hash_access, RW_READER);
409 	cur = uri_hash_which;
410 	new = cur ? 0 : 1;
411 next:
412 	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
413 		hp = &uri_hash_ab[cur][ix];
414 		mutex_enter(&hp->lock);
415 		uri = hp->list;
416 		while (uri != NULL) {
417 			sc = *(uri->path.ep);
418 			*(uri->path.ep) = 0;
419 			ret = mi_mpprintf(mp, "%s: %d %d %d",
420 			    uri->path.cp, (int)uri->resplen,
421 			    (int)uri->respclen, (int)uri->count);
422 			*(uri->path.ep) = sc;
423 			if (ret == -1) break;
424 			uri = uri->hash;
425 		}
426 		mutex_exit(&hp->lock);
427 		if (ret == -1) break;
428 	}
429 	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
430 		cur = new;
431 		goto next;
432 	}
433 	rw_exit(&uri_hash_access);
434 }
435 
436 /*
437  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
438  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
439  * will be freed by REF_RELE() on return.
440  */
441 
442 void
443 nl7c_uri_inactive(uri_desc_t *uri)
444 {
445 	int64_t	 bytes = 0;
446 
447 	if (uri->tail) {
448 		uri_rd_t *rdp = &uri->response;
449 		uri_rd_t *free = NULL;
450 
451 		while (rdp) {
452 			if (rdp->off == -1) {
453 				bytes += rdp->sz;
454 				kmem_free(rdp->data.kmem, rdp->sz);
455 			} else {
456 				VN_RELE(rdp->data.vnode);
457 			}
458 			rdp = rdp->next;
459 			if (free != NULL) {
460 				kmem_cache_free(nl7c_uri_rd_kmc, free);
461 			}
462 			free = rdp;
463 		}
464 	}
465 	if (bytes) {
466 		atomic_add_64(&nl7c_uri_bytes, -bytes);
467 	}
468 	if (uri->scheme != NULL) {
469 		nl7c_http_free(uri->scheme);
470 	}
471 	if (uri->reqmp) {
472 		freeb(uri->reqmp);
473 	}
474 }
475 
476 /*
477  * The reclaim is called by the kmem subsystem when kmem is running
478  * low. More work is needed to determine the best reclaim policy, for
479  * now we just manipulate the nl7c_uri_max global maximum bytes threshold
480  * value using a simple arithmetic backoff of the value every time this
481  * function is called then call uri_reclaim() to enforce it.
482  *
483  * Note, this value remains in place and enforced for all subsequent
484  * URI request/response processing.
485  *
486  * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
487  * the first call here set it to the current uri_bytes value then backoff
488  * from there.
489  *
490  * XXX how do we determine when to increase nl7c_uri_max ???
491  */
492 
493 /*ARGSUSED*/
494 static void
495 uri_kmc_reclaim(void *arg)
496 {
497 	uint64_t new_max;
498 
499 	if ((new_max = nl7c_uri_max) == 0) {
500 		/* Currently infinite, initialize to current bytes used */
501 		nl7c_uri_max = nl7c_uri_bytes;
502 		new_max = nl7c_uri_bytes;
503 	}
504 	if (new_max > 1) {
505 		/* Lower max_bytes to 93% of current value */
506 		new_max >>= 1;			/* 50% */
507 		new_max += (new_max >> 1);	/* 75% */
508 		new_max += (new_max >> 2);	/* 93% */
509 		if (new_max < nl7c_uri_max)
510 			nl7c_uri_max = new_max;
511 		else
512 			nl7c_uri_max = 1;
513 	}
514 	nl7c_uri_reclaim();
515 }
516 
517 /*
518  * Delete a uri_desc_t from the URI hash.
519  */
520 
521 static void
522 uri_delete(uri_desc_t *del)
523 {
524 	uint32_t	hix;
525 	uri_hash_t	*hp;
526 	uri_desc_t	*uri;
527 	uri_desc_t	*puri;
528 	uint32_t	cur;
529 	uint32_t	new;
530 
531 	ASSERT(del->hash != URI_TEMP);
532 	rw_enter(&uri_hash_access, RW_WRITER);
533 	cur = uri_hash_which;
534 	new = cur ? 0 : 1;
535 next:
536 	puri = NULL;
537 	hix = del->hvalue;
538 	URI_HASH_IX(hix, cur);
539 	hp = &uri_hash_ab[cur][hix];
540 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
541 		if (uri != del) {
542 			puri = uri;
543 			continue;
544 		}
545 		/*
546 		 * Found the URI, unlink from the hash chain,
547 		 * drop locks, ref release it.
548 		 */
549 		URI_HASH_UNLINK(cur, new, hp, puri, uri);
550 		rw_exit(&uri_hash_access);
551 		REF_RELE(uri);
552 		return;
553 	}
554 	if (cur != new && uri_hash_ab[new] != NULL) {
555 		/*
556 		 * Not found in current hash and have a new hash so
557 		 * check the new hash next.
558 		 */
559 		cur = new;
560 		goto next;
561 	}
562 	rw_exit(&uri_hash_access);
563 }
564 
565 /*
566  * Add a uri_desc_t to the URI hash.
567  */
568 
569 static void
570 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
571 {
572 	uint32_t	hix;
573 	uri_hash_t	*hp;
574 	uint32_t	cur = uri_hash_which;
575 	uint32_t	new = cur ? 0 : 1;
576 
577 	/*
578 	 * Caller of uri_add() must hold the uri_hash_access rwlock.
579 	 */
580 	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
581 	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
582 	/*
583 	 * uri_add() always succeeds so add a hash ref to the URI now.
584 	 */
585 	REF_HOLD(uri);
586 again:
587 	hix = uri->hvalue;
588 	URI_HASH_IX(hix, cur);
589 	if (uri_hash_ab[new] == NULL &&
590 	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
591 		/*
592 		 * Easy case, no new hash and current hasn't overflowed,
593 		 * add URI to current hash and return.
594 		 *
595 		 * Note, the check for uri_hash_cnt[] above aren't done
596 		 * atomictally, i.e. multiple threads can be in this code
597 		 * as RW_READER and update the cnt[], this isn't a problem
598 		 * as the check is only advisory.
599 		 */
600 	fast:
601 		atomic_add_32(&uri_hash_cnt[cur], 1);
602 		hp = &uri_hash_ab[cur][hix];
603 		mutex_enter(&hp->lock);
604 		uri->hash = hp->list;
605 		hp->list = uri;
606 		mutex_exit(&hp->lock);
607 		rw_exit(&uri_hash_access);
608 		return;
609 	}
610 	if (uri_hash_ab[new] == NULL) {
611 		/*
612 		 * Need a new a or b hash, if not already RW_WRITER
613 		 * try to upgrade our lock to writer.
614 		 */
615 		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
616 			/*
617 			 * Upgrade failed, we can't simple exit and reenter
618 			 * the lock as after the exit and before the reenter
619 			 * the whole world can change so just wait for writer
620 			 * then do everything again.
621 			 */
622 			if (nonblocking) {
623 				/*
624 				 * Can't block, use fast-path above.
625 				 *
626 				 * XXX should have a background thread to
627 				 * handle new ab[] in this case so as to
628 				 * not overflow the cur hash to much.
629 				 */
630 				goto fast;
631 			}
632 			rw_exit(&uri_hash_access);
633 			rwlock = RW_WRITER;
634 			rw_enter(&uri_hash_access, rwlock);
635 			cur = uri_hash_which;
636 			new = cur ? 0 : 1;
637 			goto again;
638 		}
639 		rwlock = RW_WRITER;
640 		if (uri_hash_ab[new] == NULL) {
641 			/*
642 			 * Still need a new hash, allocate and initialize
643 			 * the new hash.
644 			 */
645 			uri_hash_n[new] = uri_hash_n[cur] + 1;
646 			if (uri_hash_n[new] == 0) {
647 				/*
648 				 * No larger P2Ps[] value so use current,
649 				 * i.e. 2 of the largest are better than 1 ?
650 				 */
651 				uri_hash_n[new] = uri_hash_n[cur];
652 				cmn_err(CE_NOTE, "NL7C: hash index overflow");
653 			}
654 			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
655 			ASSERT(uri_hash_cnt[new] == 0);
656 			uri_hash_overflow[new] = uri_hash_sz[new] *
657 			    URI_HASH_AVRG;
658 			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
659 			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
660 			    KM_SLEEP);
661 			if (uri_hash_ab[new] == NULL) {
662 				/*
663 				 * Alloc failed, use fast-path above.
664 				 *
665 				 * XXX should have a background thread to
666 				 * handle new ab[] in this case so as to
667 				 * not overflow the cur hash to much.
668 				 */
669 				goto fast;
670 			}
671 			uri_hash_lru[new] = uri_hash_ab[new];
672 		}
673 	}
674 	/*
675 	 * Hashed against current hash so migrate any current hash chain
676 	 * members, if any.
677 	 *
678 	 * Note, the hash chain list can be checked for a non empty list
679 	 * outside of the hash chain list lock as the hash chain struct
680 	 * can't be destroyed while in the uri_hash_access rwlock, worst
681 	 * case is that a non empty list is found and after acquiring the
682 	 * lock another thread beats us to it (i.e. migrated the list).
683 	 */
684 	hp = &uri_hash_ab[cur][hix];
685 	if (hp->list != NULL) {
686 		URI_HASH_MIGRATE(cur, hp, new);
687 	}
688 	/*
689 	 * If new hash has overflowed before current hash has been
690 	 * completely migrated then walk all current hash chains and
691 	 * migrate list members now.
692 	 */
693 	if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) {
694 		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
695 			hp = &uri_hash_ab[cur][hix];
696 			if (hp->list != NULL) {
697 				URI_HASH_MIGRATE(cur, hp, new);
698 			}
699 		}
700 	}
701 	/*
702 	 * Add URI to new hash.
703 	 */
704 	hix = uri->hvalue;
705 	URI_HASH_IX(hix, new);
706 	hp = &uri_hash_ab[new][hix];
707 	mutex_enter(&hp->lock);
708 	uri->hash = hp->list;
709 	hp->list = uri;
710 	mutex_exit(&hp->lock);
711 	/*
712 	 * Last, check to see if last cur hash chain has been
713 	 * migrated, if so free cur hash and make new hash cur.
714 	 */
715 	if (uri_hash_cnt[cur] == 0) {
716 		/*
717 		 * If we don't already hold the uri_hash_access rwlock for
718 		 * RW_WRITE try to upgrade to RW_WRITE and if successful
719 		 * check again and to see if still need to do the free.
720 		 */
721 		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
722 		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
723 			kmem_free(uri_hash_ab[cur],
724 			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
725 			uri_hash_ab[cur] = NULL;
726 			uri_hash_lru[cur] = NULL;
727 			uri_hash_which = new;
728 		}
729 	}
730 	rw_exit(&uri_hash_access);
731 }
732 
733 /*
734  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
735  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
736  * add B_TRUE use the request URI to create a new hash entry. Else if add
737  * B_FALSE ...
738  */
739 
740 static uri_desc_t *
741 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
742 {
743 	uint32_t	hix;
744 	uri_hash_t	*hp;
745 	uri_desc_t	*uri;
746 	uri_desc_t	*puri;
747 	uint32_t	cur;
748 	uint32_t	new;
749 	char		*rcp = ruri->path.cp;
750 	char		*rep = ruri->path.ep;
751 
752 again:
753 	rw_enter(&uri_hash_access, RW_READER);
754 	cur = uri_hash_which;
755 	new = cur ? 0 : 1;
756 nexthash:
757 	puri = NULL;
758 	hix = ruri->hvalue;
759 	URI_HASH_IX(hix, cur);
760 	hp = &uri_hash_ab[cur][hix];
761 	mutex_enter(&hp->lock);
762 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
763 		char	*ap = uri->path.cp;
764 		char	*bp = rcp;
765 		char	a, b;
766 
767 		/* Compare paths */
768 		while (bp < rep && ap < uri->path.ep) {
769 			if ((a = *ap) == '%') {
770 				/* Escaped hex multichar, convert it */
771 				H2A(ap, uri->path.ep, a);
772 			}
773 			if ((b = *bp) == '%') {
774 				/* Escaped hex multichar, convert it */
775 				H2A(bp, rep, b);
776 			}
777 			if (a != b) {
778 				/* Char's don't match */
779 				goto nexturi;
780 			}
781 			ap++;
782 			bp++;
783 		}
784 		if (bp != rep || ap != uri->path.ep) {
785 			/* Not same length */
786 			goto nexturi;
787 		}
788 		ap = uri->auth.cp;
789 		bp = ruri->auth.cp;
790 		if (ap != NULL) {
791 			if (bp == NULL) {
792 				/* URI has auth request URI doesn't */
793 				goto nexturi;
794 			}
795 			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
796 				if ((a = *ap) == '%') {
797 					/* Escaped hex multichar, convert it */
798 					H2A(ap, uri->path.ep, a);
799 				}
800 				if ((b = *bp) == '%') {
801 					/* Escaped hex multichar, convert it */
802 					H2A(bp, rep, b);
803 				}
804 				if (a != b) {
805 					/* Char's don't match */
806 					goto nexturi;
807 				}
808 				ap++;
809 				bp++;
810 			}
811 			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
812 				/* Not same length */
813 				goto nexturi;
814 			}
815 		} else if (bp != NULL) {
816 			/* URI doesn't have auth and request URI does */
817 			goto nexturi;
818 		}
819 		/*
820 		 * Have a path/auth match so before any other processing
821 		 * of requested URI, check for expire or request no cache
822 		 * purge.
823 		 */
824 		if (uri->expire >= 0 && uri->expire <= lbolt || ruri->nocache) {
825 			/*
826 			 * URI has expired or request specified to not use
827 			 * the cached version, unlink the URI from the hash
828 			 * chain, release all locks, release the hash ref
829 			 * on the URI, and last look it up again.
830 			 *
831 			 * Note, this will cause all variants of the named
832 			 * URI to be purged.
833 			 */
834 			if (puri != NULL) {
835 				puri->hash = uri->hash;
836 			} else {
837 				hp->list = uri->hash;
838 			}
839 			mutex_exit(&hp->lock);
840 			atomic_add_32(&uri_hash_cnt[cur], -1);
841 			rw_exit(&uri_hash_access);
842 			if (ruri->nocache)
843 				nl7c_uri_purge++;
844 			else
845 				nl7c_uri_expire++;
846 			REF_RELE(uri);
847 			goto again;
848 		}
849 		if (uri->scheme != NULL) {
850 			/*
851 			 * URI has scheme private qualifier(s), if request
852 			 * URI doesn't or if no match skip this URI.
853 			 */
854 			if (ruri->scheme == NULL ||
855 			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
856 				goto nexturi;
857 		} else if (ruri->scheme != NULL) {
858 			/*
859 			 * URI doesn't have scheme private qualifiers but
860 			 * request URI does, no match, skip this URI.
861 			 */
862 			goto nexturi;
863 		}
864 		/*
865 		 * Have a match, ready URI for return, first put a reference
866 		 * hold on the URI, if this URI is currently being processed
867 		 * then have to wait for the processing to be completed and
868 		 * redo the lookup, else return it.
869 		 */
870 		REF_HOLD(uri);
871 		mutex_enter(&uri->proclock);
872 		if (uri->proc != NULL) {
873 			/* The URI is being processed, wait for completion */
874 			mutex_exit(&hp->lock);
875 			rw_exit(&uri_hash_access);
876 			if (! nonblocking &&
877 			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
878 				/*
879 				 * URI has been processed but things may
880 				 * have changed while we were away so do
881 				 * most everything again.
882 				 */
883 				mutex_exit(&uri->proclock);
884 				REF_RELE(uri);
885 				goto again;
886 			} else {
887 				/*
888 				 * A nonblocking socket or an interrupted
889 				 * cv_wait_sig() in the first case can't
890 				 * block waiting for the processing of the
891 				 * uri hash hit uri to complete, in both
892 				 * cases just return failure to lookup.
893 				 */
894 				mutex_exit(&uri->proclock);
895 				REF_RELE(uri);
896 				return (NULL);
897 			}
898 		}
899 		mutex_exit(&uri->proclock);
900 		uri->hit++;
901 		mutex_exit(&hp->lock);
902 		rw_exit(&uri_hash_access);
903 		return (uri);
904 	nexturi:
905 		puri = uri;
906 	}
907 	mutex_exit(&hp->lock);
908 	if (cur != new && uri_hash_ab[new] != NULL) {
909 		/*
910 		 * Not found in current hash and have a new hash so
911 		 * check the new hash next.
912 		 */
913 		cur = new;
914 		goto nexthash;
915 	}
916 add:
917 	if (! add) {
918 		/* Lookup only so return failure */
919 		rw_exit(&uri_hash_access);
920 		return (NULL);
921 	}
922 	/*
923 	 * URI not hashed, finish intialization of the
924 	 * request URI, add it to the hash, return it.
925 	 */
926 	ruri->hit = 0;
927 	ruri->expire = -1;
928 	ruri->response.sz = 0;
929 	ruri->proc = (struct sonode *)~NULL;
930 	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
931 	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
932 	uri_add(ruri, RW_READER, nonblocking);
933 	/* uri_add() has done rw_exit(&uri_hash_access) */
934 	return (ruri);
935 }
936 
937 /*
938  * Reclaim URIs until max cache size threshold has been reached.
939  *
940  * A CLOCK based reclaim modified with a history (hit counter) counter.
941  */
942 
943 static void
944 nl7c_uri_reclaim(void)
945 {
946 	uri_hash_t	*hp, *start, *pend;
947 	uri_desc_t	*uri;
948 	uri_desc_t	*puri;
949 	uint32_t	cur;
950 	uint32_t	new;
951 
952 	nl7c_uri_reclaim_calls++;
953 again:
954 	rw_enter(&uri_hash_access, RW_WRITER);
955 	cur = uri_hash_which;
956 	new = cur ? 0 : 1;
957 next:
958 	hp = uri_hash_lru[cur];
959 	start = hp;
960 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
961 	while (nl7c_uri_bytes > nl7c_uri_max) {
962 		puri = NULL;
963 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
964 			if (uri->hit != 0) {
965 				/*
966 				 * Decrement URI activity counter and skip.
967 				 */
968 				uri->hit--;
969 				puri = uri;
970 				continue;
971 			}
972 			if (uri->proc != NULL) {
973 				/*
974 				 * Currently being processed by a socket, skip.
975 				 */
976 				continue;
977 			}
978 			/*
979 			 * Found a candidate, no hit(s) since added or last
980 			 * reclaim pass, unlink from it's hash chain, update
981 			 * lru scan pointer, drop lock, ref release it.
982 			 */
983 			URI_HASH_UNLINK(cur, new, hp, puri, uri);
984 			if (cur == uri_hash_which) {
985 				if (++hp == pend) {
986 					/* Wrap pointer */
987 					hp = uri_hash_ab[cur];
988 				}
989 				uri_hash_lru[cur] = hp;
990 			}
991 			rw_exit(&uri_hash_access);
992 			REF_RELE(uri);
993 			nl7c_uri_reclaim_cnt++;
994 			goto again;
995 		}
996 		if (++hp == pend) {
997 			/* Wrap pointer */
998 			hp = uri_hash_ab[cur];
999 		}
1000 		if (hp == start) {
1001 			if (cur != new && uri_hash_ab[new] != NULL) {
1002 				/*
1003 				 * Done with the current hash and have a
1004 				 * new hash so check the new hash next.
1005 				 */
1006 				cur = new;
1007 				goto next;
1008 			}
1009 		}
1010 	}
1011 	rw_exit(&uri_hash_access);
1012 }
1013 
1014 /*
1015  * Called for a socket which is being freed prior to close, e.g. errored.
1016  */
1017 
1018 void
1019 nl7c_urifree(struct sonode *so)
1020 {
1021 	sotpi_info_t *sti = SOTOTPI(so);
1022 	uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1023 
1024 	sti->sti_nl7c_uri = NULL;
1025 	if (uri->hash != URI_TEMP) {
1026 		uri_delete(uri);
1027 		mutex_enter(&uri->proclock);
1028 		uri->proc = NULL;
1029 		if (CV_HAS_WAITERS(&uri->waiting)) {
1030 			cv_broadcast(&uri->waiting);
1031 		}
1032 		mutex_exit(&uri->proclock);
1033 		nl7c_uri_free++;
1034 	} else {
1035 		/* No proclock as uri exclusively owned by so */
1036 		uri->proc = NULL;
1037 		nl7c_uri_temp_free++;
1038 	}
1039 	REF_RELE(uri);
1040 }
1041 
1042 /*
1043  * ...
1044  *
1045  *	< 0	need more data
1046  *
1047  *	  0	parse complete
1048  *
1049  *	> 0	parse error
1050  */
1051 
1052 volatile uint64_t nl7c_resp_pfail = 0;
1053 volatile uint64_t nl7c_resp_ntemp = 0;
1054 volatile uint64_t nl7c_resp_pass = 0;
1055 
1056 static int
1057 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058 {
1059 	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060 		if (data == NULL) {
1061 			/* Parse fail */
1062 			goto pfail;
1063 		}
1064 		/* More data */
1065 		data = NULL;
1066 	} else if (data == NULL) {
1067 		goto pass;
1068 	}
1069 	if (uri->hash != URI_TEMP && uri->nocache) {
1070 		/*
1071 		 * After response parse now no cache,
1072 		 * delete it from cache, wakeup any
1073 		 * waiters on this URI, make URI_TEMP.
1074 		 */
1075 		uri_delete(uri);
1076 		mutex_enter(&uri->proclock);
1077 		if (CV_HAS_WAITERS(&uri->waiting)) {
1078 			cv_broadcast(&uri->waiting);
1079 		}
1080 		mutex_exit(&uri->proclock);
1081 		uri->hash = URI_TEMP;
1082 		nl7c_uri_temp_mk++;
1083 	}
1084 	if (data == NULL) {
1085 		/* More data needed */
1086 		return (-1);
1087 	}
1088 	/* Success */
1089 	return (0);
1090 
1091 pfail:
1092 	nl7c_resp_pfail++;
1093 	return (EINVAL);
1094 
1095 pass:
1096 	nl7c_resp_pass++;
1097 	return (ENOTSUP);
1098 }
1099 
1100 /*
1101  * Called to sink application response data, the processing of the data
1102  * is the same for a cached or temp URI (i.e. a URI for which we aren't
1103  * going to cache the URI but want to parse it for detecting response
1104  * data end such that for a persistent connection we can parse the next
1105  * request).
1106  *
1107  * On return 0 is returned for sink success, > 0 on error, and < 0 on
1108  * no so URI (note, data not sinked).
1109  */
1110 
1111 int
1112 nl7c_data(struct sonode *so, uio_t *uio)
1113 {
1114 	sotpi_info_t	*sti = SOTOTPI(so);
1115 	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1116 	iovec_t		*iov;
1117 	int		cnt;
1118 	int		sz = uio->uio_resid;
1119 	char		*data, *alloc;
1120 	char		*bp;
1121 	uri_rd_t	*rdp;
1122 	boolean_t	first;
1123 	int		error, perror;
1124 
1125 	nl7c_uri_data++;
1126 
1127 	if (uri == NULL) {
1128 		/* Socket & NL7C out of sync, disable NL7C */
1129 		sti->sti_nl7c_flags = 0;
1130 		nl7c_uri_NULL1++;
1131 		return (-1);
1132 	}
1133 
1134 	if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
1135 		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1136 		first = B_TRUE;
1137 	} else {
1138 		first = B_FALSE;
1139 	}
1140 
1141 	alloc = kmem_alloc(sz, KM_SLEEP);
1142 	URI_RD_ADD(uri, rdp, sz, -1);
1143 	if (rdp == NULL) {
1144 		error = ENOMEM;
1145 		goto fail;
1146 	}
1147 
1148 	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1149 		uri_delete(uri);
1150 		uri->hash = URI_TEMP;
1151 	}
1152 	data = alloc;
1153 	alloc = NULL;
1154 	rdp->data.kmem = data;
1155 	atomic_add_64(&nl7c_uri_bytes, sz);
1156 
1157 	bp = data;
1158 	while (uio->uio_resid > 0) {
1159 		iov = uio->uio_iov;
1160 		if ((cnt = iov->iov_len) == 0) {
1161 			goto next;
1162 		}
1163 		cnt = MIN(cnt, uio->uio_resid);
1164 		error = xcopyin(iov->iov_base, bp, cnt);
1165 		if (error)
1166 			goto fail;
1167 
1168 		iov->iov_base += cnt;
1169 		iov->iov_len -= cnt;
1170 		uio->uio_resid -= cnt;
1171 		uio->uio_loffset += cnt;
1172 		bp += cnt;
1173 	next:
1174 		uio->uio_iov++;
1175 		uio->uio_iovcnt--;
1176 	}
1177 
1178 	/* Successfull sink of data, response parse the data */
1179 	perror = nl7c_resp_parse(so, uri, data, sz);
1180 
1181 	/* Send the data out the connection */
1182 	error = uri_rd_response(so, uri, rdp, first);
1183 	if (error)
1184 		goto fail;
1185 
1186 	/* Success */
1187 	if (perror == 0 &&
1188 	    ((uri->respclen == URI_LEN_NOVALUE &&
1189 	    uri->resplen == URI_LEN_NOVALUE) ||
1190 	    uri->count >= uri->resplen)) {
1191 		/*
1192 		 * No more data needed and no pending response
1193 		 * data or current data count >= response length
1194 		 * so close the URI processing for this so.
1195 		 */
1196 		nl7c_close(so);
1197 		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1198 			/* Not a persistent connection */
1199 			sti->sti_nl7c_flags = 0;
1200 		}
1201 	}
1202 
1203 	return (0);
1204 
1205 fail:
1206 	if (alloc != NULL) {
1207 		kmem_free(alloc, sz);
1208 	}
1209 	sti->sti_nl7c_flags = 0;
1210 	nl7c_urifree(so);
1211 
1212 	return (error);
1213 }
1214 
1215 /*
1216  * Called to read data from file "*fp" at offset "*off" of length "*len"
1217  * for a maximum of "*max_rem" bytes.
1218  *
1219  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1220  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
1221  * is updated with the number of bytes remaining to be read.
1222  *
1223  * Else, "NULL" is returned.
1224  */
1225 
1226 static char *
1227 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1228 {
1229 	vnode_t	*vp = fp->f_vnode;
1230 	int	flg = 0;
1231 	size_t	size = MIN(*len, max);
1232 	char	*data;
1233 	int	error;
1234 	uio_t	uio;
1235 	iovec_t	iov;
1236 
1237 	(void) VOP_RWLOCK(vp, flg, NULL);
1238 
1239 	if (*off > MAXOFFSET_T) {
1240 		VOP_RWUNLOCK(vp, flg, NULL);
1241 		*ret = EFBIG;
1242 		return (NULL);
1243 	}
1244 
1245 	if (*off + size > MAXOFFSET_T)
1246 		size = (ssize32_t)(MAXOFFSET_T - *off);
1247 
1248 	data = kmem_alloc(size, KM_SLEEP);
1249 
1250 	iov.iov_base = data;
1251 	iov.iov_len = size;
1252 	uio.uio_loffset = *off;
1253 	uio.uio_iov = &iov;
1254 	uio.uio_iovcnt = 1;
1255 	uio.uio_resid = size;
1256 	uio.uio_segflg = UIO_SYSSPACE;
1257 	uio.uio_llimit = MAXOFFSET_T;
1258 	uio.uio_fmode = fp->f_flag;
1259 
1260 	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1261 	VOP_RWUNLOCK(vp, flg, NULL);
1262 	*ret = error;
1263 	if (error) {
1264 		kmem_free(data, size);
1265 		return (NULL);
1266 	}
1267 	*len = size;
1268 	*off += size;
1269 	return (data);
1270 }
1271 
1272 /*
1273  * Called to sink application response sendfilev, as with nl7c_data() above
1274  * all the data will be processed by NL7C unless there's an error.
1275  */
1276 
1277 int
1278 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1279 	int sfvc, ssize_t *xfer)
1280 {
1281 	sotpi_info_t	*sti = SOTOTPI(so);
1282 	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1283 	file_t		*fp = NULL;
1284 	vnode_t		*vp = NULL;
1285 	char		*data = NULL;
1286 	u_offset_t	off;
1287 	int		len;
1288 	int		cnt;
1289 	int		total_count = 0;
1290 	char		*alloc;
1291 	uri_rd_t	*rdp;
1292 	int		max;
1293 	int		perror;
1294 	int		error = 0;
1295 	boolean_t	first = B_TRUE;
1296 
1297 	nl7c_uri_sendfilev++;
1298 
1299 	if (uri == NULL) {
1300 		/* Socket & NL7C out of sync, disable NL7C */
1301 		sti->sti_nl7c_flags = 0;
1302 		nl7c_uri_NULL2++;
1303 		return (0);
1304 	}
1305 
1306 	if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
1307 		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1308 
1309 	while (sfvc-- > 0) {
1310 		/*
1311 		 * off - the current sfv read file offset or user address.
1312 		 *
1313 		 * len - the current sfv length in bytes.
1314 		 *
1315 		 * cnt - number of bytes kmem_alloc()ed.
1316 		 *
1317 		 * alloc - the kmem_alloc()ed buffer of size "cnt".
1318 		 *
1319 		 * data - copy of "alloc" used for post alloc references.
1320 		 *
1321 		 * fp - the current sfv file_t pointer.
1322 		 *
1323 		 * vp - the current "*vp" vnode_t pointer.
1324 		 *
1325 		 * Note, for "data" and "fp" and "vp" a NULL value is used
1326 		 * when not allocated such that the common failure path "fail"
1327 		 * is used.
1328 		 */
1329 		off = sfvp->sfv_off;
1330 		len = sfvp->sfv_len;
1331 		cnt = len;
1332 
1333 		if (len == 0) {
1334 			sfvp++;
1335 			continue;
1336 		}
1337 
1338 		if (sfvp->sfv_fd == SFV_FD_SELF) {
1339 			/*
1340 			 * User memory, copyin() all the bytes.
1341 			 */
1342 			alloc = kmem_alloc(cnt, KM_SLEEP);
1343 			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1344 			if (error)
1345 				goto fail;
1346 		} else {
1347 			/*
1348 			 * File descriptor, prefetch some bytes.
1349 			 */
1350 			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1351 				error = EBADF;
1352 				goto fail;
1353 			}
1354 			if ((fp->f_flag & FREAD) == 0) {
1355 				error = EACCES;
1356 				goto fail;
1357 			}
1358 			vp = fp->f_vnode;
1359 			if (vp->v_type != VREG) {
1360 				error = EINVAL;
1361 				goto fail;
1362 			}
1363 			VN_HOLD(vp);
1364 
1365 			/* Read max_rem bytes from file for prefetch */
1366 			if (nl7c_use_kmem) {
1367 				max = cnt;
1368 			} else {
1369 				max = MAXBSIZE * nl7c_file_prefetch;
1370 			}
1371 			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1372 			if (alloc == NULL)
1373 				goto fail;
1374 
1375 			releasef(sfvp->sfv_fd);
1376 			fp = NULL;
1377 		}
1378 		URI_RD_ADD(uri, rdp, cnt, -1);
1379 		if (rdp == NULL) {
1380 			error = ENOMEM;
1381 			goto fail;
1382 		}
1383 		data = alloc;
1384 		alloc = NULL;
1385 		rdp->data.kmem = data;
1386 		total_count += cnt;
1387 		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1388 			uri_delete(uri);
1389 			uri->hash = URI_TEMP;
1390 		}
1391 
1392 		/* Response parse */
1393 		perror = nl7c_resp_parse(so, uri, data, len);
1394 
1395 		/* Send kmem data out the connection */
1396 		error = uri_rd_response(so, uri, rdp, first);
1397 
1398 		if (error)
1399 			goto fail;
1400 
1401 		if (sfvp->sfv_fd != SFV_FD_SELF) {
1402 			/*
1403 			 * File descriptor, if any bytes left save vnode_t.
1404 			 */
1405 			if (len > cnt) {
1406 				/* More file data so add it */
1407 				URI_RD_ADD(uri, rdp, len - cnt, off);
1408 				if (rdp == NULL) {
1409 					error = ENOMEM;
1410 					goto fail;
1411 				}
1412 				rdp->data.vnode = vp;
1413 
1414 				/* Send vnode data out the connection */
1415 				error = uri_rd_response(so, uri, rdp, first);
1416 			} else {
1417 				/* All file data fit in the prefetch */
1418 				VN_RELE(vp);
1419 			}
1420 			*fileoff += len;
1421 			vp = NULL;
1422 		}
1423 		*xfer += len;
1424 		sfvp++;
1425 
1426 		if (first)
1427 			first = B_FALSE;
1428 	}
1429 	if (total_count > 0) {
1430 		atomic_add_64(&nl7c_uri_bytes, total_count);
1431 	}
1432 	if (perror == 0 &&
1433 	    ((uri->respclen == URI_LEN_NOVALUE &&
1434 	    uri->resplen == URI_LEN_NOVALUE) ||
1435 	    uri->count >= uri->resplen)) {
1436 		/*
1437 		 * No more data needed and no pending response
1438 		 * data or current data count >= response length
1439 		 * so close the URI processing for this so.
1440 		 */
1441 		nl7c_close(so);
1442 		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1443 			/* Not a persistent connection */
1444 			sti->sti_nl7c_flags = 0;
1445 		}
1446 	}
1447 
1448 	return (0);
1449 
1450 fail:
1451 	if (error == EPIPE)
1452 		tsignal(curthread, SIGPIPE);
1453 
1454 	if (alloc != NULL)
1455 		kmem_free(data, len);
1456 
1457 	if (vp != NULL)
1458 		VN_RELE(vp);
1459 
1460 	if (fp != NULL)
1461 		releasef(sfvp->sfv_fd);
1462 
1463 	if (total_count > 0) {
1464 		atomic_add_64(&nl7c_uri_bytes, total_count);
1465 	}
1466 
1467 	sti->sti_nl7c_flags = 0;
1468 	nl7c_urifree(so);
1469 
1470 	return (error);
1471 }
1472 
1473 /*
1474  * Called for a socket which is closing or when an application has
1475  * completed sending all the response data (i.e. for a persistent
1476  * connection called once for each completed application response).
1477  */
1478 
1479 void
1480 nl7c_close(struct sonode *so)
1481 {
1482 	sotpi_info_t	*sti = SOTOTPI(so);
1483 	uri_desc_t 	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
1484 
1485 	if (uri == NULL) {
1486 		/*
1487 		 * No URI being processed so might be a listen()er
1488 		 * if so do any cleanup, else nothing more to do.
1489 		 */
1490 		if (so->so_state & SS_ACCEPTCONN) {
1491 			(void) nl7c_close_addr(so);
1492 		}
1493 		return;
1494 	}
1495 	sti->sti_nl7c_uri = NULL;
1496 	if (uri->hash != URI_TEMP) {
1497 		mutex_enter(&uri->proclock);
1498 		uri->proc = NULL;
1499 		if (CV_HAS_WAITERS(&uri->waiting)) {
1500 			cv_broadcast(&uri->waiting);
1501 		}
1502 		mutex_exit(&uri->proclock);
1503 		nl7c_uri_close++;
1504 	} else {
1505 		/* No proclock as uri exclusively owned by so */
1506 		uri->proc = NULL;
1507 		nl7c_uri_temp_close++;
1508 	}
1509 	REF_RELE(uri);
1510 	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1511 		nl7c_uri_reclaim();
1512 	}
1513 }
1514 
1515 /*
1516  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1517  * release the segmap mapping. Note, the uri_segmap_t will be freed by
1518  * REF_RELE() on return.
1519  */
1520 
1521 void
1522 uri_segmap_inactive(uri_segmap_t *smp)
1523 {
1524 	if (!segmap_kpm) {
1525 		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
1526 		    smp->len, F_SOFTUNLOCK, S_OTHER);
1527 	}
1528 	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1529 	VN_RELE(smp->vp);
1530 }
1531 
1532 /*
1533  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
1534  * release the reference, one per desballoc() of a segmap page, if a rd_t
1535  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
1536  * last kmem free the uri_desb_t.
1537  */
1538 
1539 static void
1540 uri_desb_free(uri_desb_t *desb)
1541 {
1542 	if (desb->segmap != NULL) {
1543 		REF_RELE(desb->segmap);
1544 	}
1545 	REF_RELE(desb->uri);
1546 	kmem_cache_free(uri_desb_kmc, desb);
1547 }
1548 
1549 /*
1550  * Segmap map up to a page of a uri_rd_t file descriptor.
1551  */
1552 
1553 uri_segmap_t *
1554 uri_segmap_map(uri_rd_t *rdp, int bytes)
1555 {
1556 	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1557 	int		len = MIN(rdp->sz, MAXBSIZE);
1558 
1559 	if (len > bytes)
1560 		len = bytes;
1561 
1562 	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1563 	segmap->len = len;
1564 	VN_HOLD(rdp->data.vnode);
1565 	segmap->vp = rdp->data.vnode;
1566 
1567 	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1568 	    segmap_kpm ? SM_FAULT : 0, S_READ);
1569 
1570 	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1571 	    F_SOFTLOCK, S_READ) != 0) {
1572 		REF_RELE(segmap);
1573 		return (NULL);
1574 	}
1575 	return (segmap);
1576 }
1577 
1578 /*
1579  * Chop up the kernel virtual memory area *data of size *sz bytes for
1580  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
1581  * the given template uri_desb_t *temp of max_mblk bytes per.
1582  *
1583  * The values of *data, *sz, and *bytes are updated on return, the
1584  * mblk_t chain is returned.
1585  */
1586 
1587 static mblk_t *
1588 uri_desb_chop(
1589 	char 		**data,
1590 	size_t		*sz,
1591 	int 		*bytes,
1592 	uri_desb_t 	*temp,
1593 	int		max_mblk,
1594 	char		*eoh,
1595 	mblk_t		*persist
1596 )
1597 {
1598 	char		*ldata = *data;
1599 	size_t		lsz = *sz;
1600 	int		lbytes = bytes ? *bytes : lsz;
1601 	uri_desb_t	*desb;
1602 	mblk_t		*mp = NULL;
1603 	mblk_t		*nmp, *pmp = NULL;
1604 	int		msz;
1605 
1606 	if (lbytes == 0 && lsz == 0)
1607 		return (NULL);
1608 
1609 	while (lbytes > 0 && lsz > 0) {
1610 		msz = MIN(lbytes, max_mblk);
1611 		msz = MIN(msz, lsz);
1612 		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1613 			msz = (eoh - ldata);
1614 			pmp = persist;
1615 			persist = NULL;
1616 			if (msz == 0) {
1617 				nmp = pmp;
1618 				pmp = NULL;
1619 				goto zero;
1620 			}
1621 		}
1622 		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1623 		REF_HOLD(temp->uri);
1624 		if (temp->segmap) {
1625 			REF_HOLD(temp->segmap);
1626 		}
1627 		bcopy(temp, desb, sizeof (*desb));
1628 		desb->frtn.free_arg = (caddr_t)desb;
1629 		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1630 		if (nmp == NULL) {
1631 			if (temp->segmap) {
1632 				REF_RELE(temp->segmap);
1633 			}
1634 			REF_RELE(temp->uri);
1635 			if (mp != NULL) {
1636 				mp->b_next = NULL;
1637 				freemsg(mp);
1638 			}
1639 			if (persist != NULL) {
1640 				freeb(persist);
1641 			}
1642 			return (NULL);
1643 		}
1644 		nmp->b_wptr += msz;
1645 	zero:
1646 		if (mp != NULL) {
1647 			mp->b_next->b_cont = nmp;
1648 		} else {
1649 			mp = nmp;
1650 		}
1651 		if (pmp != NULL) {
1652 			nmp->b_cont = pmp;
1653 			nmp = pmp;
1654 			pmp = NULL;
1655 		}
1656 		mp->b_next = nmp;
1657 		ldata += msz;
1658 		lsz -= msz;
1659 		lbytes -= msz;
1660 	}
1661 	*data = ldata;
1662 	*sz = lsz;
1663 	if (bytes)
1664 		*bytes = lbytes;
1665 	return (mp);
1666 }
1667 
1668 /*
1669  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
1670  * the entire mblk_t chain down without flow-control checks.
1671  */
1672 
1673 static int
1674 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1675 {
1676 	struct stdata *stp;
1677 	int error = 0;
1678 
1679 	ASSERT(vp->v_stream);
1680 	stp = vp->v_stream;
1681 
1682 	/* Fast check of flags before acquiring the lock */
1683 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1684 		mutex_enter(&stp->sd_lock);
1685 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1686 		mutex_exit(&stp->sd_lock);
1687 		if (error != 0) {
1688 			if (!(stp->sd_flag & STPLEX) &&
1689 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
1690 				error = EPIPE;
1691 			}
1692 			return (error);
1693 		}
1694 	}
1695 	putnext(stp->sd_wrq, mp);
1696 	return (0);
1697 }
1698 
1699 /*
1700  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1701  */
1702 
1703 static int
1704 uri_rd_response(struct sonode *so,
1705     uri_desc_t *uri,
1706     uri_rd_t *rdp,
1707     boolean_t first)
1708 {
1709 	vnode_t		*vp = SOTOV(so);
1710 	int		max_mblk = (int)vp->v_stream->sd_maxblk;
1711 	int		wsz;
1712 	mblk_t		*mp, *wmp, *persist;
1713 	int		write_bytes;
1714 	uri_rd_t	rd;
1715 	uri_desb_t	desb;
1716 	uri_segmap_t	*segmap = NULL;
1717 	char		*segmap_data;
1718 	size_t		segmap_sz;
1719 	int		error;
1720 	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1721 	    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1722 
1723 
1724 	/* Initialize template uri_desb_t */
1725 	desb.frtn.free_func = uri_desb_free;
1726 	desb.frtn.free_arg = NULL;
1727 	desb.uri = uri;
1728 
1729 	/* Get a local copy of the rd_t */
1730 	bcopy(rdp, &rd, sizeof (rd));
1731 	do {
1732 		if (first) {
1733 			/*
1734 			 * For first kstrwrite() enough data to get
1735 			 * things going, note non blocking version of
1736 			 * kstrwrite() will be used below.
1737 			 */
1738 			write_bytes = P2ROUNDUP((max_mblk * 4),
1739 			    MAXBSIZE * nl7c_file_prefetch);
1740 		} else {
1741 			if ((write_bytes = so->so_sndbuf) == 0)
1742 				write_bytes = vp->v_stream->sd_qn_maxpsz;
1743 			ASSERT(write_bytes > 0);
1744 			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1745 		}
1746 		/*
1747 		 * Chop up to a write_bytes worth of data.
1748 		 */
1749 		wmp = NULL;
1750 		wsz = write_bytes;
1751 		do {
1752 			if (rd.sz == 0)
1753 				break;
1754 			if (rd.off == -1) {
1755 				if (uri->eoh >= rd.data.kmem &&
1756 				    uri->eoh < &rd.data.kmem[rd.sz]) {
1757 					persist = nl7c_http_persist(so);
1758 				} else {
1759 					persist = NULL;
1760 				}
1761 				desb.segmap = NULL;
1762 				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1763 				    &wsz, &desb, max_mblk, uri->eoh, persist);
1764 				if (mp == NULL) {
1765 					error = ENOMEM;
1766 					goto invalidate;
1767 				}
1768 			} else {
1769 				if (segmap == NULL) {
1770 					segmap = uri_segmap_map(&rd,
1771 					    write_bytes);
1772 					if (segmap == NULL) {
1773 						error = ENOMEM;
1774 						goto invalidate;
1775 					}
1776 					desb.segmap = segmap;
1777 					segmap_data = segmap->base;
1778 					segmap_sz = segmap->len;
1779 				}
1780 				mp = uri_desb_chop(&segmap_data, &segmap_sz,
1781 				    &wsz, &desb, max_mblk, NULL, NULL);
1782 				if (mp == NULL) {
1783 					error = ENOMEM;
1784 					goto invalidate;
1785 				}
1786 				if (segmap_sz == 0) {
1787 					rd.sz -= segmap->len;
1788 					rd.off += segmap->len;
1789 					REF_RELE(segmap);
1790 					segmap = NULL;
1791 				}
1792 			}
1793 			if (wmp == NULL) {
1794 				wmp = mp;
1795 			} else {
1796 				wmp->b_next->b_cont = mp;
1797 				wmp->b_next = mp->b_next;
1798 				mp->b_next = NULL;
1799 			}
1800 		} while (wsz > 0 && rd.sz > 0);
1801 
1802 		wmp->b_next = NULL;
1803 		if (first) {
1804 			/* First kstrwrite(), use noqwait */
1805 			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1806 				goto invalidate;
1807 			/*
1808 			 * For the rest of the kstrwrite()s use SO_SNDBUF
1809 			 * worth of data at a time, note these kstrwrite()s
1810 			 * may (will) block one or more times.
1811 			 */
1812 			first = B_FALSE;
1813 		} else {
1814 			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1815 				if (error == EAGAIN) {
1816 					nl7c_uri_rd_EAGAIN++;
1817 					if ((error =
1818 					    kstrwritempnoqwait(vp, wmp)) != 0)
1819 						goto invalidate;
1820 				} else
1821 					goto invalidate;
1822 			}
1823 		}
1824 	} while (rd.sz > 0);
1825 
1826 	return (0);
1827 
1828 invalidate:
1829 	if (segmap) {
1830 		REF_RELE(segmap);
1831 	}
1832 	if (wmp)
1833 		freemsg(wmp);
1834 
1835 	return (error);
1836 }
1837 
1838 /*
1839  * Send the URI uri_desc_t *uri response out the socket_t *so.
1840  */
1841 
1842 static int
1843 uri_response(struct sonode *so, uri_desc_t *uri)
1844 {
1845 	uri_rd_t	*rdp = &uri->response;
1846 	boolean_t	first = B_TRUE;
1847 	int		error;
1848 
1849 	while (rdp != NULL) {
1850 		error = uri_rd_response(so, uri, rdp, first);
1851 		if (error != 0) {
1852 			goto invalidate;
1853 		}
1854 		first = B_FALSE;
1855 		rdp = rdp->next;
1856 	}
1857 	return (0);
1858 
1859 invalidate:
1860 	uri_delete(uri);
1861 	return (error);
1862 }
1863 
/*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
 *
 *    pchar       = unreserved | escaped |
 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 *    unreserved  = alphanum | mark
 *
 *    alphanum    = alpha | digit
 *
 *    alpha       = lowalpha | upalpha
 *
 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
 *                  "y" | "z"
 *
 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
 *                  "Y" | "Z"
 *
 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *                  "8" | "9"
 *
 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 *    escaped     = "%" hex hex
 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                  "a" | "b" | "c" | "d" | "e" | "f"
 *
 * Note, the path segment separator "/" (0x2F) is also marked as valid.
 *
 * The array has 128 entries, one per 7-bit character value, so a char
 * must be masked with PCHARS_MASK before being used as an index.
 */

static char pchars[] = {
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
    0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
    1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
    1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
    1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
};

#define	PCHARS_MASK 0x7F
1917 
1918 /*
1919  * This is the main L7 request message parse, we are called each time
1920  * new data is availble for a socket, each time a single buffer of the
1921  * entire message to date is given.
1922  *
1923  * Here we parse the request looking for the URI, parse it, and if a
1924  * supported scheme call the scheme parser to commplete the parse of any
1925  * headers which may further qualify the identity of the requested object
1926  * then lookup it up in the URI hash.
1927  *
1928  * Return B_TRUE for more processing.
1929  *
1930  * Note, at this time the parser supports the generic message format as
1931  * specified in RFC 822 with potentional limitations as specified in RFC
1932  * 2616 for HTTP messages.
1933  *
1934  * Note, the caller supports an mblk_t chain, for now the parser(s)
1935  * require the complete header in a single mblk_t. This is the common
1936  * case and certainly for high performance environments, if at a future
1937  * date mblk_t chains are important the parse can be reved to process
1938  * mblk_t chains.
1939  */
1940 
1941 boolean_t
1942 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1943 {
1944 	sotpi_info_t *sti = SOTOTPI(so);
1945 	char	*cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
1946 	char	*ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
1947 	char	*get = "GET ";
1948 	char	*post = "POST ";
1949 	char	c;
1950 	char	*uris;
1951 	uri_desc_t *uri = NULL;
1952 	uri_desc_t *ruri = NULL;
1953 	mblk_t	*reqmp;
1954 	uint32_t hv = 0;
1955 
1956 	if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
1957 		nl7c_uri_pass_dupbfail++;
1958 		goto pass;
1959 	}
1960 	/*
1961 	 * Allocate and initialize minimumal state for the request
1962 	 * uri_desc_t, in the cache hit case this uri_desc_t will
1963 	 * be freed.
1964 	 */
1965 	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1966 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1967 	uri->hash = NULL;
1968 	uri->tail = NULL;
1969 	uri->scheme = NULL;
1970 	uri->count = 0;
1971 	uri->reqmp = reqmp;
1972 
1973 	/*
1974 	 * Set request time to current time.
1975 	 */
1976 	sti->sti_nl7c_rtime = gethrestime_sec();
1977 
1978 	/*
1979 	 * Parse the Request-Line for the URI.
1980 	 *
1981 	 * For backwards HTTP version compatable reasons skip any leading
1982 	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1983 	 */
1984 	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1985 		cp++;
1986 	}
1987 	while (cp < ep && *get == *cp) {
1988 		get++;
1989 		cp++;
1990 	}
1991 	if (*get != 0) {
1992 		/* Note a "GET", check for "POST" */
1993 		while (cp < ep && *post == *cp) {
1994 			post++;
1995 			cp++;
1996 		}
1997 		if (*post != 0) {
1998 			if (cp == ep) {
1999 				nl7c_uri_more_get++;
2000 				goto more;
2001 			}
2002 			/* Not a "GET" or a "POST", just pass */
2003 			nl7c_uri_pass_method++;
2004 			goto pass;
2005 		}
2006 		/* "POST", don't cache but still may want to parse */
2007 		uri->hash = URI_TEMP;
2008 	}
2009 	/*
2010 	 * Skip over URI path char(s) and save start and past end pointers.
2011 	 */
2012 	uris = cp;
2013 	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
2014 		if (c == '?') {
2015 			/* Don't cache but still may want to parse */
2016 			uri->hash = URI_TEMP;
2017 		}
2018 		CHASH(hv, c);
2019 		cp++;
2020 	}
2021 	if (c != '\r' && cp == ep) {
2022 		nl7c_uri_more_eol++;
2023 		goto more;
2024 	}
2025 	/*
2026 	 * Request-Line URI parsed, pass the rest of the request on
2027 	 * to the the http scheme parse.
2028 	 */
2029 	uri->path.cp = uris;
2030 	uri->path.ep = cp;
2031 	uri->hvalue = hv;
2032 	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2033 		/*
2034 		 * Parse not successful or pass on request, the pointer
2035 		 * to the parse pointer "cp" is overloaded such that ! NULL
2036 		 * for more data and NULL for bad parse of request or pass.
2037 		 */
2038 		if (cp != NULL) {
2039 			nl7c_uri_more_http++;
2040 			goto more;
2041 		}
2042 		nl7c_uri_pass_http++;
2043 		goto pass;
2044 	}
2045 	if (uri->nocache) {
2046 		uri->hash = URI_TEMP;
2047 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2048 	} else if (uri->hash == URI_TEMP) {
2049 		uri->nocache = B_TRUE;
2050 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2051 	}
2052 
2053 	if (uri->hash == URI_TEMP) {
2054 		if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
2055 			/* Temporary URI so skip hash processing */
2056 			nl7c_uri_request++;
2057 			nl7c_uri_temp++;
2058 			goto temp;
2059 		}
2060 		/* Not persistent so not interested in the response */
2061 		nl7c_uri_pass_temp++;
2062 		goto pass;
2063 	}
2064 	/*
2065 	 * Check the URI hash for a cached response, save the request
2066 	 * uri in case we need it below.
2067 	 */
2068 	ruri = uri;
2069 	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2070 		/*
2071 		 * Failed to lookup due to nonblocking wait required,
2072 		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2073 		 * failure, ... Just pass on this request.
2074 		 */
2075 		nl7c_uri_pass_addfail++;
2076 		goto pass;
2077 	}
2078 	nl7c_uri_request++;
2079 	if (uri->response.sz > 0) {
2080 		/*
2081 		 * We have the response cached, update recv mblk rptr
2082 		 * to reflect the data consumed in parse.
2083 		 */
2084 		mblk_t	*mp = sti->sti_nl7c_rcv_mp;
2085 
2086 		if (cp == (char *)mp->b_wptr) {
2087 			sti->sti_nl7c_rcv_mp = mp->b_cont;
2088 			mp->b_cont = NULL;
2089 			freeb(mp);
2090 		} else {
2091 			mp->b_rptr = (unsigned char *)cp;
2092 		}
2093 		nl7c_uri_hit++;
2094 		/* If logging enabled log request */
2095 		if (nl7c_logd_enabled) {
2096 			ipaddr_t faddr;
2097 
2098 			if (so->so_family == AF_INET) {
2099 				/* Only support IPv4 addrs */
2100 				faddr = ((struct sockaddr_in *)
2101 				    sti->sti_faddr_sa) ->sin_addr.s_addr;
2102 			} else {
2103 				faddr = 0;
2104 			}
2105 			/* XXX need to pass response type, e.g. 200, 304 */
2106 			nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
2107 		}
2108 
2109 		/* If conditional request check for substitute response */
2110 		if (ruri->conditional) {
2111 			uri = nl7c_http_cond(ruri, uri);
2112 		}
2113 
2114 		/*
2115 		 * Release reference on request URI, send the response out
2116 		 * the socket, release reference on response uri, set the
2117 		 * *ret value to B_TRUE to indicate request was consumed
2118 		 * then return B_FALSE to indcate no more data needed.
2119 		 */
2120 		REF_RELE(ruri);
2121 		(void) uri_response(so, uri);
2122 		REF_RELE(uri);
2123 		*ret = B_TRUE;
2124 		return (B_FALSE);
2125 	}
2126 	/*
2127 	 * Miss the cache, the request URI is in the cache waiting for
2128 	 * application write-side data to fill it.
2129 	 */
2130 	nl7c_uri_miss++;
2131 temp:
2132 	/*
2133 	 * A miss or temp URI for which response data is needed, link
2134 	 * uri to so and so to uri, set WAITWRITE in the so such that
2135 	 * read-side processing is suspended (so the next read() gets
2136 	 * the request data) until a write() is processed by NL7C.
2137 	 *
2138 	 * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
2139 	 */
2140 	uri->proc = so;
2141 	sti->sti_nl7c_uri = uri;
2142 	sti->sti_nl7c_flags |= NL7C_WAITWRITE;
2143 	*ret = B_FALSE;
2144 	return (B_FALSE);
2145 
2146 more:
2147 	/* More data is needed, note fragmented recv not supported */
2148 	nl7c_uri_more++;
2149 
2150 pass:
2151 	/* Pass on this request */
2152 	nl7c_uri_pass++;
2153 	nl7c_uri_request++;
2154 	if (ruri != NULL) {
2155 		REF_RELE(ruri);
2156 	}
2157 	if (uri) {
2158 		REF_RELE(uri);
2159 	}
2160 	sti->sti_nl7c_flags = 0;
2161 	*ret = B_FALSE;
2162 	return (B_FALSE);
2163 }
2164