xref: /titanic_44/usr/src/uts/common/fs/sockfs/nl7curi.c (revision f645252839e7ff6d25cadf6a45b2ae9099943c5a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/strsubr.h>
27 #include <sys/strsun.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <vm/seg_map.h>
31 #include <vm/seg_kpm.h>
32 #include <sys/condvar_impl.h>
33 #include <sys/sendfile.h>
34 #include <fs/sockfs/nl7c.h>
35 #include <fs/sockfs/nl7curi.h>
36 
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip6.h>
40 #include <inet/tcp.h>
41 #include <inet/led.h>
42 #include <inet/mi.h>
43 
44 #include <inet/nca/ncadoorhdr.h>
45 #include <inet/nca/ncalogd.h>
46 #include <inet/nca/ncandd.h>
47 
48 #include <sys/promif.h>
49 
50 /*
51  * Some externs:
52  */
53 
54 extern boolean_t	nl7c_logd_enabled;
55 extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
56 			    time_t, ipaddr_t);
57 extern boolean_t	nl7c_close_addr(struct sonode *);
58 extern struct sonode	*nl7c_addr2portso(void *);
59 extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
60 
61 /*
62  * Various global tuneables:
63  */
64 
65 clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */
66 
67 boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */
68 
69 uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */
70 
71 uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
72 uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
73 
74 /*
75  * Locals:
76  */
77 
78 static int	uri_rd_response(struct sonode *, uri_desc_t *,
79 		    uri_rd_t *, boolean_t);
80 static int	uri_response(struct sonode *, uri_desc_t *);
81 
82 /*
83  * HTTP scheme functions called from nl7chttp.c:
84  */
85 
86 boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
87 boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
88 boolean_t nl7c_http_cmp(void *, void *);
89 mblk_t *nl7c_http_persist(struct sonode *);
90 void nl7c_http_free(void *arg);
91 void nl7c_http_init(void);
92 
93 /*
94  * Counters that need to move to kstat and/or be removed:
95  */
96 
97 volatile uint64_t nl7c_uri_request = 0;
98 volatile uint64_t nl7c_uri_hit = 0;
99 volatile uint64_t nl7c_uri_pass = 0;
100 volatile uint64_t nl7c_uri_miss = 0;
101 volatile uint64_t nl7c_uri_temp = 0;
102 volatile uint64_t nl7c_uri_more = 0;
103 volatile uint64_t nl7c_uri_data = 0;
104 volatile uint64_t nl7c_uri_sendfilev = 0;
105 volatile uint64_t nl7c_uri_reclaim_calls = 0;
106 volatile uint64_t nl7c_uri_reclaim_cnt = 0;
107 volatile uint64_t nl7c_uri_pass_urifail = 0;
108 volatile uint64_t nl7c_uri_pass_dupbfail = 0;
109 volatile uint64_t nl7c_uri_more_get = 0;
110 volatile uint64_t nl7c_uri_pass_method = 0;
111 volatile uint64_t nl7c_uri_pass_option = 0;
112 volatile uint64_t nl7c_uri_more_eol = 0;
113 volatile uint64_t nl7c_uri_more_http = 0;
114 volatile uint64_t nl7c_uri_pass_http = 0;
115 volatile uint64_t nl7c_uri_pass_addfail = 0;
116 volatile uint64_t nl7c_uri_pass_temp = 0;
117 volatile uint64_t nl7c_uri_expire = 0;
118 volatile uint64_t nl7c_uri_purge = 0;
119 volatile uint64_t nl7c_uri_NULL1 = 0;
120 volatile uint64_t nl7c_uri_NULL2 = 0;
121 volatile uint64_t nl7c_uri_close = 0;
122 volatile uint64_t nl7c_uri_temp_close = 0;
123 volatile uint64_t nl7c_uri_free = 0;
124 volatile uint64_t nl7c_uri_temp_free = 0;
125 volatile uint64_t nl7c_uri_temp_mk = 0;
126 volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
127 
128 /*
129  * Various kmem_cache_t's:
130  */
131 
132 kmem_cache_t *nl7c_uri_kmc;
133 kmem_cache_t *nl7c_uri_rd_kmc;
134 static kmem_cache_t *uri_desb_kmc;
135 static kmem_cache_t *uri_segmap_kmc;
136 
137 static void uri_kmc_reclaim(void *);
138 
139 static void nl7c_uri_reclaim(void);
140 
141 /*
142  * The URI hash is a dynamically sized A/B bucket hash, when the current
143  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
144  * the next P2Ps[] size is created.
145  *
146  * All lookups are done in the current hash then the new hash (if any),
147  * if there is a new has then when a current hash bucket chain is examined
148  * any uri_desc_t members will be migrated to the new hash and when the
149  * last uri_desc_t has been migrated then the new hash will become the
150  * current and the previous current hash will be freed leaving a single
151  * hash.
152  *
153  * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
154  * and can be accessed only after acquiring the uri_hash_access lock (for
155  * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
156  * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
157  * is placed on all uri_desc_t uri_hash_t list members.
158  *
159  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
160  * access and WRITER for write access. Note, WRITER is only required for
161  * hash geometry changes.
162  *
163  * uri_hash_which - which uri_hash_ab[] is the current hash.
164  *
165  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
166  *
167  * uri_hash_sz[] - the size for each uri_hash_ab[].
168  *
169  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
170  *
171  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
172  * a new uri_hash_ab[] needs to be created.
173  *
174  * uri_hash_ab[] - the uri_hash_t entries.
175  *
176  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
177  */
178 
179 typedef struct uri_hash_s {
180 	struct uri_desc_s	*list;		/* List of uri_t(s) */
181 	kmutex_t		lock;
182 } uri_hash_t;
183 
184 #define	URI_HASH_AVRG	5	/* Desired average hash chain length */
185 #define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */
186 
187 static krwlock_t	uri_hash_access;
188 static uint32_t		uri_hash_which = 0;
189 static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0};
190 static uint32_t		uri_hash_sz[2] = {0, 0};
191 static uint32_t		uri_hash_cnt[2] = {0, 0};
192 static uint32_t		uri_hash_overflow[2] = {0, 0};
193 static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};
194 static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL};
195 
196 /*
197  * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2))
198  * these primes have been found to be useful for prime sized hash tables.
199  */
200 
201 static const int P2Ps[] = {
202 	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
203 	6143, 12281, 24571, 49139, 98299, 196597, 393209,
204 	786431, 1572853, 3145721, 6291449, 12582893, 0};
205 
206 /*
207  * Hash macros:
208  *
209  *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
210  *    hex multichar of the format "%HH" pointed to by *cp to a char and
211  *    return in c, *ep points to past end of (char *), on return *cp will
212  *    point to the last char consumed.
213  *
214  *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
215  *    *cp to *ep to the unsigned hix, cp nor ep are modified.
216  *
217  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
218  *    a hash index 0 - (uri_hash_sz[which] - 1).
219  *
220  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
221  *    uri_desc_t members from hash from to hash to.
222  *
223  *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
224  *    *uri which is a member of the uri_hash_t *hp list with a previous
225  *    list member of *puri for the uri_hash_ab[] cur. After unlinking
226  *    check for cur hash empty, if so make new cur. Note, as this macro
227  *    can change a hash chain it needs to be run under hash_access as
228  *    RW_WRITER, further as it can change the new hash to cur any access
229  *    to the hash state must be done after either dropping locks and
230  *    starting over or making sure the global state is consistent after
231  *    as before.
232  */
233 
/*
 * H2A(cp, ep, c) - decode the "%HH" escape that (cp) points at.
 *
 * On entry (cp) points at the '%'. Up to two following chars, bounded by
 * (ep), are consumed and interpreted as hex digits (either case); the
 * resulting value is stored in (c). On exit (cp) has been advanced once
 * per loop test, i.e. it points at the last char consumed, or at (ep) if
 * the escape was cut short by the end of the buffer. A char that is not
 * a hex digit contributes 0 to the value.
 */
#define	H2A(cp, ep, c) {						\
	int	_h = 2;							\
	int	_n = 0;							\
	char	_hc;							\
									\
	while (_h > 0 && ++(cp) < (ep)) {				\
		/* Second digit: shift the first into the high nibble */ \
		if (_h == 1)						\
			_n *= 0x10;					\
		_hc = *(cp);						\
		if (_hc >= '0' && _hc <= '9')				\
			_n += _hc - '0';				\
		else if (_hc >= 'a' && _hc <= 'f')			\
			_n += _hc - 'a' + 10;				\
		else if (_hc >= 'A' && _hc <= 'F')			\
			_n += _hc - 'A' + 10;				\
		_h--;							\
	}								\
	(c) = _n;							\
}
253 
/*
 * URI_HASH(hv, cp, ep) - fold every char in [cp, ep) into the hash value
 * hv via CHASH(), first decoding any "%HH" escape with H2A() so that the
 * escaped and unescaped spellings of a char feed the same value to CHASH().
 * Neither cp nor ep is modified; a private cursor is used.
 */
#define	URI_HASH(hv, cp, ep) {						\
	char	*_s;							\
	char	_c;							\
									\
	for (_s = (cp); _s < (ep); _s++) {				\
		_c = *_s;						\
		if (_c == '%') {					\
			/* H2A() advances _s over the hex digits */	\
			H2A(_s, (ep), _c);				\
		}							\
		CHASH(hv, _c);						\
	}								\
}
266 
/* Reduce the hash value hix, in place, to a bucket index for hash which */
#define	URI_HASH_IX(hix, which) (hix) %= uri_hash_sz[(which)]
268 
/*
 * URI_HASH_MIGRATE(from, hp, to) - drain bucket hp of hash from, pushing
 * each uri_desc_t onto the head of its bucket in hash to. The source
 * bucket lock is held for the whole drain and each destination bucket
 * lock only while linking. The per-hash counts are adjusted one entry at
 * a time and the hit counter of each migrated entry is reset to 0.
 */
#define	URI_HASH_MIGRATE(from, hp, to) {				\
	uri_desc_t	*_nuri;						\
	uint32_t	_nhix;						\
	uri_hash_t	*_nhp;						\
									\
	mutex_enter(&(hp)->lock);					\
	for (_nuri = (hp)->list; _nuri != NULL; _nuri = (hp)->list) {	\
		/* Pop the head of the source chain */			\
		(hp)->list = _nuri->hash;				\
		atomic_add_32(&uri_hash_cnt[(from)], -1);		\
		atomic_add_32(&uri_hash_cnt[(to)], 1);			\
		/* Rehash for the destination geometry */		\
		_nhix = _nuri->hvalue;					\
		URI_HASH_IX(_nhix, to);					\
		_nhp = &uri_hash_ab[(to)][_nhix];			\
		/* Push onto the head of the destination chain */	\
		mutex_enter(&_nhp->lock);				\
		_nuri->hash = _nhp->list;				\
		_nhp->list = _nuri;					\
		_nuri->hit = 0;						\
		mutex_exit(&_nhp->lock);				\
	}								\
	mutex_exit(&(hp)->lock);					\
}
290 
/*
 * URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink uri from bucket hp of
 * hash cur, where puri is the previous chain member (NULL if uri is the
 * chain head), then decrement the member count of hash cur.
 *
 * If that empties hash cur and a distinct new hash exists, hash cur is
 * freed and new becomes the current hash. Otherwise hp is recorded as the
 * lru scan position of hash cur.
 *
 * Callers that have already moved on to the new hash invoke this with
 * cur == new. In that case the hash being walked must never be freed or
 * switched to, even when its count drops to 0, hence the (cur) != (new)
 * test below; without it uri_hash_which would be left pointing at a NULL
 * table.
 */
#define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
	if ((puri) != NULL) {						\
		(puri)->hash = (uri)->hash;				\
	} else {							\
		(hp)->list = (uri)->hash;				\
	}								\
	if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 &&		\
	    (cur) != (new) && uri_hash_ab[(new)] != NULL) {		\
		kmem_free(uri_hash_ab[(cur)],				\
		    sizeof (uri_hash_t) * uri_hash_sz[(cur)]);		\
		uri_hash_ab[(cur)] = NULL;				\
		uri_hash_lru[(cur)] = NULL;				\
		uri_hash_which = (new);					\
	} else {							\
		uri_hash_lru[(cur)] = (hp);				\
	}								\
}
308 
309 void
310 nl7c_uri_init(void)
311 {
312 	uint32_t	cur = uri_hash_which;
313 
314 	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
315 
316 	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
317 	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
318 	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
319 	    KM_SLEEP);
320 	uri_hash_lru[cur] = uri_hash_ab[cur];
321 
322 	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
323 	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
324 
325 	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
326 	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
327 
328 	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
329 	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
330 
331 	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
332 	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
333 
334 	nl7c_http_init();
335 }
336 
337 #define	CV_SZ	16
338 
339 void
340 nl7c_mi_report_hash(mblk_t *mp)
341 {
342 	uri_hash_t	*hp, *pend;
343 	uri_desc_t	*uri;
344 	uint32_t	cur;
345 	uint32_t	new;
346 	int		n, nz, tot;
347 	uint32_t	cv[CV_SZ + 1];
348 
349 	rw_enter(&uri_hash_access, RW_READER);
350 	cur = uri_hash_which;
351 	new = cur ? 0 : 1;
352 next:
353 	for (n = 0; n <= CV_SZ; n++)
354 		cv[n] = 0;
355 	nz = 0;
356 	tot = 0;
357 	hp = &uri_hash_ab[cur][0];
358 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
359 	while (hp < pend) {
360 		n = 0;
361 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
362 			n++;
363 		}
364 		tot += n;
365 		if (n > 0)
366 			nz++;
367 		if (n > CV_SZ)
368 			n = CV_SZ;
369 		cv[n]++;
370 		hp++;
371 	}
372 
373 	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
374 	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
375 	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
376 	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
377 	for (n = 1; n < CV_SZ; n++) {
378 		int	pn = 0;
379 		char	pv[5];
380 		char	*pp = pv;
381 
382 		for (pn = n; pn < 1000; pn *= 10)
383 			*pp++ = ' ';
384 		*pp = 0;
385 		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
386 	}
387 	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
388 
389 	if (cur != new && uri_hash_ab[new] != NULL) {
390 		cur = new;
391 		goto next;
392 	}
393 	rw_exit(&uri_hash_access);
394 }
395 
396 void
397 nl7c_mi_report_uri(mblk_t *mp)
398 {
399 	uri_hash_t	*hp;
400 	uri_desc_t	*uri;
401 	uint32_t	cur;
402 	uint32_t	new;
403 	int		ix;
404 	int		ret;
405 	char		sc;
406 
407 	rw_enter(&uri_hash_access, RW_READER);
408 	cur = uri_hash_which;
409 	new = cur ? 0 : 1;
410 next:
411 	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
412 		hp = &uri_hash_ab[cur][ix];
413 		mutex_enter(&hp->lock);
414 		uri = hp->list;
415 		while (uri != NULL) {
416 			sc = *(uri->path.ep);
417 			*(uri->path.ep) = 0;
418 			ret = mi_mpprintf(mp, "%s: %d %d %d",
419 			    uri->path.cp, (int)uri->resplen,
420 			    (int)uri->respclen, (int)uri->count);
421 			*(uri->path.ep) = sc;
422 			if (ret == -1) break;
423 			uri = uri->hash;
424 		}
425 		mutex_exit(&hp->lock);
426 		if (ret == -1) break;
427 	}
428 	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
429 		cur = new;
430 		goto next;
431 	}
432 	rw_exit(&uri_hash_access);
433 }
434 
435 /*
436  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
437  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
438  * will be freed by REF_RELE() on return.
439  */
440 
441 void
442 nl7c_uri_inactive(uri_desc_t *uri)
443 {
444 	int64_t	 bytes = 0;
445 
446 	if (uri->tail) {
447 		uri_rd_t *rdp = &uri->response;
448 		uri_rd_t *free = NULL;
449 
450 		while (rdp) {
451 			if (rdp->off == -1) {
452 				bytes += rdp->sz;
453 				kmem_free(rdp->data.kmem, rdp->sz);
454 			} else {
455 				VN_RELE(rdp->data.vnode);
456 			}
457 			rdp = rdp->next;
458 			if (free != NULL) {
459 				kmem_cache_free(nl7c_uri_rd_kmc, free);
460 			}
461 			free = rdp;
462 		}
463 	}
464 	if (bytes) {
465 		atomic_add_64(&nl7c_uri_bytes, -bytes);
466 	}
467 	if (uri->scheme != NULL) {
468 		nl7c_http_free(uri->scheme);
469 	}
470 	if (uri->reqmp) {
471 		freeb(uri->reqmp);
472 	}
473 }
474 
475 /*
476  * The reclaim is called by the kmem subsystem when kmem is running
477  * low. More work is needed to determine the best reclaim policy, for
478  * now we just manipulate the nl7c_uri_max global maximum bytes threshold
479  * value using a simple arithmetic backoff of the value every time this
480  * function is called then call uri_reclaim() to enforce it.
481  *
482  * Note, this value remains in place and enforced for all subsequent
483  * URI request/response processing.
484  *
485  * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
486  * the first call here set it to the current uri_bytes value then backoff
487  * from there.
488  *
489  * XXX how do we determine when to increase nl7c_uri_max ???
490  */
491 
492 /*ARGSUSED*/
493 static void
494 uri_kmc_reclaim(void *arg)
495 {
496 	uint64_t new_max;
497 
498 	if ((new_max = nl7c_uri_max) == 0) {
499 		/* Currently infinite, initialize to current bytes used */
500 		nl7c_uri_max = nl7c_uri_bytes;
501 		new_max = nl7c_uri_bytes;
502 	}
503 	if (new_max > 1) {
504 		/* Lower max_bytes to 93% of current value */
505 		new_max >>= 1;			/* 50% */
506 		new_max += (new_max >> 1);	/* 75% */
507 		new_max += (new_max >> 2);	/* 93% */
508 		if (new_max < nl7c_uri_max)
509 			nl7c_uri_max = new_max;
510 		else
511 			nl7c_uri_max = 1;
512 	}
513 	nl7c_uri_reclaim();
514 }
515 
516 /*
517  * Delete a uri_desc_t from the URI hash.
518  */
519 
520 static void
521 uri_delete(uri_desc_t *del)
522 {
523 	uint32_t	hix;
524 	uri_hash_t	*hp;
525 	uri_desc_t	*uri;
526 	uri_desc_t	*puri;
527 	uint32_t	cur;
528 	uint32_t	new;
529 
530 	ASSERT(del->hash != URI_TEMP);
531 	rw_enter(&uri_hash_access, RW_WRITER);
532 	cur = uri_hash_which;
533 	new = cur ? 0 : 1;
534 next:
535 	puri = NULL;
536 	hix = del->hvalue;
537 	URI_HASH_IX(hix, cur);
538 	hp = &uri_hash_ab[cur][hix];
539 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
540 		if (uri != del) {
541 			puri = uri;
542 			continue;
543 		}
544 		/*
545 		 * Found the URI, unlink from the hash chain,
546 		 * drop locks, ref release it.
547 		 */
548 		URI_HASH_UNLINK(cur, new, hp, puri, uri);
549 		rw_exit(&uri_hash_access);
550 		REF_RELE(uri);
551 		return;
552 	}
553 	if (cur != new && uri_hash_ab[new] != NULL) {
554 		/*
555 		 * Not found in current hash and have a new hash so
556 		 * check the new hash next.
557 		 */
558 		cur = new;
559 		goto next;
560 	}
561 	rw_exit(&uri_hash_access);
562 }
563 
564 /*
565  * Add a uri_desc_t to the URI hash.
566  */
567 
568 static void
569 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
570 {
571 	uint32_t	hix;
572 	uri_hash_t	*hp;
573 	uint32_t	cur = uri_hash_which;
574 	uint32_t	new = cur ? 0 : 1;
575 
576 	/*
577 	 * Caller of uri_add() must hold the uri_hash_access rwlock.
578 	 */
579 	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
580 	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
581 	/*
582 	 * uri_add() always succeeds so add a hash ref to the URI now.
583 	 */
584 	REF_HOLD(uri);
585 again:
586 	hix = uri->hvalue;
587 	URI_HASH_IX(hix, cur);
588 	if (uri_hash_ab[new] == NULL &&
589 	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
590 		/*
591 		 * Easy case, no new hash and current hasn't overflowed,
592 		 * add URI to current hash and return.
593 		 *
594 		 * Note, the check for uri_hash_cnt[] above aren't done
595 		 * atomictally, i.e. multiple threads can be in this code
596 		 * as RW_READER and update the cnt[], this isn't a problem
597 		 * as the check is only advisory.
598 		 */
599 	fast:
600 		atomic_add_32(&uri_hash_cnt[cur], 1);
601 		hp = &uri_hash_ab[cur][hix];
602 		mutex_enter(&hp->lock);
603 		uri->hash = hp->list;
604 		hp->list = uri;
605 		mutex_exit(&hp->lock);
606 		rw_exit(&uri_hash_access);
607 		return;
608 	}
609 	if (uri_hash_ab[new] == NULL) {
610 		/*
611 		 * Need a new a or b hash, if not already RW_WRITER
612 		 * try to upgrade our lock to writer.
613 		 */
614 		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
615 			/*
616 			 * Upgrade failed, we can't simple exit and reenter
617 			 * the lock as after the exit and before the reenter
618 			 * the whole world can change so just wait for writer
619 			 * then do everything again.
620 			 */
621 			if (nonblocking) {
622 				/*
623 				 * Can't block, use fast-path above.
624 				 *
625 				 * XXX should have a background thread to
626 				 * handle new ab[] in this case so as to
627 				 * not overflow the cur hash to much.
628 				 */
629 				goto fast;
630 			}
631 			rw_exit(&uri_hash_access);
632 			rwlock = RW_WRITER;
633 			rw_enter(&uri_hash_access, rwlock);
634 			cur = uri_hash_which;
635 			new = cur ? 0 : 1;
636 			goto again;
637 		}
638 		rwlock = RW_WRITER;
639 		if (uri_hash_ab[new] == NULL) {
640 			/*
641 			 * Still need a new hash, allocate and initialize
642 			 * the new hash.
643 			 */
644 			uri_hash_n[new] = uri_hash_n[cur] + 1;
645 			if (uri_hash_n[new] == 0) {
646 				/*
647 				 * No larger P2Ps[] value so use current,
648 				 * i.e. 2 of the largest are better than 1 ?
649 				 */
650 				uri_hash_n[new] = uri_hash_n[cur];
651 				cmn_err(CE_NOTE, "NL7C: hash index overflow");
652 			}
653 			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
654 			ASSERT(uri_hash_cnt[new] == 0);
655 			uri_hash_overflow[new] = uri_hash_sz[new] *
656 			    URI_HASH_AVRG;
657 			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
658 			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
659 			    KM_SLEEP);
660 			if (uri_hash_ab[new] == NULL) {
661 				/*
662 				 * Alloc failed, use fast-path above.
663 				 *
664 				 * XXX should have a background thread to
665 				 * handle new ab[] in this case so as to
666 				 * not overflow the cur hash to much.
667 				 */
668 				goto fast;
669 			}
670 			uri_hash_lru[new] = uri_hash_ab[new];
671 		}
672 	}
673 	/*
674 	 * Hashed against current hash so migrate any current hash chain
675 	 * members, if any.
676 	 *
677 	 * Note, the hash chain list can be checked for a non empty list
678 	 * outside of the hash chain list lock as the hash chain struct
679 	 * can't be destroyed while in the uri_hash_access rwlock, worst
680 	 * case is that a non empty list is found and after acquiring the
681 	 * lock another thread beats us to it (i.e. migrated the list).
682 	 */
683 	hp = &uri_hash_ab[cur][hix];
684 	if (hp->list != NULL) {
685 		URI_HASH_MIGRATE(cur, hp, new);
686 	}
687 	/*
688 	 * If new hash has overflowed before current hash has been
689 	 * completely migrated then walk all current hash chains and
690 	 * migrate list members now.
691 	 */
692 	if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) {
693 		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
694 			hp = &uri_hash_ab[cur][hix];
695 			if (hp->list != NULL) {
696 				URI_HASH_MIGRATE(cur, hp, new);
697 			}
698 		}
699 	}
700 	/*
701 	 * Add URI to new hash.
702 	 */
703 	hix = uri->hvalue;
704 	URI_HASH_IX(hix, new);
705 	hp = &uri_hash_ab[new][hix];
706 	mutex_enter(&hp->lock);
707 	uri->hash = hp->list;
708 	hp->list = uri;
709 	mutex_exit(&hp->lock);
710 	/*
711 	 * Last, check to see if last cur hash chain has been
712 	 * migrated, if so free cur hash and make new hash cur.
713 	 */
714 	if (uri_hash_cnt[cur] == 0) {
715 		/*
716 		 * If we don't already hold the uri_hash_access rwlock for
717 		 * RW_WRITE try to upgrade to RW_WRITE and if successful
718 		 * check again and to see if still need to do the free.
719 		 */
720 		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
721 		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
722 			kmem_free(uri_hash_ab[cur],
723 			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
724 			uri_hash_ab[cur] = NULL;
725 			uri_hash_lru[cur] = NULL;
726 			uri_hash_which = new;
727 		}
728 	}
729 	rw_exit(&uri_hash_access);
730 }
731 
732 /*
733  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
734  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
735  * add B_TRUE use the request URI to create a new hash entry. Else if add
736  * B_FALSE ...
737  */
738 
739 static uri_desc_t *
740 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
741 {
742 	uint32_t	hix;
743 	uri_hash_t	*hp;
744 	uri_desc_t	*uri;
745 	uri_desc_t	*puri;
746 	uint32_t	cur;
747 	uint32_t	new;
748 	char		*rcp = ruri->path.cp;
749 	char		*rep = ruri->path.ep;
750 
751 again:
752 	rw_enter(&uri_hash_access, RW_READER);
753 	cur = uri_hash_which;
754 	new = cur ? 0 : 1;
755 nexthash:
756 	puri = NULL;
757 	hix = ruri->hvalue;
758 	URI_HASH_IX(hix, cur);
759 	hp = &uri_hash_ab[cur][hix];
760 	mutex_enter(&hp->lock);
761 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
762 		char	*ap = uri->path.cp;
763 		char	*bp = rcp;
764 		char	a, b;
765 
766 		/* Compare paths */
767 		while (bp < rep && ap < uri->path.ep) {
768 			if ((a = *ap) == '%') {
769 				/* Escaped hex multichar, convert it */
770 				H2A(ap, uri->path.ep, a);
771 			}
772 			if ((b = *bp) == '%') {
773 				/* Escaped hex multichar, convert it */
774 				H2A(bp, rep, b);
775 			}
776 			if (a != b) {
777 				/* Char's don't match */
778 				goto nexturi;
779 			}
780 			ap++;
781 			bp++;
782 		}
783 		if (bp != rep || ap != uri->path.ep) {
784 			/* Not same length */
785 			goto nexturi;
786 		}
787 		ap = uri->auth.cp;
788 		bp = ruri->auth.cp;
789 		if (ap != NULL) {
790 			if (bp == NULL) {
791 				/* URI has auth request URI doesn't */
792 				goto nexturi;
793 			}
794 			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
795 				if ((a = *ap) == '%') {
796 					/* Escaped hex multichar, convert it */
797 					H2A(ap, uri->path.ep, a);
798 				}
799 				if ((b = *bp) == '%') {
800 					/* Escaped hex multichar, convert it */
801 					H2A(bp, rep, b);
802 				}
803 				if (a != b) {
804 					/* Char's don't match */
805 					goto nexturi;
806 				}
807 				ap++;
808 				bp++;
809 			}
810 			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
811 				/* Not same length */
812 				goto nexturi;
813 			}
814 		} else if (bp != NULL) {
815 			/* URI doesn't have auth and request URI does */
816 			goto nexturi;
817 		}
818 		/*
819 		 * Have a path/auth match so before any other processing
820 		 * of requested URI, check for expire or request no cache
821 		 * purge.
822 		 */
823 		if (uri->expire >= 0 && uri->expire <= lbolt || ruri->nocache) {
824 			/*
825 			 * URI has expired or request specified to not use
826 			 * the cached version, unlink the URI from the hash
827 			 * chain, release all locks, release the hash ref
828 			 * on the URI, and last look it up again.
829 			 *
830 			 * Note, this will cause all variants of the named
831 			 * URI to be purged.
832 			 */
833 			if (puri != NULL) {
834 				puri->hash = uri->hash;
835 			} else {
836 				hp->list = uri->hash;
837 			}
838 			mutex_exit(&hp->lock);
839 			atomic_add_32(&uri_hash_cnt[cur], -1);
840 			rw_exit(&uri_hash_access);
841 			if (ruri->nocache)
842 				nl7c_uri_purge++;
843 			else
844 				nl7c_uri_expire++;
845 			REF_RELE(uri);
846 			goto again;
847 		}
848 		if (uri->scheme != NULL) {
849 			/*
850 			 * URI has scheme private qualifier(s), if request
851 			 * URI doesn't or if no match skip this URI.
852 			 */
853 			if (ruri->scheme == NULL ||
854 			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
855 				goto nexturi;
856 		} else if (ruri->scheme != NULL) {
857 			/*
858 			 * URI doesn't have scheme private qualifiers but
859 			 * request URI does, no match, skip this URI.
860 			 */
861 			goto nexturi;
862 		}
863 		/*
864 		 * Have a match, ready URI for return, first put a reference
865 		 * hold on the URI, if this URI is currently being processed
866 		 * then have to wait for the processing to be completed and
867 		 * redo the lookup, else return it.
868 		 */
869 		REF_HOLD(uri);
870 		mutex_enter(&uri->proclock);
871 		if (uri->proc != NULL) {
872 			/* The URI is being processed, wait for completion */
873 			mutex_exit(&hp->lock);
874 			rw_exit(&uri_hash_access);
875 			if (! nonblocking &&
876 			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
877 				/*
878 				 * URI has been processed but things may
879 				 * have changed while we were away so do
880 				 * most everything again.
881 				 */
882 				mutex_exit(&uri->proclock);
883 				REF_RELE(uri);
884 				goto again;
885 			} else {
886 				/*
887 				 * A nonblocking socket or an interrupted
888 				 * cv_wait_sig() in the first case can't
889 				 * block waiting for the processing of the
890 				 * uri hash hit uri to complete, in both
891 				 * cases just return failure to lookup.
892 				 */
893 				mutex_exit(&uri->proclock);
894 				REF_RELE(uri);
895 				return (NULL);
896 			}
897 		}
898 		mutex_exit(&uri->proclock);
899 		uri->hit++;
900 		mutex_exit(&hp->lock);
901 		rw_exit(&uri_hash_access);
902 		return (uri);
903 	nexturi:
904 		puri = uri;
905 	}
906 	mutex_exit(&hp->lock);
907 	if (cur != new && uri_hash_ab[new] != NULL) {
908 		/*
909 		 * Not found in current hash and have a new hash so
910 		 * check the new hash next.
911 		 */
912 		cur = new;
913 		goto nexthash;
914 	}
915 add:
916 	if (! add) {
917 		/* Lookup only so return failure */
918 		rw_exit(&uri_hash_access);
919 		return (NULL);
920 	}
921 	/*
922 	 * URI not hashed, finish intialization of the
923 	 * request URI, add it to the hash, return it.
924 	 */
925 	ruri->hit = 0;
926 	ruri->expire = -1;
927 	ruri->response.sz = 0;
928 	ruri->proc = (struct sonode *)~NULL;
929 	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
930 	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
931 	uri_add(ruri, RW_READER, nonblocking);
932 	/* uri_add() has done rw_exit(&uri_hash_access) */
933 	return (ruri);
934 }
935 
936 /*
937  * Reclaim URIs until max cache size threshold has been reached.
938  *
939  * A CLOCK based reclaim modified with a history (hit counter) counter.
940  */
941 
942 static void
943 nl7c_uri_reclaim(void)
944 {
945 	uri_hash_t	*hp, *start, *pend;
946 	uri_desc_t	*uri;
947 	uri_desc_t	*puri;
948 	uint32_t	cur;
949 	uint32_t	new;
950 
951 	nl7c_uri_reclaim_calls++;
952 again:
953 	rw_enter(&uri_hash_access, RW_WRITER);
954 	cur = uri_hash_which;
955 	new = cur ? 0 : 1;
956 next:
957 	hp = uri_hash_lru[cur];
958 	start = hp;
959 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
960 	while (nl7c_uri_bytes > nl7c_uri_max) {
961 		puri = NULL;
962 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
963 			if (uri->hit != 0) {
964 				/*
965 				 * Decrement URI activity counter and skip.
966 				 */
967 				uri->hit--;
968 				puri = uri;
969 				continue;
970 			}
971 			if (uri->proc != NULL) {
972 				/*
973 				 * Currently being processed by a socket, skip.
974 				 */
975 				continue;
976 			}
977 			/*
978 			 * Found a candidate, no hit(s) since added or last
979 			 * reclaim pass, unlink from it's hash chain, update
980 			 * lru scan pointer, drop lock, ref release it.
981 			 */
982 			URI_HASH_UNLINK(cur, new, hp, puri, uri);
983 			if (cur == uri_hash_which) {
984 				if (++hp == pend) {
985 					/* Wrap pointer */
986 					hp = uri_hash_ab[cur];
987 				}
988 				uri_hash_lru[cur] = hp;
989 			}
990 			rw_exit(&uri_hash_access);
991 			REF_RELE(uri);
992 			nl7c_uri_reclaim_cnt++;
993 			goto again;
994 		}
995 		if (++hp == pend) {
996 			/* Wrap pointer */
997 			hp = uri_hash_ab[cur];
998 		}
999 		if (hp == start) {
1000 			if (cur != new && uri_hash_ab[new] != NULL) {
1001 				/*
1002 				 * Done with the current hash and have a
1003 				 * new hash so check the new hash next.
1004 				 */
1005 				cur = new;
1006 				goto next;
1007 			}
1008 		}
1009 	}
1010 	rw_exit(&uri_hash_access);
1011 }
1012 
1013 /*
1014  * Called for a socket which is being freed prior to close, e.g. errored.
1015  */
1016 
1017 void
1018 nl7c_urifree(struct sonode *so)
1019 {
1020 	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
1021 
1022 	so->so_nl7c_uri = NULL;
1023 	if (uri->hash != URI_TEMP) {
1024 		uri_delete(uri);
1025 		mutex_enter(&uri->proclock);
1026 		uri->proc = NULL;
1027 		if (CV_HAS_WAITERS(&uri->waiting)) {
1028 			cv_broadcast(&uri->waiting);
1029 		}
1030 		mutex_exit(&uri->proclock);
1031 		nl7c_uri_free++;
1032 	} else {
1033 		/* No proclock as uri exclusively owned by so */
1034 		uri->proc = NULL;
1035 		nl7c_uri_temp_free++;
1036 	}
1037 	REF_RELE(uri);
1038 }
1039 
1040 /*
1041  * ...
1042  *
1043  *	< 0	need more data
1044  *
1045  *	  0	parse complete
1046  *
1047  *	> 0	parse error
1048  */
1049 
1050 volatile uint64_t nl7c_resp_pfail = 0;
1051 volatile uint64_t nl7c_resp_ntemp = 0;
1052 volatile uint64_t nl7c_resp_pass = 0;
1053 
1054 static int
1055 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1056 {
1057 	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1058 		if (data == NULL) {
1059 			/* Parse fail */
1060 			goto pfail;
1061 		}
1062 		/* More data */
1063 		data = NULL;
1064 	} else if (data == NULL) {
1065 		goto pass;
1066 	}
1067 	if (uri->hash != URI_TEMP && uri->nocache) {
1068 		/*
1069 		 * After response parse now no cache,
1070 		 * delete it from cache, wakeup any
1071 		 * waiters on this URI, make URI_TEMP.
1072 		 */
1073 		uri_delete(uri);
1074 		mutex_enter(&uri->proclock);
1075 		if (CV_HAS_WAITERS(&uri->waiting)) {
1076 			cv_broadcast(&uri->waiting);
1077 		}
1078 		mutex_exit(&uri->proclock);
1079 		uri->hash = URI_TEMP;
1080 		nl7c_uri_temp_mk++;
1081 	}
1082 	if (data == NULL) {
1083 		/* More data needed */
1084 		return (-1);
1085 	}
1086 	/* Success */
1087 	return (0);
1088 
1089 pfail:
1090 	nl7c_resp_pfail++;
1091 	return (EINVAL);
1092 
1093 pass:
1094 	nl7c_resp_pass++;
1095 	return (ENOTSUP);
1096 }
1097 
1098 /*
1099  * Called to sink application response data, the processing of the data
1100  * is the same for a cached or temp URI (i.e. a URI for which we aren't
1101  * going to cache the URI but want to parse it for detecting response
1102  * data end such that for a persistent connection we can parse the next
1103  * request).
1104  *
1105  * On return 0 is returned for sink success, > 0 on error, and < 0 on
1106  * no so URI (note, data not sinked).
1107  */
1108 
1109 int
1110 nl7c_data(struct sonode *so, uio_t *uio)
1111 {
1112 	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
1113 	iovec_t		*iov;
1114 	int		cnt;
1115 	int		sz = uio->uio_resid;
1116 	char		*data, *alloc;
1117 	char		*bp;
1118 	uri_rd_t	*rdp;
1119 	boolean_t	first;
1120 	int		error, perror;
1121 
1122 	nl7c_uri_data++;
1123 
1124 	if (uri == NULL) {
1125 		/* Socket & NL7C out of sync, disable NL7C */
1126 		so->so_nl7c_flags = 0;
1127 		nl7c_uri_NULL1++;
1128 		return (-1);
1129 	}
1130 
1131 	if (so->so_nl7c_flags & NL7C_WAITWRITE) {
1132 		so->so_nl7c_flags &= ~NL7C_WAITWRITE;
1133 		first = B_TRUE;
1134 	} else {
1135 		first = B_FALSE;
1136 	}
1137 
1138 	alloc = kmem_alloc(sz, KM_SLEEP);
1139 	URI_RD_ADD(uri, rdp, sz, -1);
1140 	if (rdp == NULL) {
1141 		error = ENOMEM;
1142 		goto fail;
1143 	}
1144 
1145 	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1146 		uri_delete(uri);
1147 		uri->hash = URI_TEMP;
1148 	}
1149 	data = alloc;
1150 	alloc = NULL;
1151 	rdp->data.kmem = data;
1152 	atomic_add_64(&nl7c_uri_bytes, sz);
1153 
1154 	bp = data;
1155 	while (uio->uio_resid > 0) {
1156 		iov = uio->uio_iov;
1157 		if ((cnt = iov->iov_len) == 0) {
1158 			goto next;
1159 		}
1160 		cnt = MIN(cnt, uio->uio_resid);
1161 		error = xcopyin(iov->iov_base, bp, cnt);
1162 		if (error)
1163 			goto fail;
1164 
1165 		iov->iov_base += cnt;
1166 		iov->iov_len -= cnt;
1167 		uio->uio_resid -= cnt;
1168 		uio->uio_loffset += cnt;
1169 		bp += cnt;
1170 	next:
1171 		uio->uio_iov++;
1172 		uio->uio_iovcnt--;
1173 	}
1174 
1175 	/* Successfull sink of data, response parse the data */
1176 	perror = nl7c_resp_parse(so, uri, data, sz);
1177 
1178 	/* Send the data out the connection */
1179 	error = uri_rd_response(so, uri, rdp, first);
1180 	if (error)
1181 		goto fail;
1182 
1183 	/* Success */
1184 	if (perror == 0 &&
1185 	    ((uri->respclen == URI_LEN_NOVALUE &&
1186 	    uri->resplen == URI_LEN_NOVALUE) ||
1187 	    uri->count >= uri->resplen)) {
1188 		/*
1189 		 * No more data needed and no pending response
1190 		 * data or current data count >= response length
1191 		 * so close the URI processing for this so.
1192 		 */
1193 		nl7c_close(so);
1194 		if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
1195 			/* Not a persistent connection */
1196 			so->so_nl7c_flags = 0;
1197 		}
1198 	}
1199 
1200 	return (0);
1201 
1202 fail:
1203 	if (alloc != NULL) {
1204 		kmem_free(alloc, sz);
1205 	}
1206 	so->so_nl7c_flags = 0;
1207 	nl7c_urifree(so);
1208 
1209 	return (error);
1210 }
1211 
1212 /*
1213  * Called to read data from file "*fp" at offset "*off" of length "*len"
1214  * for a maximum of "*max_rem" bytes.
1215  *
1216  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1217  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
1218  * is updated with the number of bytes remaining to be read.
1219  *
1220  * Else, "NULL" is returned.
1221  */
1222 
1223 static char *
1224 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1225 {
1226 	vnode_t	*vp = fp->f_vnode;
1227 	int	flg = 0;
1228 	size_t	size = MIN(*len, max);
1229 	char	*data;
1230 	int	error;
1231 	uio_t	uio;
1232 	iovec_t	iov;
1233 
1234 	(void) VOP_RWLOCK(vp, flg, NULL);
1235 
1236 	if (*off > MAXOFFSET_T) {
1237 		VOP_RWUNLOCK(vp, flg, NULL);
1238 		*ret = EFBIG;
1239 		return (NULL);
1240 	}
1241 
1242 	if (*off + size > MAXOFFSET_T)
1243 		size = (ssize32_t)(MAXOFFSET_T - *off);
1244 
1245 	data = kmem_alloc(size, KM_SLEEP);
1246 
1247 	iov.iov_base = data;
1248 	iov.iov_len = size;
1249 	uio.uio_loffset = *off;
1250 	uio.uio_iov = &iov;
1251 	uio.uio_iovcnt = 1;
1252 	uio.uio_resid = size;
1253 	uio.uio_segflg = UIO_SYSSPACE;
1254 	uio.uio_llimit = MAXOFFSET_T;
1255 	uio.uio_fmode = fp->f_flag;
1256 
1257 	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1258 	VOP_RWUNLOCK(vp, flg, NULL);
1259 	*ret = error;
1260 	if (error) {
1261 		kmem_free(data, size);
1262 		return (NULL);
1263 	}
1264 	*len = size;
1265 	*off += size;
1266 	return (data);
1267 }
1268 
1269 /*
1270  * Called to sink application response sendfilev, as with nl7c_data() above
1271  * all the data will be processed by NL7C unless there's an error.
1272  */
1273 
1274 int
1275 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1276 	int sfvc, ssize_t *xfer)
1277 {
1278 	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
1279 	file_t		*fp = NULL;
1280 	vnode_t		*vp = NULL;
1281 	char		*data = NULL;
1282 	u_offset_t	off;
1283 	int		len;
1284 	int		cnt;
1285 	int		total_count = 0;
1286 	char		*alloc;
1287 	uri_rd_t	*rdp;
1288 	int		max;
1289 	int		perror;
1290 	int		error = 0;
1291 	boolean_t	first = B_TRUE;
1292 
1293 	nl7c_uri_sendfilev++;
1294 
1295 	if (uri == NULL) {
1296 		/* Socket & NL7C out of sync, disable NL7C */
1297 		so->so_nl7c_flags = 0;
1298 		nl7c_uri_NULL2++;
1299 		return (0);
1300 	}
1301 
1302 	if (so->so_nl7c_flags & NL7C_WAITWRITE)
1303 		so->so_nl7c_flags &= ~NL7C_WAITWRITE;
1304 
1305 	while (sfvc-- > 0) {
1306 		/*
1307 		 * off - the current sfv read file offset or user address.
1308 		 *
1309 		 * len - the current sfv length in bytes.
1310 		 *
1311 		 * cnt - number of bytes kmem_alloc()ed.
1312 		 *
1313 		 * alloc - the kmem_alloc()ed buffer of size "cnt".
1314 		 *
1315 		 * data - copy of "alloc" used for post alloc references.
1316 		 *
1317 		 * fp - the current sfv file_t pointer.
1318 		 *
1319 		 * vp - the current "*vp" vnode_t pointer.
1320 		 *
1321 		 * Note, for "data" and "fp" and "vp" a NULL value is used
1322 		 * when not allocated such that the common failure path "fail"
1323 		 * is used.
1324 		 */
1325 		off = sfvp->sfv_off;
1326 		len = sfvp->sfv_len;
1327 		cnt = len;
1328 
1329 		if (len == 0) {
1330 			sfvp++;
1331 			continue;
1332 		}
1333 
1334 		if (sfvp->sfv_fd == SFV_FD_SELF) {
1335 			/*
1336 			 * User memory, copyin() all the bytes.
1337 			 */
1338 			alloc = kmem_alloc(cnt, KM_SLEEP);
1339 			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1340 			if (error)
1341 				goto fail;
1342 		} else {
1343 			/*
1344 			 * File descriptor, prefetch some bytes.
1345 			 */
1346 			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1347 				error = EBADF;
1348 				goto fail;
1349 			}
1350 			if ((fp->f_flag & FREAD) == 0) {
1351 				error = EACCES;
1352 				goto fail;
1353 			}
1354 			vp = fp->f_vnode;
1355 			if (vp->v_type != VREG) {
1356 				error = EINVAL;
1357 				goto fail;
1358 			}
1359 			VN_HOLD(vp);
1360 
1361 			/* Read max_rem bytes from file for prefetch */
1362 			if (nl7c_use_kmem) {
1363 				max = cnt;
1364 			} else {
1365 				max = MAXBSIZE * nl7c_file_prefetch;
1366 			}
1367 			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1368 			if (alloc == NULL)
1369 				goto fail;
1370 
1371 			releasef(sfvp->sfv_fd);
1372 			fp = NULL;
1373 		}
1374 		URI_RD_ADD(uri, rdp, cnt, -1);
1375 		if (rdp == NULL) {
1376 			error = ENOMEM;
1377 			goto fail;
1378 		}
1379 		data = alloc;
1380 		alloc = NULL;
1381 		rdp->data.kmem = data;
1382 		total_count += cnt;
1383 		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1384 			uri_delete(uri);
1385 			uri->hash = URI_TEMP;
1386 		}
1387 
1388 		/* Response parse */
1389 		perror = nl7c_resp_parse(so, uri, data, len);
1390 
1391 		/* Send kmem data out the connection */
1392 		error = uri_rd_response(so, uri, rdp, first);
1393 
1394 		if (error)
1395 			goto fail;
1396 
1397 		if (sfvp->sfv_fd != SFV_FD_SELF) {
1398 			/*
1399 			 * File descriptor, if any bytes left save vnode_t.
1400 			 */
1401 			if (len > cnt) {
1402 				/* More file data so add it */
1403 				URI_RD_ADD(uri, rdp, len - cnt, off);
1404 				if (rdp == NULL) {
1405 					error = ENOMEM;
1406 					goto fail;
1407 				}
1408 				rdp->data.vnode = vp;
1409 
1410 				/* Send vnode data out the connection */
1411 				error = uri_rd_response(so, uri, rdp, first);
1412 			} else {
1413 				/* All file data fit in the prefetch */
1414 				VN_RELE(vp);
1415 			}
1416 			*fileoff += len;
1417 			vp = NULL;
1418 		}
1419 		*xfer += len;
1420 		sfvp++;
1421 
1422 		if (first)
1423 			first = B_FALSE;
1424 	}
1425 	if (total_count > 0) {
1426 		atomic_add_64(&nl7c_uri_bytes, total_count);
1427 	}
1428 	if (perror == 0 &&
1429 	    ((uri->respclen == URI_LEN_NOVALUE &&
1430 	    uri->resplen == URI_LEN_NOVALUE) ||
1431 	    uri->count >= uri->resplen)) {
1432 		/*
1433 		 * No more data needed and no pending response
1434 		 * data or current data count >= response length
1435 		 * so close the URI processing for this so.
1436 		 */
1437 		nl7c_close(so);
1438 		if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
1439 			/* Not a persistent connection */
1440 			so->so_nl7c_flags = 0;
1441 		}
1442 	}
1443 
1444 	return (0);
1445 
1446 fail:
1447 	if (alloc != NULL)
1448 		kmem_free(data, len);
1449 
1450 	if (vp != NULL)
1451 		VN_RELE(vp);
1452 
1453 	if (fp != NULL)
1454 		releasef(sfvp->sfv_fd);
1455 
1456 	if (total_count > 0) {
1457 		atomic_add_64(&nl7c_uri_bytes, total_count);
1458 	}
1459 
1460 	so->so_nl7c_flags = 0;
1461 	nl7c_urifree(so);
1462 
1463 	return (error);
1464 }
1465 
1466 /*
1467  * Called for a socket which is closing or when an application has
1468  * completed sending all the response data (i.e. for a persistent
1469  * connection called once for each completed application response).
1470  */
1471 
1472 void
1473 nl7c_close(struct sonode *so)
1474 {
1475 	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
1476 
1477 	if (uri == NULL) {
1478 		/*
1479 		 * No URI being processed so might be a listen()er
1480 		 * if so do any cleanup, else nothing more to do.
1481 		 */
1482 		if (so->so_state & SS_ACCEPTCONN) {
1483 			(void) nl7c_close_addr(so);
1484 		}
1485 		return;
1486 	}
1487 	so->so_nl7c_uri = NULL;
1488 	if (uri->hash != URI_TEMP) {
1489 		mutex_enter(&uri->proclock);
1490 		uri->proc = NULL;
1491 		if (CV_HAS_WAITERS(&uri->waiting)) {
1492 			cv_broadcast(&uri->waiting);
1493 		}
1494 		mutex_exit(&uri->proclock);
1495 		nl7c_uri_close++;
1496 	} else {
1497 		/* No proclock as uri exclusively owned by so */
1498 		uri->proc = NULL;
1499 		nl7c_uri_temp_close++;
1500 	}
1501 	REF_RELE(uri);
1502 	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1503 		nl7c_uri_reclaim();
1504 	}
1505 }
1506 
1507 /*
1508  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1509  * release the segmap mapping. Note, the uri_segmap_t will be freed by
1510  * REF_RELE() on return.
1511  */
1512 
1513 void
1514 uri_segmap_inactive(uri_segmap_t *smp)
1515 {
1516 	if (!segmap_kpm) {
1517 		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
1518 		    smp->len, F_SOFTUNLOCK, S_OTHER);
1519 	}
1520 	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1521 	VN_RELE(smp->vp);
1522 }
1523 
1524 /*
1525  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
1526  * release the reference, one per desballoc() of a segmap page, if a rd_t
1527  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
1528  * last kmem free the uri_desb_t.
1529  */
1530 
1531 static void
1532 uri_desb_free(uri_desb_t *desb)
1533 {
1534 	if (desb->segmap != NULL) {
1535 		REF_RELE(desb->segmap);
1536 	}
1537 	REF_RELE(desb->uri);
1538 	kmem_cache_free(uri_desb_kmc, desb);
1539 }
1540 
1541 /*
1542  * Segmap map up to a page of a uri_rd_t file descriptor.
1543  */
1544 
1545 uri_segmap_t *
1546 uri_segmap_map(uri_rd_t *rdp, int bytes)
1547 {
1548 	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1549 	int		len = MIN(rdp->sz, MAXBSIZE);
1550 
1551 	if (len > bytes)
1552 		len = bytes;
1553 
1554 	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1555 	segmap->len = len;
1556 	VN_HOLD(rdp->data.vnode);
1557 	segmap->vp = rdp->data.vnode;
1558 
1559 	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1560 	    segmap_kpm ? SM_FAULT : 0, S_READ);
1561 
1562 	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1563 	    F_SOFTLOCK, S_READ) != 0) {
1564 		REF_RELE(segmap);
1565 		return (NULL);
1566 	}
1567 	return (segmap);
1568 }
1569 
1570 /*
1571  * Chop up the kernel virtual memory area *data of size *sz bytes for
1572  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
1573  * the given template uri_desb_t *temp of max_mblk bytes per.
1574  *
1575  * The values of *data, *sz, and *bytes are updated on return, the
1576  * mblk_t chain is returned.
1577  */
1578 
1579 static mblk_t *
1580 uri_desb_chop(
1581 	char 		**data,
1582 	size_t		*sz,
1583 	int 		*bytes,
1584 	uri_desb_t 	*temp,
1585 	int		max_mblk,
1586 	char		*eoh,
1587 	mblk_t		*persist
1588 )
1589 {
1590 	char		*ldata = *data;
1591 	size_t		lsz = *sz;
1592 	int		lbytes = bytes ? *bytes : lsz;
1593 	uri_desb_t	*desb;
1594 	mblk_t		*mp = NULL;
1595 	mblk_t		*nmp, *pmp = NULL;
1596 	int		msz;
1597 
1598 	if (lbytes == 0 && lsz == 0)
1599 		return (NULL);
1600 
1601 	while (lbytes > 0 && lsz > 0) {
1602 		msz = MIN(lbytes, max_mblk);
1603 		msz = MIN(msz, lsz);
1604 		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1605 			msz = (eoh - ldata);
1606 			pmp = persist;
1607 			persist = NULL;
1608 			if (msz == 0) {
1609 				nmp = pmp;
1610 				pmp = NULL;
1611 				goto zero;
1612 			}
1613 		}
1614 		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1615 		REF_HOLD(temp->uri);
1616 		if (temp->segmap) {
1617 			REF_HOLD(temp->segmap);
1618 		}
1619 		bcopy(temp, desb, sizeof (*desb));
1620 		desb->frtn.free_arg = (caddr_t)desb;
1621 		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1622 		if (nmp == NULL) {
1623 			if (temp->segmap) {
1624 				REF_RELE(temp->segmap);
1625 			}
1626 			REF_RELE(temp->uri);
1627 			if (mp != NULL) {
1628 				mp->b_next = NULL;
1629 				freemsg(mp);
1630 			}
1631 			if (persist != NULL) {
1632 				freeb(persist);
1633 			}
1634 			return (NULL);
1635 		}
1636 		nmp->b_wptr += msz;
1637 	zero:
1638 		if (mp != NULL) {
1639 			mp->b_next->b_cont = nmp;
1640 		} else {
1641 			mp = nmp;
1642 		}
1643 		if (pmp != NULL) {
1644 			nmp->b_cont = pmp;
1645 			nmp = pmp;
1646 			pmp = NULL;
1647 		}
1648 		mp->b_next = nmp;
1649 		ldata += msz;
1650 		lsz -= msz;
1651 		lbytes -= msz;
1652 	}
1653 	*data = ldata;
1654 	*sz = lsz;
1655 	if (bytes)
1656 		*bytes = lbytes;
1657 	return (mp);
1658 }
1659 
1660 /*
1661  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
1662  * the entire mblk_t chain down without flow-control checks.
1663  */
1664 
1665 static int
1666 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1667 {
1668 	struct stdata *stp;
1669 	int error = 0;
1670 
1671 	ASSERT(vp->v_stream);
1672 	stp = vp->v_stream;
1673 
1674 	/* Fast check of flags before acquiring the lock */
1675 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1676 		mutex_enter(&stp->sd_lock);
1677 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1678 		mutex_exit(&stp->sd_lock);
1679 		if (error != 0) {
1680 			if (!(stp->sd_flag & STPLEX) &&
1681 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
1682 				tsignal(curthread, SIGPIPE);
1683 				error = EPIPE;
1684 			}
1685 			return (error);
1686 		}
1687 	}
1688 	putnext(stp->sd_wrq, mp);
1689 	return (0);
1690 }
1691 
1692 /*
1693  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1694  */
1695 
1696 static int
1697 uri_rd_response(struct sonode *so,
1698     uri_desc_t *uri,
1699     uri_rd_t *rdp,
1700     boolean_t first)
1701 {
1702 	vnode_t		*vp = SOTOV(so);
1703 	int		max_mblk = (int)((tcp_t *)so->so_priv)->tcp_mss;
1704 	int		wsz;
1705 	mblk_t		*mp, *wmp, *persist;
1706 	int		write_bytes;
1707 	uri_rd_t	rd;
1708 	uri_desb_t	desb;
1709 	uri_segmap_t	*segmap = NULL;
1710 	char		*segmap_data;
1711 	size_t		segmap_sz;
1712 	int		error;
1713 	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1714 	    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1715 
1716 
1717 	/* Initialize template uri_desb_t */
1718 	desb.frtn.free_func = uri_desb_free;
1719 	desb.frtn.free_arg = NULL;
1720 	desb.uri = uri;
1721 
1722 	/* Get a local copy of the rd_t */
1723 	bcopy(rdp, &rd, sizeof (rd));
1724 	do {
1725 		if (first) {
1726 			/*
1727 			 * For first kstrwrite() enough data to get
1728 			 * things going, note non blocking version of
1729 			 * kstrwrite() will be used below.
1730 			 */
1731 			write_bytes = P2ROUNDUP((max_mblk * 4),
1732 			    MAXBSIZE * nl7c_file_prefetch);
1733 		} else {
1734 			if ((write_bytes = so->so_sndbuf) == 0)
1735 				write_bytes = vp->v_stream->sd_qn_maxpsz;
1736 			ASSERT(write_bytes > 0);
1737 			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1738 		}
1739 		/*
1740 		 * Chop up to a write_bytes worth of data.
1741 		 */
1742 		wmp = NULL;
1743 		wsz = write_bytes;
1744 		do {
1745 			if (rd.sz == 0)
1746 				break;
1747 			if (rd.off == -1) {
1748 				if (uri->eoh >= rd.data.kmem &&
1749 				    uri->eoh < &rd.data.kmem[rd.sz]) {
1750 					persist = nl7c_http_persist(so);
1751 				} else {
1752 					persist = NULL;
1753 				}
1754 				desb.segmap = NULL;
1755 				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1756 				    &wsz, &desb, max_mblk, uri->eoh, persist);
1757 				if (mp == NULL) {
1758 					error = ENOMEM;
1759 					goto invalidate;
1760 				}
1761 			} else {
1762 				if (segmap == NULL) {
1763 					segmap = uri_segmap_map(&rd,
1764 					    write_bytes);
1765 					if (segmap == NULL) {
1766 						error = ENOMEM;
1767 						goto invalidate;
1768 					}
1769 					desb.segmap = segmap;
1770 					segmap_data = segmap->base;
1771 					segmap_sz = segmap->len;
1772 				}
1773 				mp = uri_desb_chop(&segmap_data, &segmap_sz,
1774 				    &wsz, &desb, max_mblk, NULL, NULL);
1775 				if (mp == NULL) {
1776 					error = ENOMEM;
1777 					goto invalidate;
1778 				}
1779 				if (segmap_sz == 0) {
1780 					rd.sz -= segmap->len;
1781 					rd.off += segmap->len;
1782 					REF_RELE(segmap);
1783 					segmap = NULL;
1784 				}
1785 			}
1786 			if (wmp == NULL) {
1787 				wmp = mp;
1788 			} else {
1789 				wmp->b_next->b_cont = mp;
1790 				wmp->b_next = mp->b_next;
1791 				mp->b_next = NULL;
1792 			}
1793 		} while (wsz > 0 && rd.sz > 0);
1794 
1795 		wmp->b_next = NULL;
1796 		if (first) {
1797 			/* First kstrwrite(), use noqwait */
1798 			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1799 				goto invalidate;
1800 			/*
1801 			 * For the rest of the kstrwrite()s use SO_SNDBUF
1802 			 * worth of data at a time, note these kstrwrite()s
1803 			 * may (will) block one or more times.
1804 			 */
1805 			first = B_FALSE;
1806 		} else {
1807 			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1808 				if (error == EAGAIN) {
1809 					nl7c_uri_rd_EAGAIN++;
1810 					if ((error =
1811 					    kstrwritempnoqwait(vp, wmp)) != 0)
1812 						goto invalidate;
1813 				} else
1814 					goto invalidate;
1815 			}
1816 		}
1817 	} while (rd.sz > 0);
1818 
1819 	return (0);
1820 
1821 invalidate:
1822 	if (segmap) {
1823 		REF_RELE(segmap);
1824 	}
1825 	if (wmp)
1826 		freemsg(wmp);
1827 
1828 	return (error);
1829 }
1830 
1831 /*
1832  * Send the URI uri_desc_t *uri response out the socket_t *so.
1833  */
1834 
1835 static int
1836 uri_response(struct sonode *so, uri_desc_t *uri)
1837 {
1838 	uri_rd_t	*rdp = &uri->response;
1839 	boolean_t	first = B_TRUE;
1840 	int		error;
1841 
1842 	while (rdp != NULL) {
1843 		error = uri_rd_response(so, uri, rdp, first);
1844 		if (error != 0) {
1845 			goto invalidate;
1846 		}
1847 		first = B_FALSE;
1848 		rdp = rdp->next;
1849 	}
1850 	return (0);
1851 
1852 invalidate:
1853 	uri_delete(uri);
1854 	return (error);
1855 }
1856 
/*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
 *
 *    pchar       = unreserved | escaped |
 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 *    unreserved  = alphanum | mark
 *
 *    alphanum    = alpha | digit
 *
 *    alpha       = lowalpha | upalpha
 *
 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
 *                  "y" | "z"
 *
 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
 *                  "Y" | "Z"
 *
 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *                  "8" | "9"
 *
 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 *    escaped     = "%" hex hex
 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                  "a" | "b" | "c" | "d" | "e" | "f"
 *
 * The array has 128 entries, one per 7-bit character value, where 1
 * is valid and 0 is not.
 *
 * NOTE(review): the table differs from the grammar above in that the
 * entries for "(" (0x28) and ")" (0x29) are 0 though both are a mark,
 * and the entry for "/" (0x2F) is 1 though it's not a pchar, presumably
 * as the path segment separator - TODO confirm both are intended.
 */

static char pchars[] = {
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
    0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
    0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
    1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
    1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
    1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
};

/*
 * Largest valid pchars[] index, presumably used to mask a char down to
 * an index into pchars[] - no use is visible in this part of the file.
 */
#define	PCHARS_MASK 0x7F
1910 
1911 /*
1912  * This is the main L7 request message parse, we are called each time
1913  * new data is availble for a socket, each time a single buffer of the
1914  * entire message to date is given.
1915  *
1916  * Here we parse the request looking for the URI, parse it, and if a
1917  * supported scheme call the scheme parser to commplete the parse of any
1918  * headers which may further qualify the identity of the requested object
1919  * then lookup it up in the URI hash.
1920  *
1921  * Return B_TRUE for more processing.
1922  *
1923  * Note, at this time the parser supports the generic message format as
1924  * specified in RFC 822 with potentional limitations as specified in RFC
1925  * 2616 for HTTP messages.
1926  *
1927  * Note, the caller supports an mblk_t chain, for now the parser(s)
1928  * require the complete header in a single mblk_t. This is the common
1929  * case and certainly for high performance environments, if at a future
1930  * date mblk_t chains are important the parse can be reved to process
1931  * mblk_t chains.
1932  */
1933 
1934 boolean_t
1935 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1936 {
1937 	char	*cp = (char *)so->so_nl7c_rcv_mp->b_rptr;
1938 	char	*ep = (char *)so->so_nl7c_rcv_mp->b_wptr;
1939 	char	*get = "GET ";
1940 	char	*post = "POST ";
1941 	char	c;
1942 	char	*uris;
1943 	uri_desc_t *uri = NULL;
1944 	uri_desc_t *ruri = NULL;
1945 	mblk_t	*reqmp;
1946 	uint32_t hv = 0;
1947 
1948 	if ((reqmp = dupb(so->so_nl7c_rcv_mp)) == NULL) {
1949 		nl7c_uri_pass_dupbfail++;
1950 		goto pass;
1951 	}
1952 	/*
1953 	 * Allocate and initialize minimumal state for the request
1954 	 * uri_desc_t, in the cache hit case this uri_desc_t will
1955 	 * be freed.
1956 	 */
1957 	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1958 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1959 	uri->hash = NULL;
1960 	uri->tail = NULL;
1961 	uri->scheme = NULL;
1962 	uri->count = 0;
1963 	uri->reqmp = reqmp;
1964 
1965 	/*
1966 	 * Set request time to current time.
1967 	 */
1968 	so->so_nl7c_rtime = gethrestime_sec();
1969 
1970 	/*
1971 	 * Parse the Request-Line for the URI.
1972 	 *
1973 	 * For backwards HTTP version compatable reasons skip any leading
1974 	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1975 	 */
1976 	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1977 		cp++;
1978 	}
1979 	while (cp < ep && *get == *cp) {
1980 		get++;
1981 		cp++;
1982 	}
1983 	if (*get != 0) {
1984 		/* Note a "GET", check for "POST" */
1985 		while (cp < ep && *post == *cp) {
1986 			post++;
1987 			cp++;
1988 		}
1989 		if (*post != 0) {
1990 			if (cp == ep) {
1991 				nl7c_uri_more_get++;
1992 				goto more;
1993 			}
1994 			/* Not a "GET" or a "POST", just pass */
1995 			nl7c_uri_pass_method++;
1996 			goto pass;
1997 		}
1998 		/* "POST", don't cache but still may want to parse */
1999 		uri->hash = URI_TEMP;
2000 	}
2001 	/*
2002 	 * Skip over URI path char(s) and save start and past end pointers.
2003 	 */
2004 	uris = cp;
2005 	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
2006 		if (c == '?') {
2007 			/* Don't cache but still may want to parse */
2008 			uri->hash = URI_TEMP;
2009 		}
2010 		CHASH(hv, c);
2011 		cp++;
2012 	}
2013 	if (c != '\r' && cp == ep) {
2014 		nl7c_uri_more_eol++;
2015 		goto more;
2016 	}
2017 	/*
2018 	 * Request-Line URI parsed, pass the rest of the request on
2019 	 * to the the http scheme parse.
2020 	 */
2021 	uri->path.cp = uris;
2022 	uri->path.ep = cp;
2023 	uri->hvalue = hv;
2024 	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2025 		/*
2026 		 * Parse not successful or pass on request, the pointer
2027 		 * to the parse pointer "cp" is overloaded such that ! NULL
2028 		 * for more data and NULL for bad parse of request or pass.
2029 		 */
2030 		if (cp != NULL) {
2031 			nl7c_uri_more_http++;
2032 			goto more;
2033 		}
2034 		nl7c_uri_pass_http++;
2035 		goto pass;
2036 	}
2037 	if (uri->nocache) {
2038 		uri->hash = URI_TEMP;
2039 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2040 	} else if (uri->hash == URI_TEMP) {
2041 		uri->nocache = B_TRUE;
2042 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2043 	}
2044 
2045 	if (uri->hash == URI_TEMP) {
2046 		if (so->so_nl7c_flags & NL7C_SOPERSIST) {
2047 			/* Temporary URI so skip hash processing */
2048 			nl7c_uri_request++;
2049 			nl7c_uri_temp++;
2050 			goto temp;
2051 		}
2052 		/* Not persistent so not interested in the response */
2053 		nl7c_uri_pass_temp++;
2054 		goto pass;
2055 	}
2056 	/*
2057 	 * Check the URI hash for a cached response, save the request
2058 	 * uri in case we need it below.
2059 	 */
2060 	ruri = uri;
2061 	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2062 		/*
2063 		 * Failed to lookup due to nonblocking wait required,
2064 		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2065 		 * failure, ... Just pass on this request.
2066 		 */
2067 		nl7c_uri_pass_addfail++;
2068 		goto pass;
2069 	}
2070 	nl7c_uri_request++;
2071 	if (uri->response.sz > 0) {
2072 		/*
2073 		 * We have the response cached, update recv mblk rptr
2074 		 * to reflect the data consumed in parse.
2075 		 */
2076 		mblk_t	*mp = so->so_nl7c_rcv_mp;
2077 
2078 		if (cp == (char *)mp->b_wptr) {
2079 			so->so_nl7c_rcv_mp = mp->b_cont;
2080 			mp->b_cont = NULL;
2081 			freeb(mp);
2082 		} else {
2083 			mp->b_rptr = (unsigned char *)cp;
2084 		}
2085 		nl7c_uri_hit++;
2086 		/* If conditional request check for substitute response */
2087 		if (ruri->conditional) {
2088 			uri = nl7c_http_cond(ruri, uri);
2089 		}
2090 		/* If logging enabled log request */
2091 		if (nl7c_logd_enabled) {
2092 			ipaddr_t faddr;
2093 
2094 			if (so->so_family == AF_INET) {
2095 				/* Only support IPv4 addrs */
2096 				faddr = ((struct sockaddr_in *)
2097 				    so->so_faddr_sa) ->sin_addr.s_addr;
2098 			} else {
2099 				faddr = 0;
2100 			}
2101 			/* XXX need to pass response type, e.g. 200, 304 */
2102 			nl7c_logd_log(ruri, uri, so->so_nl7c_rtime, faddr);
2103 		}
2104 		/*
2105 		 * Release reference on request URI, send the response out
2106 		 * the socket, release reference on response uri, set the
2107 		 * *ret value to B_TRUE to indicate request was consumed
2108 		 * then return B_FALSE to indcate no more data needed.
2109 		 */
2110 		REF_RELE(ruri);
2111 		(void) uri_response(so, uri);
2112 		REF_RELE(uri);
2113 		*ret = B_TRUE;
2114 		return (B_FALSE);
2115 	}
2116 	/*
2117 	 * Miss the cache, the request URI is in the cache waiting for
2118 	 * application write-side data to fill it.
2119 	 */
2120 	nl7c_uri_miss++;
2121 temp:
2122 	/*
2123 	 * A miss or temp URI for which response data is needed, link
2124 	 * uri to so and so to uri, set WAITWRITE in the so such that
2125 	 * read-side processing is suspended (so the next read() gets
2126 	 * the request data) until a write() is processed by NL7C.
2127 	 *
2128 	 * Note, so->so_nl7c_uri now owns the REF_INIT() ref.
2129 	 */
2130 	uri->proc = so;
2131 	so->so_nl7c_uri = uri;
2132 	so->so_nl7c_flags |= NL7C_WAITWRITE;
2133 	*ret = B_FALSE;
2134 	return (B_FALSE);
2135 
2136 more:
2137 	/* More data is needed, note fragmented recv not supported */
2138 	nl7c_uri_more++;
2139 
2140 pass:
2141 	/* Pass on this request */
2142 	nl7c_uri_pass++;
2143 	nl7c_uri_request++;
2144 	if (ruri != NULL) {
2145 		REF_RELE(ruri);
2146 	}
2147 	if (uri) {
2148 		REF_RELE(uri);
2149 	}
2150 	so->so_nl7c_flags = 0;
2151 	*ret = B_FALSE;
2152 	return (B_FALSE);
2153 }
2154