xref: /titanic_50/usr/src/uts/common/fs/sockfs/nl7curi.c (revision 2df1fe9ca32bb227b9158c67f5c00b54c20b10fd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/strsubr.h>
29 #include <sys/strsun.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <vm/seg_map.h>
33 #include <vm/seg_kpm.h>
34 #include <sys/condvar_impl.h>
35 #include <sys/sendfile.h>
36 #include <fs/sockfs/nl7c.h>
37 #include <fs/sockfs/nl7curi.h>
38 
39 #include <inet/common.h>
40 #include <inet/ip.h>
41 #include <inet/ip6.h>
42 #include <inet/tcp.h>
43 #include <inet/led.h>
44 #include <inet/mi.h>
45 
46 #include <inet/nca/ncadoorhdr.h>
47 #include <inet/nca/ncalogd.h>
48 #include <inet/nca/ncandd.h>
49 
50 #include <sys/promif.h>
51 
52 /*
53  * Some externs:
54  */
55 
/*
 * Declared here directly rather than through a header; the definitions
 * are not part of the portion of the file reviewed here.
 */
extern boolean_t	nl7c_logd_enabled;
extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
			    time_t, ipaddr_t);
extern boolean_t	nl7c_close_addr(struct sonode *);
extern struct sonode	*nl7c_addr2portso(void *);
extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
62 
63 /*
64  * Various global tuneables:
65  */
66 
clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */

boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */

uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */

/*
 * nl7c_uri_max is lowered by uri_kmc_reclaim() and enforced by
 * nl7c_uri_reclaim(), which unlinks URIs while nl7c_uri_bytes exceeds it.
 * nl7c_uri_bytes is decremented in nl7c_uri_inactive() by the size of the
 * kmem response data being freed.
 */
uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
75 
76 /*
77  * Locals:
78  */
79 
/* Forward declarations for static functions defined later in the file. */
static int	uri_rd_response(struct sonode *, uri_desc_t *,
		    uri_rd_t *, boolean_t);
static int	uri_response(struct sonode *, uri_desc_t *);
83 
/*
 * HTTP scheme functions. This file calls nl7c_http_response(),
 * nl7c_http_cmp(), nl7c_http_free() and nl7c_http_init(); the
 * implementations are presumably in nl7chttp.c (TODO confirm).
 */

boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
boolean_t nl7c_http_cmp(void *, void *);
mblk_t *nl7c_http_persist(struct sonode *);
void nl7c_http_free(void *arg);
void nl7c_http_init(void);
94 
95 /*
96  * Counters that need to move to kstat and/or be removed:
97  */
98 
/*
 * Note, these are bumped with plain ++ (not atomically), so the values
 * are approximate. Only counters whose update site is nearby are
 * annotated below.
 */
volatile uint64_t nl7c_uri_request = 0;
volatile uint64_t nl7c_uri_hit = 0;
volatile uint64_t nl7c_uri_pass = 0;
volatile uint64_t nl7c_uri_miss = 0;
volatile uint64_t nl7c_uri_temp = 0;
volatile uint64_t nl7c_uri_more = 0;
volatile uint64_t nl7c_uri_data = 0;		/* nl7c_data() calls */
volatile uint64_t nl7c_uri_sendfilev = 0;
volatile uint64_t nl7c_uri_reclaim_calls = 0;	/* nl7c_uri_reclaim() calls */
volatile uint64_t nl7c_uri_reclaim_cnt = 0;	/* URIs reclaimed */
volatile uint64_t nl7c_uri_pass_urifail = 0;
volatile uint64_t nl7c_uri_pass_dupbfail = 0;
volatile uint64_t nl7c_uri_more_get = 0;
volatile uint64_t nl7c_uri_pass_method = 0;
volatile uint64_t nl7c_uri_pass_option = 0;
volatile uint64_t nl7c_uri_more_eol = 0;
volatile uint64_t nl7c_uri_more_http = 0;
volatile uint64_t nl7c_uri_pass_http = 0;
volatile uint64_t nl7c_uri_pass_addfail = 0;
volatile uint64_t nl7c_uri_pass_temp = 0;
volatile uint64_t nl7c_uri_expire = 0;	/* uri_lookup() expired-URI unlinks */
volatile uint64_t nl7c_uri_purge = 0;	/* uri_lookup() nocache unlinks */
volatile uint64_t nl7c_uri_NULL1 = 0;
volatile uint64_t nl7c_uri_NULL2 = 0;
volatile uint64_t nl7c_uri_close = 0;
volatile uint64_t nl7c_uri_temp_close = 0;
volatile uint64_t nl7c_uri_free = 0;		/* nl7c_urifree() of hashed URI */
volatile uint64_t nl7c_uri_temp_free = 0;	/* nl7c_urifree() of URI_TEMP */
volatile uint64_t nl7c_uri_temp_mk = 0;	/* nl7c_resp_parse() made URI_TEMP */
volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
129 
130 /*
131  * Various kmem_cache_t's:
132  */
133 
/* All four caches are created in nl7c_uri_init(). */
kmem_cache_t *nl7c_uri_kmc;		/* uri_desc_t */
kmem_cache_t *nl7c_uri_rd_kmc;		/* uri_rd_t */
static kmem_cache_t *uri_desb_kmc;	/* uri_desb_t */
static kmem_cache_t *uri_segmap_kmc;	/* uri_segmap_t */

/* Passed to kmem_cache_create() for nl7c_uri_kmc in nl7c_uri_init(). */
static void uri_kmc_reclaim(void *);

static void nl7c_uri_reclaim(void);
142 
143 /*
144  * The URI hash is a dynamically sized A/B bucket hash, when the current
145  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
146  * the next P2Ps[] size is created.
147  *
148  * All lookups are done in the current hash then the new hash (if any),
 * if there is a new hash then when a current hash bucket chain is examined
150  * any uri_desc_t members will be migrated to the new hash and when the
151  * last uri_desc_t has been migrated then the new hash will become the
152  * current and the previous current hash will be freed leaving a single
153  * hash.
154  *
155  * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
157  * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
158  * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
159  * is placed on all uri_desc_t uri_hash_t list members.
160  *
161  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
162  * access and WRITER for write access. Note, WRITER is only required for
163  * hash geometry changes.
164  *
165  * uri_hash_which - which uri_hash_ab[] is the current hash.
166  *
167  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
168  *
169  * uri_hash_sz[] - the size for each uri_hash_ab[].
170  *
171  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
172  *
173  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
174  * a new uri_hash_ab[] needs to be created.
175  *
176  * uri_hash_ab[] - the uri_hash_t entries.
177  *
178  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
179  */
180 
/*
 * One hash bucket: the head of a singly linked chain of uri_desc_t
 * (linked through uri_desc_t.hash) and the mutex taken around chain
 * walks and updates.
 */
typedef struct uri_hash_s {
	struct uri_desc_s	*list;		/* List of uri_t(s) */
	kmutex_t		lock;		/* Taken around list access */
} uri_hash_t;
185 
#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */

/*
 * A/B hash state, each array is indexed by 0 or 1 and uri_hash_which
 * selects the current one. The sz/overflow/ab/lru entries for the initial
 * hash are filled in by nl7c_uri_init().
 */
static krwlock_t	uri_hash_access;	/* Guards all of the below */
static uint32_t		uri_hash_which = 0;	/* Index of current hash */
static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0}; /* P2Ps[] index */
static uint32_t		uri_hash_sz[2] = {0, 0};	/* Bucket count */
static uint32_t		uri_hash_cnt[2] = {0, 0};	/* Member count */
static uint32_t		uri_hash_overflow[2] = {0, 0};	/* sz * URI_HASH_AVRG */
static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};	/* Bucket arrays */
static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL}; /* Reclaim scan pos */
197 
198 /*
 * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2)),
 * these primes have been found to be useful for prime sized hash tables.
201  */
202 
/*
 * Indexed by N. Entries 0 - 2 are unused (0) and the trailing 0 marks
 * the end of the table, i.e. there is no prime for N of 25.
 */
static const int P2Ps[] = {
	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
	6143, 12281, 24571, 49139, 98299, 196597, 393209,
	786431, 1572853, 3145721, 6291449, 12582893, 0};
207 
208 /*
209  * Hash macros:
210  *
211  *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
213  *    return in c, *ep points to past end of (char *), on return *cp will
214  *    point to the last char consumed.
215  *
216  *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
217  *    *cp to *ep to the unsigned hix, cp nor ep are modified.
218  *
219  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
220  *    a hash index 0 - (uri_hash_sz[which] - 1).
221  *
222  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
223  *    uri_desc_t members from hash from to hash to.
224  *
225  *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
226  *    *uri which is a member of the uri_hash_t *hp list with a previous
227  *    list member of *puri for the uri_hash_ab[] cur. After unlinking
228  *    check for cur hash empty, if so make new cur. Note, as this macro
229  *    can change a hash chain it needs to be run under hash_access as
 *    RW_WRITER, further as it can change the new hash to cur any access
231  *    to the hash state must be done after either dropping locks and
232  *    starting over or making sure the global state is consistent after
233  *    as before.
234  */
235 
/*
 * H2A(cp, ep, c) - decode the "%HH" escape whose '%' is at cp into c.
 *
 * cp is advanced over up to two hex digits but never past ep, on return
 * it points at the last char consumed (or equals ep if the escape was
 * truncated). Hex letters are accepted in either case, so "%2f" and "%2F"
 * decode to the same value; a char that is not a hex digit contributes 0.
 *
 * Note, cp must be a modifiable lvalue and c an assignable lvalue, both
 * are evaluated more than once.
 */
#define	H2A(cp, ep, c) {						\
	int	_h = 2;							\
	int	_n = 0;							\
	char	_hc;							\
									\
	while (_h > 0 && ++(cp) < (ep)) {				\
		if (_h == 1)						\
			_n *= 0x10;					\
		_hc = *(cp);						\
		if (_hc >= '0' && _hc <= '9')				\
			_n += _hc - '0';				\
		else if (_hc >= 'a' && _hc <= 'f')			\
			_n += _hc - 'W';	/* 'a' - 'W' == 10 */	\
		else if (_hc >= 'A' && _hc <= 'F')			\
			_n += _hc - '7';	/* 'A' - '7' == 10 */	\
		_h--;							\
	}								\
	(c) = _n;							\
}
255 
/*
 * URI_HASH(hv, cp, ep) - fold the chars in [cp, ep) into the hash value
 * hv via CHASH(), a "%HH" escape is decoded by H2A() and hashed as the
 * single char it encodes. Neither cp nor ep are modified.
 */
#define	URI_HASH(hv, cp, ep) {						\
	char	*_s;							\
	char	_c;							\
									\
	for (_s = (cp); _s < (ep); _s++) {				\
		_c = *_s;						\
		if (_c == '%') {					\
			H2A(_s, (ep), _c);				\
		}							\
		CHASH(hv, _c);						\
	}								\
}
268 
/*
 * Reduce the hash value hix, in place, to a bucket index for hash which.
 * Note, expands to a bare assignment and uri_hash_sz[which] must be
 * nonzero.
 */
#define	URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
270 
/*
 * Move every uri_desc_t on bucket hp of hash from onto hash to.
 *
 * Each member is popped from the head of hp's list and pushed onto the
 * head of the bucket in to selected by its saved hvalue, the per hash
 * member counts are adjusted and the member's hit count is reset to 0.
 * hp's lock is held for the whole walk and the destination bucket lock
 * is taken nested inside it for each member moved.
 */
#define	URI_HASH_MIGRATE(from, hp, to) {				\
	uri_desc_t	*_nuri;						\
	uint32_t	_nhix;						\
	uri_hash_t	*_nhp;						\
									\
	mutex_enter(&(hp)->lock);					\
	while ((_nuri = (hp)->list) != NULL) {				\
		(hp)->list = _nuri->hash;				\
		atomic_add_32(&uri_hash_cnt[(from)], -1);		\
		atomic_add_32(&uri_hash_cnt[(to)], 1);			\
		_nhix = _nuri->hvalue;					\
		URI_HASH_IX(_nhix, to);					\
		_nhp = &uri_hash_ab[(to)][_nhix];			\
		mutex_enter(&_nhp->lock);				\
		_nuri->hash = _nhp->list;				\
		_nhp->list = _nuri;					\
		_nuri->hit = 0;						\
		mutex_exit(&_nhp->lock);				\
	}								\
	mutex_exit(&(hp)->lock);					\
}
292 
/*
 * URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink uri from bucket hp
 * of hash cur, puri is the preceding chain member or NULL when uri is
 * the chain head.
 *
 * If that empties hash cur and a distinct new hash exists then hash cur
 * is freed and new becomes the current hash, else the lru scan pointer
 * for cur is set to hp.
 *
 * Callers invoke this with cur == new after falling through to search
 * the new hash, in that case the hash being searched is the only one
 * that can take over so it must never be freed here, hence the
 * (cur) != (new) check.
 *
 * Must be run with uri_hash_access held as RW_WRITER.
 */
#define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
	if ((puri) != NULL) {						\
		(puri)->hash = (uri)->hash;				\
	} else {							\
		(hp)->list = (uri)->hash;				\
	}								\
	if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 &&		\
	    (cur) != (new) && uri_hash_ab[(new)] != NULL) {		\
		kmem_free(uri_hash_ab[(cur)],				\
		    sizeof (uri_hash_t) * uri_hash_sz[(cur)]);		\
		uri_hash_ab[(cur)] = NULL;				\
		uri_hash_lru[(cur)] = NULL;				\
		uri_hash_which = (new);					\
	} else {							\
		uri_hash_lru[(cur)] = (hp);				\
	}								\
}
310 
311 void
312 nl7c_uri_init(void)
313 {
314 	uint32_t	cur = uri_hash_which;
315 
316 	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
317 
318 	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
319 	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
320 	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
321 	    KM_SLEEP);
322 	uri_hash_lru[cur] = uri_hash_ab[cur];
323 
324 	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
325 	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
326 
327 	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
328 	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
329 
330 	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
331 	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
332 
333 	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
334 	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
335 
336 	nl7c_http_init();
337 }
338 
339 #define	CV_SZ	16
340 
341 void
342 nl7c_mi_report_hash(mblk_t *mp)
343 {
344 	uri_hash_t	*hp, *pend;
345 	uri_desc_t	*uri;
346 	uint32_t	cur;
347 	uint32_t	new;
348 	int		n, nz, tot;
349 	uint32_t	cv[CV_SZ + 1];
350 
351 	rw_enter(&uri_hash_access, RW_READER);
352 	cur = uri_hash_which;
353 	new = cur ? 0 : 1;
354 next:
355 	for (n = 0; n <= CV_SZ; n++)
356 		cv[n] = 0;
357 	nz = 0;
358 	tot = 0;
359 	hp = &uri_hash_ab[cur][0];
360 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
361 	while (hp < pend) {
362 		n = 0;
363 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
364 			n++;
365 		}
366 		tot += n;
367 		if (n > 0)
368 			nz++;
369 		if (n > CV_SZ)
370 			n = CV_SZ;
371 		cv[n]++;
372 		hp++;
373 	}
374 
375 	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
376 	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
377 	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
378 	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
379 	for (n = 1; n < CV_SZ; n++) {
380 		int	pn = 0;
381 		char	pv[5];
382 		char	*pp = pv;
383 
384 		for (pn = n; pn < 1000; pn *= 10)
385 			*pp++ = ' ';
386 		*pp = 0;
387 		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
388 	}
389 	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
390 
391 	if (cur != new && uri_hash_ab[new] != NULL) {
392 		cur = new;
393 		goto next;
394 	}
395 	rw_exit(&uri_hash_access);
396 }
397 
398 void
399 nl7c_mi_report_uri(mblk_t *mp)
400 {
401 	uri_hash_t	*hp;
402 	uri_desc_t	*uri;
403 	uint32_t	cur;
404 	uint32_t	new;
405 	int		ix;
406 	int		ret;
407 	char		sc;
408 
409 	rw_enter(&uri_hash_access, RW_READER);
410 	cur = uri_hash_which;
411 	new = cur ? 0 : 1;
412 next:
413 	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
414 		hp = &uri_hash_ab[cur][ix];
415 		mutex_enter(&hp->lock);
416 		uri = hp->list;
417 		while (uri != NULL) {
418 			sc = *(uri->path.ep);
419 			*(uri->path.ep) = 0;
420 			ret = mi_mpprintf(mp, "%s: %d %d %d",
421 			    uri->path.cp, (int)uri->resplen,
422 			    (int)uri->respclen, (int)uri->count);
423 			*(uri->path.ep) = sc;
424 			if (ret == -1) break;
425 			uri = uri->hash;
426 		}
427 		mutex_exit(&hp->lock);
428 		if (ret == -1) break;
429 	}
430 	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
431 		cur = new;
432 		goto next;
433 	}
434 	rw_exit(&uri_hash_access);
435 }
436 
437 /*
438  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
439  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
440  * will be freed by REF_RELE() on return.
441  */
442 
443 void
444 nl7c_uri_inactive(uri_desc_t *uri)
445 {
446 	int64_t	 bytes = 0;
447 
448 	if (uri->tail) {
449 		uri_rd_t *rdp = &uri->response;
450 		uri_rd_t *free = NULL;
451 
452 		while (rdp) {
453 			if (rdp->off == -1) {
454 				bytes += rdp->sz;
455 				kmem_free(rdp->data.kmem, rdp->sz);
456 			} else {
457 				VN_RELE(rdp->data.vnode);
458 			}
459 			rdp = rdp->next;
460 			if (free != NULL) {
461 				kmem_cache_free(nl7c_uri_rd_kmc, free);
462 			}
463 			free = rdp;
464 		}
465 	}
466 	if (bytes) {
467 		atomic_add_64(&nl7c_uri_bytes, -bytes);
468 	}
469 	if (uri->scheme != NULL) {
470 		nl7c_http_free(uri->scheme);
471 	}
472 	if (uri->reqmp) {
473 		freeb(uri->reqmp);
474 	}
475 }
476 
477 /*
478  * The reclaim is called by the kmem subsystem when kmem is running
479  * low. More work is needed to determine the best reclaim policy, for
480  * now we just manipulate the nl7c_uri_max global maximum bytes threshold
481  * value using a simple arithmetic backoff of the value every time this
482  * function is called then call uri_reclaim() to enforce it.
483  *
484  * Note, this value remains in place and enforced for all subsequent
485  * URI request/response processing.
486  *
487  * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
488  * the first call here set it to the current uri_bytes value then backoff
489  * from there.
490  *
491  * XXX how do we determine when to increase nl7c_uri_max ???
492  */
493 
494 /*ARGSUSED*/
495 static void
496 uri_kmc_reclaim(void *arg)
497 {
498 	uint64_t new_max;
499 
500 	if ((new_max = nl7c_uri_max) == 0) {
501 		/* Currently infinite, initialize to current bytes used */
502 		nl7c_uri_max = nl7c_uri_bytes;
503 		new_max = nl7c_uri_bytes;
504 	}
505 	if (new_max > 1) {
506 		/* Lower max_bytes to 93% of current value */
507 		new_max >>= 1;			/* 50% */
508 		new_max += (new_max >> 1);	/* 75% */
509 		new_max += (new_max >> 2);	/* 93% */
510 		if (new_max < nl7c_uri_max)
511 			nl7c_uri_max = new_max;
512 		else
513 			nl7c_uri_max = 1;
514 	}
515 	nl7c_uri_reclaim();
516 }
517 
518 /*
519  * Delete a uri_desc_t from the URI hash.
520  */
521 
522 static void
523 uri_delete(uri_desc_t *del)
524 {
525 	uint32_t	hix;
526 	uri_hash_t	*hp;
527 	uri_desc_t	*uri;
528 	uri_desc_t	*puri;
529 	uint32_t	cur;
530 	uint32_t	new;
531 
532 	ASSERT(del->hash != URI_TEMP);
533 	rw_enter(&uri_hash_access, RW_WRITER);
534 	cur = uri_hash_which;
535 	new = cur ? 0 : 1;
536 next:
537 	puri = NULL;
538 	hix = del->hvalue;
539 	URI_HASH_IX(hix, cur);
540 	hp = &uri_hash_ab[cur][hix];
541 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
542 		if (uri != del) {
543 			puri = uri;
544 			continue;
545 		}
546 		/*
547 		 * Found the URI, unlink from the hash chain,
548 		 * drop locks, ref release it.
549 		 */
550 		URI_HASH_UNLINK(cur, new, hp, puri, uri);
551 		rw_exit(&uri_hash_access);
552 		REF_RELE(uri);
553 		return;
554 	}
555 	if (cur != new && uri_hash_ab[new] != NULL) {
556 		/*
557 		 * Not found in current hash and have a new hash so
558 		 * check the new hash next.
559 		 */
560 		cur = new;
561 		goto next;
562 	}
563 	rw_exit(&uri_hash_access);
564 }
565 
566 /*
567  * Add a uri_desc_t to the URI hash.
568  */
569 
570 static void
571 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
572 {
573 	uint32_t	hix;
574 	uri_hash_t	*hp;
575 	uint32_t	cur = uri_hash_which;
576 	uint32_t	new = cur ? 0 : 1;
577 
578 	/*
579 	 * Caller of uri_add() must hold the uri_hash_access rwlock.
580 	 */
581 	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
582 	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
583 	/*
584 	 * uri_add() always succeeds so add a hash ref to the URI now.
585 	 */
586 	REF_HOLD(uri);
587 again:
588 	hix = uri->hvalue;
589 	URI_HASH_IX(hix, cur);
590 	if (uri_hash_ab[new] == NULL &&
591 	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
592 		/*
593 		 * Easy case, no new hash and current hasn't overflowed,
594 		 * add URI to current hash and return.
595 		 *
596 		 * Note, the check for uri_hash_cnt[] above aren't done
597 		 * atomictally, i.e. multiple threads can be in this code
598 		 * as RW_READER and update the cnt[], this isn't a problem
599 		 * as the check is only advisory.
600 		 */
601 	fast:
602 		atomic_add_32(&uri_hash_cnt[cur], 1);
603 		hp = &uri_hash_ab[cur][hix];
604 		mutex_enter(&hp->lock);
605 		uri->hash = hp->list;
606 		hp->list = uri;
607 		mutex_exit(&hp->lock);
608 		rw_exit(&uri_hash_access);
609 		return;
610 	}
611 	if (uri_hash_ab[new] == NULL) {
612 		/*
613 		 * Need a new a or b hash, if not already RW_WRITER
614 		 * try to upgrade our lock to writer.
615 		 */
616 		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
617 			/*
618 			 * Upgrade failed, we can't simple exit and reenter
619 			 * the lock as after the exit and before the reenter
620 			 * the whole world can change so just wait for writer
621 			 * then do everything again.
622 			 */
623 			if (nonblocking) {
624 				/*
625 				 * Can't block, use fast-path above.
626 				 *
627 				 * XXX should have a background thread to
628 				 * handle new ab[] in this case so as to
629 				 * not overflow the cur hash to much.
630 				 */
631 				goto fast;
632 			}
633 			rw_exit(&uri_hash_access);
634 			rwlock = RW_WRITER;
635 			rw_enter(&uri_hash_access, rwlock);
636 			cur = uri_hash_which;
637 			new = cur ? 0 : 1;
638 			goto again;
639 		}
640 		rwlock = RW_WRITER;
641 		if (uri_hash_ab[new] == NULL) {
642 			/*
643 			 * Still need a new hash, allocate and initialize
644 			 * the new hash.
645 			 */
646 			uri_hash_n[new] = uri_hash_n[cur] + 1;
647 			if (uri_hash_n[new] == 0) {
648 				/*
649 				 * No larger P2Ps[] value so use current,
650 				 * i.e. 2 of the largest are better than 1 ?
651 				 */
652 				uri_hash_n[new] = uri_hash_n[cur];
653 				cmn_err(CE_NOTE, "NL7C: hash index overflow");
654 			}
655 			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
656 			ASSERT(uri_hash_cnt[new] == 0);
657 			uri_hash_overflow[new] = uri_hash_sz[new] *
658 			    URI_HASH_AVRG;
659 			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
660 			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
661 			    KM_SLEEP);
662 			if (uri_hash_ab[new] == NULL) {
663 				/*
664 				 * Alloc failed, use fast-path above.
665 				 *
666 				 * XXX should have a background thread to
667 				 * handle new ab[] in this case so as to
668 				 * not overflow the cur hash to much.
669 				 */
670 				goto fast;
671 			}
672 			uri_hash_lru[new] = uri_hash_ab[new];
673 		}
674 	}
675 	/*
676 	 * Hashed against current hash so migrate any current hash chain
677 	 * members, if any.
678 	 *
679 	 * Note, the hash chain list can be checked for a non empty list
680 	 * outside of the hash chain list lock as the hash chain struct
681 	 * can't be destroyed while in the uri_hash_access rwlock, worst
682 	 * case is that a non empty list is found and after acquiring the
683 	 * lock another thread beats us to it (i.e. migrated the list).
684 	 */
685 	hp = &uri_hash_ab[cur][hix];
686 	if (hp->list != NULL) {
687 		URI_HASH_MIGRATE(cur, hp, new);
688 	}
689 	/*
690 	 * If new hash has overflowed before current hash has been
691 	 * completely migrated then walk all current hash chains and
692 	 * migrate list members now.
693 	 */
694 	if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) {
695 		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
696 			hp = &uri_hash_ab[cur][hix];
697 			if (hp->list != NULL) {
698 				URI_HASH_MIGRATE(cur, hp, new);
699 			}
700 		}
701 	}
702 	/*
703 	 * Add URI to new hash.
704 	 */
705 	hix = uri->hvalue;
706 	URI_HASH_IX(hix, new);
707 	hp = &uri_hash_ab[new][hix];
708 	mutex_enter(&hp->lock);
709 	uri->hash = hp->list;
710 	hp->list = uri;
711 	mutex_exit(&hp->lock);
712 	/*
713 	 * Last, check to see if last cur hash chain has been
714 	 * migrated, if so free cur hash and make new hash cur.
715 	 */
716 	if (uri_hash_cnt[cur] == 0) {
717 		/*
718 		 * If we don't already hold the uri_hash_access rwlock for
719 		 * RW_WRITE try to upgrade to RW_WRITE and if successful
720 		 * check again and to see if still need to do the free.
721 		 */
722 		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
723 		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
724 			kmem_free(uri_hash_ab[cur],
725 			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
726 			uri_hash_ab[cur] = NULL;
727 			uri_hash_lru[cur] = NULL;
728 			uri_hash_which = new;
729 		}
730 	}
731 	rw_exit(&uri_hash_access);
732 }
733 
734 /*
735  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
736  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
737  * add B_TRUE use the request URI to create a new hash entry. Else if add
738  * B_FALSE ...
739  */
740 
741 static uri_desc_t *
742 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
743 {
744 	uint32_t	hix;
745 	uri_hash_t	*hp;
746 	uri_desc_t	*uri;
747 	uri_desc_t	*puri;
748 	uint32_t	cur;
749 	uint32_t	new;
750 	char		*rcp = ruri->path.cp;
751 	char		*rep = ruri->path.ep;
752 
753 again:
754 	rw_enter(&uri_hash_access, RW_READER);
755 	cur = uri_hash_which;
756 	new = cur ? 0 : 1;
757 nexthash:
758 	puri = NULL;
759 	hix = ruri->hvalue;
760 	URI_HASH_IX(hix, cur);
761 	hp = &uri_hash_ab[cur][hix];
762 	mutex_enter(&hp->lock);
763 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
764 		char	*ap = uri->path.cp;
765 		char	*bp = rcp;
766 		char	a, b;
767 
768 		/* Compare paths */
769 		while (bp < rep && ap < uri->path.ep) {
770 			if ((a = *ap) == '%') {
771 				/* Escaped hex multichar, convert it */
772 				H2A(ap, uri->path.ep, a);
773 			}
774 			if ((b = *bp) == '%') {
775 				/* Escaped hex multichar, convert it */
776 				H2A(bp, rep, b);
777 			}
778 			if (a != b) {
779 				/* Char's don't match */
780 				goto nexturi;
781 			}
782 			ap++;
783 			bp++;
784 		}
785 		if (bp != rep || ap != uri->path.ep) {
786 			/* Not same length */
787 			goto nexturi;
788 		}
789 		ap = uri->auth.cp;
790 		bp = ruri->auth.cp;
791 		if (ap != NULL) {
792 			if (bp == NULL) {
793 				/* URI has auth request URI doesn't */
794 				goto nexturi;
795 			}
796 			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
797 				if ((a = *ap) == '%') {
798 					/* Escaped hex multichar, convert it */
799 					H2A(ap, uri->path.ep, a);
800 				}
801 				if ((b = *bp) == '%') {
802 					/* Escaped hex multichar, convert it */
803 					H2A(bp, rep, b);
804 				}
805 				if (a != b) {
806 					/* Char's don't match */
807 					goto nexturi;
808 				}
809 				ap++;
810 				bp++;
811 			}
812 			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
813 				/* Not same length */
814 				goto nexturi;
815 			}
816 		} else if (bp != NULL) {
817 			/* URI doesn't have auth and request URI does */
818 			goto nexturi;
819 		}
820 		/*
821 		 * Have a path/auth match so before any other processing
822 		 * of requested URI, check for expire or request no cache
823 		 * purge.
824 		 */
825 		if (uri->expire >= 0 && uri->expire <= lbolt || ruri->nocache) {
826 			/*
827 			 * URI has expired or request specified to not use
828 			 * the cached version, unlink the URI from the hash
829 			 * chain, release all locks, release the hash ref
830 			 * on the URI, and last look it up again.
831 			 *
832 			 * Note, this will cause all variants of the named
833 			 * URI to be purged.
834 			 */
835 			if (puri != NULL) {
836 				puri->hash = uri->hash;
837 			} else {
838 				hp->list = uri->hash;
839 			}
840 			mutex_exit(&hp->lock);
841 			atomic_add_32(&uri_hash_cnt[cur], -1);
842 			rw_exit(&uri_hash_access);
843 			if (ruri->nocache)
844 				nl7c_uri_purge++;
845 			else
846 				nl7c_uri_expire++;
847 			REF_RELE(uri);
848 			goto again;
849 		}
850 		if (uri->scheme != NULL) {
851 			/*
852 			 * URI has scheme private qualifier(s), if request
853 			 * URI doesn't or if no match skip this URI.
854 			 */
855 			if (ruri->scheme == NULL ||
856 			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
857 				goto nexturi;
858 		} else if (ruri->scheme != NULL) {
859 			/*
860 			 * URI doesn't have scheme private qualifiers but
861 			 * request URI does, no match, skip this URI.
862 			 */
863 			goto nexturi;
864 		}
865 		/*
866 		 * Have a match, ready URI for return, first put a reference
867 		 * hold on the URI, if this URI is currently being processed
868 		 * then have to wait for the processing to be completed and
869 		 * redo the lookup, else return it.
870 		 */
871 		REF_HOLD(uri);
872 		mutex_enter(&uri->proclock);
873 		if (uri->proc != NULL) {
874 			/* The URI is being processed, wait for completion */
875 			mutex_exit(&hp->lock);
876 			rw_exit(&uri_hash_access);
877 			if (! nonblocking &&
878 			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
879 				/*
880 				 * URI has been processed but things may
881 				 * have changed while we were away so do
882 				 * most everything again.
883 				 */
884 				mutex_exit(&uri->proclock);
885 				REF_RELE(uri);
886 				goto again;
887 			} else {
888 				/*
889 				 * A nonblocking socket or an interrupted
890 				 * cv_wait_sig() in the first case can't
891 				 * block waiting for the processing of the
892 				 * uri hash hit uri to complete, in both
893 				 * cases just return failure to lookup.
894 				 */
895 				mutex_exit(&uri->proclock);
896 				REF_RELE(uri);
897 				return (NULL);
898 			}
899 		}
900 		mutex_exit(&uri->proclock);
901 		uri->hit++;
902 		mutex_exit(&hp->lock);
903 		rw_exit(&uri_hash_access);
904 		return (uri);
905 	nexturi:
906 		puri = uri;
907 	}
908 	mutex_exit(&hp->lock);
909 	if (cur != new && uri_hash_ab[new] != NULL) {
910 		/*
911 		 * Not found in current hash and have a new hash so
912 		 * check the new hash next.
913 		 */
914 		cur = new;
915 		goto nexthash;
916 	}
917 add:
918 	if (! add) {
919 		/* Lookup only so return failure */
920 		rw_exit(&uri_hash_access);
921 		return (NULL);
922 	}
923 	/*
924 	 * URI not hashed, finish intialization of the
925 	 * request URI, add it to the hash, return it.
926 	 */
927 	ruri->hit = 0;
928 	ruri->expire = -1;
929 	ruri->response.sz = 0;
930 	ruri->proc = (struct sonode *)~NULL;
931 	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
932 	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
933 	uri_add(ruri, RW_READER, nonblocking);
934 	/* uri_add() has done rw_exit(&uri_hash_access) */
935 	return (ruri);
936 }
937 
938 /*
939  * Reclaim URIs until max cache size threshold has been reached.
940  *
941  * A CLOCK based reclaim modified with a history (hit counter) counter.
942  */
943 
944 static void
945 nl7c_uri_reclaim(void)
946 {
947 	uri_hash_t	*hp, *start, *pend;
948 	uri_desc_t	*uri;
949 	uri_desc_t	*puri;
950 	uint32_t	cur;
951 	uint32_t	new;
952 
953 	nl7c_uri_reclaim_calls++;
954 again:
955 	rw_enter(&uri_hash_access, RW_WRITER);
956 	cur = uri_hash_which;
957 	new = cur ? 0 : 1;
958 next:
959 	hp = uri_hash_lru[cur];
960 	start = hp;
961 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
962 	while (nl7c_uri_bytes > nl7c_uri_max) {
963 		puri = NULL;
964 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
965 			if (uri->hit != 0) {
966 				/*
967 				 * Decrement URI activity counter and skip.
968 				 */
969 				uri->hit--;
970 				puri = uri;
971 				continue;
972 			}
973 			if (uri->proc != NULL) {
974 				/*
975 				 * Currently being processed by a socket, skip.
976 				 */
977 				continue;
978 			}
979 			/*
980 			 * Found a candidate, no hit(s) since added or last
981 			 * reclaim pass, unlink from it's hash chain, update
982 			 * lru scan pointer, drop lock, ref release it.
983 			 */
984 			URI_HASH_UNLINK(cur, new, hp, puri, uri);
985 			if (cur == uri_hash_which) {
986 				if (++hp == pend) {
987 					/* Wrap pointer */
988 					hp = uri_hash_ab[cur];
989 				}
990 				uri_hash_lru[cur] = hp;
991 			}
992 			rw_exit(&uri_hash_access);
993 			REF_RELE(uri);
994 			nl7c_uri_reclaim_cnt++;
995 			goto again;
996 		}
997 		if (++hp == pend) {
998 			/* Wrap pointer */
999 			hp = uri_hash_ab[cur];
1000 		}
1001 		if (hp == start) {
1002 			if (cur != new && uri_hash_ab[new] != NULL) {
1003 				/*
1004 				 * Done with the current hash and have a
1005 				 * new hash so check the new hash next.
1006 				 */
1007 				cur = new;
1008 				goto next;
1009 			}
1010 		}
1011 	}
1012 	rw_exit(&uri_hash_access);
1013 }
1014 
1015 /*
1016  * Called for a socket which is being freed prior to close, e.g. errored.
1017  */
1018 
1019 void
1020 nl7c_urifree(struct sonode *so)
1021 {
1022 	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
1023 
1024 	so->so_nl7c_uri = NULL;
1025 	if (uri->hash != URI_TEMP) {
1026 		uri_delete(uri);
1027 		mutex_enter(&uri->proclock);
1028 		uri->proc = NULL;
1029 		if (CV_HAS_WAITERS(&uri->waiting)) {
1030 			cv_broadcast(&uri->waiting);
1031 		}
1032 		mutex_exit(&uri->proclock);
1033 		nl7c_uri_free++;
1034 	} else {
1035 		/* No proclock as uri exclusively owned by so */
1036 		uri->proc = NULL;
1037 		nl7c_uri_temp_free++;
1038 	}
1039 	REF_RELE(uri);
1040 }
1041 
1042 /*
1043  * ...
1044  *
1045  *	< 0	need more data
1046  *
1047  *	  0	parse complete
1048  *
1049  *	> 0	parse error
1050  */
1051 
1052 volatile uint64_t nl7c_resp_pfail = 0;
1053 volatile uint64_t nl7c_resp_ntemp = 0;
1054 volatile uint64_t nl7c_resp_pass = 0;
1055 
1056 static int
1057 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058 {
1059 	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060 		if (data == NULL) {
1061 			/* Parse fail */
1062 			goto pfail;
1063 		}
1064 		/* More data */
1065 		data = NULL;
1066 	} else if (data == NULL) {
1067 		goto pass;
1068 	}
1069 	if (uri->hash != URI_TEMP && uri->nocache) {
1070 		/*
1071 		 * After response parse now no cache,
1072 		 * delete it from cache, wakeup any
1073 		 * waiters on this URI, make URI_TEMP.
1074 		 */
1075 		uri_delete(uri);
1076 		mutex_enter(&uri->proclock);
1077 		if (CV_HAS_WAITERS(&uri->waiting)) {
1078 			cv_broadcast(&uri->waiting);
1079 		}
1080 		mutex_exit(&uri->proclock);
1081 		uri->hash = URI_TEMP;
1082 		nl7c_uri_temp_mk++;
1083 	}
1084 	if (data == NULL) {
1085 		/* More data needed */
1086 		return (-1);
1087 	}
1088 	/* Success */
1089 	return (0);
1090 
1091 pfail:
1092 	nl7c_resp_pfail++;
1093 	return (EINVAL);
1094 
1095 pass:
1096 	nl7c_resp_pass++;
1097 	return (ENOTSUP);
1098 }
1099 
1100 /*
1101  * Called to sink application response data, the processing of the data
1102  * is the same for a cached or temp URI (i.e. a URI for which we aren't
1103  * going to cache the URI but want to parse it for detecting response
1104  * data end such that for a persistent connection we can parse the next
1105  * request).
1106  *
1107  * On return 0 is returned for sink success, > 0 on error, and < 0 on
1108  * no so URI (note, data not sinked).
1109  */
1110 
1111 int
1112 nl7c_data(struct sonode *so, uio_t *uio)
1113 {
1114 	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
1115 	iovec_t		*iov;
1116 	int		cnt;
1117 	int		sz = uio->uio_resid;
1118 	char		*data, *alloc;
1119 	char		*bp;
1120 	uri_rd_t	*rdp;
1121 	boolean_t	first;
1122 	int		error, perror;
1123 
1124 	nl7c_uri_data++;
1125 
1126 	if (uri == NULL) {
1127 		/* Socket & NL7C out of sync, disable NL7C */
1128 		so->so_nl7c_flags = 0;
1129 		nl7c_uri_NULL1++;
1130 		return (-1);
1131 	}
1132 
1133 	if (so->so_nl7c_flags & NL7C_WAITWRITE) {
1134 		so->so_nl7c_flags &= ~NL7C_WAITWRITE;
1135 		first = B_TRUE;
1136 	} else {
1137 		first = B_FALSE;
1138 	}
1139 
1140 	alloc = kmem_alloc(sz, KM_SLEEP);
1141 	URI_RD_ADD(uri, rdp, sz, -1);
1142 	if (rdp == NULL) {
1143 		error = ENOMEM;
1144 		goto fail;
1145 	}
1146 
1147 	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1148 		uri_delete(uri);
1149 		uri->hash = URI_TEMP;
1150 	}
1151 	data = alloc;
1152 	alloc = NULL;
1153 	rdp->data.kmem = data;
1154 	atomic_add_64(&nl7c_uri_bytes, sz);
1155 
1156 	bp = data;
1157 	while (uio->uio_resid > 0) {
1158 		iov = uio->uio_iov;
1159 		if ((cnt = iov->iov_len) == 0) {
1160 			goto next;
1161 		}
1162 		cnt = MIN(cnt, uio->uio_resid);
1163 		error = xcopyin(iov->iov_base, bp, cnt);
1164 		if (error)
1165 			goto fail;
1166 
1167 		iov->iov_base += cnt;
1168 		iov->iov_len -= cnt;
1169 		uio->uio_resid -= cnt;
1170 		uio->uio_loffset += cnt;
1171 		bp += cnt;
1172 	next:
1173 		uio->uio_iov++;
1174 		uio->uio_iovcnt--;
1175 	}
1176 
1177 	/* Successfull sink of data, response parse the data */
1178 	perror = nl7c_resp_parse(so, uri, data, sz);
1179 
1180 	/* Send the data out the connection */
1181 	error = uri_rd_response(so, uri, rdp, first);
1182 	if (error)
1183 		goto fail;
1184 
1185 	/* Success */
1186 	if (perror == 0 &&
1187 	    ((uri->respclen == URI_LEN_NOVALUE &&
1188 	    uri->resplen == URI_LEN_NOVALUE) ||
1189 	    uri->count >= uri->resplen)) {
1190 		/*
1191 		 * No more data needed and no pending response
1192 		 * data or current data count >= response length
1193 		 * so close the URI processing for this so.
1194 		 */
1195 		nl7c_close(so);
1196 		if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
1197 			/* Not a persistent connection */
1198 			so->so_nl7c_flags = 0;
1199 		}
1200 	}
1201 
1202 	return (0);
1203 
1204 fail:
1205 	if (alloc != NULL) {
1206 		kmem_free(alloc, sz);
1207 	}
1208 	so->so_nl7c_flags = 0;
1209 	nl7c_urifree(so);
1210 
1211 	return (error);
1212 }
1213 
1214 /*
1215  * Called to read data from file "*fp" at offset "*off" of length "*len"
1216  * for a maximum of "*max_rem" bytes.
1217  *
1218  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1219  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
1220  * is updated with the number of bytes remaining to be read.
1221  *
1222  * Else, "NULL" is returned.
1223  */
1224 
1225 static char *
1226 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1227 {
1228 	vnode_t	*vp = fp->f_vnode;
1229 	int	flg = 0;
1230 	size_t	size = MIN(*len, max);
1231 	char	*data;
1232 	int	error;
1233 	uio_t	uio;
1234 	iovec_t	iov;
1235 
1236 	(void) VOP_RWLOCK(vp, flg, NULL);
1237 
1238 	if (*off > MAXOFFSET_T) {
1239 		VOP_RWUNLOCK(vp, flg, NULL);
1240 		*ret = EFBIG;
1241 		return (NULL);
1242 	}
1243 
1244 	if (*off + size > MAXOFFSET_T)
1245 		size = (ssize32_t)(MAXOFFSET_T - *off);
1246 
1247 	data = kmem_alloc(size, KM_SLEEP);
1248 
1249 	iov.iov_base = data;
1250 	iov.iov_len = size;
1251 	uio.uio_loffset = *off;
1252 	uio.uio_iov = &iov;
1253 	uio.uio_iovcnt = 1;
1254 	uio.uio_resid = size;
1255 	uio.uio_segflg = UIO_SYSSPACE;
1256 	uio.uio_llimit = MAXOFFSET_T;
1257 	uio.uio_fmode = fp->f_flag;
1258 
1259 	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1260 	VOP_RWUNLOCK(vp, flg, NULL);
1261 	*ret = error;
1262 	if (error) {
1263 		kmem_free(data, size);
1264 		return (NULL);
1265 	}
1266 	*len = size;
1267 	*off += size;
1268 	return (data);
1269 }
1270 
1271 /*
1272  * Called to sink application response sendfilev, as with nl7c_data() above
1273  * all the data will be processed by NL7C unless there's an error.
1274  */
1275 
1276 int
1277 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1278 	int sfvc, ssize_t *xfer)
1279 {
1280 	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
1281 	file_t		*fp = NULL;
1282 	vnode_t		*vp = NULL;
1283 	char		*data = NULL;
1284 	u_offset_t	off;
1285 	int		len;
1286 	int		cnt;
1287 	int		total_count = 0;
1288 	char		*alloc;
1289 	uri_rd_t	*rdp;
1290 	int		max;
1291 	int		perror;
1292 	int		error = 0;
1293 	boolean_t	first = B_TRUE;
1294 
1295 	nl7c_uri_sendfilev++;
1296 
1297 	if (uri == NULL) {
1298 		/* Socket & NL7C out of sync, disable NL7C */
1299 		so->so_nl7c_flags = 0;
1300 		nl7c_uri_NULL2++;
1301 		return (0);
1302 	}
1303 
1304 	if (so->so_nl7c_flags & NL7C_WAITWRITE)
1305 		so->so_nl7c_flags &= ~NL7C_WAITWRITE;
1306 
1307 	while (sfvc-- > 0) {
1308 		/*
1309 		 * off - the current sfv read file offset or user address.
1310 		 *
1311 		 * len - the current sfv length in bytes.
1312 		 *
1313 		 * cnt - number of bytes kmem_alloc()ed.
1314 		 *
1315 		 * alloc - the kmem_alloc()ed buffer of size "cnt".
1316 		 *
1317 		 * data - copy of "alloc" used for post alloc references.
1318 		 *
1319 		 * fp - the current sfv file_t pointer.
1320 		 *
1321 		 * vp - the current "*vp" vnode_t pointer.
1322 		 *
1323 		 * Note, for "data" and "fp" and "vp" a NULL value is used
1324 		 * when not allocated such that the common failure path "fail"
1325 		 * is used.
1326 		 */
1327 		off = sfvp->sfv_off;
1328 		len = sfvp->sfv_len;
1329 		cnt = len;
1330 		if (sfvp->sfv_fd == SFV_FD_SELF) {
1331 			/*
1332 			 * User memory, copyin() all the bytes.
1333 			 */
1334 			alloc = kmem_alloc(cnt, KM_SLEEP);
1335 			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1336 			if (error)
1337 				goto fail;
1338 		} else {
1339 			/*
1340 			 * File descriptor, prefetch some bytes.
1341 			 */
1342 			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1343 				error = EBADF;
1344 				goto fail;
1345 			}
1346 			if ((fp->f_flag & FREAD) == 0) {
1347 				error = EACCES;
1348 				goto fail;
1349 			}
1350 			vp = fp->f_vnode;
1351 			if (vp->v_type != VREG) {
1352 				error = EINVAL;
1353 				goto fail;
1354 			}
1355 			VN_HOLD(vp);
1356 
1357 			/* Read max_rem bytes from file for prefetch */
1358 			if (nl7c_use_kmem) {
1359 				max = cnt;
1360 			} else {
1361 				max = MAXBSIZE * nl7c_file_prefetch;
1362 			}
1363 			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1364 			if (alloc == NULL)
1365 				goto fail;
1366 
1367 			releasef(sfvp->sfv_fd);
1368 			fp = NULL;
1369 		}
1370 		URI_RD_ADD(uri, rdp, cnt, -1);
1371 		if (rdp == NULL) {
1372 			error = ENOMEM;
1373 			goto fail;
1374 		}
1375 		data = alloc;
1376 		alloc = NULL;
1377 		rdp->data.kmem = data;
1378 		total_count += cnt;
1379 		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1380 			uri_delete(uri);
1381 			uri->hash = URI_TEMP;
1382 		}
1383 
1384 		/* Response parse */
1385 		perror = nl7c_resp_parse(so, uri, data, len);
1386 
1387 		/* Send kmem data out the connection */
1388 		error = uri_rd_response(so, uri, rdp, first);
1389 
1390 		if (error)
1391 			goto fail;
1392 
1393 		if (sfvp->sfv_fd != SFV_FD_SELF) {
1394 			/*
1395 			 * File descriptor, if any bytes left save vnode_t.
1396 			 */
1397 			if (len > cnt) {
1398 				/* More file data so add it */
1399 				URI_RD_ADD(uri, rdp, len - cnt, off);
1400 				if (rdp == NULL) {
1401 					error = ENOMEM;
1402 					goto fail;
1403 				}
1404 				rdp->data.vnode = vp;
1405 
1406 				/* Send vnode data out the connection */
1407 				error = uri_rd_response(so, uri, rdp, first);
1408 			} else {
1409 				/* All file data fit in the prefetch */
1410 				VN_RELE(vp);
1411 			}
1412 			*fileoff += len;
1413 			vp = NULL;
1414 		}
1415 		*xfer += len;
1416 		sfvp++;
1417 
1418 		if (first)
1419 			first = B_FALSE;
1420 	}
1421 	if (total_count > 0) {
1422 		atomic_add_64(&nl7c_uri_bytes, total_count);
1423 	}
1424 	if (perror == 0 &&
1425 	    ((uri->respclen == URI_LEN_NOVALUE &&
1426 	    uri->resplen == URI_LEN_NOVALUE) ||
1427 	    uri->count >= uri->resplen)) {
1428 		/*
1429 		 * No more data needed and no pending response
1430 		 * data or current data count >= response length
1431 		 * so close the URI processing for this so.
1432 		 */
1433 		nl7c_close(so);
1434 		if (! (so->so_nl7c_flags & NL7C_SOPERSIST)) {
1435 			/* Not a persistent connection */
1436 			so->so_nl7c_flags = 0;
1437 		}
1438 	}
1439 
1440 	return (0);
1441 
1442 fail:
1443 	if (alloc != NULL)
1444 		kmem_free(data, len);
1445 
1446 	if (vp != NULL)
1447 		VN_RELE(vp);
1448 
1449 	if (fp != NULL)
1450 		releasef(sfvp->sfv_fd);
1451 
1452 	if (total_count > 0) {
1453 		atomic_add_64(&nl7c_uri_bytes, total_count);
1454 	}
1455 
1456 	so->so_nl7c_flags = 0;
1457 	nl7c_urifree(so);
1458 
1459 	return (error);
1460 }
1461 
1462 /*
1463  * Called for a socket which is closing or when an application has
1464  * completed sending all the response data (i.e. for a persistent
1465  * connection called once for each completed application response).
1466  */
1467 
1468 void
1469 nl7c_close(struct sonode *so)
1470 {
1471 	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;
1472 
1473 	if (uri == NULL) {
1474 		/*
1475 		 * No URI being processed so might be a listen()er
1476 		 * if so do any cleanup, else nothing more to do.
1477 		 */
1478 		if (so->so_state & SS_ACCEPTCONN) {
1479 			(void) nl7c_close_addr(so);
1480 		}
1481 		return;
1482 	}
1483 	so->so_nl7c_uri = NULL;
1484 	if (uri->hash != URI_TEMP) {
1485 		mutex_enter(&uri->proclock);
1486 		uri->proc = NULL;
1487 		if (CV_HAS_WAITERS(&uri->waiting)) {
1488 			cv_broadcast(&uri->waiting);
1489 		}
1490 		mutex_exit(&uri->proclock);
1491 		nl7c_uri_close++;
1492 	} else {
1493 		/* No proclock as uri exclusively owned by so */
1494 		uri->proc = NULL;
1495 		nl7c_uri_temp_close++;
1496 	}
1497 	REF_RELE(uri);
1498 	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1499 		nl7c_uri_reclaim();
1500 	}
1501 }
1502 
1503 /*
1504  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1505  * release the segmap mapping. Note, the uri_segmap_t will be freed by
1506  * REF_RELE() on return.
1507  */
1508 
1509 void
1510 uri_segmap_inactive(uri_segmap_t *smp)
1511 {
1512 	if (!segmap_kpm) {
1513 		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
1514 		    smp->len, F_SOFTUNLOCK, S_OTHER);
1515 	}
1516 	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1517 	VN_RELE(smp->vp);
1518 }
1519 
1520 /*
1521  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
1522  * release the reference, one per desballoc() of a segmap page, if a rd_t
1523  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
1524  * last kmem free the uri_desb_t.
1525  */
1526 
1527 static void
1528 uri_desb_free(uri_desb_t *desb)
1529 {
1530 	if (desb->segmap != NULL) {
1531 		REF_RELE(desb->segmap);
1532 	}
1533 	REF_RELE(desb->uri);
1534 	kmem_cache_free(uri_desb_kmc, desb);
1535 }
1536 
1537 /*
1538  * Segmap map up to a page of a uri_rd_t file descriptor.
1539  */
1540 
1541 uri_segmap_t *
1542 uri_segmap_map(uri_rd_t *rdp, int bytes)
1543 {
1544 	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1545 	int		len = MIN(rdp->sz, MAXBSIZE);
1546 
1547 	if (len > bytes)
1548 		len = bytes;
1549 
1550 	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1551 	segmap->len = len;
1552 	VN_HOLD(rdp->data.vnode);
1553 	segmap->vp = rdp->data.vnode;
1554 
1555 	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1556 	    segmap_kpm ? SM_FAULT : 0, S_READ);
1557 
1558 	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1559 	    F_SOFTLOCK, S_READ) != 0) {
1560 		REF_RELE(segmap);
1561 		return (NULL);
1562 	}
1563 	return (segmap);
1564 }
1565 
1566 /*
1567  * Chop up the kernel virtual memory area *data of size *sz bytes for
1568  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
1569  * the given template uri_desb_t *temp of max_mblk bytes per.
1570  *
1571  * The values of *data, *sz, and *bytes are updated on return, the
1572  * mblk_t chain is returned.
1573  */
1574 
1575 static mblk_t *
1576 uri_desb_chop(
1577 	char 		**data,
1578 	size_t		*sz,
1579 	int 		*bytes,
1580 	uri_desb_t 	*temp,
1581 	int		max_mblk,
1582 	char		*eoh,
1583 	mblk_t		*persist
1584 )
1585 {
1586 	char		*ldata = *data;
1587 	size_t		lsz = *sz;
1588 	int		lbytes = bytes ? *bytes : lsz;
1589 	uri_desb_t	*desb;
1590 	mblk_t		*mp = NULL;
1591 	mblk_t		*nmp, *pmp = NULL;
1592 	int		msz;
1593 
1594 	if (lbytes == 0 && lsz == 0)
1595 		return (NULL);
1596 
1597 	while (lbytes > 0 && lsz > 0) {
1598 		msz = MIN(lbytes, max_mblk);
1599 		msz = MIN(msz, lsz);
1600 		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1601 			msz = (eoh - ldata);
1602 			pmp = persist;
1603 			persist = NULL;
1604 			if (msz == 0) {
1605 				nmp = pmp;
1606 				pmp = NULL;
1607 				goto zero;
1608 			}
1609 		}
1610 		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1611 		REF_HOLD(temp->uri);
1612 		if (temp->segmap) {
1613 			REF_HOLD(temp->segmap);
1614 		}
1615 		bcopy(temp, desb, sizeof (*desb));
1616 		desb->frtn.free_arg = (caddr_t)desb;
1617 		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1618 		if (nmp == NULL) {
1619 			if (temp->segmap) {
1620 				REF_RELE(temp->segmap);
1621 			}
1622 			REF_RELE(temp->uri);
1623 			if (mp != NULL) {
1624 				mp->b_next = NULL;
1625 				freemsg(mp);
1626 			}
1627 			if (persist != NULL) {
1628 				freeb(persist);
1629 			}
1630 			return (NULL);
1631 		}
1632 		nmp->b_wptr += msz;
1633 	zero:
1634 		if (mp != NULL) {
1635 			mp->b_next->b_cont = nmp;
1636 		} else {
1637 			mp = nmp;
1638 		}
1639 		if (pmp != NULL) {
1640 			nmp->b_cont = pmp;
1641 			nmp = pmp;
1642 			pmp = NULL;
1643 		}
1644 		mp->b_next = nmp;
1645 		ldata += msz;
1646 		lsz -= msz;
1647 		lbytes -= msz;
1648 	}
1649 	*data = ldata;
1650 	*sz = lsz;
1651 	if (bytes)
1652 		*bytes = lbytes;
1653 	return (mp);
1654 }
1655 
1656 /*
1657  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
1658  * the entire mblk_t chain down without flow-control checks.
1659  */
1660 
1661 static int
1662 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1663 {
1664 	struct stdata *stp;
1665 	int error = 0;
1666 
1667 	ASSERT(vp->v_stream);
1668 	stp = vp->v_stream;
1669 
1670 	/* Fast check of flags before acquiring the lock */
1671 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1672 		mutex_enter(&stp->sd_lock);
1673 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1674 		mutex_exit(&stp->sd_lock);
1675 		if (error != 0) {
1676 			if (!(stp->sd_flag & STPLEX) &&
1677 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
1678 				tsignal(curthread, SIGPIPE);
1679 				error = EPIPE;
1680 			}
1681 			return (error);
1682 		}
1683 	}
1684 	putnext(stp->sd_wrq, mp);
1685 	return (0);
1686 }
1687 
1688 /*
1689  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1690  */
1691 
1692 static int
1693 uri_rd_response(struct sonode *so,
1694     uri_desc_t *uri,
1695     uri_rd_t *rdp,
1696     boolean_t first)
1697 {
1698 	vnode_t		*vp = SOTOV(so);
1699 	int		max_mblk = (int)((tcp_t *)so->so_priv)->tcp_mss;
1700 	int		wsz;
1701 	mblk_t		*mp, *wmp, *persist;
1702 	int		write_bytes;
1703 	uri_rd_t	rd;
1704 	uri_desb_t	desb;
1705 	uri_segmap_t	*segmap = NULL;
1706 	char		*segmap_data;
1707 	size_t		segmap_sz;
1708 	int		error;
1709 	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1710 			    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1711 
1712 
1713 	/* Initialize template uri_desb_t */
1714 	desb.frtn.free_func = uri_desb_free;
1715 	desb.frtn.free_arg = NULL;
1716 	desb.uri = uri;
1717 
1718 	/* Get a local copy of the rd_t */
1719 	bcopy(rdp, &rd, sizeof (rd));
1720 	do {
1721 		if (first) {
1722 			/*
1723 			 * For first kstrwrite() enough data to get
1724 			 * things going, note non blocking version of
1725 			 * kstrwrite() will be used below.
1726 			 */
1727 			write_bytes = P2ROUNDUP((max_mblk * 4),
1728 			    MAXBSIZE * nl7c_file_prefetch);
1729 		} else {
1730 			if ((write_bytes = so->so_sndbuf) == 0)
1731 				write_bytes = vp->v_stream->sd_qn_maxpsz;
1732 			ASSERT(write_bytes > 0);
1733 			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1734 		}
1735 		/*
1736 		 * Chop up to a write_bytes worth of data.
1737 		 */
1738 		wmp = NULL;
1739 		wsz = write_bytes;
1740 		do {
1741 			if (rd.sz == 0)
1742 				break;
1743 			if (rd.off == -1) {
1744 				if (uri->eoh >= rd.data.kmem &&
1745 				    uri->eoh < &rd.data.kmem[rd.sz]) {
1746 					persist = nl7c_http_persist(so);
1747 				} else {
1748 					persist = NULL;
1749 				}
1750 				desb.segmap = NULL;
1751 				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1752 				    &wsz, &desb, max_mblk, uri->eoh, persist);
1753 				if (mp == NULL) {
1754 					error = ENOMEM;
1755 					goto invalidate;
1756 				}
1757 			} else {
1758 				if (segmap == NULL) {
1759 					segmap = uri_segmap_map(&rd,
1760 					    write_bytes);
1761 					if (segmap == NULL) {
1762 						error = ENOMEM;
1763 						goto invalidate;
1764 					}
1765 					desb.segmap = segmap;
1766 					segmap_data = segmap->base;
1767 					segmap_sz = segmap->len;
1768 				}
1769 				mp = uri_desb_chop(&segmap_data, &segmap_sz,
1770 				    &wsz, &desb, max_mblk, NULL, NULL);
1771 				if (mp == NULL) {
1772 					error = ENOMEM;
1773 					goto invalidate;
1774 				}
1775 				if (segmap_sz == 0) {
1776 					rd.sz -= segmap->len;
1777 					rd.off += segmap->len;
1778 					REF_RELE(segmap);
1779 					segmap = NULL;
1780 				}
1781 			}
1782 			if (wmp == NULL) {
1783 				wmp = mp;
1784 			} else {
1785 				wmp->b_next->b_cont = mp;
1786 				wmp->b_next = mp->b_next;
1787 				mp->b_next = NULL;
1788 			}
1789 		} while (wsz > 0 && rd.sz > 0);
1790 
1791 		wmp->b_next = NULL;
1792 		if (first) {
1793 			/* First kstrwrite(), use noqwait */
1794 			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1795 				goto invalidate;
1796 			/*
1797 			 * For the rest of the kstrwrite()s use SO_SNDBUF
1798 			 * worth of data at a time, note these kstrwrite()s
1799 			 * may (will) block one or more times.
1800 			 */
1801 			first = B_FALSE;
1802 		} else {
1803 			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1804 				if (error == EAGAIN) {
1805 					nl7c_uri_rd_EAGAIN++;
1806 					if ((error =
1807 					    kstrwritempnoqwait(vp, wmp)) != 0)
1808 						goto invalidate;
1809 				} else
1810 					goto invalidate;
1811 			}
1812 		}
1813 	} while (rd.sz > 0);
1814 
1815 	return (0);
1816 
1817 invalidate:
1818 	if (segmap) {
1819 		REF_RELE(segmap);
1820 	}
1821 	if (wmp)
1822 		freemsg(wmp);
1823 
1824 	return (error);
1825 }
1826 
1827 /*
1828  * Send the URI uri_desc_t *uri response out the socket_t *so.
1829  */
1830 
1831 static int
1832 uri_response(struct sonode *so, uri_desc_t *uri)
1833 {
1834 	uri_rd_t	*rdp = &uri->response;
1835 	boolean_t	first = B_TRUE;
1836 	int		error;
1837 
1838 	while (rdp != NULL) {
1839 		error = uri_rd_response(so, uri, rdp, first);
1840 		if (error != 0) {
1841 			goto invalidate;
1842 		}
1843 		first = B_FALSE;
1844 		rdp = rdp->next;
1845 	}
1846 	return (0);
1847 
1848 invalidate:
1849 	uri_delete(uri);
1850 	return (error);
1851 }
1852 
/*
 * The pchars[] array is indexed by a char (7-bit, see PCHARS_MASK) to
 * determine if it's a valid URI path component character, a non zero
 * entry means valid, where:
 *
 *    pchar       = unreserved | escaped |
 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 *    unreserved  = alphanum | mark
 *
 *    alphanum    = alpha | digit
 *
 *    alpha       = lowalpha | upalpha
 *
 *    lowalpha    = "a" .. "z"
 *
 *    upalpha     = "A" .. "Z"
 *
 *    digit       = "0" .. "9"
 *
 *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 *    escaped     = "%" hex hex
 *
 *    hex         = digit | "A" .. "F" | "a" .. "f"
 *
 * NOTE(review): the table doesn't exactly match the grammar above, the
 * marks "(" (0x28) and ")" (0x29) are flagged as not valid while the path
 * separator "/" (0x2F), which isn't a pchar, is flagged as valid. Confirm
 * whether this is intended before relying on either.
 */

static char pchars[] = {
    /* 0x00 - 0x0F, control chars */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F, control chars */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F, space and punctuation */
    0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
    /* 0x30 - 0x3F, digits and punctuation */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
    /* 0x40 - 0x4F, at sign and upper case A - O */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F, upper case P - Z and punctuation */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F, back quote and lower case a - o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F, lower case p - z, punctuation and DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
};

#define	PCHARS_MASK 0x7F
1906 
1907 /*
1908  * This is the main L7 request message parse, we are called each time
1909  * new data is availble for a socket, each time a single buffer of the
1910  * entire message to date is given.
1911  *
1912  * Here we parse the request looking for the URI, parse it, and if a
1913  * supported scheme call the scheme parser to commplete the parse of any
1914  * headers which may further qualify the identity of the requested object
1915  * then lookup it up in the URI hash.
1916  *
1917  * Return B_TRUE for more processing.
1918  *
1919  * Note, at this time the parser supports the generic message format as
1920  * specified in RFC 822 with potentional limitations as specified in RFC
1921  * 2616 for HTTP messages.
1922  *
1923  * Note, the caller supports an mblk_t chain, for now the parser(s)
1924  * require the complete header in a single mblk_t. This is the common
1925  * case and certainly for high performance environments, if at a future
1926  * date mblk_t chains are important the parse can be reved to process
1927  * mblk_t chains.
1928  */
1929 
1930 boolean_t
1931 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1932 {
1933 	char	*cp = (char *)so->so_nl7c_rcv_mp->b_rptr;
1934 	char	*ep = (char *)so->so_nl7c_rcv_mp->b_wptr;
1935 	char	*get = "GET ";
1936 	char	*post = "POST ";
1937 	char	c;
1938 	char	*uris;
1939 	uri_desc_t *uri = NULL;
1940 	uri_desc_t *ruri = NULL;
1941 	mblk_t	*reqmp;
1942 	uint32_t hv = 0;
1943 
1944 	if ((reqmp = dupb(so->so_nl7c_rcv_mp)) == NULL) {
1945 		nl7c_uri_pass_dupbfail++;
1946 		goto pass;
1947 	}
1948 	/*
1949 	 * Allocate and initialize minimumal state for the request
1950 	 * uri_desc_t, in the cache hit case this uri_desc_t will
1951 	 * be freed.
1952 	 */
1953 	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1954 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1955 	uri->hash = NULL;
1956 	uri->tail = NULL;
1957 	uri->scheme = NULL;
1958 	uri->count = 0;
1959 	uri->reqmp = reqmp;
1960 
1961 	/*
1962 	 * Set request time to current time.
1963 	 */
1964 	so->so_nl7c_rtime = gethrestime_sec();
1965 
1966 	/*
1967 	 * Parse the Request-Line for the URI.
1968 	 *
1969 	 * For backwards HTTP version compatable reasons skip any leading
1970 	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1971 	 */
1972 	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1973 		cp++;
1974 	}
1975 	while (cp < ep && *get == *cp) {
1976 		get++;
1977 		cp++;
1978 	}
1979 	if (*get != 0) {
1980 		/* Note a "GET", check for "POST" */
1981 		while (cp < ep && *post == *cp) {
1982 			post++;
1983 			cp++;
1984 		}
1985 		if (*post != 0) {
1986 			if (cp == ep) {
1987 				nl7c_uri_more_get++;
1988 				goto more;
1989 			}
1990 			/* Not a "GET" or a "POST", just pass */
1991 			nl7c_uri_pass_method++;
1992 			goto pass;
1993 		}
1994 		/* "POST", don't cache but still may want to parse */
1995 		uri->hash = URI_TEMP;
1996 	}
1997 	/*
1998 	 * Skip over URI path char(s) and save start and past end pointers.
1999 	 */
2000 	uris = cp;
2001 	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
2002 		if (c == '?') {
2003 			/* Don't cache but still may want to parse */
2004 			uri->hash = URI_TEMP;
2005 		}
2006 		CHASH(hv, c);
2007 		cp++;
2008 	}
2009 	if (c != '\r' && cp == ep) {
2010 		nl7c_uri_more_eol++;
2011 		goto more;
2012 	}
2013 	/*
2014 	 * Request-Line URI parsed, pass the rest of the request on
2015 	 * to the the http scheme parse.
2016 	 */
2017 	uri->path.cp = uris;
2018 	uri->path.ep = cp;
2019 	uri->hvalue = hv;
2020 	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2021 		/*
2022 		 * Parse not successful or pass on request, the pointer
2023 		 * to the parse pointer "cp" is overloaded such that ! NULL
2024 		 * for more data and NULL for bad parse of request or pass.
2025 		 */
2026 		if (cp != NULL) {
2027 			nl7c_uri_more_http++;
2028 			goto more;
2029 		}
2030 		nl7c_uri_pass_http++;
2031 		goto pass;
2032 	}
2033 	if (uri->nocache) {
2034 		uri->hash = URI_TEMP;
2035 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2036 	} else if (uri->hash == URI_TEMP) {
2037 		uri->nocache = B_TRUE;
2038 		(void) uri_lookup(uri, B_FALSE, nonblocking);
2039 	}
2040 
2041 	if (uri->hash == URI_TEMP) {
2042 		if (so->so_nl7c_flags & NL7C_SOPERSIST) {
2043 			/* Temporary URI so skip hash processing */
2044 			nl7c_uri_request++;
2045 			nl7c_uri_temp++;
2046 			goto temp;
2047 		}
2048 		/* Not persistent so not interested in the response */
2049 		nl7c_uri_pass_temp++;
2050 		goto pass;
2051 	}
2052 	/*
2053 	 * Check the URI hash for a cached response, save the request
2054 	 * uri in case we need it below.
2055 	 */
2056 	ruri = uri;
2057 	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2058 		/*
2059 		 * Failed to lookup due to nonblocking wait required,
2060 		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2061 		 * failure, ... Just pass on this request.
2062 		 */
2063 		nl7c_uri_pass_addfail++;
2064 		goto pass;
2065 	}
2066 	nl7c_uri_request++;
2067 	if (uri->response.sz > 0) {
2068 		/*
2069 		 * We have the response cached, update recv mblk rptr
2070 		 * to reflect the data consumed in parse.
2071 		 */
2072 		mblk_t	*mp = so->so_nl7c_rcv_mp;
2073 
2074 		if (cp == (char *)mp->b_wptr) {
2075 			so->so_nl7c_rcv_mp = mp->b_cont;
2076 			mp->b_cont = NULL;
2077 			freeb(mp);
2078 		} else {
2079 			mp->b_rptr = (unsigned char *)cp;
2080 		}
2081 		nl7c_uri_hit++;
2082 		/* If conditional request check for substitute response */
2083 		if (ruri->conditional) {
2084 			uri = nl7c_http_cond(ruri, uri);
2085 		}
2086 		/* If logging enabled log request */
2087 		if (nl7c_logd_enabled) {
2088 			ipaddr_t faddr;
2089 
2090 			if (so->so_family == AF_INET) {
2091 				/* Only support IPv4 addrs */
2092 				faddr = ((struct sockaddr_in *)
2093 				    so->so_faddr_sa) ->sin_addr.s_addr;
2094 			} else {
2095 				faddr = 0;
2096 			}
2097 			/* XXX need to pass response type, e.g. 200, 304 */
2098 			nl7c_logd_log(ruri, uri, so->so_nl7c_rtime, faddr);
2099 		}
2100 		/*
2101 		 * Release reference on request URI, send the response out
2102 		 * the socket, release reference on response uri, set the
2103 		 * *ret value to B_TRUE to indicate request was consumed
2104 		 * then return B_FALSE to indcate no more data needed.
2105 		 */
2106 		REF_RELE(ruri);
2107 		(void) uri_response(so, uri);
2108 		REF_RELE(uri);
2109 		*ret = B_TRUE;
2110 		return (B_FALSE);
2111 	}
2112 	/*
2113 	 * Miss the cache, the request URI is in the cache waiting for
2114 	 * application write-side data to fill it.
2115 	 */
2116 	nl7c_uri_miss++;
2117 temp:
2118 	/*
2119 	 * A miss or temp URI for which response data is needed, link
2120 	 * uri to so and so to uri, set WAITWRITE in the so such that
2121 	 * read-side processing is suspended (so the next read() gets
2122 	 * the request data) until a write() is processed by NL7C.
2123 	 *
2124 	 * Note, so->so_nl7c_uri now owns the REF_INIT() ref.
2125 	 */
2126 	uri->proc = so;
2127 	so->so_nl7c_uri = uri;
2128 	so->so_nl7c_flags |= NL7C_WAITWRITE;
2129 	*ret = B_FALSE;
2130 	return (B_FALSE);
2131 
2132 more:
2133 	/* More data is needed, note fragmented recv not supported */
2134 	nl7c_uri_more++;
2135 
2136 pass:
2137 	/* Pass on this request */
2138 	nl7c_uri_pass++;
2139 	nl7c_uri_request++;
2140 	if (ruri != NULL) {
2141 		REF_RELE(ruri);
2142 	}
2143 	if (uri) {
2144 		REF_RELE(uri);
2145 	}
2146 	so->so_nl7c_flags = 0;
2147 	*ret = B_FALSE;
2148 	return (B_FALSE);
2149 }
2150