/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
#include <sys/condvar_impl.h>
#include <sys/promif.h>
#include <sys/sendfile.h>
#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

/*
 * Some externs:
 */

extern boolean_t nl7c_logd_enabled;
extern void nl7c_logd_log(uri_desc_t *, uri_desc_t *, time_t, ipaddr_t);
boolean_t nl7c_close_addr(struct sonode *);

/*
 * Various global tuneables:
 */

clock_t nl7c_uri_ttl = -1;		/* TTL in seconds (-1 == infinite) */

boolean_t nl7c_use_kmem = B_FALSE;	/* Force use of kmem (no segmap) */

uint64_t nl7c_file_prefetch = 1;	/* File cache prefetch pages */

uint64_t nl7c_uri_max = 0;		/* Maximum bytes (0 == infinite) */
uint64_t nl7c_uri_bytes = 0;		/* Bytes of kmem used by URIs */

/*
 * Counters that need to move to kstat and/or be removed:
 */

volatile uint64_t nl7c_uri_request = 0;
volatile uint64_t nl7c_uri_hit = 0;
volatile uint64_t nl7c_uri_pass = 0;
volatile uint64_t nl7c_uri_miss = 0;
volatile uint64_t nl7c_uri_temp = 0;
volatile uint64_t nl7c_uri_more = 0;
volatile uint64_t nl7c_uri_data = 0;
volatile uint64_t nl7c_uri_sendfilev = 0;
volatile uint64_t nl7c_uri_reclaim_calls = 0;
volatile uint64_t nl7c_uri_reclaim_cnt = 0;
volatile uint64_t nl7c_uri_pass_urifail = 0;
volatile uint64_t nl7c_uri_pass_dupbfail = 0;
volatile uint64_t nl7c_uri_more_get = 0;
volatile uint64_t nl7c_uri_pass_getnot = 0;
volatile uint64_t nl7c_uri_pass_option = 0;
volatile uint64_t nl7c_uri_more_eol = 0;
volatile uint64_t nl7c_uri_more_http = 0;
volatile uint64_t nl7c_uri_pass_http = 0;
volatile uint64_t nl7c_uri_pass_addfail = 0;
volatile uint64_t nl7c_uri_pass_temp = 0;
volatile uint64_t nl7c_uri_expire = 0;
volatile uint64_t nl7c_uri_NULL1 = 0;
volatile uint64_t nl7c_uri_NULL2 = 0;
volatile uint64_t nl7c_uri_close = 0;
volatile uint64_t nl7c_uri_temp_close = 0;
volatile uint64_t nl7c_uri_free = 0;
volatile uint64_t nl7c_uri_temp_free = 0;

/*
 * Various kmem_cache_t's:
 */

static kmem_cache_t *uri_kmc;
static kmem_cache_t *uri_rd_kmc;
static kmem_cache_t *uri_desb_kmc;
static kmem_cache_t *uri_segmap_kmc;

static void uri_kmc_reclaim(void *);

static void nl7c_uri_reclaim(void);

/*
 * The URI hash is a dynamically sized A/B bucket hash, when the current
 * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
 * the next P2Ps[] size is created.
 *
 * All lookups are done in the current hash then the new hash (if any),
 * if there is a new hash then when a current hash bucket chain is examined
 * any uri_desc_t members will be migrated to the new hash and when the
 * last uri_desc_t has been migrated then the new hash will become the
 * current and the previous current hash will be freed leaving a single
 * hash.
 *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
 * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
 * is placed on all uri_desc_t uri_hash_t list members.
 *
 * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
 * access and WRITER for write access. Note, WRITER is only required for
 * hash geometry changes.
 *
 * uri_hash_which - which uri_hash_ab[] is the current hash.
 *
 * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
 *
 * uri_hash_sz[] - the size for each uri_hash_ab[].
 *
 * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
 *
 * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
 * a new uri_hash_ab[] needs to be created.
 *
 * uri_hash_ab[] - the uri_hash_t entries.
 *
 * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
 */

/* A single hash bucket: chain head plus its per-bucket lock. */
typedef struct uri_hash_s {
	struct uri_desc_s	*list;	/* List of uri_t(s) */
	kmutex_t		lock;
} uri_hash_t;

#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */

static krwlock_t uri_hash_access;
static uint32_t uri_hash_which = 0;
static uint32_t uri_hash_n[2] = {URI_HASH_N_INIT, 0};
static uint32_t uri_hash_sz[2] = {0, 0};
static uint32_t uri_hash_cnt[2] = {0, 0};
static uint32_t uri_hash_overflow[2] = {0, 0};
static uri_hash_t *uri_hash_ab[2] = {NULL, NULL};
static uri_hash_t *uri_hash_lru[2] = {NULL, NULL};

/*
 * Primes for N of 3 - 24 where P is first prime less than
 * (2^(N-1))+(2^(N-2)); these primes have been found to be useful for
 * prime sized hash tables.
 */

static const int P2Ps[] = {
	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
	6143, 12281, 24571, 49139, 98299, 196597, 393209,
	786431, 1572853, 3145721, 6291449, 12582893, 0};

/*
 * Hash macros:
 *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return in c, *ep points to past end of (char *), on return *cp will
 *    point to the last char consumed.
 *
 *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 *    *cp to *ep to the unsigned hix, cp nor ep are modified.
 *
 *    URI_HASH_STR(unsigned hix, str_t *sp) - hash the str_t *sp.
 *
 *    URI_HASH_IX(unsigned hix, int hsz) - convert the hash value hix to
 *    a hash index 0 - hsz.
 *
 *    URI_HASH_DESC(uri_desc_t *uri, int which, unsigned hix) - hash the
 *    uri_desc_t *uri's str_t path member,
 *
 *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
 *    uri_desc_t members from hash from to hash to.
193 * 194 * URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t 195 * *uri which is a member of the uri_hash_t *hp list with a previous 196 * list member of *puri for the uri_hash_ab[] cur. After unlinking 197 * check for cur hash empty, if so make new cur. Note, as this macro 198 * can change a hash chain it needs to be run under hash_access as 199 * RW_WRITER, futher as it can change the new hash to cur any access 200 * to the hash state must be done after either dropping locks and 201 * starting over or making sure the global state is consistent after 202 * as before. 203 */ 204 205 #define H2A(cp, ep, c) { \ 206 int _h = 2; \ 207 int _n = 0; \ 208 char _hc; \ 209 \ 210 while (_h > 0 && ++(cp) < (ep)) { \ 211 if (_h == 1) \ 212 _n *= 0x10; \ 213 _hc = *(cp); \ 214 if (_hc >= '0' && _hc <= '9') \ 215 _n += _hc - '0'; \ 216 else if (_hc >= 'a' || _hc <= 'f') \ 217 _n += _hc - 'W'; \ 218 else if (_hc >= 'A' || _hc <= 'F') \ 219 _n += _hc - '7'; \ 220 _h--; \ 221 } \ 222 (c) = _n; \ 223 } 224 225 #define URI_HASH(hv, cp, ep) { \ 226 char *_s = (cp); \ 227 char _c; \ 228 \ 229 while (_s < (ep)) { \ 230 if ((_c = *_s) == '%') { \ 231 H2A(_s, (ep), _c); \ 232 } \ 233 (hv) = ((hv) << 5) + (hv) + _c; \ 234 (hv) &= 0x7FFFFFFF; \ 235 _s++; \ 236 } \ 237 } 238 239 #define URI_HASH_STR(hix, sp) URI_HASH(hix, (sp)->cp, (sp)->ep) 240 241 #define URI_HASH_IX(hix, hsz) (hix) = (hix) % (hsz) 242 243 #define URI_HASH_DESC(uri, which, hix) { \ 244 (hix) = 0; \ 245 URI_HASH_STR((hix), &(uri)->path); \ 246 if ((uri)->auth.cp != NULL) { \ 247 URI_HASH_STR((hix), &(uri)->auth); \ 248 } \ 249 URI_HASH_IX((hix), uri_hash_sz[(which)]); \ 250 } 251 252 #define URI_HASH_MIGRATE(from, hp, to) { \ 253 uri_desc_t *_nuri; \ 254 uint32_t _nhix; \ 255 uri_hash_t *_nhp; \ 256 \ 257 mutex_enter(&(hp)->lock); \ 258 while ((_nuri = (hp)->list) != NULL) { \ 259 (hp)->list = _nuri->hash; \ 260 atomic_add_32(&uri_hash_cnt[(from)], -1); \ 261 atomic_add_32(&uri_hash_cnt[(to)], 1); \ 262 
URI_HASH_DESC(_nuri, (to), _nhix); \ 263 _nhp = &uri_hash_ab[(to)][_nhix]; \ 264 mutex_enter(&_nhp->lock); \ 265 _nuri->hash = _nhp->list; \ 266 _nhp->list = _nuri; \ 267 _nuri->hit = 0; \ 268 mutex_exit(&_nhp->lock); \ 269 } \ 270 mutex_exit(&(hp)->lock); \ 271 } 272 273 #define URI_HASH_UNLINK(cur, new, hp, puri, uri) { \ 274 if ((puri) != NULL) { \ 275 (puri)->hash = (uri)->hash; \ 276 } else { \ 277 (hp)->list = (uri)->hash; \ 278 } \ 279 if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 && \ 280 uri_hash_ab[(new)] != NULL) { \ 281 kmem_free(uri_hash_ab[cur], \ 282 sizeof (uri_hash_t) * uri_hash_sz[cur]); \ 283 uri_hash_ab[(cur)] = NULL; \ 284 uri_hash_lru[(cur)] = NULL; \ 285 uri_hash_which = (new); \ 286 } else { \ 287 uri_hash_lru[(cur)] = (hp); \ 288 } \ 289 } 290 291 #define URI_RD_ADD(uri, rdp, size, offset) { \ 292 if ((uri)->tail == NULL) { \ 293 (rdp) = &(uri)->response; \ 294 } else { \ 295 (rdp) = kmem_cache_alloc(uri_rd_kmc, KM_SLEEP); \ 296 (uri)->tail->next = (rdp); \ 297 } \ 298 (rdp)->sz = size; \ 299 (rdp)->off = offset; \ 300 (rdp)->next = NULL; \ 301 (uri)->tail = rdp; \ 302 (uri)->count += size; \ 303 } 304 305 void 306 nl7c_uri_init(void) 307 { 308 uint32_t cur = uri_hash_which; 309 310 rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL); 311 312 uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT]; 313 uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG; 314 uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur], 315 KM_SLEEP); 316 uri_hash_lru[cur] = uri_hash_ab[cur]; 317 318 uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t), 0, 319 NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0); 320 321 uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc", sizeof (uri_rd_t), 0, 322 NULL, NULL, NULL, NULL, NULL, 0); 323 324 uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc", 325 sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 326 327 uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc", 328 sizeof (uri_segmap_t), 0, 
NULL, NULL, NULL, NULL, NULL, 0); 329 330 nl7c_http_init(); 331 } 332 333 /* 334 * The uri_desc_t ref_t inactive function called on the last REF_RELE(), 335 * free all resources contained in the uri_desc_t. Note, the uri_desc_t 336 * will be freed by REF_RELE() on return. 337 */ 338 339 static void 340 uri_inactive(uri_desc_t *uri) 341 { 342 int64_t bytes = 0; 343 344 if (uri->tail) { 345 uri_rd_t *rdp = &uri->response; 346 uri_rd_t *free = NULL; 347 348 while (rdp) { 349 if (rdp->off == -1) { 350 bytes += rdp->sz; 351 kmem_free(rdp->data.kmem, rdp->sz); 352 } else { 353 VN_RELE(rdp->data.vnode); 354 } 355 rdp = rdp->next; 356 if (free != NULL) { 357 kmem_cache_free(uri_rd_kmc, free); 358 } 359 free = rdp; 360 } 361 } 362 if (bytes) { 363 atomic_add_64(&nl7c_uri_bytes, -bytes); 364 } 365 if (uri->scheme != NULL) { 366 nl7c_http_free(uri->scheme); 367 } 368 if (uri->reqmp) { 369 freeb(uri->reqmp); 370 } 371 } 372 373 /* 374 * The reclaim is called by the kmem subsystem when kmem is running 375 * low. More work is needed to determine the best reclaim policy, for 376 * now we just manipulate the nl7c_uri_max global maximum bytes threshold 377 * value using a simple arithmetic backoff of the value every time this 378 * function is called then call uri_reclaim() to enforce it. 379 * 380 * Note, this value remains in place and enforced for all subsequent 381 * URI request/response processing. 382 * 383 * Note, nl7c_uri_max is currently initialized to 0 or infinite such that 384 * the first call here set it to the current uri_bytes value then backoff 385 * from there. 386 * 387 * XXX how do we determine when to increase nl7c_uri_max ??? 
 */

/*ARGSUSED*/
static void
uri_kmc_reclaim(void *arg)
{
	uint64_t new_max;

	if ((new_max = nl7c_uri_max) == 0) {
		/* Currently infinite, initialize to current bytes used */
		nl7c_uri_max = nl7c_uri_bytes;
		new_max = nl7c_uri_bytes;
	}
	if (new_max > 1) {
		/* Lower max_bytes to ~94% (15/16) of current value */
		new_max >>= 1;			/* 50% */
		new_max += (new_max >> 1);	/* 75% */
		new_max += (new_max >> 2);	/* 93% */
		if (new_max < nl7c_uri_max)
			nl7c_uri_max = new_max;
		else
			nl7c_uri_max = 1;
	}
	nl7c_uri_reclaim();
}

/*
 * Delete a uri_desc_t from the URI hash.
 *
 * Looks for *del first in the current hash then, if present, in the
 * new hash; on a match the entry is unlinked (URI_HASH_UNLINK, which
 * requires the RW_WRITER hold taken here) and its hash reference is
 * released.
 */

static void
uri_delete(uri_desc_t *del)
{
	uint32_t	hash = 0;
	uint32_t	hix;
	uri_hash_t	*hp;
	uri_desc_t	*uri;
	uri_desc_t	*puri;
	uint32_t	cur;
	uint32_t	new;

	URI_HASH_STR(hash, &(del->path));
	if (del->auth.cp != NULL) {
		/* Calculate hash for request authority */
		URI_HASH(hash, del->auth.cp, del->auth.ep);
	}
	rw_enter(&uri_hash_access, RW_WRITER);
	cur = uri_hash_which;
	new = cur ? 0 : 1;
next:
	puri = NULL;
	hix = hash;
	URI_HASH_IX(hix, uri_hash_sz[cur]);
	hp = &uri_hash_ab[cur][hix];
	for (uri = hp->list; uri != NULL; uri = uri->hash) {
		if (uri != del) {
			puri = uri;
			continue;
		}
		/*
		 * Found the URI, unlink from the hash chain,
		 * drop locks, ref release it.
		 */
		URI_HASH_UNLINK(cur, new, hp, puri, uri);
		rw_exit(&uri_hash_access);
		REF_RELE(uri);
		return;
	}
	if (cur != new && uri_hash_ab[new] != NULL) {
		/*
		 * Not found in current hash and have a new hash so
		 * check the new hash next.
		 */
		cur = new;
		goto next;
	}
	rw_exit(&uri_hash_access);
}

/*
 * Add a uri_desc_t to the URI hash.
 */

/*
 * uri - the URI to insert (a hash reference is always taken, add never
 * fails).
 * rwlock - the mode (RW_READER or RW_WRITER) in which the caller holds
 * uri_hash_access; may be upgraded here when a new hash is needed.
 * nonblocking - when B_TRUE no blocking allocation or lock wait is
 * allowed, so overflow falls back to the fast path.
 *
 * Note, uri_hash_access is always exited before return.
 */
static void
uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
{
	uint32_t	hix;
	uri_hash_t	*hp;
	uint32_t	cur = uri_hash_which;
	uint32_t	new = cur ? 0 : 1;

	/*
	 * Caller of uri_add() must hold the uri_hash_access rwlock.
	 */
	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
	/*
	 * uri_add() always succeeds so add a hash ref to the URI now.
	 */
	REF_HOLD(uri);
again:
	URI_HASH_DESC(uri, cur, hix);
	if (uri_hash_ab[new] == NULL &&
	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
		/*
		 * Easy case, no new hash and current hasn't overflowed,
		 * add URI to current hash and return.
		 *
		 * Note, the check for uri_hash_cnt[] above aren't done
		 * atomictally, i.e. multiple threads can be in this code
		 * as RW_READER and update the cnt[], this isn't a problem
		 * as the check is only advisory.
		 */
	fast:
		atomic_add_32(&uri_hash_cnt[cur], 1);
		hp = &uri_hash_ab[cur][hix];
		mutex_enter(&hp->lock);
		uri->hash = hp->list;
		hp->list = uri;
		mutex_exit(&hp->lock);
		rw_exit(&uri_hash_access);
		return;
	}
	if (uri_hash_ab[new] == NULL) {
		/*
		 * Need a new a or b hash, if not already RW_WRITER
		 * try to upgrade our lock to writer.
		 */
		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
			/*
			 * Upgrade failed, we can't simple exit and reenter
			 * the lock as after the exit and before the reenter
			 * the whole world can change so just wait for writer
			 * then do everything again.
			 */
			if (nonblocking) {
				/*
				 * Can't block, use fast-path above.
				 *
				 * XXX should have a background thread to
				 * handle new ab[] in this case so as to
				 * not overflow the cur hash to much.
				 */
				goto fast;
			}
			rw_exit(&uri_hash_access);
			rwlock = RW_WRITER;
			rw_enter(&uri_hash_access, rwlock);
			cur = uri_hash_which;
			new = cur ? 0 : 1;
			goto again;
		}
		rwlock = RW_WRITER;
		if (uri_hash_ab[new] == NULL) {
			/*
			 * Still need a new hash, allocate and initialize
			 * the new hash.
			 */
			uri_hash_n[new] = uri_hash_n[cur] + 1;
			if (uri_hash_n[new] == 0) {
				/*
				 * No larger P2Ps[] value so use current,
				 * i.e. 2 of the largest are better than 1 ?
				 */
				uri_hash_n[new] = uri_hash_n[cur];
				cmn_err(CE_NOTE, "NL7C: hash index overflow");
			}
			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
			ASSERT(uri_hash_cnt[new] == 0);
			uri_hash_overflow[new] = uri_hash_sz[new] *
			    URI_HASH_AVRG;
			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
			    KM_SLEEP);
			if (uri_hash_ab[new] == NULL) {
				/*
				 * Alloc failed, use fast-path above.
				 *
				 * XXX should have a background thread to
				 * handle new ab[] in this case so as to
				 * not overflow the cur hash to much.
				 */
				goto fast;
			}
			uri_hash_lru[new] = uri_hash_ab[new];
		}
	}
	/*
	 * Hashed against current hash so migrate any current hash chain
	 * members, if any.
	 *
	 * Note, the hash chain list can be checked for a non empty list
	 * outside of the hash chain list lock as the hash chain struct
	 * can't be destroyed while in the uri_hash_access rwlock, worst
	 * case is that a non empty list is found and after acquiring the
	 * lock another thread beats us to it (i.e. migrated the list).
	 */
	hp = &uri_hash_ab[cur][hix];
	if (hp->list != NULL) {
		URI_HASH_MIGRATE(cur, hp, new);
	}
	/*
	 * If new hash has overflowed before current hash has been
	 * completely migrated then walk all current hash chains and
	 * migrate list members now.
	 */
	if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) {
		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
			hp = &uri_hash_ab[cur][hix];
			if (hp->list != NULL) {
				URI_HASH_MIGRATE(cur, hp, new);
			}
		}
	}
	/*
	 * Add URI to new hash.
	 */
	URI_HASH_DESC(uri, new, hix);
	hp = &uri_hash_ab[new][hix];
	mutex_enter(&hp->lock);
	uri->hash = hp->list;
	hp->list = uri;
	mutex_exit(&hp->lock);
	/*
	 * Last, check to see if last cur hash chain has been
	 * migrated, if so free cur hash and make new hash cur.
	 */
	if (uri_hash_cnt[cur] == 0) {
		/*
		 * If we don't already hold the uri_hash_access rwlock for
		 * RW_WRITE try to upgrade to RW_WRITE and if successful
		 * check again and to see if still need to do the free.
		 */
		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
			kmem_free(uri_hash_ab[cur],
			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
			uri_hash_ab[cur] = NULL;
			uri_hash_lru[cur] = NULL;
			uri_hash_which = new;
		}
	}
	rw_exit(&uri_hash_access);
}

/*
 * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
 * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, use
 * the request URI to create a new hash entry.
 */

static uri_desc_t *
uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
{
	uint32_t	hash = 0;
	uint32_t	hix;
	uri_hash_t	*hp;
	uri_desc_t	*uri;
	uri_desc_t	*puri;
	uint32_t	cur;
	uint32_t	new;
	char		*rcp = ruri->path.cp;
	char		*rep = ruri->path.ep;

	/* Calculate hash for request path */
	URI_HASH(hash, rcp, rep);
	if (ruri->auth.cp != NULL) {
		/* Calculate hash for request authority */
		URI_HASH(hash, ruri->auth.cp, ruri->auth.ep);
	}
again:
	rw_enter(&uri_hash_access, RW_READER);
	cur = uri_hash_which;
	new = cur ?
0 : 1; 662 nexthash: 663 puri = NULL; 664 hix = hash; 665 URI_HASH_IX(hix, uri_hash_sz[cur]); 666 hp = &uri_hash_ab[cur][hix]; 667 mutex_enter(&hp->lock); 668 for (uri = hp->list; uri != NULL; uri = uri->hash) { 669 char *ap = uri->path.cp; 670 char *bp = rcp; 671 char a, b; 672 673 /* Compare paths */ 674 while (bp < rep && ap < uri->path.ep) { 675 if ((a = *ap) == '%') { 676 /* Escaped hex multichar, convert it */ 677 H2A(ap, uri->path.ep, a); 678 } 679 if ((b = *bp) == '%') { 680 /* Escaped hex multichar, convert it */ 681 H2A(bp, rep, b); 682 } 683 if (a != b) { 684 /* Char's don't match */ 685 goto nexturi; 686 } 687 ap++; 688 bp++; 689 } 690 if (bp != rep || ap != uri->path.ep) { 691 /* Not same length */ 692 goto nexturi; 693 } 694 ap = uri->auth.cp; 695 bp = ruri->auth.cp; 696 if (ap != NULL) { 697 if (bp == NULL) { 698 /* URI has auth request URI doesn't */ 699 goto nexturi; 700 } 701 while (bp < ruri->auth.ep && ap < uri->auth.ep) { 702 if ((a = *ap) == '%') { 703 /* Escaped hex multichar, convert it */ 704 H2A(ap, uri->path.ep, a); 705 } 706 if ((b = *bp) == '%') { 707 /* Escaped hex multichar, convert it */ 708 H2A(bp, rep, b); 709 } 710 if (a != b) { 711 /* Char's don't match */ 712 goto nexturi; 713 } 714 ap++; 715 bp++; 716 } 717 if (bp != ruri->auth.ep || ap != uri->auth.ep) { 718 /* Not same length */ 719 goto nexturi; 720 } 721 } else if (bp != NULL) { 722 /* URI doesn't have auth and request URI does */ 723 goto nexturi; 724 } 725 if (uri->scheme != NULL) { 726 if (ruri->scheme == NULL) { 727 /* 728 * URI has scheme private qualifiers, 729 * request URI doesn't. 730 */ 731 goto nexturi; 732 } 733 if (! nl7c_http_cmp(uri->scheme, ruri->scheme)) { 734 /* No match */ 735 goto nexturi; 736 } 737 } else if (ruri->scheme != NULL) { 738 /* 739 * URI doesn't have scheme private qualifiers, 740 * request URI does. 741 */ 742 goto nexturi; 743 } 744 /* 745 * Have a match, check for expire or request no cache. 
746 */ 747 if (uri->expire >= 0 && uri->expire <= lbolt || ruri->nocache) { 748 /* 749 * URI has expired or request specified to not use 750 * the cached version, unlink the URI from the hash 751 * chain, release all locks, release the hash ref 752 * on the URI, and last look it up again. 753 */ 754 if (puri != NULL) { 755 puri->hash = uri->hash; 756 } else { 757 hp->list = uri->hash; 758 } 759 mutex_exit(&hp->lock); 760 atomic_add_32(&uri_hash_cnt[cur], -1); 761 rw_exit(&uri_hash_access); 762 nl7c_uri_expire++; 763 REF_RELE(uri); 764 goto again; 765 } 766 /* 767 * Ready URI for return, put a reference hold on the URI, 768 * if this URI is currently being processed (i.e. filled) 769 * then wait for the processing to be completed first, free 770 * up the request URI and return the matched URI. 771 */ 772 REF_HOLD(uri); 773 mutex_enter(&uri->proclock); 774 if (uri->proc != NULL) { 775 /* The URI is being processed, wait for completion */ 776 mutex_exit(&hp->lock); 777 rw_exit(&uri_hash_access); 778 if (! nonblocking && 779 cv_wait_sig(&uri->waiting, &uri->proclock)) { 780 /* 781 * URI has been processed but things may 782 * have changed while we were away so do 783 * most everything again. 784 */ 785 mutex_exit(&uri->proclock); 786 REF_RELE(uri); 787 goto again; 788 } else { 789 /* 790 * A nonblocking socket or an interrupted 791 * cv_wait_sig() in the first case can't 792 * block waiting for the processing of the 793 * uri hash hit uri to complete, in both 794 * cases just return failure to lookup. 795 */ 796 mutex_exit(&uri->proclock); 797 REF_RELE(uri); 798 REF_RELE(ruri); 799 return (NULL); 800 } 801 } else { 802 mutex_exit(&uri->proclock); 803 } 804 uri->hit++; 805 mutex_exit(&hp->lock); 806 rw_exit(&uri_hash_access); 807 REF_RELE(ruri); 808 return (uri); 809 nexturi: 810 puri = uri; 811 } 812 mutex_exit(&hp->lock); 813 if (cur != new && uri_hash_ab[new] != NULL) { 814 /* 815 * Not found in current hash and have a new hash so 816 * check the new hash next. 
		 */
		cur = new;
		goto nexthash;
	}
add:
	if (! add) {
		/*
		 * Lookup only so free the
		 * request URI and return.
		 */
		rw_exit(&uri_hash_access);
		REF_RELE(ruri);
		return (NULL);
	}
	/*
	 * URI not hashed, finish initialization of the
	 * request URI, add it to the hash, return it.
	 */
	ruri->hit = 0;
	ruri->expire = -1;
	ruri->response.sz = 0;
	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
	uri_add(ruri, RW_READER, nonblocking);
	/* uri_add() has done rw_exit(&uri_hash_access) */
	return (ruri);
}

/*
 * Reclaim URIs until max cache size threshold has been reached.
 *
 * A CLOCK based reclaim modified with a history (hit counter) counter.
 */

static void
nl7c_uri_reclaim(void)
{
	uri_hash_t	*hp, *start, *pend;
	uri_desc_t	*uri;
	uri_desc_t	*puri;
	uint32_t	cur;
	uint32_t	new;

	nl7c_uri_reclaim_calls++;
again:
	rw_enter(&uri_hash_access, RW_WRITER);
	cur = uri_hash_which;
	new = cur ? 0 : 1;
next:
	hp = uri_hash_lru[cur];
	start = hp;
	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
	while (nl7c_uri_bytes > nl7c_uri_max) {
		puri = NULL;
		for (uri = hp->list; uri != NULL; uri = uri->hash) {
			if (uri->hit != 0) {
				/*
				 * Decrement URI activity counter and skip.
				 */
				uri->hit--;
				puri = uri;
				continue;
			}
			if (uri->proc != NULL) {
				/*
				 * Currently being processed by a socket, skip.
				 *
				 * NOTE(review): puri is not advanced here,
				 * so a subsequent URI_HASH_UNLINK in this
				 * chain would splice around this skipped
				 * member too — looks like a bug, confirm.
				 */
				continue;
			}
			/*
			 * Found a candidate, no hit(s) since added or last
			 * reclaim pass, unlink from it's hash chain, update
			 * lru scan pointer, drop lock, ref release it.
			 */
			URI_HASH_UNLINK(cur, new, hp, puri, uri);
			if (cur == uri_hash_which) {
				if (++hp == pend) {
					/* Wrap pointer */
					hp = uri_hash_ab[cur];
				}
				uri_hash_lru[cur] = hp;
			}
			rw_exit(&uri_hash_access);
			REF_RELE(uri);
			nl7c_uri_reclaim_cnt++;
			goto again;
		}
		if (++hp == pend) {
			/* Wrap pointer */
			hp = uri_hash_ab[cur];
		}
		if (hp == start) {
			if (cur != new && uri_hash_ab[new] != NULL) {
				/*
				 * Done with the current hash and have a
				 * new hash so check the new hash next.
				 */
				cur = new;
				goto next;
			}
		}
	}
	rw_exit(&uri_hash_access);
}

/*
 * Called for a socket which is being freed prior to close, e.g. errored.
 */

void
nl7c_urifree(struct sonode *so)
{
	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;

	so->so_nl7c_uri = NULL;
	if (uri->hash != URI_TEMP) {
		uri_delete(uri);
		mutex_enter(&uri->proclock);
		uri->proc = NULL;
		if (CV_HAS_WAITERS(&uri->waiting)) {
			cv_broadcast(&uri->waiting);
		}
		mutex_exit(&uri->proclock);
		nl7c_uri_free++;
	} else {
		/* No proclock as uri exclusively owned by so */
		uri->proc = NULL;
		nl7c_uri_temp_free++;
	}
	REF_RELE(uri);
}

/*
 * Called to copy some application response data.
 */

volatile uint64_t nl7c_data_pfail = 0;
volatile uint64_t nl7c_data_ntemp = 0;
volatile uint64_t nl7c_data_ncntl = 0;

void
nl7c_data(struct sonode *so, uio_t *uiop)
{
	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
	iovec_t		*iovp = uiop->uio_iov;
	int		resid = uiop->uio_resid;
	int		sz, len, cnt;
	char		*alloc;
	char		*data;
	char		*bp;
	uri_rd_t	*rdp;
	int		error = 0;

	nl7c_uri_data++;

	if (uri == NULL) {
		/* Socket & NL7C out of sync, disable NL7C */
		so->so_nl7c_flags = 0;
		nl7c_uri_NULL1++;
		return;
	}

	if (so->so_nl7c_flags & NL7C_WAITWRITE)
		so->so_nl7c_flags &= ~NL7C_WAITWRITE;

	/*
	 * TEMP (uncacheable) URIs only need enough data to complete the
	 * response header parse (resplen == -1 means not yet parsed),
	 * otherwise sink all of the residual into the cache entry.
	 */
	if (uri->hash == URI_TEMP) {
		if (uri->resplen == -1)
			sz = MIN(resid, URI_TEMP_PARSE_SZ);
		else
			sz = 0;
	} else {
		sz = resid;
	}
	if (sz > 0) {
		alloc = kmem_alloc(sz, KM_SLEEP);
	} else {
		alloc = NULL;
	}
	if (uri->hash == URI_TEMP) {
		uri->count += resid;
		data = alloc;
	} else {
		/* Ownership of the buffer passes to the uri_rd_t list */
		URI_RD_ADD(uri, rdp, sz, -1);
		if (rdp == NULL)
			goto fail;
		rdp->data.kmem = alloc;
		data = alloc;
		alloc = NULL;
	}
	bp = data;
	/* Gather up to sz bytes from the uio's iovec(s) */
	for (len = sz; len > 0; len -= cnt) {
		cnt = MIN(len, iovp->iov_len);
		error = xcopyin(iovp->iov_base, bp, cnt);
		if (error) {
			goto fail;
		}
		bp += cnt;
		iovp++;
	}
	bp = data;
	if (uri->resplen == -1 &&
	    ! nl7c_http_response(&bp, &bp[sz], uri, so) &&
	    (bp == NULL || uri->hash != URI_TEMP || uri->resplen == -1)) {
		/*
		 * Parse not complete and parse failed or not TEMP
		 * partial parse or TEMP partial parse and no resplen.
		 */
		if (bp == NULL)
			nl7c_data_pfail++;
		else if (uri->hash != URI_TEMP)
			nl7c_data_ntemp++;
		else if (uri->resplen == -1)
			nl7c_data_ncntl++;
		goto fail;
	}
	if (uri->resplen != -1 && uri->count >= uri->resplen) {
		/* Got the response data, close the uri */
		nl7c_close(so);
	}
	if (alloc != NULL) {
		/* TEMP parse buffer, no longer needed */
		kmem_free(alloc, sz);
	} else {
		atomic_add_64(&nl7c_uri_bytes, sz);
	}
	return;

fail:
	if (alloc != NULL) {
		kmem_free(alloc, sz);
	}
	so->so_nl7c_flags = 0;
	nl7c_urifree(so);
}

/*
 * Called to read data from file "*fp" at offset "*off" of length "*len"
 * for a maximum of "*max_rem" bytes.
 *
 * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
 * and "*len" are updated for the actual number of bytes read and "*max_rem"
 * is updated with the number of bytes remaining to be read.
 *
 * Else, "NULL" is returned.
 */

static char *
nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int *max_rem)
{
	vnode_t	*vp = fp->f_vnode;
	int	flg = 0;
	size_t	size = MIN(*len, *max_rem);
	char	*data;
	int	error;
	uio_t	uio;
	iovec_t	iov;

	(void) VOP_RWLOCK(vp, flg, NULL);

	if (*off > MAXOFFSET_T) {
		VOP_RWUNLOCK(vp, flg, NULL);
		return (NULL);
	}

	if (*off + size > MAXOFFSET_T)
		size = (ssize32_t)(MAXOFFSET_T - *off);

	data = kmem_alloc(size, KM_SLEEP);

	iov.iov_base = data;
	iov.iov_len = size;
	uio.uio_loffset = *off;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_llimit = MAXOFFSET_T;
	uio.uio_fmode = fp->f_flag;

	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
	VOP_RWUNLOCK(vp, flg, NULL);
	if (error) {
		kmem_free(data, size);
		return (NULL);
	}
	*max_rem =
*len - size;
	*len = size;
	*off += size;
	return (data);
}

/*
 * Called to copy application response sendfilev.
 *
 * Note, the value of kmem_bytes is a segmap max sized value greater
 * than or equal to TCP's tcp_slow_start_initial, there are several
 * issues with this scheme least of which is assuming that TCP is the
 * only IP transport we care about or that we hardcode the TCP value
 * but until ...
 */

void
nl7c_sendfilev(struct sonode *so, u_offset_t off, sendfilevec_t *sfvp, int sfvc)
{
	uri_desc_t	*uri = (uri_desc_t *)so->so_nl7c_uri;
	file_t		*fp = NULL;
	vnode_t		*vp = NULL;
	char		*data = NULL;
	int		len;
	int		count;
	int		total_count = 0;
	char		*bp;
	uri_rd_t	*rdp;
	int		max_rem;
	int		error = 0;

	nl7c_uri_sendfilev++;

	if (uri == NULL) {
		/* Socket & NL7C out of sync, disable NL7C */
		so->so_nl7c_flags = 0;
		nl7c_uri_NULL2++;
		return;
	}

	if (so->so_nl7c_flags & NL7C_WAITWRITE)
		so->so_nl7c_flags &= ~NL7C_WAITWRITE;

	while (sfvc-- > 0) {
		/*
		 * off - the current sfv read file offset or user address.
		 *
		 * len - the current sfv kmem_alloc()ed buffer length, note
		 * may be less than the actual sfv size.
		 *
		 * count - the actual current sfv size in bytes.
		 *
		 * data - the kmem_alloc()ed buffer of size "len".
		 *
		 * fp - the current sfv file_t pointer.
		 *
		 * vp - the current "*vp" vnode_t pointer.
		 *
		 * Note, for "data" and "fp" and "vp" a NULL value is used
		 * when not allocated such that the common failure path "fail"
		 * is used.
		 */
		off = sfvp->sfv_off;
		len = sfvp->sfv_len;
		count = len;
		if (uri->hash == URI_TEMP) {
			/* TEMP uri only needs enough bytes to finish parse */
			if (uri->resplen == -1)
				len = MIN(len, URI_TEMP_PARSE_SZ);
			else
				len = 0;
		}
		if (len == 0) {
			/*
			 * TEMP uri with no data to sink, just count bytes.
			 */
			uri->count += count;
		} else if (sfvp->sfv_fd == SFV_FD_SELF) {
			/*
			 * Process user memory, copyin().
			 */
			data = kmem_alloc(len, KM_SLEEP);

			error = xcopyin((caddr_t)(uintptr_t)off, data, len);
			if (error)
				goto fail;

			bp = data;
			if (uri->resplen == -1 &&
			    ! nl7c_http_response(&bp, &bp[len], uri, so) &&
			    (bp == NULL || uri->hash != URI_TEMP ||
			    (uri->hash == URI_TEMP && uri->resplen == -1))) {
				/*
				 * Parse not complete and parse failed or
				 * not TEMP partial parse or TEMP partial
				 * parse and no resplen.
				 */
				goto fail;
			}

			if (uri->hash == URI_TEMP) {
				uri->count += len;
				kmem_free(data, len);
				data = NULL;
			} else {
				/* Buffer ownership passes to the rd list */
				URI_RD_ADD(uri, rdp, len, -1);
				if (rdp == NULL)
					goto fail;
				rdp->data.kmem = data;
				data = NULL;
				total_count += len;
			}
		} else {
			/*
			 * File descriptor, prefetch some bytes,
			 * save vnode_t if any bytes left.
			 */
			if ((fp = getf(sfvp->sfv_fd)) == NULL)
				goto fail;

			if ((fp->f_flag & FREAD) == 0)
				goto fail;

			vp = fp->f_vnode;
			if (vp->v_type != VREG)
				goto fail;
			VN_HOLD(vp);

			/* Read max_rem bytes from file for prefetch */
			if (nl7c_use_kmem) {
				max_rem = len;
			} else {
				max_rem = MAXBSIZE * nl7c_file_prefetch;
			}
			data = nl7c_readfile(fp, &off, &len, &max_rem);
			if (data == NULL)
				goto fail;

			releasef(sfvp->sfv_fd);
			fp = NULL;

			bp = data;
			if (uri->resplen == -1 &&
			    ! nl7c_http_response(&bp, &bp[len], uri, so) &&
			    (bp == NULL || uri->hash != URI_TEMP ||
			    uri->resplen == -1)) {
				/*
				 * Parse not complete and parse failed or
				 * not TEMP partial parse or TEMP partial
				 * parse and no resplen.
				 */
				goto fail;
			}

			if (uri->hash == URI_TEMP) {
				/*
				 * Temp uri, account for all sfv bytes and
				 * free up any resources allocated above.
				 */
				uri->count += count;
				kmem_free(data, len);
				data = NULL;
				VN_RELE(vp);
				vp = NULL;
			} else {
				/*
				 * Setup an uri_rd_t for the prefetch and
				 * if any sfv data remains setup an another
				 * uri_rd_t to map it, last free up any
				 * resources allocated above.
				 */
				URI_RD_ADD(uri, rdp, len, -1);
				if (rdp == NULL)
					goto fail;
				rdp->data.kmem = data;
				data = NULL;
				if (max_rem > 0) {
					/* More file data so add it */
					URI_RD_ADD(uri, rdp, max_rem, off);
					if (rdp == NULL)
						goto fail;
					rdp->data.vnode = vp;
				} else {
					/* All file data fit in the prefetch */
					VN_RELE(vp);
				}
				vp = NULL;
				/* Only account for the kmem_alloc()ed bytes */
				total_count += len;
			}
		}
		sfvp++;
	}
	if (total_count > 0) {
		atomic_add_64(&nl7c_uri_bytes, total_count);
	}
	if (uri->resplen != -1 && uri->count >= uri->resplen) {
		/* Got the response data, close the uri */
		nl7c_close(so);
	}
	return;

fail:
	if (data != NULL)
		kmem_free(data, len);

	if (vp != NULL)
		VN_RELE(vp);

	if (fp != NULL)
		releasef(sfvp->sfv_fd);

	if (total_count > 0) {
		atomic_add_64(&nl7c_uri_bytes, total_count);
	}
	so->so_nl7c_flags = 0;
	nl7c_urifree(so);
}

/*
 * Called for a socket which is closing or when an application has
 * completed sending all the response data (i.e.
for a persistent
 * connection called once for each completed application response).
 */

void
nl7c_close(struct sonode *so)
{
	uri_desc_t *uri = (uri_desc_t *)so->so_nl7c_uri;

	if (uri == NULL) {
		/*
		 * No URI being processed so might be a listen()er
		 * if so do any cleanup, else nothing more to do.
		 */
		if (so->so_state & SS_ACCEPTCONN) {
			(void) nl7c_close_addr(so);
		}
		return;
	}
	/* Detach the uri from the socket before releasing our reference */
	so->so_nl7c_uri = NULL;
	if (uri->hash != URI_TEMP) {
		/* Hashed uri, wake any threads waiting on this response */
		mutex_enter(&uri->proclock);
		uri->proc = NULL;
		if (CV_HAS_WAITERS(&uri->waiting)) {
			cv_broadcast(&uri->waiting);
		}
		mutex_exit(&uri->proclock);
		nl7c_uri_close++;
	} else {
		/* No proclock as uri exclusively owned by so */
		uri->proc = NULL;
		nl7c_uri_temp_close++;
	}
	REF_RELE(uri);
	/* Over the configured cache byte limit? Try to reclaim memory */
	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
		nl7c_uri_reclaim();
	}
}

/*
 * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
 * release the segmap mapping. Note, the uri_segmap_t will be freed by
 * REF_RELE() on return.
 */

void
uri_segmap_inactive(uri_segmap_t *smp)
{
	/* Non-kpm mappings were F_SOFTLOCKed at map time, unlock first */
	if (!segmap_kpm) {
		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
		    smp->len, F_SOFTUNLOCK, S_OTHER);
	}
	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
	VN_RELE(smp->vp);
}

/*
 * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
 * release the reference, one per desballoc() of a segmap page, release
 * the reference of the URI containing the uri_rd_t, last kmem
 * free the uri_desb_t.
1382 */ 1383 1384 static void 1385 uri_desb_free(uri_desb_t *desb) 1386 { 1387 if (desb->segmap != NULL) { 1388 REF_RELE(desb->segmap); 1389 } 1390 REF_RELE(desb->uri); 1391 kmem_cache_free(uri_desb_kmc, desb); 1392 } 1393 1394 /* 1395 * Segmap map up to a page of a uri_rd_t file descriptor. 1396 */ 1397 1398 uri_segmap_t * 1399 uri_segmap_map(uri_rd_t *rdp, int bytes) 1400 { 1401 uri_segmap_t *segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP); 1402 int len = MIN(rdp->sz, MAXBSIZE); 1403 1404 if (len > bytes) 1405 len = bytes; 1406 1407 REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc); 1408 segmap->len = len; 1409 VN_HOLD(rdp->data.vnode); 1410 segmap->vp = rdp->data.vnode; 1411 1412 segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len, 1413 segmap_kpm ? SM_FAULT : 0, S_READ); 1414 1415 if (segmap_fault(kas.a_hat, segkmap, segmap->base, len, 1416 F_SOFTLOCK, S_READ) != 0) { 1417 REF_RELE(segmap); 1418 return (NULL); 1419 } 1420 return (segmap); 1421 } 1422 1423 /* 1424 * Chop up the kernel virtual memory area *data of size *sz bytes for 1425 * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using 1426 * the given template uri_desb_t *temp of max_mblk bytes per. 1427 * 1428 * The values of *data, *sz, and *bytes are updated on return, the 1429 * mblk_t chain is returned. 1430 */ 1431 1432 static mblk_t * 1433 uri_desb_chop( 1434 char **data, 1435 size_t *sz, 1436 int *bytes, 1437 uri_desb_t *temp, 1438 int max_mblk, 1439 char *eoh, 1440 mblk_t *persist 1441 ) 1442 { 1443 char *ldata = *data; 1444 size_t lsz = *sz; 1445 int lbytes = bytes ? 
*bytes : lsz; 1446 uri_desb_t *desb; 1447 mblk_t *mp = NULL; 1448 mblk_t *nmp, *tmp, *pmp = NULL; 1449 int msz; 1450 1451 if (lbytes == 0 && lsz == 0) 1452 return (NULL); 1453 1454 while (lbytes > 0 && lsz > 0) { 1455 msz = MIN(lbytes, max_mblk); 1456 msz = MIN(msz, lsz); 1457 if (persist && eoh >= ldata && eoh < &ldata[msz]) { 1458 msz = (eoh - ldata); 1459 pmp = persist; 1460 persist = NULL; 1461 } 1462 desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP); 1463 REF_HOLD(temp->uri); 1464 if (temp->segmap) { 1465 REF_HOLD(temp->segmap); 1466 } 1467 bcopy(temp, desb, sizeof (*desb)); 1468 desb->frtn.free_arg = (caddr_t)desb; 1469 nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn); 1470 if (nmp == NULL) { 1471 if (temp->segmap) { 1472 REF_RELE(temp->segmap); 1473 } 1474 REF_RELE(temp->uri); 1475 if (mp != NULL) { 1476 freemsg(mp); 1477 } 1478 return (NULL); 1479 } 1480 nmp->b_wptr += msz; 1481 if (mp != NULL) { 1482 /*LINTED*/ 1483 ASSERT(tmp->b_cont == NULL); 1484 /*LINTED*/ 1485 tmp->b_cont = nmp; 1486 } else { 1487 mp = nmp; 1488 } 1489 tmp = nmp; 1490 ldata += msz; 1491 lsz -= msz; 1492 lbytes -= msz; 1493 if (pmp) { 1494 tmp->b_cont = pmp; 1495 tmp = pmp; 1496 pmp = NULL; 1497 } 1498 } 1499 *data = ldata; 1500 *sz = lsz; 1501 if (bytes) 1502 *bytes = lbytes; 1503 mp->b_next = tmp; 1504 return (mp); 1505 } 1506 1507 /* 1508 * Experimential noqwait (i.e. no canput()/qwait() checks), just send 1509 * the entire mblk_t chain down without flow-control checks. 
 */

static int
kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
{
	struct stdata *stp;
	int error = 0;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	/* Fast check of flags before acquiring the lock */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		mutex_enter(&stp->sd_lock);
		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
		mutex_exit(&stp->sd_lock);
		if (error != 0) {
			/* Post SIGPIPE for a hung-up non-multiplexed stream */
			if (!(stp->sd_flag & STPLEX) &&
			    (stp->sd_wput_opt & SW_SIGPIPE)) {
				tsignal(curthread, SIGPIPE);
				error = EPIPE;
			}
			return (error);
		}
	}
	putnext(stp->sd_wrq, mp);
	return (0);
}

/*
 * Send the URI response uri_desc_t *uri out the socket_t *so.
 *
 * Walks the uri's uri_rd_t list, chopping each kmem or segmap-mapped
 * segment into desballoc()ed mblk_t chains of at most write_bytes per
 * kstrwrite(), invalidating (deleting) the uri on any failure.
 */

static void
uri_response(struct sonode *so, uri_desc_t *uri, int max_mblk)
{
	uri_rd_t	*rdp = &uri->response;
	vnode_t		*vp = SOTOV(so);
	int		wsz;
	mblk_t		*mp, *wmp, *persist;
	int		write_bytes;
	uri_rd_t	rd = {0};
	uri_desb_t	desb;
	uri_segmap_t	*segmap = NULL;
	char		*segmap_data;
	size_t		segmap_sz;
	boolean_t	first = B_TRUE;

	/* For first kstrwrite() enough data to get things going */
	write_bytes = P2ROUNDUP((max_mblk * 4), MAXBSIZE * nl7c_file_prefetch);

	/* Initialize template uri_desb_t */
	desb.frtn.free_func = uri_desb_free;
	desb.frtn.free_arg = NULL;
	desb.uri = uri;

	do {
		/* Build one write's worth (wsz bytes) of mblk chain */
		wmp = NULL;
		wsz = write_bytes;
		do {
			if (rd.sz == 0) {
				/* Current rd consumed, copy in the next */
				bcopy(rdp, &rd, sizeof (rd));
				rdp = rdp->next;
			}
			if (rd.off == -1) {
				/* kmem resident segment (off == -1) */
				if (uri->eoh >= rd.data.kmem &&
				    uri->eoh < &rd.data.kmem[rd.sz]) {
					/* End-of-header in this segment */
					persist = nl7c_http_persist(so);
				} else {
					persist = NULL;
				}
				desb.segmap = NULL;
				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
				    &wsz, &desb, max_mblk, uri->eoh, persist);
				if (mp == NULL)
					goto invalidate;
			} else {
				/* File segment, segmap map it as needed */
				if (segmap == NULL) {
					segmap = uri_segmap_map(&rd,
					    write_bytes);
					if (segmap == NULL)
						goto invalidate;
					desb.segmap = segmap;
					segmap_data = segmap->base;
					segmap_sz = segmap->len;
				}
				mp = uri_desb_chop(&segmap_data, &segmap_sz,
				    &wsz, &desb, max_mblk, NULL, NULL);
				if (mp == NULL)
					goto invalidate;
				if (segmap_sz == 0) {
					/* Mapping consumed, advance the rd */
					rd.sz -= segmap->len;
					rd.off += segmap->len;
					REF_RELE(segmap);
					segmap = NULL;
				}
			}
			if (wmp == NULL) {
				wmp = mp;
			} else {
				/*
				 * Append chain: wmp->b_next tracks the
				 * tail mblk (set by uri_desb_chop()).
				 */
				wmp->b_next->b_cont = mp;
				wmp->b_next = mp->b_next;
				mp->b_next = NULL;
			}
		} while (wsz > 0 && (rd.sz > 0 || rdp != NULL));

		/* Clear the tail-pointer overload before writing */
		wmp->b_next = NULL;
		if (first) {
			/* First kstrwrite(), use noqwait */
			if (kstrwritempnoqwait(vp, wmp) != 0)
				goto invalidate;
			/*
			 * For the rest of the kstrwrite()s use SO_SNDBUF
			 * worth of data at a time, note these kstrwrite()s
			 * may (will) block one or more times.
			 */
			first = B_FALSE;
			if ((write_bytes = so->so_sndbuf) == 0)
				write_bytes = vp->v_stream->sd_qn_maxpsz;
			ASSERT(write_bytes > 0);
			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
		} else {
			if (kstrwritemp(vp, wmp, 0) != 0)
				goto invalidate;
		}
	} while (rd.sz > 0 || rdp != NULL);

	return;

invalidate:
	if (segmap) {
		REF_RELE(segmap);
	}
	if (wmp)
		freemsg(wmp);
	/* Response could not be sent in full, remove the cached uri */
	uri_delete(uri);
}

/*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
 *
 * pchar       = unreserved | escaped |
 *               ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 * unreserved  = alphanum | mark
 *
 * alphanum    = alpha | digit
 *
 * alpha       = lowalpha | upalpha
 *
 * lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
 *               "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
 *               "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
 *               "y" | "z"
 *
 * upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
 *               "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
 *               "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
 *               "Y" | "Z"
 *
 * digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *               "8" | "9"
 *
 * mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 *
 * escaped     = "%" hex hex
 * hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */

/* 1 == valid URI path component char, indexed by (char & PCHARS_MASK) */
static char pchars[] = {
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
	0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
	0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
	1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
	1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
	0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
	1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
};

#define	PCHARS_MASK 0x7F

/*
 * This is the main L7 request message parse, we are called each time
 * new data is available for a socket, each time a single buffer of the
 * entire message to date is given.
 *
 * Here we parse the request looking for the URI, parse it, and if a
 * supported scheme call the scheme parser to complete the parse of any
 * headers which may further qualify the identity of the requested object
 * then lookup it up in the URI hash.
 *
 * Return B_TRUE for more processing.
 *
 * Note, at this time the parser supports the generic message format as
 * specified in RFC 822 with potential limitations as specified in RFC
 * 2616 for HTTP messages.
 *
 * Note, the caller supports an mblk_t chain, for now the parser(s)
 * require the complete header in a single mblk_t. This is the common
 * case and certainly for high performance environments, if at a future
 * date mblk_t chains are important the parse can be reved to process
 * mblk_t chains.
 */

boolean_t
nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret,
    int max_mblk)
{
	char	*cp = (char *)so->so_nl7c_rcv_mp->b_rptr;
	char	*ep = (char *)so->so_nl7c_rcv_mp->b_wptr;
	char	*get = "GET ";
	char	c;
	char	*uris;
	uri_desc_t *uri;
	uri_desc_t *ruri = NULL;

	/*
	 * Allocate and initialize minimal state for the request
	 * uri_desc_t, in the cache hit case this uri_desc_t will
	 * be freed.
	 */
	uri = kmem_cache_alloc(uri_kmc, KM_SLEEP);
	REF_INIT(uri, 1, uri_inactive, uri_kmc);
	uri->hash = NULL;
	uri->tail = NULL;
	uri->scheme = NULL;
	uri->count = 0;
	if ((uri->reqmp = dupb(so->so_nl7c_rcv_mp)) == NULL) {
		nl7c_uri_pass_dupbfail++;
		goto pass;
	}
	/*
	 * Set request time to current time.
	 */
	so->so_nl7c_rtime = gethrestime_sec();
	/*
	 * Parse the Request-Line for the URI.
	 *
	 * For backwards HTTP version compatible reasons skip any leading
	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
	 */
	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
		cp++;
	}
	/* Match the method against the literal "GET " */
	while (cp < ep && *get == *cp) {
		get++;
		cp++;
	}
	if (*get != 0) {
		/* Mismatch: either need more data or not a GET request */
		if (cp == ep) {
			nl7c_uri_more_get++;
			goto more;
		}
		nl7c_uri_pass_getnot++;
		goto pass;
	}
	/*
	 * Skip over URI path char(s) and save start and past end pointers.
	 *
	 * NOTE(review): if cp == ep here the loop body never executes and
	 * the subsequent "c != '\r'" test reads an uninitialized "c" -
	 * confirm a Request-Line always has at least one char after "GET ".
	 */
	uris = cp;
	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
		if (c == '?') {
			/* Don't cache but still may want to parse */
			uri->hash = URI_TEMP;
		}
		cp++;
	}
	if (c != '\r' && cp == ep) {
		nl7c_uri_more_eol++;
		goto more;
	}
	/*
	 * Request-Line URI parsed, pass the rest of the request on
	 * to the the http scheme parse.
	 */
	uri->path.cp = uris;
	uri->path.ep = cp;
	if (! nl7c_http_request(&cp, ep, uri, so)) {
		/*
		 * Parse not successful, the pointer to the parse pointer
		 * "cp" is overloaded such that ! NULL for more data and
		 * NULL for pass on request.
		 */
		if (cp != NULL) {
			nl7c_uri_more_http++;
			goto more;
		}
		nl7c_uri_pass_http++;
		goto pass;
	}
	if (uri->hash == URI_TEMP) {
		if (so->so_nl7c_flags & NL7C_SOPERSIST) {
			/* Temporary URI so skip hash processing */
			nl7c_uri_request++;
			nl7c_uri_temp++;
			goto temp;
		}
		/* Not persistent so not interested in the response */
		nl7c_uri_pass_temp++;
		goto pass;
	}
	/*
	 * If logging enabled save the request uri pointer and place
	 * an additional reference on it for logging use after lookup().
	 */
	if (nl7c_logd_enabled) {
		ruri = uri;
		REF_HOLD(ruri);
	}
	/*
	 * Check the URI hash for a cached response.
	 */
	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
		/*
		 * Failed to lookup due to nonblocking wait required,
		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
		 * failure, ... Just pass on this request.
		 */
		nl7c_uri_pass_addfail++;
		goto pass;
	}
	nl7c_uri_request++;
	if (uri->response.sz > 0) {
		/*
		 * We have the response cached, update recv mblk rptr
		 * to reflect the data consumed above, send the response
		 * out the socket, release reference on uri from the
		 * call to lookup_add(), set the *ret value to B_TRUE
		 * for socket close and return B_FALSE to indicate no
		 * more data needed.
		 */
		mblk_t	*mp = so->so_nl7c_rcv_mp;

		/* If a saved ruri set above then log request */
		if (ruri != NULL) {
			ipaddr_t faddr;

			if (so->so_family == AF_INET) {
				/* Only support IPv4 addrs */
				faddr = ((struct sockaddr_in *)
				    so->so_faddr_sa) ->sin_addr.s_addr;
			} else {
				faddr = 0;
			}
			nl7c_logd_log(ruri, uri, so->so_nl7c_rtime, faddr);
			REF_RELE(ruri);
		}
		nl7c_uri_hit++;
		if (cp == (char *)mp->b_wptr) {
			/* Request fully consumed, free the leading mblk */
			so->so_nl7c_rcv_mp = mp->b_cont;
			mp->b_cont = NULL;
			freeb(mp);
		} else {
			mp->b_rptr = (unsigned char *)cp;
		}
		uri_response(so, uri, max_mblk);
		REF_RELE(uri);
		*ret = B_TRUE;
		return (B_FALSE);
	}
	if (ruri != NULL) {
		REF_RELE(ruri);
	}
	/*
	 * Don't have a response cached or may want to cache the
	 * response from the webserver so store the uri pointer in
	 * the so subsequent write-side calls ...
	 */
	nl7c_uri_miss++;
temp:
	uri->proc = so;
	so->so_nl7c_uri = uri;
	so->so_nl7c_flags |= NL7C_WAITWRITE;
	*ret = B_FALSE;
	return (B_FALSE);

more:
	/* More data is needed, note fragmented recv not supported */
	nl7c_uri_more++;
	/* FALLTHROUGH - "more" is also counted as a passed-on request */

pass:
	/* Pass on this request */
	nl7c_uri_pass++;
	nl7c_uri_request++;
	if (ruri != NULL) {
		REF_RELE(ruri);
	}
	if (uri) {
		REF_RELE(uri);
	}
	so->so_nl7c_flags = 0;
	*ret = B_FALSE;
	return (B_FALSE);
}