/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Some externs: */ extern boolean_t nl7c_logd_enabled; extern void nl7c_logd_log(uri_desc_t *, uri_desc_t *, time_t, ipaddr_t); extern boolean_t nl7c_close_addr(struct sonode *); extern struct sonode *nl7c_addr2portso(void *); extern uri_desc_t *nl7c_http_cond(uri_desc_t *, uri_desc_t *); /* * Various global tuneables: */ clock_t nl7c_uri_ttl = -1; /* TTL in seconds (-1 == infinite) */ boolean_t nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */ uint64_t nl7c_file_prefetch = 1; /* File cache prefetch pages */ uint64_t nl7c_uri_max = 0; /* Maximum bytes (0 == infinite) */ uint64_t nl7c_uri_bytes = 0; /* Bytes of kmem used by URIs */ /* * Locals: */ static int uri_rd_response(struct sonode *, uri_desc_t *, uri_rd_t *, boolean_t); static int uri_response(struct sonode *, uri_desc_t *); /* * HTTP scheme functions called from nl7chttp.c: */ boolean_t nl7c_http_request(char **, char *, 
uri_desc_t *, struct sonode *); boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *); boolean_t nl7c_http_cmp(void *, void *); mblk_t *nl7c_http_persist(struct sonode *); void nl7c_http_free(void *arg); void nl7c_http_init(void); /* * Counters that need to move to kstat and/or be removed: */ volatile uint64_t nl7c_uri_request = 0; volatile uint64_t nl7c_uri_hit = 0; volatile uint64_t nl7c_uri_pass = 0; volatile uint64_t nl7c_uri_miss = 0; volatile uint64_t nl7c_uri_temp = 0; volatile uint64_t nl7c_uri_more = 0; volatile uint64_t nl7c_uri_data = 0; volatile uint64_t nl7c_uri_sendfilev = 0; volatile uint64_t nl7c_uri_reclaim_calls = 0; volatile uint64_t nl7c_uri_reclaim_cnt = 0; volatile uint64_t nl7c_uri_pass_urifail = 0; volatile uint64_t nl7c_uri_pass_dupbfail = 0; volatile uint64_t nl7c_uri_more_get = 0; volatile uint64_t nl7c_uri_pass_method = 0; volatile uint64_t nl7c_uri_pass_option = 0; volatile uint64_t nl7c_uri_more_eol = 0; volatile uint64_t nl7c_uri_more_http = 0; volatile uint64_t nl7c_uri_pass_http = 0; volatile uint64_t nl7c_uri_pass_addfail = 0; volatile uint64_t nl7c_uri_pass_temp = 0; volatile uint64_t nl7c_uri_expire = 0; volatile uint64_t nl7c_uri_purge = 0; volatile uint64_t nl7c_uri_NULL1 = 0; volatile uint64_t nl7c_uri_NULL2 = 0; volatile uint64_t nl7c_uri_close = 0; volatile uint64_t nl7c_uri_temp_close = 0; volatile uint64_t nl7c_uri_free = 0; volatile uint64_t nl7c_uri_temp_free = 0; volatile uint64_t nl7c_uri_temp_mk = 0; volatile uint64_t nl7c_uri_rd_EAGAIN = 0; /* * Various kmem_cache_t's: */ kmem_cache_t *nl7c_uri_kmc; kmem_cache_t *nl7c_uri_rd_kmc; static kmem_cache_t *uri_desb_kmc; static kmem_cache_t *uri_segmap_kmc; static void uri_kmc_reclaim(void *); static void nl7c_uri_reclaim(void); /* * The URI hash is a dynamically sized A/B bucket hash, when the current * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of * the next P2Ps[] size is created. 
 *
 * All lookups are done in the current hash then the new hash (if any),
 * if there is a new hash then when a current hash bucket chain is examined
 * any uri_desc_t members will be migrated to the new hash and when the
 * last uri_desc_t has been migrated then the new hash will become the
 * current and the previous current hash will be freed leaving a single
 * hash.
 *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
 * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
 * is placed on all uri_desc_t uri_hash_t list members.
 *
 * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
 * access and WRITER for write access. Note, WRITER is only required for
 * hash geometry changes.
 *
 * uri_hash_which - which uri_hash_ab[] is the current hash.
 *
 * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
 *
 * uri_hash_sz[] - the size for each uri_hash_ab[].
 *
 * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
 *
 * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
 * a new uri_hash_ab[] needs to be created.
 *
 * uri_hash_ab[] - the uri_hash_t entries.
 *
 * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
 */

/* One hash bucket: the head of a uri_desc_t chain and the chain's lock. */
typedef struct uri_hash_s {
	struct uri_desc_s	*list;		/* List of uri_t(s) */
	kmutex_t		lock;
} uri_hash_t;

#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */

/* A/B hash state; each [2] array is indexed by which hash (0 or 1). */
static krwlock_t	uri_hash_access;
static uint32_t		uri_hash_which = 0;
static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0};
static uint32_t		uri_hash_sz[2] = {0, 0};
static uint32_t		uri_hash_cnt[2] = {0, 0};
static uint32_t		uri_hash_overflow[2] = {0, 0};
static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};
static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL};

/*
 * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2))
 * these primes have been found to be useful for prime sized hash tables.
 *
 * The table is indexed by N; entries 0 - 2 and the final entry are 0.
 */

static const int P2Ps[] = {
	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
	6143, 12281, 24571, 49139, 98299, 196597, 393209,
	786431, 1572853, 3145721, 6291449, 12582893, 0};

/*
 * Hash macros:
 *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return in c, *ep points to past end of (char *), on return *cp will
 *    point to the last char consumed.
 *
 *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 *    *cp to *ep to the unsigned hix, cp nor ep are modified.
 *
 *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
 *    a hash index 0 - (uri_hash_sz[which] - 1).
 *
 *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
 *    uri_desc_t members from hash from to hash to.
 *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking
 *    check for cur hash empty, if so make new cur.
Note, as this macro * can change a hash chain it needs to be run under hash_access as * RW_WRITER, futher as it can change the new hash to cur any access * to the hash state must be done after either dropping locks and * starting over or making sure the global state is consistent after * as before. */ #define H2A(cp, ep, c) { \ int _h = 2; \ int _n = 0; \ char _hc; \ \ while (_h > 0 && ++(cp) < (ep)) { \ if (_h == 1) \ _n *= 0x10; \ _hc = *(cp); \ if (_hc >= '0' && _hc <= '9') \ _n += _hc - '0'; \ else if (_hc >= 'a' || _hc <= 'f') \ _n += _hc - 'W'; \ else if (_hc >= 'A' || _hc <= 'F') \ _n += _hc - '7'; \ _h--; \ } \ (c) = _n; \ } #define URI_HASH(hv, cp, ep) { \ char *_s = (cp); \ char _c; \ \ while (_s < (ep)) { \ if ((_c = *_s) == '%') { \ H2A(_s, (ep), _c); \ } \ CHASH(hv, _c); \ _s++; \ } \ } #define URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)]) #define URI_HASH_MIGRATE(from, hp, to) { \ uri_desc_t *_nuri; \ uint32_t _nhix; \ uri_hash_t *_nhp; \ \ mutex_enter(&(hp)->lock); \ while ((_nuri = (hp)->list) != NULL) { \ (hp)->list = _nuri->hash; \ atomic_add_32(&uri_hash_cnt[(from)], -1); \ atomic_add_32(&uri_hash_cnt[(to)], 1); \ _nhix = _nuri->hvalue; \ URI_HASH_IX(_nhix, to); \ _nhp = &uri_hash_ab[(to)][_nhix]; \ mutex_enter(&_nhp->lock); \ _nuri->hash = _nhp->list; \ _nhp->list = _nuri; \ _nuri->hit = 0; \ mutex_exit(&_nhp->lock); \ } \ mutex_exit(&(hp)->lock); \ } #define URI_HASH_UNLINK(cur, new, hp, puri, uri) { \ if ((puri) != NULL) { \ (puri)->hash = (uri)->hash; \ } else { \ (hp)->list = (uri)->hash; \ } \ if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 && \ uri_hash_ab[(new)] != NULL) { \ kmem_free(uri_hash_ab[cur], \ sizeof (uri_hash_t) * uri_hash_sz[cur]); \ uri_hash_ab[(cur)] = NULL; \ uri_hash_lru[(cur)] = NULL; \ uri_hash_which = (new); \ } else { \ uri_hash_lru[(cur)] = (hp); \ } \ } void nl7c_uri_init(void) { uint32_t cur = uri_hash_which; rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL); uri_hash_sz[cur] = 
P2Ps[URI_HASH_N_INIT]; uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG; uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur], KM_SLEEP); uri_hash_lru[cur] = uri_hash_ab[cur]; nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t), 0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0); nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc", sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0); uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc", sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0); uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc", sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0); nl7c_http_init(); } #define CV_SZ 16 void nl7c_mi_report_hash(mblk_t *mp) { uri_hash_t *hp, *pend; uri_desc_t *uri; uint32_t cur; uint32_t new; int n, nz, tot; uint32_t cv[CV_SZ + 1]; rw_enter(&uri_hash_access, RW_READER); cur = uri_hash_which; new = cur ? 0 : 1; next: for (n = 0; n <= CV_SZ; n++) cv[n] = 0; nz = 0; tot = 0; hp = &uri_hash_ab[cur][0]; pend = &uri_hash_ab[cur][uri_hash_sz[cur]]; while (hp < pend) { n = 0; for (uri = hp->list; uri != NULL; uri = uri->hash) { n++; } tot += n; if (n > 0) nz++; if (n > CV_SZ) n = CV_SZ; cv[n]++; hp++; } (void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, " "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW", uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0); (void) mi_mpprintf(mp, "Free=%d", cv[0]); for (n = 1; n < CV_SZ; n++) { int pn = 0; char pv[5]; char *pp = pv; for (pn = n; pn < 1000; pn *= 10) *pp++ = ' '; *pp = 0; (void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]); } (void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]); if (cur != new && uri_hash_ab[new] != NULL) { cur = new; goto next; } rw_exit(&uri_hash_access); } void nl7c_mi_report_uri(mblk_t *mp) { uri_hash_t *hp; uri_desc_t *uri; uint32_t cur; uint32_t new; int ix; int ret; char sc; rw_enter(&uri_hash_access, RW_READER); cur = uri_hash_which; new = cur ? 
0 : 1; next: for (ix = 0; ix < uri_hash_sz[cur]; ix++) { hp = &uri_hash_ab[cur][ix]; mutex_enter(&hp->lock); uri = hp->list; while (uri != NULL) { sc = *(uri->path.ep); *(uri->path.ep) = 0; ret = mi_mpprintf(mp, "%s: %d %d %d", uri->path.cp, (int)uri->resplen, (int)uri->respclen, (int)uri->count); *(uri->path.ep) = sc; if (ret == -1) break; uri = uri->hash; } mutex_exit(&hp->lock); if (ret == -1) break; } if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) { cur = new; goto next; } rw_exit(&uri_hash_access); } /* * The uri_desc_t ref_t inactive function called on the last REF_RELE(), * free all resources contained in the uri_desc_t. Note, the uri_desc_t * will be freed by REF_RELE() on return. */ void nl7c_uri_inactive(uri_desc_t *uri) { int64_t bytes = 0; if (uri->tail) { uri_rd_t *rdp = &uri->response; uri_rd_t *free = NULL; while (rdp) { if (rdp->off == -1) { bytes += rdp->sz; kmem_free(rdp->data.kmem, rdp->sz); } else { VN_RELE(rdp->data.vnode); } rdp = rdp->next; if (free != NULL) { kmem_cache_free(nl7c_uri_rd_kmc, free); } free = rdp; } } if (bytes) { atomic_add_64(&nl7c_uri_bytes, -bytes); } if (uri->scheme != NULL) { nl7c_http_free(uri->scheme); } if (uri->reqmp) { freeb(uri->reqmp); } } /* * The reclaim is called by the kmem subsystem when kmem is running * low. More work is needed to determine the best reclaim policy, for * now we just manipulate the nl7c_uri_max global maximum bytes threshold * value using a simple arithmetic backoff of the value every time this * function is called then call uri_reclaim() to enforce it. * * Note, this value remains in place and enforced for all subsequent * URI request/response processing. * * Note, nl7c_uri_max is currently initialized to 0 or infinite such that * the first call here set it to the current uri_bytes value then backoff * from there. * * XXX how do we determine when to increase nl7c_uri_max ??? 
*/ /*ARGSUSED*/ static void uri_kmc_reclaim(void *arg) { uint64_t new_max; if ((new_max = nl7c_uri_max) == 0) { /* Currently infinite, initialize to current bytes used */ nl7c_uri_max = nl7c_uri_bytes; new_max = nl7c_uri_bytes; } if (new_max > 1) { /* Lower max_bytes to 93% of current value */ new_max >>= 1; /* 50% */ new_max += (new_max >> 1); /* 75% */ new_max += (new_max >> 2); /* 93% */ if (new_max < nl7c_uri_max) nl7c_uri_max = new_max; else nl7c_uri_max = 1; } nl7c_uri_reclaim(); } /* * Delete a uri_desc_t from the URI hash. */ static void uri_delete(uri_desc_t *del) { uint32_t hix; uri_hash_t *hp; uri_desc_t *uri; uri_desc_t *puri; uint32_t cur; uint32_t new; ASSERT(del->hash != URI_TEMP); rw_enter(&uri_hash_access, RW_WRITER); cur = uri_hash_which; new = cur ? 0 : 1; next: puri = NULL; hix = del->hvalue; URI_HASH_IX(hix, cur); hp = &uri_hash_ab[cur][hix]; for (uri = hp->list; uri != NULL; uri = uri->hash) { if (uri != del) { puri = uri; continue; } /* * Found the URI, unlink from the hash chain, * drop locks, ref release it. */ URI_HASH_UNLINK(cur, new, hp, puri, uri); rw_exit(&uri_hash_access); REF_RELE(uri); return; } if (cur != new && uri_hash_ab[new] != NULL) { /* * Not found in current hash and have a new hash so * check the new hash next. */ cur = new; goto next; } rw_exit(&uri_hash_access); } /* * Add a uri_desc_t to the URI hash. */ static void uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking) { uint32_t hix; uri_hash_t *hp; uint32_t cur = uri_hash_which; uint32_t new = cur ? 0 : 1; /* * Caller of uri_add() must hold the uri_hash_access rwlock. */ ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) || (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access))); /* * uri_add() always succeeds so add a hash ref to the URI now. 
*/ REF_HOLD(uri); again: hix = uri->hvalue; URI_HASH_IX(hix, cur); if (uri_hash_ab[new] == NULL && uri_hash_cnt[cur] < uri_hash_overflow[cur]) { /* * Easy case, no new hash and current hasn't overflowed, * add URI to current hash and return. * * Note, the check for uri_hash_cnt[] above aren't done * atomictally, i.e. multiple threads can be in this code * as RW_READER and update the cnt[], this isn't a problem * as the check is only advisory. */ fast: atomic_add_32(&uri_hash_cnt[cur], 1); hp = &uri_hash_ab[cur][hix]; mutex_enter(&hp->lock); uri->hash = hp->list; hp->list = uri; mutex_exit(&hp->lock); rw_exit(&uri_hash_access); return; } if (uri_hash_ab[new] == NULL) { /* * Need a new a or b hash, if not already RW_WRITER * try to upgrade our lock to writer. */ if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) { /* * Upgrade failed, we can't simple exit and reenter * the lock as after the exit and before the reenter * the whole world can change so just wait for writer * then do everything again. */ if (nonblocking) { /* * Can't block, use fast-path above. * * XXX should have a background thread to * handle new ab[] in this case so as to * not overflow the cur hash to much. */ goto fast; } rw_exit(&uri_hash_access); rwlock = RW_WRITER; rw_enter(&uri_hash_access, rwlock); cur = uri_hash_which; new = cur ? 0 : 1; goto again; } rwlock = RW_WRITER; if (uri_hash_ab[new] == NULL) { /* * Still need a new hash, allocate and initialize * the new hash. */ uri_hash_n[new] = uri_hash_n[cur] + 1; if (uri_hash_n[new] == 0) { /* * No larger P2Ps[] value so use current, * i.e. 2 of the largest are better than 1 ? */ uri_hash_n[new] = uri_hash_n[cur]; cmn_err(CE_NOTE, "NL7C: hash index overflow"); } uri_hash_sz[new] = P2Ps[uri_hash_n[new]]; ASSERT(uri_hash_cnt[new] == 0); uri_hash_overflow[new] = uri_hash_sz[new] * URI_HASH_AVRG; uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[new], nonblocking ? 
KM_NOSLEEP : KM_SLEEP); if (uri_hash_ab[new] == NULL) { /* * Alloc failed, use fast-path above. * * XXX should have a background thread to * handle new ab[] in this case so as to * not overflow the cur hash to much. */ goto fast; } uri_hash_lru[new] = uri_hash_ab[new]; } } /* * Hashed against current hash so migrate any current hash chain * members, if any. * * Note, the hash chain list can be checked for a non empty list * outside of the hash chain list lock as the hash chain struct * can't be destroyed while in the uri_hash_access rwlock, worst * case is that a non empty list is found and after acquiring the * lock another thread beats us to it (i.e. migrated the list). */ hp = &uri_hash_ab[cur][hix]; if (hp->list != NULL) { URI_HASH_MIGRATE(cur, hp, new); } /* * If new hash has overflowed before current hash has been * completely migrated then walk all current hash chains and * migrate list members now. */ if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) { for (hix = 0; hix < uri_hash_sz[cur]; hix++) { hp = &uri_hash_ab[cur][hix]; if (hp->list != NULL) { URI_HASH_MIGRATE(cur, hp, new); } } } /* * Add URI to new hash. */ hix = uri->hvalue; URI_HASH_IX(hix, new); hp = &uri_hash_ab[new][hix]; mutex_enter(&hp->lock); uri->hash = hp->list; hp->list = uri; mutex_exit(&hp->lock); /* * Last, check to see if last cur hash chain has been * migrated, if so free cur hash and make new hash cur. */ if (uri_hash_cnt[cur] == 0) { /* * If we don't already hold the uri_hash_access rwlock for * RW_WRITE try to upgrade to RW_WRITE and if successful * check again and to see if still need to do the free. 
*/ if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) && uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) { kmem_free(uri_hash_ab[cur], sizeof (uri_hash_t) * uri_hash_sz[cur]); uri_hash_ab[cur] = NULL; uri_hash_lru[cur] = NULL; uri_hash_which = new; } } rw_exit(&uri_hash_access); } /* * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if * add B_TRUE use the request URI to create a new hash entry. Else if add * B_FALSE ... */ static uri_desc_t * uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking) { uint32_t hix; uri_hash_t *hp; uri_desc_t *uri; uri_desc_t *puri; uint32_t cur; uint32_t new; char *rcp = ruri->path.cp; char *rep = ruri->path.ep; again: rw_enter(&uri_hash_access, RW_READER); cur = uri_hash_which; new = cur ? 0 : 1; nexthash: puri = NULL; hix = ruri->hvalue; URI_HASH_IX(hix, cur); hp = &uri_hash_ab[cur][hix]; mutex_enter(&hp->lock); for (uri = hp->list; uri != NULL; uri = uri->hash) { char *ap = uri->path.cp; char *bp = rcp; char a, b; /* Compare paths */ while (bp < rep && ap < uri->path.ep) { if ((a = *ap) == '%') { /* Escaped hex multichar, convert it */ H2A(ap, uri->path.ep, a); } if ((b = *bp) == '%') { /* Escaped hex multichar, convert it */ H2A(bp, rep, b); } if (a != b) { /* Char's don't match */ goto nexturi; } ap++; bp++; } if (bp != rep || ap != uri->path.ep) { /* Not same length */ goto nexturi; } ap = uri->auth.cp; bp = ruri->auth.cp; if (ap != NULL) { if (bp == NULL) { /* URI has auth request URI doesn't */ goto nexturi; } while (bp < ruri->auth.ep && ap < uri->auth.ep) { if ((a = *ap) == '%') { /* Escaped hex multichar, convert it */ H2A(ap, uri->path.ep, a); } if ((b = *bp) == '%') { /* Escaped hex multichar, convert it */ H2A(bp, rep, b); } if (a != b) { /* Char's don't match */ goto nexturi; } ap++; bp++; } if (bp != ruri->auth.ep || ap != uri->auth.ep) { /* Not same length */ goto nexturi; } } else if (bp != NULL) 
{ /* URI doesn't have auth and request URI does */ goto nexturi; } /* * Have a path/auth match so before any other processing * of requested URI, check for expire or request no cache * purge. */ if (uri->expire >= 0 && uri->expire <= lbolt || ruri->nocache) { /* * URI has expired or request specified to not use * the cached version, unlink the URI from the hash * chain, release all locks, release the hash ref * on the URI, and last look it up again. * * Note, this will cause all variants of the named * URI to be purged. */ if (puri != NULL) { puri->hash = uri->hash; } else { hp->list = uri->hash; } mutex_exit(&hp->lock); atomic_add_32(&uri_hash_cnt[cur], -1); rw_exit(&uri_hash_access); if (ruri->nocache) nl7c_uri_purge++; else nl7c_uri_expire++; REF_RELE(uri); goto again; } if (uri->scheme != NULL) { /* * URI has scheme private qualifier(s), if request * URI doesn't or if no match skip this URI. */ if (ruri->scheme == NULL || ! nl7c_http_cmp(uri->scheme, ruri->scheme)) goto nexturi; } else if (ruri->scheme != NULL) { /* * URI doesn't have scheme private qualifiers but * request URI does, no match, skip this URI. */ goto nexturi; } /* * Have a match, ready URI for return, first put a reference * hold on the URI, if this URI is currently being processed * then have to wait for the processing to be completed and * redo the lookup, else return it. */ REF_HOLD(uri); mutex_enter(&uri->proclock); if (uri->proc != NULL) { /* The URI is being processed, wait for completion */ mutex_exit(&hp->lock); rw_exit(&uri_hash_access); if (! nonblocking && cv_wait_sig(&uri->waiting, &uri->proclock)) { /* * URI has been processed but things may * have changed while we were away so do * most everything again. */ mutex_exit(&uri->proclock); REF_RELE(uri); goto again; } else { /* * A nonblocking socket or an interrupted * cv_wait_sig() in the first case can't * block waiting for the processing of the * uri hash hit uri to complete, in both * cases just return failure to lookup. 
*/ mutex_exit(&uri->proclock); REF_RELE(uri); return (NULL); } } mutex_exit(&uri->proclock); uri->hit++; mutex_exit(&hp->lock); rw_exit(&uri_hash_access); return (uri); nexturi: puri = uri; } mutex_exit(&hp->lock); if (cur != new && uri_hash_ab[new] != NULL) { /* * Not found in current hash and have a new hash so * check the new hash next. */ cur = new; goto nexthash; } add: if (! add) { /* Lookup only so return failure */ rw_exit(&uri_hash_access); return (NULL); } /* * URI not hashed, finish intialization of the * request URI, add it to the hash, return it. */ ruri->hit = 0; ruri->expire = -1; ruri->response.sz = 0; ruri->proc = (struct sonode *)~NULL; cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL); mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL); uri_add(ruri, RW_READER, nonblocking); /* uri_add() has done rw_exit(&uri_hash_access) */ return (ruri); } /* * Reclaim URIs until max cache size threshold has been reached. * * A CLOCK based reclaim modified with a history (hit counter) counter. */ static void nl7c_uri_reclaim(void) { uri_hash_t *hp, *start, *pend; uri_desc_t *uri; uri_desc_t *puri; uint32_t cur; uint32_t new; nl7c_uri_reclaim_calls++; again: rw_enter(&uri_hash_access, RW_WRITER); cur = uri_hash_which; new = cur ? 0 : 1; next: hp = uri_hash_lru[cur]; start = hp; pend = &uri_hash_ab[cur][uri_hash_sz[cur]]; while (nl7c_uri_bytes > nl7c_uri_max) { puri = NULL; for (uri = hp->list; uri != NULL; uri = uri->hash) { if (uri->hit != 0) { /* * Decrement URI activity counter and skip. */ uri->hit--; puri = uri; continue; } if (uri->proc != NULL) { /* * Currently being processed by a socket, skip. */ continue; } /* * Found a candidate, no hit(s) since added or last * reclaim pass, unlink from it's hash chain, update * lru scan pointer, drop lock, ref release it. 
*/ URI_HASH_UNLINK(cur, new, hp, puri, uri); if (cur == uri_hash_which) { if (++hp == pend) { /* Wrap pointer */ hp = uri_hash_ab[cur]; } uri_hash_lru[cur] = hp; } rw_exit(&uri_hash_access); REF_RELE(uri); nl7c_uri_reclaim_cnt++; goto again; } if (++hp == pend) { /* Wrap pointer */ hp = uri_hash_ab[cur]; } if (hp == start) { if (cur != new && uri_hash_ab[new] != NULL) { /* * Done with the current hash and have a * new hash so check the new hash next. */ cur = new; goto next; } } } rw_exit(&uri_hash_access); } /* * Called for a socket which is being freed prior to close, e.g. errored. */ void nl7c_urifree(struct sonode *so) { sotpi_info_t *sti = SOTOTPI(so); uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri; sti->sti_nl7c_uri = NULL; if (uri->hash != URI_TEMP) { uri_delete(uri); mutex_enter(&uri->proclock); uri->proc = NULL; if (CV_HAS_WAITERS(&uri->waiting)) { cv_broadcast(&uri->waiting); } mutex_exit(&uri->proclock); nl7c_uri_free++; } else { /* No proclock as uri exclusively owned by so */ uri->proc = NULL; nl7c_uri_temp_free++; } REF_RELE(uri); } /* * ... * * < 0 need more data * * 0 parse complete * * > 0 parse error */ volatile uint64_t nl7c_resp_pfail = 0; volatile uint64_t nl7c_resp_ntemp = 0; volatile uint64_t nl7c_resp_pass = 0; static int nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz) { if (! nl7c_http_response(&data, &data[sz], uri, so)) { if (data == NULL) { /* Parse fail */ goto pfail; } /* More data */ data = NULL; } else if (data == NULL) { goto pass; } if (uri->hash != URI_TEMP && uri->nocache) { /* * After response parse now no cache, * delete it from cache, wakeup any * waiters on this URI, make URI_TEMP. 
*/ uri_delete(uri); mutex_enter(&uri->proclock); if (CV_HAS_WAITERS(&uri->waiting)) { cv_broadcast(&uri->waiting); } mutex_exit(&uri->proclock); uri->hash = URI_TEMP; nl7c_uri_temp_mk++; } if (data == NULL) { /* More data needed */ return (-1); } /* Success */ return (0); pfail: nl7c_resp_pfail++; return (EINVAL); pass: nl7c_resp_pass++; return (ENOTSUP); } /* * Called to sink application response data, the processing of the data * is the same for a cached or temp URI (i.e. a URI for which we aren't * going to cache the URI but want to parse it for detecting response * data end such that for a persistent connection we can parse the next * request). * * On return 0 is returned for sink success, > 0 on error, and < 0 on * no so URI (note, data not sinked). */ int nl7c_data(struct sonode *so, uio_t *uio) { sotpi_info_t *sti = SOTOTPI(so); uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri; iovec_t *iov; int cnt; int sz = uio->uio_resid; char *data, *alloc; char *bp; uri_rd_t *rdp; boolean_t first; int error, perror; nl7c_uri_data++; if (uri == NULL) { /* Socket & NL7C out of sync, disable NL7C */ sti->sti_nl7c_flags = 0; nl7c_uri_NULL1++; return (-1); } if (sti->sti_nl7c_flags & NL7C_WAITWRITE) { sti->sti_nl7c_flags &= ~NL7C_WAITWRITE; first = B_TRUE; } else { first = B_FALSE; } alloc = kmem_alloc(sz, KM_SLEEP); URI_RD_ADD(uri, rdp, sz, -1); if (rdp == NULL) { error = ENOMEM; goto fail; } if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) { uri_delete(uri); uri->hash = URI_TEMP; } data = alloc; alloc = NULL; rdp->data.kmem = data; atomic_add_64(&nl7c_uri_bytes, sz); bp = data; while (uio->uio_resid > 0) { iov = uio->uio_iov; if ((cnt = iov->iov_len) == 0) { goto next; } cnt = MIN(cnt, uio->uio_resid); error = xcopyin(iov->iov_base, bp, cnt); if (error) goto fail; iov->iov_base += cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_loffset += cnt; bp += cnt; next: uio->uio_iov++; uio->uio_iovcnt--; } /* Successfull sink of data, response parse the data 
*/ perror = nl7c_resp_parse(so, uri, data, sz); /* Send the data out the connection */ error = uri_rd_response(so, uri, rdp, first); if (error) goto fail; /* Success */ if (perror == 0 && ((uri->respclen == URI_LEN_NOVALUE && uri->resplen == URI_LEN_NOVALUE) || uri->count >= uri->resplen)) { /* * No more data needed and no pending response * data or current data count >= response length * so close the URI processing for this so. */ nl7c_close(so); if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) { /* Not a persistent connection */ sti->sti_nl7c_flags = 0; } } return (0); fail: if (alloc != NULL) { kmem_free(alloc, sz); } sti->sti_nl7c_flags = 0; nl7c_urifree(so); return (error); } /* * Called to read data from file "*fp" at offset "*off" of length "*len" * for a maximum of "*max_rem" bytes. * * On success a pointer to the kmem_alloc()ed file data is returned, "*off" * and "*len" are updated for the acutal number of bytes read and "*max_rem" * is updated with the number of bytes remaining to be read. * * Else, "NULL" is returned. 
*/ static char * nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret) { vnode_t *vp = fp->f_vnode; int flg = 0; size_t size = MIN(*len, max); char *data; int error; uio_t uio; iovec_t iov; (void) VOP_RWLOCK(vp, flg, NULL); if (*off > MAXOFFSET_T) { VOP_RWUNLOCK(vp, flg, NULL); *ret = EFBIG; return (NULL); } if (*off + size > MAXOFFSET_T) size = (ssize32_t)(MAXOFFSET_T - *off); data = kmem_alloc(size, KM_SLEEP); iov.iov_base = data; iov.iov_len = size; uio.uio_loffset = *off; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = size; uio.uio_segflg = UIO_SYSSPACE; uio.uio_llimit = MAXOFFSET_T; uio.uio_fmode = fp->f_flag; error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL); VOP_RWUNLOCK(vp, flg, NULL); *ret = error; if (error) { kmem_free(data, size); return (NULL); } *len = size; *off += size; return (data); } /* * Called to sink application response sendfilev, as with nl7c_data() above * all the data will be processed by NL7C unless there's an error. */ int nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp, int sfvc, ssize_t *xfer) { sotpi_info_t *sti = SOTOTPI(so); uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri; file_t *fp = NULL; vnode_t *vp = NULL; char *data = NULL; u_offset_t off; int len; int cnt; int total_count = 0; char *alloc; uri_rd_t *rdp; int max; int perror; int error = 0; boolean_t first = B_TRUE; nl7c_uri_sendfilev++; if (uri == NULL) { /* Socket & NL7C out of sync, disable NL7C */ sti->sti_nl7c_flags = 0; nl7c_uri_NULL2++; return (0); } if (sti->sti_nl7c_flags & NL7C_WAITWRITE) sti->sti_nl7c_flags &= ~NL7C_WAITWRITE; while (sfvc-- > 0) { /* * off - the current sfv read file offset or user address. * * len - the current sfv length in bytes. * * cnt - number of bytes kmem_alloc()ed. * * alloc - the kmem_alloc()ed buffer of size "cnt". * * data - copy of "alloc" used for post alloc references. * * fp - the current sfv file_t pointer. * * vp - the current "*vp" vnode_t pointer. 
* * Note, for "data" and "fp" and "vp" a NULL value is used * when not allocated such that the common failure path "fail" * is used. */ off = sfvp->sfv_off; len = sfvp->sfv_len; cnt = len; if (len == 0) { sfvp++; continue; } if (sfvp->sfv_fd == SFV_FD_SELF) { /* * User memory, copyin() all the bytes. */ alloc = kmem_alloc(cnt, KM_SLEEP); error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt); if (error) goto fail; } else { /* * File descriptor, prefetch some bytes. */ if ((fp = getf(sfvp->sfv_fd)) == NULL) { error = EBADF; goto fail; } if ((fp->f_flag & FREAD) == 0) { error = EACCES; goto fail; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = EINVAL; goto fail; } VN_HOLD(vp); /* Read max_rem bytes from file for prefetch */ if (nl7c_use_kmem) { max = cnt; } else { max = MAXBSIZE * nl7c_file_prefetch; } alloc = nl7c_readfile(fp, &off, &cnt, max, &error); if (alloc == NULL) goto fail; releasef(sfvp->sfv_fd); fp = NULL; } URI_RD_ADD(uri, rdp, cnt, -1); if (rdp == NULL) { error = ENOMEM; goto fail; } data = alloc; alloc = NULL; rdp->data.kmem = data; total_count += cnt; if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) { uri_delete(uri); uri->hash = URI_TEMP; } /* Response parse */ perror = nl7c_resp_parse(so, uri, data, len); /* Send kmem data out the connection */ error = uri_rd_response(so, uri, rdp, first); if (error) goto fail; if (sfvp->sfv_fd != SFV_FD_SELF) { /* * File descriptor, if any bytes left save vnode_t. 
*/ if (len > cnt) { /* More file data so add it */ URI_RD_ADD(uri, rdp, len - cnt, off); if (rdp == NULL) { error = ENOMEM; goto fail; } rdp->data.vnode = vp; /* Send vnode data out the connection */ error = uri_rd_response(so, uri, rdp, first); } else { /* All file data fit in the prefetch */ VN_RELE(vp); } *fileoff += len; vp = NULL; } *xfer += len; sfvp++; if (first) first = B_FALSE; } if (total_count > 0) { atomic_add_64(&nl7c_uri_bytes, total_count); } if (perror == 0 && ((uri->respclen == URI_LEN_NOVALUE && uri->resplen == URI_LEN_NOVALUE) || uri->count >= uri->resplen)) { /* * No more data needed and no pending response * data or current data count >= response length * so close the URI processing for this so. */ nl7c_close(so); if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) { /* Not a persistent connection */ sti->sti_nl7c_flags = 0; } } return (0); fail: if (error == EPIPE) tsignal(curthread, SIGPIPE); if (alloc != NULL) kmem_free(data, len); if (vp != NULL) VN_RELE(vp); if (fp != NULL) releasef(sfvp->sfv_fd); if (total_count > 0) { atomic_add_64(&nl7c_uri_bytes, total_count); } sti->sti_nl7c_flags = 0; nl7c_urifree(so); return (error); } /* * Called for a socket which is closing or when an application has * completed sending all the response data (i.e. for a persistent * connection called once for each completed application response). */ void nl7c_close(struct sonode *so) { sotpi_info_t *sti = SOTOTPI(so); uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri; if (uri == NULL) { /* * No URI being processed so might be a listen()er * if so do any cleanup, else nothing more to do. 
*/ if (so->so_state & SS_ACCEPTCONN) { (void) nl7c_close_addr(so); } return; } sti->sti_nl7c_uri = NULL; if (uri->hash != URI_TEMP) { mutex_enter(&uri->proclock); uri->proc = NULL; if (CV_HAS_WAITERS(&uri->waiting)) { cv_broadcast(&uri->waiting); } mutex_exit(&uri->proclock); nl7c_uri_close++; } else { /* No proclock as uri exclusively owned by so */ uri->proc = NULL; nl7c_uri_temp_close++; } REF_RELE(uri); if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) { nl7c_uri_reclaim(); } } /* * The uri_segmap_t ref_t inactive function called on the last REF_RELE(), * release the segmap mapping. Note, the uri_segmap_t will be freed by * REF_RELE() on return. */ void uri_segmap_inactive(uri_segmap_t *smp) { if (!segmap_kpm) { (void) segmap_fault(kas.a_hat, segkmap, smp->base, smp->len, F_SOFTUNLOCK, S_OTHER); } (void) segmap_release(segkmap, smp->base, SM_DONTNEED); VN_RELE(smp->vp); } /* * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t * release the reference, one per desballoc() of a segmap page, if a rd_t * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t, * last kmem free the uri_desb_t. */ static void uri_desb_free(uri_desb_t *desb) { if (desb->segmap != NULL) { REF_RELE(desb->segmap); } REF_RELE(desb->uri); kmem_cache_free(uri_desb_kmc, desb); } /* * Segmap map up to a page of a uri_rd_t file descriptor. */ uri_segmap_t * uri_segmap_map(uri_rd_t *rdp, int bytes) { uri_segmap_t *segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP); int len = MIN(rdp->sz, MAXBSIZE); if (len > bytes) len = bytes; REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc); segmap->len = len; VN_HOLD(rdp->data.vnode); segmap->vp = rdp->data.vnode; segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len, segmap_kpm ? 
SM_FAULT : 0, S_READ); if (segmap_fault(kas.a_hat, segkmap, segmap->base, len, F_SOFTLOCK, S_READ) != 0) { REF_RELE(segmap); return (NULL); } return (segmap); } /* * Chop up the kernel virtual memory area *data of size *sz bytes for * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using * the given template uri_desb_t *temp of max_mblk bytes per. * * The values of *data, *sz, and *bytes are updated on return, the * mblk_t chain is returned. */ static mblk_t * uri_desb_chop( char **data, size_t *sz, int *bytes, uri_desb_t *temp, int max_mblk, char *eoh, mblk_t *persist ) { char *ldata = *data; size_t lsz = *sz; int lbytes = bytes ? *bytes : lsz; uri_desb_t *desb; mblk_t *mp = NULL; mblk_t *nmp, *pmp = NULL; int msz; if (lbytes == 0 && lsz == 0) return (NULL); while (lbytes > 0 && lsz > 0) { msz = MIN(lbytes, max_mblk); msz = MIN(msz, lsz); if (persist && eoh >= ldata && eoh < &ldata[msz]) { msz = (eoh - ldata); pmp = persist; persist = NULL; if (msz == 0) { nmp = pmp; pmp = NULL; goto zero; } } desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP); REF_HOLD(temp->uri); if (temp->segmap) { REF_HOLD(temp->segmap); } bcopy(temp, desb, sizeof (*desb)); desb->frtn.free_arg = (caddr_t)desb; nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn); if (nmp == NULL) { if (temp->segmap) { REF_RELE(temp->segmap); } REF_RELE(temp->uri); if (mp != NULL) { mp->b_next = NULL; freemsg(mp); } if (persist != NULL) { freeb(persist); } return (NULL); } nmp->b_wptr += msz; zero: if (mp != NULL) { mp->b_next->b_cont = nmp; } else { mp = nmp; } if (pmp != NULL) { nmp->b_cont = pmp; nmp = pmp; pmp = NULL; } mp->b_next = nmp; ldata += msz; lsz -= msz; lbytes -= msz; } *data = ldata; *sz = lsz; if (bytes) *bytes = lbytes; return (mp); } /* * Experimential noqwait (i.e. no canput()/qwait() checks), just send * the entire mblk_t chain down without flow-control checks. 
*/ static int kstrwritempnoqwait(struct vnode *vp, mblk_t *mp) { struct stdata *stp; int error = 0; ASSERT(vp->v_stream); stp = vp->v_stream; /* Fast check of flags before acquiring the lock */ if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { mutex_enter(&stp->sd_lock); error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); mutex_exit(&stp->sd_lock); if (error != 0) { if (!(stp->sd_flag & STPLEX) && (stp->sd_wput_opt & SW_SIGPIPE)) { error = EPIPE; } return (error); } } putnext(stp->sd_wrq, mp); return (0); } /* * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so. */ static int uri_rd_response(struct sonode *so, uri_desc_t *uri, uri_rd_t *rdp, boolean_t first) { vnode_t *vp = SOTOV(so); int max_mblk = (int)vp->v_stream->sd_maxblk; int wsz; mblk_t *mp, *wmp, *persist; int write_bytes; uri_rd_t rd; uri_desb_t desb; uri_segmap_t *segmap = NULL; char *segmap_data; size_t segmap_sz; int error; int fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) | ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0); /* Initialize template uri_desb_t */ desb.frtn.free_func = uri_desb_free; desb.frtn.free_arg = NULL; desb.uri = uri; /* Get a local copy of the rd_t */ bcopy(rdp, &rd, sizeof (rd)); do { if (first) { /* * For first kstrwrite() enough data to get * things going, note non blocking version of * kstrwrite() will be used below. */ write_bytes = P2ROUNDUP((max_mblk * 4), MAXBSIZE * nl7c_file_prefetch); } else { if ((write_bytes = so->so_sndbuf) == 0) write_bytes = vp->v_stream->sd_qn_maxpsz; ASSERT(write_bytes > 0); write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE); } /* * Chop up to a write_bytes worth of data. 
*/ wmp = NULL; wsz = write_bytes; do { if (rd.sz == 0) break; if (rd.off == -1) { if (uri->eoh >= rd.data.kmem && uri->eoh < &rd.data.kmem[rd.sz]) { persist = nl7c_http_persist(so); } else { persist = NULL; } desb.segmap = NULL; mp = uri_desb_chop(&rd.data.kmem, &rd.sz, &wsz, &desb, max_mblk, uri->eoh, persist); if (mp == NULL) { error = ENOMEM; goto invalidate; } } else { if (segmap == NULL) { segmap = uri_segmap_map(&rd, write_bytes); if (segmap == NULL) { error = ENOMEM; goto invalidate; } desb.segmap = segmap; segmap_data = segmap->base; segmap_sz = segmap->len; } mp = uri_desb_chop(&segmap_data, &segmap_sz, &wsz, &desb, max_mblk, NULL, NULL); if (mp == NULL) { error = ENOMEM; goto invalidate; } if (segmap_sz == 0) { rd.sz -= segmap->len; rd.off += segmap->len; REF_RELE(segmap); segmap = NULL; } } if (wmp == NULL) { wmp = mp; } else { wmp->b_next->b_cont = mp; wmp->b_next = mp->b_next; mp->b_next = NULL; } } while (wsz > 0 && rd.sz > 0); wmp->b_next = NULL; if (first) { /* First kstrwrite(), use noqwait */ if ((error = kstrwritempnoqwait(vp, wmp)) != 0) goto invalidate; /* * For the rest of the kstrwrite()s use SO_SNDBUF * worth of data at a time, note these kstrwrite()s * may (will) block one or more times. */ first = B_FALSE; } else { if ((error = kstrwritemp(vp, wmp, fflg)) != 0) { if (error == EAGAIN) { nl7c_uri_rd_EAGAIN++; if ((error = kstrwritempnoqwait(vp, wmp)) != 0) goto invalidate; } else goto invalidate; } } } while (rd.sz > 0); return (0); invalidate: if (segmap) { REF_RELE(segmap); } if (wmp) freemsg(wmp); return (error); } /* * Send the URI uri_desc_t *uri response out the socket_t *so. 
*/
static int
uri_response(struct sonode *so, uri_desc_t *uri)
{
	uri_rd_t	*cur;
	boolean_t	is_first = B_TRUE;
	int		rc;

	/* Walk the response uri_rd_t list, sending each in order */
	for (cur = &uri->response; cur != NULL; cur = cur->next) {
		rc = uri_rd_response(so, uri, cur, is_first);
		if (rc != 0) {
			/* A send failed, delete the URI and report why */
			uri_delete(uri);
			return (rc);
		}
		/* Only the very first uri_rd_t is sent as "first" */
		is_first = B_FALSE;
	}
	return (0);
}

/*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
 *
 *    pchar       = unreserved | escaped |
 *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
 *
 *    unreserved  = alphanum | mark
 *
 *    alphanum    = alpha | digit
 *
 *    alpha       = lowalpha | upalpha
 *
 *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
 *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
 *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
 *                  "y" | "z"
 *
 *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
 *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
 *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
 *                  "Y" | "Z"
 *
 *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *                  "8" | "9"
 *
 *    mark        = "-" | "_" | "." | "!"
| "~" | "*" | "'" | "(" | ")"
 *
 *    escaped     = "%" hex hex
 *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                  "a" | "b" | "c" | "d" | "e" | "f"
 *
 * Each entry is 1 if the char with that value is accepted and 0 if not,
 * the table covers 0x00 - 0x7F (128 entries).
 *
 * NOTE(review): the table below differs from the grammar above in that
 * "(" (0x28) and ")" (0x29) are 0 while "/" (0x2F) is 1 -- confirm
 * this is intended.
 */
static char pchars[] = {
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
	0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
	0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
	1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
	1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
	0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
	1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
};

/*
 * Largest valid pchars[] index; presumably used to mask a char before
 * indexing pchars[] (no use is visible in this part of the file).
 */
#define	PCHARS_MASK 0x7F

/*
 * This is the main L7 request message parse, we are called each time
 * new data is available for a socket, each time a single buffer of the
 * entire message to date is given.
 *
 * Here we parse the request looking for the URI, parse it, and if a
 * supported scheme call the scheme parser to complete the parse of any
 * headers which may further qualify the identity of the requested object
 * then look it up in the URI hash.
 *
 * Return B_TRUE for more processing.
 *
 * Note, at this time the parser supports the generic message format as
 * specified in RFC 822 with potential limitations as specified in RFC
 * 2616 for HTTP messages.
 *
 * Note, the caller supports an mblk_t chain, for now the parser(s)
 * require the complete header in a single mblk_t. This is the common
 * case and certainly for high performance environments, if at a future
 * date mblk_t chains are important the parse can be revved to process
 * mblk_t chains.
*/
boolean_t
nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
{
	sotpi_info_t *sti = SOTOTPI(so);
	char	*cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
	char	*ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
	char	*get = "GET ";
	char	*post = "POST ";
	/*
	 * Initialized as the URI path scan below may not execute at all
	 * (buffer ends right after the method) yet "c" is tested after it.
	 */
	char	c = '\0';
	char	*uris;
	uri_desc_t *uri = NULL;
	uri_desc_t *ruri = NULL;
	mblk_t	*reqmp;
	uint32_t hv = 0;

	/*
	 * NOTE(review): every return in this function returns B_FALSE,
	 * the outcome is reported through *ret (B_TRUE only when a cached
	 * response was sent).
	 */
	if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
		nl7c_uri_pass_dupbfail++;
		goto pass;
	}
	/*
	 * Allocate and initialize minimal state for the request
	 * uri_desc_t, in the cache hit case this uri_desc_t will
	 * be freed.
	 */
	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
	uri->hash = NULL;
	uri->tail = NULL;
	uri->scheme = NULL;
	uri->count = 0;
	uri->reqmp = reqmp;

	/*
	 * Set request time to current time.
	 */
	sti->sti_nl7c_rtime = gethrestime_sec();

	/*
	 * Parse the Request-Line for the URI.
	 *
	 * For backwards HTTP version compatible reasons skip any leading
	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
	 */
	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
		cp++;
	}
	while (cp < ep && *get == *cp) {
		get++;
		cp++;
	}
	if (*get != 0) {
		/* Not a "GET", check for "POST" */
		while (cp < ep && *post == *cp) {
			post++;
			cp++;
		}
		if (*post != 0) {
			if (cp == ep) {
				nl7c_uri_more_get++;
				goto more;
			}
			/* Not a "GET" or a "POST", just pass */
			nl7c_uri_pass_method++;
			goto pass;
		}
		/* "POST", don't cache but still may want to parse */
		uri->hash = URI_TEMP;
	}
	/*
	 * Skip over URI path char(s) and save start and past end pointers.
	 */
	uris = cp;
	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
		if (c == '?') {
			/* Don't cache but still may want to parse */
			uri->hash = URI_TEMP;
		}
		CHASH(hv, c);
		cp++;
	}
	if (c != '\r' && cp == ep) {
		/* Ran out of data before the end of the URI path */
		nl7c_uri_more_eol++;
		goto more;
	}
	/*
	 * Request-Line URI parsed, pass the rest of the request on
	 * to the http scheme parse.
	 */
	uri->path.cp = uris;
	uri->path.ep = cp;
	uri->hvalue = hv;
	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
		/*
		 * Parse not successful or pass on request, the pointer
		 * to the parse pointer "cp" is overloaded such that ! NULL
		 * for more data and NULL for bad parse of request or pass.
		 */
		if (cp != NULL) {
			nl7c_uri_more_http++;
			goto more;
		}
		nl7c_uri_pass_http++;
		goto pass;
	}
	if (uri->nocache) {
		uri->hash = URI_TEMP;
		(void) uri_lookup(uri, B_FALSE, nonblocking);
	} else if (uri->hash == URI_TEMP) {
		uri->nocache = B_TRUE;
		(void) uri_lookup(uri, B_FALSE, nonblocking);
	}

	if (uri->hash == URI_TEMP) {
		if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
			/* Temporary URI so skip hash processing */
			nl7c_uri_request++;
			nl7c_uri_temp++;
			goto temp;
		}
		/* Not persistent so not interested in the response */
		nl7c_uri_pass_temp++;
		goto pass;
	}
	/*
	 * Check the URI hash for a cached response, save the request
	 * uri in case we need it below.
	 */
	ruri = uri;
	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
		/*
		 * Failed to lookup due to nonblocking wait required,
		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
		 * failure, ... Just pass on this request.
		 */
		nl7c_uri_pass_addfail++;
		goto pass;
	}
	nl7c_uri_request++;
	if (uri->response.sz > 0) {
		/*
		 * We have the response cached, update recv mblk rptr
		 * to reflect the data consumed in parse.
		 */
		mblk_t	*mp = sti->sti_nl7c_rcv_mp;

		if (cp == (char *)mp->b_wptr) {
			sti->sti_nl7c_rcv_mp = mp->b_cont;
			mp->b_cont = NULL;
			freeb(mp);
		} else {
			mp->b_rptr = (unsigned char *)cp;
		}
		nl7c_uri_hit++;
		/* If logging enabled log request */
		if (nl7c_logd_enabled) {
			ipaddr_t faddr;

			if (so->so_family == AF_INET) {
				/* Only support IPv4 addrs */
				faddr = ((struct sockaddr_in *)
				    sti->sti_faddr_sa) ->sin_addr.s_addr;
			} else {
				faddr = 0;
			}
			/* XXX need to pass response type, e.g. 200, 304 */
			nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
		}

		/* If conditional request check for substitute response */
		if (ruri->conditional) {
			uri = nl7c_http_cond(ruri, uri);
		}

		/*
		 * Release reference on request URI, send the response out
		 * the socket, release reference on response uri, set the
		 * *ret value to B_TRUE to indicate request was consumed
		 * then return B_FALSE to indicate no more data needed.
		 */
		REF_RELE(ruri);
		(void) uri_response(so, uri);
		REF_RELE(uri);
		*ret = B_TRUE;
		return (B_FALSE);
	}
	/*
	 * Miss the cache, the request URI is in the cache waiting for
	 * application write-side data to fill it.
	 */
	nl7c_uri_miss++;
temp:
	/*
	 * A miss or temp URI for which response data is needed, link
	 * uri to so and so to uri, set WAITWRITE in the so such that
	 * read-side processing is suspended (so the next read() gets
	 * the request data) until a write() is processed by NL7C.
	 *
	 * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
	 */
	uri->proc = so;
	sti->sti_nl7c_uri = uri;
	sti->sti_nl7c_flags |= NL7C_WAITWRITE;
	*ret = B_FALSE;
	return (B_FALSE);

more:
	/* More data is needed, note fragmented recv not supported */
	nl7c_uri_more++;

pass:
	/* Pass on this request */
	nl7c_uri_pass++;
	nl7c_uri_request++;
	if (ruri != NULL) {
		REF_RELE(ruri);
	}
	if (uri) {
		REF_RELE(uri);
	}
	sti->sti_nl7c_flags = 0;
	*ret = B_FALSE;
	return (B_FALSE);
}