/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/uio.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/socketvar.h>
#include <fs/sockfs/sodirect.h>

/*
 * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT)
 * we use a consolidation private KAPI to allow the protocol to start
 * an asynchronous copyout to a user-land receive-side buffer (uioa)
 * when a blocking socket read (e.g. read, recv, ...) is pending.
 *
 * In broad strokes, this is what happens. When recv is called, we
 * first determine whether it would be beneficial to use uioa, and if
 * so set up the required state (all done by sod_rcv_init()). The
 * protocol can only initiate asynchronous copyout if the receive
 * queue is empty, so the first thing we do is drain any previously
 * queued data (using sod_uioa_so_init()). Once the copyouts (if any)
 * have been scheduled we wait for the receive to be satisfied. During
 * that time any new mblks that are enqueued will be scheduled to be
 * copied out asynchronously (sod_uioa_mblk_init()). When the receive
 * has been satisfied we wait for all scheduled copyout operations to
 * complete before we return to the user (sod_rcv_done()).
 *
 * A simplified, illustrative call sequence is sketched at the end of
 * this file.
 */

static struct kmem_cache *sock_sod_cache;

/*
 * This function is called at the beginning of recvmsg().
 *
 * If I/OAT is enabled on this sonode, initialize the uioa state machine
 * with state UIOA_ALLOC.
 */
uio_t *
sod_rcv_init(struct sonode *so, int flags, struct uio **uiopp)
{
	struct uio *suiop;
	struct uio *uiop;
	sodirect_t *sodp = so->so_direct;

	if (sodp == NULL)
		return (NULL);

	suiop = NULL;
	uiop = *uiopp;

	mutex_enter(&so->so_lock);
	if (uiop->uio_resid >= uioasync.mincnt &&
	    sodp != NULL && sodp->sod_enabled &&
	    uioasync.enabled && !(flags & MSG_PEEK) &&
	    !so->so_proto_props.sopp_loopback &&
	    !(so->so_state & SS_CANTRCVMORE)) {
		/*
		 * The I/O is big enough for the uioa minimum, this is an
		 * sodirect socket with both sodirect and uioa enabled, the
		 * I/O will actually be done (not MSG_PEEK) and we are not
		 * at EOF, so initialize the sodirect_t uioa_t with "uiop".
		 */
		if (!uioainit(uiop, &sodp->sod_uioa)) {
			/*
			 * Successful uioainit(), so the uio_t part of the
			 * uioa_t will be used for all uio_t work to follow;
			 * we return the original "uiop" in "suiop".
			 */
			suiop = uiop;
			*uiopp = (uio_t *)&sodp->sod_uioa;
			/*
			 * Before returning to the caller the passed in uio_t
			 * "uiop" will be updated via a call to uioafini()
			 * below.
			 *
			 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
			 * here as first we have to uioamove() any currently
			 * queued M_DATA mblk_t(s), so it will be done later.
			 */
		}
	}
	mutex_exit(&so->so_lock);

	return (suiop);
}

/*
 * This function is called at the end of recvmsg(); it finalizes all the
 * I/OAT operations and resets the uioa state to UIOA_ALLOC.
 */
int
sod_rcv_done(struct sonode *so, struct uio *suiop, struct uio *uiop)
{
	int error = 0;
	sodirect_t *sodp = so->so_direct;
	mblk_t *mp;

	if (sodp == NULL) {
		return (0);
	}

	ASSERT(MUTEX_HELD(&so->so_lock));
	/* Finish any sodirect and uioa processing */
	if (suiop != NULL) {
		/* Finish any uioa_t processing */

		ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
		error = uioafini(suiop, (uioa_t *)uiop);
		if ((mp = sodp->sod_uioafh) != NULL) {
			sodp->sod_uioafh = NULL;
			sodp->sod_uioaft = NULL;
			freemsg(mp);
		}
	}
	ASSERT(sodp->sod_uioafh == NULL);

	return (error);
}

/*
 * Schedule a uioamove() on an mblk. This is done as mblks are enqueued
 * by the protocol on the socket's rcv queue.
 *
 * Caller must be holding so_lock.
 */
void
sod_uioa_mblk_init(struct sodirect_s *sodp, mblk_t *mp, size_t msg_size)
{
	uioa_t *uioap = &sodp->sod_uioa;
	mblk_t *mp1 = mp;
	mblk_t *lmp = NULL;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(msg_size == msgdsize(mp));

	if (uioap->uioa_state & UIOA_ENABLED) {
		/* Uioa is enabled */

		if (msg_size > uioap->uio_resid) {
			/*
			 * There isn't enough uio space for the mblk_t chain,
			 * so disable uioa such that this and any additional
			 * mblk_t data is handled by the socket, and schedule
			 * the socket for wakeup to finish this uioa.
			 */
			uioap->uioa_state &= UIOA_CLR;
			uioap->uioa_state |= UIOA_FINI;
			return;
		}
		do {
			uint32_t len = MBLKL(mp1);

			if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) {
				/* Scheduled, mark dblk_t as such */
				DB_FLAGS(mp1) |= DBLK_UIOA;
			} else {
				/* Error, turn off async processing */
				uioap->uioa_state &= UIOA_CLR;
				uioap->uioa_state |= UIOA_FINI;
				break;
			}
			lmp = mp1;
		} while ((mp1 = mp1->b_cont) != NULL);

		if (mp1 != NULL || uioap->uio_resid == 0) {
			/* Break the mblk chain if necessary. */
			if (mp1 != NULL && lmp != NULL) {
				mp->b_next = mp1;
				lmp->b_cont = NULL;
			}
		}
	}
}

/*
 * This function is called on an mblk that has been successfully uioamove()d.
 */
void
sod_uioa_mblk_done(sodirect_t *sodp, mblk_t *bp)
{
	if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
		/*
		 * A uioa flagged mblk_t chain, already uio processed;
		 * add it to the sodirect uioa pending free list.
		 *
		 * Note, a b_cont chain headed by a DBLK_UIOA enabled
		 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
		 */
		mblk_t *bpt = sodp->sod_uioaft;

		ASSERT(sodp != NULL);

		/*
		 * Add the first mblk_t of the "bp" chain to the current
		 * sodirect uioa free list tail mblk_t, if any; else the
		 * list is empty, so "bp" becomes the new head.
		 */
		if (bpt == NULL)
			sodp->sod_uioafh = bp;
		else
			bpt->b_cont = bp;

		/*
		 * Walk the mblk_t "bp" chain to find the tail and adjust the
		 * rptr of each to reflect that uioamove() has consumed all
		 * data.
		 */
		bpt = bp;
		for (;;) {
			ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);

			bpt->b_rptr = bpt->b_wptr;
			if (bpt->b_cont == NULL)
				break;
			bpt = bpt->b_cont;
		}
		/* New sodirect uioa free list tail */
		sodp->sod_uioaft = bpt;

		/* Only dequeue once with data returned per uioa_t */
		if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
			sodp->sod_uioa.uioa_state &= UIOA_CLR;
			sodp->sod_uioa.uioa_state |= UIOA_FINI;
		}
	}
}

/*
 * When transitioning from the UIOA_INIT state to the UIOA_ENABLED state in
 * recvmsg(), call this function on a non-STREAMS socket to schedule
 * uioamove() on the data that has already been queued in this socket.
 */
void
sod_uioa_so_init(struct sonode *so, struct sodirect_s *sodp, struct uio *uiop)
{
	uioa_t *uioap = (uioa_t *)uiop;
	mblk_t *lbp;
	mblk_t *wbp;
	mblk_t *bp;
	int len;
	int error;
	boolean_t in_rcv_q = B_TRUE;

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(&sodp->sod_uioa == uioap);

	/*
	 * Walk the first b_cont chain on the socket's receive queue and
	 * schedule any M_DATA mblk_t's for uio asynchronous move.
	 */
	bp = so->so_rcv_q_head;

again:
	/* Walk the chain */
	lbp = NULL;
	wbp = bp;

	do {
		if (bp == NULL)
			break;

		if (wbp->b_datap->db_type != M_DATA) {
			/* Not M_DATA, no more uioa */
			goto nouioa;
		}
		if ((len = wbp->b_wptr - wbp->b_rptr) > 0) {
			/* Have an M_DATA mblk_t with data */
			if (len > uioap->uio_resid || (so->so_oobmark > 0 &&
			    len + uioap->uioa_mbytes >= so->so_oobmark)) {
				/* Not enough uio space, or beyond oobmark */
				goto nouioa;
			}
			ASSERT(!(wbp->b_datap->db_flags & DBLK_UIOA));
			error = uioamove(wbp->b_rptr, len,
			    UIO_READ, uioap);
			if (!error) {
				/* Scheduled, mark dblk_t as such */
				wbp->b_datap->db_flags |= DBLK_UIOA;
			} else {
				/* Break the mblk chain */
				goto nouioa;
			}
		}
		/* Save last wbp processed */
		lbp = wbp;
	} while ((wbp = wbp->b_cont) != NULL);

	if (in_rcv_q && (bp == NULL || bp->b_next == NULL)) {
		/*
		 * We get here only once to process the sonode dump area
		 * if so_rcv_q_head is NULL or all the mblks have been
		 * successfully uioamove()d.
		 */
		in_rcv_q = B_FALSE;

		/* move to dump area */
		bp = so->so_rcv_head;
		goto again;
	}

	return;

nouioa:
	/* No more uioa */
	uioap->uioa_state &= UIOA_CLR;
	uioap->uioa_state |= UIOA_FINI;

	/*
	 * If we processed one or more mblk_t(s) then we need to split the
	 * current mblk_t chain in two so that all the uioamove()d mblk_t(s)
	 * are in the current chain and the rest are in the following new
	 * chain.
	 */
	if (lbp != NULL) {
		/* New end of current chain */
		lbp->b_cont = NULL;

		/* Insert new chain wbp after bp */
		if ((wbp->b_next = bp->b_next) == NULL) {
			if (in_rcv_q)
				so->so_rcv_q_last_head = wbp;
			else
				so->so_rcv_last_head = wbp;
		}
		bp->b_next = wbp;
		bp->b_next->b_prev = bp->b_prev;
		bp->b_prev = lbp;
	}
}

/*
 * Initialize sodirect data structures on a socket.
 */
void
sod_sock_init(struct sonode *so)
{
	sodirect_t *sodp;

	ASSERT(so->so_direct == NULL);

	so->so_state |= SS_SODIRECT;

	sodp = kmem_cache_alloc(sock_sod_cache, KM_SLEEP);
	sodp->sod_enabled = B_TRUE;
	sodp->sod_uioafh = NULL;
	sodp->sod_uioaft = NULL;
	/*
	 * The remainder of the sod_uioa members are left uninitialized,
	 * but will be initialized later by uioainit() before uioa
	 * is enabled.
	 */
	sodp->sod_uioa.uioa_state = UIOA_ALLOC;
	so->so_direct = sodp;
}

/*
 * Tear down and free the sodirect data structures on a socket.
 */
void
sod_sock_fini(struct sonode *so)
{
	sodirect_t *sodp = so->so_direct;

	ASSERT(sodp->sod_uioafh == NULL);

	so->so_direct = NULL;
	kmem_cache_free(sock_sod_cache, sodp);
}

/*
 * Init the sodirect kmem cache while sockfs is loading.
 */
int
sod_init()
{
	/* Allocate sodirect_t kmem_cache */
	sock_sod_cache = kmem_cache_create("sock_sod_cache",
	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Dequeue any uioamove()d mblk_t(s) from the socket's receive queues (or
 * finalize the passed in "mp"), hand them to sod_uioa_mblk_done() for the
 * sodirect uioa pending free list, and return the number of bytes that
 * have been uioamove()d for this uioa_t.
 */
ssize_t
sod_uioa_mblk(struct sonode *so, mblk_t *mp)
{
	sodirect_t *sodp = so->so_direct;

	ASSERT(sodp != NULL);
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(sodp->sod_enabled);
	ASSERT(sodp->sod_uioa.uioa_state != (UIOA_ALLOC|UIOA_INIT));

	ASSERT(sodp->sod_uioa.uioa_state & (UIOA_ENABLED|UIOA_FINI));

	if (mp == NULL && so->so_rcv_q_head != NULL) {
		mp = so->so_rcv_q_head;
		ASSERT(mp->b_prev != NULL);
		mp->b_prev = NULL;
		so->so_rcv_q_head = mp->b_next;
		if (so->so_rcv_q_head == NULL) {
			so->so_rcv_q_last_head = NULL;
		}
		mp->b_next = NULL;
	}

	sod_uioa_mblk_done(sodp, mp);

	if (so->so_rcv_q_head == NULL && so->so_rcv_head != NULL &&
	    DB_TYPE(so->so_rcv_head) == M_DATA &&
	    (DB_FLAGS(so->so_rcv_head) & DBLK_UIOA)) {
		/* more arrived */
		ASSERT(so->so_rcv_q_head == NULL);
		mp = so->so_rcv_head;
		so->so_rcv_head = mp->b_next;
		if (so->so_rcv_head == NULL)
			so->so_rcv_last_head = NULL;
		mp->b_prev = mp->b_next = NULL;
		sod_uioa_mblk_done(sodp, mp);
	}

#ifdef DEBUG
	if (so->so_rcv_q_head != NULL) {
		mblk_t *m = so->so_rcv_q_head;
		while (m != NULL) {
			if (DB_FLAGS(m) & DBLK_UIOA) {
				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
				    " in so_rcv_q_head.\n", (void *)m);
			}
			m = m->b_next;
		}
	}
	if (so->so_rcv_head != NULL) {
		mblk_t *m = so->so_rcv_head;
		while (m != NULL) {
			if (DB_FLAGS(m) & DBLK_UIOA) {
				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
				    " in so_rcv_head.\n", (void *)m);
			}
			m = m->b_next;
		}
	}
#endif
	return (sodp->sod_uioa.uioa_mbytes);
}
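
#ifdef	SOD_UIOA_EXAMPLE	/* never defined; illustrative sketch only */
/*
 * A minimal sketch of how a blocking receive path is expected to drive
 * the KAPI above, following the overview comment at the top of this
 * file. The function name sod_recv_sketch(), the elided blocking step
 * and the exact uioa_state transitions are simplifying assumptions for
 * illustration; this is not the actual recvmsg() implementation.
 */
static int
sod_recv_sketch(struct sonode *so, struct uio *uiop, int flags)
{
	struct uio *suiop;
	sodirect_t *sodp = so->so_direct;
	int error = 0;

	/*
	 * 1. Decide whether uioa can be used; on success "uiop" is
	 * swapped for the sodirect_t uioa_t and the caller's original
	 * uio_t is returned in "suiop".
	 */
	suiop = sod_rcv_init(so, flags, &uiop);

	mutex_enter(&so->so_lock);
	if (suiop != NULL) {
		/*
		 * 2. Schedule asynchronous copyout of any data already
		 * queued on the socket, as part of the UIOA_INIT ->
		 * UIOA_ENABLED transition. Once enabled, newly enqueued
		 * mblk_t(s) are scheduled by sod_uioa_mblk_init() and
		 * dequeued via sod_uioa_mblk().
		 */
		sod_uioa_so_init(so, sodp, uiop);
	}

	/* 3. Block until the receive is satisfied (wait elided). */

	if (suiop != NULL) {
		/*
		 * 4. Wait for all scheduled copyouts to complete and
		 * restore the caller's uio_t state via uioafini().
		 */
		error = sod_rcv_done(so, suiop, uiop);
	}
	mutex_exit(&so->so_lock);

	return (error);
}
#endif	/* SOD_UIOA_EXAMPLE */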