/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/uio.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/socketvar.h>
#include <fs/sockfs/sodirect.h>

/*
 * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT)
 * we use a consolidation private KAPI to allow the protocol to start
 * an asynchronous copyout to a user-land receive-side buffer (uioa)
 * when a blocking socket read (e.g. read, recv, ...) is pending.
 *
 * In some broad strokes, this is what happens. When recv is called,
 * we first determine whether it would be beneficial to use uioa, and
 * if so set up the required state (all done by sod_rcv_init()).
 * The protocol can only initiate asynchronous copyout if the receive
 * queue is empty, so the first thing we do is drain any previously
 * queued data (using sod_uioa_so_init()). Once the copyouts (if any)
 * have been scheduled we wait for the receive to be satisfied. During
 * that time any new mblks that are enqueued will be scheduled to be
 * copied out asynchronously (sod_uioa_mblk_init()). When the receive
 * has been satisfied we wait for all scheduled copyout operations to
 * complete before we return to the user (sod_rcv_done())
 */

/* kmem cache for sodirect_t allocations; created once by sod_init() */
static struct kmem_cache *sock_sod_cache;

/*
 * This function is called at the beginning of recvmsg().
 *
 * If I/OAT is enabled on this sonode, initialize the uioa state machine
 * with state UIOA_ALLOC.
 *
 * On success the caller's uio_t pointer (*uiopp) is redirected to the
 * embedded sodirect uioa_t and the original uio_t is returned so that
 * sod_rcv_done() can finalize it later; NULL is returned when uioa is
 * not used for this receive (not sodirect, too small, MSG_PEEK, ...).
 */
uio_t *
sod_rcv_init(struct sonode *so, int flags, struct uio **uiopp)
{
	struct uio *suiop;
	struct uio *uiop;
	sodirect_t *sodp = so->so_direct;

	/* Not a sodirect socket, nothing to do */
	if (sodp == NULL)
		return (NULL);

	suiop = NULL;
	uiop = *uiopp;

	mutex_enter(&so->so_lock);
	if (uiop->uio_resid >= uioasync.mincnt &&
	    sodp != NULL && sodp->sod_enabled &&
	    uioasync.enabled && !(flags & MSG_PEEK) &&
	    !so->so_proto_props.sopp_loopback && so->so_filter_active == 0 &&
	    !(so->so_state & SS_CANTRCVMORE)) {
		/*
		 * Big enough I/O for uioa min setup and an sodirect socket
		 * and sodirect enabled and uioa enabled and I/O will be done
		 * and not EOF so initialize the sodirect_t uioa_t with "uiop".
		 */
		if (!uioainit(uiop, &sodp->sod_uioa)) {
			/*
			 * Successful uioainit() so the uio_t part of the
			 * uioa_t will be used for all uio_t work to follow,
			 * we return the original "uiop" in "suiop".
			 */
			suiop = uiop;
			*uiopp = (uio_t *)&sodp->sod_uioa;
			/*
			 * Before returning to the caller the passed in uio_t
			 * "uiop" will be updated via a call to uioafini()
			 * below.
			 *
			 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
			 * here as first we have to uioamove() any currently
			 * queued M_DATA mblk_t(s) so it will be done later.
			 */
		}
	}
	mutex_exit(&so->so_lock);

	return (suiop);
}

/*
 * This function is called at the end of recvmsg(), it finalizes all the
 * I/OAT operations, and resets the uioa state to UIOA_ALLOC.
 *
 * "suiop" is the original uio_t returned by sod_rcv_init() (NULL if uioa
 * was not used) and "uiop" is the uio_t the receive actually ran with.
 * Returns the error, if any, reported by uioafini().
 */
int
sod_rcv_done(struct sonode *so, struct uio *suiop, struct uio *uiop)
{
	int error = 0;
	sodirect_t *sodp = so->so_direct;
	mblk_t *mp;

	if (sodp == NULL) {
		return (0);
	}

	ASSERT(MUTEX_HELD(&so->so_lock));
	/* Finish any sodirect and uioa processing */
	if (suiop != NULL) {
		/* Finish any uioa_t processing */

		ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
		error = uioafini(suiop, (uioa_t *)uiop);
		if ((mp = sodp->sod_uioafh) != NULL) {
			/* Free the pending-free list built by
			 * sod_uioa_mblk_done(); the data has already been
			 * copied out asynchronously. */
			sodp->sod_uioafh = NULL;
			sodp->sod_uioaft = NULL;
			freemsg(mp);
		}
	}
	ASSERT(sodp->sod_uioafh == NULL);

	return (error);
}

/*
 * Schedule a uioamove() on a mblk. This is done as mblks are enqueued
 * by the protocol on the socket's rcv queue.
 *
 * "msg_size" must be the total data size of the "mp" chain. If the chain
 * does not fit in the remaining uio space, or a uioamove() fails, uioa is
 * transitioned to UIOA_FINI and any unscheduled remainder is left for the
 * normal (synchronous) receive path.
 *
 * Caller must be holding so_lock.
 */
void
sod_uioa_mblk_init(struct sodirect_s *sodp, mblk_t *mp, size_t msg_size)
{
	uioa_t *uioap = &sodp->sod_uioa;
	mblk_t *mp1 = mp;
	mblk_t *lmp = NULL;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(msg_size == msgdsize(mp));

	if (uioap->uioa_state & UIOA_ENABLED) {
		/* Uioa is enabled */

		if (msg_size > uioap->uio_resid) {
			/*
			 * There isn't enough uio space for the mblk_t chain
			 * so disable uioa such that this and any additional
			 * mblk_t data is handled by the socket and schedule
			 * the socket for wakeup to finish this uioa.
			 */
			uioap->uioa_state &= UIOA_CLR;
			uioap->uioa_state |= UIOA_FINI;
			return;
		}
		do {
			uint32_t len = MBLKL(mp1);

			if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) {
				/* Scheduled, mark dblk_t as such */
				DB_FLAGS(mp1) |= DBLK_UIOA;
			} else {
				/* Error, turn off async processing */
				uioap->uioa_state &= UIOA_CLR;
				uioap->uioa_state |= UIOA_FINI;
				break;
			}
			lmp = mp1;
		} while ((mp1 = mp1->b_cont) != NULL);

		if (mp1 != NULL || uioap->uio_resid == 0) {
			/* Break the mblk chain if necessary. */
			if (mp1 != NULL && lmp != NULL) {
				/* Unscheduled remainder becomes the next
				 * message on the b_next queue. */
				mp->b_next = mp1;
				lmp->b_cont = NULL;
			}
		}
	}
}

/*
 * This function is called on a mblk that has been successfully uioamoved().
 *
 * The chain is appended to the sodirect pending-free list (sod_uioafh /
 * sod_uioaft) and each mblk's rptr is advanced to wptr to reflect that all
 * of its data has been consumed; the list is freed by sod_rcv_done().
 */
void
sod_uioa_mblk_done(sodirect_t *sodp, mblk_t *bp)
{
	if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
		/*
		 * A uioa flagged mblk_t chain, already uio processed,
		 * add it to the sodirect uioa pending free list.
		 *
		 * Note, a b_cont chain headed by a DBLK_UIOA enabled
		 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
		 */
		mblk_t *bpt = sodp->sod_uioaft;

		ASSERT(sodp != NULL);

		/*
		 * Add first mblk_t of "bp" chain to current sodirect uioa
		 * free list tail mblk_t, if any, else empty list so new head.
		 */
		if (bpt == NULL)
			sodp->sod_uioafh = bp;
		else
			bpt->b_cont = bp;

		/*
		 * Walk mblk_t "bp" chain to find tail and adjust rptr of
		 * each to reflect that uioamove() has consumed all data.
		 */
		bpt = bp;
		for (;;) {
			ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);

			bpt->b_rptr = bpt->b_wptr;
			if (bpt->b_cont == NULL)
				break;
			bpt = bpt->b_cont;
		}
		/* New sodirect uioa free list tail */
		sodp->sod_uioaft = bpt;

		/* Only dequeue once with data returned per uioa_t */
		if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
			sodp->sod_uioa.uioa_state &= UIOA_CLR;
			sodp->sod_uioa.uioa_state |= UIOA_FINI;
		}
	}
}

/*
 * When transit from UIOA_INIT state to UIOA_ENABLE state in recvmsg(), call
 * this function on a non-STREAMS socket to schedule uioamove() on the data
 * that has already queued in this socket.
 *
 * Both the processed receive queue (so_rcv_q_head) and then the dump area
 * (so_rcv_head) are walked. If a mblk can't be scheduled (non-M_DATA, no
 * uio space left, at/past the oobmark, or uioamove() failure) uioa is
 * transitioned to UIOA_FINI and the chain is split so that everything
 * already uioamove()ed stays in the current message and the remainder
 * becomes a new message following it.
 */
void
sod_uioa_so_init(struct sonode *so, struct sodirect_s *sodp, struct uio *uiop)
{
	uioa_t *uioap = (uioa_t *)uiop;
	mblk_t *lbp;
	mblk_t *wbp;
	mblk_t *bp;
	int len;
	int error;
	boolean_t in_rcv_q = B_TRUE;

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(&sodp->sod_uioa == uioap);

	/*
	 * Walk first b_cont chain in sod_q
	 * and schedule any M_DATA mblk_t's for uio asynchronous move.
	 */
	bp = so->so_rcv_q_head;

again:
	/* Walk the chain */
	lbp = NULL;
	wbp = bp;

	do {
		if (bp == NULL)
			break;

		if (wbp->b_datap->db_type != M_DATA) {
			/* Not M_DATA, no more uioa */
			goto nouioa;
		}
		if ((len = wbp->b_wptr - wbp->b_rptr) > 0) {
			/* Have a M_DATA mblk_t with data */
			if (len > uioap->uio_resid || (so->so_oobmark > 0 &&
			    len + uioap->uioa_mbytes >= so->so_oobmark)) {
				/* Not enough uio space, or beyond oobmark */
				goto nouioa;
			}
			ASSERT(!(wbp->b_datap->db_flags & DBLK_UIOA));
			error = uioamove(wbp->b_rptr, len,
			    UIO_READ, uioap);
			if (!error) {
				/* Scheduled, mark dblk_t as such */
				wbp->b_datap->db_flags |= DBLK_UIOA;
			} else {
				/* Break the mblk chain */
				goto nouioa;
			}
		}
		/* Save last wbp processed */
		lbp = wbp;
	} while ((wbp = wbp->b_cont) != NULL);

	if (in_rcv_q && (bp == NULL || bp->b_next == NULL)) {
		/*
		 * We get here only once to process the sonode dump area
		 * if so_rcv_q_head is NULL or all the mblks have been
		 * successfully uioamoved()ed.
		 */
		in_rcv_q = B_FALSE;

		/* move to dump area */
		bp = so->so_rcv_head;
		goto again;
	}

	return;

nouioa:
	/* No more uioa */
	uioap->uioa_state &= UIOA_CLR;
	uioap->uioa_state |= UIOA_FINI;

	/*
	 * If we processed 1 or more mblk_t(s) then we need to split the
	 * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s)
	 * are in the current chain and the rest are in the following new
	 * chain.
	 */
	if (lbp != NULL) {
		/* New end of current chain */
		lbp->b_cont = NULL;

		/* Insert new chain wbp after bp */
		if ((wbp->b_next = bp->b_next) == NULL) {
			/* bp was the last message on its queue; the new
			 * tail message is wbp. */
			if (in_rcv_q)
				so->so_rcv_q_last_head = wbp;
			else
				so->so_rcv_last_head = wbp;
		}
		bp->b_next = wbp;
		bp->b_next->b_prev = bp->b_prev;
		bp->b_prev = lbp;
	}
}

/*
 * Initialize sodirect data structures on a socket.
 *
 * Allocates (KM_SLEEP) and attaches a sodirect_t with uioa in state
 * UIOA_ALLOC, and sets SS_SODIRECT in so_state.
 */
void
sod_sock_init(struct sonode *so)
{
	sodirect_t *sodp;

	ASSERT(so->so_direct == NULL);

	so->so_state |= SS_SODIRECT;

	sodp = kmem_cache_alloc(sock_sod_cache, KM_SLEEP);
	sodp->sod_enabled = B_TRUE;
	sodp->sod_uioafh = NULL;
	sodp->sod_uioaft = NULL;
	/*
	 * Remainder of the sod_uioa members are left uninitialized
	 * but will be initialized later by uioainit() before uioa
	 * is enabled.
	 */
	sodp->sod_uioa.uioa_state = UIOA_ALLOC;
	so->so_direct = sodp;
}

/*
 * Tear down the sodirect state attached to a socket and return it to the
 * kmem cache. The uioa pending-free list must already be empty (i.e.
 * sod_rcv_done() has run).
 */
void
sod_sock_fini(struct sonode *so)
{
	sodirect_t *sodp = so->so_direct;

	ASSERT(sodp->sod_uioafh == NULL);

	so->so_direct = NULL;
	kmem_cache_free(sock_sod_cache, sodp);
}

/*
 * Init the sodirect kmem cache while sockfs is loading.
 */
int
sod_init()
{
	/* Allocate sodirect_t kmem_cache */
	sock_sod_cache = kmem_cache_create("sock_sod_cache",
	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Dequeue any fully uioamove()ed mblk chains from the socket's receive
 * queues and move them to the sodirect pending-free list via
 * sod_uioa_mblk_done(). If "mp" is non-NULL it is taken as the chain to
 * process instead of dequeuing from so_rcv_q_head.
 *
 * Returns the number of bytes (uioa_mbytes) moved so far by this uioa_t.
 *
 * Caller must be holding so_lock, with uioa in ENABLED or FINI state.
 */
ssize_t
sod_uioa_mblk(struct sonode *so, mblk_t *mp)
{
	sodirect_t *sodp = so->so_direct;

	ASSERT(sodp != NULL);
	ASSERT(MUTEX_HELD(&so->so_lock));

	ASSERT(sodp->sod_enabled);
	ASSERT(sodp->sod_uioa.uioa_state != (UIOA_ALLOC|UIOA_INIT));

	ASSERT(sodp->sod_uioa.uioa_state & (UIOA_ENABLED|UIOA_FINI));

	if (mp == NULL && so->so_rcv_q_head != NULL) {
		/* Dequeue the first message from the processed rcv queue */
		mp = so->so_rcv_q_head;
		ASSERT(mp->b_prev != NULL);
		mp->b_prev = NULL;
		so->so_rcv_q_head = mp->b_next;
		if (so->so_rcv_q_head == NULL) {
			so->so_rcv_q_last_head = NULL;
		}
		mp->b_next = NULL;
	}

	sod_uioa_mblk_done(sodp, mp);

	if (so->so_rcv_q_head == NULL && so->so_rcv_head != NULL &&
	    DB_TYPE(so->so_rcv_head) == M_DATA &&
	    (DB_FLAGS(so->so_rcv_head) & DBLK_UIOA)) {
		/* more arrived */
		ASSERT(so->so_rcv_q_head == NULL);
		mp = so->so_rcv_head;
		so->so_rcv_head = mp->b_next;
		if (so->so_rcv_head == NULL)
			so->so_rcv_last_head = NULL;
		mp->b_prev = mp->b_next = NULL;
		sod_uioa_mblk_done(sodp, mp);
	}

#ifdef DEBUG
	/* No DBLK_UIOA mblk may remain on either receive queue; any such
	 * mblk would have stale rptr/wptr handling and indicates a bug. */
	if (so->so_rcv_q_head != NULL) {
		mblk_t *m = so->so_rcv_q_head;
		while (m != NULL) {
			if (DB_FLAGS(m) & DBLK_UIOA) {
				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
				    " in so_rcv_q_head.\n", (void *)m);
			}
			m = m->b_next;
		}
	}
	if (so->so_rcv_head != NULL) {
		mblk_t *m = so->so_rcv_head;
		while (m != NULL) {
			if (DB_FLAGS(m) & DBLK_UIOA) {
				cmn_err(CE_PANIC, "Unexpected I/OAT mblk %p"
				    " in so_rcv_head.\n", (void *)m);
			}
			m = m->b_next;
		}
	}
#endif
	return (sodp->sod_uioa.uioa_mbytes);
}