1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/param.h> 29 #include <sys/malloc.h> 30 #include <sys/lock.h> 31 #include <sys/rmlock.h> 32 #include <sys/mbuf.h> 33 #include <sys/ck.h> 34 #include <sys/socket.h> 35 #include <sys/socketvar.h> 36 #include <sys/syslog.h> 37 38 #include <netlink/netlink.h> 39 #include <netlink/netlink_ctl.h> 40 #include <netlink/netlink_linux.h> 41 #include <netlink/netlink_var.h> 42 43 #define DEBUG_MOD_NAME nl_writer 44 #define DEBUG_MAX_LEVEL LOG_DEBUG3 45 #include <netlink/netlink_debug.h> 46 _DECLARE_DEBUG(LOG_INFO); 47 48 /* 49 * The goal of this file is to provide convenient message writing KPI on top of 50 * different storage methods (mbufs, uio, temporary memory chunks). 51 * 52 * The main KPI guarantee is that the (last) message always resides in the contiguous 53 * memory buffer, so one is able to update the header after writing the entire message. 54 * 55 * This guarantee comes with a side effect of potentially reallocating underlying 56 * buffer, so one needs to update the desired pointers after something is added 57 * to the header. 58 * 59 * Messaging layer contains hooks performing transparent Linux translation for the messages. 60 * 61 * There are 3 types of supported targets: 62 * * socket (adds mbufs to the socket buffer, used for message replies) 63 * * group (sends mbuf/chain to the specified groups, used for the notifications) 64 * * chain (returns mbuf chain, used in Linux message translation code) 65 * 66 * There are 3 types of storage: 67 * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message 68 * fits in NLMBUFSIZE) 69 * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs 70 * to be larger than one supported by NS_WRITER_TYPE_MBUF) 71 * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for 72 * Linux sockets, calls translation hook prior to sending messages to the socket). 73 * 74 * Internally, KPI switches between different types of storage when memory requirements 75 * change. It happens transparently to the caller. 76 */ 77 78 /* 79 * Uma zone for the mbuf-based Netlink storage 80 */ 81 static uma_zone_t nlmsg_zone; 82 83 static void 84 nl_free_mbuf_storage(struct mbuf *m) 85 { 86 uma_zfree(nlmsg_zone, m->m_ext.ext_buf); 87 } 88 89 static int 90 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused) 91 { 92 struct mbuf *m = (struct mbuf *)arg; 93 94 if (m != NULL) 95 m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE); 96 97 return (0); 98 } 99 100 static struct mbuf * 101 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags) 102 { 103 struct mbuf *m, *m_storage; 104 105 if (size <= MHLEN) 106 return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags)); 107 108 if (__predict_false(size > NLMBUFSIZE)) 109 return (NULL); 110 111 m = m_gethdr(malloc_flags, MT_DATA); 112 if (m == NULL) 113 return (NULL); 114 115 m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags); 116 if (m_storage == NULL) { 117 m_free_raw(m); 118 return (NULL); 119 } 120 121 return (m); 122 } 123 124 static struct mbuf * 125 nl_get_mbuf(int size, int malloc_flags) 126 { 127 return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR)); 128 } 129 130 /* 131 * Gets a chain of Netlink mbufs. 132 * This is strip-down version of m_getm2() 133 */ 134 static struct mbuf * 135 nl_get_mbuf_chain(int len, int malloc_flags) 136 { 137 struct mbuf *m_chain = NULL, *m_tail = NULL; 138 int mbuf_flags = M_PKTHDR; 139 140 while (len > 0) { 141 int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len; 142 struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags); 143 144 if (m == NULL) { 145 m_freem(m_chain); 146 return (NULL); 147 } 148 149 /* Book keeping. */ 150 len -= M_SIZE(m); 151 if (m_tail != NULL) 152 m_tail->m_next = m; 153 else 154 m_chain = m; 155 m_tail = m; 156 mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 157 } 158 159 return (m_chain); 160 } 161 162 void 163 nl_init_msg_zone(void) 164 { 165 nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage, 166 NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 167 } 168 169 void 170 nl_destroy_msg_zone(void) 171 { 172 uma_zdestroy(nlmsg_zone); 173 } 174 175 176 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); 177 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); 178 179 struct nlwriter_ops { 180 nlwriter_op_init *init; 181 nlwriter_op_write *write_socket; 182 nlwriter_op_write *write_group; 183 nlwriter_op_write *write_chain; 184 }; 185 186 /* 187 * NS_WRITER_TYPE_BUF 188 * Writes message to a temporary memory buffer, 189 * flushing to the socket/group when buffer size limit is reached 190 */ 191 static bool 192 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) 193 { 194 int mflag = waitok ? M_WAITOK : M_NOWAIT; 195 nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); 196 if (__predict_false(nw->_storage == NULL)) 197 return (false); 198 nw->alloc_len = size; 199 nw->offset = 0; 200 nw->hdr = NULL; 201 nw->data = nw->_storage; 202 nw->writer_type = NS_WRITER_TYPE_BUF; 203 nw->malloc_flag = mflag; 204 nw->num_messages = 0; 205 nw->enomem = false; 206 return (true); 207 } 208 209 static bool 210 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 211 { 212 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 213 if (__predict_false(datalen == 0)) { 214 free(buf, M_NETLINK); 215 return (true); 216 } 217 218 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 219 if (__predict_false(m == NULL)) { 220 /* XXX: should we set sorcverr? */ 221 free(buf, M_NETLINK); 222 return (false); 223 } 224 m_append(m, datalen, buf); 225 free(buf, M_NETLINK); 226 227 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 228 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 229 } 230 231 static bool 232 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 233 { 234 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 235 nw->arg.group.proto, nw->arg.group.id); 236 if (__predict_false(datalen == 0)) { 237 free(buf, M_NETLINK); 238 return (true); 239 } 240 241 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 242 if (__predict_false(m == NULL)) { 243 free(buf, M_NETLINK); 244 return (false); 245 } 246 bool success = m_append(m, datalen, buf) != 0; 247 free(buf, M_NETLINK); 248 249 if (!success) 250 return (false); 251 252 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 253 return (true); 254 } 255 256 static bool 257 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 258 { 259 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 260 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 261 262 if (__predict_false(datalen == 0)) { 263 free(buf, M_NETLINK); 264 return (true); 265 } 266 267 if (*m0 == NULL) { 268 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 269 270 if (__predict_false(m == NULL)) { 271 free(buf, M_NETLINK); 272 return (false); 273 } 274 *m0 = m; 275 } 276 if (__predict_false(m_append(*m0, datalen, buf) == 0)) { 277 free(buf, M_NETLINK); 278 return (false); 279 } 280 return (true); 281 } 282 283 284 /* 285 * NS_WRITER_TYPE_MBUF 286 * Writes message to the allocated mbuf, 287 * flushing to socket/group when mbuf size limit is reached. 288 * This is the most efficient mechanism as it avoids double-copying. 289 * 290 * Allocates a single mbuf suitable to store up to @size bytes of data. 291 * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr. 292 * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone. 293 * Returns NULL on greater size or the allocation failure. 294 */ 295 static bool 296 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) 297 { 298 int mflag = waitok ? M_WAITOK : M_NOWAIT; 299 struct mbuf *m = nl_get_mbuf(size, mflag); 300 301 if (__predict_false(m == NULL)) 302 return (false); 303 nw->alloc_len = M_TRAILINGSPACE(m); 304 nw->offset = 0; 305 nw->hdr = NULL; 306 nw->_storage = (void *)m; 307 nw->data = mtod(m, void *); 308 nw->writer_type = NS_WRITER_TYPE_MBUF; 309 nw->malloc_flag = mflag; 310 nw->num_messages = 0; 311 nw->enomem = false; 312 memset(nw->data, 0, size); 313 NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", 314 m, size, nw->alloc_len, nw->data); 315 return (true); 316 } 317 318 static bool 319 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 320 { 321 struct mbuf *m = (struct mbuf *)buf; 322 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 323 324 if (__predict_false(datalen == 0)) { 325 m_freem(m); 326 return (true); 327 } 328 329 m->m_pkthdr.len = datalen; 330 m->m_len = datalen; 331 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 332 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 333 } 334 335 static bool 336 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 337 { 338 struct mbuf *m = (struct mbuf *)buf; 339 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 340 nw->arg.group.proto, nw->arg.group.id); 341 342 if (__predict_false(datalen == 0)) { 343 m_freem(m); 344 return (true); 345 } 346 347 m->m_pkthdr.len = datalen; 348 m->m_len = datalen; 349 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 350 return (true); 351 } 352 353 static bool 354 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 355 { 356 struct mbuf *m_new = (struct mbuf *)buf; 357 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 358 359 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 360 361 if (__predict_false(datalen == 0)) { 362 m_freem(m_new); 363 return (true); 364 } 365 366 m_new->m_pkthdr.len = datalen; 367 m_new->m_len = datalen; 368 369 if (*m0 == NULL) { 370 *m0 = m_new; 371 } else { 372 struct mbuf *m_last; 373 for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next) 374 ; 375 m_last->m_next = m_new; 376 (*m0)->m_pkthdr.len += datalen; 377 } 378 379 return (true); 380 } 381 382 /* 383 * NS_WRITER_TYPE_LBUF 384 * Writes message to the allocated memory buffer, 385 * flushing to socket/group when mbuf size limit is reached. 386 * Calls linux handler to rewrite messages before sending to the socket. 387 */ 388 static bool 389 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok) 390 { 391 int mflag = waitok ? M_WAITOK : M_NOWAIT; 392 size = roundup2(size, sizeof(void *)); 393 int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE; 394 char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO); 395 if (__predict_false(buf == NULL)) 396 return (false); 397 398 /* Fill buffer header first */ 399 struct linear_buffer *lb = (struct linear_buffer *)buf; 400 lb->base = &buf[sizeof(struct linear_buffer) + size]; 401 lb->size = size + SCRATCH_BUFFER_SIZE; 402 403 nw->alloc_len = size; 404 nw->offset = 0; 405 nw->hdr = NULL; 406 nw->_storage = buf; 407 nw->data = (char *)(lb + 1); 408 nw->malloc_flag = mflag; 409 nw->writer_type = NS_WRITER_TYPE_LBUF; 410 nw->num_messages = 0; 411 nw->enomem = false; 412 return (true); 413 } 414 415 static bool 416 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 417 { 418 struct linear_buffer *lb = (struct linear_buffer *)buf; 419 char *data = (char *)(lb + 1); 420 struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr); 421 422 if (__predict_false(datalen == 0)) { 423 free(buf, M_NETLINK); 424 return (true); 425 } 426 427 struct mbuf *m = NULL; 428 if (linux_netlink_p != NULL) 429 m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp); 430 free(buf, M_NETLINK); 431 432 if (__predict_false(m == NULL)) { 433 /* XXX: should we set sorcverr? */ 434 return (false); 435 } 436 437 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 438 return (nl_send_one(m, nlp, cnt, io_flags)); 439 } 440 441 /* Shouldn't be called (maybe except Linux code originating message) */ 442 static bool 443 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 444 { 445 struct linear_buffer *lb = (struct linear_buffer *)buf; 446 char *data = (char *)(lb + 1); 447 448 if (__predict_false(datalen == 0)) { 449 free(buf, M_NETLINK); 450 return (true); 451 } 452 453 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 454 if (__predict_false(m == NULL)) { 455 free(buf, M_NETLINK); 456 return (false); 457 } 458 m_append(m, datalen, data); 459 free(buf, M_NETLINK); 460 461 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 462 return (true); 463 } 464 465 static const struct nlwriter_ops nlmsg_writers[] = { 466 /* NS_WRITER_TYPE_MBUF */ 467 { 468 .init = nlmsg_get_ns_mbuf, 469 .write_socket = nlmsg_write_socket_mbuf, 470 .write_group = nlmsg_write_group_mbuf, 471 .write_chain = nlmsg_write_chain_mbuf, 472 }, 473 /* NS_WRITER_TYPE_BUF */ 474 { 475 .init = nlmsg_get_ns_buf, 476 .write_socket = nlmsg_write_socket_buf, 477 .write_group = nlmsg_write_group_buf, 478 .write_chain = nlmsg_write_chain_buf, 479 }, 480 /* NS_WRITER_TYPE_LBUF */ 481 { 482 .init = nlmsg_get_ns_lbuf, 483 .write_socket = nlmsg_write_socket_lbuf, 484 .write_group = nlmsg_write_group_lbuf, 485 }, 486 }; 487 488 static void 489 nlmsg_set_callback(struct nl_writer *nw) 490 { 491 const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type]; 492 493 switch (nw->writer_target) { 494 case NS_WRITER_TARGET_SOCKET: 495 nw->cb = pops->write_socket; 496 break; 497 case NS_WRITER_TARGET_GROUP: 498 nw->cb = pops->write_group; 499 break; 500 case NS_WRITER_TARGET_CHAIN: 501 nw->cb = pops->write_chain; 502 break; 503 default: 504 panic("not implemented"); 505 } 506 } 507 508 static bool 509 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok) 510 { 511 MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0])); 512 NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type); 513 return (nlmsg_writers[type].init(nw, size, waitok)); 514 } 515 516 static bool 517 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux) 518 { 519 int type; 520 521 if (!is_linux) { 522 if (__predict_true(size <= NLMBUFSIZE)) 523 type = NS_WRITER_TYPE_MBUF; 524 else 525 type = NS_WRITER_TYPE_BUF; 526 } else 527 type = NS_WRITER_TYPE_LBUF; 528 return (nlmsg_get_buf_type(nw, size, type, waitok)); 529 } 530 531 bool 532 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) 533 { 534 if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux)) 535 return (false); 536 nw->arg.ptr = (void *)nlp; 537 nw->writer_target = NS_WRITER_TARGET_SOCKET; 538 nlmsg_set_callback(nw); 539 return (true); 540 } 541 542 bool 543 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) 544 { 545 if (!nlmsg_get_buf(nw, size, false, false)) 546 return (false); 547 nw->arg.group.proto = protocol; 548 nw->arg.group.id = group_id; 549 nw->writer_target = NS_WRITER_TARGET_GROUP; 550 nlmsg_set_callback(nw); 551 return (true); 552 } 553 554 bool 555 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) 556 { 557 if (!nlmsg_get_buf(nw, size, false, false)) 558 return (false); 559 *pm = NULL; 560 nw->arg.ptr = (void *)pm; 561 nw->writer_target = NS_WRITER_TARGET_CHAIN; 562 nlmsg_set_callback(nw); 563 NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf); 564 return (true); 565 } 566 567 void 568 _nlmsg_ignore_limit(struct nl_writer *nw) 569 { 570 nw->ignore_limit = true; 571 } 572 573 bool 574 _nlmsg_flush(struct nl_writer *nw) 575 { 576 577 if (__predict_false(nw->hdr != NULL)) { 578 /* Last message has not been completed, skip it. */ 579 int completed_len = (char *)nw->hdr - nw->data; 580 /* Send completed messages */ 581 nw->offset -= nw->offset - completed_len; 582 nw->hdr = NULL; 583 } 584 585 NL_LOG(LOG_DEBUG2, "OUT"); 586 bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages); 587 nw->_storage = NULL; 588 589 if (!result) { 590 NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb); 591 } 592 593 return (result); 594 } 595 596 /* 597 * Flushes previous data and allocates new underlying storage 598 * sufficient for holding at least @required_len bytes. 599 * Return true on success. 600 */ 601 bool 602 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len) 603 { 604 struct nl_writer ns_new = {}; 605 int completed_len, new_len; 606 607 if (nw->enomem) 608 return (false); 609 610 NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", 611 nw->offset, nw->alloc_len, required_len); 612 613 /* Calculated new buffer size and allocate it s*/ 614 completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; 615 if (completed_len > 0 && required_len < NLMBUFSIZE) { 616 /* We already ran out of space, use the largest effective size */ 617 new_len = max(nw->alloc_len, NLMBUFSIZE); 618 } else { 619 if (nw->alloc_len < NLMBUFSIZE) 620 new_len = NLMBUFSIZE; 621 else 622 new_len = nw->alloc_len * 2; 623 while (new_len < required_len) 624 new_len *= 2; 625 } 626 bool waitok = (nw->malloc_flag == M_WAITOK); 627 bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); 628 if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { 629 nw->enomem = true; 630 NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); 631 return (false); 632 } 633 if (nw->ignore_limit) 634 nlmsg_ignore_limit(&ns_new); 635 636 /* Update callback data */ 637 ns_new.writer_target = nw->writer_target; 638 nlmsg_set_callback(&ns_new); 639 ns_new.arg = nw->arg; 640 641 /* Copy last (unfinished) header to the new storage */ 642 int last_len = nw->offset - completed_len; 643 if (last_len > 0) { 644 memcpy(ns_new.data, nw->hdr, last_len); 645 ns_new.hdr = (struct nlmsghdr *)ns_new.data; 646 ns_new.offset = last_len; 647 } 648 649 NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); 650 651 /* Flush completed headers & switch to the new nw */ 652 nlmsg_flush(nw); 653 memcpy(nw, &ns_new, sizeof(struct nl_writer)); 654 NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); 655 656 return (true); 657 } 658 659 bool 660 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, 661 uint16_t flags, uint32_t len) 662 { 663 struct nlmsghdr *hdr; 664 665 MPASS(nw->hdr == NULL); 666 667 int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); 668 if (__predict_false(nw->offset + required_len > nw->alloc_len)) { 669 if (!nlmsg_refill_buffer(nw, required_len)) 670 return (false); 671 } 672 673 hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); 674 675 hdr->nlmsg_len = len; 676 hdr->nlmsg_type = type; 677 hdr->nlmsg_flags = flags; 678 hdr->nlmsg_seq = seq; 679 hdr->nlmsg_pid = portid; 680 681 nw->hdr = hdr; 682 nw->offset += sizeof(struct nlmsghdr); 683 684 return (true); 685 } 686 687 bool 688 _nlmsg_end(struct nl_writer *nw) 689 { 690 MPASS(nw->hdr != NULL); 691 692 if (nw->enomem) { 693 NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); 694 nlmsg_abort(nw); 695 return (false); 696 } 697 698 nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); 699 NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", 700 nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, 701 nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); 702 nw->hdr = NULL; 703 nw->num_messages++; 704 return (true); 705 } 706 707 void 708 _nlmsg_abort(struct nl_writer *nw) 709 { 710 if (nw->hdr != NULL) { 711 nw->offset = (uint32_t)((char *)nw->hdr - nw->data); 712 nw->hdr = NULL; 713 } 714 } 715 716 void 717 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, 718 struct nl_pstate *npt) 719 { 720 struct nlmsgerr *errmsg; 721 int payload_len; 722 uint32_t flags = nlp->nl_flags; 723 struct nl_writer *nw = npt->nw; 724 bool cap_ack; 725 726 payload_len = sizeof(struct nlmsgerr); 727 728 /* 729 * The only case when we send the full message in the 730 * reply is when there is an error and NETLINK_CAP_ACK 731 * is not set. 732 */ 733 cap_ack = (error == 0) || (flags & NLF_CAP_ACK); 734 if (!cap_ack) 735 payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); 736 payload_len = NETLINK_ALIGN(payload_len); 737 738 uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0; 739 if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) 740 nl_flags |= NLM_F_ACK_TLVS; 741 742 NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", 743 hdr->nlmsg_type, hdr->nlmsg_seq); 744 745 if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) 746 goto enomem; 747 748 errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); 749 errmsg->error = error; 750 /* In case of error copy the whole message, else just the header */ 751 memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); 752 753 if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) 754 nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); 755 if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) 756 nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); 757 if (npt->cookie != NULL) 758 nlattr_add_raw(nw, npt->cookie); 759 760 if (nlmsg_end(nw)) 761 return; 762 enomem: 763 NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", 764 hdr->nlmsg_type, hdr->nlmsg_seq); 765 nlmsg_abort(nw); 766 } 767 768 bool 769 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) 770 { 771 if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { 772 NL_LOG(LOG_DEBUG, "Error finalizing table dump"); 773 return (false); 774 } 775 /* Save operation result */ 776 int *perror = nlmsg_reserve_object(nw, int); 777 NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, 778 nw->offset, perror); 779 *perror = error; 780 nlmsg_end(nw); 781 nw->suppress_ack = true; 782 783 return (true); 784 } 785 786 #include <netlink/ktest_netlink_message_writer.h> 787