1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_netlink.h" 29 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 #include <sys/malloc.h> 33 #include <sys/lock.h> 34 #include <sys/rmlock.h> 35 #include <sys/mbuf.h> 36 #include <sys/ck.h> 37 #include <sys/socket.h> 38 #include <sys/socketvar.h> 39 #include <sys/syslog.h> 40 41 #include <netlink/netlink.h> 42 #include <netlink/netlink_ctl.h> 43 #include <netlink/netlink_linux.h> 44 #include <netlink/netlink_var.h> 45 46 #define DEBUG_MOD_NAME nl_writer 47 #define DEBUG_MAX_LEVEL LOG_DEBUG3 48 #include <netlink/netlink_debug.h> 49 _DECLARE_DEBUG(LOG_INFO); 50 51 /* 52 * The goal of this file is to provide convenient message writing KPI on top of 53 * different storage methods (mbufs, uio, temporary memory chunks). 54 * 55 * The main KPI guarantee is that the (last) message always resides in the contiguous 56 * memory buffer, so one is able to update the header after writing the entire message. 57 * 58 * This guarantee comes with a side effect of potentially reallocating underlying 59 * buffer, so one needs to update the desired pointers after something is added 60 * to the header. 61 * 62 * Messaging layer contains hooks performing transparent Linux translation for the messages. 63 * 64 * There are 3 types of supported targets: 65 * * socket (adds mbufs to the socket buffer, used for message replies) 66 * * group (sends mbuf/chain to the specified groups, used for the notifications) 67 * * chain (returns mbuf chain, used in Linux message translation code) 68 * 69 * There are 3 types of storage: 70 * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message 71 * fits in NLMBUFSIZE) 72 * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs 73 * to be larger than one supported by NS_WRITER_TYPE_MBUF) 74 * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for 75 * Linux sockets, calls translation hook prior to sending messages to the socket). 76 * 77 * Internally, KPI switches between different types of storage when memory requirements 78 * change. It happens transparently to the caller. 79 */ 80 81 /* 82 * Uma zone for the mbuf-based Netlink storage 83 */ 84 static uma_zone_t nlmsg_zone; 85 86 static void 87 nl_free_mbuf_storage(struct mbuf *m) 88 { 89 uma_zfree(nlmsg_zone, m->m_ext.ext_buf); 90 } 91 92 static int 93 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused) 94 { 95 struct mbuf *m = (struct mbuf *)arg; 96 97 if (m != NULL) 98 m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE); 99 100 return (0); 101 } 102 103 static struct mbuf * 104 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags) 105 { 106 struct mbuf *m, *m_storage; 107 108 if (size <= MHLEN) 109 return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags)); 110 111 if (__predict_false(size > NLMBUFSIZE)) 112 return (NULL); 113 114 m = m_gethdr(malloc_flags, MT_DATA); 115 if (m == NULL) 116 return (NULL); 117 118 m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags); 119 if (m_storage == NULL) { 120 m_free_raw(m); 121 return (NULL); 122 } 123 124 return (m); 125 } 126 127 static struct mbuf * 128 nl_get_mbuf(int size, int malloc_flags) 129 { 130 return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR)); 131 } 132 133 /* 134 * Gets a chain of Netlink mbufs. 135 * This is strip-down version of m_getm2() 136 */ 137 static struct mbuf * 138 nl_get_mbuf_chain(int len, int malloc_flags) 139 { 140 struct mbuf *m_chain = NULL, *m_tail = NULL; 141 int mbuf_flags = M_PKTHDR; 142 143 while (len > 0) { 144 int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len; 145 struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags); 146 147 if (m == NULL) { 148 m_freem(m_chain); 149 return (NULL); 150 } 151 152 /* Book keeping. */ 153 len -= M_SIZE(m); 154 if (m_tail != NULL) 155 m_tail->m_next = m; 156 else 157 m_chain = m; 158 m_tail = m; 159 mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 160 } 161 162 return (m_chain); 163 } 164 165 void 166 nl_init_msg_zone(void) 167 { 168 nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage, 169 NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 170 } 171 172 void 173 nl_destroy_msg_zone(void) 174 { 175 uma_zdestroy(nlmsg_zone); 176 } 177 178 179 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); 180 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); 181 182 struct nlwriter_ops { 183 nlwriter_op_init *init; 184 nlwriter_op_write *write_socket; 185 nlwriter_op_write *write_group; 186 nlwriter_op_write *write_chain; 187 }; 188 189 /* 190 * NS_WRITER_TYPE_BUF 191 * Writes message to a temporary memory buffer, 192 * flushing to the socket/group when buffer size limit is reached 193 */ 194 static bool 195 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) 196 { 197 int mflag = waitok ? M_WAITOK : M_NOWAIT; 198 nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); 199 if (__predict_false(nw->_storage == NULL)) 200 return (false); 201 nw->alloc_len = size; 202 nw->offset = 0; 203 nw->hdr = NULL; 204 nw->data = nw->_storage; 205 nw->writer_type = NS_WRITER_TYPE_BUF; 206 nw->malloc_flag = mflag; 207 nw->num_messages = 0; 208 nw->enomem = false; 209 return (true); 210 } 211 212 static bool 213 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 214 { 215 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 216 if (__predict_false(datalen == 0)) { 217 free(buf, M_NETLINK); 218 return (true); 219 } 220 221 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 222 if (__predict_false(m == NULL)) { 223 /* XXX: should we set sorcverr? */ 224 free(buf, M_NETLINK); 225 return (false); 226 } 227 m_append(m, datalen, buf); 228 free(buf, M_NETLINK); 229 230 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 231 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 232 } 233 234 static bool 235 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 236 { 237 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 238 nw->arg.group.proto, nw->arg.group.id); 239 if (__predict_false(datalen == 0)) { 240 free(buf, M_NETLINK); 241 return (true); 242 } 243 244 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 245 if (__predict_false(m == NULL)) { 246 free(buf, M_NETLINK); 247 return (false); 248 } 249 bool success = m_append(m, datalen, buf) != 0; 250 free(buf, M_NETLINK); 251 252 if (!success) 253 return (false); 254 255 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 256 return (true); 257 } 258 259 static bool 260 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 261 { 262 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 263 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 264 265 if (__predict_false(datalen == 0)) { 266 free(buf, M_NETLINK); 267 return (true); 268 } 269 270 if (*m0 == NULL) { 271 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 272 273 if (__predict_false(m == NULL)) { 274 free(buf, M_NETLINK); 275 return (false); 276 } 277 *m0 = m; 278 } 279 if (__predict_false(m_append(*m0, datalen, buf) == 0)) { 280 free(buf, M_NETLINK); 281 return (false); 282 } 283 return (true); 284 } 285 286 287 /* 288 * NS_WRITER_TYPE_MBUF 289 * Writes message to the allocated mbuf, 290 * flushing to socket/group when mbuf size limit is reached. 291 * This is the most efficient mechanism as it avoids double-copying. 292 * 293 * Allocates a single mbuf suitable to store up to @size bytes of data. 294 * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr. 295 * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone. 296 * Returns NULL on greater size or the allocation failure. 297 */ 298 static bool 299 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) 300 { 301 int mflag = waitok ? M_WAITOK : M_NOWAIT; 302 struct mbuf *m = nl_get_mbuf(size, mflag); 303 304 if (__predict_false(m == NULL)) 305 return (false); 306 nw->alloc_len = M_TRAILINGSPACE(m); 307 nw->offset = 0; 308 nw->hdr = NULL; 309 nw->_storage = (void *)m; 310 nw->data = mtod(m, void *); 311 nw->writer_type = NS_WRITER_TYPE_MBUF; 312 nw->malloc_flag = mflag; 313 nw->num_messages = 0; 314 nw->enomem = false; 315 memset(nw->data, 0, size); 316 NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", 317 m, size, nw->alloc_len, nw->data); 318 return (true); 319 } 320 321 static bool 322 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 323 { 324 struct mbuf *m = (struct mbuf *)buf; 325 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 326 327 if (__predict_false(datalen == 0)) { 328 m_freem(m); 329 return (true); 330 } 331 332 m->m_pkthdr.len = datalen; 333 m->m_len = datalen; 334 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 335 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 336 } 337 338 static bool 339 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 340 { 341 struct mbuf *m = (struct mbuf *)buf; 342 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 343 nw->arg.group.proto, nw->arg.group.id); 344 345 if (__predict_false(datalen == 0)) { 346 m_freem(m); 347 return (true); 348 } 349 350 m->m_pkthdr.len = datalen; 351 m->m_len = datalen; 352 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 353 return (true); 354 } 355 356 static bool 357 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 358 { 359 struct mbuf *m_new = (struct mbuf *)buf; 360 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 361 362 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 363 364 if (__predict_false(datalen == 0)) { 365 m_freem(m_new); 366 return (true); 367 } 368 369 m_new->m_pkthdr.len = datalen; 370 m_new->m_len = datalen; 371 372 if (*m0 == NULL) { 373 *m0 = m_new; 374 } else { 375 struct mbuf *m_last; 376 for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next) 377 ; 378 m_last->m_next = m_new; 379 (*m0)->m_pkthdr.len += datalen; 380 } 381 382 return (true); 383 } 384 385 /* 386 * NS_WRITER_TYPE_LBUF 387 * Writes message to the allocated memory buffer, 388 * flushing to socket/group when mbuf size limit is reached. 389 * Calls linux handler to rewrite messages before sending to the socket. 390 */ 391 static bool 392 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok) 393 { 394 int mflag = waitok ? M_WAITOK : M_NOWAIT; 395 size = roundup2(size, sizeof(void *)); 396 int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE; 397 char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO); 398 if (__predict_false(buf == NULL)) 399 return (false); 400 401 /* Fill buffer header first */ 402 struct linear_buffer *lb = (struct linear_buffer *)buf; 403 lb->base = &buf[sizeof(struct linear_buffer) + size]; 404 lb->size = size + SCRATCH_BUFFER_SIZE; 405 406 nw->alloc_len = size; 407 nw->offset = 0; 408 nw->hdr = NULL; 409 nw->_storage = buf; 410 nw->data = (char *)(lb + 1); 411 nw->malloc_flag = mflag; 412 nw->writer_type = NS_WRITER_TYPE_LBUF; 413 nw->num_messages = 0; 414 nw->enomem = false; 415 return (true); 416 } 417 418 static bool 419 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 420 { 421 struct linear_buffer *lb = (struct linear_buffer *)buf; 422 char *data = (char *)(lb + 1); 423 struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr); 424 425 if (__predict_false(datalen == 0)) { 426 free(buf, M_NETLINK); 427 return (true); 428 } 429 430 struct mbuf *m = NULL; 431 if (linux_netlink_p != NULL) 432 m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp); 433 free(buf, M_NETLINK); 434 435 if (__predict_false(m == NULL)) { 436 /* XXX: should we set sorcverr? */ 437 return (false); 438 } 439 440 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 441 return (nl_send_one(m, nlp, cnt, io_flags)); 442 } 443 444 /* Shouldn't be called (maybe except Linux code originating message) */ 445 static bool 446 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 447 { 448 struct linear_buffer *lb = (struct linear_buffer *)buf; 449 char *data = (char *)(lb + 1); 450 451 if (__predict_false(datalen == 0)) { 452 free(buf, M_NETLINK); 453 return (true); 454 } 455 456 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 457 if (__predict_false(m == NULL)) { 458 free(buf, M_NETLINK); 459 return (false); 460 } 461 m_append(m, datalen, data); 462 free(buf, M_NETLINK); 463 464 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 465 return (true); 466 } 467 468 static const struct nlwriter_ops nlmsg_writers[] = { 469 /* NS_WRITER_TYPE_MBUF */ 470 { 471 .init = nlmsg_get_ns_mbuf, 472 .write_socket = nlmsg_write_socket_mbuf, 473 .write_group = nlmsg_write_group_mbuf, 474 .write_chain = nlmsg_write_chain_mbuf, 475 }, 476 /* NS_WRITER_TYPE_BUF */ 477 { 478 .init = nlmsg_get_ns_buf, 479 .write_socket = nlmsg_write_socket_buf, 480 .write_group = nlmsg_write_group_buf, 481 .write_chain = nlmsg_write_chain_buf, 482 }, 483 /* NS_WRITER_TYPE_LBUF */ 484 { 485 .init = nlmsg_get_ns_lbuf, 486 .write_socket = nlmsg_write_socket_lbuf, 487 .write_group = nlmsg_write_group_lbuf, 488 }, 489 }; 490 491 static void 492 nlmsg_set_callback(struct nl_writer *nw) 493 { 494 const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type]; 495 496 switch (nw->writer_target) { 497 case NS_WRITER_TARGET_SOCKET: 498 nw->cb = pops->write_socket; 499 break; 500 case NS_WRITER_TARGET_GROUP: 501 nw->cb = pops->write_group; 502 break; 503 case NS_WRITER_TARGET_CHAIN: 504 nw->cb = pops->write_chain; 505 break; 506 default: 507 panic("not implemented"); 508 } 509 } 510 511 static bool 512 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok) 513 { 514 MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0])); 515 NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type); 516 return (nlmsg_writers[type].init(nw, size, waitok)); 517 } 518 519 static bool 520 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux) 521 { 522 int type; 523 524 if (!is_linux) { 525 if (__predict_true(size <= NLMBUFSIZE)) 526 type = NS_WRITER_TYPE_MBUF; 527 else 528 type = NS_WRITER_TYPE_BUF; 529 } else 530 type = NS_WRITER_TYPE_LBUF; 531 return (nlmsg_get_buf_type(nw, size, type, waitok)); 532 } 533 534 bool 535 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) 536 { 537 if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux)) 538 return (false); 539 nw->arg.ptr = (void *)nlp; 540 nw->writer_target = NS_WRITER_TARGET_SOCKET; 541 nlmsg_set_callback(nw); 542 return (true); 543 } 544 545 bool 546 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) 547 { 548 if (!nlmsg_get_buf(nw, size, false, false)) 549 return (false); 550 nw->arg.group.proto = protocol; 551 nw->arg.group.id = group_id; 552 nw->writer_target = NS_WRITER_TARGET_GROUP; 553 nlmsg_set_callback(nw); 554 return (true); 555 } 556 557 bool 558 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) 559 { 560 if (!nlmsg_get_buf(nw, size, false, false)) 561 return (false); 562 *pm = NULL; 563 nw->arg.ptr = (void *)pm; 564 nw->writer_target = NS_WRITER_TARGET_CHAIN; 565 nlmsg_set_callback(nw); 566 NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf); 567 return (true); 568 } 569 570 void 571 _nlmsg_ignore_limit(struct nl_writer *nw) 572 { 573 nw->ignore_limit = true; 574 } 575 576 bool 577 _nlmsg_flush(struct nl_writer *nw) 578 { 579 580 if (__predict_false(nw->hdr != NULL)) { 581 /* Last message has not been completed, skip it. */ 582 int completed_len = (char *)nw->hdr - nw->data; 583 /* Send completed messages */ 584 nw->offset -= nw->offset - completed_len; 585 nw->hdr = NULL; 586 } 587 588 NL_LOG(LOG_DEBUG2, "OUT"); 589 bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages); 590 nw->_storage = NULL; 591 592 if (!result) { 593 NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb); 594 } 595 596 return (result); 597 } 598 599 /* 600 * Flushes previous data and allocates new underlying storage 601 * sufficient for holding at least @required_len bytes. 602 * Return true on success. 603 */ 604 bool 605 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len) 606 { 607 struct nl_writer ns_new = {}; 608 int completed_len, new_len; 609 610 if (nw->enomem) 611 return (false); 612 613 NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", 614 nw->offset, nw->alloc_len, required_len); 615 616 /* Calculated new buffer size and allocate it s*/ 617 completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; 618 if (completed_len > 0 && required_len < NLMBUFSIZE) { 619 /* We already ran out of space, use the largest effective size */ 620 new_len = max(nw->alloc_len, NLMBUFSIZE); 621 } else { 622 if (nw->alloc_len < NLMBUFSIZE) 623 new_len = NLMBUFSIZE; 624 else 625 new_len = nw->alloc_len * 2; 626 while (new_len < required_len) 627 new_len *= 2; 628 } 629 bool waitok = (nw->malloc_flag == M_WAITOK); 630 bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); 631 if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { 632 nw->enomem = true; 633 NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); 634 return (false); 635 } 636 if (nw->ignore_limit) 637 nlmsg_ignore_limit(&ns_new); 638 639 /* Update callback data */ 640 ns_new.writer_target = nw->writer_target; 641 nlmsg_set_callback(&ns_new); 642 ns_new.arg = nw->arg; 643 644 /* Copy last (unfinished) header to the new storage */ 645 int last_len = nw->offset - completed_len; 646 if (last_len > 0) { 647 memcpy(ns_new.data, nw->hdr, last_len); 648 ns_new.hdr = (struct nlmsghdr *)ns_new.data; 649 ns_new.offset = last_len; 650 } 651 652 NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); 653 654 /* Flush completed headers & switch to the new nw */ 655 nlmsg_flush(nw); 656 memcpy(nw, &ns_new, sizeof(struct nl_writer)); 657 NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); 658 659 return (true); 660 } 661 662 bool 663 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, 664 uint16_t flags, uint32_t len) 665 { 666 struct nlmsghdr *hdr; 667 668 MPASS(nw->hdr == NULL); 669 670 int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); 671 if (__predict_false(nw->offset + required_len > nw->alloc_len)) { 672 if (!nlmsg_refill_buffer(nw, required_len)) 673 return (false); 674 } 675 676 hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); 677 678 hdr->nlmsg_len = len; 679 hdr->nlmsg_type = type; 680 hdr->nlmsg_flags = flags; 681 hdr->nlmsg_seq = seq; 682 hdr->nlmsg_pid = portid; 683 684 nw->hdr = hdr; 685 nw->offset += sizeof(struct nlmsghdr); 686 687 return (true); 688 } 689 690 bool 691 _nlmsg_end(struct nl_writer *nw) 692 { 693 MPASS(nw->hdr != NULL); 694 695 if (nw->enomem) { 696 NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); 697 nlmsg_abort(nw); 698 return (false); 699 } 700 701 nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); 702 NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", 703 nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, 704 nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); 705 nw->hdr = NULL; 706 nw->num_messages++; 707 return (true); 708 } 709 710 void 711 _nlmsg_abort(struct nl_writer *nw) 712 { 713 if (nw->hdr != NULL) { 714 nw->offset = (uint32_t)((char *)nw->hdr - nw->data); 715 nw->hdr = NULL; 716 } 717 } 718 719 void 720 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, 721 struct nl_pstate *npt) 722 { 723 struct nlmsgerr *errmsg; 724 int payload_len; 725 uint32_t flags = nlp->nl_flags; 726 struct nl_writer *nw = npt->nw; 727 bool cap_ack; 728 729 payload_len = sizeof(struct nlmsgerr); 730 731 /* 732 * The only case when we send the full message in the 733 * reply is when there is an error and NETLINK_CAP_ACK 734 * is not set. 735 */ 736 cap_ack = (error == 0) || (flags & NLF_CAP_ACK); 737 if (!cap_ack) 738 payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); 739 payload_len = NETLINK_ALIGN(payload_len); 740 741 uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0; 742 if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) 743 nl_flags |= NLM_F_ACK_TLVS; 744 745 NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", 746 hdr->nlmsg_type, hdr->nlmsg_seq); 747 748 if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) 749 goto enomem; 750 751 errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); 752 errmsg->error = error; 753 /* In case of error copy the whole message, else just the header */ 754 memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); 755 756 if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) 757 nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); 758 if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) 759 nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); 760 if (npt->cookie != NULL) 761 nlattr_add_raw(nw, npt->cookie); 762 763 if (nlmsg_end(nw)) 764 return; 765 enomem: 766 NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", 767 hdr->nlmsg_type, hdr->nlmsg_seq); 768 nlmsg_abort(nw); 769 } 770 771 bool 772 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) 773 { 774 if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { 775 NL_LOG(LOG_DEBUG, "Error finalizing table dump"); 776 return (false); 777 } 778 /* Save operation result */ 779 int *perror = nlmsg_reserve_object(nw, int); 780 NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, 781 nw->offset, perror); 782 *perror = error; 783 nlmsg_end(nw); 784 nw->suppress_ack = true; 785 786 return (true); 787 } 788 789 #include <netlink/ktest_netlink_message_writer.h> 790