1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include <sys/param.h> 30 #include <sys/malloc.h> 31 #include <sys/lock.h> 32 #include <sys/rmlock.h> 33 #include <sys/mbuf.h> 34 #include <sys/ck.h> 35 #include <sys/socket.h> 36 #include <sys/socketvar.h> 37 #include <sys/syslog.h> 38 39 #include <netlink/netlink.h> 40 #include <netlink/netlink_ctl.h> 41 #include <netlink/netlink_linux.h> 42 #include <netlink/netlink_var.h> 43 44 #define DEBUG_MOD_NAME nl_writer 45 #define DEBUG_MAX_LEVEL LOG_DEBUG3 46 #include <netlink/netlink_debug.h> 47 _DECLARE_DEBUG(LOG_INFO); 48 49 /* 50 * The goal of this file is to provide convenient message writing KPI on top of 51 * different storage methods (mbufs, uio, temporary memory chunks). 52 * 53 * The main KPI guarantee is that the (last) message always resides in the contiguous 54 * memory buffer, so one is able to update the header after writing the entire message. 55 * 56 * This guarantee comes with a side effect of potentially reallocating underlying 57 * buffer, so one needs to update the desired pointers after something is added 58 * to the header. 59 * 60 * Messaging layer contains hooks performing transparent Linux translation for the messages. 61 * 62 * There are 3 types of supported targets: 63 * * socket (adds mbufs to the socket buffer, used for message replies) 64 * * group (sends mbuf/chain to the specified groups, used for the notifications) 65 * * chain (returns mbuf chain, used in Linux message translation code) 66 * 67 * There are 3 types of storage: 68 * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message 69 * fits in NLMBUFSIZE) 70 * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs 71 * to be larger than one supported by NS_WRITER_TYPE_MBUF) 72 * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for 73 * Linux sockets, calls translation hook prior to sending messages to the socket). 74 * 75 * Internally, KPI switches between different types of storage when memory requirements 76 * change. It happens transparently to the caller. 77 */ 78 79 /* 80 * Uma zone for the mbuf-based Netlink storage 81 */ 82 static uma_zone_t nlmsg_zone; 83 84 static void 85 nl_free_mbuf_storage(struct mbuf *m) 86 { 87 uma_zfree(nlmsg_zone, m->m_ext.ext_buf); 88 } 89 90 static int 91 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused) 92 { 93 struct mbuf *m = (struct mbuf *)arg; 94 95 if (m != NULL) 96 m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE); 97 98 return (0); 99 } 100 101 static struct mbuf * 102 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags) 103 { 104 struct mbuf *m, *m_storage; 105 106 if (size <= MHLEN) 107 return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags)); 108 109 if (__predict_false(size > NLMBUFSIZE)) 110 return (NULL); 111 112 m = m_gethdr(malloc_flags, MT_DATA); 113 if (m == NULL) 114 return (NULL); 115 116 m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags); 117 if (m_storage == NULL) { 118 m_free_raw(m); 119 return (NULL); 120 } 121 122 return (m); 123 } 124 125 static struct mbuf * 126 nl_get_mbuf(int size, int malloc_flags) 127 { 128 return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR)); 129 } 130 131 /* 132 * Gets a chain of Netlink mbufs. 133 * This is strip-down version of m_getm2() 134 */ 135 static struct mbuf * 136 nl_get_mbuf_chain(int len, int malloc_flags) 137 { 138 struct mbuf *m_chain = NULL, *m_tail = NULL; 139 int mbuf_flags = M_PKTHDR; 140 141 while (len > 0) { 142 int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len; 143 struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags); 144 145 if (m == NULL) { 146 m_freem(m_chain); 147 return (NULL); 148 } 149 150 /* Book keeping. */ 151 len -= M_SIZE(m); 152 if (m_tail != NULL) 153 m_tail->m_next = m; 154 else 155 m_chain = m; 156 m_tail = m; 157 mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 158 } 159 160 return (m_chain); 161 } 162 163 void 164 nl_init_msg_zone(void) 165 { 166 nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage, 167 NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 168 } 169 170 void 171 nl_destroy_msg_zone(void) 172 { 173 uma_zdestroy(nlmsg_zone); 174 } 175 176 177 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); 178 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); 179 180 struct nlwriter_ops { 181 nlwriter_op_init *init; 182 nlwriter_op_write *write_socket; 183 nlwriter_op_write *write_group; 184 nlwriter_op_write *write_chain; 185 }; 186 187 /* 188 * NS_WRITER_TYPE_BUF 189 * Writes message to a temporary memory buffer, 190 * flushing to the socket/group when buffer size limit is reached 191 */ 192 static bool 193 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) 194 { 195 int mflag = waitok ? M_WAITOK : M_NOWAIT; 196 nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); 197 if (__predict_false(nw->_storage == NULL)) 198 return (false); 199 nw->alloc_len = size; 200 nw->offset = 0; 201 nw->hdr = NULL; 202 nw->data = nw->_storage; 203 nw->writer_type = NS_WRITER_TYPE_BUF; 204 nw->malloc_flag = mflag; 205 nw->num_messages = 0; 206 nw->enomem = false; 207 return (true); 208 } 209 210 static bool 211 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 212 { 213 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 214 if (__predict_false(datalen == 0)) { 215 free(buf, M_NETLINK); 216 return (true); 217 } 218 219 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 220 if (__predict_false(m == NULL)) { 221 /* XXX: should we set sorcverr? */ 222 free(buf, M_NETLINK); 223 return (false); 224 } 225 m_append(m, datalen, buf); 226 free(buf, M_NETLINK); 227 228 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 229 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 230 } 231 232 static bool 233 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 234 { 235 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 236 nw->arg.group.proto, nw->arg.group.id); 237 if (__predict_false(datalen == 0)) { 238 free(buf, M_NETLINK); 239 return (true); 240 } 241 242 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 243 if (__predict_false(m == NULL)) { 244 free(buf, M_NETLINK); 245 return (false); 246 } 247 bool success = m_append(m, datalen, buf) != 0; 248 free(buf, M_NETLINK); 249 250 if (!success) 251 return (false); 252 253 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 254 return (true); 255 } 256 257 static bool 258 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 259 { 260 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 261 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 262 263 if (__predict_false(datalen == 0)) { 264 free(buf, M_NETLINK); 265 return (true); 266 } 267 268 if (*m0 == NULL) { 269 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 270 271 if (__predict_false(m == NULL)) { 272 free(buf, M_NETLINK); 273 return (false); 274 } 275 *m0 = m; 276 } 277 if (__predict_false(m_append(*m0, datalen, buf) == 0)) { 278 free(buf, M_NETLINK); 279 return (false); 280 } 281 return (true); 282 } 283 284 285 /* 286 * NS_WRITER_TYPE_MBUF 287 * Writes message to the allocated mbuf, 288 * flushing to socket/group when mbuf size limit is reached. 289 * This is the most efficient mechanism as it avoids double-copying. 290 * 291 * Allocates a single mbuf suitable to store up to @size bytes of data. 292 * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr. 293 * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone. 294 * Returns NULL on greater size or the allocation failure. 295 */ 296 static bool 297 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) 298 { 299 int mflag = waitok ? M_WAITOK : M_NOWAIT; 300 struct mbuf *m = nl_get_mbuf(size, mflag); 301 302 if (__predict_false(m == NULL)) 303 return (false); 304 nw->alloc_len = M_TRAILINGSPACE(m); 305 nw->offset = 0; 306 nw->hdr = NULL; 307 nw->_storage = (void *)m; 308 nw->data = mtod(m, void *); 309 nw->writer_type = NS_WRITER_TYPE_MBUF; 310 nw->malloc_flag = mflag; 311 nw->num_messages = 0; 312 nw->enomem = false; 313 memset(nw->data, 0, size); 314 NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", 315 m, size, nw->alloc_len, nw->data); 316 return (true); 317 } 318 319 static bool 320 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 321 { 322 struct mbuf *m = (struct mbuf *)buf; 323 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 324 325 if (__predict_false(datalen == 0)) { 326 m_freem(m); 327 return (true); 328 } 329 330 m->m_pkthdr.len = datalen; 331 m->m_len = datalen; 332 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 333 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 334 } 335 336 static bool 337 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 338 { 339 struct mbuf *m = (struct mbuf *)buf; 340 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 341 nw->arg.group.proto, nw->arg.group.id); 342 343 if (__predict_false(datalen == 0)) { 344 m_freem(m); 345 return (true); 346 } 347 348 m->m_pkthdr.len = datalen; 349 m->m_len = datalen; 350 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 351 return (true); 352 } 353 354 static bool 355 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 356 { 357 struct mbuf *m_new = (struct mbuf *)buf; 358 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 359 360 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 361 362 if (__predict_false(datalen == 0)) { 363 m_freem(m_new); 364 return (true); 365 } 366 367 m_new->m_pkthdr.len = datalen; 368 m_new->m_len = datalen; 369 370 if (*m0 == NULL) { 371 *m0 = m_new; 372 } else { 373 struct mbuf *m_last; 374 for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next) 375 ; 376 m_last->m_next = m_new; 377 (*m0)->m_pkthdr.len += datalen; 378 } 379 380 return (true); 381 } 382 383 /* 384 * NS_WRITER_TYPE_LBUF 385 * Writes message to the allocated memory buffer, 386 * flushing to socket/group when mbuf size limit is reached. 387 * Calls linux handler to rewrite messages before sending to the socket. 388 */ 389 static bool 390 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok) 391 { 392 int mflag = waitok ? M_WAITOK : M_NOWAIT; 393 size = roundup2(size, sizeof(void *)); 394 int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE; 395 char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO); 396 if (__predict_false(buf == NULL)) 397 return (false); 398 399 /* Fill buffer header first */ 400 struct linear_buffer *lb = (struct linear_buffer *)buf; 401 lb->base = &buf[sizeof(struct linear_buffer) + size]; 402 lb->size = size + SCRATCH_BUFFER_SIZE; 403 404 nw->alloc_len = size; 405 nw->offset = 0; 406 nw->hdr = NULL; 407 nw->_storage = buf; 408 nw->data = (char *)(lb + 1); 409 nw->malloc_flag = mflag; 410 nw->writer_type = NS_WRITER_TYPE_LBUF; 411 nw->num_messages = 0; 412 nw->enomem = false; 413 return (true); 414 } 415 416 static bool 417 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 418 { 419 struct linear_buffer *lb = (struct linear_buffer *)buf; 420 char *data = (char *)(lb + 1); 421 struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr); 422 423 if (__predict_false(datalen == 0)) { 424 free(buf, M_NETLINK); 425 return (true); 426 } 427 428 struct mbuf *m = NULL; 429 if (linux_netlink_p != NULL) 430 m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp); 431 free(buf, M_NETLINK); 432 433 if (__predict_false(m == NULL)) { 434 /* XXX: should we set sorcverr? */ 435 return (false); 436 } 437 438 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 439 return (nl_send_one(m, nlp, cnt, io_flags)); 440 } 441 442 /* Shouldn't be called (maybe except Linux code originating message) */ 443 static bool 444 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 445 { 446 struct linear_buffer *lb = (struct linear_buffer *)buf; 447 char *data = (char *)(lb + 1); 448 449 if (__predict_false(datalen == 0)) { 450 free(buf, M_NETLINK); 451 return (true); 452 } 453 454 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 455 if (__predict_false(m == NULL)) { 456 free(buf, M_NETLINK); 457 return (false); 458 } 459 m_append(m, datalen, data); 460 free(buf, M_NETLINK); 461 462 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 463 return (true); 464 } 465 466 static const struct nlwriter_ops nlmsg_writers[] = { 467 /* NS_WRITER_TYPE_MBUF */ 468 { 469 .init = nlmsg_get_ns_mbuf, 470 .write_socket = nlmsg_write_socket_mbuf, 471 .write_group = nlmsg_write_group_mbuf, 472 .write_chain = nlmsg_write_chain_mbuf, 473 }, 474 /* NS_WRITER_TYPE_BUF */ 475 { 476 .init = nlmsg_get_ns_buf, 477 .write_socket = nlmsg_write_socket_buf, 478 .write_group = nlmsg_write_group_buf, 479 .write_chain = nlmsg_write_chain_buf, 480 }, 481 /* NS_WRITER_TYPE_LBUF */ 482 { 483 .init = nlmsg_get_ns_lbuf, 484 .write_socket = nlmsg_write_socket_lbuf, 485 .write_group = nlmsg_write_group_lbuf, 486 }, 487 }; 488 489 static void 490 nlmsg_set_callback(struct nl_writer *nw) 491 { 492 const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type]; 493 494 switch (nw->writer_target) { 495 case NS_WRITER_TARGET_SOCKET: 496 nw->cb = pops->write_socket; 497 break; 498 case NS_WRITER_TARGET_GROUP: 499 nw->cb = pops->write_group; 500 break; 501 case NS_WRITER_TARGET_CHAIN: 502 nw->cb = pops->write_chain; 503 break; 504 default: 505 panic("not implemented"); 506 } 507 } 508 509 static bool 510 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok) 511 { 512 MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0])); 513 NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type); 514 return (nlmsg_writers[type].init(nw, size, waitok)); 515 } 516 517 static bool 518 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux) 519 { 520 int type; 521 522 if (!is_linux) { 523 if (__predict_true(size <= NLMBUFSIZE)) 524 type = NS_WRITER_TYPE_MBUF; 525 else 526 type = NS_WRITER_TYPE_BUF; 527 } else 528 type = NS_WRITER_TYPE_LBUF; 529 return (nlmsg_get_buf_type(nw, size, type, waitok)); 530 } 531 532 bool 533 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) 534 { 535 if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux)) 536 return (false); 537 nw->arg.ptr = (void *)nlp; 538 nw->writer_target = NS_WRITER_TARGET_SOCKET; 539 nlmsg_set_callback(nw); 540 return (true); 541 } 542 543 bool 544 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) 545 { 546 if (!nlmsg_get_buf(nw, size, false, false)) 547 return (false); 548 nw->arg.group.proto = protocol; 549 nw->arg.group.id = group_id; 550 nw->writer_target = NS_WRITER_TARGET_GROUP; 551 nlmsg_set_callback(nw); 552 return (true); 553 } 554 555 bool 556 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) 557 { 558 if (!nlmsg_get_buf(nw, size, false, false)) 559 return (false); 560 *pm = NULL; 561 nw->arg.ptr = (void *)pm; 562 nw->writer_target = NS_WRITER_TARGET_CHAIN; 563 nlmsg_set_callback(nw); 564 NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf); 565 return (true); 566 } 567 568 void 569 _nlmsg_ignore_limit(struct nl_writer *nw) 570 { 571 nw->ignore_limit = true; 572 } 573 574 bool 575 _nlmsg_flush(struct nl_writer *nw) 576 { 577 578 if (__predict_false(nw->hdr != NULL)) { 579 /* Last message has not been completed, skip it. */ 580 int completed_len = (char *)nw->hdr - nw->data; 581 /* Send completed messages */ 582 nw->offset -= nw->offset - completed_len; 583 nw->hdr = NULL; 584 } 585 586 NL_LOG(LOG_DEBUG2, "OUT"); 587 bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages); 588 nw->_storage = NULL; 589 590 if (!result) { 591 NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb); 592 } 593 594 return (result); 595 } 596 597 /* 598 * Flushes previous data and allocates new underlying storage 599 * sufficient for holding at least @required_len bytes. 600 * Return true on success. 601 */ 602 bool 603 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len) 604 { 605 struct nl_writer ns_new = {}; 606 int completed_len, new_len; 607 608 if (nw->enomem) 609 return (false); 610 611 NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", 612 nw->offset, nw->alloc_len, required_len); 613 614 /* Calculated new buffer size and allocate it s*/ 615 completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; 616 if (completed_len > 0 && required_len < NLMBUFSIZE) { 617 /* We already ran out of space, use the largest effective size */ 618 new_len = max(nw->alloc_len, NLMBUFSIZE); 619 } else { 620 if (nw->alloc_len < NLMBUFSIZE) 621 new_len = NLMBUFSIZE; 622 else 623 new_len = nw->alloc_len * 2; 624 while (new_len < required_len) 625 new_len *= 2; 626 } 627 bool waitok = (nw->malloc_flag == M_WAITOK); 628 bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); 629 if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { 630 nw->enomem = true; 631 NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); 632 return (false); 633 } 634 if (nw->ignore_limit) 635 nlmsg_ignore_limit(&ns_new); 636 637 /* Update callback data */ 638 ns_new.writer_target = nw->writer_target; 639 nlmsg_set_callback(&ns_new); 640 ns_new.arg = nw->arg; 641 642 /* Copy last (unfinished) header to the new storage */ 643 int last_len = nw->offset - completed_len; 644 if (last_len > 0) { 645 memcpy(ns_new.data, nw->hdr, last_len); 646 ns_new.hdr = (struct nlmsghdr *)ns_new.data; 647 ns_new.offset = last_len; 648 } 649 650 NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); 651 652 /* Flush completed headers & switch to the new nw */ 653 nlmsg_flush(nw); 654 memcpy(nw, &ns_new, sizeof(struct nl_writer)); 655 NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); 656 657 return (true); 658 } 659 660 bool 661 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, 662 uint16_t flags, uint32_t len) 663 { 664 struct nlmsghdr *hdr; 665 666 MPASS(nw->hdr == NULL); 667 668 int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); 669 if (__predict_false(nw->offset + required_len > nw->alloc_len)) { 670 if (!nlmsg_refill_buffer(nw, required_len)) 671 return (false); 672 } 673 674 hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); 675 676 hdr->nlmsg_len = len; 677 hdr->nlmsg_type = type; 678 hdr->nlmsg_flags = flags; 679 hdr->nlmsg_seq = seq; 680 hdr->nlmsg_pid = portid; 681 682 nw->hdr = hdr; 683 nw->offset += sizeof(struct nlmsghdr); 684 685 return (true); 686 } 687 688 bool 689 _nlmsg_end(struct nl_writer *nw) 690 { 691 MPASS(nw->hdr != NULL); 692 693 if (nw->enomem) { 694 NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); 695 nlmsg_abort(nw); 696 return (false); 697 } 698 699 nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); 700 NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", 701 nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, 702 nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); 703 nw->hdr = NULL; 704 nw->num_messages++; 705 return (true); 706 } 707 708 void 709 _nlmsg_abort(struct nl_writer *nw) 710 { 711 if (nw->hdr != NULL) { 712 nw->offset = (uint32_t)((char *)nw->hdr - nw->data); 713 nw->hdr = NULL; 714 } 715 } 716 717 void 718 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, 719 struct nl_pstate *npt) 720 { 721 struct nlmsgerr *errmsg; 722 int payload_len; 723 uint32_t flags = nlp->nl_flags; 724 struct nl_writer *nw = npt->nw; 725 bool cap_ack; 726 727 payload_len = sizeof(struct nlmsgerr); 728 729 /* 730 * The only case when we send the full message in the 731 * reply is when there is an error and NETLINK_CAP_ACK 732 * is not set. 733 */ 734 cap_ack = (error == 0) || (flags & NLF_CAP_ACK); 735 if (!cap_ack) 736 payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); 737 payload_len = NETLINK_ALIGN(payload_len); 738 739 uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0; 740 if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) 741 nl_flags |= NLM_F_ACK_TLVS; 742 743 NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", 744 hdr->nlmsg_type, hdr->nlmsg_seq); 745 746 if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) 747 goto enomem; 748 749 errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); 750 errmsg->error = error; 751 /* In case of error copy the whole message, else just the header */ 752 memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); 753 754 if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) 755 nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); 756 if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) 757 nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); 758 if (npt->cookie != NULL) 759 nlattr_add_raw(nw, npt->cookie); 760 761 if (nlmsg_end(nw)) 762 return; 763 enomem: 764 NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", 765 hdr->nlmsg_type, hdr->nlmsg_seq); 766 nlmsg_abort(nw); 767 } 768 769 bool 770 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) 771 { 772 if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { 773 NL_LOG(LOG_DEBUG, "Error finalizing table dump"); 774 return (false); 775 } 776 /* Save operation result */ 777 int *perror = nlmsg_reserve_object(nw, int); 778 NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, 779 nw->offset, perror); 780 *perror = error; 781 nlmsg_end(nw); 782 nw->suppress_ack = true; 783 784 return (true); 785 } 786 787 #include <netlink/ktest_netlink_message_writer.h> 788