1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_netlink.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 #include <sys/param.h> 33 #include <sys/malloc.h> 34 #include <sys/lock.h> 35 #include <sys/rmlock.h> 36 #include <sys/mbuf.h> 37 #include <sys/ck.h> 38 #include <sys/socket.h> 39 #include <sys/socketvar.h> 40 #include <sys/syslog.h> 41 42 #include <netlink/netlink.h> 43 #include <netlink/netlink_ctl.h> 44 #include <netlink/netlink_linux.h> 45 #include <netlink/netlink_var.h> 46 47 #define DEBUG_MOD_NAME nl_writer 48 #define DEBUG_MAX_LEVEL LOG_DEBUG3 49 #include <netlink/netlink_debug.h> 50 _DECLARE_DEBUG(LOG_INFO); 51 52 /* 53 * The goal of this file is to provide convenient message writing KPI on top of 54 * different storage methods (mbufs, uio, temporary memory chunks). 55 * 56 * The main KPI guarantee is that the (last) message always resides in the contiguous 57 * memory buffer, so one is able to update the header after writing the entire message. 58 * 59 * This guarantee comes with a side effect of potentially reallocating underlying 60 * buffer, so one needs to update the desired pointers after something is added 61 * to the header. 62 * 63 * Messaging layer contains hooks performing transparent Linux translation for the messages. 64 * 65 * There are 3 types of supported targets: 66 * * socket (adds mbufs to the socket buffer, used for message replies) 67 * * group (sends mbuf/chain to the specified groups, used for the notifications) 68 * * chain (returns mbuf chain, used in Linux message translation code) 69 * 70 * There are 3 types of storage: 71 * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message 72 * fits in NLMBUFSIZE) 73 * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs 74 * to be larger than one supported by NS_WRITER_TYPE_MBUF) 75 * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for 76 * Linux sockets, calls translation hook prior to sending messages to the socket). 77 * 78 * Internally, KPI switches between different types of storage when memory requirements 79 * change. It happens transparently to the caller. 80 */ 81 82 /* 83 * Uma zone for the mbuf-based Netlink storage 84 */ 85 static uma_zone_t nlmsg_zone; 86 87 static void 88 nl_free_mbuf_storage(struct mbuf *m) 89 { 90 uma_zfree(nlmsg_zone, m->m_ext.ext_buf); 91 } 92 93 static int 94 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused) 95 { 96 struct mbuf *m = (struct mbuf *)arg; 97 98 if (m != NULL) 99 m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE); 100 101 return (0); 102 } 103 104 static struct mbuf * 105 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags) 106 { 107 struct mbuf *m, *m_storage; 108 109 if (size <= MHLEN) 110 return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags)); 111 112 if (__predict_false(size > NLMBUFSIZE)) 113 return (NULL); 114 115 m = m_gethdr(malloc_flags, MT_DATA); 116 if (m == NULL) 117 return (NULL); 118 119 m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags); 120 if (m_storage == NULL) { 121 m_free_raw(m); 122 return (NULL); 123 } 124 125 return (m); 126 } 127 128 static struct mbuf * 129 nl_get_mbuf(int size, int malloc_flags) 130 { 131 return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR)); 132 } 133 134 /* 135 * Gets a chain of Netlink mbufs. 136 * This is strip-down version of m_getm2() 137 */ 138 static struct mbuf * 139 nl_get_mbuf_chain(int len, int malloc_flags) 140 { 141 struct mbuf *m_chain = NULL, *m_tail = NULL; 142 int mbuf_flags = M_PKTHDR; 143 144 while (len > 0) { 145 int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len; 146 struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags); 147 148 if (m == NULL) { 149 m_freem(m_chain); 150 return (NULL); 151 } 152 153 /* Book keeping. */ 154 len -= M_SIZE(m); 155 if (m_tail != NULL) 156 m_tail->m_next = m; 157 else 158 m_chain = m; 159 m_tail = m; 160 mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 161 } 162 163 return (m_chain); 164 } 165 166 void 167 nl_init_msg_zone(void) 168 { 169 nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage, 170 NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 171 } 172 173 void 174 nl_destroy_msg_zone(void) 175 { 176 uma_zdestroy(nlmsg_zone); 177 } 178 179 180 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); 181 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); 182 183 struct nlwriter_ops { 184 nlwriter_op_init *init; 185 nlwriter_op_write *write_socket; 186 nlwriter_op_write *write_group; 187 nlwriter_op_write *write_chain; 188 }; 189 190 /* 191 * NS_WRITER_TYPE_BUF 192 * Writes message to a temporary memory buffer, 193 * flushing to the socket/group when buffer size limit is reached 194 */ 195 static bool 196 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) 197 { 198 int mflag = waitok ? M_WAITOK : M_NOWAIT; 199 nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); 200 if (__predict_false(nw->_storage == NULL)) 201 return (false); 202 nw->alloc_len = size; 203 nw->offset = 0; 204 nw->hdr = NULL; 205 nw->data = nw->_storage; 206 nw->writer_type = NS_WRITER_TYPE_BUF; 207 nw->malloc_flag = mflag; 208 nw->num_messages = 0; 209 nw->enomem = false; 210 return (true); 211 } 212 213 static bool 214 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 215 { 216 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 217 if (__predict_false(datalen == 0)) { 218 free(buf, M_NETLINK); 219 return (true); 220 } 221 222 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 223 if (__predict_false(m == NULL)) { 224 /* XXX: should we set sorcverr? */ 225 free(buf, M_NETLINK); 226 return (false); 227 } 228 m_append(m, datalen, buf); 229 free(buf, M_NETLINK); 230 231 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 232 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 233 } 234 235 static bool 236 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 237 { 238 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 239 nw->arg.group.proto, nw->arg.group.id); 240 if (__predict_false(datalen == 0)) { 241 free(buf, M_NETLINK); 242 return (true); 243 } 244 245 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 246 if (__predict_false(m == NULL)) { 247 free(buf, M_NETLINK); 248 return (false); 249 } 250 bool success = m_append(m, datalen, buf) != 0; 251 free(buf, M_NETLINK); 252 253 if (!success) 254 return (false); 255 256 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 257 return (true); 258 } 259 260 static bool 261 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) 262 { 263 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 264 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 265 266 if (__predict_false(datalen == 0)) { 267 free(buf, M_NETLINK); 268 return (true); 269 } 270 271 if (*m0 == NULL) { 272 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 273 274 if (__predict_false(m == NULL)) { 275 free(buf, M_NETLINK); 276 return (false); 277 } 278 *m0 = m; 279 } 280 if (__predict_false(m_append(*m0, datalen, buf) == 0)) { 281 free(buf, M_NETLINK); 282 return (false); 283 } 284 return (true); 285 } 286 287 288 /* 289 * NS_WRITER_TYPE_MBUF 290 * Writes message to the allocated mbuf, 291 * flushing to socket/group when mbuf size limit is reached. 292 * This is the most efficient mechanism as it avoids double-copying. 293 * 294 * Allocates a single mbuf suitable to store up to @size bytes of data. 295 * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr. 296 * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone. 297 * Returns NULL on greater size or the allocation failure. 298 */ 299 static bool 300 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) 301 { 302 int mflag = waitok ? M_WAITOK : M_NOWAIT; 303 struct mbuf *m = nl_get_mbuf(size, mflag); 304 305 if (__predict_false(m == NULL)) 306 return (false); 307 nw->alloc_len = M_TRAILINGSPACE(m); 308 nw->offset = 0; 309 nw->hdr = NULL; 310 nw->_storage = (void *)m; 311 nw->data = mtod(m, void *); 312 nw->writer_type = NS_WRITER_TYPE_MBUF; 313 nw->malloc_flag = mflag; 314 nw->num_messages = 0; 315 nw->enomem = false; 316 memset(nw->data, 0, size); 317 NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", 318 m, size, nw->alloc_len, nw->data); 319 return (true); 320 } 321 322 static bool 323 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 324 { 325 struct mbuf *m = (struct mbuf *)buf; 326 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 327 328 if (__predict_false(datalen == 0)) { 329 m_freem(m); 330 return (true); 331 } 332 333 m->m_pkthdr.len = datalen; 334 m->m_len = datalen; 335 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 336 return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); 337 } 338 339 static bool 340 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 341 { 342 struct mbuf *m = (struct mbuf *)buf; 343 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, 344 nw->arg.group.proto, nw->arg.group.id); 345 346 if (__predict_false(datalen == 0)) { 347 m_freem(m); 348 return (true); 349 } 350 351 m->m_pkthdr.len = datalen; 352 m->m_len = datalen; 353 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 354 return (true); 355 } 356 357 static bool 358 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 359 { 360 struct mbuf *m_new = (struct mbuf *)buf; 361 struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); 362 363 NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); 364 365 if (__predict_false(datalen == 0)) { 366 m_freem(m_new); 367 return (true); 368 } 369 370 m_new->m_pkthdr.len = datalen; 371 m_new->m_len = datalen; 372 373 if (*m0 == NULL) { 374 *m0 = m_new; 375 } else { 376 struct mbuf *m_last; 377 for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next) 378 ; 379 m_last->m_next = m_new; 380 (*m0)->m_pkthdr.len += datalen; 381 } 382 383 return (true); 384 } 385 386 /* 387 * NS_WRITER_TYPE_LBUF 388 * Writes message to the allocated memory buffer, 389 * flushing to socket/group when mbuf size limit is reached. 390 * Calls linux handler to rewrite messages before sending to the socket. 391 */ 392 static bool 393 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok) 394 { 395 int mflag = waitok ? M_WAITOK : M_NOWAIT; 396 size = roundup2(size, sizeof(void *)); 397 int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE; 398 char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO); 399 if (__predict_false(buf == NULL)) 400 return (false); 401 402 /* Fill buffer header first */ 403 struct linear_buffer *lb = (struct linear_buffer *)buf; 404 lb->base = &buf[sizeof(struct linear_buffer) + size]; 405 lb->size = size + SCRATCH_BUFFER_SIZE; 406 407 nw->alloc_len = size; 408 nw->offset = 0; 409 nw->hdr = NULL; 410 nw->_storage = buf; 411 nw->data = (char *)(lb + 1); 412 nw->malloc_flag = mflag; 413 nw->writer_type = NS_WRITER_TYPE_LBUF; 414 nw->num_messages = 0; 415 nw->enomem = false; 416 return (true); 417 } 418 419 static bool 420 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 421 { 422 struct linear_buffer *lb = (struct linear_buffer *)buf; 423 char *data = (char *)(lb + 1); 424 struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr); 425 426 if (__predict_false(datalen == 0)) { 427 free(buf, M_NETLINK); 428 return (true); 429 } 430 431 struct mbuf *m = NULL; 432 if (linux_netlink_p != NULL) 433 m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp); 434 free(buf, M_NETLINK); 435 436 if (__predict_false(m == NULL)) { 437 /* XXX: should we set sorcverr? */ 438 return (false); 439 } 440 441 int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; 442 return (nl_send_one(m, nlp, cnt, io_flags)); 443 } 444 445 /* Shouldn't be called (maybe except Linux code originating message) */ 446 static bool 447 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) 448 { 449 struct linear_buffer *lb = (struct linear_buffer *)buf; 450 char *data = (char *)(lb + 1); 451 452 if (__predict_false(datalen == 0)) { 453 free(buf, M_NETLINK); 454 return (true); 455 } 456 457 struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); 458 if (__predict_false(m == NULL)) { 459 free(buf, M_NETLINK); 460 return (false); 461 } 462 m_append(m, datalen, data); 463 free(buf, M_NETLINK); 464 465 nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); 466 return (true); 467 } 468 469 static const struct nlwriter_ops nlmsg_writers[] = { 470 /* NS_WRITER_TYPE_MBUF */ 471 { 472 .init = nlmsg_get_ns_mbuf, 473 .write_socket = nlmsg_write_socket_mbuf, 474 .write_group = nlmsg_write_group_mbuf, 475 .write_chain = nlmsg_write_chain_mbuf, 476 }, 477 /* NS_WRITER_TYPE_BUF */ 478 { 479 .init = nlmsg_get_ns_buf, 480 .write_socket = nlmsg_write_socket_buf, 481 .write_group = nlmsg_write_group_buf, 482 .write_chain = nlmsg_write_chain_buf, 483 }, 484 /* NS_WRITER_TYPE_LBUF */ 485 { 486 .init = nlmsg_get_ns_lbuf, 487 .write_socket = nlmsg_write_socket_lbuf, 488 .write_group = nlmsg_write_group_lbuf, 489 }, 490 }; 491 492 static void 493 nlmsg_set_callback(struct nl_writer *nw) 494 { 495 const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type]; 496 497 switch (nw->writer_target) { 498 case NS_WRITER_TARGET_SOCKET: 499 nw->cb = pops->write_socket; 500 break; 501 case NS_WRITER_TARGET_GROUP: 502 nw->cb = pops->write_group; 503 break; 504 case NS_WRITER_TARGET_CHAIN: 505 nw->cb = pops->write_chain; 506 break; 507 default: 508 panic("not implemented"); 509 } 510 } 511 512 static bool 513 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok) 514 { 515 MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0])); 516 NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type); 517 return (nlmsg_writers[type].init(nw, size, waitok)); 518 } 519 520 static bool 521 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux) 522 { 523 int type; 524 525 if (!is_linux) { 526 if (__predict_true(size <= NLMBUFSIZE)) 527 type = NS_WRITER_TYPE_MBUF; 528 else 529 type = NS_WRITER_TYPE_BUF; 530 } else 531 type = NS_WRITER_TYPE_LBUF; 532 return (nlmsg_get_buf_type(nw, size, type, waitok)); 533 } 534 535 bool 536 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) 537 { 538 if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux)) 539 return (false); 540 nw->arg.ptr = (void *)nlp; 541 nw->writer_target = NS_WRITER_TARGET_SOCKET; 542 nlmsg_set_callback(nw); 543 return (true); 544 } 545 546 bool 547 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) 548 { 549 if (!nlmsg_get_buf(nw, size, false, false)) 550 return (false); 551 nw->arg.group.proto = protocol; 552 nw->arg.group.id = group_id; 553 nw->writer_target = NS_WRITER_TARGET_GROUP; 554 nlmsg_set_callback(nw); 555 return (true); 556 } 557 558 bool 559 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) 560 { 561 if (!nlmsg_get_buf(nw, size, false, false)) 562 return (false); 563 *pm = NULL; 564 nw->arg.ptr = (void *)pm; 565 nw->writer_target = NS_WRITER_TARGET_CHAIN; 566 nlmsg_set_callback(nw); 567 NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf); 568 return (true); 569 } 570 571 void 572 _nlmsg_ignore_limit(struct nl_writer *nw) 573 { 574 nw->ignore_limit = true; 575 } 576 577 bool 578 _nlmsg_flush(struct nl_writer *nw) 579 { 580 581 if (__predict_false(nw->hdr != NULL)) { 582 /* Last message has not been completed, skip it. */ 583 int completed_len = (char *)nw->hdr - nw->data; 584 /* Send completed messages */ 585 nw->offset -= nw->offset - completed_len; 586 nw->hdr = NULL; 587 } 588 589 NL_LOG(LOG_DEBUG2, "OUT"); 590 bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages); 591 nw->_storage = NULL; 592 593 if (!result) { 594 NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb); 595 } 596 597 return (result); 598 } 599 600 /* 601 * Flushes previous data and allocates new underlying storage 602 * sufficient for holding at least @required_len bytes. 603 * Return true on success. 604 */ 605 bool 606 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len) 607 { 608 struct nl_writer ns_new = {}; 609 int completed_len, new_len; 610 611 if (nw->enomem) 612 return (false); 613 614 NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", 615 nw->offset, nw->alloc_len, required_len); 616 617 /* Calculated new buffer size and allocate it s*/ 618 completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; 619 if (completed_len > 0 && required_len < NLMBUFSIZE) { 620 /* We already ran out of space, use the largest effective size */ 621 new_len = max(nw->alloc_len, NLMBUFSIZE); 622 } else { 623 if (nw->alloc_len < NLMBUFSIZE) 624 new_len = NLMBUFSIZE; 625 else 626 new_len = nw->alloc_len * 2; 627 while (new_len < required_len) 628 new_len *= 2; 629 } 630 bool waitok = (nw->malloc_flag == M_WAITOK); 631 bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); 632 if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { 633 nw->enomem = true; 634 NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); 635 return (false); 636 } 637 if (nw->ignore_limit) 638 nlmsg_ignore_limit(&ns_new); 639 640 /* Update callback data */ 641 ns_new.writer_target = nw->writer_target; 642 nlmsg_set_callback(&ns_new); 643 ns_new.arg = nw->arg; 644 645 /* Copy last (unfinished) header to the new storage */ 646 int last_len = nw->offset - completed_len; 647 if (last_len > 0) { 648 memcpy(ns_new.data, nw->hdr, last_len); 649 ns_new.hdr = (struct nlmsghdr *)ns_new.data; 650 ns_new.offset = last_len; 651 } 652 653 NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); 654 655 /* Flush completed headers & switch to the new nw */ 656 nlmsg_flush(nw); 657 memcpy(nw, &ns_new, sizeof(struct nl_writer)); 658 NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); 659 660 return (true); 661 } 662 663 bool 664 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, 665 uint16_t flags, uint32_t len) 666 { 667 struct nlmsghdr *hdr; 668 669 MPASS(nw->hdr == NULL); 670 671 int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); 672 if (__predict_false(nw->offset + required_len > nw->alloc_len)) { 673 if (!nlmsg_refill_buffer(nw, required_len)) 674 return (false); 675 } 676 677 hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); 678 679 hdr->nlmsg_len = len; 680 hdr->nlmsg_type = type; 681 hdr->nlmsg_flags = flags; 682 hdr->nlmsg_seq = seq; 683 hdr->nlmsg_pid = portid; 684 685 nw->hdr = hdr; 686 nw->offset += sizeof(struct nlmsghdr); 687 688 return (true); 689 } 690 691 bool 692 _nlmsg_end(struct nl_writer *nw) 693 { 694 MPASS(nw->hdr != NULL); 695 696 if (nw->enomem) { 697 NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); 698 nlmsg_abort(nw); 699 return (false); 700 } 701 702 nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); 703 NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", 704 nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, 705 nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); 706 nw->hdr = NULL; 707 nw->num_messages++; 708 return (true); 709 } 710 711 void 712 _nlmsg_abort(struct nl_writer *nw) 713 { 714 if (nw->hdr != NULL) { 715 nw->offset = (uint32_t)((char *)nw->hdr - nw->data); 716 nw->hdr = NULL; 717 } 718 } 719 720 void 721 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, 722 struct nl_pstate *npt) 723 { 724 struct nlmsgerr *errmsg; 725 int payload_len; 726 uint32_t flags = nlp->nl_flags; 727 struct nl_writer *nw = npt->nw; 728 bool cap_ack; 729 730 payload_len = sizeof(struct nlmsgerr); 731 732 /* 733 * The only case when we send the full message in the 734 * reply is when there is an error and NETLINK_CAP_ACK 735 * is not set. 736 */ 737 cap_ack = (error == 0) || (flags & NLF_CAP_ACK); 738 if (!cap_ack) 739 payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); 740 payload_len = NETLINK_ALIGN(payload_len); 741 742 uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0; 743 if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) 744 nl_flags |= NLM_F_ACK_TLVS; 745 746 NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", 747 hdr->nlmsg_type, hdr->nlmsg_seq); 748 749 if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) 750 goto enomem; 751 752 errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); 753 errmsg->error = error; 754 /* In case of error copy the whole message, else just the header */ 755 memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); 756 757 if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) 758 nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); 759 if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) 760 nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); 761 if (npt->cookie != NULL) 762 nlattr_add_raw(nw, npt->cookie); 763 764 if (nlmsg_end(nw)) 765 return; 766 enomem: 767 NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", 768 hdr->nlmsg_type, hdr->nlmsg_seq); 769 nlmsg_abort(nw); 770 } 771 772 bool 773 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) 774 { 775 if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { 776 NL_LOG(LOG_DEBUG, "Error finalizing table dump"); 777 return (false); 778 } 779 /* Save operation result */ 780 int *perror = nlmsg_reserve_object(nw, int); 781 NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, 782 nw->offset, perror); 783 *perror = error; 784 nlmsg_end(nw); 785 nw->suppress_ack = true; 786 787 return (true); 788 } 789 790 #include <netlink/ktest_netlink_message_writer.h> 791