xref: /freebsd/sys/netlink/netlink_message_writer.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #include <sys/malloc.h>
31 #include <sys/lock.h>
32 #include <sys/rmlock.h>
33 #include <sys/mbuf.h>
34 #include <sys/ck.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/syslog.h>
38 
39 #include <netlink/netlink.h>
40 #include <netlink/netlink_ctl.h>
41 #include <netlink/netlink_linux.h>
42 #include <netlink/netlink_var.h>
43 
44 #define	DEBUG_MOD_NAME	nl_writer
45 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
46 #include <netlink/netlink_debug.h>
47 _DECLARE_DEBUG(LOG_INFO);
48 
49 /*
50  * The goal of this file is to provide convenient message writing KPI on top of
51  * different storage methods (mbufs, uio, temporary memory chunks).
52  *
53  * The main KPI guarantee is that the (last) message always resides in the contiguous
54  *  memory buffer, so one is able to update the header after writing the entire message.
55  *
56  * This guarantee comes with a side effect of potentially reallocating underlying
57  *  buffer, so one needs to update the desired pointers after something is added
58  *  to the header.
59  *
60  * Messaging layer contains hooks performing transparent Linux translation for the messages.
61  *
62  * There are 3 types of supported targets:
63  *  * socket (adds mbufs to the socket buffer, used for message replies)
64  *  * group (sends mbuf/chain to the specified groups, used for the notifications)
65  *  * chain (returns mbuf chain, used in Linux message translation code)
66  *
67  * There are 3 types of storage:
68  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
69  *    fits in NLMBUFSIZE)
70  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
71  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
72  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
73  *    Linux sockets, calls translation hook prior to sending messages to the socket).
74  *
75  * Internally, KPI switches between different types of storage when memory requirements
76  *  change. It happens transparently to the caller.
77  */
78 
79 /*
80  * Uma zone for the mbuf-based Netlink storage
81  */
82 static uma_zone_t	nlmsg_zone;
83 
84 static void
85 nl_free_mbuf_storage(struct mbuf *m)
86 {
87 	uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
88 }
89 
90 static int
91 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
92 {
93 	struct mbuf *m = (struct mbuf *)arg;
94 
95 	if (m != NULL)
96 		m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
97 
98 	return (0);
99 }
100 
101 static struct mbuf *
102 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
103 {
104 	struct mbuf *m, *m_storage;
105 
106 	if (size <= MHLEN)
107 		return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
108 
109 	if (__predict_false(size > NLMBUFSIZE))
110 		return (NULL);
111 
112 	m = m_gethdr(malloc_flags, MT_DATA);
113 	if (m == NULL)
114 		return (NULL);
115 
116 	m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
117 	if (m_storage == NULL) {
118 		m_free_raw(m);
119 		return (NULL);
120 	}
121 
122 	return (m);
123 }
124 
125 static struct mbuf *
126 nl_get_mbuf(int size, int malloc_flags)
127 {
128 	return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
129 }
130 
131 /*
132  * Gets a chain of Netlink mbufs.
133  * This is strip-down version of m_getm2()
134  */
135 static struct mbuf *
136 nl_get_mbuf_chain(int len, int malloc_flags)
137 {
138 	struct mbuf *m_chain = NULL, *m_tail = NULL;
139 	int mbuf_flags = M_PKTHDR;
140 
141 	while (len > 0) {
142 		int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len;
143 		struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);
144 
145 		if (m == NULL) {
146 			m_freem(m_chain);
147 			return (NULL);
148 		}
149 
150 		/* Book keeping. */
151 		len -= M_SIZE(m);
152 		if (m_tail != NULL)
153 			m_tail->m_next = m;
154 		else
155 			m_chain = m;
156 		m_tail = m;
157 		mbuf_flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
158 	}
159 
160 	return (m_chain);
161 }
162 
163 void
164 nl_init_msg_zone(void)
165 {
166 	nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
167 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
168 }
169 
170 void
171 nl_destroy_msg_zone(void)
172 {
173 	uma_zdestroy(nlmsg_zone);
174 }
175 
176 
/*
 * Storage initializer: sets up @nw with new storage for @size bytes.
 * @waitok selects M_WAITOK over M_NOWAIT allocations.
 * Returns false if the storage cannot be allocated.
 */
typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);

/*
 * Flush callback: invoked by _nlmsg_flush() with the writer storage (@buf),
 * the number of bytes written to it (@buflen) and the number of completed
 * messages (@cnt).
 */
typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen,
    int cnt);

/*
 * Operations implemented by one storage type: the initializer plus one
 * flush callback per writer target.
 */
struct nlwriter_ops {
	nlwriter_op_init	*init;		/* storage setup */
	nlwriter_op_write	*write_socket;	/* NS_WRITER_TARGET_SOCKET */
	nlwriter_op_write	*write_group;	/* NS_WRITER_TARGET_GROUP */
	nlwriter_op_write	*write_chain;	/* NS_WRITER_TARGET_CHAIN */
};
186 
187 /*
188  * NS_WRITER_TYPE_BUF
189  * Writes message to a temporary memory buffer,
190  * flushing to the socket/group when buffer size limit is reached
191  */
192 static bool
193 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
194 {
195 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
196 	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
197 	if (__predict_false(nw->_storage == NULL))
198 		return (false);
199 	nw->alloc_len = size;
200 	nw->offset = 0;
201 	nw->hdr = NULL;
202 	nw->data = nw->_storage;
203 	nw->writer_type = NS_WRITER_TYPE_BUF;
204 	nw->malloc_flag = mflag;
205 	nw->num_messages = 0;
206 	nw->enomem = false;
207 	return (true);
208 }
209 
210 static bool
211 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
212 {
213 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
214 	if (__predict_false(datalen == 0)) {
215 		free(buf, M_NETLINK);
216 		return (true);
217 	}
218 
219 	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
220 	if (__predict_false(m == NULL)) {
221 		/* XXX: should we set sorcverr? */
222 		free(buf, M_NETLINK);
223 		return (false);
224 	}
225 	m_append(m, datalen, buf);
226 	free(buf, M_NETLINK);
227 
228 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
229 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
230 }
231 
232 static bool
233 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
234 {
235 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
236 	    nw->arg.group.proto, nw->arg.group.id);
237 	if (__predict_false(datalen == 0)) {
238 		free(buf, M_NETLINK);
239 		return (true);
240 	}
241 
242 	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
243 	if (__predict_false(m == NULL)) {
244 		free(buf, M_NETLINK);
245 		return (false);
246 	}
247 	bool success = m_append(m, datalen, buf) != 0;
248 	free(buf, M_NETLINK);
249 
250 	if (!success)
251 		return (false);
252 
253 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
254 	return (true);
255 }
256 
257 static bool
258 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
259 {
260 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
261 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
262 
263 	if (__predict_false(datalen == 0)) {
264 		free(buf, M_NETLINK);
265 		return (true);
266 	}
267 
268 	if (*m0 == NULL) {
269 		struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
270 
271 		if (__predict_false(m == NULL)) {
272 			free(buf, M_NETLINK);
273 			return (false);
274 		}
275 		*m0 = m;
276 	}
277 	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
278 		free(buf, M_NETLINK);
279 		return (false);
280 	}
281 	return (true);
282 }
283 
284 
285 /*
286  * NS_WRITER_TYPE_MBUF
287  * Writes message to the allocated mbuf,
288  * flushing to socket/group when mbuf size limit is reached.
289  * This is the most efficient mechanism as it avoids double-copying.
290  *
291  * Allocates a single mbuf suitable to store up to @size bytes of data.
292  * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
293  * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
294  * Returns NULL on greater size or the allocation failure.
295  */
296 static bool
297 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
298 {
299 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
300 	struct mbuf *m = nl_get_mbuf(size, mflag);
301 
302 	if (__predict_false(m == NULL))
303 		return (false);
304 	nw->alloc_len = M_TRAILINGSPACE(m);
305 	nw->offset = 0;
306 	nw->hdr = NULL;
307 	nw->_storage = (void *)m;
308 	nw->data = mtod(m, void *);
309 	nw->writer_type = NS_WRITER_TYPE_MBUF;
310 	nw->malloc_flag = mflag;
311 	nw->num_messages = 0;
312 	nw->enomem = false;
313 	memset(nw->data, 0, size);
314 	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
315 	    m, size, nw->alloc_len, nw->data);
316 	return (true);
317 }
318 
319 static bool
320 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
321 {
322 	struct mbuf *m = (struct mbuf *)buf;
323 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
324 
325 	if (__predict_false(datalen == 0)) {
326 		m_freem(m);
327 		return (true);
328 	}
329 
330 	m->m_pkthdr.len = datalen;
331 	m->m_len = datalen;
332 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
333 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
334 }
335 
336 static bool
337 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
338 {
339 	struct mbuf *m = (struct mbuf *)buf;
340 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
341 	    nw->arg.group.proto, nw->arg.group.id);
342 
343 	if (__predict_false(datalen == 0)) {
344 		m_freem(m);
345 		return (true);
346 	}
347 
348 	m->m_pkthdr.len = datalen;
349 	m->m_len = datalen;
350 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
351 	return (true);
352 }
353 
354 static bool
355 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
356 {
357 	struct mbuf *m_new = (struct mbuf *)buf;
358 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
359 
360 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
361 
362 	if (__predict_false(datalen == 0)) {
363 		m_freem(m_new);
364 		return (true);
365 	}
366 
367 	m_new->m_pkthdr.len = datalen;
368 	m_new->m_len = datalen;
369 
370 	if (*m0 == NULL) {
371 		*m0 = m_new;
372 	} else {
373 		struct mbuf *m_last;
374 		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
375 			;
376 		m_last->m_next = m_new;
377 		(*m0)->m_pkthdr.len += datalen;
378 	}
379 
380 	return (true);
381 }
382 
383 /*
384  * NS_WRITER_TYPE_LBUF
385  * Writes message to the allocated memory buffer,
386  * flushing to socket/group when mbuf size limit is reached.
387  * Calls linux handler to rewrite messages before sending to the socket.
388  */
389 static bool
390 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
391 {
392 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
393 	size = roundup2(size, sizeof(void *));
394 	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
395 	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
396 	if (__predict_false(buf == NULL))
397 		return (false);
398 
399 	/* Fill buffer header first */
400 	struct linear_buffer *lb = (struct linear_buffer *)buf;
401 	lb->base = &buf[sizeof(struct linear_buffer) + size];
402 	lb->size = size + SCRATCH_BUFFER_SIZE;
403 
404 	nw->alloc_len = size;
405 	nw->offset = 0;
406 	nw->hdr = NULL;
407 	nw->_storage = buf;
408 	nw->data = (char *)(lb + 1);
409 	nw->malloc_flag = mflag;
410 	nw->writer_type = NS_WRITER_TYPE_LBUF;
411 	nw->num_messages = 0;
412 	nw->enomem = false;
413 	return (true);
414 }
415 
416 static bool
417 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
418 {
419 	struct linear_buffer *lb = (struct linear_buffer *)buf;
420 	char *data = (char *)(lb + 1);
421 	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
422 
423 	if (__predict_false(datalen == 0)) {
424 		free(buf, M_NETLINK);
425 		return (true);
426 	}
427 
428 	struct mbuf *m = NULL;
429 	if (linux_netlink_p != NULL)
430 		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
431 	free(buf, M_NETLINK);
432 
433 	if (__predict_false(m == NULL)) {
434 		/* XXX: should we set sorcverr? */
435 		return (false);
436 	}
437 
438 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
439 	return (nl_send_one(m, nlp, cnt, io_flags));
440 }
441 
442 /* Shouldn't be called (maybe except Linux code originating message) */
443 static bool
444 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
445 {
446 	struct linear_buffer *lb = (struct linear_buffer *)buf;
447 	char *data = (char *)(lb + 1);
448 
449 	if (__predict_false(datalen == 0)) {
450 		free(buf, M_NETLINK);
451 		return (true);
452 	}
453 
454 	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
455 	if (__predict_false(m == NULL)) {
456 		free(buf, M_NETLINK);
457 		return (false);
458 	}
459 	m_append(m, datalen, data);
460 	free(buf, M_NETLINK);
461 
462 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
463 	return (true);
464 }
465 
466 static const struct nlwriter_ops nlmsg_writers[] = {
467 	/* NS_WRITER_TYPE_MBUF */
468 	{
469 		.init = nlmsg_get_ns_mbuf,
470 		.write_socket = nlmsg_write_socket_mbuf,
471 		.write_group = nlmsg_write_group_mbuf,
472 		.write_chain = nlmsg_write_chain_mbuf,
473 	},
474 	/* NS_WRITER_TYPE_BUF */
475 	{
476 		.init = nlmsg_get_ns_buf,
477 		.write_socket = nlmsg_write_socket_buf,
478 		.write_group = nlmsg_write_group_buf,
479 		.write_chain = nlmsg_write_chain_buf,
480 	},
481 	/* NS_WRITER_TYPE_LBUF */
482 	{
483 		.init = nlmsg_get_ns_lbuf,
484 		.write_socket = nlmsg_write_socket_lbuf,
485 		.write_group = nlmsg_write_group_lbuf,
486 	},
487 };
488 
489 static void
490 nlmsg_set_callback(struct nl_writer *nw)
491 {
492 	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
493 
494 	switch (nw->writer_target) {
495 	case NS_WRITER_TARGET_SOCKET:
496 		nw->cb = pops->write_socket;
497 		break;
498 	case NS_WRITER_TARGET_GROUP:
499 		nw->cb = pops->write_group;
500 		break;
501 	case NS_WRITER_TARGET_CHAIN:
502 		nw->cb = pops->write_chain;
503 		break;
504 	default:
505 		panic("not implemented");
506 	}
507 }
508 
509 static bool
510 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
511 {
512 	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
513 	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
514 	return (nlmsg_writers[type].init(nw, size, waitok));
515 }
516 
517 static bool
518 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
519 {
520 	int type;
521 
522 	if (!is_linux) {
523 		if (__predict_true(size <= NLMBUFSIZE))
524 			type = NS_WRITER_TYPE_MBUF;
525 		else
526 			type = NS_WRITER_TYPE_BUF;
527 	} else
528 		type = NS_WRITER_TYPE_LBUF;
529 	return (nlmsg_get_buf_type(nw, size, type, waitok));
530 }
531 
532 bool
533 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
534 {
535 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
536 		return (false);
537 	nw->arg.ptr = (void *)nlp;
538 	nw->writer_target = NS_WRITER_TARGET_SOCKET;
539 	nlmsg_set_callback(nw);
540 	return (true);
541 }
542 
543 bool
544 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
545 {
546 	if (!nlmsg_get_buf(nw, size, false, false))
547 		return (false);
548 	nw->arg.group.proto = protocol;
549 	nw->arg.group.id = group_id;
550 	nw->writer_target = NS_WRITER_TARGET_GROUP;
551 	nlmsg_set_callback(nw);
552 	return (true);
553 }
554 
555 bool
556 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
557 {
558 	if (!nlmsg_get_buf(nw, size, false, false))
559 		return (false);
560 	*pm = NULL;
561 	nw->arg.ptr = (void *)pm;
562 	nw->writer_target = NS_WRITER_TARGET_CHAIN;
563 	nlmsg_set_callback(nw);
564 	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
565 	return (true);
566 }
567 
568 void
569 _nlmsg_ignore_limit(struct nl_writer *nw)
570 {
571 	nw->ignore_limit = true;
572 }
573 
574 bool
575 _nlmsg_flush(struct nl_writer *nw)
576 {
577 
578 	if (__predict_false(nw->hdr != NULL)) {
579 		/* Last message has not been completed, skip it. */
580 		int completed_len = (char *)nw->hdr - nw->data;
581 		/* Send completed messages */
582 		nw->offset -= nw->offset - completed_len;
583 		nw->hdr = NULL;
584 	}
585 
586 	NL_LOG(LOG_DEBUG2, "OUT");
587 	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
588 	nw->_storage = NULL;
589 
590 	if (!result) {
591 		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
592 	}
593 
594 	return (result);
595 }
596 
597 /*
598  * Flushes previous data and allocates new underlying storage
599  *  sufficient for holding at least @required_len bytes.
600  * Return true on success.
601  */
602 bool
603 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
604 {
605 	struct nl_writer ns_new = {};
606 	int completed_len, new_len;
607 
608 	if (nw->enomem)
609 		return (false);
610 
611 	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
612 	    nw->offset, nw->alloc_len, required_len);
613 
614 	/* Calculated new buffer size and allocate it s*/
615 	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
616 	if (completed_len > 0 && required_len < NLMBUFSIZE) {
617 		/* We already ran out of space, use the largest effective size */
618 		new_len = max(nw->alloc_len, NLMBUFSIZE);
619 	} else {
620 		if (nw->alloc_len < NLMBUFSIZE)
621 			new_len = NLMBUFSIZE;
622 		else
623 			new_len = nw->alloc_len * 2;
624 		while (new_len < required_len)
625 			new_len *= 2;
626 	}
627 	bool waitok = (nw->malloc_flag == M_WAITOK);
628 	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
629 	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
630 		nw->enomem = true;
631 		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
632 		return (false);
633 	}
634 	if (nw->ignore_limit)
635 		nlmsg_ignore_limit(&ns_new);
636 
637 	/* Update callback data */
638 	ns_new.writer_target = nw->writer_target;
639 	nlmsg_set_callback(&ns_new);
640 	ns_new.arg = nw->arg;
641 
642 	/* Copy last (unfinished) header to the new storage */
643 	int last_len = nw->offset - completed_len;
644 	if (last_len > 0) {
645 		memcpy(ns_new.data, nw->hdr, last_len);
646 		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
647 		ns_new.offset = last_len;
648 	}
649 
650 	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
651 
652 	/* Flush completed headers & switch to the new nw */
653 	nlmsg_flush(nw);
654 	memcpy(nw, &ns_new, sizeof(struct nl_writer));
655 	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
656 
657 	return (true);
658 }
659 
660 bool
661 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
662     uint16_t flags, uint32_t len)
663 {
664 	struct nlmsghdr *hdr;
665 
666 	MPASS(nw->hdr == NULL);
667 
668 	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
669 	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
670 		if (!nlmsg_refill_buffer(nw, required_len))
671 			return (false);
672 	}
673 
674 	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
675 
676 	hdr->nlmsg_len = len;
677 	hdr->nlmsg_type = type;
678 	hdr->nlmsg_flags = flags;
679 	hdr->nlmsg_seq = seq;
680 	hdr->nlmsg_pid = portid;
681 
682 	nw->hdr = hdr;
683 	nw->offset += sizeof(struct nlmsghdr);
684 
685 	return (true);
686 }
687 
688 bool
689 _nlmsg_end(struct nl_writer *nw)
690 {
691 	MPASS(nw->hdr != NULL);
692 
693 	if (nw->enomem) {
694 		NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
695 		nlmsg_abort(nw);
696 		return (false);
697 	}
698 
699 	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
700 	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
701 	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
702 	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
703 	nw->hdr = NULL;
704 	nw->num_messages++;
705 	return (true);
706 }
707 
708 void
709 _nlmsg_abort(struct nl_writer *nw)
710 {
711 	if (nw->hdr != NULL) {
712 		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
713 		nw->hdr = NULL;
714 	}
715 }
716 
717 void
718 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
719     struct nl_pstate *npt)
720 {
721 	struct nlmsgerr *errmsg;
722 	int payload_len;
723 	uint32_t flags = nlp->nl_flags;
724 	struct nl_writer *nw = npt->nw;
725 	bool cap_ack;
726 
727 	payload_len = sizeof(struct nlmsgerr);
728 
729 	/*
730 	 * The only case when we send the full message in the
731 	 * reply is when there is an error and NETLINK_CAP_ACK
732 	 * is not set.
733 	 */
734 	cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
735 	if (!cap_ack)
736 		payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
737 	payload_len = NETLINK_ALIGN(payload_len);
738 
739 	uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
740 	if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
741 		nl_flags |= NLM_F_ACK_TLVS;
742 
743 	NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
744 	    hdr->nlmsg_type, hdr->nlmsg_seq);
745 
746 	if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len))
747 		goto enomem;
748 
749 	errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
750 	errmsg->error = error;
751 	/* In case of error copy the whole message, else just the header */
752 	memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);
753 
754 	if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
755 		nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
756 	if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
757 		nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);
758 	if (npt->cookie != NULL)
759 		nlattr_add_raw(nw, npt->cookie);
760 
761 	if (nlmsg_end(nw))
762 		return;
763 enomem:
764 	NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
765 	    hdr->nlmsg_type, hdr->nlmsg_seq);
766 	nlmsg_abort(nw);
767 }
768 
769 bool
770 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
771 {
772 	if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
773 		NL_LOG(LOG_DEBUG, "Error finalizing table dump");
774 		return (false);
775 	}
776 	/* Save operation result */
777 	int *perror = nlmsg_reserve_object(nw, int);
778 	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
779 	    nw->offset, perror);
780 	*perror = error;
781 	nlmsg_end(nw);
782 	nw->suppress_ack = true;
783 
784 	return (true);
785 }
786 
787 #include <netlink/ktest_netlink_message_writer.h>
788