xref: /freebsd/sys/netlink/netlink_message_writer.c (revision 5abaf0866445a61c11665fffc148ecd13a7bb9ac)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/malloc.h>
32 #include <sys/lock.h>
33 #include <sys/rmlock.h>
34 #include <sys/mbuf.h>
35 #include <sys/ck.h>
36 #include <sys/socket.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 
40 #include <netlink/netlink.h>
41 #include <netlink/netlink_ctl.h>
42 #include <netlink/netlink_linux.h>
43 #include <netlink/netlink_var.h>
44 
45 #define	DEBUG_MOD_NAME	nl_writer
46 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
47 #include <netlink/netlink_debug.h>
48 _DECLARE_DEBUG(LOG_DEBUG);
49 
50 /*
51  * The goal of this file is to provide convenient message writing KPI on top of
52  * different storage methods (mbufs, uio, temporary memory chunks).
53  *
 * The main KPI guarantee is that the (last) message always resides in a contiguous
 *  memory buffer, so one is able to update the header after writing the entire message.
56  *
57  * This guarantee comes with a side effect of potentially reallocating underlying
58  *  buffer, so one needs to update the desired pointers after something is added
59  *  to the header.
60  *
61  * Messaging layer contains hooks performing transparent Linux translation for the messages.
62  *
63  * There are 3 types of supported targets:
64  *  * socket (adds mbufs to the socket buffer, used for message replies)
65  *  * group (sends mbuf/chain to the specified groups, used for the notifications)
66  *  * chain (returns mbuf chain, used in Linux message translation code)
67  *
68  * There are 3 types of storage:
69  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
70  *    fits in MCLBYTES)
71  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
72  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
73  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
74  *    Linux sockets, calls translation hook prior to sending messages to the socket).
75  *
76  * Internally, KPI switches between different types of storage when memory requirements
77  *  change. It happens transparently to the caller.
78  */
79 
80 
/*
 * Signatures of the per-storage operations.
 *
 * An init operation prepares @nw for writing with storage for @size bytes;
 * @waitok selects between M_WAITOK and M_NOWAIT allocations.
 * A write operation receives the storage pointer @buf, the number of bytes
 * written to it (@buflen) and the number of messages it holds (@cnt).
 * Both report success by returning true.
 */
typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen,
    int cnt);

/* Operations of a single storage type: one flush callback per target. */
struct nlwriter_ops {
	nlwriter_op_init	*init;		/* set up the storage */
	nlwriter_op_write	*write_socket;	/* NS_WRITER_TARGET_SOCKET */
	nlwriter_op_write	*write_group;	/* NS_WRITER_TARGET_GROUP */
	nlwriter_op_write	*write_chain;	/* NS_WRITER_TARGET_CHAIN */
};
90 
91 /*
92  * NS_WRITER_TYPE_BUF
93  * Writes message to a temporary memory buffer,
94  * flushing to the socket/group when buffer size limit is reached
95  */
96 static bool
97 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
98 {
99 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
100 	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
101 	if (__predict_false(nw->_storage == NULL))
102 		return (false);
103 	nw->alloc_len = size;
104 	nw->offset = 0;
105 	nw->hdr = NULL;
106 	nw->data = nw->_storage;
107 	nw->writer_type = NS_WRITER_TYPE_BUF;
108 	nw->malloc_flag = mflag;
109 	nw->num_messages = 0;
110 	nw->enomem = false;
111 	return (true);
112 }
113 
114 static bool
115 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
116 {
117 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw);
118 	if (__predict_false(datalen == 0)) {
119 		free(buf, M_NETLINK);
120 		return (true);
121 	}
122 
123 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
124 	if (__predict_false(m == NULL)) {
125 		/* XXX: should we set sorcverr? */
126 		free(buf, M_NETLINK);
127 		return (false);
128 	}
129 	m_append(m, datalen, buf);
130 	free(buf, M_NETLINK);
131 
132 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
133 	return (nl_send_one(m, (struct nlpcb *)(nw->arg_ptr), cnt, io_flags));
134 }
135 
136 static bool
137 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
138 {
139 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr);
140 	if (__predict_false(datalen == 0)) {
141 		free(buf, M_NETLINK);
142 		return (true);
143 	}
144 
145 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
146 	if (__predict_false(m == NULL)) {
147 		free(buf, M_NETLINK);
148 		return (false);
149 	}
150 	bool success = m_append(m, datalen, buf) != 0;
151 	free(buf, M_NETLINK);
152 
153 	if (!success)
154 		return (false);
155 
156 	nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF);
157 	return (true);
158 }
159 
160 static bool
161 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
162 {
163 	struct mbuf **m0 = (struct mbuf **)(nw->arg_ptr);
164 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr);
165 
166 	if (__predict_false(datalen == 0)) {
167 		free(buf, M_NETLINK);
168 		return (true);
169 	}
170 
171 	if (*m0 == NULL) {
172 		struct mbuf *m;
173 
174 		m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
175 		if (__predict_false(m == NULL)) {
176 			free(buf, M_NETLINK);
177 			return (false);
178 		}
179 		*m0 = m;
180 	}
181 	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
182 		free(buf, M_NETLINK);
183 		return (false);
184 	}
185 	return (true);
186 }
187 
188 
189 /*
190  * NS_WRITER_TYPE_MBUF
191  * Writes message to the allocated mbuf,
192  * flushing to socket/group when mbuf size limit is reached.
193  * This is the most efficient mechanism as it avoids double-copying.
194  *
195  * Allocates a single mbuf suitable to store up to @size bytes of data.
 * If size < MHLEN (around 160 bytes), allocates an mbuf with pkthdr.
 * If size <= MCLBYTES (2k), allocates a single mbuf cluster.
198  * Otherwise, return NULL.
199  */
200 static bool
201 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
202 {
203 	struct mbuf *m;
204 
205 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
206 	m = m_get2(size, mflag, MT_DATA, M_PKTHDR);
207 	if (__predict_false(m == NULL))
208 		return (false);
209 	nw->alloc_len = M_TRAILINGSPACE(m);
210 	nw->offset = 0;
211 	nw->hdr = NULL;
212 	nw->_storage = (void *)m;
213 	nw->data = mtod(m, void *);
214 	nw->writer_type = NS_WRITER_TYPE_MBUF;
215 	nw->malloc_flag = mflag;
216 	nw->num_messages = 0;
217 	nw->enomem = false;
218 	memset(nw->data, 0, size);
219 	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
220 	    m, size, nw->alloc_len, nw->data);
221 	return (true);
222 }
223 
224 static bool
225 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
226 {
227 	struct mbuf *m = (struct mbuf *)buf;
228 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr);
229 
230 	if (__predict_false(datalen == 0)) {
231 		m_freem(m);
232 		return (true);
233 	}
234 
235 	m->m_pkthdr.len = datalen;
236 	m->m_len = datalen;
237 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
238 	return (nl_send_one(m, (struct nlpcb *)(nw->arg_ptr), cnt, io_flags));
239 }
240 
241 static bool
242 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
243 {
244 	struct mbuf *m = (struct mbuf *)buf;
245 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr);
246 
247 	if (__predict_false(datalen == 0)) {
248 		m_freem(m);
249 		return (true);
250 	}
251 
252 	m->m_pkthdr.len = datalen;
253 	m->m_len = datalen;
254 	nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF);
255 	return (true);
256 }
257 
258 static bool
259 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
260 {
261 	struct mbuf *m_new = (struct mbuf *)buf;
262 	struct mbuf **m0 = (struct mbuf **)(nw->arg_ptr);
263 
264 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr);
265 
266 	if (__predict_false(datalen == 0)) {
267 		m_freem(m_new);
268 		return (true);
269 	}
270 
271 	m_new->m_pkthdr.len = datalen;
272 	m_new->m_len = datalen;
273 
274 	if (*m0 == NULL) {
275 		*m0 = m_new;
276 	} else {
277 		struct mbuf *m_last;
278 		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
279 			;
280 		m_last->m_next = m_new;
281 		(*m0)->m_pkthdr.len += datalen;
282 	}
283 
284 	return (true);
285 }
286 
287 /*
288  * NS_WRITER_TYPE_LBUF
 * Writes message to the allocated memory buffer,
 * flushing to socket/group when buffer size limit is reached.
291  * Calls linux handler to rewrite messages before sending to the socket.
292  */
293 static bool
294 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
295 {
296 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
297 	size = roundup2(size, sizeof(void *));
298 	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
299 	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
300 	if (__predict_false(buf == NULL))
301 		return (false);
302 
303 	/* Fill buffer header first */
304 	struct linear_buffer *lb = (struct linear_buffer *)buf;
305 	lb->base = &buf[sizeof(struct linear_buffer) + size];
306 	lb->size = size + SCRATCH_BUFFER_SIZE;
307 
308 	nw->alloc_len = size;
309 	nw->offset = 0;
310 	nw->hdr = NULL;
311 	nw->_storage = buf;
312 	nw->data = (char *)(lb + 1);
313 	nw->malloc_flag = mflag;
314 	nw->writer_type = NS_WRITER_TYPE_LBUF;
315 	nw->num_messages = 0;
316 	nw->enomem = false;
317 	return (true);
318 }
319 
320 static bool
321 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
322 {
323 	struct linear_buffer *lb = (struct linear_buffer *)buf;
324 	char *data = (char *)(lb + 1);
325 	struct nlpcb *nlp = (struct nlpcb *)(nw->arg_ptr);
326 
327 	if (__predict_false(datalen == 0)) {
328 		free(buf, M_NETLINK);
329 		return (true);
330 	}
331 
332 	struct mbuf *m = NULL;
333 	if (linux_netlink_p != NULL)
334 		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
335 	free(buf, M_NETLINK);
336 
337 	if (__predict_false(m == NULL)) {
338 		/* XXX: should we set sorcverr? */
339 		return (false);
340 	}
341 
342 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
343 	return (nl_send_one(m, nlp, cnt, io_flags));
344 }
345 
346 /* Shouldn't be called (maybe except Linux code originating message) */
347 static bool
348 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
349 {
350 	struct linear_buffer *lb = (struct linear_buffer *)buf;
351 	char *data = (char *)(lb + 1);
352 
353 	if (__predict_false(datalen == 0)) {
354 		free(buf, M_NETLINK);
355 		return (true);
356 	}
357 
358 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
359 	if (__predict_false(m == NULL)) {
360 		free(buf, M_NETLINK);
361 		return (false);
362 	}
363 	m_append(m, datalen, data);
364 	free(buf, M_NETLINK);
365 
366 	nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF);
367 	return (true);
368 }
369 
370 static const struct nlwriter_ops nlmsg_writers[] = {
371 	/* NS_WRITER_TYPE_MBUF */
372 	{
373 		.init = nlmsg_get_ns_mbuf,
374 		.write_socket = nlmsg_write_socket_mbuf,
375 		.write_group = nlmsg_write_group_mbuf,
376 		.write_chain = nlmsg_write_chain_mbuf,
377 	},
378 	/* NS_WRITER_TYPE_BUF */
379 	{
380 		.init = nlmsg_get_ns_buf,
381 		.write_socket = nlmsg_write_socket_buf,
382 		.write_group = nlmsg_write_group_buf,
383 		.write_chain = nlmsg_write_chain_buf,
384 	},
385 	/* NS_WRITER_TYPE_LBUF */
386 	{
387 		.init = nlmsg_get_ns_lbuf,
388 		.write_socket = nlmsg_write_socket_lbuf,
389 		.write_group = nlmsg_write_group_lbuf,
390 	},
391 };
392 
393 static void
394 nlmsg_set_callback(struct nl_writer *nw)
395 {
396 	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
397 
398 	switch (nw->writer_target) {
399 	case NS_WRITER_TARGET_SOCKET:
400 		nw->cb = pops->write_socket;
401 		break;
402 	case NS_WRITER_TARGET_GROUP:
403 		nw->cb = pops->write_group;
404 		break;
405 	case NS_WRITER_TARGET_CHAIN:
406 		nw->cb = pops->write_chain;
407 		break;
408 	default:
409 		panic("not implemented");
410 	}
411 }
412 
413 static bool
414 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
415 {
416 	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
417 	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
418 	return (nlmsg_writers[type].init(nw, size, waitok));
419 }
420 
421 static bool
422 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
423 {
424 	int type;
425 
426 	if (!is_linux) {
427 		if (__predict_true(size <= MCLBYTES))
428 			type = NS_WRITER_TYPE_MBUF;
429 		else
430 			type = NS_WRITER_TYPE_BUF;
431 	} else
432 		type = NS_WRITER_TYPE_LBUF;
433 	return (nlmsg_get_buf_type(nw, size, type, waitok));
434 }
435 
436 bool
437 nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
438 {
439 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
440 		return (false);
441 	nw->arg_ptr = (void *)nlp;
442 	nw->writer_target = NS_WRITER_TARGET_SOCKET;
443 	nlmsg_set_callback(nw);
444 	return (true);
445 }
446 
447 bool
448 nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
449 {
450 	if (!nlmsg_get_buf(nw, size, false, false))
451 		return (false);
452 	nw->arg_uint = (uint64_t)protocol << 16 | (uint64_t)group_id;
453 	nw->writer_target = NS_WRITER_TARGET_GROUP;
454 	nlmsg_set_callback(nw);
455 	return (true);
456 }
457 
458 bool
459 nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
460 {
461 	if (!nlmsg_get_buf(nw, size, false, false))
462 		return (false);
463 	*pm = NULL;
464 	nw->arg_ptr = (void *)pm;
465 	nw->writer_target = NS_WRITER_TARGET_CHAIN;
466 	nlmsg_set_callback(nw);
467 	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
468 	return (true);
469 }
470 
471 void
472 nlmsg_ignore_limit(struct nl_writer *nw)
473 {
474 	nw->ignore_limit = true;
475 }
476 
477 bool
478 nlmsg_flush(struct nl_writer *nw)
479 {
480 
481 	if (__predict_false(nw->hdr != NULL)) {
482 		/* Last message has not been completed, skip it. */
483 		int completed_len = (char *)nw->hdr - nw->data;
484 		/* Send completed messages */
485 		nw->offset -= nw->offset - completed_len;
486 		nw->hdr = NULL;
487 	}
488 
489 	NL_LOG(LOG_DEBUG2, "OUT");
490 	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
491 	nw->_storage = NULL;
492 
493 	if (!result) {
494 		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
495 	}
496 
497 	return (result);
498 }
499 
500 /*
501  * Flushes previous data and allocates new underlying storage
502  *  sufficient for holding at least @required_len bytes.
503  * Return true on success.
504  */
505 bool
506 nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
507 {
508 	struct nl_writer ns_new = {};
509 	int completed_len, new_len;
510 
511 	if (nw->enomem)
512 		return (false);
513 
514 	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
515 	    nw->offset, nw->alloc_len, required_len);
516 
517 	/* Calculated new buffer size and allocate it s*/
518 	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
519 	if (completed_len > 0 && required_len < MCLBYTES) {
520 		/* We already ran out of space, use the largest effective size */
521 		new_len = max(nw->alloc_len, MCLBYTES);
522 	} else {
523 		if (nw->alloc_len < MCLBYTES)
524 			new_len = MCLBYTES;
525 		else
526 			new_len = nw->alloc_len * 2;
527 		while (new_len < required_len)
528 			new_len *= 2;
529 	}
530 	bool waitok = (nw->malloc_flag == M_WAITOK);
531 	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
532 	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
533 		nw->enomem = true;
534 		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
535 		return (false);
536 	}
537 	if (nw->ignore_limit)
538 		nlmsg_ignore_limit(&ns_new);
539 
540 	/* Update callback data */
541 	ns_new.writer_target = nw->writer_target;
542 	nlmsg_set_callback(&ns_new);
543 	ns_new.arg_uint = nw->arg_uint;
544 
545 	/* Copy last (unfinished) header to the new storage */
546 	int last_len = nw->offset - completed_len;
547 	if (last_len > 0) {
548 		memcpy(ns_new.data, nw->hdr, last_len);
549 		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
550 		ns_new.offset = last_len;
551 	}
552 
553 	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
554 
555 	/* Flush completed headers & switch to the new nw */
556 	nlmsg_flush(nw);
557 	memcpy(nw, &ns_new, sizeof(struct nl_writer));
558 	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
559 
560 	return (true);
561 }
562 
563 bool
564 nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
565     uint16_t flags, uint32_t len)
566 {
567 	struct nlmsghdr *hdr;
568 
569 	MPASS(nw->hdr == NULL);
570 
571 	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
572 	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
573 		if (!nlmsg_refill_buffer(nw, required_len))
574 			return (false);
575 	}
576 
577 	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
578 
579 	hdr->nlmsg_len = len;
580 	hdr->nlmsg_type = type;
581 	hdr->nlmsg_flags = flags;
582 	hdr->nlmsg_seq = seq;
583 	hdr->nlmsg_pid = portid;
584 
585 	nw->hdr = hdr;
586 	nw->offset += sizeof(struct nlmsghdr);
587 
588 	return (true);
589 }
590 
591 bool
592 nlmsg_end(struct nl_writer *nw)
593 {
594 	MPASS(nw->hdr != NULL);
595 
596 	if (nw->enomem) {
597 		NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
598 		nlmsg_abort(nw);
599 		return (false);
600 	}
601 
602 	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
603 	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
604 	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
605 	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
606 	nw->hdr = NULL;
607 	nw->num_messages++;
608 	return (true);
609 }
610 
611 void
612 nlmsg_abort(struct nl_writer *nw)
613 {
614 	if (nw->hdr != NULL) {
615 		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
616 		nw->hdr = NULL;
617 	}
618 }
619 
620 void
621 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
622     struct nl_pstate *npt)
623 {
624 	struct nlmsgerr *errmsg;
625 	int payload_len;
626 	uint32_t flags = nlp->nl_flags;
627 	struct nl_writer *nw = npt->nw;
628 	bool cap_ack;
629 
630 	payload_len = sizeof(struct nlmsgerr);
631 
632 	/*
633 	 * The only case when we send the full message in the
634 	 * reply is when there is an error and NETLINK_CAP_ACK
635 	 * is not set.
636 	 */
637 	cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
638 	if (!cap_ack)
639 		payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
640 	payload_len = NETLINK_ALIGN(payload_len);
641 
642 	uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
643 	if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
644 		nl_flags |= NLM_F_ACK_TLVS;
645 
646 	NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
647 	    hdr->nlmsg_type, hdr->nlmsg_seq);
648 
649 	if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len))
650 		goto enomem;
651 
652 	errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
653 	errmsg->error = error;
654 	/* In case of error copy the whole message, else just the header */
655 	memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);
656 
657 	if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
658 		nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
659 	if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
660 		nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);
661 	if (npt->cookie != NULL)
662 		nlattr_add_raw(nw, npt->cookie);
663 
664 	if (nlmsg_end(nw))
665 		return;
666 enomem:
667 	NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
668 	    hdr->nlmsg_type, hdr->nlmsg_seq);
669 	nlmsg_abort(nw);
670 }
671 
672 bool
673 nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
674 {
675 	if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
676 		NL_LOG(LOG_DEBUG, "Error finalizing table dump");
677 		return (false);
678 	}
679 	/* Save operation result */
680 	int *perror = nlmsg_reserve_object(nw, int);
681 	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
682 	    nw->offset, perror);
683 	*perror = error;
684 	nlmsg_end(nw);
685 	nw->suppress_ack = true;
686 
687 	return (true);
688 }
689