xref: /freebsd/contrib/xz/src/liblzma/common/stream_encoder_mt.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       stream_encoder_mt.c
4 /// \brief      Multithreaded .xz Stream encoder
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "filter_encoder.h"
14 #include "easy_preset.h"
15 #include "block_encoder.h"
16 #include "block_buffer_encoder.h"
17 #include "index_encoder.h"
18 #include "outqueue.h"
19 
20 
/// Largest block size this encoder accepts. Keeping the limit at
/// UINT64_MAX / LZMA_THREADS_MAX makes it easier to avoid integer
/// overflows when an unusually large block size is requested.
#define BLOCK_SIZE_MAX ((UINT64_MAX) / (LZMA_THREADS_MAX))
24 
25 
/// States of a worker thread.
///
/// The numeric order is significant: code in this file compares states
/// with "<= THR_FINISH" and ">= THR_STOP".
typedef enum {
	/// Nothing to do; the thread waits for work.
	THR_IDLE = 0,

	/// A Block is being encoded and more input may still arrive.
	THR_RUN = 1,

	/// A Block is being encoded and no more input will be
	/// added to it.
	THR_FINISH = 2,

	/// The main thread wants the thread to abandon whatever it is
	/// doing, but the thread must stay alive.
	THR_STOP = 3,

	/// The main thread wants the thread to exit. Thread cancellation
	/// could be used instead, but since the thread is being stopped
	/// anyway, a plain state flag is the lazier solution.
	THR_EXIT = 4,

} worker_state;
46 
47 typedef struct lzma_stream_coder_s lzma_stream_coder;
48 
49 typedef struct worker_thread_s worker_thread;
50 struct worker_thread_s {
51 	worker_state state;
52 
53 	/// Input buffer of coder->block_size bytes. The main thread will
54 	/// put new input into this and update in_size accordingly. Once
55 	/// no more input is coming, state will be set to THR_FINISH.
56 	uint8_t *in;
57 
58 	/// Amount of data available in the input buffer. This is modified
59 	/// only by the main thread.
60 	size_t in_size;
61 
62 	/// Output buffer for this thread. This is set by the main
63 	/// thread every time a new Block is started with this thread
64 	/// structure.
65 	lzma_outbuf *outbuf;
66 
67 	/// Pointer to the main structure is needed when putting this
68 	/// thread back to the stack of free threads.
69 	lzma_stream_coder *coder;
70 
71 	/// The allocator is set by the main thread. Since a copy of the
72 	/// pointer is kept here, the application must not change the
73 	/// allocator before calling lzma_end().
74 	const lzma_allocator *allocator;
75 
76 	/// Amount of uncompressed data that has already been compressed.
77 	uint64_t progress_in;
78 
79 	/// Amount of compressed data that is ready.
80 	uint64_t progress_out;
81 
82 	/// Block encoder
83 	lzma_next_coder block_encoder;
84 
85 	/// Compression options for this Block
86 	lzma_block block_options;
87 
88 	/// Filter chain for this thread. By copying the filters array
89 	/// to each thread it is possible to change the filter chain
90 	/// between Blocks using lzma_filters_update().
91 	lzma_filter filters[LZMA_FILTERS_MAX + 1];
92 
93 	/// Next structure in the stack of free worker threads.
94 	worker_thread *next;
95 
96 	mythread_mutex mutex;
97 	mythread_cond cond;
98 
99 	/// The ID of this thread is used to join the thread
100 	/// when it's not needed anymore.
101 	mythread thread_id;
102 };
103 
104 
105 struct lzma_stream_coder_s {
106 	enum {
107 		SEQ_STREAM_HEADER,
108 		SEQ_BLOCK,
109 		SEQ_INDEX,
110 		SEQ_STREAM_FOOTER,
111 	} sequence;
112 
113 	/// Start a new Block every block_size bytes of input unless
114 	/// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier.
115 	size_t block_size;
116 
117 	/// The filter chain to use for the next Block.
118 	/// This can be updated using lzma_filters_update()
119 	/// after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH.
120 	lzma_filter filters[LZMA_FILTERS_MAX + 1];
121 
122 	/// A copy of filters[] will be put here when attempting to get
123 	/// a new worker thread. This will be copied to a worker thread
124 	/// when a thread becomes free and then this cache is marked as
125 	/// empty by setting [0].id = LZMA_VLI_UNKNOWN. Without this cache
126 	/// the filter options from filters[] would get uselessly copied
127 	/// multiple times (allocated and freed) when waiting for a new free
128 	/// worker thread.
129 	///
130 	/// This is freed if filters[] is updated via lzma_filters_update().
131 	lzma_filter filters_cache[LZMA_FILTERS_MAX + 1];
132 
133 
134 	/// Index to hold sizes of the Blocks
135 	lzma_index *index;
136 
137 	/// Index encoder
138 	lzma_next_coder index_encoder;
139 
140 
141 	/// Stream Flags for encoding the Stream Header and Stream Footer.
142 	lzma_stream_flags stream_flags;
143 
144 	/// Buffer to hold Stream Header and Stream Footer.
145 	uint8_t header[LZMA_STREAM_HEADER_SIZE];
146 
147 	/// Read position in header[]
148 	size_t header_pos;
149 
150 
151 	/// Output buffer queue for compressed data
152 	lzma_outq outq;
153 
154 	/// How much memory to allocate for each lzma_outbuf.buf
155 	size_t outbuf_alloc_size;
156 
157 
158 	/// Maximum wait time if cannot use all the input and cannot
159 	/// fill the output buffer. This is in milliseconds.
160 	uint32_t timeout;
161 
162 
163 	/// Error code from a worker thread
164 	lzma_ret thread_error;
165 
166 	/// Array of allocated thread-specific structures
167 	worker_thread *threads;
168 
169 	/// Number of structures in "threads" above. This is also the
170 	/// number of threads that will be created at maximum.
171 	uint32_t threads_max;
172 
173 	/// Number of thread structures that have been initialized, and
174 	/// thus the number of worker threads actually created so far.
175 	uint32_t threads_initialized;
176 
177 	/// Stack of free threads. When a thread finishes, it puts itself
178 	/// back into this stack. This starts as empty because threads
179 	/// are created only when actually needed.
180 	worker_thread *threads_free;
181 
182 	/// The most recent worker thread to which the main thread writes
183 	/// the new input from the application.
184 	worker_thread *thr;
185 
186 
187 	/// Amount of uncompressed data in Blocks that have already
188 	/// been finished.
189 	uint64_t progress_in;
190 
191 	/// Amount of compressed data in Stream Header + Blocks that
192 	/// have already been finished.
193 	uint64_t progress_out;
194 
195 
196 	mythread_mutex mutex;
197 	mythread_cond cond;
198 };
199 
200 
201 /// Tell the main thread that something has gone wrong.
202 static void
203 worker_error(worker_thread *thr, lzma_ret ret)
204 {
205 	assert(ret != LZMA_OK);
206 	assert(ret != LZMA_STREAM_END);
207 
208 	mythread_sync(thr->coder->mutex) {
209 		if (thr->coder->thread_error == LZMA_OK)
210 			thr->coder->thread_error = ret;
211 
212 		mythread_cond_signal(&thr->coder->cond);
213 	}
214 
215 	return;
216 }
217 
218 
219 static worker_state
220 worker_encode(worker_thread *thr, size_t *out_pos, worker_state state)
221 {
222 	assert(thr->progress_in == 0);
223 	assert(thr->progress_out == 0);
224 
225 	// Set the Block options.
226 	thr->block_options = (lzma_block){
227 		.version = 0,
228 		.check = thr->coder->stream_flags.check,
229 		.compressed_size = thr->outbuf->allocated,
230 		.uncompressed_size = thr->coder->block_size,
231 		.filters = thr->filters,
232 	};
233 
234 	// Calculate maximum size of the Block Header. This amount is
235 	// reserved in the beginning of the buffer so that Block Header
236 	// along with Compressed Size and Uncompressed Size can be
237 	// written there.
238 	lzma_ret ret = lzma_block_header_size(&thr->block_options);
239 	if (ret != LZMA_OK) {
240 		worker_error(thr, ret);
241 		return THR_STOP;
242 	}
243 
244 	// Initialize the Block encoder.
245 	ret = lzma_block_encoder_init(&thr->block_encoder,
246 			thr->allocator, &thr->block_options);
247 	if (ret != LZMA_OK) {
248 		worker_error(thr, ret);
249 		return THR_STOP;
250 	}
251 
252 	size_t in_pos = 0;
253 	size_t in_size = 0;
254 
255 	*out_pos = thr->block_options.header_size;
256 	const size_t out_size = thr->outbuf->allocated;
257 
258 	do {
259 		mythread_sync(thr->mutex) {
260 			// Store in_pos and *out_pos into *thr so that
261 			// an application may read them via
262 			// lzma_get_progress() to get progress information.
263 			//
264 			// NOTE: These aren't updated when the encoding
265 			// finishes. Instead, the final values are taken
266 			// later from thr->outbuf.
267 			thr->progress_in = in_pos;
268 			thr->progress_out = *out_pos;
269 
270 			while (in_size == thr->in_size
271 					&& thr->state == THR_RUN)
272 				mythread_cond_wait(&thr->cond, &thr->mutex);
273 
274 			state = thr->state;
275 			in_size = thr->in_size;
276 		}
277 
278 		// Return if we were asked to stop or exit.
279 		if (state >= THR_STOP)
280 			return state;
281 
282 		lzma_action action = state == THR_FINISH
283 				? LZMA_FINISH : LZMA_RUN;
284 
285 		// Limit the amount of input given to the Block encoder
286 		// at once. This way this thread can react fairly quickly
287 		// if the main thread wants us to stop or exit.
288 		static const size_t in_chunk_max = 16384;
289 		size_t in_limit = in_size;
290 		if (in_size - in_pos > in_chunk_max) {
291 			in_limit = in_pos + in_chunk_max;
292 			action = LZMA_RUN;
293 		}
294 
295 		ret = thr->block_encoder.code(
296 				thr->block_encoder.coder, thr->allocator,
297 				thr->in, &in_pos, in_limit, thr->outbuf->buf,
298 				out_pos, out_size, action);
299 	} while (ret == LZMA_OK && *out_pos < out_size);
300 
301 	switch (ret) {
302 	case LZMA_STREAM_END:
303 		assert(state == THR_FINISH);
304 
305 		// Encode the Block Header. By doing it after
306 		// the compression, we can store the Compressed Size
307 		// and Uncompressed Size fields.
308 		ret = lzma_block_header_encode(&thr->block_options,
309 				thr->outbuf->buf);
310 		if (ret != LZMA_OK) {
311 			worker_error(thr, ret);
312 			return THR_STOP;
313 		}
314 
315 		break;
316 
317 	case LZMA_OK:
318 		// The data was incompressible. Encode it using uncompressed
319 		// LZMA2 chunks.
320 		//
321 		// First wait that we have gotten all the input.
322 		mythread_sync(thr->mutex) {
323 			while (thr->state == THR_RUN)
324 				mythread_cond_wait(&thr->cond, &thr->mutex);
325 
326 			state = thr->state;
327 			in_size = thr->in_size;
328 		}
329 
330 		if (state >= THR_STOP)
331 			return state;
332 
333 		// Do the encoding. This takes care of the Block Header too.
334 		*out_pos = 0;
335 		ret = lzma_block_uncomp_encode(&thr->block_options,
336 				thr->in, in_size, thr->outbuf->buf,
337 				out_pos, out_size);
338 
339 		// It shouldn't fail.
340 		if (ret != LZMA_OK) {
341 			worker_error(thr, LZMA_PROG_ERROR);
342 			return THR_STOP;
343 		}
344 
345 		break;
346 
347 	default:
348 		worker_error(thr, ret);
349 		return THR_STOP;
350 	}
351 
352 	// Set the size information that will be read by the main thread
353 	// to write the Index field.
354 	thr->outbuf->unpadded_size
355 			= lzma_block_unpadded_size(&thr->block_options);
356 	assert(thr->outbuf->unpadded_size != 0);
357 	thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size;
358 
359 	return THR_FINISH;
360 }
361 
362 
363 static MYTHREAD_RET_TYPE
364 worker_start(void *thr_ptr)
365 {
366 	worker_thread *thr = thr_ptr;
367 	worker_state state = THR_IDLE; // Init to silence a warning
368 
369 	while (true) {
370 		// Wait for work.
371 		mythread_sync(thr->mutex) {
372 			while (true) {
373 				// The thread is already idle so if we are
374 				// requested to stop, just set the state.
375 				if (thr->state == THR_STOP) {
376 					thr->state = THR_IDLE;
377 					mythread_cond_signal(&thr->cond);
378 				}
379 
380 				state = thr->state;
381 				if (state != THR_IDLE)
382 					break;
383 
384 				mythread_cond_wait(&thr->cond, &thr->mutex);
385 			}
386 		}
387 
388 		size_t out_pos = 0;
389 
390 		assert(state != THR_IDLE);
391 		assert(state != THR_STOP);
392 
393 		if (state <= THR_FINISH)
394 			state = worker_encode(thr, &out_pos, state);
395 
396 		if (state == THR_EXIT)
397 			break;
398 
399 		// Mark the thread as idle unless the main thread has
400 		// told us to exit. Signal is needed for the case
401 		// where the main thread is waiting for the threads to stop.
402 		mythread_sync(thr->mutex) {
403 			if (thr->state != THR_EXIT) {
404 				thr->state = THR_IDLE;
405 				mythread_cond_signal(&thr->cond);
406 			}
407 		}
408 
409 		mythread_sync(thr->coder->mutex) {
410 			// If no errors occurred, make the encoded data
411 			// available to be copied out.
412 			if (state == THR_FINISH) {
413 				thr->outbuf->pos = out_pos;
414 				thr->outbuf->finished = true;
415 			}
416 
417 			// Update the main progress info.
418 			thr->coder->progress_in
419 					+= thr->outbuf->uncompressed_size;
420 			thr->coder->progress_out += out_pos;
421 			thr->progress_in = 0;
422 			thr->progress_out = 0;
423 
424 			// Return this thread to the stack of free threads.
425 			thr->next = thr->coder->threads_free;
426 			thr->coder->threads_free = thr;
427 
428 			mythread_cond_signal(&thr->coder->cond);
429 		}
430 	}
431 
432 	// Exiting, free the resources.
433 	lzma_filters_free(thr->filters, thr->allocator);
434 
435 	mythread_mutex_destroy(&thr->mutex);
436 	mythread_cond_destroy(&thr->cond);
437 
438 	lzma_next_end(&thr->block_encoder, thr->allocator);
439 	lzma_free(thr->in, thr->allocator);
440 	return MYTHREAD_RET_VALUE;
441 }
442 
443 
444 /// Make the threads stop but not exit. Optionally wait for them to stop.
445 static void
446 threads_stop(lzma_stream_coder *coder, bool wait_for_threads)
447 {
448 	// Tell the threads to stop.
449 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
450 		mythread_sync(coder->threads[i].mutex) {
451 			coder->threads[i].state = THR_STOP;
452 			mythread_cond_signal(&coder->threads[i].cond);
453 		}
454 	}
455 
456 	if (!wait_for_threads)
457 		return;
458 
459 	// Wait for the threads to settle in the idle state.
460 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
461 		mythread_sync(coder->threads[i].mutex) {
462 			while (coder->threads[i].state != THR_IDLE)
463 				mythread_cond_wait(&coder->threads[i].cond,
464 						&coder->threads[i].mutex);
465 		}
466 	}
467 
468 	return;
469 }
470 
471 
472 /// Stop the threads and free the resources associated with them.
473 /// Wait until the threads have exited.
474 static void
475 threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator)
476 {
477 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
478 		mythread_sync(coder->threads[i].mutex) {
479 			coder->threads[i].state = THR_EXIT;
480 			mythread_cond_signal(&coder->threads[i].cond);
481 		}
482 	}
483 
484 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
485 		int ret = mythread_join(coder->threads[i].thread_id);
486 		assert(ret == 0);
487 		(void)ret;
488 	}
489 
490 	lzma_free(coder->threads, allocator);
491 	return;
492 }
493 
494 
495 /// Initialize a new worker_thread structure and create a new thread.
496 static lzma_ret
497 initialize_new_thread(lzma_stream_coder *coder,
498 		const lzma_allocator *allocator)
499 {
500 	worker_thread *thr = &coder->threads[coder->threads_initialized];
501 
502 	thr->in = lzma_alloc(coder->block_size, allocator);
503 	if (thr->in == NULL)
504 		return LZMA_MEM_ERROR;
505 
506 	if (mythread_mutex_init(&thr->mutex))
507 		goto error_mutex;
508 
509 	if (mythread_cond_init(&thr->cond))
510 		goto error_cond;
511 
512 	thr->state = THR_IDLE;
513 	thr->allocator = allocator;
514 	thr->coder = coder;
515 	thr->progress_in = 0;
516 	thr->progress_out = 0;
517 	thr->block_encoder = LZMA_NEXT_CODER_INIT;
518 	thr->filters[0].id = LZMA_VLI_UNKNOWN;
519 
520 	if (mythread_create(&thr->thread_id, &worker_start, thr))
521 		goto error_thread;
522 
523 	++coder->threads_initialized;
524 	coder->thr = thr;
525 
526 	return LZMA_OK;
527 
528 error_thread:
529 	mythread_cond_destroy(&thr->cond);
530 
531 error_cond:
532 	mythread_mutex_destroy(&thr->mutex);
533 
534 error_mutex:
535 	lzma_free(thr->in, allocator);
536 	return LZMA_MEM_ERROR;
537 }
538 
539 
540 static lzma_ret
541 get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator)
542 {
543 	// If there are no free output subqueues, there is no
544 	// point to try getting a thread.
545 	if (!lzma_outq_has_buf(&coder->outq))
546 		return LZMA_OK;
547 
548 	// That's also true if we cannot allocate memory for the output
549 	// buffer in the output queue.
550 	return_if_error(lzma_outq_prealloc_buf(&coder->outq, allocator,
551 			coder->outbuf_alloc_size));
552 
553 	// Make a thread-specific copy of the filter chain. Put it in
554 	// the cache array first so that if we cannot get a new thread yet,
555 	// the allocation is ready when we try again.
556 	if (coder->filters_cache[0].id == LZMA_VLI_UNKNOWN)
557 		return_if_error(lzma_filters_copy(
558 			coder->filters, coder->filters_cache, allocator));
559 
560 	// If there is a free structure on the stack, use it.
561 	mythread_sync(coder->mutex) {
562 		if (coder->threads_free != NULL) {
563 			coder->thr = coder->threads_free;
564 			coder->threads_free = coder->threads_free->next;
565 		}
566 	}
567 
568 	if (coder->thr == NULL) {
569 		// If there are no uninitialized structures left, return.
570 		if (coder->threads_initialized == coder->threads_max)
571 			return LZMA_OK;
572 
573 		// Initialize a new thread.
574 		return_if_error(initialize_new_thread(coder, allocator));
575 	}
576 
577 	// Reset the parts of the thread state that have to be done
578 	// in the main thread.
579 	mythread_sync(coder->thr->mutex) {
580 		coder->thr->state = THR_RUN;
581 		coder->thr->in_size = 0;
582 		coder->thr->outbuf = lzma_outq_get_buf(&coder->outq, NULL);
583 
584 		// Free the old thread-specific filter options and replace
585 		// them with the already-allocated new options from
586 		// coder->filters_cache[]. Then mark the cache as empty.
587 		lzma_filters_free(coder->thr->filters, allocator);
588 		memcpy(coder->thr->filters, coder->filters_cache,
589 				sizeof(coder->filters_cache));
590 		coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
591 
592 		mythread_cond_signal(&coder->thr->cond);
593 	}
594 
595 	return LZMA_OK;
596 }
597 
598 
599 static lzma_ret
600 stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator,
601 		const uint8_t *restrict in, size_t *restrict in_pos,
602 		size_t in_size, lzma_action action)
603 {
604 	while (*in_pos < in_size
605 			|| (coder->thr != NULL && action != LZMA_RUN)) {
606 		if (coder->thr == NULL) {
607 			// Get a new thread.
608 			const lzma_ret ret = get_thread(coder, allocator);
609 			if (coder->thr == NULL)
610 				return ret;
611 		}
612 
613 		// Copy the input data to thread's buffer.
614 		size_t thr_in_size = coder->thr->in_size;
615 		lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
616 				&thr_in_size, coder->block_size);
617 
618 		// Tell the Block encoder to finish if
619 		//  - it has got block_size bytes of input; or
620 		//  - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH,
621 		//    or LZMA_FULL_BARRIER was used.
622 		//
623 		// TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
624 		const bool finish = thr_in_size == coder->block_size
625 				|| (*in_pos == in_size && action != LZMA_RUN);
626 
627 		bool block_error = false;
628 
629 		mythread_sync(coder->thr->mutex) {
630 			if (coder->thr->state == THR_IDLE) {
631 				// Something has gone wrong with the Block
632 				// encoder. It has set coder->thread_error
633 				// which we will read a few lines later.
634 				block_error = true;
635 			} else {
636 				// Tell the Block encoder its new amount
637 				// of input and update the state if needed.
638 				coder->thr->in_size = thr_in_size;
639 
640 				if (finish)
641 					coder->thr->state = THR_FINISH;
642 
643 				mythread_cond_signal(&coder->thr->cond);
644 			}
645 		}
646 
647 		if (block_error) {
648 			lzma_ret ret = LZMA_OK; // Init to silence a warning.
649 
650 			mythread_sync(coder->mutex) {
651 				ret = coder->thread_error;
652 			}
653 
654 			return ret;
655 		}
656 
657 		if (finish)
658 			coder->thr = NULL;
659 	}
660 
661 	return LZMA_OK;
662 }
663 
664 
665 /// Wait until more input can be consumed, more output can be read, or
666 /// an optional timeout is reached.
667 static bool
668 wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs,
669 		bool *has_blocked, bool has_input)
670 {
671 	if (coder->timeout != 0 && !*has_blocked) {
672 		// Every time when stream_encode_mt() is called via
673 		// lzma_code(), *has_blocked starts as false. We set it
674 		// to true here and calculate the absolute time when
675 		// we must return if there's nothing to do.
676 		//
677 		// This way if we block multiple times for short moments
678 		// less than "timeout" milliseconds, we will return once
679 		// "timeout" amount of time has passed since the *first*
680 		// blocking occurred. If the absolute time was calculated
681 		// again every time we block, "timeout" would effectively
682 		// be meaningless if we never consecutively block longer
683 		// than "timeout" ms.
684 		*has_blocked = true;
685 		mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
686 	}
687 
688 	bool timed_out = false;
689 
690 	mythread_sync(coder->mutex) {
691 		// There are four things that we wait. If one of them
692 		// becomes possible, we return.
693 		//  - If there is input left, we need to get a free
694 		//    worker thread and an output buffer for it.
695 		//  - Data ready to be read from the output queue.
696 		//  - A worker thread indicates an error.
697 		//  - Time out occurs.
698 		while ((!has_input || coder->threads_free == NULL
699 					|| !lzma_outq_has_buf(&coder->outq))
700 				&& !lzma_outq_is_readable(&coder->outq)
701 				&& coder->thread_error == LZMA_OK
702 				&& !timed_out) {
703 			if (coder->timeout != 0)
704 				timed_out = mythread_cond_timedwait(
705 						&coder->cond, &coder->mutex,
706 						wait_abs) != 0;
707 			else
708 				mythread_cond_wait(&coder->cond,
709 						&coder->mutex);
710 		}
711 	}
712 
713 	return timed_out;
714 }
715 
716 
717 static lzma_ret
718 stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
719 		const uint8_t *restrict in, size_t *restrict in_pos,
720 		size_t in_size, uint8_t *restrict out,
721 		size_t *restrict out_pos, size_t out_size, lzma_action action)
722 {
723 	lzma_stream_coder *coder = coder_ptr;
724 
725 	switch (coder->sequence) {
726 	case SEQ_STREAM_HEADER:
727 		lzma_bufcpy(coder->header, &coder->header_pos,
728 				sizeof(coder->header),
729 				out, out_pos, out_size);
730 		if (coder->header_pos < sizeof(coder->header))
731 			return LZMA_OK;
732 
733 		coder->header_pos = 0;
734 		coder->sequence = SEQ_BLOCK;
735 
736 	// Fall through
737 
738 	case SEQ_BLOCK: {
739 		// Initialized to silence warnings.
740 		lzma_vli unpadded_size = 0;
741 		lzma_vli uncompressed_size = 0;
742 		lzma_ret ret = LZMA_OK;
743 
744 		// These are for wait_for_work().
745 		bool has_blocked = false;
746 		mythread_condtime wait_abs = { 0 };
747 
748 		while (true) {
749 			mythread_sync(coder->mutex) {
750 				// Check for Block encoder errors.
751 				ret = coder->thread_error;
752 				if (ret != LZMA_OK) {
753 					assert(ret != LZMA_STREAM_END);
754 					break; // Break out of mythread_sync.
755 				}
756 
757 				// Try to read compressed data to out[].
758 				ret = lzma_outq_read(&coder->outq, allocator,
759 						out, out_pos, out_size,
760 						&unpadded_size,
761 						&uncompressed_size);
762 			}
763 
764 			if (ret == LZMA_STREAM_END) {
765 				// End of Block. Add it to the Index.
766 				ret = lzma_index_append(coder->index,
767 						allocator, unpadded_size,
768 						uncompressed_size);
769 				if (ret != LZMA_OK) {
770 					threads_stop(coder, false);
771 					return ret;
772 				}
773 
774 				// If we didn't fill the output buffer yet,
775 				// try to read more data. Maybe the next
776 				// outbuf has been finished already too.
777 				if (*out_pos < out_size)
778 					continue;
779 			}
780 
781 			if (ret != LZMA_OK) {
782 				// coder->thread_error was set.
783 				threads_stop(coder, false);
784 				return ret;
785 			}
786 
787 			// Try to give uncompressed data to a worker thread.
788 			ret = stream_encode_in(coder, allocator,
789 					in, in_pos, in_size, action);
790 			if (ret != LZMA_OK) {
791 				threads_stop(coder, false);
792 				return ret;
793 			}
794 
795 			// See if we should wait or return.
796 			//
797 			// TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
798 			if (*in_pos == in_size) {
799 				// LZMA_RUN: More data is probably coming
800 				// so return to let the caller fill the
801 				// input buffer.
802 				if (action == LZMA_RUN)
803 					return LZMA_OK;
804 
805 				// LZMA_FULL_BARRIER: The same as with
806 				// LZMA_RUN but tell the caller that the
807 				// barrier was completed.
808 				if (action == LZMA_FULL_BARRIER)
809 					return LZMA_STREAM_END;
810 
811 				// Finishing or flushing isn't completed until
812 				// all input data has been encoded and copied
813 				// to the output buffer.
814 				if (lzma_outq_is_empty(&coder->outq)) {
815 					// LZMA_FINISH: Continue to encode
816 					// the Index field.
817 					if (action == LZMA_FINISH)
818 						break;
819 
820 					// LZMA_FULL_FLUSH: Return to tell
821 					// the caller that flushing was
822 					// completed.
823 					if (action == LZMA_FULL_FLUSH)
824 						return LZMA_STREAM_END;
825 				}
826 			}
827 
828 			// Return if there is no output space left.
829 			// This check must be done after testing the input
830 			// buffer, because we might want to use a different
831 			// return code.
832 			if (*out_pos == out_size)
833 				return LZMA_OK;
834 
835 			// Neither in nor out has been used completely.
836 			// Wait until there's something we can do.
837 			if (wait_for_work(coder, &wait_abs, &has_blocked,
838 					*in_pos < in_size))
839 				return LZMA_TIMED_OUT;
840 		}
841 
842 		// All Blocks have been encoded and the threads have stopped.
843 		// Prepare to encode the Index field.
844 		return_if_error(lzma_index_encoder_init(
845 				&coder->index_encoder, allocator,
846 				coder->index));
847 		coder->sequence = SEQ_INDEX;
848 
849 		// Update the progress info to take the Index and
850 		// Stream Footer into account. Those are very fast to encode
851 		// so in terms of progress information they can be thought
852 		// to be ready to be copied out.
853 		coder->progress_out += lzma_index_size(coder->index)
854 				+ LZMA_STREAM_HEADER_SIZE;
855 	}
856 
857 	// Fall through
858 
859 	case SEQ_INDEX: {
860 		// Call the Index encoder. It doesn't take any input, so
861 		// those pointers can be NULL.
862 		const lzma_ret ret = coder->index_encoder.code(
863 				coder->index_encoder.coder, allocator,
864 				NULL, NULL, 0,
865 				out, out_pos, out_size, LZMA_RUN);
866 		if (ret != LZMA_STREAM_END)
867 			return ret;
868 
869 		// Encode the Stream Footer into coder->buffer.
870 		coder->stream_flags.backward_size
871 				= lzma_index_size(coder->index);
872 		if (lzma_stream_footer_encode(&coder->stream_flags,
873 				coder->header) != LZMA_OK)
874 			return LZMA_PROG_ERROR;
875 
876 		coder->sequence = SEQ_STREAM_FOOTER;
877 	}
878 
879 	// Fall through
880 
881 	case SEQ_STREAM_FOOTER:
882 		lzma_bufcpy(coder->header, &coder->header_pos,
883 				sizeof(coder->header),
884 				out, out_pos, out_size);
885 		return coder->header_pos < sizeof(coder->header)
886 				? LZMA_OK : LZMA_STREAM_END;
887 	}
888 
889 	assert(0);
890 	return LZMA_PROG_ERROR;
891 }
892 
893 
894 static void
895 stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
896 {
897 	lzma_stream_coder *coder = coder_ptr;
898 
899 	// Threads must be killed before the output queue can be freed.
900 	threads_end(coder, allocator);
901 	lzma_outq_end(&coder->outq, allocator);
902 
903 	lzma_filters_free(coder->filters, allocator);
904 	lzma_filters_free(coder->filters_cache, allocator);
905 
906 	lzma_next_end(&coder->index_encoder, allocator);
907 	lzma_index_end(coder->index, allocator);
908 
909 	mythread_cond_destroy(&coder->cond);
910 	mythread_mutex_destroy(&coder->mutex);
911 
912 	lzma_free(coder, allocator);
913 	return;
914 }
915 
916 
917 static lzma_ret
918 stream_encoder_mt_update(void *coder_ptr, const lzma_allocator *allocator,
919 		const lzma_filter *filters,
920 		const lzma_filter *reversed_filters
921 			lzma_attribute((__unused__)))
922 {
923 	lzma_stream_coder *coder = coder_ptr;
924 
925 	// Applications shouldn't attempt to change the options when
926 	// we are already encoding the Index or Stream Footer.
927 	if (coder->sequence > SEQ_BLOCK)
928 		return LZMA_PROG_ERROR;
929 
930 	// For now the threaded encoder doesn't support changing
931 	// the options in the middle of a Block.
932 	if (coder->thr != NULL)
933 		return LZMA_PROG_ERROR;
934 
935 	// Check if the filter chain seems mostly valid. See the comment
936 	// in stream_encoder_mt_init().
937 	if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
938 		return LZMA_OPTIONS_ERROR;
939 
940 	// Make a copy to a temporary buffer first. This way the encoder
941 	// state stays unchanged if an error occurs in lzma_filters_copy().
942 	lzma_filter temp[LZMA_FILTERS_MAX + 1];
943 	return_if_error(lzma_filters_copy(filters, temp, allocator));
944 
945 	// Free the options of the old chain as well as the cache.
946 	lzma_filters_free(coder->filters, allocator);
947 	lzma_filters_free(coder->filters_cache, allocator);
948 
949 	// Copy the new filter chain in place.
950 	memcpy(coder->filters, temp, sizeof(temp));
951 
952 	return LZMA_OK;
953 }
954 
955 
956 /// Options handling for lzma_stream_encoder_mt_init() and
957 /// lzma_stream_encoder_mt_memusage()
958 static lzma_ret
959 get_options(const lzma_mt *options, lzma_options_easy *opt_easy,
960 		const lzma_filter **filters, uint64_t *block_size,
961 		uint64_t *outbuf_size_max)
962 {
963 	// Validate some of the options.
964 	if (options == NULL)
965 		return LZMA_PROG_ERROR;
966 
967 	if (options->flags != 0 || options->threads == 0
968 			|| options->threads > LZMA_THREADS_MAX)
969 		return LZMA_OPTIONS_ERROR;
970 
971 	if (options->filters != NULL) {
972 		// Filter chain was given, use it as is.
973 		*filters = options->filters;
974 	} else {
975 		// Use a preset.
976 		if (lzma_easy_preset(opt_easy, options->preset))
977 			return LZMA_OPTIONS_ERROR;
978 
979 		*filters = opt_easy->filters;
980 	}
981 
982 	// Block size
983 	if (options->block_size > 0) {
984 		if (options->block_size > BLOCK_SIZE_MAX)
985 			return LZMA_OPTIONS_ERROR;
986 
987 		*block_size = options->block_size;
988 	} else {
989 		// Determine the Block size from the filter chain.
990 		*block_size = lzma_mt_block_size(*filters);
991 		if (*block_size == 0)
992 			return LZMA_OPTIONS_ERROR;
993 
994 		assert(*block_size <= BLOCK_SIZE_MAX);
995 	}
996 
997 	// Calculate the maximum amount output that a single output buffer
998 	// may need to hold. This is the same as the maximum total size of
999 	// a Block.
1000 	*outbuf_size_max = lzma_block_buffer_bound64(*block_size);
1001 	if (*outbuf_size_max == 0)
1002 		return LZMA_MEM_ERROR;
1003 
1004 	return LZMA_OK;
1005 }
1006 
1007 
1008 static void
1009 get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out)
1010 {
1011 	lzma_stream_coder *coder = coder_ptr;
1012 
1013 	// Lock coder->mutex to prevent finishing threads from moving their
1014 	// progress info from the worker_thread structure to lzma_stream_coder.
1015 	mythread_sync(coder->mutex) {
1016 		*progress_in = coder->progress_in;
1017 		*progress_out = coder->progress_out;
1018 
1019 		for (size_t i = 0; i < coder->threads_initialized; ++i) {
1020 			mythread_sync(coder->threads[i].mutex) {
1021 				*progress_in += coder->threads[i].progress_in;
1022 				*progress_out += coder->threads[i]
1023 						.progress_out;
1024 			}
1025 		}
1026 	}
1027 
1028 	return;
1029 }
1030 
1031 
/// \brief      Initialize or reinitialize the multithreaded Stream encoder
///
/// Validates the options, allocates the base coder structure if
/// next->coder is NULL, allocates the per-thread structures when the
/// requested thread count differs from the current one (otherwise the
/// existing threads are stopped and reused), and then resets the output
/// queue, timeout, filter chain, Index, Stream Header, and progress info.
///
/// \param      next        Coder slot to initialize
/// \param      allocator   Allocator passed to all allocation helpers
/// \param      options     Multithreading options; validated by get_options()
///
/// \return     LZMA_OK on success. This function itself returns
///             LZMA_MEM_ERROR, LZMA_OPTIONS_ERROR, LZMA_PROG_ERROR, or
///             LZMA_UNSUPPORTED_CHECK; calls wrapped in return_if_error()
///             may return other error codes from the called functions.
static lzma_ret
stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
		const lzma_mt *options)
{
	lzma_next_coder_init(&stream_encoder_mt_init, next, allocator);

	// Get the filter chain, the block size, and the maximum size of
	// a single output buffer. "easy" only provides storage for the
	// preset's filter chain when options->filters is NULL.
	lzma_options_easy easy;
	const lzma_filter *filters;
	uint64_t block_size;
	uint64_t outbuf_size_max;
	return_if_error(get_options(options, &easy, &filters,
			&block_size, &outbuf_size_max));

	// Both sizes are stored as size_t further below, so when size_t is
	// narrower than 64 bits, values that don't fit must be rejected.
#if SIZE_MAX < UINT64_MAX
	if (block_size > SIZE_MAX || outbuf_size_max > SIZE_MAX)
		return LZMA_MEM_ERROR;
#endif

	// Validate the filter chain so that we can give an error in this
	// function instead of delaying it to the first call to lzma_code().
	// The memory usage calculation verifies the filter chain as
	// a side effect so we take advantage of that. It's not a perfect
	// check though as raw encoder allows LZMA1 too but such problems
	// will be caught eventually with Block Header encoder.
	if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
		return LZMA_OPTIONS_ERROR;

	// Validate the Check ID. An ID above LZMA_CHECK_ID_MAX is treated
	// as a programming error; an ID in range that
	// lzma_check_is_supported() rejects gives LZMA_UNSUPPORTED_CHECK.
	if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
		return LZMA_PROG_ERROR;

	if (!lzma_check_is_supported(options->check))
		return LZMA_UNSUPPORTED_CHECK;

	// Allocate and initialize the base structure if needed.
	lzma_stream_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;

		// For the mutex and condition variable initializations
		// the error handling has to be done here because
		// stream_encoder_mt_end() doesn't know if they have
		// already been initialized or not.
		if (mythread_mutex_init(&coder->mutex)) {
			lzma_free(coder, allocator);
			next->coder = NULL;
			return LZMA_MEM_ERROR;
		}

		if (mythread_cond_init(&coder->cond)) {
			// The mutex was initialized successfully above,
			// so it has to be destroyed before freeing coder.
			mythread_mutex_destroy(&coder->mutex);
			lzma_free(coder, allocator);
			next->coder = NULL;
			return LZMA_MEM_ERROR;
		}

		next->code = &stream_encode_mt;
		next->end = &stream_encoder_mt_end;
		next->get_progress = &get_progress;
		next->update = &stream_encoder_mt_update;

		// Put every member that is freed or reinitialized later in
		// this function into a known empty state so that the code
		// below works the same way on the first initialization
		// and on reinitialization.
		coder->filters[0].id = LZMA_VLI_UNKNOWN;
		coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
		coder->index_encoder = LZMA_NEXT_CODER_INIT;
		coder->index = NULL;
		memzero(&coder->outq, sizeof(coder->outq));
		coder->threads = NULL;
		coder->threads_max = 0;
		coder->threads_initialized = 0;
	}

	// Basic initializations
	coder->sequence = SEQ_STREAM_HEADER;
	coder->block_size = (size_t)(block_size);
	coder->outbuf_alloc_size = (size_t)(outbuf_size_max);
	coder->thread_error = LZMA_OK;
	coder->thr = NULL;

	// Allocate the thread-specific base structures.
	assert(options->threads > 0);
	if (coder->threads_max != options->threads) {
		// The thread count changed (or this is the first
		// initialization, where threads_max is 0): end the old
		// threads and allocate a new array of the requested size.
		threads_end(coder, allocator);

		// Reset the bookkeeping before allocating so that it is
		// consistent (no threads) if the allocation below fails.
		coder->threads = NULL;
		coder->threads_max = 0;

		coder->threads_initialized = 0;
		coder->threads_free = NULL;

		coder->threads = lzma_alloc(
				options->threads * sizeof(worker_thread),
				allocator);
		if (coder->threads == NULL)
			return LZMA_MEM_ERROR;

		coder->threads_max = options->threads;
	} else {
		// Reuse the old structures and threads. Tell the running
		// threads to stop and wait until they have stopped.
		threads_stop(coder, true);
	}

	// Output queue
	return_if_error(lzma_outq_init(&coder->outq, allocator,
			options->threads));

	// Timeout
	coder->timeout = options->timeout;

	// Free the old filter chain and the cache.
	lzma_filters_free(coder->filters, allocator);
	lzma_filters_free(coder->filters_cache, allocator);

	// Copy the new filter chain.
	return_if_error(lzma_filters_copy(
			filters, coder->filters, allocator));

	// Index: free the old one (coder->index is NULL on the first
	// initialization) and create a fresh one.
	lzma_index_end(coder->index, allocator);
	coder->index = lzma_index_init(allocator);
	if (coder->index == NULL)
		return LZMA_MEM_ERROR;

	// Stream Header: encode it into coder->header now and start
	// from its first byte.
	coder->stream_flags.version = 0;
	coder->stream_flags.check = options->check;
	return_if_error(lzma_stream_header_encode(
			&coder->stream_flags, coder->header));

	coder->header_pos = 0;

	// Progress info: progress_out starts at the size of the
	// Stream Header.
	coder->progress_in = 0;
	coder->progress_out = LZMA_STREAM_HEADER_SIZE;

	return LZMA_OK;
}
1174 
1175 
#ifdef HAVE_SYMBOL_VERSIONS_LINUX
// These are for compatibility with binaries linked against liblzma that
// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2
// but it has been added here anyway since someone might misread the
// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist.
//
// Both compatibility declarations are aliases of
// lzma_stream_encoder_mt_52, so all three symbol versions share
// one implementation.
LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha",
	lzma_ret, lzma_stream_encoder_mt_512a)(
		lzma_stream *strm, const lzma_mt *options)
		lzma_nothrow lzma_attr_warn_unused_result
		__attribute__((__alias__("lzma_stream_encoder_mt_52")));

LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2",
	lzma_ret, lzma_stream_encoder_mt_522)(
		lzma_stream *strm, const lzma_mt *options)
		lzma_nothrow lzma_attr_warn_unused_result
		__attribute__((__alias__("lzma_stream_encoder_mt_52")));

// NOTE(review): "@@" presumably marks XZ_5.2 as the default version of
// the symbol -- confirm against the definition of LZMA_SYMVER_API.
LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2",
	lzma_ret, lzma_stream_encoder_mt_52)(
		lzma_stream *strm, const lzma_mt *options)
		lzma_nothrow lzma_attr_warn_unused_result;

// Rename the function definition that follows so that it defines
// lzma_stream_encoder_mt_52, the target of the aliases above.
#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52
#endif
1201 extern LZMA_API(lzma_ret)
1202 lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
1203 {
1204 	lzma_next_strm_init(stream_encoder_mt_init, strm, options);
1205 
1206 	strm->internal->supported_actions[LZMA_RUN] = true;
1207 // 	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
1208 	strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
1209 	strm->internal->supported_actions[LZMA_FULL_BARRIER] = true;
1210 	strm->internal->supported_actions[LZMA_FINISH] = true;
1211 
1212 	return LZMA_OK;
1213 }
1214 
1215 
#ifdef HAVE_SYMBOL_VERSIONS_LINUX
// Versioned symbols for lzma_stream_encoder_mt_memusage. The _512a and
// _522 declarations are aliases of lzma_stream_encoder_mt_memusage_52,
// so all three symbol versions share one implementation.
LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha",
	uint64_t, lzma_stream_encoder_mt_memusage_512a)(
	const lzma_mt *options) lzma_nothrow lzma_attr_pure
	__attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));

LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2",
	uint64_t, lzma_stream_encoder_mt_memusage_522)(
	const lzma_mt *options) lzma_nothrow lzma_attr_pure
	__attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));

LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2",
	uint64_t, lzma_stream_encoder_mt_memusage_52)(
	const lzma_mt *options) lzma_nothrow lzma_attr_pure;

// Rename the function definition that follows so that it defines
// lzma_stream_encoder_mt_memusage_52, the target of the aliases above.
#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52
#endif
1233 // This function name is a monster but it's consistent with the older
1234 // monster names. :-( 31 chars is the max that C99 requires so in that
1235 // sense it's not too long. ;-)
1236 extern LZMA_API(uint64_t)
1237 lzma_stream_encoder_mt_memusage(const lzma_mt *options)
1238 {
1239 	lzma_options_easy easy;
1240 	const lzma_filter *filters;
1241 	uint64_t block_size;
1242 	uint64_t outbuf_size_max;
1243 
1244 	if (get_options(options, &easy, &filters, &block_size,
1245 			&outbuf_size_max) != LZMA_OK)
1246 		return UINT64_MAX;
1247 
1248 	// Memory usage of the input buffers
1249 	const uint64_t inbuf_memusage = options->threads * block_size;
1250 
1251 	// Memory usage of the filter encoders
1252 	uint64_t filters_memusage = lzma_raw_encoder_memusage(filters);
1253 	if (filters_memusage == UINT64_MAX)
1254 		return UINT64_MAX;
1255 
1256 	filters_memusage *= options->threads;
1257 
1258 	// Memory usage of the output queue
1259 	const uint64_t outq_memusage = lzma_outq_memusage(
1260 			outbuf_size_max, options->threads);
1261 	if (outq_memusage == UINT64_MAX)
1262 		return UINT64_MAX;
1263 
1264 	// Sum them with overflow checking.
1265 	uint64_t total_memusage = LZMA_MEMUSAGE_BASE
1266 			+ sizeof(lzma_stream_coder)
1267 			+ options->threads * sizeof(worker_thread);
1268 
1269 	if (UINT64_MAX - total_memusage < inbuf_memusage)
1270 		return UINT64_MAX;
1271 
1272 	total_memusage += inbuf_memusage;
1273 
1274 	if (UINT64_MAX - total_memusage < filters_memusage)
1275 		return UINT64_MAX;
1276 
1277 	total_memusage += filters_memusage;
1278 
1279 	if (UINT64_MAX - total_memusage < outq_memusage)
1280 		return UINT64_MAX;
1281 
1282 	return total_memusage + outq_memusage;
1283 }
1284