Home | History | Annotate | Line # | Download | only in common
      1 // SPDX-License-Identifier: 0BSD
      2 
      3 ///////////////////////////////////////////////////////////////////////////////
      4 //
      5 /// \file       common.h
      6 /// \brief      Definitions common to the whole liblzma library
      7 //
      8 //  Author:     Lasse Collin
      9 //
     10 ///////////////////////////////////////////////////////////////////////////////
     11 
     12 #ifndef LZMA_COMMON_H
     13 #define LZMA_COMMON_H
     14 
     15 #include "sysdefs.h"
     16 #include "mythread.h"
     17 #include "tuklib_integer.h"
     18 
     19 // LZMA_API_EXPORT is used to mark the exported API functions.
     20 // It's used to define the LZMA_API macro.
     21 //
     22 // lzma_attr_visibility_hidden is used for marking *declarations* of extern
     23 // variables that are internal to liblzma (-fvisibility=hidden alone is
     24 // enough to hide the *definitions*). Such markings allow slightly more
     25 // efficient code to access those variables in ELF shared libraries.
        //
        // Three cases are handled: Windows/Cygwin (dllexport only when DLL_EXPORT
        // is defined), builds where HAVE_VISIBILITY is nonzero (visibility
        // attributes), and a fallback in which both macros expand to nothing.
     26 #if defined(_WIN32) || defined(__CYGWIN__)
     27 #	ifdef DLL_EXPORT
     28 #		define LZMA_API_EXPORT __declspec(dllexport)
     29 #	else
     30 #		define LZMA_API_EXPORT
     31 #	endif
     32 #	define lzma_attr_visibility_hidden
     33 // Don't use ifdef or defined() below.
        // HAVE_VISIBILITY is tested by value, so a definition of 0 disables this
        // branch (presumably the build system defines it to 0 or 1 -- confirm).
     34 #elif HAVE_VISIBILITY
     35 #	define LZMA_API_EXPORT __attribute__((__visibility__("default")))
     36 #	define lzma_attr_visibility_hidden \
     37 			__attribute__((__visibility__("hidden")))
     38 #else
     39 #	define LZMA_API_EXPORT
     40 #	define lzma_attr_visibility_hidden
     41 #endif
     42 
        // LZMA_API(type) builds the prefix of an exported function declaration or
        // definition: export marker, return type, then calling convention.
        // LZMA_API_CALL is not defined in this file; presumably it comes from
        // lzma.h, which is included right after this macro -- confirm.
     43 #define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL
     44 
     45 #include "lzma.h"
     46 
     47 // This is for detecting modern GCC and Clang attributes
     48 // like __symver__ in GCC >= 10.
        //
        // Compilers without __has_attribute get a fallback that reports every
        // attribute as unsupported (0), so lzma_has_attribute(x) can always be
        // used in #if expressions.
     49 #ifdef __has_attribute
     50 #	define lzma_has_attribute(attr) __has_attribute(attr)
     51 #else
     52 #	define lzma_has_attribute(attr) 0
     53 #endif
     54 
     55 // The extra symbol versioning in the C files may only be used when
     56 // building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined
     57 // to 2 then symbol versioning is done only if also PIC is defined.
     58 // By default Libtool defines PIC when building a shared library and
     59 // doesn't define it when building a static library but it can be
     60 // overridden with --with-pic and --without-pic. configure lets us rely
     61 // on PIC if neither --with-pic nor --without-pic was used.
        //
        // Net effect: when the macro is 2 and PIC is not defined, the macro is
        // undefined here so that later #ifdef checks see versioning as disabled.
     62 #if defined(HAVE_SYMBOL_VERSIONS_LINUX) \
     63 		&& (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC))
     64 #	undef HAVE_SYMBOL_VERSIONS_LINUX
     65 #endif
     66 
     67 #ifdef HAVE_SYMBOL_VERSIONS_LINUX
     68 // To keep link-time optimization (LTO, -flto) working with GCC,
     69 // the __symver__ attribute must be used instead of __asm__(".symver ...").
     70 // Otherwise the symbol versions may be lost, resulting in broken liblzma
     71 // that has wrong default versions in the exported symbol list!
     72 // The attribute was added in GCC 10; LTO with older GCC is not supported.
     73 //
     74 // To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function
     75 // declarations (including those with __alias__ attribute) and LZMA_API with
     76 // the function definitions. This means a little bit of silly copy-and-paste
     77 // between declarations and definitions though.
     78 //
     79 // As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the
     80 // very convenient @@@ isn't supported (it's supported by GNU assembler
     81 // since 2000). When using @@ instead of @@@, the internal name must not be
     82 // the same as the external name to avoid problems in some situations. This
     83 // is why "#define foo_52 foo" is needed for the default symbol versions.
     84 //
     85 // __has_attribute is supported before GCC 10 and it is supported in Clang 14
     86 // too (which doesn't support __symver__) so use it to detect if __symver__
     87 // is available. This should be far more reliable than looking at compiler
     88 // version macros as nowadays especially __GNUC__ is defined by many compilers.
        //
        // LZMA_SYMVER_API(extnamever, type, intname) declares the function
        // "intname" with return type "type" and binds it to the versioned
        // external symbol named by "extnamever". extnamever must be a string
        // literal: the fallback branch concatenates it into the asm string.
     89 #	if lzma_has_attribute(__symver__)
     90 #		define LZMA_SYMVER_API(extnamever, type, intname) \
     91 			extern __attribute__((__symver__(extnamever))) \
     92 					LZMA_API(type) intname
     93 #	else
        // Fallback for compilers without __symver__: emit the .symver assembler
        // directive first, then the plain extern declaration.
     94 #		define LZMA_SYMVER_API(extnamever, type, intname) \
     95 			__asm__(".symver " #intname "," extnamever); \
     96 			extern LZMA_API(type) intname
     97 #	endif
     98 #endif
     99 
    100 // MSVC has __forceinline which shouldn't be combined with the inline keyword
    101 // (results in a warning).
    102 //
    103 // GCC 3.1 added always_inline attribute so we don't need to check
    104 // for __GNUC__ version. Similarly, all relevant Clang versions
    105 // support it (at least Clang 3.0.0 does already).
    106 // Other compilers that also support __has_attribute (Solaris Studio)
    107 // might support it too, so do that check as well.
        //
        // Compilers matching none of the checks fall back to plain inline.
    108 #if defined(_MSC_VER)
    109 #	define lzma_always_inline __forceinline
    110 #elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) \
    111 		|| lzma_has_attribute(__always_inline__)
    112 #	define lzma_always_inline inline __attribute__((__always_inline__))
    113 #else
    114 #	define lzma_always_inline inline
    115 #endif
    116 
    117 // These allow helping the compiler in some often-executed branches, whose
    118 // result is almost always the same.
    119 #ifdef __GNUC__
    120 #	define likely(expr) __builtin_expect(expr, true)
    121 #	define unlikely(expr) __builtin_expect(expr, false)
    122 #else
    123 #	define likely(expr) (expr)
    124 #	define unlikely(expr) (expr)
    125 #endif
    126 
    127 
    128 /// Size of temporary buffers needed in some filters
        /// (presumably in bytes -- confirm against the filters that use it).
    129 #define LZMA_BUFFER_SIZE 4096
    130 
    131 
    132 /// Maximum number of worker threads within one multithreaded component.
    133 /// The limit exists solely to make it simpler to prevent integer overflows
    134 /// when allocating structures etc. This should be big enough for now...
    135 /// the code won't scale anywhere close to this number anyway.
    136 #define LZMA_THREADS_MAX 16384
    137 
    138 
    139 /// Starting value for memory usage estimates. Instead of calculating size
    140 /// of _every_ structure and taking into account malloc() overhead etc., we
    141 /// add a base size to all memory usage estimates. It's not very accurate
    142 /// but should be easily good enough.
        /// The value is 1 << 15 = 32768, typed as a 64-bit unsigned constant.
    143 #define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15)
    144 
    145 /// Start of internal Filter ID space. These IDs must never be used
    146 /// in Streams.
        /// The value is 2^62, built with LZMA_VLI_C so it has the lzma_vli
        /// constant type (LZMA_VLI_C is not defined in this file).
    147 #define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62)
    148 
    149 
    150 /// Supported flags that can be passed to lzma_stream_decoder(),
    151 /// lzma_auto_decoder(), or lzma_stream_decoder_mt().
        /// This is the bitwise OR of all six accepted flag constants, wrapped in
        /// parentheses so it can be used safely inside larger expressions.
        /// NOTE(review): presumably the decoders reject any bit outside this
        /// mask -- confirm in the decoder initialization code.
    152 #define LZMA_SUPPORTED_FLAGS \
    153 	( LZMA_TELL_NO_CHECK \
    154 	| LZMA_TELL_UNSUPPORTED_CHECK \
    155 	| LZMA_TELL_ANY_CHECK \
    156 	| LZMA_IGNORE_CHECK \
    157 	| LZMA_CONCATENATED \
    158 	| LZMA_FAIL_FAST )
    159 
    160 
    161 /// Largest valid lzma_action value as unsigned integer.
        /// It is LZMA_FULL_BARRIER cast to unsigned int, and it sizes the
        /// supported_actions[] array in struct lzma_internal_s.
    162 #define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER))
    163 
    164 
    165 /// Special return value (lzma_ret) to indicate that a timeout was reached
    166 /// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to
    167 /// LZMA_OK in lzma_code().
        /// Alias of LZMA_RET_INTERNAL1, which is not defined in this file.
    168 #define LZMA_TIMED_OUT LZMA_RET_INTERNAL1
    169 
    170 /// Special return value (lzma_ret) for use in stream_decoder_mt.c to
    171 /// indicate Index was detected instead of a Block Header.
        /// Alias of LZMA_RET_INTERNAL2, which is not defined in this file.
    172 #define LZMA_INDEX_DETECTED LZMA_RET_INTERNAL2
    173 
    174 
    175 typedef struct lzma_next_coder_s lzma_next_coder; // defined later in this file
    176 
    177 typedef struct lzma_filter_info_s lzma_filter_info; // defined later in this file
    178 
    179 
    180 /// Type of a function used to initialize a filter encoder or decoder
        /// It receives the coder slot to fill in (next), the allocator to use,
        /// and a pointer into an array of lzma_filter_info entries (filters).
    181 typedef lzma_ret (*lzma_init_function)(
    182 		lzma_next_coder *next, const lzma_allocator *allocator,
    183 		const lzma_filter_info *filters);
    184 
    185 /// Type of a function to do some kind of coding work (filters, Stream,
    186 /// Block encoders/decoders etc.). Some special coders don't use both
    187 /// input and output buffers, but for simplicity they still use this same
    188 /// function prototype.
        /// NOTE(review): presumably *in_pos and *out_pos are cursors into
        /// in[0..in_size) and out[0..out_size) that the function advances, as
        /// documented for lzma_bufcpy() -- confirm against the coders.
    189 typedef lzma_ret (*lzma_code_function)(
    190 		void *coder, const lzma_allocator *allocator,
    191 		const uint8_t *restrict in, size_t *restrict in_pos,
    192 		size_t in_size, uint8_t *restrict out,
    193 		size_t *restrict out_pos, size_t out_size,
    194 		lzma_action action);
    195 
    196 /// Type of a function to free the memory allocated for the coder
        /// It is called with the coder-specific data pointer and the allocator.
    197 typedef void (*lzma_end_function)(
    198 		void *coder, const lzma_allocator *allocator);
    199 
    200 
    201 /// Raw coder validates and converts an array of lzma_filter structures to
    202 /// an array of lzma_filter_info structures. This array is used with
    203 /// lzma_next_filter_init to initialize the filter chain.
    204 struct lzma_filter_info_s {
    205 	/// Filter ID. This can be used to share the same initialization
    206 	/// function *and* data structures with different Filter IDs
    207 	/// (LZMA_FILTER_LZMA1EXT does it), and also by the encoder
    208 	/// with lzma_filters_update() if filter chain is updated
    209 	/// in the middle of a raw stream or Block (LZMA_SYNC_FLUSH).
    210 	lzma_vli id;
    211 
    212 	/// Pointer to function used to initialize the filter.
    213 	/// This is NULL to indicate end of array.
    214 	lzma_init_function init;
    215 
    216 	/// Pointer to filter's options structure
        	/// (untyped here; presumably its real type depends on the
        	/// filter -- confirm against the filter init functions).
    217 	void *options;
    218 };
    219 
    220 
    221 /// Hold data and function pointers of the next filter in the chain.
        /// Apart from coder, id and init, every member is a callback that takes
        /// the coder-specific data pointer as its first argument.
    222 struct lzma_next_coder_s {
    223 	/// Pointer to coder-specific data
    224 	void *coder;
    225 
    226 	/// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't
    227 	/// point to a filter coder.
    228 	lzma_vli id;
    229 
    230 	/// "Pointer" to init function. This is never called here.
    231 	/// We need only to detect if we are initializing a coder
    232 	/// that was allocated earlier. See lzma_next_coder_init and
    233 	/// lzma_next_strm_init macros in this file.
        	/// It is stored as uintptr_t so that the addresses of init
        	/// functions with different prototypes can be compared.
    234 	uintptr_t init;
    235 
    236 	/// Pointer to function to do the actual coding
    237 	lzma_code_function code;
    238 
    239 	/// Pointer to function to free lzma_next_coder.coder. This can
    240 	/// be NULL; in that case, lzma_free is called to free
    241 	/// lzma_next_coder.coder.
    242 	lzma_end_function end;
    243 
    244 	/// Pointer to a function to get progress information. If this is NULL,
    245 	/// lzma_stream.total_in and .total_out are used instead.
        	/// The values are written through progress_in and progress_out.
    246 	void (*get_progress)(void *coder,
    247 			uint64_t *progress_in, uint64_t *progress_out);
    248 
    249 	/// Pointer to function to return the type of the integrity check.
    250 	/// Most coders won't support this.
    251 	lzma_check (*get_check)(const void *coder);
    252 
    253 	/// Pointer to function to get and/or change the memory usage limit.
    254 	/// If new_memlimit == 0, the limit is not changed.
        	/// NOTE(review): presumably the current usage and the previous
        	/// limit are written to *memusage and *old_memlimit -- confirm.
    255 	lzma_ret (*memconfig)(void *coder, uint64_t *memusage,
    256 			uint64_t *old_memlimit, uint64_t new_memlimit);
    257 
    258 	/// Update the filter-specific options or the whole filter chain
    259 	/// in the encoder.
        	/// The chain is passed in both orders (filters and
        	/// reversed_filters).
    260 	lzma_ret (*update)(void *coder, const lzma_allocator *allocator,
    261 			const lzma_filter *filters,
    262 			const lzma_filter *reversed_filters);
    263 
    264 	/// Set how many bytes of output this coder may produce at maximum.
    265 	/// On success LZMA_OK must be returned.
    266 	/// If the filter chain as a whole cannot support this feature,
    267 	/// this must return LZMA_OPTIONS_ERROR.
    268 	/// If no input has been given to the coder and the requested limit
    269 	/// is too small, this must return LZMA_BUF_ERROR. If input has been
    270 	/// seen, LZMA_OK is allowed too.
    271 	lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size,
    272 			uint64_t out_limit);
    273 };
    274 
    275 
    276 /// Macro to initialize lzma_next_coder structure
        /// It expands to a compound literal in which every pointer member is
        /// NULL, init is the integer value of NULL, and id is LZMA_VLI_UNKNOWN.
        /// Designated initializers are used, so the order of the entries here
        /// (init before id) does not need to match the member order of the
        /// struct.
    277 #define LZMA_NEXT_CODER_INIT \
    278 	(lzma_next_coder){ \
    279 		.coder = NULL, \
    280 		.init = (uintptr_t)(NULL), \
    281 		.id = LZMA_VLI_UNKNOWN, \
    282 		.code = NULL, \
    283 		.end = NULL, \
    284 		.get_progress = NULL, \
    285 		.get_check = NULL, \
    286 		.memconfig = NULL, \
    287 		.update = NULL, \
    288 		.set_out_limit = NULL, \
    289 	}
    290 
    291 
    292 /// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to
    293 /// this is stored in lzma_stream.
    294 struct lzma_internal_s {
    295 	/// The actual coder that should do something useful
    296 	lzma_next_coder next;
    297 
    298 	/// Track the state of the coder. This is used to validate arguments
    299 	/// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH
    300 	/// is used on every call to lzma_code until next.code has returned
    301 	/// LZMA_STREAM_END.
        	/// NOTE(review): the ISEQ_RUN..ISEQ_FULL_BARRIER names mirror
        	/// lzma_action names; ISEQ_END and ISEQ_ERROR presumably mark
        	/// a finished or failed stream -- confirm in lzma_code().
    302 	enum {
    303 		ISEQ_RUN,
    304 		ISEQ_SYNC_FLUSH,
    305 		ISEQ_FULL_FLUSH,
    306 		ISEQ_FINISH,
    307 		ISEQ_FULL_BARRIER,
    308 		ISEQ_END,
    309 		ISEQ_ERROR,
    310 	} sequence;
    311 
    312 	/// A copy of lzma_stream avail_in. This is used to verify that the
    313 	/// amount of input doesn't change once e.g. LZMA_FINISH has been
    314 	/// used.
    315 	size_t avail_in;
    316 
    317 	/// Indicates which lzma_action values are allowed by next.code.
        	/// The array has LZMA_ACTION_MAX + 1 entries, i.e. one entry
        	/// for each value from 0 to the largest valid lzma_action.
    318 	bool supported_actions[LZMA_ACTION_MAX + 1];
    319 
    320 	/// If true, lzma_code will return LZMA_BUF_ERROR if no progress was
    321 	/// made (no input consumed and no output produced by next.code).
    322 	bool allow_buf_error;
    323 };
    324 
    325 
    326 /// Allocates memory
        /// (size is the requested amount; allocator selects the allocation
        /// functions. lzma_attr_alloc_size(1) is not defined in this file;
        /// presumably it tells the compiler that argument 1 is the allocation
        /// size -- confirm.)
    327 lzma_attr_alloc_size(1)
    328 extern void *lzma_alloc(size_t size, const lzma_allocator *allocator);
    329 
    330 /// Allocates memory and zeroes it (like calloc()). This can be faster
    331 /// than lzma_alloc() + memzero() while being backward compatible with
    332 /// custom allocators.
    333 lzma_attr_alloc_size(1)
    334 extern void *lzma_alloc_zero(size_t size, const lzma_allocator *allocator);
    335 
    336 /// Frees memory
        /// (ptr is the memory to release; allocator selects the functions.)
    337 extern void lzma_free(void *ptr, const lzma_allocator *allocator);
    338 
    339 
    340 /// Allocates strm->internal if it is NULL, and initializes *strm and
    341 /// strm->internal. This function is only called via lzma_next_strm_init macro.
        /// The macro returns from its caller when this does not return LZMA_OK.
    342 extern lzma_ret lzma_strm_init(lzma_stream *strm);
    343 
    344 /// Initializes the next filter in the chain, if any. This takes care of
    345 /// freeing the memory of previously initialized filter if it is different
    346 /// than the filter being initialized now. This way the actual filter
    347 /// initialization functions don't need to use lzma_next_coder_init macro.
    348 extern lzma_ret lzma_next_filter_init(lzma_next_coder *next,
    349 		const lzma_allocator *allocator,
    350 		const lzma_filter_info *filters);
    351 
    352 /// Update the next filter in the chain, if any. This checks that
    353 /// the application is not trying to change the Filter IDs.
    354 extern lzma_ret lzma_next_filter_update(
    355 		lzma_next_coder *next, const lzma_allocator *allocator,
    356 		const lzma_filter *reversed_filters);
    357 
    358 /// Frees the memory allocated for next->coder either using next->end or,
    359 /// if next->end is NULL, using lzma_free.
        /// The lzma_next_coder_init macro calls this when a different init
        /// function is about to take over the lzma_next_coder structure.
    360 extern void lzma_next_end(lzma_next_coder *next,
    361 		const lzma_allocator *allocator);
    362 
    363 
    364 /// Copy as much data as possible from in[] to out[] and update *in_pos
    365 /// and *out_pos accordingly. Returns the number of bytes copied.
        /// All pointer arguments are restrict-qualified, so the caller promises
        /// that the buffers and the two position variables do not alias.
        /// NOTE(review): presumably in_size and out_size are the total sizes of
        /// in[] and out[], so the copy is bounded by the space remaining after
        /// *in_pos and *out_pos -- confirm in the definition.
    366 extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
    367 		size_t in_size, uint8_t *restrict out,
    368 		size_t *restrict out_pos, size_t out_size);
    369 
    370 
    371 /// \brief      Return if expression doesn't evaluate to LZMA_OK
    372 ///
    373 /// There are several situations where we want to return immediately
    374 /// with the value of expr if it isn't LZMA_OK. This macro shortens
    375 /// the code a little.
        ///
        /// expr is evaluated exactly once and its value is kept in a block-local
        /// lzma_ret. Because the macro contains a return statement, it can only
        /// be used in functions whose return type accepts an lzma_ret. The
        /// do { } while (0) wrapper makes the expansion a single statement.
    376 #define return_if_error(expr) \
    377 do { \
    378 	const lzma_ret ret_ = (expr); \
    379 	if (ret_ != LZMA_OK) \
    380 		return ret_; \
    381 } while (0)
    382 
    383 
    384 /// If next isn't already initialized, free the previous coder. Then mark
    385 /// that next is _possibly_ initialized for the coder using this macro.
    386 /// "Possibly" means that if e.g. allocation of next->coder fails, the
    387 /// structure isn't actually initialized for this coder, but leaving
    388 /// next->init to func is still OK.
        ///
        /// "Already initialized" is decided by comparing the address of func,
        /// converted to uintptr_t, with next->init. Both func and next are
        /// evaluated more than once, so the arguments must not have side effects.
    389 #define lzma_next_coder_init(func, next, allocator) \
    390 do { \
    391 	if ((uintptr_t)(func) != (next)->init) \
    392 		lzma_next_end(next, allocator); \
    393 	(next)->init = (uintptr_t)(func); \
    394 } while (0)
    395 
    396 
    397 /// Initializes lzma_strm and calls func() to initialize strm->internal->next.
    398 /// (The function being called will use lzma_next_coder_init()). If
    399 /// initialization fails, memory that wasn't freed by func() is freed
    400 /// along strm->internal.
        ///
        /// func is called with &strm->internal->next, strm->allocator, and then
        /// the variadic arguments; at least one such argument is needed because
        /// __VA_ARGS__ follows a comma. The macro returns from the calling
        /// function if lzma_strm_init() or func() fails (after lzma_end(strm) in
        /// the latter case); on success execution continues after the macro.
        /// strm is evaluated several times, so it must not have side effects.
    401 #define lzma_next_strm_init(func, strm, ...) \
    402 do { \
    403 	return_if_error(lzma_strm_init(strm)); \
    404 	const lzma_ret ret_ = func(&(strm)->internal->next, \
    405 			(strm)->allocator, __VA_ARGS__); \
    406 	if (ret_ != LZMA_OK) { \
    407 		lzma_end(strm); \
    408 		return ret_; \
    409 	} \
    410 } while (0)
    411 
    412 #endif
    413