/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * decompress_common.h - Code shared by the XPRESS and LZX decompressors
 *
 * Copyright (C) 2015 Eric Biggers
 */

#ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
#define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H

#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/unaligned.h>

/* "Force inline" macro (not required, but helpful for performance) */
#define forceinline __always_inline

/* Enable whole-word match copying on selected architectures */
#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
#  define FAST_UNALIGNED_ACCESS
#endif

/* Size of a machine word */
#define WORDBYTES (sizeof(size_t))

static forceinline void copy_unaligned_word(const void *src, void *dst)
{
	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
}

/* Generate a "word" with platform-dependent size whose bytes all contain the
 * value 'b'.
 */
static forceinline size_t repeat_byte(u8 b)
{
	size_t v;

	v = b;
	v |= v << 8;
	v |= v << 16;
	v |= v << ((WORDBYTES == 8) ? 32 : 0);
	return v;
}
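/*
 * Illustrative note (not from the original source): on a 64-bit machine
 * repeat_byte(0xAB) evaluates to 0xABABABABABABABAB, and on a 32-bit machine
 * to 0xABABABAB.  lz_copy() below uses this to expand an offset-1 match as a
 * word-at-a-time fill of the previous byte.
 */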
/* Structure that encapsulates a block of in-memory data being interpreted as a
 * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
 * to be stored in little endian 16-bit coding units, with the bits ordered high
 * to low.
 */
struct input_bitstream {

	/* Bits that have been read from the input buffer.  The bits are
	 * left-justified; the next bit is always bit 31.
	 */
	u32 bitbuf;

	/* Number of bits currently held in @bitbuf. */
	u32 bitsleft;

	/* Pointer to the next byte to be retrieved from the input buffer. */
	const u8 *next;

	/* Pointer to just past the end of the input buffer. */
	const u8 *end;
};

/* Initialize a bitstream to read from the specified input buffer. */
static forceinline void init_input_bitstream(struct input_bitstream *is,
					     const void *buffer, u32 size)
{
	is->bitbuf = 0;
	is->bitsleft = 0;
	is->next = buffer;
	is->end = is->next + size;
}

/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
 * bits.  Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
 * may be called on the bitstream to peek or remove up to @num_bits bits.  Note
 * that @num_bits must be <= 16.
 */
static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
					      u32 num_bits)
{
	if (is->bitsleft < num_bits) {
		if (is->end - is->next >= 2) {
			is->bitbuf |= (u32)get_unaligned_le16(is->next)
					<< (16 - is->bitsleft);
			is->next += 2;
		}
		is->bitsleft += 16;
	}
}

/* Return the next @num_bits bits from the bitstream, without removing them.
 * There must be at least @num_bits remaining in the buffer variable, from a
 * previous call to bitstream_ensure_bits().
 */
static forceinline u32 bitstream_peek_bits(const struct input_bitstream *is,
					   const u32 num_bits)
{
	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}

/* Remove @num_bits from the bitstream.  There must be at least @num_bits
 * remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline void bitstream_remove_bits(struct input_bitstream *is,
					      u32 num_bits)
{
	is->bitbuf <<= num_bits;
	is->bitsleft -= num_bits;
}

/* Remove and return @num_bits bits from the bitstream.  There must be at least
 * @num_bits remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline u32 bitstream_pop_bits(struct input_bitstream *is,
					  u32 num_bits)
{
	u32 bits = bitstream_peek_bits(is, num_bits);

	bitstream_remove_bits(is, num_bits);
	return bits;
}

/* Read and return the next @num_bits bits from the bitstream. */
static forceinline u32 bitstream_read_bits(struct input_bitstream *is,
					   u32 num_bits)
{
	bitstream_ensure_bits(is, num_bits);
	return bitstream_pop_bits(is, num_bits);
}

/* Read and return the next literal byte embedded in the bitstream. */
static forceinline u8 bitstream_read_byte(struct input_bitstream *is)
{
	if (unlikely(is->end == is->next))
		return 0;
	return *is->next++;
}

/* Read and return the next 16-bit integer embedded in the bitstream. */
static forceinline u16 bitstream_read_u16(struct input_bitstream *is)
{
	u16 v;

	if (unlikely(is->end - is->next < 2))
		return 0;
	v = get_unaligned_le16(is->next);
	is->next += 2;
	return v;
}

/* Read and return the next 32-bit integer embedded in the bitstream. */
static forceinline u32 bitstream_read_u32(struct input_bitstream *is)
{
	u32 v;

	if (unlikely(is->end - is->next < 4))
		return 0;
	v = get_unaligned_le32(is->next);
	is->next += 4;
	return v;
}

/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
 * Return either a pointer to the byte past the last written, or NULL if the
 * read overflows the input buffer.
 */
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
					      void *dst_buffer, size_t count)
{
	if ((size_t)(is->end - is->next) < count)
		return NULL;
	memcpy(dst_buffer, is->next, count);
	is->next += count;
	return (u8 *)dst_buffer + count;
}

/* Align the input bitstream on a coding-unit boundary. */
static forceinline void bitstream_align(struct input_bitstream *is)
{
	is->bitsleft = 0;
	is->bitbuf = 0;
}
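/*
 * Usage sketch (added for illustration; not part of the original header): the
 * intended calling pattern is init, then ensure/peek/remove (or the combined
 * bitstream_read_bits()), with the byte-granular helpers used for literals
 * embedded in the stream.  The field widths below are arbitrary.
 */
static forceinline u32 example_read_header(const u8 *buf, u32 size)
{
	struct input_bitstream is;
	u32 field;

	init_input_bitstream(&is, buf, size);

	/* Refill the bit buffer, then consume a 3-bit field. */
	bitstream_ensure_bits(&is, 3);
	field = bitstream_pop_bits(&is, 3);

	/* Combined refill-and-pop shortcut for another 5-bit field. */
	field = (field << 5) | bitstream_read_bits(&is, 5);

	/* A literal byte and a 16-bit value stored at byte granularity. */
	field += bitstream_read_byte(&is);
	field += bitstream_read_u16(&is);

	return field;
}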
extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
				     const u32 num_bits, const u8 lens[],
				     const u32 max_codeword_len,
				     u16 working_space[]);

/* Read and return the next Huffman-encoded symbol from a bitstream.  If the
 * input data is exhausted, the Huffman symbol is decoded as if the missing bits
 * are all zeroes.
 */
static forceinline u32 read_huffsym(struct input_bitstream *istream,
				    const u16 decode_table[],
				    u32 table_bits,
				    u32 max_codeword_len)
{
	u32 entry;
	u32 key_bits;

	bitstream_ensure_bits(istream, max_codeword_len);

	/* Index the decode table by the next table_bits bits of the input. */
	key_bits = bitstream_peek_bits(istream, table_bits);
	entry = decode_table[key_bits];
	if (entry < 0xC000) {
		/* Fast case: The decode table directly provided the
		 * symbol and codeword length.  The low 11 bits are the
		 * symbol, and the high 5 bits are the codeword length.
		 */
		bitstream_remove_bits(istream, entry >> 11);
		return entry & 0x7FF;
	}

	/* Slow case: The codeword for the symbol is longer than
	 * table_bits, so the symbol does not have an entry
	 * directly in the first (1 << table_bits) entries of the
	 * decode table.  Traverse the appropriate binary tree
	 * bit-by-bit to decode the symbol.
	 */
	bitstream_remove_bits(istream, table_bits);
	do {
		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
	} while ((entry = decode_table[key_bits]) >= 0xC000);
	return entry;
}
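/*
 * Decoding sketch (added for illustration; not part of the original header):
 * build a decode table from per-symbol codeword lengths and decode one symbol.
 * The caller is assumed to have sized @decode_table and @working_space as
 * make_huffman_decode_table() requires (see the XPRESS and LZX decompressors
 * that share this header), and a nonzero return from that function is treated
 * here as "invalid code lengths".
 */
static forceinline int example_decode_one_symbol(struct input_bitstream *is,
						 u16 decode_table[],
						 u16 working_space[],
						 const u8 lens[], u32 num_syms,
						 u32 table_bits,
						 u32 max_codeword_len)
{
	if (make_huffman_decode_table(decode_table, num_syms, table_bits,
				      lens, max_codeword_len, working_space))
		return -1;

	return read_huffsym(is, decode_table, table_bits, max_codeword_len);
}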
/*
 * Copy an LZ77 match at (dst - offset) to dst.
 *
 * The length and offset must already have been validated --- that is,
 * (dst - offset) can't underrun the output buffer, and (dst + length) can't
 * overrun the output buffer.  Also, the length cannot be 0.
 *
 * @bufend points to the byte past the end of the output buffer.  This function
 * won't write any data beyond this position.
 *
 * @min_length is the minimum match length guaranteed by the compression
 * format; when it is a compile-time constant, the bytewise fallback below can
 * unroll its first copies.
 *
 * Returns dst + length.
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
{
	const u8 *src = dst - offset;

	/*
	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths.  Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 *
	 * Also note that we might copy more than the length of the match.  For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;

	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap. */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled.  Most matches are short and will
			 * fail the first check.  But if that check passes, then
			 * it becomes increasingly likely that the match is long
			 * and we'll need to continue copying.
			 */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte.  This case is common
			 * if the data contains many repeated bytes.
			 */
			size_t v = repeat_byte(*(dst - 1));

			do {
				put_unaligned(v, (size_t *)dst);
				src += WORDBYTES;
				dst += WORDBYTES;
			} while (dst < end);
			return end;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down.  Actually, it's
		 * possible to handle all the 'offset < WORDBYTES' cases using
		 * the same code, but it becomes more complicated and doesn't
		 * seem any faster overall; it definitely slows down the more
		 * common 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy. */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
}

#endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */
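/*
 * Usage sketch (added for illustration; not from the original header): a
 * decompressor that has already validated @length and @offset against its
 * output buffer would expand a match like this, where 3 stands in for the
 * format's minimum match length (XPRESS uses 3, LZX uses 2):
 *
 *	u8 *out_next = ...;                        // current write position
 *	u8 * const out_end = out_buf + out_size;   // one past the buffer end
 *
 *	out_next = lz_copy(out_next, length, offset, out_end, 3);
 */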