Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_EXTENTS_FORMAT_H #define _BCACHEFS_EXTENTS_FORMAT_H /* * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally * preceded by checksum/compression information (bch_extent_crc32 or * bch_extent_crc64). * * One major determining factor in the format of extents is how we handle and * represent extents that have been partially overwritten and thus trimmed: * * If an extent is not checksummed or compressed, when the extent is trimmed we * don't have to remember the extent we originally allocated and wrote: we can * merely adjust ptr->offset to point to the start of the data that is currently * live. The size field in struct bkey records the current (live) size of the * extent, and is also used to mean "size of region on disk that we point to" in * this case. * * Thus an extent that is not checksummed or compressed will consist only of a * list of bch_extent_ptrs, with none of the fields in * bch_extent_crc32/bch_extent_crc64. * * When an extent is checksummed or compressed, it's not possible to read only * the data that is currently live: we have to read the entire extent that was * originally written, and then return only the part of the extent that is * currently live. * * Thus, in addition to the current size of the extent in struct bkey, we need * to store the size of the originally allocated space - this is the * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, * when the extent is trimmed, instead of modifying the offset field of the * pointer, we keep a second smaller offset field - "offset into the original * extent of the currently live region". * * The other major determining factor is replication and data migration: * * Each pointer may have its own bch_extent_crc32/64. When doing a replicated * write, we will initially write all the replicas in the same format, with the * same checksum type and compression format - however, when copygc runs later (or * tiering/cache promotion, anything that moves data), it is not in general * going to rewrite all the pointers at once - one of the replicas may be in a * bucket on one device that has very little fragmentation while another lives * in a bucket that has become heavily fragmented, and thus is being rewritten * sooner than the rest. * * Thus it will only move a subset of the pointers (or in the case of * tiering/cache promotion perhaps add a single pointer without dropping any * current pointers), and if the extent has been partially overwritten it must * write only the currently live portion (or copygc would not be able to reduce * fragmentation!) - which necessitates a different bch_extent_crc format for * the new pointer. * * But in the interests of space efficiency, we don't want to store one * bch_extent_crc for each pointer if we don't have to. * * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and * bch_extent_ptrs appended arbitrarily one after the other. We determine the * type of a given entry with a scheme similar to utf8 (except we're encoding a * type, not a size), encoding the type in the position of the first set bit: * * bch_extent_crc32 - 0b1 * bch_extent_ptr - 0b10 * bch_extent_crc64 - 0b100 * * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and * bch_extent_crc64 is the least constrained). * * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, * until the next bch_extent_crc32/64. * * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer * is neither checksummed nor compressed. */ #define BCH_EXTENT_ENTRY_TYPES() \ x(ptr, 0) \ x(crc32, 1) \ x(crc64, 2) \ x(crc128, 3) \ x(stripe_ptr, 4) \ x(rebalance, 5) #define BCH_EXTENT_ENTRY_MAX 6 enum bch_extent_entry_type { #define x(f, n) BCH_EXTENT_ENTRY_##f = n, BCH_EXTENT_ENTRY_TYPES() #undef x }; /* Compressed/uncompressed size are stored biased by 1: */ struct bch_extent_crc32 { #if defined(__LITTLE_ENDIAN_BITFIELD) __u32 type:2, _compressed_size:7, _uncompressed_size:7, offset:7, _unused:1, csum_type:4, compression_type:4; __u32 csum; #elif defined (__BIG_ENDIAN_BITFIELD) __u32 csum; __u32 compression_type:4, csum_type:4, _unused:1, offset:7, _uncompressed_size:7, _compressed_size:7, type:2; #endif } __packed __aligned(8); #define CRC32_SIZE_MAX (1U << 7) #define CRC32_NONCE_MAX 0 struct bch_extent_crc64 { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:3, _compressed_size:9, _uncompressed_size:9, offset:9, nonce:10, csum_type:4, compression_type:4, csum_hi:16; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 csum_hi:16, compression_type:4, csum_type:4, nonce:10, offset:9, _uncompressed_size:9, _compressed_size:9, type:3; #endif __u64 csum_lo; } __packed __aligned(8); #define CRC64_SIZE_MAX (1U << 9) #define CRC64_NONCE_MAX ((1U << 10) - 1) struct bch_extent_crc128 { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:4, _compressed_size:13, _uncompressed_size:13, offset:13, nonce:13, csum_type:4, compression_type:4; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 compression_type:4, csum_type:4, nonce:13, offset:13, _uncompressed_size:13, _compressed_size:13, type:4; #endif struct bch_csum csum; } __packed __aligned(8); #define CRC128_SIZE_MAX (1U << 13) #define CRC128_NONCE_MAX ((1U << 13) - 1) /* * @reservation - pointer hasn't been written to, just reserved */ struct bch_extent_ptr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:1, cached:1, unused:1, unwritten:1, offset:44, /* 8 petabytes */ dev:8, gen:8; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 gen:8, dev:8, offset:44, unwritten:1, unused:1, cached:1, type:1; #endif } __packed __aligned(8); struct bch_extent_stripe_ptr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:5, block:8, redundancy:4, idx:47; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 idx:47, redundancy:4, block:8, type:5; #endif }; struct bch_extent_rebalance { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:6, unused:34, compression:8, /* enum bch_compression_opt */ target:16; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 target:16, compression:8, unused:34, type:6; #endif }; union bch_extent_entry { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 unsigned long type; #elif __BITS_PER_LONG == 32 struct { unsigned long pad; unsigned long type; }; #else #error edit for your odd byteorder. #endif #define x(f, n) struct bch_extent_##f f; BCH_EXTENT_ENTRY_TYPES() #undef x }; struct bch_btree_ptr { struct bch_val v; __u64 _data[0]; struct bch_extent_ptr start[]; } __packed __aligned(8); struct bch_btree_ptr_v2 { struct bch_val v; __u64 mem_ptr; __le64 seq; __le16 sectors_written; __le16 flags; struct bpos min_key; __u64 _data[0]; struct bch_extent_ptr start[]; } __packed __aligned(8); LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); struct bch_extent { struct bch_val v; __u64 _data[0]; union bch_extent_entry start[]; } __packed __aligned(8); /* Maximum size (in u64s) a single pointer could be: */ #define BKEY_EXTENT_PTR_U64s_MAX\ ((sizeof(struct bch_extent_crc128) + \ sizeof(struct bch_extent_ptr)) / sizeof(__u64)) /* Maximum possible size of an entire extent value: */ #define BKEY_EXTENT_VAL_U64s_MAX \ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) /* * Maximum possible size of an entire extent, key + value: */ #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) /* Btree pointers don't carry around checksums: */ #define BKEY_BTREE_PTR_VAL_U64s_MAX \ ((sizeof(struct bch_btree_ptr_v2) + \ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) #define BKEY_BTREE_PTR_U64s_MAX \ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) struct bch_reservation { struct bch_val v; __le32 generation; __u8 nr_replicas; __u8 pad[3]; } __packed __aligned(8); struct bch_inline_data { struct bch_val v; u8 data[]; }; #endif /* _BCACHEFS_EXTENTS_FORMAT_H */ |