Line | Count | Source |
1 | | #ifndef PACK_OBJECTS_H |
2 | | #define PACK_OBJECTS_H |
3 | | |
4 | | #include "odb.h" |
5 | | #include "thread-utils.h" |
6 | | #include "pack.h" |
7 | | #include "packfile.h" |
8 | | |
9 | | struct repository; |
10 | | |
11 | | #define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024) |
12 | 0 | #define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024) |
13 | | |
14 | | #define OE_DFS_STATE_BITS 2 |
15 | | #define OE_DEPTH_BITS 12 |
16 | | #define OE_IN_PACK_BITS 10 |
17 | | #define OE_Z_DELTA_BITS 20 |
18 | | /* |
19 | | * Note that oe_set_size() becomes expensive when the given size is |
20 | | * above this limit. Don't lower it too much. |
21 | | */ |
22 | | #define OE_SIZE_BITS 31 |
23 | | #define OE_DELTA_SIZE_BITS 23 |
24 | | |
25 | | /* |
26 | | * State flags for depth-first search used for analyzing delta cycles. |
27 | | * |
28 | | * The depth is measured in delta-links to the base (so if A is a delta |
29 | | * against B, then A has a depth of 1, and B a depth of 0). |
30 | | */ |
31 | | enum dfs_state { |
32 | | DFS_NONE = 0, |
33 | | DFS_ACTIVE, |
34 | | DFS_DONE, |
35 | | DFS_NUM_STATES |
36 | | }; |
37 | | |
38 | | /* |
39 | | * The size of struct nearly determines pack-objects's memory |
40 | | * consumption. This struct is packed tight for that reason. When you |
41 | | * add or reorder something in this struct, think a bit about this. |
42 | | * |
43 | | * basic object info |
44 | | * ----------------- |
45 | | * idx.oid is filled up before delta searching starts. idx.crc32 is |
46 | | * only valid after the object is written out and will be used for |
47 | | * generating the index. idx.offset will be both gradually set and |
48 | | * used in writing phase (base objects get offset first, then deltas |
49 | | * refer to them) |
50 | | * |
51 | | * "size" is the uncompressed object size. Compressed size of the raw |
52 | | * data for an object in a pack is not stored anywhere but is computed |
53 | | * and made available when reverse .idx is made. Note that when a |
54 | | * delta is reused, "size" is the uncompressed _delta_ size, not the |
55 | | * canonical one after the delta has been applied. |
56 | | * |
57 | | * "hash" contains a path name hash which is used for sorting the |
58 | | * delta list and also during delta searching. Once prepare_pack() |
59 | | * returns it's no longer needed. |
60 | | * |
61 | | * source pack info |
62 | | * ---------------- |
63 | | * The (in_pack, in_pack_offset) tuple contains the location of the |
64 | | * object in the source pack. in_pack_header_size allows quickly |
65 | | * skipping the header and going straight to the zlib stream. |
66 | | * |
67 | | * "type" and "in_pack_type" both describe object type. in_pack_type |
68 | | * may contain a delta type, while type is always the canonical type. |
69 | | * |
70 | | * deltas |
71 | | * ------ |
72 | | * Delta links (delta, delta_child and delta_sibling) are created to |
73 | | * reflect that delta graph from the source pack then updated or added |
74 | | * during delta searching phase when we find better deltas. |
75 | | * |
76 | | * delta_child and delta_sibling are last needed in |
77 | | * compute_write_order(). "delta" and "delta_size" must remain valid |
78 | | * at object writing phase in case the delta is not cached. |
79 | | * |
80 | | * If a delta is cached in memory and is compressed, delta_data points |
81 | | * to the data and z_delta_size contains the compressed size. If it's |
82 | | * uncompressed [1], z_delta_size must be zero. delta_size is always |
83 | | * the uncompressed size and must be valid even if the delta is not |
84 | | * cached. |
85 | | * |
86 | | * [1] during try_delta phase we don't bother with compressing because |
87 | | * the delta could be quickly replaced with a better one. |
88 | | */ |
89 | | struct object_entry { |
90 | | struct pack_idx_entry idx; |
91 | | void *delta_data; /* cached delta (uncompressed) */ |
92 | | off_t in_pack_offset; |
93 | | uint32_t hash; /* name hint hash */ |
94 | | unsigned size_:OE_SIZE_BITS; |
95 | | unsigned size_valid:1; |
96 | | uint32_t delta_idx; /* delta base object */ |
97 | | uint32_t delta_child_idx; /* deltified objects who bases me */ |
98 | | uint32_t delta_sibling_idx; /* other deltified objects who |
99 | | * uses the same base as me |
100 | | */ |
101 | | unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */ |
102 | | unsigned delta_size_valid:1; |
103 | | unsigned char in_pack_header_size; |
104 | | unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ |
105 | | unsigned z_delta_size:OE_Z_DELTA_BITS; |
106 | | unsigned type_valid:1; |
107 | | unsigned no_try_delta:1; |
108 | | unsigned type_:TYPE_BITS; |
109 | | unsigned in_pack_type:TYPE_BITS; /* could be delta */ |
110 | | |
111 | | unsigned preferred_base:1; /* |
112 | | * we do not pack this, but is available |
113 | | * to be used as the base object to delta |
114 | | * objects against. |
115 | | */ |
116 | | unsigned tagged:1; /* near the very tip of refs */ |
117 | | unsigned filled:1; /* assigned write-order */ |
118 | | unsigned dfs_state:OE_DFS_STATE_BITS; |
119 | | unsigned depth:OE_DEPTH_BITS; |
120 | | unsigned ext_base:1; /* delta_idx points outside packlist */ |
121 | | }; |
122 | | |
123 | | /** |
124 | | * A packing region is a section of the packing_data.objects array |
125 | | * as given by a starting index and a number of elements. |
126 | | */ |
127 | | struct packing_region { |
128 | | size_t start; |
129 | | size_t nr; |
130 | | }; |
131 | | |
132 | | struct packing_data { |
133 | | struct repository *repo; |
134 | | struct object_entry *objects; |
135 | | uint32_t nr_objects, nr_alloc; |
136 | | |
137 | | struct packing_region *regions; |
138 | | size_t nr_regions, nr_regions_alloc; |
139 | | |
140 | | int32_t *index; |
141 | | uint32_t index_size; |
142 | | |
143 | | unsigned int *in_pack_pos; |
144 | | unsigned long *delta_size; |
145 | | |
146 | | /* |
147 | | * Only one of these can be non-NULL and they have different |
148 | | * sizes. if in_pack_by_idx is allocated, oe_in_pack() returns |
149 | | * the pack of an object using in_pack_idx field. If not, |
150 | | * in_pack[] array is used the same way as in_pack_pos[] |
151 | | */ |
152 | | struct packed_git **in_pack_by_idx; |
153 | | struct packed_git **in_pack; |
154 | | |
155 | | /* |
156 | | * During packing with multiple threads, protect the in-core |
157 | | * object database from concurrent accesses. |
158 | | */ |
159 | | pthread_mutex_t odb_lock; |
160 | | |
161 | | /* |
162 | | * This list contains entries for bases which we know the other side |
163 | | * has (e.g., via reachability bitmaps), but which aren't in our |
164 | | * "objects" list. |
165 | | */ |
166 | | struct object_entry *ext_bases; |
167 | | uint32_t nr_ext, alloc_ext; |
168 | | |
169 | | uintmax_t oe_size_limit; |
170 | | uintmax_t oe_delta_size_limit; |
171 | | |
172 | | /* delta islands */ |
173 | | unsigned int *tree_depth; |
174 | | unsigned char *layer; |
175 | | |
176 | | /* |
177 | | * Used when writing cruft packs. |
178 | | * |
179 | | * Object mtimes are stored in pack order when writing, but |
180 | | * written out in lexicographic (index) order. |
181 | | */ |
182 | | uint32_t *cruft_mtime; |
183 | | }; |
184 | | |
185 | | void prepare_packing_data(struct repository *r, struct packing_data *pdata); |
186 | | void clear_packing_data(struct packing_data *pdata); |
187 | | |
188 | | /* Protect access to object database */ |
189 | | static inline void packing_data_lock(struct packing_data *pdata) |
190 | 0 | { |
191 | 0 | pthread_mutex_lock(&pdata->odb_lock); |
192 | 0 | } Unexecuted instantiation: pack-write.c:packing_data_lock Unexecuted instantiation: repo-settings.c:packing_data_lock Unexecuted instantiation: midx.c:packing_data_lock |
193 | | static inline void packing_data_unlock(struct packing_data *pdata) |
194 | 0 | { |
195 | 0 | pthread_mutex_unlock(&pdata->odb_lock); |
196 | 0 | } Unexecuted instantiation: pack-write.c:packing_data_unlock Unexecuted instantiation: repo-settings.c:packing_data_unlock Unexecuted instantiation: midx.c:packing_data_unlock |
197 | | |
198 | | struct object_entry *packlist_alloc(struct packing_data *pdata, |
199 | | const struct object_id *oid); |
200 | | |
201 | | struct object_entry *packlist_find(struct packing_data *pdata, |
202 | | const struct object_id *oid); |
203 | | |
204 | | static inline uint32_t pack_name_hash(const char *name) |
205 | 0 | { |
206 | 0 | uint32_t c, hash = 0; |
207 | 0 |
|
208 | 0 | if (!name) |
209 | 0 | return 0; |
210 | 0 |
|
211 | 0 | /* |
212 | 0 | * This effectively just creates a sortable number from the |
213 | 0 | * last sixteen non-whitespace characters. Last characters |
214 | 0 | * count "most", so things that end in ".c" sort together. |
215 | 0 | */ |
216 | 0 | while ((c = *name++) != 0) { |
217 | 0 | if (isspace(c)) |
218 | 0 | continue; |
219 | 0 | hash = (hash >> 2) + (c << 24); |
220 | 0 | } |
221 | 0 | return hash; |
222 | 0 | } Unexecuted instantiation: pack-write.c:pack_name_hash Unexecuted instantiation: repo-settings.c:pack_name_hash Unexecuted instantiation: midx.c:pack_name_hash |
223 | | |
224 | | static inline uint32_t pack_name_hash_v2(const unsigned char *name) |
225 | 0 | { |
226 | 0 | uint32_t hash = 0, base = 0, c; |
227 | 0 |
|
228 | 0 | if (!name) |
229 | 0 | return 0; |
230 | 0 |
|
231 | 0 | while ((c = *name++)) { |
232 | 0 | if (isspace(c)) |
233 | 0 | continue; |
234 | 0 | if (c == '/') { |
235 | 0 | base = (base >> 6) ^ hash; |
236 | 0 | hash = 0; |
237 | 0 | } else { |
238 | 0 | /* |
239 | 0 | * 'c' is only a single byte. Reverse it and move |
240 | 0 | * it to the top of the hash, moving the rest to |
241 | 0 | * less-significant bits. |
242 | 0 | */ |
243 | 0 | c = (c & 0xF0) >> 4 | (c & 0x0F) << 4; |
244 | 0 | c = (c & 0xCC) >> 2 | (c & 0x33) << 2; |
245 | 0 | c = (c & 0xAA) >> 1 | (c & 0x55) << 1; |
246 | 0 | hash = (hash >> 2) + (c << 24); |
247 | 0 | } |
248 | 0 | } |
249 | 0 | return (base >> 6) ^ hash; |
250 | 0 | } Unexecuted instantiation: pack-write.c:pack_name_hash_v2 Unexecuted instantiation: repo-settings.c:pack_name_hash_v2 Unexecuted instantiation: midx.c:pack_name_hash_v2 |
251 | | |
252 | | static inline enum object_type oe_type(const struct object_entry *e) |
253 | 0 | { |
254 | 0 | return e->type_valid ? e->type_ : OBJ_BAD; |
255 | 0 | } Unexecuted instantiation: pack-write.c:oe_type Unexecuted instantiation: repo-settings.c:oe_type Unexecuted instantiation: midx.c:oe_type |
256 | | |
257 | | static inline void oe_set_type(struct object_entry *e, |
258 | | enum object_type type) |
259 | 0 | { |
260 | 0 | if (type >= OBJ_ANY) |
261 | 0 | BUG("OBJ_ANY cannot be set in pack-objects code"); |
262 | 0 |
|
263 | 0 | e->type_valid = type >= OBJ_NONE; |
264 | 0 | e->type_ = (unsigned)type; |
265 | 0 | } Unexecuted instantiation: pack-write.c:oe_set_type Unexecuted instantiation: repo-settings.c:oe_set_type Unexecuted instantiation: midx.c:oe_set_type |
266 | | |
267 | | static inline unsigned int oe_in_pack_pos(const struct packing_data *pack, |
268 | | const struct object_entry *e) |
269 | 0 | { |
270 | 0 | return pack->in_pack_pos[e - pack->objects]; |
271 | 0 | } Unexecuted instantiation: pack-write.c:oe_in_pack_pos Unexecuted instantiation: repo-settings.c:oe_in_pack_pos Unexecuted instantiation: midx.c:oe_in_pack_pos |
272 | | |
273 | | static inline void oe_set_in_pack_pos(const struct packing_data *pack, |
274 | | const struct object_entry *e, |
275 | | unsigned int pos) |
276 | 0 | { |
277 | 0 | pack->in_pack_pos[e - pack->objects] = pos; |
278 | 0 | } Unexecuted instantiation: pack-write.c:oe_set_in_pack_pos Unexecuted instantiation: repo-settings.c:oe_set_in_pack_pos Unexecuted instantiation: midx.c:oe_set_in_pack_pos |
279 | | |
280 | | static inline struct packed_git *oe_in_pack(const struct packing_data *pack, |
281 | | const struct object_entry *e) |
282 | 0 | { |
283 | 0 | if (pack->in_pack_by_idx) |
284 | 0 | return pack->in_pack_by_idx[e->in_pack_idx]; |
285 | 0 | else |
286 | 0 | return pack->in_pack[e - pack->objects]; |
287 | 0 | } Unexecuted instantiation: pack-write.c:oe_in_pack Unexecuted instantiation: repo-settings.c:oe_in_pack Unexecuted instantiation: midx.c:oe_in_pack |
288 | | |
289 | | void oe_map_new_pack(struct packing_data *pack); |
290 | | |
291 | | static inline void oe_set_in_pack(struct packing_data *pack, |
292 | | struct object_entry *e, |
293 | | struct packed_git *p) |
294 | 0 | { |
295 | 0 | if (pack->in_pack_by_idx) { |
296 | 0 | if (p->index) { |
297 | 0 | e->in_pack_idx = p->index; |
298 | 0 | return; |
299 | 0 | } |
300 | 0 | /* |
301 | 0 | * We're accessing packs by index, but this pack doesn't have |
302 | 0 | * an index (e.g., because it was added since we created the |
303 | 0 | * in_pack_by_idx array). Bail to oe_map_new_pack(), which |
304 | 0 | * will convert us to using the full in_pack array, and then |
305 | 0 | * fall through to our in_pack handling. |
306 | 0 | */ |
307 | 0 | oe_map_new_pack(pack); |
308 | 0 | } |
309 | 0 | pack->in_pack[e - pack->objects] = p; |
310 | 0 | } Unexecuted instantiation: pack-write.c:oe_set_in_pack Unexecuted instantiation: repo-settings.c:oe_set_in_pack Unexecuted instantiation: midx.c:oe_set_in_pack |
311 | | |
312 | | void oe_set_delta_ext(struct packing_data *pack, |
313 | | struct object_entry *e, |
314 | | const struct object_id *oid); |
315 | | |
316 | | static inline unsigned int oe_tree_depth(struct packing_data *pack, |
317 | | struct object_entry *e) |
318 | 0 | { |
319 | 0 | if (!pack->tree_depth) |
320 | 0 | return 0; |
321 | 0 | return pack->tree_depth[e - pack->objects]; |
322 | 0 | } Unexecuted instantiation: pack-write.c:oe_tree_depth Unexecuted instantiation: repo-settings.c:oe_tree_depth Unexecuted instantiation: midx.c:oe_tree_depth |
323 | | |
324 | | static inline void oe_set_layer(struct packing_data *pack, |
325 | | struct object_entry *e, |
326 | | unsigned char layer) |
327 | 0 | { |
328 | 0 | if (!pack->layer) |
329 | 0 | CALLOC_ARRAY(pack->layer, pack->nr_alloc); |
330 | 0 | pack->layer[e - pack->objects] = layer; |
331 | 0 | } Unexecuted instantiation: pack-write.c:oe_set_layer Unexecuted instantiation: repo-settings.c:oe_set_layer Unexecuted instantiation: midx.c:oe_set_layer |
332 | | |
333 | | static inline uint32_t oe_cruft_mtime(struct packing_data *pack, |
334 | | struct object_entry *e) |
335 | 0 | { |
336 | 0 | if (!pack->cruft_mtime) |
337 | 0 | return 0; |
338 | 0 | return pack->cruft_mtime[e - pack->objects]; |
339 | 0 | } Unexecuted instantiation: pack-write.c:oe_cruft_mtime Unexecuted instantiation: repo-settings.c:oe_cruft_mtime Unexecuted instantiation: midx.c:oe_cruft_mtime |
340 | | |
341 | | static inline void oe_set_cruft_mtime(struct packing_data *pack, |
342 | | struct object_entry *e, |
343 | | uint32_t mtime) |
344 | 0 | { |
345 | 0 | if (!pack->cruft_mtime) |
346 | 0 | CALLOC_ARRAY(pack->cruft_mtime, pack->nr_alloc); |
347 | 0 | pack->cruft_mtime[e - pack->objects] = mtime; |
348 | 0 | } Unexecuted instantiation: pack-write.c:oe_set_cruft_mtime Unexecuted instantiation: repo-settings.c:oe_set_cruft_mtime Unexecuted instantiation: midx.c:oe_set_cruft_mtime |
349 | | |
350 | | #endif |