The sixteenth batch
[git/gitster.git] / midx.c
blob3c5bc8217300ca23809503a96bc23da87b6c211a
1 #define DISABLE_SIGN_COMPARE_WARNINGS
3 #include "git-compat-util.h"
4 #include "config.h"
5 #include "dir.h"
6 #include "hex.h"
7 #include "packfile.h"
8 #include "hash-lookup.h"
9 #include "midx.h"
10 #include "progress.h"
11 #include "trace2.h"
12 #include "chunk-format.h"
13 #include "pack-bitmap.h"
14 #include "pack-revindex.h"
/*
 * Sentinel stored in a MIDX's packs[] slot by prepare_midx_pack() when the
 * pack could not be found or added. It is distinct from NULL, which means
 * the slot has not been filled in yet.
 */
#define MIDX_PACK_ERROR ((void *)(intptr_t)-1)

/* Forward declarations of functions defined later in this file. */
int midx_checksum_valid(struct multi_pack_index *m);
void clear_midx_files_ext(const char *object_dir, const char *ext,
			  const char *keep_hash);
void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
				      char **keep_hashes,
				      uint32_t hashes_nr);
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name);
27 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
29 return m->data + m->data_len - m->repo->hash_algo->rawsz;
/*
 * Append the path of the plain multi-pack-index file below `object_dir` to
 * `out`. Equivalent to get_midx_filename_ext() with no hash and no
 * extension.
 */
void get_midx_filename(const struct git_hash_algo *hash_algo,
		       struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(hash_algo, out, object_dir, no_hash, no_ext);
}
/*
 * Append "<object_dir>/pack/multi-pack-index" to `out`. When `ext` is
 * non-NULL, "-<hex of hash>.<ext>" is appended as well; `hash` is only
 * read in that case.
 */
void get_midx_filename_ext(const struct git_hash_algo *hash_algo,
			   struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);

	if (!ext)
		return;

	strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, hash_algo), ext);
}
47 static int midx_read_oid_fanout(const unsigned char *chunk_start,
48 size_t chunk_size, void *data)
50 int i;
51 struct multi_pack_index *m = data;
52 m->chunk_oid_fanout = (uint32_t *)chunk_start;
54 if (chunk_size != 4 * 256) {
55 error(_("multi-pack-index OID fanout is of the wrong size"));
56 return 1;
58 for (i = 0; i < 255; i++) {
59 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
60 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
62 if (oid_fanout1 > oid_fanout2) {
63 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
64 i, oid_fanout1, oid_fanout2, i + 1);
65 return 1;
68 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
69 return 0;
72 static int midx_read_oid_lookup(const unsigned char *chunk_start,
73 size_t chunk_size, void *data)
75 struct multi_pack_index *m = data;
76 m->chunk_oid_lookup = chunk_start;
78 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
79 error(_("multi-pack-index OID lookup chunk is the wrong size"));
80 return 1;
82 return 0;
85 static int midx_read_object_offsets(const unsigned char *chunk_start,
86 size_t chunk_size, void *data)
88 struct multi_pack_index *m = data;
89 m->chunk_object_offsets = chunk_start;
91 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
92 error(_("multi-pack-index object offset chunk is the wrong size"));
93 return 1;
95 return 0;
98 static struct multi_pack_index *load_multi_pack_index_one(struct repository *r,
99 const char *object_dir,
100 const char *midx_name,
101 int local)
103 struct multi_pack_index *m = NULL;
104 int fd;
105 struct stat st;
106 size_t midx_size;
107 void *midx_map = NULL;
108 uint32_t hash_version;
109 uint32_t i;
110 const char *cur_pack_name;
111 struct chunkfile *cf = NULL;
113 fd = git_open(midx_name);
115 if (fd < 0)
116 goto cleanup_fail;
117 if (fstat(fd, &st)) {
118 error_errno(_("failed to read %s"), midx_name);
119 goto cleanup_fail;
122 midx_size = xsize_t(st.st_size);
124 if (midx_size < (MIDX_HEADER_SIZE + r->hash_algo->rawsz)) {
125 error(_("multi-pack-index file %s is too small"), midx_name);
126 goto cleanup_fail;
129 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
130 close(fd);
132 FLEX_ALLOC_STR(m, object_dir, object_dir);
133 m->data = midx_map;
134 m->data_len = midx_size;
135 m->local = local;
136 m->repo = r;
138 m->signature = get_be32(m->data);
139 if (m->signature != MIDX_SIGNATURE)
140 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
141 m->signature, MIDX_SIGNATURE);
143 m->version = m->data[MIDX_BYTE_FILE_VERSION];
144 if (m->version != MIDX_VERSION)
145 die(_("multi-pack-index version %d not recognized"),
146 m->version);
148 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
149 if (hash_version != oid_version(r->hash_algo)) {
150 error(_("multi-pack-index hash version %u does not match version %u"),
151 hash_version, oid_version(r->hash_algo));
152 goto cleanup_fail;
154 m->hash_len = r->hash_algo->rawsz;
156 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
158 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
160 m->preferred_pack_idx = -1;
162 cf = init_chunkfile(NULL);
164 if (read_table_of_contents(cf, m->data, midx_size,
165 MIDX_HEADER_SIZE, m->num_chunks,
166 MIDX_CHUNK_ALIGNMENT))
167 goto cleanup_fail;
169 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
170 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
171 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
172 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
173 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
174 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
175 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
176 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
178 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
179 &m->chunk_large_offsets_len);
180 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
181 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
182 (const unsigned char **)&m->chunk_bitmapped_packs,
183 &m->chunk_bitmapped_packs_len);
185 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
186 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
187 &m->chunk_revindex_len);
189 CALLOC_ARRAY(m->pack_names, m->num_packs);
190 CALLOC_ARRAY(m->packs, m->num_packs);
192 cur_pack_name = (const char *)m->chunk_pack_names;
193 for (i = 0; i < m->num_packs; i++) {
194 const char *end;
195 size_t avail = m->chunk_pack_names_len -
196 (cur_pack_name - (const char *)m->chunk_pack_names);
198 m->pack_names[i] = cur_pack_name;
200 end = memchr(cur_pack_name, '\0', avail);
201 if (!end)
202 die(_("multi-pack-index pack-name chunk is too short"));
203 cur_pack_name = end + 1;
205 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
206 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
207 m->pack_names[i - 1],
208 m->pack_names[i]);
211 trace2_data_intmax("midx", r, "load/num_packs", m->num_packs);
212 trace2_data_intmax("midx", r, "load/num_objects", m->num_objects);
214 free_chunkfile(cf);
215 return m;
217 cleanup_fail:
218 free(m);
219 free_chunkfile(cf);
220 if (midx_map)
221 munmap(midx_map, midx_size);
222 if (0 <= fd)
223 close(fd);
224 return NULL;
/* Append "<object_dir>/pack/multi-pack-index.d" to `buf`. */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	strbuf_addf(buf, "%s/pack/multi-pack-index.d", object_dir);
}
/*
 * Append the path of the chain file, i.e. the chain directory (see
 * get_midx_chain_dirname()) followed by "/multi-pack-index-chain", to `buf`.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, "/multi-pack-index-chain");
}
/*
 * Append the path of one file inside the chain directory to `buf`:
 * "<chain dir>/multi-pack-index-<hex of hash>.<ext>".
 */
void get_split_midx_filename_ext(const struct git_hash_algo *hash_algo,
				 struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);

	hex = hash_to_hex_algop(hash, hash_algo);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
247 static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo,
248 const char *chain_file, int *fd,
249 struct stat *st)
251 *fd = git_open(chain_file);
252 if (*fd < 0)
253 return 0;
254 if (fstat(*fd, st)) {
255 close(*fd);
256 return 0;
258 if (st->st_size < hash_algo->hexsz) {
259 close(*fd);
260 if (!st->st_size) {
261 /* treat empty files the same as missing */
262 errno = ENOENT;
263 } else {
264 warning(_("multi-pack-index chain file too small"));
265 errno = EINVAL;
267 return 0;
269 return 1;
272 static int add_midx_to_chain(struct multi_pack_index *midx,
273 struct multi_pack_index *midx_chain)
275 if (midx_chain) {
276 if (unsigned_add_overflows(midx_chain->num_packs,
277 midx_chain->num_packs_in_base)) {
278 warning(_("pack count in base MIDX too high: %"PRIuMAX),
279 (uintmax_t)midx_chain->num_packs_in_base);
280 return 0;
282 if (unsigned_add_overflows(midx_chain->num_objects,
283 midx_chain->num_objects_in_base)) {
284 warning(_("object count in base MIDX too high: %"PRIuMAX),
285 (uintmax_t)midx_chain->num_objects_in_base);
286 return 0;
288 midx->num_packs_in_base = midx_chain->num_packs +
289 midx_chain->num_packs_in_base;
290 midx->num_objects_in_base = midx_chain->num_objects +
291 midx_chain->num_objects_in_base;
294 midx->base_midx = midx_chain;
295 midx->has_chain = 1;
297 return 1;
300 static struct multi_pack_index *load_midx_chain_fd_st(struct repository *r,
301 const char *object_dir,
302 int local,
303 int fd, struct stat *st,
304 int *incomplete_chain)
306 struct multi_pack_index *midx_chain = NULL;
307 struct strbuf buf = STRBUF_INIT;
308 int valid = 1;
309 uint32_t i, count;
310 FILE *fp = xfdopen(fd, "r");
312 count = st->st_size / (r->hash_algo->hexsz + 1);
314 for (i = 0; i < count; i++) {
315 struct multi_pack_index *m;
316 struct object_id layer;
318 if (strbuf_getline_lf(&buf, fp) == EOF)
319 break;
321 if (get_oid_hex_algop(buf.buf, &layer, r->hash_algo)) {
322 warning(_("invalid multi-pack-index chain: line '%s' "
323 "not a hash"),
324 buf.buf);
325 valid = 0;
326 break;
329 valid = 0;
331 strbuf_reset(&buf);
332 get_split_midx_filename_ext(r->hash_algo, &buf, object_dir,
333 layer.hash, MIDX_EXT_MIDX);
334 m = load_multi_pack_index_one(r, object_dir, buf.buf, local);
336 if (m) {
337 if (add_midx_to_chain(m, midx_chain)) {
338 midx_chain = m;
339 valid = 1;
340 } else {
341 close_midx(m);
344 if (!valid) {
345 warning(_("unable to find all multi-pack index files"));
346 break;
350 fclose(fp);
351 strbuf_release(&buf);
353 *incomplete_chain = !valid;
354 return midx_chain;
357 static struct multi_pack_index *load_multi_pack_index_chain(struct repository *r,
358 const char *object_dir,
359 int local)
361 struct strbuf chain_file = STRBUF_INIT;
362 struct stat st;
363 int fd;
364 struct multi_pack_index *m = NULL;
366 get_midx_chain_filename(&chain_file, object_dir);
367 if (open_multi_pack_index_chain(r->hash_algo, chain_file.buf, &fd, &st)) {
368 int incomplete;
369 /* ownership of fd is taken over by load function */
370 m = load_midx_chain_fd_st(r, object_dir, local, fd, &st,
371 &incomplete);
374 strbuf_release(&chain_file);
375 return m;
378 struct multi_pack_index *load_multi_pack_index(struct repository *r,
379 const char *object_dir,
380 int local)
382 struct strbuf midx_name = STRBUF_INIT;
383 struct multi_pack_index *m;
385 get_midx_filename(r->hash_algo, &midx_name, object_dir);
387 m = load_multi_pack_index_one(r, object_dir,
388 midx_name.buf, local);
389 if (!m)
390 m = load_multi_pack_index_chain(r, object_dir, local);
392 strbuf_release(&midx_name);
394 return m;
397 void close_midx(struct multi_pack_index *m)
399 uint32_t i;
401 if (!m)
402 return;
404 close_midx(m->next);
405 close_midx(m->base_midx);
407 munmap((unsigned char *)m->data, m->data_len);
409 for (i = 0; i < m->num_packs; i++) {
410 if (m->packs[i] && m->packs[i] != MIDX_PACK_ERROR)
411 m->packs[i]->multi_pack_index = 0;
413 FREE_AND_NULL(m->packs);
414 FREE_AND_NULL(m->pack_names);
415 free(m);
418 static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
420 struct multi_pack_index *m = *_m;
421 while (m && pos < m->num_objects_in_base)
422 m = m->base_midx;
424 if (!m)
425 BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
427 if (pos >= m->num_objects + m->num_objects_in_base)
428 die(_("invalid MIDX object position, MIDX is likely corrupt"));
430 *_m = m;
432 return pos - m->num_objects_in_base;
435 static uint32_t midx_for_pack(struct multi_pack_index **_m,
436 uint32_t pack_int_id)
438 struct multi_pack_index *m = *_m;
439 while (m && pack_int_id < m->num_packs_in_base)
440 m = m->base_midx;
442 if (!m)
443 BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
445 if (pack_int_id >= m->num_packs + m->num_packs_in_base)
446 die(_("bad pack-int-id: %u (%u total packs)"),
447 pack_int_id, m->num_packs + m->num_packs_in_base);
449 *_m = m;
451 return pack_int_id - m->num_packs_in_base;
454 int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
455 uint32_t pack_int_id)
457 struct strbuf pack_name = STRBUF_INIT;
458 struct strbuf key = STRBUF_INIT;
459 struct packed_git *p;
461 pack_int_id = midx_for_pack(&m, pack_int_id);
463 if (m->packs[pack_int_id] == MIDX_PACK_ERROR)
464 return 1;
465 if (m->packs[pack_int_id])
466 return 0;
468 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
469 m->pack_names[pack_int_id]);
471 /* pack_map holds the ".pack" name, but we have the .idx */
472 strbuf_addbuf(&key, &pack_name);
473 strbuf_strip_suffix(&key, ".idx");
474 strbuf_addstr(&key, ".pack");
475 p = hashmap_get_entry_from_hash(&r->objects->pack_map,
476 strhash(key.buf), key.buf,
477 struct packed_git, packmap_ent);
478 if (!p) {
479 p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
480 if (p) {
481 install_packed_git(r, p);
482 list_add_tail(&p->mru, &r->objects->packed_git_mru);
486 strbuf_release(&pack_name);
487 strbuf_release(&key);
489 if (!p) {
490 m->packs[pack_int_id] = MIDX_PACK_ERROR;
491 return 1;
494 p->multi_pack_index = 1;
495 m->packs[pack_int_id] = p;
497 return 0;
500 struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
501 uint32_t pack_int_id)
503 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
504 if (m->packs[local_pack_int_id] == MIDX_PACK_ERROR)
505 return NULL;
506 return m->packs[local_pack_int_id];
509 #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
511 int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
512 struct bitmapped_pack *bp, uint32_t pack_int_id)
514 uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
516 if (!m->chunk_bitmapped_packs)
517 return error(_("MIDX does not contain the BTMP chunk"));
519 if (prepare_midx_pack(r, m, pack_int_id))
520 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
522 bp->p = m->packs[local_pack_int_id];
523 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
524 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
525 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
526 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
527 sizeof(uint32_t));
528 bp->pack_int_id = pack_int_id;
529 bp->from_midx = m;
531 return 0;
534 int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
535 uint32_t *result)
537 int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
538 m->chunk_oid_lookup, m->repo->hash_algo->rawsz,
539 result);
540 if (result)
541 *result += m->num_objects_in_base;
542 return ret;
545 int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
546 uint32_t *result)
548 for (; m; m = m->base_midx)
549 if (bsearch_one_midx(oid, m, result))
550 return 1;
551 return 0;
/* Return non-zero if `oid` is present in `m` or any of its base layers. */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
559 struct object_id *nth_midxed_object_oid(struct object_id *oid,
560 struct multi_pack_index *m,
561 uint32_t n)
563 if (n >= m->num_objects + m->num_objects_in_base)
564 return NULL;
566 n = midx_for_object(&m, n);
568 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
569 m->repo->hash_algo);
570 return oid;
573 off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
575 const unsigned char *offset_data;
576 uint32_t offset32;
578 pos = midx_for_object(&m, pos);
580 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
581 offset32 = get_be32(offset_data + sizeof(uint32_t));
583 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
584 if (sizeof(off_t) < sizeof(uint64_t))
585 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
587 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
588 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
589 die(_("multi-pack-index large offset out of bounds"));
590 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
593 return offset32;
596 uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
598 pos = midx_for_object(&m, pos);
600 return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
601 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
604 int fill_midx_entry(struct repository *r,
605 const struct object_id *oid,
606 struct pack_entry *e,
607 struct multi_pack_index *m)
609 uint32_t pos;
610 uint32_t pack_int_id;
611 struct packed_git *p;
613 if (!bsearch_midx(oid, m, &pos))
614 return 0;
616 midx_for_object(&m, pos);
617 pack_int_id = nth_midxed_pack_int_id(m, pos);
619 if (prepare_midx_pack(r, m, pack_int_id))
620 return 0;
621 p = m->packs[pack_int_id - m->num_packs_in_base];
624 * We are about to tell the caller where they can locate the
625 * requested object. We better make sure the packfile is
626 * still here and can be accessed before supplying that
627 * answer, as it may have been deleted since the MIDX was
628 * loaded!
630 if (!is_pack_valid(p))
631 return 0;
633 if (oidset_size(&p->bad_objects) &&
634 oidset_contains(&p->bad_objects, oid))
635 return 0;
637 e->offset = nth_midxed_offset(m, pos);
638 e->p = p;
640 return 1;
/*
 * Compare `idx_or_pack_name` ("foo.pack" or "foo.idx") against the index
 * name `idx_name` ("foo.idx"). Returns 0 when they name the same pack and
 * otherwise the strcmp() ordering of the two strings.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	const char *name = idx_or_pack_name;
	const char *idx = idx_name;

	/* Advance both cursors over the common prefix. */
	for (; *idx && *idx == *name; idx++, name++)
		; /* nothing */

	/*
	 * A leftover of "idx" versus "pack" means the names only differ in
	 * their extension (the shared prefix ended at "pack-1234."), which
	 * counts as a match. "idx" versus "idx" never shows up here: it would
	 * have been consumed as part of the prefix and is handled by the
	 * final strcmp() returning 0.
	 *
	 * Technically this also matches "fooidx" against "foopack", but such
	 * names do not occur in practice.
	 */
	if (strcmp(idx, "idx") == 0 && strcmp(name, "pack") == 0)
		return 0;

	/*
	 * Comparing the remainders orders by the first differing character,
	 * exactly like a strcmp() of the full strings would. That makes this
	 * function usable for binary searching a plainly sorted list.
	 */
	return strcmp(name, idx);
}
675 static int midx_contains_pack_1(struct multi_pack_index *m,
676 const char *idx_or_pack_name)
678 uint32_t first = 0, last = m->num_packs;
680 while (first < last) {
681 uint32_t mid = first + (last - first) / 2;
682 const char *current;
683 int cmp;
685 current = m->pack_names[mid];
686 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
687 if (!cmp)
688 return 1;
689 if (cmp > 0) {
690 first = mid + 1;
691 continue;
693 last = mid;
696 return 0;
699 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
701 for (; m; m = m->base_midx)
702 if (midx_contains_pack_1(m, idx_or_pack_name))
703 return 1;
704 return 0;
707 int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
709 if (m->preferred_pack_idx == -1) {
710 uint32_t midx_pos;
711 if (load_midx_revindex(m) < 0) {
712 m->preferred_pack_idx = -2;
713 return -1;
716 midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
718 m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
720 } else if (m->preferred_pack_idx == -2)
721 return -1; /* no revindex */
723 *pack_int_id = m->preferred_pack_idx;
724 return 0;
727 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
729 struct multi_pack_index *m;
730 struct multi_pack_index *m_search;
732 prepare_repo_settings(r);
733 if (!r->settings.core_multi_pack_index)
734 return 0;
736 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
737 if (!strcmp(object_dir, m_search->object_dir))
738 return 1;
740 m = load_multi_pack_index(r, object_dir, local);
742 if (m) {
743 struct multi_pack_index *mp = r->objects->multi_pack_index;
744 if (mp) {
745 m->next = mp->next;
746 mp->next = m;
747 } else
748 r->objects->multi_pack_index = m;
749 return 1;
752 return 0;
755 int midx_checksum_valid(struct multi_pack_index *m)
757 return hashfile_checksum_valid(m->repo->hash_algo,
758 m->data, m->data_len);
/* State handed to the clear_midx_file_ext() directory callback. */
struct clear_midx_data {
	/* File names that must be left alone. */
	char **keep;
	/* Number of entries in 'keep'. */
	uint32_t keep_nr;
	/* Only files whose name ends with this string are removed. */
	const char *ext;
};
767 static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
768 const char *file_name, void *_data)
770 struct clear_midx_data *data = _data;
771 uint32_t i;
773 if (!(starts_with(file_name, "multi-pack-index-") &&
774 ends_with(file_name, data->ext)))
775 return;
776 for (i = 0; i < data->keep_nr; i++) {
777 if (!strcmp(data->keep[i], file_name))
778 return;
780 if (unlink(full_path))
781 die_errno(_("failed to remove %s"), full_path);
784 void clear_midx_files_ext(const char *object_dir, const char *ext,
785 const char *keep_hash)
787 struct clear_midx_data data;
788 memset(&data, 0, sizeof(struct clear_midx_data));
790 if (keep_hash) {
791 ALLOC_ARRAY(data.keep, 1);
793 data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
794 data.keep_nr = 1;
796 data.ext = ext;
798 for_each_file_in_pack_dir(object_dir,
799 clear_midx_file_ext,
800 &data);
802 if (keep_hash)
803 free(data.keep[0]);
804 free(data.keep);
807 void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
808 char **keep_hashes,
809 uint32_t hashes_nr)
811 struct clear_midx_data data;
812 uint32_t i;
814 memset(&data, 0, sizeof(struct clear_midx_data));
816 ALLOC_ARRAY(data.keep, hashes_nr);
817 for (i = 0; i < hashes_nr; i++)
818 data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
819 ext);
820 data.keep_nr = hashes_nr;
821 data.ext = ext;
823 for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d",
824 clear_midx_file_ext, &data);
826 for (i = 0; i < hashes_nr; i++)
827 free(data.keep[i]);
828 free(data.keep);
831 void clear_midx_file(struct repository *r)
833 struct strbuf midx = STRBUF_INIT;
835 get_midx_filename(r->hash_algo, &midx, r->objects->sources->path);
837 if (r->objects && r->objects->multi_pack_index) {
838 close_midx(r->objects->multi_pack_index);
839 r->objects->multi_pack_index = NULL;
842 if (remove_path(midx.buf))
843 die(_("failed to clear multi-pack-index at %s"), midx.buf);
845 clear_midx_files_ext(r->objects->sources->path, MIDX_EXT_BITMAP, NULL);
846 clear_midx_files_ext(r->objects->sources->path, MIDX_EXT_REV, NULL);
848 strbuf_release(&midx);
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Record a verification failure and print the printf-style message,
 * followed by a newline, on stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
/* Pairs an object's position in the MIDX with the id of its pack. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator ordering pairs by descending pack_int_id.
 *
 * The ids are compared explicitly rather than subtracted: the difference of
 * two uint32_t values does not fit in an int, so a subtraction would report
 * the wrong sign for ids that are 2^31 or more apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (a->pack_int_id < b->pack_int_id)
		return 1;
	if (a->pack_int_id > b->pack_int_id)
		return -1;
	return 0;
}
/*
 * Limit calls to display_progress() for performance reasons.
 * The interval here was arbitrarily chosen.
 *
 * 'n' is evaluated exactly once; progress is only displayed when it is a
 * multiple of SPARSE_PROGRESS_INTERVAL.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t _n = (n); \
		if (!(_n & (SPARSE_PROGRESS_INTERVAL - 1))) \
			display_progress(progress, _n); \
	} while (0)
890 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
892 struct pair_pos_vs_id *pairs = NULL;
893 uint32_t i;
894 struct progress *progress = NULL;
895 struct multi_pack_index *m = load_multi_pack_index(r, object_dir, 1);
896 struct multi_pack_index *curr;
897 verify_midx_error = 0;
899 if (!m) {
900 int result = 0;
901 struct stat sb;
902 struct strbuf filename = STRBUF_INIT;
904 get_midx_filename(r->hash_algo, &filename, object_dir);
906 if (!stat(filename.buf, &sb)) {
907 error(_("multi-pack-index file exists, but failed to parse"));
908 result = 1;
910 strbuf_release(&filename);
911 return result;
914 if (!midx_checksum_valid(m))
915 midx_report(_("incorrect checksum"));
917 if (flags & MIDX_PROGRESS)
918 progress = start_delayed_progress(r,
919 _("Looking for referenced packfiles"),
920 m->num_packs + m->num_packs_in_base);
921 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
922 if (prepare_midx_pack(r, m, i))
923 midx_report("failed to load pack in position %d", i);
925 display_progress(progress, i + 1);
927 stop_progress(&progress);
929 if (m->num_objects == 0) {
930 midx_report(_("the midx contains no oid"));
932 * Remaining tests assume that we have objects, so we can
933 * return here.
935 goto cleanup;
938 if (flags & MIDX_PROGRESS)
939 progress = start_sparse_progress(r,
940 _("Verifying OID order in multi-pack-index"),
941 m->num_objects - 1);
943 for (curr = m; curr; curr = curr->base_midx) {
944 for (i = 0; i < m->num_objects - 1; i++) {
945 struct object_id oid1, oid2;
947 nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
948 nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
950 if (oidcmp(&oid1, &oid2) >= 0)
951 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
952 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
954 midx_display_sparse_progress(progress, i + 1);
957 stop_progress(&progress);
960 * Create an array mapping each object to its packfile id. Sort it
961 * to group the objects by packfile. Use this permutation to visit
962 * each of the objects and only require 1 packfile to be open at a
963 * time.
965 ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
966 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
967 pairs[i].pos = i;
968 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
971 if (flags & MIDX_PROGRESS)
972 progress = start_sparse_progress(r,
973 _("Sorting objects by packfile"),
974 m->num_objects);
975 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
976 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
977 stop_progress(&progress);
979 if (flags & MIDX_PROGRESS)
980 progress = start_sparse_progress(r,
981 _("Verifying object offsets"),
982 m->num_objects);
983 for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
984 struct object_id oid;
985 struct pack_entry e;
986 off_t m_offset, p_offset;
988 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
989 nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
990 uint32_t pack_int_id = pairs[i-1].pack_int_id;
991 struct packed_git *p = nth_midxed_pack(m, pack_int_id);
993 close_pack_fd(p);
994 close_pack_index(p);
997 nth_midxed_object_oid(&oid, m, pairs[i].pos);
999 if (!fill_midx_entry(r, &oid, &e, m)) {
1000 midx_report(_("failed to load pack entry for oid[%d] = %s"),
1001 pairs[i].pos, oid_to_hex(&oid));
1002 continue;
1005 if (open_pack_index(e.p)) {
1006 midx_report(_("failed to load pack-index for packfile %s"),
1007 e.p->pack_name);
1008 break;
1011 m_offset = e.offset;
1012 p_offset = find_pack_entry_one(&oid, e.p);
1014 if (m_offset != p_offset)
1015 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1016 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
1018 midx_display_sparse_progress(progress, i + 1);
1020 stop_progress(&progress);
1022 cleanup:
1023 free(pairs);
1024 close_midx(m);
1026 return verify_midx_error;