The sixteenth batch
[git/gitster.git] / path-walk.c
blob2d4ddbadd50f78e958f1a79a78fae475c5ff1296
1 /*
2 * path-walk.c: implementation for path-based walks of the object graph.
3 */
4 #include "git-compat-util.h"
5 #include "path-walk.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "dir.h"
9 #include "hashmap.h"
10 #include "hex.h"
11 #include "list-objects.h"
12 #include "object.h"
13 #include "oid-array.h"
14 #include "prio-queue.h"
15 #include "repository.h"
16 #include "revision.h"
17 #include "string-list.h"
18 #include "strmap.h"
19 #include "tag.h"
20 #include "trace2.h"
21 #include "tree.h"
22 #include "tree-walk.h"
24 static const char *root_path = "";
26 struct type_and_oid_list {
27 enum object_type type;
28 struct oid_array oids;
29 int maybe_interesting;
32 #define TYPE_AND_OID_LIST_INIT { \
33 .type = OBJ_NONE, \
34 .oids = OID_ARRAY_INIT \
37 struct path_walk_context {
38 /**
39 * Repeats of data in 'struct path_walk_info' for
40 * access with fewer characters.
42 struct repository *repo;
43 struct rev_info *revs;
44 struct path_walk_info *info;
46 /**
47 * Map a path to a 'struct type_and_oid_list'
48 * containing the objects discovered at that
49 * path.
51 struct strmap paths_to_lists;
53 /**
54 * Store the current list of paths in a priority queue,
55 * using object type as a sorting mechanism, mostly to
56 * make sure blobs are popped off the stack first. No
57 * other sort is made, so within each object type it acts
58 * like a stack and performs a DFS within the trees.
60 * Use path_stack_pushed to indicate whether a path
61 * was previously added to path_stack.
63 struct prio_queue path_stack;
64 struct strset path_stack_pushed;
67 static int compare_by_type(const void *one, const void *two, void *cb_data)
69 struct type_and_oid_list *list1, *list2;
70 const char *str1 = one;
71 const char *str2 = two;
72 struct path_walk_context *ctx = cb_data;
74 list1 = strmap_get(&ctx->paths_to_lists, str1);
75 list2 = strmap_get(&ctx->paths_to_lists, str2);
78 * If object types are equal, then use path comparison.
80 if (!list1 || !list2 || list1->type == list2->type)
81 return strcmp(str1, str2);
83 /* Prefer tags to be popped off first. */
84 if (list1->type == OBJ_TAG)
85 return -1;
86 if (list2->type == OBJ_TAG)
87 return 1;
89 /* Prefer blobs to be popped off second. */
90 if (list1->type == OBJ_BLOB)
91 return -1;
92 if (list2->type == OBJ_BLOB)
93 return 1;
95 return 0;
98 static void push_to_stack(struct path_walk_context *ctx,
99 const char *path)
101 if (strset_contains(&ctx->path_stack_pushed, path))
102 return;
104 strset_add(&ctx->path_stack_pushed, path);
105 prio_queue_put(&ctx->path_stack, xstrdup(path));
108 static int add_tree_entries(struct path_walk_context *ctx,
109 const char *base_path,
110 struct object_id *oid)
112 struct tree_desc desc;
113 struct name_entry entry;
114 struct strbuf path = STRBUF_INIT;
115 size_t base_len;
116 struct tree *tree = lookup_tree(ctx->repo, oid);
118 if (!tree) {
119 error(_("failed to walk children of tree %s: not found"),
120 oid_to_hex(oid));
121 return -1;
122 } else if (parse_tree_gently(tree, 1)) {
123 error("bad tree object %s", oid_to_hex(oid));
124 return -1;
127 strbuf_addstr(&path, base_path);
128 base_len = path.len;
130 init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
131 while (tree_entry(&desc, &entry)) {
132 struct type_and_oid_list *list;
133 struct object *o;
134 /* Not actually true, but we will ignore submodules later. */
135 enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
137 /* Skip submodules. */
138 if (S_ISGITLINK(entry.mode))
139 continue;
141 /* If the caller doesn't want blobs, then don't bother. */
142 if (!ctx->info->blobs && type == OBJ_BLOB)
143 continue;
145 if (type == OBJ_TREE) {
146 struct tree *child = lookup_tree(ctx->repo, &entry.oid);
147 o = child ? &child->object : NULL;
148 } else if (type == OBJ_BLOB) {
149 struct blob *child = lookup_blob(ctx->repo, &entry.oid);
150 o = child ? &child->object : NULL;
151 } else {
152 BUG("invalid type for tree entry: %d", type);
155 if (!o) {
156 error(_("failed to find object %s"),
157 oid_to_hex(&o->oid));
158 return -1;
161 /* Skip this object if already seen. */
162 if (o->flags & SEEN)
163 continue;
164 o->flags |= SEEN;
166 strbuf_setlen(&path, base_len);
167 strbuf_add(&path, entry.path, entry.pathlen);
170 * Trees will end with "/" for concatenation and distinction
171 * from blobs at the same path.
173 if (type == OBJ_TREE)
174 strbuf_addch(&path, '/');
176 if (ctx->info->pl) {
177 int dtype;
178 enum pattern_match_result match;
179 match = path_matches_pattern_list(path.buf, path.len,
180 path.buf + base_len, &dtype,
181 ctx->info->pl,
182 ctx->repo->index);
184 if (ctx->info->pl->use_cone_patterns &&
185 match == NOT_MATCHED)
186 continue;
187 else if (!ctx->info->pl->use_cone_patterns &&
188 type == OBJ_BLOB &&
189 match != MATCHED)
190 continue;
193 if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
194 CALLOC_ARRAY(list, 1);
195 list->type = type;
196 strmap_put(&ctx->paths_to_lists, path.buf, list);
198 push_to_stack(ctx, path.buf);
200 if (!(o->flags & UNINTERESTING))
201 list->maybe_interesting = 1;
203 oid_array_append(&list->oids, &entry.oid);
206 free_tree_buffer(tree);
207 strbuf_release(&path);
208 return 0;
212 * For each path in paths_to_explore, walk the trees another level
213 * and add any found blobs to the batch (but only if they exist and
214 * haven't been added yet).
216 static int walk_path(struct path_walk_context *ctx,
217 const char *path)
219 struct type_and_oid_list *list;
220 int ret = 0;
222 list = strmap_get(&ctx->paths_to_lists, path);
224 if (!list)
225 BUG("provided path '%s' that had no associated list", path);
227 if (!list->oids.nr)
228 return 0;
230 if (ctx->info->prune_all_uninteresting) {
232 * This is true if all objects were UNINTERESTING
233 * when added to the list.
235 if (!list->maybe_interesting)
236 return 0;
239 * But it's still possible that the objects were set
240 * as UNINTERESTING after being added. Do a quick check.
242 list->maybe_interesting = 0;
243 for (size_t i = 0;
244 !list->maybe_interesting && i < list->oids.nr;
245 i++) {
246 if (list->type == OBJ_TREE) {
247 struct tree *t = lookup_tree(ctx->repo,
248 &list->oids.oid[i]);
249 if (t && !(t->object.flags & UNINTERESTING))
250 list->maybe_interesting = 1;
251 } else if (list->type == OBJ_BLOB) {
252 struct blob *b = lookup_blob(ctx->repo,
253 &list->oids.oid[i]);
254 if (b && !(b->object.flags & UNINTERESTING))
255 list->maybe_interesting = 1;
256 } else {
257 /* Tags are always interesting if visited. */
258 list->maybe_interesting = 1;
262 /* We have confirmed that all objects are UNINTERESTING. */
263 if (!list->maybe_interesting)
264 return 0;
267 /* Evaluate function pointer on this data, if requested. */
268 if ((list->type == OBJ_TREE && ctx->info->trees) ||
269 (list->type == OBJ_BLOB && ctx->info->blobs) ||
270 (list->type == OBJ_TAG && ctx->info->tags))
271 ret = ctx->info->path_fn(path, &list->oids, list->type,
272 ctx->info->path_fn_data);
274 /* Expand data for children. */
275 if (list->type == OBJ_TREE) {
276 for (size_t i = 0; i < list->oids.nr; i++) {
277 ret |= add_tree_entries(ctx,
278 path,
279 &list->oids.oid[i]);
283 oid_array_clear(&list->oids);
284 strmap_remove(&ctx->paths_to_lists, path, 1);
285 return ret;
288 static void clear_paths_to_lists(struct strmap *map)
290 struct hashmap_iter iter;
291 struct strmap_entry *e;
293 hashmap_for_each_entry(&map->map, &iter, e, ent) {
294 struct type_and_oid_list *list = e->value;
295 oid_array_clear(&list->oids);
297 strmap_clear(map, 1);
298 strmap_init(map);
301 static struct repository *edge_repo;
302 static struct type_and_oid_list *edge_tree_list;
304 static void show_edge(struct commit *commit)
306 struct tree *t = repo_get_commit_tree(edge_repo, commit);
308 if (!t)
309 return;
311 if (commit->object.flags & UNINTERESTING)
312 t->object.flags |= UNINTERESTING;
314 if (t->object.flags & SEEN)
315 return;
316 t->object.flags |= SEEN;
318 oid_array_append(&edge_tree_list->oids, &t->object.oid);
321 static int setup_pending_objects(struct path_walk_info *info,
322 struct path_walk_context *ctx)
324 struct type_and_oid_list *tags = NULL;
325 struct type_and_oid_list *tagged_blobs = NULL;
326 struct type_and_oid_list *root_tree_list = NULL;
328 if (info->tags)
329 CALLOC_ARRAY(tags, 1);
330 if (info->blobs)
331 CALLOC_ARRAY(tagged_blobs, 1);
332 if (info->trees)
333 root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
336 * Pending objects include:
337 * * Commits at branch tips.
338 * * Annotated tags at tag tips.
339 * * Any kind of object at lightweight tag tips.
340 * * Trees and blobs in the index (with an associated path).
342 for (size_t i = 0; i < info->revs->pending.nr; i++) {
343 struct object_array_entry *pending = info->revs->pending.objects + i;
344 struct object *obj = pending->item;
346 /* Commits will be picked up by revision walk. */
347 if (obj->type == OBJ_COMMIT)
348 continue;
350 /* Navigate annotated tag object chains. */
351 while (obj->type == OBJ_TAG) {
352 struct tag *tag = lookup_tag(info->revs->repo, &obj->oid);
353 if (!tag) {
354 error(_("failed to find tag %s"),
355 oid_to_hex(&obj->oid));
356 return -1;
358 if (tag->object.flags & SEEN)
359 break;
360 tag->object.flags |= SEEN;
362 if (tags)
363 oid_array_append(&tags->oids, &obj->oid);
364 obj = tag->tagged;
367 if (obj->type == OBJ_TAG)
368 continue;
370 /* We are now at a non-tag object. */
371 if (obj->flags & SEEN)
372 continue;
373 obj->flags |= SEEN;
375 switch (obj->type) {
376 case OBJ_TREE:
377 if (!info->trees)
378 continue;
379 if (pending->path) {
380 struct type_and_oid_list *list;
381 char *path = *pending->path ? xstrfmt("%s/", pending->path)
382 : xstrdup("");
383 if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
384 CALLOC_ARRAY(list, 1);
385 list->type = OBJ_TREE;
386 strmap_put(&ctx->paths_to_lists, path, list);
388 oid_array_append(&list->oids, &obj->oid);
389 free(path);
390 } else {
391 /* assume a root tree, such as a lightweight tag. */
392 oid_array_append(&root_tree_list->oids, &obj->oid);
394 break;
396 case OBJ_BLOB:
397 if (!info->blobs)
398 continue;
399 if (pending->path) {
400 struct type_and_oid_list *list;
401 char *path = pending->path;
402 if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
403 CALLOC_ARRAY(list, 1);
404 list->type = OBJ_BLOB;
405 strmap_put(&ctx->paths_to_lists, path, list);
407 oid_array_append(&list->oids, &obj->oid);
408 } else {
409 /* assume a root tree, such as a lightweight tag. */
410 oid_array_append(&tagged_blobs->oids, &obj->oid);
412 break;
414 case OBJ_COMMIT:
415 /* Make sure it is in the object walk */
416 if (obj != pending->item)
417 add_pending_object(info->revs, obj, "");
418 break;
420 default:
421 BUG("should not see any other type here");
426 * Add tag objects and tagged blobs if they exist.
428 if (tagged_blobs) {
429 if (tagged_blobs->oids.nr) {
430 const char *tagged_blob_path = "/tagged-blobs";
431 tagged_blobs->type = OBJ_BLOB;
432 tagged_blobs->maybe_interesting = 1;
433 strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
434 push_to_stack(ctx, tagged_blob_path);
435 } else {
436 oid_array_clear(&tagged_blobs->oids);
437 free(tagged_blobs);
440 if (tags) {
441 if (tags->oids.nr) {
442 const char *tag_path = "/tags";
443 tags->type = OBJ_TAG;
444 tags->maybe_interesting = 1;
445 strmap_put(&ctx->paths_to_lists, tag_path, tags);
446 push_to_stack(ctx, tag_path);
447 } else {
448 oid_array_clear(&tags->oids);
449 free(tags);
453 return 0;
457 * Given the configuration of 'info', walk the commits based on 'info->revs' and
458 * call 'info->path_fn' on each discovered path.
460 * Returns nonzero on an error.
462 int walk_objects_by_path(struct path_walk_info *info)
464 int ret;
465 size_t commits_nr = 0, paths_nr = 0;
466 struct commit *c;
467 struct type_and_oid_list *root_tree_list;
468 struct type_and_oid_list *commit_list;
469 struct path_walk_context ctx = {
470 .repo = info->revs->repo,
471 .revs = info->revs,
472 .info = info,
473 .path_stack = {
474 .compare = compare_by_type,
475 .cb_data = &ctx
477 .path_stack_pushed = STRSET_INIT,
478 .paths_to_lists = STRMAP_INIT
481 trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
483 CALLOC_ARRAY(commit_list, 1);
484 commit_list->type = OBJ_COMMIT;
486 if (info->tags)
487 info->revs->tag_objects = 1;
489 /* Insert a single list for the root tree into the paths. */
490 CALLOC_ARRAY(root_tree_list, 1);
491 root_tree_list->type = OBJ_TREE;
492 root_tree_list->maybe_interesting = 1;
493 strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
494 push_to_stack(&ctx, root_path);
497 * Set these values before preparing the walk to catch
498 * lightweight tags pointing to non-commits and indexed objects.
500 info->revs->blob_objects = info->blobs;
501 info->revs->tree_objects = info->trees;
503 if (prepare_revision_walk(info->revs))
504 die(_("failed to setup revision walk"));
507 * Walk trees to mark them as UNINTERESTING.
508 * This is particularly important when 'edge_aggressive' is set.
510 info->revs->edge_hint_aggressive = info->edge_aggressive;
511 edge_repo = info->revs->repo;
512 edge_tree_list = root_tree_list;
513 mark_edges_uninteresting(info->revs, show_edge,
514 info->prune_all_uninteresting);
515 edge_repo = NULL;
516 edge_tree_list = NULL;
518 info->revs->blob_objects = info->revs->tree_objects = 0;
520 trace2_region_enter("path-walk", "pending-walk", info->revs->repo);
521 ret = setup_pending_objects(info, &ctx);
522 trace2_region_leave("path-walk", "pending-walk", info->revs->repo);
524 if (ret)
525 return ret;
527 while ((c = get_revision(info->revs))) {
528 struct object_id *oid;
529 struct tree *t;
530 commits_nr++;
532 if (info->commits)
533 oid_array_append(&commit_list->oids,
534 &c->object.oid);
536 /* If we only care about commits, then skip trees. */
537 if (!info->trees && !info->blobs)
538 continue;
540 oid = get_commit_tree_oid(c);
541 t = lookup_tree(info->revs->repo, oid);
543 if (!t) {
544 error("could not find tree %s", oid_to_hex(oid));
545 return -1;
548 if (t->object.flags & SEEN)
549 continue;
550 t->object.flags |= SEEN;
551 oid_array_append(&root_tree_list->oids, oid);
554 trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
555 trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
557 /* Track all commits. */
558 if (info->commits && commit_list->oids.nr)
559 ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
560 info->path_fn_data);
561 oid_array_clear(&commit_list->oids);
562 free(commit_list);
564 trace2_region_enter("path-walk", "path-walk", info->revs->repo);
565 while (!ret && ctx.path_stack.nr) {
566 char *path = prio_queue_get(&ctx.path_stack);
567 paths_nr++;
569 ret = walk_path(&ctx, path);
571 free(path);
574 /* Are there paths remaining? Likely they are from indexed objects. */
575 if (!strmap_empty(&ctx.paths_to_lists)) {
576 struct hashmap_iter iter;
577 struct strmap_entry *entry;
579 strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry)
580 push_to_stack(&ctx, entry->key);
582 while (!ret && ctx.path_stack.nr) {
583 char *path = prio_queue_get(&ctx.path_stack);
584 paths_nr++;
586 ret = walk_path(&ctx, path);
588 free(path);
592 trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);
593 trace2_region_leave("path-walk", "path-walk", info->revs->repo);
595 clear_paths_to_lists(&ctx.paths_to_lists);
596 strset_clear(&ctx.path_stack_pushed);
597 clear_prio_queue(&ctx.path_stack);
598 return ret;
601 void path_walk_info_init(struct path_walk_info *info)
603 struct path_walk_info empty = PATH_WALK_INFO_INIT;
604 memcpy(info, &empty, sizeof(empty));
607 void path_walk_info_clear(struct path_walk_info *info)
609 if (info->pl) {
610 clear_pattern_list(info->pl);
611 free(info->pl);