2 * path-walk.c: implementation for path-based walks of the object graph.
4 #include "git-compat-util.h"
11 #include "list-objects.h"
13 #include "oid-array.h"
14 #include "prio-queue.h"
15 #include "repository.h"
17 #include "string-list.h"
22 #include "tree-walk.h"
24 static const char *root_path
= "";
26 struct type_and_oid_list
{
27 enum object_type type
;
28 struct oid_array oids
;
29 int maybe_interesting
;
32 #define TYPE_AND_OID_LIST_INIT { \
34 .oids = OID_ARRAY_INIT \
37 struct path_walk_context
{
39 * Repeats of data in 'struct path_walk_info' for
40 * access with fewer characters.
42 struct repository
*repo
;
43 struct rev_info
*revs
;
44 struct path_walk_info
*info
;
47 * Map a path to a 'struct type_and_oid_list'
48 * containing the objects discovered at that
51 struct strmap paths_to_lists
;
54 * Store the current list of paths in a priority queue,
55 * using object type as a sorting mechanism, mostly to
56 * make sure blobs are popped off the stack first. No
57 * other sort is made, so within each object type it acts
58 * like a stack and performs a DFS within the trees.
60 * Use path_stack_pushed to indicate whether a path
61 * was previously added to path_stack.
63 struct prio_queue path_stack
;
64 struct strset path_stack_pushed
;
67 static int compare_by_type(const void *one
, const void *two
, void *cb_data
)
69 struct type_and_oid_list
*list1
, *list2
;
70 const char *str1
= one
;
71 const char *str2
= two
;
72 struct path_walk_context
*ctx
= cb_data
;
74 list1
= strmap_get(&ctx
->paths_to_lists
, str1
);
75 list2
= strmap_get(&ctx
->paths_to_lists
, str2
);
78 * If object types are equal, then use path comparison.
80 if (!list1
|| !list2
|| list1
->type
== list2
->type
)
81 return strcmp(str1
, str2
);
83 /* Prefer tags to be popped off first. */
84 if (list1
->type
== OBJ_TAG
)
86 if (list2
->type
== OBJ_TAG
)
89 /* Prefer blobs to be popped off second. */
90 if (list1
->type
== OBJ_BLOB
)
92 if (list2
->type
== OBJ_BLOB
)
98 static void push_to_stack(struct path_walk_context
*ctx
,
101 if (strset_contains(&ctx
->path_stack_pushed
, path
))
104 strset_add(&ctx
->path_stack_pushed
, path
);
105 prio_queue_put(&ctx
->path_stack
, xstrdup(path
));
108 static int add_tree_entries(struct path_walk_context
*ctx
,
109 const char *base_path
,
110 struct object_id
*oid
)
112 struct tree_desc desc
;
113 struct name_entry entry
;
114 struct strbuf path
= STRBUF_INIT
;
116 struct tree
*tree
= lookup_tree(ctx
->repo
, oid
);
119 error(_("failed to walk children of tree %s: not found"),
122 } else if (parse_tree_gently(tree
, 1)) {
123 error("bad tree object %s", oid_to_hex(oid
));
127 strbuf_addstr(&path
, base_path
);
130 init_tree_desc(&desc
, &tree
->object
.oid
, tree
->buffer
, tree
->size
);
131 while (tree_entry(&desc
, &entry
)) {
132 struct type_and_oid_list
*list
;
134 /* Not actually true, but we will ignore submodules later. */
135 enum object_type type
= S_ISDIR(entry
.mode
) ? OBJ_TREE
: OBJ_BLOB
;
137 /* Skip submodules. */
138 if (S_ISGITLINK(entry
.mode
))
141 /* If the caller doesn't want blobs, then don't bother. */
142 if (!ctx
->info
->blobs
&& type
== OBJ_BLOB
)
145 if (type
== OBJ_TREE
) {
146 struct tree
*child
= lookup_tree(ctx
->repo
, &entry
.oid
);
147 o
= child
? &child
->object
: NULL
;
148 } else if (type
== OBJ_BLOB
) {
149 struct blob
*child
= lookup_blob(ctx
->repo
, &entry
.oid
);
150 o
= child
? &child
->object
: NULL
;
152 BUG("invalid type for tree entry: %d", type
);
156 error(_("failed to find object %s"),
157 oid_to_hex(&o
->oid
));
161 /* Skip this object if already seen. */
166 strbuf_setlen(&path
, base_len
);
167 strbuf_add(&path
, entry
.path
, entry
.pathlen
);
170 * Trees will end with "/" for concatenation and distinction
171 * from blobs at the same path.
173 if (type
== OBJ_TREE
)
174 strbuf_addch(&path
, '/');
178 enum pattern_match_result match
;
179 match
= path_matches_pattern_list(path
.buf
, path
.len
,
180 path
.buf
+ base_len
, &dtype
,
184 if (ctx
->info
->pl
->use_cone_patterns
&&
185 match
== NOT_MATCHED
)
187 else if (!ctx
->info
->pl
->use_cone_patterns
&&
193 if (!(list
= strmap_get(&ctx
->paths_to_lists
, path
.buf
))) {
194 CALLOC_ARRAY(list
, 1);
196 strmap_put(&ctx
->paths_to_lists
, path
.buf
, list
);
198 push_to_stack(ctx
, path
.buf
);
200 if (!(o
->flags
& UNINTERESTING
))
201 list
->maybe_interesting
= 1;
203 oid_array_append(&list
->oids
, &entry
.oid
);
206 free_tree_buffer(tree
);
207 strbuf_release(&path
);
212 * For each path in paths_to_explore, walk the trees another level
213 * and add any found blobs to the batch (but only if they exist and
214 * haven't been added yet).
216 static int walk_path(struct path_walk_context
*ctx
,
219 struct type_and_oid_list
*list
;
222 list
= strmap_get(&ctx
->paths_to_lists
, path
);
225 BUG("provided path '%s' that had no associated list", path
);
230 if (ctx
->info
->prune_all_uninteresting
) {
232 * This is true if all objects were UNINTERESTING
233 * when added to the list.
235 if (!list
->maybe_interesting
)
239 * But it's still possible that the objects were set
240 * as UNINTERESTING after being added. Do a quick check.
242 list
->maybe_interesting
= 0;
244 !list
->maybe_interesting
&& i
< list
->oids
.nr
;
246 if (list
->type
== OBJ_TREE
) {
247 struct tree
*t
= lookup_tree(ctx
->repo
,
249 if (t
&& !(t
->object
.flags
& UNINTERESTING
))
250 list
->maybe_interesting
= 1;
251 } else if (list
->type
== OBJ_BLOB
) {
252 struct blob
*b
= lookup_blob(ctx
->repo
,
254 if (b
&& !(b
->object
.flags
& UNINTERESTING
))
255 list
->maybe_interesting
= 1;
257 /* Tags are always interesting if visited. */
258 list
->maybe_interesting
= 1;
262 /* We have confirmed that all objects are UNINTERESTING. */
263 if (!list
->maybe_interesting
)
267 /* Evaluate function pointer on this data, if requested. */
268 if ((list
->type
== OBJ_TREE
&& ctx
->info
->trees
) ||
269 (list
->type
== OBJ_BLOB
&& ctx
->info
->blobs
) ||
270 (list
->type
== OBJ_TAG
&& ctx
->info
->tags
))
271 ret
= ctx
->info
->path_fn(path
, &list
->oids
, list
->type
,
272 ctx
->info
->path_fn_data
);
274 /* Expand data for children. */
275 if (list
->type
== OBJ_TREE
) {
276 for (size_t i
= 0; i
< list
->oids
.nr
; i
++) {
277 ret
|= add_tree_entries(ctx
,
283 oid_array_clear(&list
->oids
);
284 strmap_remove(&ctx
->paths_to_lists
, path
, 1);
288 static void clear_paths_to_lists(struct strmap
*map
)
290 struct hashmap_iter iter
;
291 struct strmap_entry
*e
;
293 hashmap_for_each_entry(&map
->map
, &iter
, e
, ent
) {
294 struct type_and_oid_list
*list
= e
->value
;
295 oid_array_clear(&list
->oids
);
297 strmap_clear(map
, 1);
301 static struct repository
*edge_repo
;
302 static struct type_and_oid_list
*edge_tree_list
;
304 static void show_edge(struct commit
*commit
)
306 struct tree
*t
= repo_get_commit_tree(edge_repo
, commit
);
311 if (commit
->object
.flags
& UNINTERESTING
)
312 t
->object
.flags
|= UNINTERESTING
;
314 if (t
->object
.flags
& SEEN
)
316 t
->object
.flags
|= SEEN
;
318 oid_array_append(&edge_tree_list
->oids
, &t
->object
.oid
);
321 static int setup_pending_objects(struct path_walk_info
*info
,
322 struct path_walk_context
*ctx
)
324 struct type_and_oid_list
*tags
= NULL
;
325 struct type_and_oid_list
*tagged_blobs
= NULL
;
326 struct type_and_oid_list
*root_tree_list
= NULL
;
329 CALLOC_ARRAY(tags
, 1);
331 CALLOC_ARRAY(tagged_blobs
, 1);
333 root_tree_list
= strmap_get(&ctx
->paths_to_lists
, root_path
);
336 * Pending objects include:
337 * * Commits at branch tips.
338 * * Annotated tags at tag tips.
339 * * Any kind of object at lightweight tag tips.
340 * * Trees and blobs in the index (with an associated path).
342 for (size_t i
= 0; i
< info
->revs
->pending
.nr
; i
++) {
343 struct object_array_entry
*pending
= info
->revs
->pending
.objects
+ i
;
344 struct object
*obj
= pending
->item
;
346 /* Commits will be picked up by revision walk. */
347 if (obj
->type
== OBJ_COMMIT
)
350 /* Navigate annotated tag object chains. */
351 while (obj
->type
== OBJ_TAG
) {
352 struct tag
*tag
= lookup_tag(info
->revs
->repo
, &obj
->oid
);
354 error(_("failed to find tag %s"),
355 oid_to_hex(&obj
->oid
));
358 if (tag
->object
.flags
& SEEN
)
360 tag
->object
.flags
|= SEEN
;
363 oid_array_append(&tags
->oids
, &obj
->oid
);
367 if (obj
->type
== OBJ_TAG
)
370 /* We are now at a non-tag object. */
371 if (obj
->flags
& SEEN
)
380 struct type_and_oid_list
*list
;
381 char *path
= *pending
->path
? xstrfmt("%s/", pending
->path
)
383 if (!(list
= strmap_get(&ctx
->paths_to_lists
, path
))) {
384 CALLOC_ARRAY(list
, 1);
385 list
->type
= OBJ_TREE
;
386 strmap_put(&ctx
->paths_to_lists
, path
, list
);
388 oid_array_append(&list
->oids
, &obj
->oid
);
391 /* assume a root tree, such as a lightweight tag. */
392 oid_array_append(&root_tree_list
->oids
, &obj
->oid
);
400 struct type_and_oid_list
*list
;
401 char *path
= pending
->path
;
402 if (!(list
= strmap_get(&ctx
->paths_to_lists
, path
))) {
403 CALLOC_ARRAY(list
, 1);
404 list
->type
= OBJ_BLOB
;
405 strmap_put(&ctx
->paths_to_lists
, path
, list
);
407 oid_array_append(&list
->oids
, &obj
->oid
);
409 /* assume a root tree, such as a lightweight tag. */
410 oid_array_append(&tagged_blobs
->oids
, &obj
->oid
);
415 /* Make sure it is in the object walk */
416 if (obj
!= pending
->item
)
417 add_pending_object(info
->revs
, obj
, "");
421 BUG("should not see any other type here");
426 * Add tag objects and tagged blobs if they exist.
429 if (tagged_blobs
->oids
.nr
) {
430 const char *tagged_blob_path
= "/tagged-blobs";
431 tagged_blobs
->type
= OBJ_BLOB
;
432 tagged_blobs
->maybe_interesting
= 1;
433 strmap_put(&ctx
->paths_to_lists
, tagged_blob_path
, tagged_blobs
);
434 push_to_stack(ctx
, tagged_blob_path
);
436 oid_array_clear(&tagged_blobs
->oids
);
442 const char *tag_path
= "/tags";
443 tags
->type
= OBJ_TAG
;
444 tags
->maybe_interesting
= 1;
445 strmap_put(&ctx
->paths_to_lists
, tag_path
, tags
);
446 push_to_stack(ctx
, tag_path
);
448 oid_array_clear(&tags
->oids
);
457 * Given the configuration of 'info', walk the commits based on 'info->revs' and
458 * call 'info->path_fn' on each discovered path.
460 * Returns nonzero on an error.
462 int walk_objects_by_path(struct path_walk_info
*info
)
465 size_t commits_nr
= 0, paths_nr
= 0;
467 struct type_and_oid_list
*root_tree_list
;
468 struct type_and_oid_list
*commit_list
;
469 struct path_walk_context ctx
= {
470 .repo
= info
->revs
->repo
,
474 .compare
= compare_by_type
,
477 .path_stack_pushed
= STRSET_INIT
,
478 .paths_to_lists
= STRMAP_INIT
481 trace2_region_enter("path-walk", "commit-walk", info
->revs
->repo
);
483 CALLOC_ARRAY(commit_list
, 1);
484 commit_list
->type
= OBJ_COMMIT
;
487 info
->revs
->tag_objects
= 1;
489 /* Insert a single list for the root tree into the paths. */
490 CALLOC_ARRAY(root_tree_list
, 1);
491 root_tree_list
->type
= OBJ_TREE
;
492 root_tree_list
->maybe_interesting
= 1;
493 strmap_put(&ctx
.paths_to_lists
, root_path
, root_tree_list
);
494 push_to_stack(&ctx
, root_path
);
497 * Set these values before preparing the walk to catch
498 * lightweight tags pointing to non-commits and indexed objects.
500 info
->revs
->blob_objects
= info
->blobs
;
501 info
->revs
->tree_objects
= info
->trees
;
503 if (prepare_revision_walk(info
->revs
))
504 die(_("failed to setup revision walk"));
507 * Walk trees to mark them as UNINTERESTING.
508 * This is particularly important when 'edge_aggressive' is set.
510 info
->revs
->edge_hint_aggressive
= info
->edge_aggressive
;
511 edge_repo
= info
->revs
->repo
;
512 edge_tree_list
= root_tree_list
;
513 mark_edges_uninteresting(info
->revs
, show_edge
,
514 info
->prune_all_uninteresting
);
516 edge_tree_list
= NULL
;
518 info
->revs
->blob_objects
= info
->revs
->tree_objects
= 0;
520 trace2_region_enter("path-walk", "pending-walk", info
->revs
->repo
);
521 ret
= setup_pending_objects(info
, &ctx
);
522 trace2_region_leave("path-walk", "pending-walk", info
->revs
->repo
);
527 while ((c
= get_revision(info
->revs
))) {
528 struct object_id
*oid
;
533 oid_array_append(&commit_list
->oids
,
536 /* If we only care about commits, then skip trees. */
537 if (!info
->trees
&& !info
->blobs
)
540 oid
= get_commit_tree_oid(c
);
541 t
= lookup_tree(info
->revs
->repo
, oid
);
544 error("could not find tree %s", oid_to_hex(oid
));
548 if (t
->object
.flags
& SEEN
)
550 t
->object
.flags
|= SEEN
;
551 oid_array_append(&root_tree_list
->oids
, oid
);
554 trace2_data_intmax("path-walk", ctx
.repo
, "commits", commits_nr
);
555 trace2_region_leave("path-walk", "commit-walk", info
->revs
->repo
);
557 /* Track all commits. */
558 if (info
->commits
&& commit_list
->oids
.nr
)
559 ret
= info
->path_fn("", &commit_list
->oids
, OBJ_COMMIT
,
561 oid_array_clear(&commit_list
->oids
);
564 trace2_region_enter("path-walk", "path-walk", info
->revs
->repo
);
565 while (!ret
&& ctx
.path_stack
.nr
) {
566 char *path
= prio_queue_get(&ctx
.path_stack
);
569 ret
= walk_path(&ctx
, path
);
574 /* Are there paths remaining? Likely they are from indexed objects. */
575 if (!strmap_empty(&ctx
.paths_to_lists
)) {
576 struct hashmap_iter iter
;
577 struct strmap_entry
*entry
;
579 strmap_for_each_entry(&ctx
.paths_to_lists
, &iter
, entry
)
580 push_to_stack(&ctx
, entry
->key
);
582 while (!ret
&& ctx
.path_stack
.nr
) {
583 char *path
= prio_queue_get(&ctx
.path_stack
);
586 ret
= walk_path(&ctx
, path
);
592 trace2_data_intmax("path-walk", ctx
.repo
, "paths", paths_nr
);
593 trace2_region_leave("path-walk", "path-walk", info
->revs
->repo
);
595 clear_paths_to_lists(&ctx
.paths_to_lists
);
596 strset_clear(&ctx
.path_stack_pushed
);
597 clear_prio_queue(&ctx
.path_stack
);
601 void path_walk_info_init(struct path_walk_info
*info
)
603 struct path_walk_info empty
= PATH_WALK_INFO_INIT
;
604 memcpy(info
, &empty
, sizeof(empty
));
607 void path_walk_info_clear(struct path_walk_info
*info
)
610 clear_pattern_list(info
->pl
);