The sixteenth batch
[git/gitster.git] / tree-diff.c
blobe00fc2f450d116255db2f9c8966a3d19df0cc1ed
1 /*
2 * Helper functions for tree diff generation
3 */
5 #define USE_THE_REPOSITORY_VARIABLE
6 #define DISABLE_SIGN_COMPARE_WARNINGS
8 #include "git-compat-util.h"
9 #include "diff.h"
10 #include "diffcore.h"
11 #include "hash.h"
12 #include "tree.h"
13 #include "tree-walk.h"
14 #include "environment.h"
15 #include "repository.h"
18 * Some mode bits are also used internally for computations.
20 * They *must* not overlap with any valid modes, and they *must* not be emitted
21 * to outside world - i.e. appear on disk or network. In other words, it's just
22 * temporary fields, which we internally use, but they have to stay in-house.
24 * ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git
25 * codebase mode is `unsigned int` which is assumed to be at least 32 bits )
28 #define S_DIFFTREE_IFXMIN_NEQ 0x80000000
31 * internal mode marker, saying a tree entry != entry of tp[imin]
32 * (see ll_diff_tree_paths for what it means there)
34 * we will update/use/emit entry for diff only with it unset.
36 #define S_IFXMIN_NEQ S_DIFFTREE_IFXMIN_NEQ
38 #define FAST_ARRAY_ALLOC(x, nr) do { \
39 if ((nr) <= 2) \
40 (x) = xalloca((nr) * sizeof(*(x))); \
41 else \
42 ALLOC_ARRAY((x), nr); \
43 } while(0)
44 #define FAST_ARRAY_FREE(x, nr) do { \
45 if ((nr) <= 2) \
46 xalloca_free((x)); \
47 else \
48 free((x)); \
49 } while(0)
51 static void ll_diff_tree_paths(
52 struct combine_diff_path ***tail, const struct object_id *oid,
53 const struct object_id **parents_oid, int nparent,
54 struct strbuf *base, struct diff_options *opt,
55 int depth);
56 static void ll_diff_tree_oid(const struct object_id *old_oid,
57 const struct object_id *new_oid,
58 struct strbuf *base, struct diff_options *opt);
61 * Compare two tree entries, taking into account only path/S_ISDIR(mode),
62 * but not their sha1's.
64 * NOTE files and directories *always* compare differently, even when having
65 * the same name - thanks to base_name_compare().
67 * NOTE empty (=invalid) descriptor(s) take part in comparison as +infty,
68 * so that they sort *after* valid tree entries.
70 * Due to this convention, if trees are scanned in sorted order, all
71 * non-empty descriptors will be processed first.
73 static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2)
75 struct name_entry *e1, *e2;
76 int cmp;
78 /* empty descriptors sort after valid tree entries */
79 if (!t1->size)
80 return t2->size ? 1 : 0;
81 else if (!t2->size)
82 return -1;
84 e1 = &t1->entry;
85 e2 = &t2->entry;
86 cmp = base_name_compare(e1->path, tree_entry_len(e1), e1->mode,
87 e2->path, tree_entry_len(e2), e2->mode);
88 return cmp;
93 * convert path -> opt->diff_*() callbacks
95 * emits diff to first parent only, and tells diff tree-walker that we are done
96 * with p and it can be freed.
98 static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_diff_path *p)
100 struct combine_diff_parent *p0 = &p->parent[0];
101 if (p->mode && p0->mode) {
102 opt->change(opt, p0->mode, p->mode, &p0->oid, &p->oid,
103 1, 1, p->path, 0, 0);
105 else {
106 const struct object_id *oid;
107 unsigned int mode;
108 int addremove;
110 if (p->mode) {
111 addremove = '+';
112 oid = &p->oid;
113 mode = p->mode;
114 } else {
115 addremove = '-';
116 oid = &p0->oid;
117 mode = p0->mode;
120 opt->add_remove(opt, addremove, mode, oid, 1, p->path, 0);
123 return 0; /* we are done with p */
128 * new path should be added to combine diff
130 * 3 cases on how/when it should be called and behaves:
132 * t, !tp -> path added, all parents lack it
133 * !t, tp -> path removed from all parents
134 * t, tp -> path modified/added
135 * (M for tp[i]=tp[imin], A otherwise)
137 static void emit_path(struct combine_diff_path ***tail,
138 struct strbuf *base, struct diff_options *opt,
139 int nparent, struct tree_desc *t, struct tree_desc *tp,
140 int imin, int depth)
142 unsigned short mode;
143 const char *path;
144 const struct object_id *oid;
145 int pathlen;
146 int old_baselen = base->len;
147 int i, isdir, recurse = 0, emitthis = 1;
149 /* at least something has to be valid */
150 assert(t || tp);
152 if (t) {
153 /* path present in resulting tree */
154 oid = tree_entry_extract(t, &path, &mode);
155 pathlen = tree_entry_len(&t->entry);
156 isdir = S_ISDIR(mode);
157 } else {
159 * a path was removed - take path from imin parent. Also take
160 * mode from that parent, to decide on recursion(1).
162 * 1) all modes for tp[i]=tp[imin] should be the same wrt
163 * S_ISDIR, thanks to base_name_compare().
165 tree_entry_extract(&tp[imin], &path, &mode);
166 pathlen = tree_entry_len(&tp[imin].entry);
168 isdir = S_ISDIR(mode);
169 oid = NULL;
170 mode = 0;
173 if (opt->flags.recursive && isdir) {
174 recurse = 1;
175 emitthis = opt->flags.tree_in_recursive;
178 if (emitthis) {
179 int keep;
180 struct combine_diff_path *p;
182 strbuf_add(base, path, pathlen);
183 p = combine_diff_path_new(base->buf, base->len, mode,
184 oid ? oid : null_oid(the_hash_algo),
185 nparent);
186 strbuf_setlen(base, old_baselen);
188 for (i = 0; i < nparent; ++i) {
190 * tp[i] is valid, if present and if tp[i]==tp[imin] -
191 * otherwise, we should ignore it.
193 int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ);
195 const struct object_id *oid_i;
196 unsigned mode_i;
198 p->parent[i].status =
199 !t ? DIFF_STATUS_DELETED :
200 tpi_valid ?
201 DIFF_STATUS_MODIFIED :
202 DIFF_STATUS_ADDED;
204 if (tpi_valid) {
205 oid_i = &tp[i].entry.oid;
206 mode_i = tp[i].entry.mode;
208 else {
209 oid_i = null_oid(the_hash_algo);
210 mode_i = 0;
213 p->parent[i].mode = mode_i;
214 oidcpy(&p->parent[i].oid, oid_i);
217 keep = 1;
218 if (opt->pathchange)
219 keep = opt->pathchange(opt, p);
221 if (keep) {
222 **tail = p;
223 *tail = &p->next;
224 } else {
225 free(p);
229 if (recurse) {
230 const struct object_id **parents_oid;
232 FAST_ARRAY_ALLOC(parents_oid, nparent);
233 for (i = 0; i < nparent; ++i) {
234 /* same rule as in emitthis */
235 int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ);
237 parents_oid[i] = tpi_valid ? &tp[i].entry.oid : NULL;
240 strbuf_add(base, path, pathlen);
241 strbuf_addch(base, '/');
242 ll_diff_tree_paths(tail, oid, parents_oid, nparent, base, opt,
243 depth + 1);
244 FAST_ARRAY_FREE(parents_oid, nparent);
247 strbuf_setlen(base, old_baselen);
250 static void skip_uninteresting(struct tree_desc *t, struct strbuf *base,
251 struct diff_options *opt)
253 enum interesting match;
255 while (t->size) {
256 match = tree_entry_interesting(opt->repo->index, &t->entry,
257 base, &opt->pathspec);
258 if (match) {
259 if (match == all_entries_not_interesting)
260 t->size = 0;
261 break;
263 update_tree_entry(t);
269 * generate paths for combined diff D(sha1,parents_oid[])
271 * Resulting paths are appended to combine_diff_path linked list, and also, are
272 * emitted on the go via opt->pathchange() callback, so it is possible to
273 * process the result as batch or incrementally.
275 * The paths are generated scanning new tree and all parents trees
276 * simultaneously, similarly to what diff_tree() was doing for 2 trees.
277 * The theory behind such scan is as follows:
280 * D(T,P1...Pn) calculation scheme
281 * -------------------------------
283 * D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn) (regarding resulting paths set)
285 * D(T,Pj) - diff between T..Pj
286 * D(T,P1...Pn) - combined diff from T to parents P1,...,Pn
289 * We start from all trees, which are sorted, and compare their entries in
290 * lock-step:
292 * T P1 Pn
293 * - - -
294 * |t| |p1| |pn|
295 * |-| |--| ... |--| imin = argmin(p1...pn)
296 * | | | | | |
297 * |-| |--| |--|
298 * |.| |. | |. |
299 * . . .
300 * . . .
302 * at any time there could be 3 cases:
304 * 1) t < p[imin];
305 * 2) t > p[imin];
306 * 3) t = p[imin].
308 * Schematic deduction of what every case means, and what to do, follows:
310 * 1) t < p[imin] -> ∀j t ∉ Pj -> "+t" ∈ D(T,Pj) -> D += "+t"; t↓
312 * 2) t > p[imin]
314 * 2.1) ∃j: pj > p[imin] -> "-p[imin]" ∉ D(T,Pj) -> D += ø; ∀ pi=p[imin] pi↓
315 * 2.2) ∀i pi = p[imin] -> pi ∉ T -> "-pi" ∈ D(T,Pi) -> D += "-p[imin]"; ∀i pi↓
317 * 3) t = p[imin]
319 * 3.1) ∃j: pj > p[imin] -> "+t" ∈ D(T,Pj) -> only pi=p[imin] remains to investigate
320 * 3.2) pi = p[imin] -> investigate δ(t,pi)
325 * 3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø ->
327 * ⎧δ(t,pi) - if pi=p[imin]
328 * -> D += ⎨
329 * ⎩"+t" - if pi>p[imin]
332 * in any case t↓ ∀ pi=p[imin] pi↓
335 * ~~~~~~~~
337 * NOTE
339 * Usual diff D(A,B) is by definition the same as combined diff D(A,[B]),
340 * so this diff paths generator can, and is used, for plain diffs
341 * generation too.
343 * Please keep attention to the common D(A,[B]) case when working on the
344 * code, in order not to slow it down.
346 * NOTE
347 * nparent must be > 0.
351 /* ∀ pi=p[imin] pi↓ */
352 static inline void update_tp_entries(struct tree_desc *tp, int nparent)
354 int i;
355 for (i = 0; i < nparent; ++i)
356 if (!(tp[i].entry.mode & S_IFXMIN_NEQ))
357 update_tree_entry(&tp[i]);
360 static void ll_diff_tree_paths(
361 struct combine_diff_path ***tail, const struct object_id *oid,
362 const struct object_id **parents_oid, int nparent,
363 struct strbuf *base, struct diff_options *opt,
364 int depth)
366 struct tree_desc t, *tp;
367 void *ttree, **tptree;
368 int i;
370 if (depth > max_allowed_tree_depth)
371 die("exceeded maximum allowed tree depth");
373 FAST_ARRAY_ALLOC(tp, nparent);
374 FAST_ARRAY_ALLOC(tptree, nparent);
377 * load parents first, as they are probably already cached.
379 * ( log_tree_diff() parses commit->parent before calling here via
380 * diff_tree_oid(parent, commit) )
382 for (i = 0; i < nparent; ++i)
383 tptree[i] = fill_tree_descriptor(opt->repo, &tp[i], parents_oid[i]);
384 ttree = fill_tree_descriptor(opt->repo, &t, oid);
386 /* Enable recursion indefinitely */
387 opt->pathspec.recursive = opt->flags.recursive;
389 for (;;) {
390 int imin, cmp;
392 if (diff_can_quit_early(opt))
393 break;
395 if (opt->max_changes && diff_queued_diff.nr > opt->max_changes)
396 break;
398 if (opt->pathspec.nr) {
399 skip_uninteresting(&t, base, opt);
400 for (i = 0; i < nparent; i++)
401 skip_uninteresting(&tp[i], base, opt);
404 /* comparing is finished when all trees are done */
405 if (!t.size) {
406 int done = 1;
407 for (i = 0; i < nparent; ++i)
408 if (tp[i].size) {
409 done = 0;
410 break;
412 if (done)
413 break;
417 * lookup imin = argmin(p1...pn),
418 * mark entries whether they =p[imin] along the way
420 imin = 0;
421 tp[0].entry.mode &= ~S_IFXMIN_NEQ;
423 for (i = 1; i < nparent; ++i) {
424 cmp = tree_entry_pathcmp(&tp[i], &tp[imin]);
425 if (cmp < 0) {
426 imin = i;
427 tp[i].entry.mode &= ~S_IFXMIN_NEQ;
429 else if (cmp == 0) {
430 tp[i].entry.mode &= ~S_IFXMIN_NEQ;
432 else {
433 tp[i].entry.mode |= S_IFXMIN_NEQ;
437 /* fixup markings for entries before imin */
438 for (i = 0; i < imin; ++i)
439 tp[i].entry.mode |= S_IFXMIN_NEQ; /* pi > p[imin] */
443 /* compare t vs p[imin] */
444 cmp = tree_entry_pathcmp(&t, &tp[imin]);
446 /* t = p[imin] */
447 if (cmp == 0) {
448 /* are either pi > p[imin] or diff(t,pi) != ø ? */
449 if (!opt->flags.find_copies_harder) {
450 for (i = 0; i < nparent; ++i) {
451 /* p[i] > p[imin] */
452 if (tp[i].entry.mode & S_IFXMIN_NEQ)
453 continue;
455 /* diff(t,pi) != ø */
456 if (!oideq(&t.entry.oid, &tp[i].entry.oid) ||
457 (t.entry.mode != tp[i].entry.mode))
458 continue;
460 goto skip_emit_t_tp;
464 /* D += {δ(t,pi) if pi=p[imin]; "+a" if pi > p[imin]} */
465 emit_path(tail, base, opt, nparent,
466 &t, tp, imin, depth);
468 skip_emit_t_tp:
469 /* t↓, ∀ pi=p[imin] pi↓ */
470 update_tree_entry(&t);
471 update_tp_entries(tp, nparent);
474 /* t < p[imin] */
475 else if (cmp < 0) {
476 /* D += "+t" */
477 emit_path(tail, base, opt, nparent,
478 &t, /*tp=*/NULL, -1, depth);
480 /* t↓ */
481 update_tree_entry(&t);
484 /* t > p[imin] */
485 else {
486 /* ∀i pi=p[imin] -> D += "-p[imin]" */
487 if (!opt->flags.find_copies_harder) {
488 for (i = 0; i < nparent; ++i)
489 if (tp[i].entry.mode & S_IFXMIN_NEQ)
490 goto skip_emit_tp;
493 emit_path(tail, base, opt, nparent,
494 /*t=*/NULL, tp, imin, depth);
496 skip_emit_tp:
497 /* ∀ pi=p[imin] pi↓ */
498 update_tp_entries(tp, nparent);
502 free(ttree);
503 for (i = nparent-1; i >= 0; i--)
504 free(tptree[i]);
505 FAST_ARRAY_FREE(tptree, nparent);
506 FAST_ARRAY_FREE(tp, nparent);
509 struct combine_diff_path *diff_tree_paths(
510 const struct object_id *oid,
511 const struct object_id **parents_oid, int nparent,
512 struct strbuf *base, struct diff_options *opt)
514 struct combine_diff_path *head = NULL, **tail = &head;
515 ll_diff_tree_paths(&tail, oid, parents_oid, nparent, base, opt, 0);
516 return head;
520 * Does it look like the resulting diff might be due to a rename?
521 * - single entry
522 * - not a valid previous file
524 static inline int diff_might_be_rename(void)
526 return diff_queued_diff.nr == 1 &&
527 !DIFF_FILE_VALID(diff_queued_diff.queue[0]->one);
530 static void try_to_follow_renames(const struct object_id *old_oid,
531 const struct object_id *new_oid,
532 struct strbuf *base, struct diff_options *opt)
534 struct diff_options diff_opts;
535 struct diff_queue_struct *q = &diff_queued_diff;
536 struct diff_filepair *choice;
537 int i;
540 * follow-rename code is very specific, we need exactly one
541 * path. Magic that matches more than one path is not
542 * supported.
544 GUARD_PATHSPEC(&opt->pathspec, PATHSPEC_FROMTOP | PATHSPEC_LITERAL);
545 #if 0
547 * We should reject wildcards as well. Unfortunately we
548 * haven't got a reliable way to detect that 'foo\*bar' in
549 * fact has no wildcards. nowildcard_len is merely a hint for
550 * optimization. Let it slip for now until wildmatch is taught
551 * about dry-run mode and returns wildcard info.
553 if (opt->pathspec.has_wildcard)
554 BUG("wildcards are not supported");
555 #endif
557 /* Remove the file creation entry from the diff queue, and remember it */
558 choice = q->queue[0];
559 q->nr = 0;
561 repo_diff_setup(opt->repo, &diff_opts);
562 diff_opts.flags.recursive = 1;
563 diff_opts.flags.find_copies_harder = 1;
564 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
565 diff_opts.single_follow = opt->pathspec.items[0].match;
566 diff_opts.break_opt = opt->break_opt;
567 diff_opts.rename_score = opt->rename_score;
568 diff_setup_done(&diff_opts);
569 ll_diff_tree_oid(old_oid, new_oid, base, &diff_opts);
570 diffcore_std(&diff_opts);
571 clear_pathspec(&diff_opts.pathspec);
573 /* Go through the new set of filepairing, and see if we find a more interesting one */
574 opt->found_follow = 0;
575 for (i = 0; i < q->nr; i++) {
576 struct diff_filepair *p = q->queue[i];
579 * Found a source? Not only do we use that for the new
580 * diff_queued_diff, we will also use that as the path in
581 * the future!
583 if ((p->status == 'R' || p->status == 'C') &&
584 !strcmp(p->two->path, opt->pathspec.items[0].match)) {
585 const char *path[2];
587 /* Switch the file-pairs around */
588 q->queue[i] = choice;
589 choice = p;
591 /* Update the path we use from now on.. */
592 path[0] = p->one->path;
593 path[1] = NULL;
594 clear_pathspec(&opt->pathspec);
595 parse_pathspec(&opt->pathspec,
596 PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL,
597 PATHSPEC_LITERAL_PATH, "", path);
600 * The caller expects us to return a set of vanilla
601 * filepairs to let a later call to diffcore_std()
602 * it makes to sort the renames out (among other
603 * things), but we already have found renames
604 * ourselves; signal diffcore_std() not to muck with
605 * rename information.
607 opt->found_follow = 1;
608 break;
613 * Then, discard all the non-relevant file pairs...
615 for (i = 0; i < q->nr; i++) {
616 struct diff_filepair *p = q->queue[i];
617 diff_free_filepair(p);
621 * .. and re-instate the one we want (which might be either the
622 * original one, or the rename/copy we found)
624 q->queue[0] = choice;
625 q->nr = 1;
628 static void ll_diff_tree_oid(const struct object_id *old_oid,
629 const struct object_id *new_oid,
630 struct strbuf *base, struct diff_options *opt)
632 struct combine_diff_path *paths, *p;
633 pathchange_fn_t pathchange_old = opt->pathchange;
635 opt->pathchange = emit_diff_first_parent_only;
636 paths = diff_tree_paths(new_oid, &old_oid, 1, base, opt);
638 for (p = paths; p;) {
639 struct combine_diff_path *pprev = p;
640 p = p->next;
641 free(pprev);
644 opt->pathchange = pathchange_old;
647 void diff_tree_oid(const struct object_id *old_oid,
648 const struct object_id *new_oid,
649 const char *base_str, struct diff_options *opt)
651 struct strbuf base;
653 strbuf_init(&base, PATH_MAX);
654 strbuf_addstr(&base, base_str);
656 ll_diff_tree_oid(old_oid, new_oid, &base, opt);
657 if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
658 try_to_follow_renames(old_oid, new_oid, &base, opt);
660 strbuf_release(&base);
663 void diff_root_tree_oid(const struct object_id *new_oid,
664 const char *base,
665 struct diff_options *opt)
667 diff_tree_oid(NULL, new_oid, base, opt);