diffcore-rename: fall back to -C when -C -C busts the rename limit
authorJunio C Hamano <gitster@pobox.com>
Thu, 6 Jan 2011 21:50:06 +0000 (13:50 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 22 Mar 2011 21:29:07 +0000 (14:29 -0700)
When there are too many paths in the project, the number of rename source
candidates "git diff -C -C" finds will exceed the rename detection limit,
and no inexact rename detection is performed.  We however could fall back
to "git diff -C" if the number of modified paths is sufficiently small.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/diff-tree.c
builtin/log.c
diff.c
diff.h
diffcore-rename.c
merge-recursive.c
t/t4001-diff-rename.sh

index 0d2a3e9fa2023cc673f1f6d9e17d3deeca1c7e9d..be6417d166abf428d379a70b9d67f894da4641d1 100644 (file)
@@ -163,6 +163,9 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
        }
 
        if (read_stdin) {
+               int saved_nrl = 0;
+               int saved_dcctc = 0;
+
                if (opt->diffopt.detect_rename)
                        opt->diffopt.setup |= (DIFF_SETUP_USE_SIZE_CACHE |
                                               DIFF_SETUP_USE_CACHE);
@@ -173,9 +176,16 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix)
                                fputs(line, stdout);
                                fflush(stdout);
                        }
-                       else
+                       else {
                                diff_tree_stdin(line);
+                               if (saved_nrl < opt->diffopt.needed_rename_limit)
+                                       saved_nrl = opt->diffopt.needed_rename_limit;
+                               if (opt->diffopt.degraded_cc_to_c)
+                                       saved_dcctc = 1;
+                       }
                }
+               opt->diffopt.degraded_cc_to_c = saved_dcctc;
+               opt->diffopt.needed_rename_limit = saved_nrl;
        }
 
        return diff_result_code(&opt->diffopt, 0);
index 796e9e57460468748d1e2af975ac52a6470a379d..707fd5706c37290bd65fcdf1f482202c54205881 100644 (file)
@@ -247,6 +247,8 @@ static void finish_early_output(struct rev_info *rev)
 static int cmd_log_walk(struct rev_info *rev)
 {
        struct commit *commit;
+       int saved_nrl = 0;
+       int saved_dcctc = 0;
 
        if (rev->early_output)
                setup_early_output(rev);
@@ -277,7 +279,14 @@ static int cmd_log_walk(struct rev_info *rev)
                }
                free_commit_list(commit->parents);
                commit->parents = NULL;
+               if (saved_nrl < rev->diffopt.needed_rename_limit)
+                       saved_nrl = rev->diffopt.needed_rename_limit;
+               if (rev->diffopt.degraded_cc_to_c)
+                       saved_dcctc = 1;
        }
+       rev->diffopt.degraded_cc_to_c = saved_dcctc;
+       rev->diffopt.needed_rename_limit = saved_nrl;
+
        if (rev->diffopt.output_format & DIFF_FORMAT_CHECKDIFF &&
            DIFF_OPT_TST(&rev->diffopt, CHECK_FAILED)) {
                return 02;
diff --git a/diff.c b/diff.c
index 9b3eb9938f8cf9f81cac2b9e79da34e1e02e727c..13f322bc5f8d12f3a11f62db3744f0648ba8ace6 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -3956,6 +3956,28 @@ static int is_summary_empty(const struct diff_queue_struct *q)
        return 1;
 }
 
+static const char rename_limit_warning[] =
+"inexact rename detection was skipped due to too many files.";
+
+static const char degrade_cc_to_c_warning[] =
+"only found copies from modified paths due to too many files.";
+
+static const char rename_limit_advice[] =
+"you may want to set your %s variable to at least "
+"%d and retry the command.";
+
+void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc)
+{
+       if (degraded_cc)
+               warning(degrade_cc_to_c_warning);
+       else if (needed)
+               warning(rename_limit_warning);
+       else
+               return;
+       if (0 < needed && needed < 32767)
+               warning(rename_limit_advice, varname, needed);
+}
+
 void diff_flush(struct diff_options *options)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
@@ -4237,6 +4259,10 @@ void diffcore_std(struct diff_options *options)
 int diff_result_code(struct diff_options *opt, int status)
 {
        int result = 0;
+
+       diff_warn_rename_limit("diff.renamelimit",
+                              opt->needed_rename_limit,
+                              opt->degraded_cc_to_c);
        if (!DIFF_OPT_TST(opt, EXIT_WITH_STATUS) &&
            !(opt->output_format & DIFF_FORMAT_CHECKDIFF))
                return status;
diff --git a/diff.h b/diff.h
index 007a0554d4b252e83e98f5578758d51ec0c6e120..b8dde8a750b17730f2e65da395700e5bd3ee2210 100644 (file)
--- a/diff.h
+++ b/diff.h
@@ -111,6 +111,7 @@ struct diff_options {
        int rename_score;
        int rename_limit;
        int needed_rename_limit;
+       int degraded_cc_to_c;
        int show_rename_progress;
        int dirstat_percent;
        int setup;
@@ -273,6 +274,7 @@ extern void diffcore_fix_diff_index(struct diff_options *);
 
 extern int diff_queue_is_empty(void);
 extern void diff_flush(struct diff_options*);
+extern void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc);
 
 /* diff-raw status letters */
 #define DIFF_STATUS_ADDED              'A'
index a932f762d29df59e520a999fd82bbad605b14bd4..f62587e523e1b38c2fd440780d2635677e734c4d 100644 (file)
@@ -420,11 +420,18 @@ static void record_if_better(struct diff_score m[], struct diff_score *o)
                m[worst] = *o;
 }
 
+/*
+ * Returns:
+ * 0 if we are under the limit;
+ * 1 if we need to disable inexact rename detection;
+ * 2 if we would be under the limit if we were given -C instead of -C -C.
+ */
 static int too_many_rename_candidates(int num_create,
                                      struct diff_options *options)
 {
        int rename_limit = options->rename_limit;
        int num_src = rename_src_nr;
+       int i;
 
        options->needed_rename_limit = 0;
 
@@ -445,6 +452,20 @@ static int too_many_rename_candidates(int num_create,
 
        options->needed_rename_limit =
                num_src > num_create ? num_src : num_create;
+
+       /* Are we running under -C -C? */
+       if (!DIFF_OPT_TST(options, FIND_COPIES_HARDER))
+               return 1;
+
+       /* Would we bust the limit if we were running under -C? */
+       for (num_src = i = 0; i < rename_src_nr; i++) {
+               if (diff_unmodified_pair(rename_src[i].p))
+                       continue;
+               num_src++;
+       }
+       if ((num_create <= rename_limit || num_src <= rename_limit) &&
+           (num_create * num_src <= rename_limit * rename_limit))
+               return 2;
        return 1;
 }
 
@@ -476,7 +497,7 @@ void diffcore_rename(struct diff_options *options)
        struct diff_queue_struct *q = &diff_queued_diff;
        struct diff_queue_struct outq;
        struct diff_score *mx;
-       int i, j, rename_count;
+       int i, j, rename_count, skip_unmodified = 0;
        int num_create, num_src, dst_cnt;
        struct progress *progress = NULL;
 
@@ -539,8 +560,16 @@ void diffcore_rename(struct diff_options *options)
        if (!num_create)
                goto cleanup;
 
-       if (too_many_rename_candidates(num_create, options))
+       switch (too_many_rename_candidates(num_create, options)) {
+       case 1:
                goto cleanup;
+       case 2:
+               options->degraded_cc_to_c = 1;
+               skip_unmodified = 1;
+               break;
+       default:
+               break;
+       }
 
        if (options->show_rename_progress) {
                progress = start_progress_delay(
@@ -563,6 +592,11 @@ void diffcore_rename(struct diff_options *options)
                for (j = 0; j < rename_src_nr; j++) {
                        struct diff_filespec *one = rename_src[j].p->one;
                        struct diff_score this_src;
+
+                       if (skip_unmodified &&
+                           diff_unmodified_pair(rename_src[j].p))
+                               continue;
+
                        this_src.score = estimate_similarity(one, two,
                                                             minimum_score);
                        this_src.name_score = basename_same(one, two);
index 8e82a8b1a5ab8a2f5af00910a8febda93b9e420d..3ae4d53cc552fb837cb50edddbcc993c460aa941 100644 (file)
 #include "dir.h"
 #include "submodule.h"
 
-static const char rename_limit_advice[] =
-"inexact rename detection was skipped because there were too many\n"
-"  files. You may want to set your merge.renamelimit variable to at least\n"
-"  %d and retry this merge.";
-
 static struct tree *shift_tree_object(struct tree *one, struct tree *two,
                                      const char *subtree_shift)
 {
@@ -1656,8 +1651,9 @@ int merge_recursive(struct merge_options *o,
                commit_list_insert(h2, &(*result)->parents->next);
        }
        flush_output(o);
-       if (o->needed_rename_limit)
-               warning(rename_limit_advice, o->needed_rename_limit);
+       if (show(o, 2))
+               diff_warn_rename_limit("merge.renamelimit",
+                                      o->needed_rename_limit, 0);
        return clean;
 }
 
index 71bac83dd5e42a19e3b1a7e869df0e7143371c99..301f3a0e545be02b071fcf43ea690a1a6d6e044a 100755 (executable)
@@ -77,4 +77,29 @@ test_expect_success  'favour same basenames even with minor differences' '
        git show HEAD:path1 | sed "s/15/16/" > subdir/path1 &&
        git status | grep "renamed: .*path1 -> subdir/path1"'
 
+test_expect_success 'setup for many rename source candidates' '
+       git reset --hard &&
+       for i in 0 1 2 3 4 5 6 7 8 9;
+       do
+               for j in 0 1 2 3 4 5 6 7 8 9;
+               do
+                       echo "$i$j" >"path$i$j"
+               done
+       done &&
+       git add "path??" &&
+       test_tick &&
+       git commit -m "hundred" &&
+       (cat path1; echo new) >new-path &&
+       echo old >>path1 &&
+       git add new-path path1 &&
+       git diff -l 4 -C -C --cached --name-status >actual 2>actual.err &&
+       sed -e "s/^\([CM]\)[0-9]*       /\1     /" actual >actual.munged &&
+       cat >expect <<-EOF &&
+       C       path1   new-path
+       M       path1
+       EOF
+       test_cmp expect actual.munged &&
+       grep warning actual.err
+'
+
 test_done