xdiff: add xdl_merge()
authorJohannes Schindelin <Johannes.Schindelin@gmx.de>
Tue, 21 Nov 2006 22:24:34 +0000 (23:24 +0100)
committerJunio C Hamano <junkio@cox.net>
Sun, 3 Dec 2006 01:28:19 +0000 (17:28 -0800)
This new function implements the functionality of RCS merge, but
in-memory. It returns < 0 on error, otherwise the number of conflicts.

Finding the conflicting lines can be a very expensive task. You can
control the eagerness of this algorithm:

- a level value of 0 means that all overlapping changes are treated
  as conflicts,
- a value of 1 means that if the overlapping changes are identical,
  it is not treated as a conflict.
- If you set level to 2, overlapping changes will be analyzed, so that
  almost identical changes will not result in huge conflicts. Rather,
  only the conflicting lines will be shown inside conflict markers.

With each increasing level, the algorithm gets slower, but more accurate.
Note that the code for level 2 depends on the simple definition of
mmfile_t specific to git, and therefore it will be harder to port that
to LibXDiff.

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>
Makefile
xdiff/xdiff.h
xdiff/xdiffi.c
xdiff/xdiffi.h
xdiff/xmerge.c [new file with mode: 0644]

index de3e9f38ec9f737961ce29fa2b4e0703b8accfd2..23bbb90683b5272ab65e9afd883307cacdff6b4a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -723,7 +723,8 @@ $(DIFF_OBJS): diffcore.h
 $(LIB_FILE): $(LIB_OBJS)
        rm -f $@ && $(AR) rcs $@ $(LIB_OBJS)
 
-XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o
+XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
+       xdiff/xmerge.o
 $(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
        xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
index c9f817818a781164b63c78be2af71a5d21e35d6f..fa409d5234009bf6ddac8803e8d20ed1b23a3258 100644 (file)
@@ -49,6 +49,9 @@ extern "C" {
 #define XDL_BDOP_CPY 2
 #define XDL_BDOP_INSB 3
 
+#define XDL_MERGE_MINIMAL 0
+#define XDL_MERGE_EAGER 1
+#define XDL_MERGE_ZEALOUS 2
 
 typedef struct s_mmfile {
        char *ptr;
@@ -90,6 +93,10 @@ long xdl_mmfile_size(mmfile_t *mmf);
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
             xdemitconf_t const *xecfg, xdemitcb_t *ecb);
 
+int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
+               mmfile_t *mf2, const char *name2,
+               xpparam_t const *xpp, int level, mmbuffer_t *result);
+
 #ifdef __cplusplus
 }
 #endif /* #ifdef __cplusplus */
index d76e76a0e6d987cdec985e304ad682020fef5e1f..9aeebc473b11a3dbfa5b52930b4a8e5a35f22215 100644 (file)
@@ -45,7 +45,6 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
                      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
                      xdalgoenv_t *xenv);
 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
-static int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags);
 
 
 
@@ -397,7 +396,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 }
 
 
-static int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
+int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
        long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec;
        char *rchg = xdf->rchg, *rchgo = xdfo->rchg;
        xrecord_t **recs = xdf->recs;
index d3b72716b58273169904a4351f02fab7ab2fc6e9..472aeaecfab85e7276baaceb446d2095eeab01fd 100644 (file)
@@ -50,6 +50,7 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
                 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
                xdfenv_t *xe);
+int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags);
 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr);
 void xdl_free_script(xdchange_t *xscr);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
new file mode 100644 (file)
index 0000000..7b85aa5
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+ *  LibXDiff by Davide Libenzi ( File Differential Library )
+ *  Copyright (C) 2003-2006 Davide Libenzi, Johannes E. Schindelin
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#include "xinclude.h"
+
+typedef struct s_xdmerge {
+       struct s_xdmerge *next;
+       /*
+        * 0 = conflict,
+        * 1 = no conflict, take first,
+        * 2 = no conflict, take second.
+        */
+       int mode;
+       long i1, i2;
+       long chg1, chg2;
+} xdmerge_t;
+
+static int xdl_append_merge(xdmerge_t **merge, int mode,
+               long i1, long chg1, long i2, long chg2)
+{
+       xdmerge_t *m = *merge;
+       if (m && mode == m->mode &&
+                       (i1 == m->i1 + m->chg1 || i2 == m->i2 + m->chg2)) {
+               m->chg1 = i1 + chg1 - m->i1;
+               m->chg2 = i2 + chg2 - m->i2;
+       } else {
+               m = xdl_malloc(sizeof(xdmerge_t));
+               if (!m)
+                       return -1;
+               m->next = NULL;
+               m->mode = mode;
+               m->i1 = i1;
+               m->chg1 = chg1;
+               m->i2 = i2;
+               m->chg2 = chg2;
+               if (*merge)
+                       (*merge)->next = m;
+               *merge = m;
+       }
+       return 0;
+}
+
+static int xdl_cleanup_merge(xdmerge_t *c)
+{
+       int count = 0;
+       xdmerge_t *next_c;
+
+       /* were there conflicts? */
+       for (; c; c = next_c) {
+               if (c->mode == 0)
+                       count++;
+               next_c = c->next;
+               free(c);
+       }
+       return count;
+}
+
+static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
+               int line_count, long flags)
+{
+       int i;
+       xrecord_t **rec1 = xe1->xdf2.recs + i1;
+       xrecord_t **rec2 = xe2->xdf2.recs + i2;
+
+       for (i = 0; i < line_count; i++) {
+               int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size,
+                       rec2[i]->ptr, rec2[i]->size, flags);
+               if (!result)
+                       return -1;
+       }
+       return 0;
+}
+
+static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int add_nl, char *dest)
+{
+       xrecord_t **recs = xe->xdf2.recs + i;
+       int size = 0;
+
+       if (count < 1)
+               return 0;
+
+       for (i = 0; i < count; size += recs[i++]->size)
+               if (dest)
+                       memcpy(dest + size, recs[i]->ptr, recs[i]->size);
+       if (add_nl) {
+               i = recs[count - 1]->size;
+               if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') {
+                       if (dest)
+                               dest[size] = '\n';
+                       size++;
+               }
+       }
+       return size;
+}
+
+static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
+               xdfenv_t *xe2, const char *name2, xdmerge_t *m, char *dest)
+{
+       const int marker_size = 7;
+       int marker1_size = (name1 ? strlen(name1) + 1 : 0);
+       int marker2_size = (name2 ? strlen(name2) + 1 : 0);
+       int conflict_marker_size = 3 * (marker_size + 1)
+               + marker1_size + marker2_size;
+       int size, i1, j;
+
+       for (size = i1 = 0; m; m = m->next) {
+               if (m->mode == 0) {
+                       size += xdl_recs_copy(xe1, i1, m->i1 - i1, 0,
+                                       dest ? dest + size : NULL);
+                       if (dest) {
+                               for (j = 0; j < marker_size; j++)
+                                       dest[size++] = '<';
+                               if (marker1_size) {
+                                       dest[size] = ' ';
+                                       memcpy(dest + size + 1, name1,
+                                                       marker1_size - 1);
+                                       size += marker1_size;
+                               }
+                               dest[size++] = '\n';
+                       } else
+                               size += conflict_marker_size;
+                       size += xdl_recs_copy(xe1, m->i1, m->chg1, 1,
+                                       dest ? dest + size : NULL);
+                       if (dest) {
+                               for (j = 0; j < marker_size; j++)
+                                       dest[size++] = '=';
+                               dest[size++] = '\n';
+                       }
+                       size += xdl_recs_copy(xe2, m->i2, m->chg2, 1,
+                                       dest ? dest + size : NULL);
+                       if (dest) {
+                               for (j = 0; j < marker_size; j++)
+                                       dest[size++] = '>';
+                               if (marker2_size) {
+                                       dest[size] = ' ';
+                                       memcpy(dest + size + 1, name2,
+                                                       marker2_size - 1);
+                                       size += marker2_size;
+                               }
+                               dest[size++] = '\n';
+                       }
+               } else if (m->mode == 1)
+                       size += xdl_recs_copy(xe1, i1, m->i1 + m->chg1 - i1, 0,
+                                       dest ? dest + size : NULL);
+               else if (m->mode == 2)
+                       size += xdl_recs_copy(xe2, m->i2 - m->i1 + i1,
+                                       m->i1 + m->chg2 - i1, 0,
+                                       dest ? dest + size : NULL);
+               i1 = m->i1 + m->chg1;
+       }
+       size += xdl_recs_copy(xe1, i1, xe1->xdf2.nrec - i1, 0,
+                       dest ? dest + size : NULL);
+       return size;
+}
+
+/*
+ * Sometimes, changes are not quite identical, but differ in only a few
+ * lines. Try hard to show only these few lines as conflicting.
+ */
+static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
+               xpparam_t const *xpp)
+{
+       for (; m; m = m->next) {
+               mmfile_t t1, t2;
+               xdfenv_t xe;
+               xdchange_t *xscr, *x;
+               int i1 = m->i1, i2 = m->i2;
+
+               /* let's handle just the conflicts */
+               if (m->mode)
+                       continue;
+
+               /*
+                * This probably does not work outside git, since
+                * we have a very simple mmfile structure.
+                */
+               t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr;
+               t1.size = xe1->xdf2.recs[m->i1 + m->chg1]->ptr
+                       + xe1->xdf2.recs[m->i1 + m->chg1]->size - t1.ptr;
+               t2.ptr = (char *)xe2->xdf2.recs[m->i1]->ptr;
+               t2.size = xe2->xdf2.recs[m->i1 + m->chg1]->ptr
+                       + xe2->xdf2.recs[m->i1 + m->chg1]->size - t2.ptr;
+               if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
+                       return -1;
+               if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
+                   xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
+                   xdl_build_script(&xe, &xscr) < 0) {
+                       xdl_free_env(&xe);
+                       return -1;
+               }
+               if (!xscr) {
+                       /* If this happens, it's a bug. */
+                       xdl_free_env(&xe);
+                       return -2;
+               }
+               x = xscr;
+               m->i1 = xscr->i1 + i1;
+               m->chg1 = xscr->chg1;
+               m->i2 = xscr->i2 + i2;
+               m->chg2 = xscr->chg2;
+               while (xscr->next) {
+                       xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
+                       if (!m2) {
+                               xdl_free_env(&xe);
+                               xdl_free_script(x);
+                               return -1;
+                       }
+                       xscr = xscr->next;
+                       m2->next = m->next;
+                       m->next = m2;
+                       m = m2;
+                       m->mode = 0;
+                       m->i1 = xscr->i1 + i1;
+                       m->chg1 = xscr->chg1;
+                       m->i2 = xscr->i2 + i2;
+                       m->chg2 = xscr->chg2;
+               }
+               xdl_free_env(&xe);
+               xdl_free_script(x);
+       }
+       return 0;
+}
+
+/*
+ * level == 0: mark all overlapping changes as conflict
+ * level == 1: mark overlapping changes as conflict only if not identical
+ * level == 2: analyze non-identical changes for minimal conflict set
+ *
+ * returns < 0 on error, == 0 for no conflicts, else number of conflicts
+ */
+static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, const char *name1,
+               xdfenv_t *xe2, xdchange_t *xscr2, const char *name2,
+               int level, xpparam_t const *xpp, mmbuffer_t *result) {
+       xdmerge_t *changes, *c;
+       int i1, i2, chg1, chg2;
+
+       c = changes = NULL;
+
+       while (xscr1 && xscr2) {
+               if (!changes)
+                       changes = c;
+               if (xscr1->i1 + xscr1->chg1 < xscr2->i1) {
+                       i1 = xscr1->i2;
+                       i2 = xscr2->i2 - xscr2->i1 + xscr1->i1;
+                       chg1 = xscr1->chg2;
+                       chg2 = xscr1->chg1;
+                       if (xdl_append_merge(&c, 1, i1, chg1, i2, chg2)) {
+                               xdl_cleanup_merge(changes);
+                               return -1;
+                       }
+                       xscr1 = xscr1->next;
+                       continue;
+               }
+               if (xscr2->i1 + xscr2->chg1 < xscr1->i1) {
+                       i1 = xscr1->i2 - xscr1->i1 + xscr2->i1;
+                       i2 = xscr2->i2;
+                       chg1 = xscr2->chg1;
+                       chg2 = xscr2->chg2;
+                       if (xdl_append_merge(&c, 2, i1, chg1, i2, chg2)) {
+                               xdl_cleanup_merge(changes);
+                               return -1;
+                       }
+                       xscr2 = xscr2->next;
+                       continue;
+               }
+               if (level < 1 || xscr1->i1 != xscr2->i1 ||
+                               xscr1->chg1 != xscr2->chg1 ||
+                               xscr1->chg2 != xscr2->chg2 ||
+                               xdl_merge_cmp_lines(xe1, xscr1->i2,
+                                       xe2, xscr2->i2,
+                                       xscr1->chg2, xpp->flags)) {
+                       /* conflict */
+                       int off = xscr1->i1 - xscr2->i1;
+                       int ffo = off + xscr1->chg1 - xscr2->chg1;
+
+                       i1 = xscr1->i2;
+                       i2 = xscr2->i2;
+                       if (off > 0)
+                               i1 -= off;
+                       else
+                               i2 += off;
+                       chg1 = xscr1->i2 + xscr1->chg2 - i1;
+                       chg2 = xscr2->i2 + xscr2->chg2 - i2;
+                       if (ffo > 0)
+                               chg2 += ffo;
+                       else
+                               chg1 -= ffo;
+                       if (xdl_append_merge(&c, 0, i1, chg1, i2, chg2)) {
+                               xdl_cleanup_merge(changes);
+                               return -1;
+                       }
+               }
+
+               i1 = xscr1->i1 + xscr1->chg1;
+               i2 = xscr2->i1 + xscr2->chg1;
+
+               if (i1 > i2) {
+                       xscr1->chg1 -= i1 - i2;
+                       xscr1->i1 = i2;
+                       xscr1->i2 += xscr1->chg2;
+                       xscr1->chg2 = 0;
+                       xscr1 = xscr1->next;
+               } else if (i2 > i1) {
+                       xscr2->chg1 -= i2 - i1;
+                       xscr2->i1 = i1;
+                       xscr2->i2 += xscr2->chg2;
+                       xscr2->chg2 = 0;
+                       xscr2 = xscr2->next;
+               } else {
+                       xscr1 = xscr1->next;
+                       xscr2 = xscr2->next;
+               }
+       }
+       while (xscr1) {
+               if (!changes)
+                       changes = c;
+               i1 = xscr1->i2;
+               i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec;
+               chg1 = xscr1->chg2;
+               chg2 = xscr1->chg1;
+               if (xdl_append_merge(&c, 1, i1, chg1, i2, chg2)) {
+                       xdl_cleanup_merge(changes);
+                       return -1;
+               }
+               xscr1 = xscr1->next;
+       }
+       while (xscr2) {
+               if (!changes)
+                       changes = c;
+               i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec;
+               i2 = xscr2->i2;
+               chg1 = xscr2->chg1;
+               chg2 = xscr2->chg2;
+               if (xdl_append_merge(&c, 2, i1, chg1, i2, chg2)) {
+                       xdl_cleanup_merge(changes);
+                       return -1;
+               }
+               xscr2 = xscr2->next;
+       }
+       if (!changes)
+               changes = c;
+       /* refine conflicts */
+       if (level > 1 && xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0) {
+               xdl_cleanup_merge(changes);
+               return -1;
+       }
+       /* output */
+       if (result) {
+               int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2,
+                       changes, NULL);
+               result->ptr = xdl_malloc(size);
+               if (!result->ptr) {
+                       xdl_cleanup_merge(changes);
+                       return -1;
+               }
+               result->size = size;
+               xdl_fill_merge_buffer(xe1, name1, xe2, name2, changes,
+                               result->ptr);
+       }
+       return xdl_cleanup_merge(changes);
+}
+
+int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
+               mmfile_t *mf2, const char *name2,
+               xpparam_t const *xpp, int level, mmbuffer_t *result) {
+       xdchange_t *xscr1, *xscr2;
+       xdfenv_t xe1, xe2;
+
+       result->ptr = NULL;
+       result->size = 0;
+
+       if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0 ||
+                       xdl_do_diff(orig, mf2, xpp, &xe2) < 0) {
+               return -1;
+       }
+       if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 ||
+           xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 ||
+           xdl_build_script(&xe1, &xscr1) < 0) {
+               xdl_free_env(&xe1);
+               return -1;
+       }
+       if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 ||
+           xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 ||
+           xdl_build_script(&xe2, &xscr2) < 0) {
+               xdl_free_env(&xe2);
+               return -1;
+       }
+       if (xscr1 || xscr2) {
+               if (!xscr1) {
+                       result->ptr = xdl_malloc(mf2->size);
+                       memcpy(result->ptr, mf2->ptr, mf2->size);
+                       result->size = mf2->size;
+               } else if (!xscr2) {
+                       result->ptr = xdl_malloc(mf1->size);
+                       memcpy(result->ptr, mf1->ptr, mf1->size);
+                       result->size = mf1->size;
+               } else if (xdl_do_merge(&xe1, xscr1, name1,
+                                       &xe2, xscr2, name2,
+                                       level, xpp, result) < 0) {
+                       xdl_free_script(xscr1);
+                       xdl_free_script(xscr2);
+                       xdl_free_env(&xe1);
+                       xdl_free_env(&xe2);
+                       return -1;
+               }
+               xdl_free_script(xscr1);
+               xdl_free_script(xscr2);
+       }
+       xdl_free_env(&xe1);
+       xdl_free_env(&xe2);
+
+       return 0;
+}
+