git wrapper: DWIM mistyped commands
author: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Sun, 31 Aug 2008 13:50:23 +0000 (15:50 +0200)
committer: Junio C Hamano <gitster@pobox.com>
Sun, 31 Aug 2008 17:14:58 +0000 (10:14 -0700)
This patch introduces a modified Damerau-Levenshtein algorithm into
Git's code base, and uses it with the following penalties to show some
similar commands when an unknown command was encountered:

swap = 0, insertion = 1, substitution = 2, deletion = 4

A typical output would now look like this:

$ git sm
git: 'sm' is not a git-command. See 'git --help'.

Did you mean one of these?
am
rm

The cut-off is at similarity rating 6, which was empirically determined
to give sensible results.

As a convenience, if there is only one candidate, Git continues under
the assumption that the user mistyped it.  Example:

$ git reabse
WARNING: You called a Git program named 'reabse', which does
not exist.
Continuing under the assumption that you meant 'rebase'
[...]

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Makefile
builtin.h
git.c
help.c
help.h
levenshtein.c [new file with mode: 0644]
levenshtein.h [new file with mode: 0644]

index bf400e64f39e5967206c01d2266aca800d4db667..3daa6dcdb004f06559191bc929176cbda7c84368 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -358,6 +358,7 @@ LIB_H += graph.h
 LIB_H += grep.h
 LIB_H += hash.h
 LIB_H += help.h
+LIB_H += levenshtein.h
 LIB_H += list-objects.h
 LIB_H += ll-merge.h
 LIB_H += log-tree.h
@@ -433,6 +434,7 @@ LIB_OBJS += hash.o
 LIB_OBJS += help.o
 LIB_OBJS += ident.o
 LIB_OBJS += interpolate.o
+LIB_OBJS += levenshtein.o
 LIB_OBJS += list-objects.o
 LIB_OBJS += ll-merge.o
 LIB_OBJS += lockfile.o
index f3502d305e4f65e9707fe8b738f64be6e49f7f84..e67cb2090e8c111be4b137939953f3e006f31dfd 100644 (file)
--- a/builtin.h
+++ b/builtin.h
@@ -11,7 +11,7 @@ extern const char git_usage_string[];
 extern const char git_more_info_string[];
 
 extern void list_common_cmds_help(void);
-extern void help_unknown_cmd(const char *cmd);
+extern const char *help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
diff --git a/git.c b/git.c
index 37b1d76a08ca59f3de54e11890dce962403cf8d3..54c5bfa69b0696a376828e36c30b957506fce408 100644 (file)
--- a/git.c
+++ b/git.c
@@ -499,7 +499,9 @@ int main(int argc, const char **argv)
                                cmd, argv[0]);
                        exit(1);
                }
-               help_unknown_cmd(cmd);
+               argv[0] = help_unknown_cmd(cmd);
+               handle_internal_command(argc, argv);
+               execv_dashed_external(argv);
        }
 
        fprintf(stderr, "Failed to run command '%s': %s\n",
diff --git a/help.c b/help.c
index 1afbac0927cdf2ba395126a7fe87d96a9ea87a94..b1ebca4091e9e1cb623ff89a69306cdb00aa9c68 100644 (file)
--- a/help.c
+++ b/help.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "builtin.h"
 #include "exec_cmd.h"
+#include "levenshtein.h"
 #include "help.h"
 
 /* most GUI terminals set COLUMNS (although some don't export it) */
@@ -37,6 +38,16 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len)
        cmds->names[cmds->cnt++] = ent;
 }
 
+/* Free all entries and the array; zero cmds so no stale pointer survives. */
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+       int i;
+       for (i = 0; i < cmds->cnt; ++i)
+               free(cmds->names[i]);
+       free(cmds->names);
+       memset(cmds, 0, sizeof(*cmds));
+}
+
 static int cmdname_compare(const void *a_, const void *b_)
 {
        struct cmdname *a = *(struct cmdname **)a_;
@@ -257,9 +268,90 @@ int is_in_cmdlist(struct cmdnames *c, const char *s)
        return 0;
 }
 
-void help_unknown_cmd(const char *cmd)
+/*
+ * qsort() comparator for the candidate list: lower similarity score (kept
+ * in ->len) sorts first, equal scores are ordered by name.
+ */
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+       const struct cmdname *const *c1 = p1, *const *c2 = p2;
+       const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+       int l1 = (*c1)->len;
+       int l2 = (*c2)->len;
+       return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+/* A candidate is only worth suggesting (or assuming) below this score. */
+#define SIMILAR_ENOUGH(x) ((x) < 6)
+
+/*
+ * Deal with "cmd", which is not a known git command.
+ *
+ * All commands found by load_command_list() are scored against cmd with
+ * levenshtein() and sorted, best score first.  If a single command has the
+ * best score and that score passes SIMILAR_ENOUGH(), a warning is printed
+ * and its name is returned for the caller to run; the string stays
+ * allocated on purpose.  In every other case the error message (followed
+ * by the best candidates when they are similar enough) goes to stderr and
+ * the process exits with status 1.
+ */
+const char *help_unknown_cmd(const char *cmd)
 {
+       int i, n, best_similarity = 0;
+       struct cmdnames main_cmds, other_cmds;
+
+       memset(&main_cmds, 0, sizeof(main_cmds));
+       memset(&other_cmds, 0, sizeof(other_cmds));
+
+       load_command_list("git-", &main_cmds, &other_cmds);
+
+       /* append other_cmds' entries to main_cmds, which now owns them */
+       ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt,
+                  main_cmds.alloc);
+       memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names,
+              other_cmds.cnt * sizeof(other_cmds.names[0]));
+       main_cmds.cnt += other_cmds.cnt;
+       free(other_cmds.names);
+
+       /* This reuses cmdname->len for similarity index */
+       for (i = 0; i < main_cmds.cnt; ++i)
+               main_cmds.names[i]->len =
+                       levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+       qsort(main_cmds.names, main_cmds.cnt,
+             sizeof(*main_cmds.names), levenshtein_compare);
+
+       if (!main_cmds.cnt)
+               die ("Uh oh. Your system reports no Git commands at all.");
+
+       /* n = number of candidates tied for the best score */
+       best_similarity = main_cmds.names[0]->len;
+       n = 1;
+       while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+               ++n;
+       /* never silently run a command that merely happens to be the closest */
+       if (n == 1 && SIMILAR_ENOUGH(best_similarity)) {
+               const char *assumed = main_cmds.names[0]->name;
+               /* detach the entry so the cleanup leaves "assumed" valid */
+               main_cmds.names[0] = NULL;
+               clean_cmdnames(&main_cmds);
+               fprintf(stderr, "WARNING: You called a Git program named '%s', "
+                       "which does not exist.\n"
+                       "Continuing under the assumption that you meant '%s'\n",
+                       cmd, assumed);
+               return assumed;
+       }
+
        fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd);
+
+       if (SIMILAR_ENOUGH(best_similarity)) {
+               fprintf(stderr, "\nDid you mean %s?\n",
+                       n < 2 ? "this": "one of these");
+
+               for (i = 0; i < n; i++)
+                       fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+       }
+
        exit(1);
 }
 
diff --git a/help.h b/help.h
index 3f1ae89dd69413a4547e12bb3736bd66326d830f..5fc7892705ef3bd042d63487caa11aba5c0cba3a 100644 (file)
--- a/help.h
+++ b/help.h
@@ -5,7 +5,7 @@ struct cmdnames {
        int alloc;
        int cnt;
        struct cmdname {
-               size_t len;
+               size_t len; /* also used for similarity index in help.c */
                char name[FLEX_ARRAY];
        } **names;
 };
diff --git a/levenshtein.c b/levenshtein.c
new file mode 100644 (file)
index 0000000..db52f2c
--- /dev/null
@@ -0,0 +1,69 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+/*
+ * Weighted Damerau-Levenshtein distance: the cheapest way to turn string1
+ * into string2 using the four edit operations below.
+ *
+ *   w - cost of swapping two adjacent characters
+ *   s - cost of substituting one character for another
+ *   a - cost of inserting a character of string2 that string1 lacks
+ *   d - cost of deleting a character of string1 that string2 lacks
+ *
+ * Only three rows of the cost matrix are kept alive at a time: row1 is the
+ * row for the previous character of string1, row0 the one before that
+ * (needed for swaps) and row2 the row being computed.
+ *
+ * Returns the total cost; with non-negative weights equal strings yield 0.
+ */
+int levenshtein(const char *string1, const char *string2,
+               int w, int s, int a, int d)
+{
+       int len1 = strlen(string1), len2 = strlen(string2);
+       int *row0 = xmalloc(sizeof(int) * (len2 + 1));
+       int *row1 = xmalloc(sizeof(int) * (len2 + 1));
+       int *row2 = xmalloc(sizeof(int) * (len2 + 1));
+       int i, j;
+
+       /* turning "" into the first j characters of string2: j insertions */
+       for (j = 0; j <= len2; j++)
+               row1[j] = j * a;
+       for (i = 0; i < len1; i++) {
+               int *dummy;
+
+               /* turning the first i + 1 characters of string1 into "" */
+               row2[0] = (i + 1) * d;
+               for (j = 0; j < len2; j++) {
+                       /* substitution */
+                       row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+                       /* swap */
+                       if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+                                       string1[i] == string2[j - 1] &&
+                                       row2[j + 1] > row0[j - 1] + w)
+                               row2[j + 1] = row0[j - 1] + w;
+                       /*
+                        * deletion; this must be considered in the last
+                        * column too, or a surplus character at the end of
+                        * string1 is never priced as a plain deletion
+                        */
+                       if (row2[j + 1] > row1[j + 1] + d)
+                               row2[j + 1] = row1[j + 1] + d;
+                       /* insertion */
+                       if (row2[j + 1] > row2[j] + a)
+                               row2[j + 1] = row2[j] + a;
+               }
+
+               /* rotate the rows; the oldest one is recycled as scratch */
+               dummy = row0;
+               row0 = row1;
+               row1 = row2;
+               row2 = dummy;
+       }
+
+       i = row1[len2];
+       free(row0);
+       free(row1);
+       free(row2);
+
+       return i;
+}
diff --git a/levenshtein.h b/levenshtein.h
new file mode 100644 (file)
index 0000000..0173abe
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+/*
+ * Weighted edit distance between string1 and string2; each argument after
+ * the strings is the cost charged for one operation of the named kind.
+ */
+int levenshtein(const char *string1, const char *string2,
+       int swap_penalty, int substitution_penalty,
+       int insertion_penalty, int deletion_penalty);
+
+#endif