Merge branch 'jc/sha1-name-more'
[git.git] / compat / precompose_utf8.c
1 /*
2  * Converts filenames from decomposed unicode into precomposed unicode.
3  * Used on MacOS X.
4 */
5
6
7 #define PRECOMPOSE_UNICODE_C
8
9 #include "cache.h"
10 #include "utf8.h"
11 #include "precompose_utf8.h"
12
/* Pointer type of the input buffer that this file hands to iconv(). */
typedef char *iconv_ibp;
/*
 * Encoding names given to iconv_open(): repo_encoding is the target,
 * path_encoding the source.  The storage class comes first; placing
 * "static" after "const" is an obsolescent form that compilers warn about.
 */
static const char *repo_encoding = "UTF-8";
static const char *path_encoding = "UTF-8-MAC";
16
17
/*
 * Count the bytes of `s` that have the high bit set (i.e. are not plain
 * 7-bit ASCII).  Scanning stops at the terminating NUL or after `maxlen`
 * bytes, whichever comes first.
 *
 * If `strlen_c` is non-NULL it always receives the number of bytes that
 * were scanned; this is 0 when `s` is NULL or empty, so callers never
 * read an unset value.
 *
 * Returns the number of high-bit bytes seen (0 if there are none).
 */
static size_t has_utf8(const char *s, size_t maxlen, size_t *strlen_c)
{
	const uint8_t *utf8p = (const uint8_t *)s;
	size_t strlen_chars = 0;
	size_t ret = 0;

	if (utf8p) {
		while (*utf8p && maxlen) {
			if (*utf8p & 0x80)
				ret++;
			strlen_chars++;
			utf8p++;
			maxlen--;
		}
	}
	if (strlen_c)
		*strlen_c = strlen_chars;

	return ret;
}
40
41
42 void probe_utf8_pathname_composition(char *path, int len)
43 {
44         const static char *auml_nfc = "\xc3\xa4";
45         const static char *auml_nfd = "\x61\xcc\x88";
46         int output_fd;
47         if (precomposed_unicode != -1)
48                 return; /* We found it defined in the global config, respect it */
49         path[len] = 0;
50         strcpy(path + len, auml_nfc);
51         output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
52         if (output_fd >=0) {
53                 close(output_fd);
54                 path[len] = 0;
55                 strcpy(path + len, auml_nfd);
56                 /* Indicate to the user, that we can configure it to true */
57                 if (0 == access(path, R_OK))
58                         git_config_set("core.precomposeunicode", "false");
59                         /* To be backward compatible, set precomposed_unicode to 0 */
60                 precomposed_unicode = 0;
61                 path[len] = 0;
62                 strcpy(path + len, auml_nfc);
63                 unlink(path);
64         }
65 }
66
67
68 void precompose_argv(int argc, const char **argv)
69 {
70         int i = 0;
71         const char *oldarg;
72         char *newarg;
73         iconv_t ic_precompose;
74
75         if (precomposed_unicode != 1)
76                 return;
77
78         ic_precompose = iconv_open(repo_encoding, path_encoding);
79         if (ic_precompose == (iconv_t) -1)
80                 return;
81
82         while (i < argc) {
83                 size_t namelen;
84                 oldarg = argv[i];
85                 if (has_utf8(oldarg, (size_t)-1, &namelen)) {
86                         newarg = reencode_string_iconv(oldarg, namelen, ic_precompose);
87                         if (newarg)
88                                 argv[i] = newarg;
89                 }
90                 i++;
91         }
92         iconv_close(ic_precompose);
93 }
94
95
96 PREC_DIR *precompose_utf8_opendir(const char *dirname)
97 {
98         PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
99         prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
100         prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
101
102         prec_dir->dirp = opendir(dirname);
103         if (!prec_dir->dirp) {
104                 free(prec_dir->dirent_nfc);
105                 free(prec_dir);
106                 return NULL;
107         } else {
108                 int ret_errno = errno;
109                 prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
110                 /* if iconv_open() fails, die() in readdir() if needed */
111                 errno = ret_errno;
112         }
113
114         return prec_dir;
115 }
116
117 struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
118 {
119         struct dirent *res;
120         res = readdir(prec_dir->dirp);
121         if (res) {
122                 size_t namelenz = strlen(res->d_name) + 1; /* \0 */
123                 size_t new_maxlen = namelenz;
124
125                 int ret_errno = errno;
126
127                 if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
128                         size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
129                                 sizeof(prec_dir->dirent_nfc->d_name);
130
131                         prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
132                         prec_dir->dirent_nfc->max_name_len = new_maxlen;
133                 }
134
135                 prec_dir->dirent_nfc->d_ino  = res->d_ino;
136                 prec_dir->dirent_nfc->d_type = res->d_type;
137
138                 if ((precomposed_unicode == 1) && has_utf8(res->d_name, (size_t)-1, NULL)) {
139                         if (prec_dir->ic_precompose == (iconv_t)-1) {
140                                 die("iconv_open(%s,%s) failed, but needed:\n"
141                                                 "    precomposed unicode is not supported.\n"
142                                                 "    If you wnat to use decomposed unicode, run\n"
143                                                 "    \"git config core.precomposeunicode false\"\n",
144                                                 repo_encoding, path_encoding);
145                         } else {
146                                 iconv_ibp       cp = (iconv_ibp)res->d_name;
147                                 size_t inleft = namelenz;
148                                 char *outpos = &prec_dir->dirent_nfc->d_name[0];
149                                 size_t outsz = prec_dir->dirent_nfc->max_name_len;
150                                 size_t cnt;
151                                 errno = 0;
152                                 cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
153                                 if (errno || inleft) {
154                                         /*
155                                          * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
156                                          * MacOS X avoids illegal byte sequemces.
157                                          * If they occur on a mounted drive (e.g. NFS) it is not worth to
158                                          * die() for that, but rather let the user see the original name
159                                         */
160                                         namelenz = 0; /* trigger strlcpy */
161                                 }
162                         }
163                 }
164                 else
165                         namelenz = 0;
166
167                 if (!namelenz)
168                         strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
169                                                         prec_dir->dirent_nfc->max_name_len);
170
171                 errno = ret_errno;
172                 return prec_dir->dirent_nfc;
173         }
174         return NULL;
175 }
176
177
178 int precompose_utf8_closedir(PREC_DIR *prec_dir)
179 {
180         int ret_value;
181         int ret_errno;
182         ret_value = closedir(prec_dir->dirp);
183         ret_errno = errno;
184         if (prec_dir->ic_precompose != (iconv_t)-1)
185                 iconv_close(prec_dir->ic_precompose);
186         free(prec_dir->dirent_nfc);
187         free(prec_dir);
188         errno = ret_errno;
189         return ret_value;
190 }