From 8594e4d028e1403112a2b48d0b00d58c2c0f2c87 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 17 Nov 2016 17:24:10 +0100 Subject: [PATCH 01/14] Allow for platform-specific core.* config settings In the Git for Windows project, we have ample precendent for config settings that apply to Windows, and to Windows only. Let's formalize this concept by introducing a platform_core_config() function that can be #define'd in a platform-specific manner. This will allow us to contain platform-specific code better, as the corresponding variables no longer need to be exported so that they can be defined in environment.c and be set in config.c Signed-off-by: Johannes Schindelin --- compat/mingw.c | 5 +++++ compat/mingw.h | 3 +++ config.c | 2 +- git-compat-util.h | 8 ++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 8b6fa0db44..613958c1f0 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -202,6 +202,11 @@ static int ask_yes_no_if_possible(const char *format, ...) } } +int mingw_core_config(const char *var, const char *value) +{ + return 0; +} + int mingw_unlink(const char *pathname) { int ret, tries = 0; diff --git a/compat/mingw.h b/compat/mingw.h index e03aecfe2e..0e4be440ac 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -11,6 +11,9 @@ typedef _sigset_t sigset_t; #undef _POSIX_THREAD_SAFE_FUNCTIONS #endif +extern int mingw_core_config(const char *var, const char *value); +#define platform_core_config mingw_core_config + /* * things that are not available in header files */ diff --git a/config.c b/config.c index 231f9a750b..93a1ee68a5 100644 --- a/config.c +++ b/config.c @@ -1242,7 +1242,7 @@ static int git_default_core_config(const char *var, const char *value) } /* Add other config variables here and to Documentation/config.txt. */ - return 0; + return platform_core_config(var, value); } static int git_default_i18n_config(const char *var, const char *value) diff --git a/git-compat-util.h b/git-compat-util.h index db9c22de76..d4e0611f22 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -338,6 +338,14 @@ typedef uintmax_t timestamp_t; #define _PATH_DEFPATH "/usr/local/bin:/usr/bin:/bin" #endif +#ifndef platform_core_config +static inline int noop_core_config(const char *var, const char *value) +{ + return 0; +} +#define platform_core_config noop_core_config +#endif + #ifndef has_dos_drive_prefix static inline int git_has_dos_drive_prefix(const char *path) { From 980de0b0073bbab00dec6bc3872bf73fec406786 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 17 Nov 2016 17:33:06 +0100 Subject: [PATCH 02/14] Move Windows-specific config settings into compat/mingw.c Signed-off-by: Johannes Schindelin --- cache.h | 8 -------- compat/mingw.c | 17 +++++++++++++++++ config.c | 8 -------- environment.c | 1 - 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cache.h b/cache.h index 71fe092644..6f22f205ce 100644 --- a/cache.h +++ b/cache.h @@ -789,14 +789,6 @@ extern int ref_paranoia; extern char comment_line_char; extern int auto_comment_line_char; -/* Windows only */ -enum hide_dotfiles_type { - HIDE_DOTFILES_FALSE = 0, - HIDE_DOTFILES_TRUE, - HIDE_DOTFILES_DOTGITONLY -}; -extern enum hide_dotfiles_type hide_dotfiles; - enum log_refs_config { LOG_REFS_UNSET = -1, LOG_REFS_NONE = 0, diff --git a/compat/mingw.c b/compat/mingw.c index 613958c1f0..8d75c282a2 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -202,8 +202,25 @@ static int ask_yes_no_if_possible(const char *format, ...) } } +/* Windows only */ +enum hide_dotfiles_type { + HIDE_DOTFILES_FALSE = 0, + HIDE_DOTFILES_TRUE, + HIDE_DOTFILES_DOTGITONLY +}; + +static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; + int mingw_core_config(const char *var, const char *value) { + if (!strcmp(var, "core.hidedotfiles")) { + if (value && !strcasecmp(value, "dotgitonly")) + hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; + else + hide_dotfiles = git_config_bool(var, value); + return 0; + } + return 0; } diff --git a/config.c b/config.c index 93a1ee68a5..d1b70f21d7 100644 --- a/config.c +++ b/config.c @@ -1233,14 +1233,6 @@ static int git_default_core_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.hidedotfiles")) { - if (value && !strcasecmp(value, "dotgitonly")) - hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; - else - hide_dotfiles = git_config_bool(var, value); - return 0; - } - /* Add other config variables here and to Documentation/config.txt. */ return platform_core_config(var, value); } diff --git a/environment.c b/environment.c index 3fd4b10845..c974bed180 100644 --- a/environment.c +++ b/environment.c @@ -64,7 +64,6 @@ int core_apply_sparse_checkout; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; -enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT From d85e50754eca749d0d110784f1d6a4e7079b25e7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 17 Nov 2016 18:05:04 +0100 Subject: [PATCH 03/14] mingw: unset PERL5LIB by default Git for Windows ships with its own Perl interpreter, and insists on using it, so it will most likely wreak havoc if PERL5LIB is set before launching Git. Let's just unset that environment variables when spawning processes. To make this feature extensible (and overrideable), there is a new config setting `core.unsetenvvars` that allows specifying a comma-separated list of names to unset before spawning processes. Reported by Gabriel Fuhrmann. Signed-off-by: Johannes Schindelin --- Documentation/config.txt | 6 ++++++ compat/mingw.c | 35 ++++++++++++++++++++++++++++++++++- t/t0028-core-unsetenvvars.sh | 30 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 t/t0028-core-unsetenvvars.sh diff --git a/Documentation/config.txt b/Documentation/config.txt index d5c9c4cab6..7f17db26ce 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -859,6 +859,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.unsetenvvars:: + EXPERIMENTAL, Windows-only: comma-separated list of environment + variables' names that need to be unset before spawning any other + process. Defaults to `PERL5LIB` to account for the fact that Git + for Windows insists on using its own Perl interpreter. + core.createObject:: You can set this to 'link', in which case a hardlink followed by a delete of the source are used to make sure that object creation diff --git a/compat/mingw.c b/compat/mingw.c index 8d75c282a2..cfa1e514d7 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -210,6 +210,7 @@ enum hide_dotfiles_type { }; static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; +static char *unset_environment_variables; int mingw_core_config(const char *var, const char *value) { @@ -221,6 +222,12 @@ int mingw_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.unsetenvvars")) { + free(unset_environment_variables); + unset_environment_variables = xstrdup(value); + return 0; + } + return 0; } @@ -1054,6 +1061,27 @@ static wchar_t *make_environment_block(char **deltaenv) return wenvblk; } +static void do_unset_environment_variables(void) +{ + static int done; + char *p = unset_environment_variables; + + if (done || !p) + return; + done = 1; + + for (;;) { + char *comma = strchr(p, ','); + + if (comma) + *comma = '\0'; + unsetenv(p); + if (!comma) + break; + p = comma + 1; + } +} + struct pinfo_t { struct pinfo_t *next; pid_t pid; @@ -1072,9 +1100,12 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaen wchar_t wcmd[MAX_PATH], wdir[MAX_PATH], *wargs, *wenvblk = NULL; unsigned flags = CREATE_UNICODE_ENVIRONMENT; BOOL ret; + HANDLE cons; + + do_unset_environment_variables(); /* Determine whether or not we are associated to a console */ - HANDLE cons = CreateFile("CONOUT$", GENERIC_WRITE, + cons = CreateFile("CONOUT$", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (cons == INVALID_HANDLE_VALUE) { @@ -2208,6 +2239,8 @@ void mingw_startup(void) /* fix Windows specific environment settings */ setup_windows_environment(); + unset_environment_variables = xstrdup("PERL5LIB"); + /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); diff --git a/t/t0028-core-unsetenvvars.sh b/t/t0028-core-unsetenvvars.sh new file mode 100755 index 0000000000..24ce46a6ea --- /dev/null +++ b/t/t0028-core-unsetenvvars.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +test_description='test the Windows-only core.unsetenvvars setting' + +. ./test-lib.sh + +if ! test_have_prereq MINGW +then + skip_all='skipping Windows-specific tests' + test_done +fi + +test_expect_success 'setup' ' + mkdir -p "$TRASH_DIRECTORY/.git/hooks" && + write_script "$TRASH_DIRECTORY/.git/hooks/pre-commit" <<-\EOF + echo $HOBBES >&2 + EOF +' + +test_expect_success 'core.unsetenvvars works' ' + HOBBES=Calvin && + export HOBBES && + git commit --allow-empty -m with 2>err && + grep Calvin err && + git -c core.unsetenvvars=FINDUS,HOBBES,CALVIN \ + commit --allow-empty -m without 2>err && + ! grep Calvin err +' + +test_done From 5957d74d22c14af46c2eff20dcfd7edb6359b3fc Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 04/14] Win32: make FILETIME conversion functions public Signed-off-by: Karsten Blees --- compat/mingw.c | 16 ---------------- compat/mingw.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index cfa1e514d7..ce98052b85 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -543,22 +543,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline time_t filetime_to_time_t(const FILETIME *ft) -{ - return (time_t)(filetime_to_hnsec(ft) / 10000000); -} - /** * Verifies that safe_create_leading_directories() would succeed. */ diff --git a/compat/mingw.h b/compat/mingw.h index 0e4be440ac..481bb9ee64 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -329,6 +329,22 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + +static inline time_t filetime_to_time_t(const FILETIME *ft) +{ + return (time_t)(filetime_to_hnsec(ft) / 10000000); +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows. */ From 0ee3272e10700a32560c5cfa92d5186d9a09ff9f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 05/14] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4..2603a0fa39 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From 9fd3bd326312f4e649d234cc4c63069723c37dec Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 06/14] Win32: Make the dirent implementation pluggable Emulating the POSIX dirent API on Windows via FindFirstFile/FindNextFile is pretty staightforward, however, most of the information provided in the WIN32_FIND_DATA structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to lstat. Make the dirent implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to readdir/closedir that match the opendir implementation (i.e. similar to vtable pointers in OOP). Define readdir/closedir so that they call the function pointers in the DIR structure. This allows to choose the opendir implementation on a call-by-call basis. Move the fixed sized dirent.d_name buffer to the dirent-specific DIR structure, as d_name may be implementation specific (e.g. a caching implementation may just set d_name to point into the cache instead of copying the entire file name string). Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 27 +++++++++++++++++---------- compat/win32/dirent.h | 26 +++++++++++++++++++------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39..6b87042182 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,19 @@ #include "../../git-compat-util.h" -struct DIR { +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ struct dirent dd_dir; /* includes d_type */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + char dd_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ +} dirent_DIR; + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +22,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +49,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +61,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +88,12 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR)); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; + dir->dd_dir.d_name = dir->dd_name; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..6b3ddee51b 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,32 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char *d_name; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From f50a55b3288a036f26ef971df56086bc4226b28f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 07/14] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees --- compat/mingw.c | 2 ++ compat/mingw.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index ce98052b85..1c7c694dae 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -682,6 +682,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + int mingw_lstat(const char *file_name, struct stat *buf) { return do_stat_internal(0, file_name, buf); diff --git a/compat/mingw.h b/compat/mingw.h index 481bb9ee64..c820372a81 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -368,7 +368,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); #ifndef _stati64 # define _stati64(x,y) mingw_stat(x,y) From ab355636bb75eeff28705982edc548cc50b3edab Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 08/14] add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config.txt | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 2 ++ 6 files changed, 32 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 7f17db26ce..97fe304145 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -859,6 +859,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: EXPERIMENTAL, Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other diff --git a/builtin/commit.c b/builtin/commit.c index 8e93802511..fa9a055295 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1382,6 +1382,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); read_cache_preload(&s.pathspec); refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); diff --git a/compat/mingw.c b/compat/mingw.c index 1c7c694dae..130f13e05e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -211,6 +211,7 @@ enum hide_dotfiles_type { static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value) { @@ -222,6 +223,11 @@ int mingw_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { free(unset_environment_variables); unset_environment_variables = xstrdup(value); diff --git a/compat/mingw.h b/compat/mingw.h index c820372a81..eb7636e8e0 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -11,6 +11,8 @@ typedef _sigset_t sigset_t; #undef _POSIX_THREAD_SAFE_FUNCTIONS #endif +extern int core_fscache; + extern int mingw_core_config(const char *var, const char *value); #define platform_core_config mingw_core_config diff --git a/git-compat-util.h b/git-compat-util.h index d4e0611f22..8e58d11121 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1169,6 +1169,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + extern int cmd_main(int, const char **); #endif diff --git a/preload-index.c b/preload-index.c index 70a4c80878..4dbed6c16d 100644 --- a/preload-index.c +++ b/preload-index.c @@ -86,6 +86,7 @@ static void preload_index(struct index_state *index, offset = 0; work = DIV_ROUND_UP(index->cache_nr, threads); memset(&data, 0, sizeof(data)); + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; p->index = index; @@ -102,6 +103,7 @@ static void preload_index(struct index_state *index, if (pthread_join(p->pthread, NULL)) die("unable to join threaded lstat"); } + enable_fscache(0); } #endif From 851ca49bcf19adda32749c8d685706ac07f2e93c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 09/14] Win32: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow lstat emulation (git calls lstat once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an lstat implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent lstat calls for the same directory are served directly from the cache. Also implement opendir / readdir / closedir so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 444 +++++++++++++++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- git-compat-util.h | 2 + 4 files changed, 458 insertions(+), 2 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..0efa7663c5 --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,444 @@ +#include "../../cache.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + const char *name; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + time_t st_atime; + time_t st_mtime; + time_t st_ctime; + }; + }; +}; + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *unused_cmp_data, + const struct fsentry *fse1, const struct fsentry *fse2, + void *unused_keydata) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return strnicmp(fse1->name, fse2->name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + fse->name = name; + fse->len = len; + hashmap_entry_init(fse, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + char *nm = ((char*) fse) + sizeof(struct fsentry); + memcpy(nm, name, len); + nm[len] = 0; + /* init the rest of the structure */ + fsentry_init(fse, list, nm, len); + fse->next = NULL; + fse->refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + fse->st_atime = filetime_to_time_t(&(fdata->ftLastAccessTime)); + fse->st_mtime = filetime_to_time_t(&(fdata->ftLastWriteTime)); + fse->st_ctime = filetime_to_time_t(&(fdata->ftCreationTime)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->name, MAX_PATH, dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, fse); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_free(&map, 1); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get(&map, key, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get(&map, key->list, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get(&map, key, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get(&map, key, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; + struct fsentry key[2], *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(key, NULL, filename, dirlen); + fsentry_init(key + 1, key, filename + base, len - base); + fse = fscache_get(key + 1); + if (!fse) + return -1; + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->st_size; + st->st_atime = fse->st_atime; + st->st_mtime = fse->st_mtime; + st->st_ctime = fse->st_ctime; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent.d_type = S_ISDIR(next->st_mode) ? DT_DIR : DT_REG; + dir->dirent.d_name = (char*) next->name; + return &(dir->dirent); +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct fsentry key, *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key, NULL, dirname, len); + list = fscache_get(&key); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index 6604b130f8..3e6e915970 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -373,7 +373,7 @@ ifeq ($(uname_S),Windows) BASIC_CFLAGS = -nologo -I. -I../zlib -Icompat/vcbuild -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE COMPAT_OBJS = compat/msvc.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj @@ -515,7 +515,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/git-compat-util.h b/git-compat-util.h index 8e58d11121..0380c606fa 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -195,8 +195,10 @@ #if defined(__MINGW32__) /* pull in Windows compatibility stuff */ #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/msvc.h" +#include "compat/win32/fscache.h" #else #include #include From d7c4187b1ba681d4a08fe378a8158b0d7f6c5ff5 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 10/14] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 0efa7663c5..8094d0a8c6 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -33,6 +33,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -245,16 +247,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get(&map, key, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->hwait, INFINITE); + CloseHandle(key->hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get(&map, key, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get(&map, key, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -262,7 +291,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get(&map, key->list, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -271,16 +300,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->refcnt = 0; + hashmap_add(&map, future); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get(&map, key, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, future, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list) From 31aa9f000815687116aac1770f82e10de223dc44 Mon Sep 17 00:00:00 2001 From: Doug Kelly Date: Wed, 8 Jan 2014 20:28:15 -0600 Subject: [PATCH 11/14] Add a test demonstrating a problem with long submodule paths [jes: adusted test number to avoid conflicts, fixed non-portable use of the 'export' statement] Signed-off-by: Johannes Schindelin --- t/t7415-submodule-long-path.sh | 101 +++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 t/t7415-submodule-long-path.sh diff --git a/t/t7415-submodule-long-path.sh b/t/t7415-submodule-long-path.sh new file mode 100755 index 0000000000..6bb641cd4b --- /dev/null +++ b/t/t7415-submodule-long-path.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# +# Copyright (c) 2013 Doug Kelly +# + +test_description='Test submodules with a path near PATH_MAX + +This test verifies that "git submodule" initialization, update and clones work, including with recursive submodules and paths approaching PATH_MAX (260 characters on Windows) +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh + +longpath="" +for (( i=0; i<4; i++ )); do + longpath="0123456789abcdefghijklmnopqrstuvwxyz$longpath" +done +# Pick a substring maximum of 90 characters +# This should be good, since we'll add on a lot for temp directories +longpath=${longpath:0:90}; export longpath + +test_expect_failure 'submodule with a long path' ' + git init --bare remote && + test_create_repo bundle1 && + ( + cd bundle1 && + test_commit "shoot" && + git rev-parse --verify HEAD >../expect + ) && + mkdir home && + ( + cd home && + git clone ../remote test && + cd test && + git submodule add ../bundle1 $longpath && + test_commit "sogood" && + ( + cd $longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../expect actual + ) && + git push origin master + ) && + mkdir home2 && + ( + cd home2 && + git clone ../remote test && + cd test && + git checkout master && + git submodule update --init && + ( + cd $longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../expect actual + ) + ) +' + +test_expect_failure 'recursive submodule with a long path' ' + git init --bare super && + test_create_repo child && + ( + cd child && + test_commit "shoot" && + git rev-parse --verify HEAD >../expect + ) && + test_create_repo parent && + ( + cd parent && + git submodule add ../child $longpath && + test_commit "aim" + ) && + mkdir home3 && + ( + cd home3 && + git clone ../super test && + cd test && + git submodule add ../parent foo && + git submodule update --init --recursive + test_commit "sogood" && + ( + cd foo/$longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../../expect actual + ) && + git push origin master + ) && + mkdir home4 && + ( + cd home4 && + git clone ../super test --recursive && + ( + cd test/foo/$longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../../expect actual + ) + ) +' +unset longpath + +test_done From fce262725307250fae7add6cf6d60527f353c183 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jul 2015 21:07:41 +0200 Subject: [PATCH 12/14] Win32: support long paths Windows paths are typically limited to MAX_PATH = 260 characters, even though the underlying NTFS file system supports paths up to 32,767 chars. This limitation is also evident in Windows Explorer, cmd.exe and many other applications (including IDEs). Particularly annoying is that most Windows APIs return bogus error codes if a relative path only barely exceeds MAX_PATH in conjunction with the current directory, e.g. ERROR_PATH_NOT_FOUND / ENOENT instead of the infinitely more helpful ERROR_FILENAME_EXCED_RANGE / ENAMETOOLONG. Many Windows wide char APIs support longer than MAX_PATH paths through the file namespace prefix ('\\?\' or '\\?\UNC\') followed by an absolute path. Notable exceptions include functions dealing with executables and the current directory (CreateProcess, LoadLibrary, Get/SetCurrentDirectory) as well as the entire shell API (ShellExecute, SHGetSpecialFolderPath...). Introduce a handle_long_path function to check the length of a specified path properly (and fail with ENAMETOOLONG), and to optionally expand long paths using the '\\?\' file namespace prefix. Short paths will not be modified, so we don't need to worry about device names (NUL, CON, AUX). Contrary to MSDN docs, the GetFullPathNameW function doesn't seem to be limited to MAX_PATH (at least not on Win7), so we can use it to do the heavy lifting of the conversion (translate '/' to '\', eliminate '.' and '..', and make an absolute path). Add long path error checking to xutftowcs_path for APIs with hard MAX_PATH limit. Add a new MAX_LONG_PATH constant and xutftowcs_long_path function for APIs that support long paths. While improved error checking is always active, long paths support must be explicitly enabled via 'core.longpaths' option. This is to prevent end users to shoot themselves in the foot by checking out files that Windows Explorer, cmd/bash or their favorite IDE cannot handle. Test suite: Test the case is when the full pathname length of a dir is close to 260 (MAX_PATH). Bug report and an original reproducer by Andrey Rogozhnikov: https://github.com/msysgit/git/pull/122#issuecomment-43604199 [jes: adjusted test number to avoid conflicts] Thanks-to: Martin W. Kirst Thanks-to: Doug Kelly Signed-off-by: Karsten Blees Original-test-by: Andrey Rogozhnikov Signed-off-by: Stepan Kasal Signed-off-by: Johannes Schindelin --- Documentation/config.txt | 7 ++ compat/mingw.c | 138 ++++++++++++++++++++++++++------- compat/mingw.h | 75 ++++++++++++++++-- compat/win32/dirent.c | 14 ++-- compat/win32/fscache.c | 17 ++-- t/t2027-checkout-long-paths.sh | 99 +++++++++++++++++++++++ t/t7415-submodule-long-path.sh | 24 +++--- 7 files changed, 319 insertions(+), 55 deletions(-) create mode 100755 t/t2027-checkout-long-paths.sh diff --git a/Documentation/config.txt b/Documentation/config.txt index 97fe304145..6c4b5ec1b7 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -865,6 +865,13 @@ core.fscache:: Git for Windows uses this to bulk-read and cache lstat data of entire directories (instead of doing lstat file by file). +core.longpaths:: + Enable long path (> 260) support for builtin commands in Git for + Windows. This is disabled by default, as long paths are not supported + by Windows Explorer, cmd.exe and the Git for Windows tool chain + (msys, bash, tcl, perl...). Only enable this if you know what you're + doing and are prepared to live with a few quirks. + core.unsetenvvars:: EXPERIMENTAL, Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other diff --git a/compat/mingw.c b/compat/mingw.c index 130f13e05e..2b825db9fe 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -212,6 +212,7 @@ enum hide_dotfiles_type { static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; int core_fscache; +int core_long_paths; int mingw_core_config(const char *var, const char *value) { @@ -228,6 +229,11 @@ int mingw_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.longpaths")) { + core_long_paths = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { free(unset_environment_variables); unset_environment_variables = xstrdup(value); @@ -240,8 +246,8 @@ int mingw_core_config(const char *var, const char *value) int mingw_unlink(const char *pathname) { int ret, tries = 0; - wchar_t wpathname[MAX_PATH]; - if (xutftowcs_path(wpathname, pathname) < 0) + wchar_t wpathname[MAX_LONG_PATH]; + if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; /* read-only files cannot be removed */ @@ -270,7 +276,7 @@ static int is_dir_empty(const wchar_t *wpath) { WIN32_FIND_DATAW findbuf; HANDLE handle; - wchar_t wbuf[MAX_PATH + 2]; + wchar_t wbuf[MAX_LONG_PATH + 2]; wcscpy(wbuf, wpath); wcscat(wbuf, L"\\*"); handle = FindFirstFileW(wbuf, &findbuf); @@ -291,8 +297,8 @@ static int is_dir_empty(const wchar_t *wpath) int mingw_rmdir(const char *pathname) { int ret, tries = 0; - wchar_t wpathname[MAX_PATH]; - if (xutftowcs_path(wpathname, pathname) < 0) + wchar_t wpathname[MAX_LONG_PATH]; + if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; while ((ret = _wrmdir(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { @@ -367,9 +373,12 @@ static int set_hidden_flag(const wchar_t *path, int set) int mingw_mkdir(const char *path, int mode) { int ret; - wchar_t wpath[MAX_PATH]; - if (xutftowcs_path(wpath, path) < 0) + wchar_t wpath[MAX_LONG_PATH]; + /* CreateDirectoryW path limit is 248 (MAX_PATH - 8.3 file name) */ + if (xutftowcs_path_ex(wpath, path, MAX_LONG_PATH, -1, 248, + core_long_paths) < 0) return -1; + ret = _wmkdir(wpath); if (!ret && needs_hiding(path)) return set_hidden_flag(wpath, 1); @@ -381,7 +390,7 @@ int mingw_open (const char *filename, int oflags, ...) va_list args; unsigned mode; int fd; - wchar_t wfilename[MAX_PATH]; + wchar_t wfilename[MAX_LONG_PATH]; va_start(args, oflags); mode = va_arg(args, int); @@ -390,7 +399,7 @@ int mingw_open (const char *filename, int oflags, ...) if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - if (xutftowcs_path(wfilename, filename) < 0) + if (xutftowcs_long_path(wfilename, filename) < 0) return -1; fd = _wopen(wfilename, oflags, mode); @@ -447,10 +456,10 @@ FILE *mingw_fopen (const char *filename, const char *otype) { int hide = needs_hiding(filename); FILE *file; - wchar_t wfilename[MAX_PATH], wotype[4]; + wchar_t wfilename[MAX_LONG_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - if (xutftowcs_path(wfilename, filename) < 0 || + if (xutftowcs_long_path(wfilename, filename) < 0 || xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) return NULL; if (hide && !access(filename, F_OK) && set_hidden_flag(wfilename, 0)) { @@ -469,10 +478,10 @@ FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream) { int hide = needs_hiding(filename); FILE *file; - wchar_t wfilename[MAX_PATH], wotype[4]; + wchar_t wfilename[MAX_LONG_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - if (xutftowcs_path(wfilename, filename) < 0 || + if (xutftowcs_long_path(wfilename, filename) < 0 || xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) return NULL; if (hide && !access(filename, F_OK) && set_hidden_flag(wfilename, 0)) { @@ -526,25 +535,32 @@ ssize_t mingw_write(int fd, const void *buf, size_t len) int mingw_access(const char *filename, int mode) { - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, filename) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, filename) < 0) return -1; /* X_OK is not supported by the MSVCRT version */ return _waccess(wfilename, mode & ~X_OK); } +/* cached length of current directory for handle_long_path */ +static int current_directory_len = 0; + int mingw_chdir(const char *dirname) { + int result; wchar_t wdirname[MAX_PATH]; + /* SetCurrentDirectoryW doesn't support long paths */ if (xutftowcs_path(wdirname, dirname) < 0) return -1; - return _wchdir(wdirname); + result = _wchdir(wdirname); + current_directory_len = GetCurrentDirectoryW(0, NULL); + return result; } int mingw_chmod(const char *filename, int mode) { - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, filename) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, filename) < 0) return -1; return _wchmod(wfilename, mode); } @@ -592,8 +608,8 @@ static int has_valid_directory_prefix(wchar_t *wfilename) static int do_lstat(int follow, const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, file_name) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, file_name) < 0) return -1; if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { @@ -742,8 +758,8 @@ int mingw_utime (const char *file_name, const struct utimbuf *times) FILETIME mft, aft; int fh, rc; DWORD attrs; - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, file_name) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, file_name) < 0) return -1; /* must have write permission */ @@ -791,6 +807,7 @@ unsigned int sleep (unsigned int seconds) char *mingw_mktemp(char *template) { wchar_t wtemplate[MAX_PATH]; + /* we need to return the path, thus no long paths here! */ if (xutftowcs_path(wtemplate, template) < 0) return NULL; if (!_wmktemp(wtemplate)) @@ -1127,6 +1144,7 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaen si.hStdOutput = winansi_get_osfhandle(fhout); si.hStdError = winansi_get_osfhandle(fherr); + /* executables and the current directory don't support long paths */ if (xutftowcs_path(wcmd, cmd) < 0) return -1; if (dir && xutftowcs_path(wdir, dir) < 0) @@ -1695,8 +1713,9 @@ int mingw_rename(const char *pold, const char *pnew) { DWORD attrs, gle; int tries = 0; - wchar_t wpold[MAX_PATH], wpnew[MAX_PATH]; - if (xutftowcs_path(wpold, pold) < 0 || xutftowcs_path(wpnew, pnew) < 0) + wchar_t wpold[MAX_LONG_PATH], wpnew[MAX_LONG_PATH]; + if (xutftowcs_long_path(wpold, pold) < 0 || + xutftowcs_long_path(wpnew, pnew) < 0) return -1; /* @@ -1936,9 +1955,9 @@ int link(const char *oldpath, const char *newpath) { typedef BOOL (WINAPI *T)(LPCWSTR, LPCWSTR, LPSECURITY_ATTRIBUTES); static T create_hard_link = NULL; - wchar_t woldpath[MAX_PATH], wnewpath[MAX_PATH]; - if (xutftowcs_path(woldpath, oldpath) < 0 || - xutftowcs_path(wnewpath, newpath) < 0) + wchar_t woldpath[MAX_LONG_PATH], wnewpath[MAX_LONG_PATH]; + if (xutftowcs_long_path(woldpath, oldpath) < 0 || + xutftowcs_long_path(wnewpath, newpath) < 0) return -1; if (!create_hard_link) { @@ -2151,6 +2170,68 @@ static void setup_windows_environment(void) setenv("TERM", "cygwin", 1); } +int handle_long_path(wchar_t *path, int len, int max_path, int expand) +{ + int result; + wchar_t buf[MAX_LONG_PATH]; + + /* + * we don't need special handling if path is relative to the current + * directory, and current directory + path don't exceed the desired + * max_path limit. This should cover > 99 % of cases with minimal + * performance impact (git almost always uses relative paths). + */ + if ((len < 2 || (!is_dir_sep(path[0]) && path[1] != ':')) && + (current_directory_len + len < max_path)) + return len; + + /* + * handle everything else: + * - absolute paths: "C:\dir\file" + * - absolute UNC paths: "\\server\share\dir\file" + * - absolute paths on current drive: "\dir\file" + * - relative paths on other drive: "X:file" + * - prefixed paths: "\\?\...", "\\.\..." + */ + + /* convert to absolute path using GetFullPathNameW */ + result = GetFullPathNameW(path, MAX_LONG_PATH, buf, NULL); + if (!result) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + + /* + * return absolute path if it fits within max_path (even if + * "cwd + path" doesn't due to '..' components) + */ + if (result < max_path) { + wcscpy(path, buf); + return result; + } + + /* error out if we shouldn't expand the path or buf is too small */ + if (!expand || result >= MAX_LONG_PATH - 6) { + errno = ENAMETOOLONG; + return -1; + } + + /* prefix full path with "\\?\" or "\\?\UNC\" */ + if (buf[0] == '\\') { + /* ...unless already prefixed */ + if (buf[1] == '\\' && (buf[2] == '?' || buf[2] == '.')) + return len; + + wcscpy(path, L"\\\\?\\UNC\\"); + wcscpy(path + 8, buf + 2); + return result + 6; + } else { + wcscpy(path, L"\\\\?\\"); + wcscpy(path + 4, buf); + return result + 4; + } +} + /* * Disable MSVCRT command line wildcard expansion (__getmainargs called from * mingw startup code, see init.c in mingw runtime). @@ -2244,6 +2325,9 @@ void mingw_startup(void) /* initialize Unicode console */ winansi_init(); + + /* init length of current directory for handle_long_path */ + current_directory_len = GetCurrentDirectoryW(0, NULL); } int uname(struct utsname *buf) diff --git a/compat/mingw.h b/compat/mingw.h index eb7636e8e0..710922568a 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -12,6 +12,7 @@ typedef _sigset_t sigset_t; #endif extern int core_fscache; +extern int core_long_paths; extern int mingw_core_config(const char *var, const char *value); #define platform_core_config mingw_core_config @@ -449,6 +450,42 @@ int mingw_offset_1st_component(const char *path); #include #endif +/** + * Max length of long paths (exceeding MAX_PATH). The actual maximum supported + * by NTFS is 32,767 (* sizeof(wchar_t)), but we choose an arbitrary smaller + * value to limit required stack memory. + */ +#define MAX_LONG_PATH 4096 + +/** + * Handles paths that would exceed the MAX_PATH limit of Windows Unicode APIs. + * + * With expand == false, the function checks for over-long paths and fails + * with ENAMETOOLONG. The path parameter is not modified, except if cwd + path + * exceeds max_path, but the resulting absolute path doesn't (e.g. due to + * eliminating '..' components). The path parameter must point to a buffer + * of max_path wide characters. + * + * With expand == true, an over-long path is automatically converted in place + * to an absolute path prefixed with '\\?\', and the new length is returned. + * The path parameter must point to a buffer of MAX_LONG_PATH wide characters. + * + * Parameters: + * path: path to check and / or convert + * len: size of path on input (number of wide chars without \0) + * max_path: max short path length to check (usually MAX_PATH = 260, but just + * 248 for CreateDirectoryW) + * expand: false to only check the length, true to expand the path to a + * '\\?\'-prefixed absolute path + * + * Return: + * length of the resulting path, or -1 on failure + * + * Errors: + * ENAMETOOLONG if path is too long + */ +int handle_long_path(wchar_t *path, int len, int max_path, int expand); + /** * Converts UTF-8 encoded string to UTF-16LE. * @@ -506,17 +543,45 @@ static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen) return xutftowcsn(wcs, utf, wcslen, -1); } +/** + * Simplified file system specific wrapper of xutftowcsn and handle_long_path. + * Converts ERANGE to ENAMETOOLONG. If expand is true, wcs must be at least + * MAX_LONG_PATH wide chars (see handle_long_path). + */ +static inline int xutftowcs_path_ex(wchar_t *wcs, const char *utf, + size_t wcslen, int utflen, int max_path, int expand) +{ + int result = xutftowcsn(wcs, utf, wcslen, utflen); + if (result < 0 && errno == ERANGE) + errno = ENAMETOOLONG; + if (result >= 0) + result = handle_long_path(wcs, result, max_path, expand); + return result; +} + /** * Simplified file system specific variant of xutftowcsn, assumes output * buffer size is MAX_PATH wide chars and input string is \0-terminated, - * fails with ENAMETOOLONG if input string is too long. + * fails with ENAMETOOLONG if input string is too long. Typically used for + * Windows APIs that don't support long paths, e.g. SetCurrentDirectory, + * LoadLibrary, CreateProcess... */ static inline int xutftowcs_path(wchar_t *wcs, const char *utf) { - int result = xutftowcsn(wcs, utf, MAX_PATH, -1); - if (result < 0 && errno == ERANGE) - errno = ENAMETOOLONG; - return result; + return xutftowcs_path_ex(wcs, utf, MAX_PATH, -1, MAX_PATH, 0); +} + +/** + * Simplified file system specific variant of xutftowcsn for Windows APIs + * that support long paths via '\\?\'-prefix, assumes output buffer size is + * MAX_LONG_PATH wide chars, fails with ENAMETOOLONG if input string is too + * long. The 'core.longpaths' git-config option controls whether the path + * is only checked or expanded to a long path. + */ +static inline int xutftowcs_long_path(wchar_t *wcs, const char *utf) +{ + return xutftowcs_path_ex(wcs, utf, MAX_LONG_PATH, -1, MAX_PATH, + core_long_paths); } /** diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 6b87042182..b3bd8d7af7 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -63,19 +63,23 @@ static int dirent_closedir(dirent_DIR *dir) DIR *dirent_opendir(const char *name) { - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + wchar_t pattern[MAX_LONG_PATH + 2]; /* + 2 for "\*" */ WIN32_FIND_DATAW fdata; HANDLE h; int len; dirent_DIR *dir; - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) + /* convert name to UTF-16 and check length */ + if ((len = xutftowcs_path_ex(pattern, name, MAX_LONG_PATH, -1, + MAX_PATH - 2, core_long_paths)) < 0) return NULL; - /* append optional '/' and wildcard '*' */ + /* + * append optional '\' and wildcard '*'. Note: we need to use '\' as + * Windows doesn't translate '/' to '\' for "\\?\"-prefixed paths. + */ if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; + pattern[len++] = '\\'; pattern[len++] = '*'; pattern[len] = 0; diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 8094d0a8c6..162fbed09b 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -166,23 +166,24 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, */ static struct fsentry *fsentry_create_list(const struct fsentry *dir) { - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + wchar_t pattern[MAX_LONG_PATH + 2]; /* + 2 for "\*" */ WIN32_FIND_DATAW fdata; HANDLE h; int wlen; struct fsentry *list, **phead; DWORD err; - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((wlen = xutftowcsn(pattern, dir->name, MAX_PATH, dir->len)) < 0) { - if (errno == ERANGE) - errno = ENAMETOOLONG; + /* convert name to UTF-16 and check length */ + if ((wlen = xutftowcs_path_ex(pattern, dir->name, MAX_LONG_PATH, + dir->len, MAX_PATH - 2, core_long_paths)) < 0) return NULL; - } - /* append optional '/' and wildcard '*' */ + /* + * append optional '\' and wildcard '*'. Note: we need to use '\' as + * Windows doesn't translate '/' to '\' for "\\?\"-prefixed paths. + */ if (wlen) - pattern[wlen++] = '/'; + pattern[wlen++] = '\\'; pattern[wlen++] = '*'; pattern[wlen] = 0; diff --git a/t/t2027-checkout-long-paths.sh b/t/t2027-checkout-long-paths.sh new file mode 100755 index 0000000000..e65a38da4d --- /dev/null +++ b/t/t2027-checkout-long-paths.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +test_description='checkout long paths on Windows + +Ensures that Git for Windows can deal with long paths (>260) enabled via core.longpaths' + +. ./test-lib.sh + +if test_have_prereq !MINGW +then + skip_all='skipping MINGW specific long paths test' + test_done +fi + +test_expect_success setup ' + p=longpathxx && # -> 10 + p=$p$p$p$p$p && # -> 50 + p=$p$p$p$p$p && # -> 250 + + path=${p}/longtestfile && # -> 263 (MAX_PATH = 260) + + blob=$(echo foobar | git hash-object -w --stdin) && + + printf "100644 %s 0\t%s\n" "$blob" "$path" | + git update-index --add --index-info && + git commit -m initial -q +' + +test_expect_success 'checkout of long paths without core.longpaths fails' ' + git config core.longpaths false && + test_must_fail git checkout -f 2>error && + grep -q "Filename too long" error && + test_path_is_missing longpa~1/longtestfile +' + +test_expect_success 'checkout of long paths with core.longpaths works' ' + git config core.longpaths true && + git checkout -f && + test_path_is_file longpa~1/longtestfile +' + +test_expect_success 'update of long paths' ' + echo frotz >> longpa~1/longtestfile && + echo $path > expect && + git ls-files -m > actual && + test_cmp expect actual && + git add $path && + git commit -m second && + git grep "frotz" HEAD -- $path +' + +test_expect_success cleanup ' + # bash cannot delete the trash dir if it contains a long path + # lets help cleaning up (unless in debug mode) + test ! -z "$debug" || rm -rf longpa~1 +' + +# check that the template used in the test won't be too long: +abspath="$(pwd -W)"/testdir +test ${#abspath} -gt 230 || +test_set_prereq SHORTABSPATH + +test_expect_success SHORTABSPATH 'clean up path close to MAX_PATH' ' + p=/123456789abcdef/123456789abcdef/123456789abcdef/123456789abc/ef && + p=y$p$p$p$p && + subdir="x$(echo "$p" | tail -c $((253 - ${#abspath})) - )" && + # Now, $abspath/$subdir has exactly 254 characters, and is inside CWD + p2="$abspath/$subdir" && + test 254 = ${#p2} && + + # Be careful to overcome path limitations of the MSys tools and split + # the $subdir into two parts. ($subdir2 has to contain 16 chars and a + # slash somewhere following; that is why we asked for abspath <= 230 and + # why we placed a slash near the end of the $subdir template.) + subdir2=${subdir#????????????????*/} && + subdir1=testdir/${subdir%/$subdir2} && + mkdir -p "$subdir1" && + i=0 && + # The most important case is when absolute path is 258 characters long, + # and that will be when i == 4. + while test $i -le 7 + do + mkdir -p $subdir2 && + touch $subdir2/one-file && + mv ${subdir2%%/*} "$subdir1/" && + subdir2=z${subdir2} && + i=$(($i+1)) || + exit 1 + done && + + # now check that git is able to clear the tree: + (cd testdir && + git init && + git config core.longpaths yes && + git clean -fdx) && + test ! -d "$subdir1" +' + +test_done diff --git a/t/t7415-submodule-long-path.sh b/t/t7415-submodule-long-path.sh index 6bb641cd4b..59b3d347db 100755 --- a/t/t7415-submodule-long-path.sh +++ b/t/t7415-submodule-long-path.sh @@ -11,15 +11,20 @@ This test verifies that "git submodule" initialization, update and clones work, TEST_NO_CREATE_REPO=1 . ./test-lib.sh -longpath="" -for (( i=0; i<4; i++ )); do - longpath="0123456789abcdefghijklmnopqrstuvwxyz$longpath" -done -# Pick a substring maximum of 90 characters -# This should be good, since we'll add on a lot for temp directories -longpath=${longpath:0:90}; export longpath +# cloning a submodule calls is_git_directory("$path/../.git/modules/$path"), +# which effectively limits the maximum length to PATH_MAX / 2 minus some +# overhead; start with 3 * 36 = 108 chars (test 2 fails if >= 110) +longpath36=0123456789abcdefghijklmnopqrstuvwxyz +longpath180=$longpath36$longpath36$longpath36$longpath36$longpath36 -test_expect_failure 'submodule with a long path' ' +# the git database must fit within PATH_MAX, which limits the submodule name +# to PATH_MAX - len(pwd) - ~90 (= len("/objects//") + 40-byte sha1 + some +# overhead from the test case) +pwd=$(pwd) +pwdlen=$(echo "$pwd" | wc -c) +longpath=$(echo $longpath180 | cut -c 1-$((170-$pwdlen))) + +test_expect_success 'submodule with a long path' ' git init --bare remote && test_create_repo bundle1 && ( @@ -56,7 +61,7 @@ test_expect_failure 'submodule with a long path' ' ) ' -test_expect_failure 'recursive submodule with a long path' ' +test_expect_success 'recursive submodule with a long path' ' git init --bare super && test_create_repo child && ( @@ -96,6 +101,5 @@ test_expect_failure 'recursive submodule with a long path' ' ) ) ' -unset longpath test_done From 103a5f0616d599a754d2aa11e176b521fe0d8f26 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 30 Aug 2015 18:49:05 +0200 Subject: [PATCH 13/14] Win32: support long paths Windows paths are typically limited to MAX_PATH = 260 characters, even though the underlying NTFS file system supports paths up to 32,767 chars. This limitation is also evident in Windows Explorer, cmd.exe and many other applications (including IDEs). Particularly annoying is that most Windows APIs return bogus error codes if a relative path only barely exceeds MAX_PATH in conjunction with the current directory, e.g. ERROR_PATH_NOT_FOUND / ENOENT instead of the infinitely more helpful ERROR_FILENAME_EXCED_RANGE / ENAMETOOLONG. Many Windows wide char APIs support longer than MAX_PATH paths through the file namespace prefix ('\\?\' or '\\?\UNC\') followed by an absolute path. Notable exceptions include functions dealing with executables and the current directory (CreateProcess, LoadLibrary, Get/SetCurrentDirectory) as well as the entire shell API (ShellExecute, SHGetSpecialFolderPath...). Introduce a handle_long_path function to check the length of a specified path properly (and fail with ENAMETOOLONG), and to optionally expand long paths using the '\\?\' file namespace prefix. Short paths will not be modified, so we don't need to worry about device names (NUL, CON, AUX). Contrary to MSDN docs, the GetFullPathNameW function doesn't seem to be limited to MAX_PATH (at least not on Win7), so we can use it to do the heavy lifting of the conversion (translate '/' to '\', eliminate '.' and '..', and make an absolute path). Add long path error checking to xutftowcs_path for APIs with hard MAX_PATH limit. Add a new MAX_LONG_PATH constant and xutftowcs_long_path function for APIs that support long paths. While improved error checking is always active, long paths support must be explicitly enabled via 'core.longpaths' option. This is to prevent end users to shoot themselves in the foot by checking out files that Windows Explorer, cmd/bash or their favorite IDE cannot handle. Test suite: Test the case is when the full pathname length of a dir is close to 260 (MAX_PATH). Bug report and an original reproducer by Andrey Rogozhnikov: https://github.com/msysgit/git/pull/122#issuecomment-43604199 Note that the test cannot rely on the presence of short names, as they are not enabled by default except on the system drive. [jes: adjusted test number to avoid conflicts, reinstated && chain, adjusted test to work without short names] Thanks-to: Martin W. Kirst Thanks-to: Doug Kelly Signed-off-by: Karsten Blees Original-test-by: Andrey Rogozhnikov Signed-off-by: Stepan Kasal Signed-off-by: Johannes Schindelin --- compat/mingw.c | 5 ++--- ...t-long-paths.sh => t2029-checkout-long-paths.sh} | 13 +++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) rename t/{t2027-checkout-long-paths.sh => t2029-checkout-long-paths.sh} (93%) diff --git a/compat/mingw.c b/compat/mingw.c index 2b825db9fe..4651ddc2b4 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -548,9 +548,8 @@ static int current_directory_len = 0; int mingw_chdir(const char *dirname) { int result; - wchar_t wdirname[MAX_PATH]; - /* SetCurrentDirectoryW doesn't support long paths */ - if (xutftowcs_path(wdirname, dirname) < 0) + wchar_t wdirname[MAX_LONG_PATH]; + if (xutftowcs_long_path(wdirname, dirname) < 0) return -1; result = _wchdir(wdirname); current_directory_len = GetCurrentDirectoryW(0, NULL); diff --git a/t/t2027-checkout-long-paths.sh b/t/t2029-checkout-long-paths.sh similarity index 93% rename from t/t2027-checkout-long-paths.sh rename to t/t2029-checkout-long-paths.sh index e65a38da4d..18e5eabe15 100755 --- a/t/t2027-checkout-long-paths.sh +++ b/t/t2029-checkout-long-paths.sh @@ -30,17 +30,19 @@ test_expect_success 'checkout of long paths without core.longpaths fails' ' git config core.longpaths false && test_must_fail git checkout -f 2>error && grep -q "Filename too long" error && - test_path_is_missing longpa~1/longtestfile + test ! -d longpa* ' test_expect_success 'checkout of long paths with core.longpaths works' ' git config core.longpaths true && git checkout -f && - test_path_is_file longpa~1/longtestfile + (cd longpa* && + test_path_is_file longtestfile) ' test_expect_success 'update of long paths' ' - echo frotz >> longpa~1/longtestfile && + (cd longpa* && + echo frotz >> longtestfile) && echo $path > expect && git ls-files -m > actual && test_cmp expect actual && @@ -52,7 +54,10 @@ test_expect_success 'update of long paths' ' test_expect_success cleanup ' # bash cannot delete the trash dir if it contains a long path # lets help cleaning up (unless in debug mode) - test ! -z "$debug" || rm -rf longpa~1 + if test -z "$debug" + then + rm -rf longpa~1 + fi ' # check that the template used in the test won't be too long: From be6a998ab510de0d603364e93d8040ed63358dec Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 5 Jul 2014 00:00:36 +0200 Subject: [PATCH 14/14] Win32: fix 'lstat("dir/")' with long paths Use a suffciently large buffer to strip the trailing slash. Signed-off-by: Karsten Blees --- compat/mingw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 4651ddc2b4..1e70c51ff3 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -679,7 +679,7 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) static int do_stat_internal(int follow, const char *file_name, struct stat *buf) { int namelen; - char alt_name[PATH_MAX]; + char alt_name[MAX_LONG_PATH]; if (!do_lstat(follow, file_name, buf)) return 0; @@ -695,7 +695,7 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return -1; while (namelen && file_name[namelen-1] == '/') --namelen; - if (!namelen || namelen >= PATH_MAX) + if (!namelen || namelen >= MAX_LONG_PATH) return -1; memcpy(alt_name, file_name, namelen);