From 7ca583891f0e7b69b27933c8efeaa17be6cc5acf Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 1/7] Win32: make FILETIME conversion functions public Signed-off-by: Karsten Blees --- compat/mingw.c | 16 ---------------- compat/mingw.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 00191fe9db..2d20e17715 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -580,22 +580,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline time_t filetime_to_time_t(const FILETIME *ft) -{ - return (time_t)(filetime_to_hnsec(ft) / 10000000); -} - /** * Verifies that safe_create_leading_directories() would succeed. */ diff --git a/compat/mingw.h b/compat/mingw.h index 908279decc..8c603c8443 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -329,6 +329,22 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + +static inline time_t filetime_to_time_t(const FILETIME *ft) +{ + return (time_t)(filetime_to_hnsec(ft) / 10000000); +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows. */ From 4fcdefd04344df9e9815f8f886f6255ad7bb4dee Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 2/7] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4..2603a0fa39 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From 256271ba7bae019a99aeb88efcf7cba1d4f925fa Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 3/7] Win32: Make the dirent implementation pluggable Emulating the POSIX dirent API on Windows via FindFirstFile/FindNextFile is pretty staightforward, however, most of the information provided in the WIN32_FIND_DATA structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to lstat. Make the dirent implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to readdir/closedir that match the opendir implementation (i.e. similar to vtable pointers in OOP). Define readdir/closedir so that they call the function pointers in the DIR structure. This allows to choose the opendir implementation on a call-by-call basis. Move the fixed sized dirent.d_name buffer to the dirent-specific DIR structure, as d_name may be implementation specific (e.g. a caching implementation may just set d_name to point into the cache instead of copying the entire file name string). Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 27 +++++++++++++++++---------- compat/win32/dirent.h | 26 +++++++++++++++++++------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39..6b87042182 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,19 @@ #include "../../git-compat-util.h" -struct DIR { +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ struct dirent dd_dir; /* includes d_type */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + char dd_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ +} dirent_DIR; + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +22,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +49,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +61,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +88,12 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR)); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; + dir->dd_dir.d_name = dir->dd_name; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..6b3ddee51b 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,32 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char *d_name; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From 34f4dc3dc4182685893d0a1dd2880bc34996de20 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 4/7] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees --- compat/mingw.c | 2 ++ compat/mingw.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 2d20e17715..146a5069bd 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -719,6 +719,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + int mingw_lstat(const char *file_name, struct stat *buf) { return do_stat_internal(0, file_name, buf); diff --git a/compat/mingw.h b/compat/mingw.h index 8c603c8443..33544e141a 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -368,7 +368,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); #ifndef _stati64 # define _stati64(x,y) mingw_stat(x,y) From 6462b6911699a5f1d0a273e5bc2fb209e28b05f2 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 5/7] add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config.txt | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 2 ++ 6 files changed, 32 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index b25f1c0dc4..3aa9df09ed 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -909,6 +909,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: EXPERIMENTAL, Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other diff --git a/builtin/commit.c b/builtin/commit.c index 0d9828e29e..f6be863a0a 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1355,6 +1355,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); read_cache_preload(&s.pathspec); refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); diff --git a/compat/mingw.c b/compat/mingw.c index 146a5069bd..ba146ad270 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -211,6 +211,7 @@ enum hide_dotfiles_type { static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value, void *cb) { @@ -222,6 +223,11 @@ int mingw_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { free(unset_environment_variables); unset_environment_variables = xstrdup(value); diff --git a/compat/mingw.h b/compat/mingw.h index 33544e141a..616a5066cd 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -11,6 +11,8 @@ typedef _sigset_t sigset_t; #undef _POSIX_THREAD_SAFE_FUNCTIONS #endif +extern int core_fscache; + extern int mingw_core_config(const char *var, const char *value, void *cb); #define platform_core_config mingw_core_config diff --git a/git-compat-util.h b/git-compat-util.h index 7513ec4949..9fed029e7e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1225,6 +1225,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + extern int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index 71cd2437a3..9c22a0705f 100644 --- a/preload-index.c +++ b/preload-index.c @@ -93,6 +93,7 @@ static void preload_index(struct index_state *index, offset = 0; work = DIV_ROUND_UP(index->cache_nr, threads); memset(&data, 0, sizeof(data)); + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; p->index = index; @@ -110,6 +111,7 @@ static void preload_index(struct index_state *index, die("unable to join threaded lstat"); } trace_performance_since(start, "preload index"); + enable_fscache(0); } #endif From 66f51ba51228244a6a903192e06ab7b4c21b0eff Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 6/7] Win32: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow lstat emulation (git calls lstat once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an lstat implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent lstat calls for the same directory are served directly from the cache. Also implement opendir / readdir / closedir so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 444 +++++++++++++++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- git-compat-util.h | 2 + 4 files changed, 458 insertions(+), 2 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..0efa7663c5 --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,444 @@ +#include "../../cache.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + const char *name; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + time_t st_atime; + time_t st_mtime; + time_t st_ctime; + }; + }; +}; + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *unused_cmp_data, + const struct fsentry *fse1, const struct fsentry *fse2, + void *unused_keydata) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return strnicmp(fse1->name, fse2->name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + fse->name = name; + fse->len = len; + hashmap_entry_init(fse, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + char *nm = ((char*) fse) + sizeof(struct fsentry); + memcpy(nm, name, len); + nm[len] = 0; + /* init the rest of the structure */ + fsentry_init(fse, list, nm, len); + fse->next = NULL; + fse->refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + fse->st_atime = filetime_to_time_t(&(fdata->ftLastAccessTime)); + fse->st_mtime = filetime_to_time_t(&(fdata->ftLastWriteTime)); + fse->st_ctime = filetime_to_time_t(&(fdata->ftCreationTime)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->name, MAX_PATH, dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, fse); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_free(&map, 1); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get(&map, key, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get(&map, key->list, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get(&map, key, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get(&map, key, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; + struct fsentry key[2], *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(key, NULL, filename, dirlen); + fsentry_init(key + 1, key, filename + base, len - base); + fse = fscache_get(key + 1); + if (!fse) + return -1; + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->st_size; + st->st_atime = fse->st_atime; + st->st_mtime = fse->st_mtime; + st->st_ctime = fse->st_ctime; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent.d_type = S_ISDIR(next->st_mode) ? DT_DIR : DT_REG; + dir->dirent.d_name = (char*) next->name; + return &(dir->dirent); +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct fsentry key, *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key, NULL, dirname, len); + list = fscache_get(&key); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index e47af72e01..6646aa303e 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -393,7 +393,7 @@ ifeq ($(uname_S),Windows) BASIC_CFLAGS = -nologo -I. -I../zlib -Icompat/vcbuild -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE COMPAT_OBJS = compat/msvc.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj @@ -537,7 +537,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DWIN32 -DPROTECT_NTFS_DEFAULT=1 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/git-compat-util.h b/git-compat-util.h index 9fed029e7e..d0d249b931 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -195,8 +195,10 @@ #if defined(__MINGW32__) /* pull in Windows compatibility stuff */ #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/msvc.h" +#include "compat/win32/fscache.h" #else #include #include From 72be485574a02b2bee92c08c8a1cda63a387eec5 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 7/7] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 0efa7663c5..8094d0a8c6 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -33,6 +33,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -245,16 +247,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get(&map, key, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->hwait, INFINITE); + CloseHandle(key->hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get(&map, key, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get(&map, key, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -262,7 +291,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get(&map, key->list, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -271,16 +300,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->refcnt = 0; + hashmap_add(&map, future); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get(&map, key, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, future, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list)