From 1f229c48977e6fbc093d64eaac84bf9f9b126cc9 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 1/7] Win32: make FILETIME conversion functions public Signed-off-by: Karsten Blees --- compat/mingw.c | 16 ---------------- compat/mingw.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 3fbfda5978..d4ed08e5e1 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -512,22 +512,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline time_t filetime_to_time_t(const FILETIME *ft) -{ - return (time_t)(filetime_to_hnsec(ft) / 10000000); -} - /** * Verifies that safe_create_leading_directories() would succeed. */ diff --git a/compat/mingw.h b/compat/mingw.h index 034fff9479..5e00c6da25 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -326,6 +326,22 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. + */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + +static inline time_t filetime_to_time_t(const FILETIME *ft) +{ + return (time_t)(filetime_to_hnsec(ft) / 10000000); +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows. */ From 0cf90d5c6d34fb47ac8961078999547d2ba9ad0a Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 2/7] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4..2603a0fa39 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From 2e0a8fc9bc00cc18ae5d4065bd4d8db49d4f59dc Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 3/7] Win32: Make the dirent implementation pluggable Emulating the POSIX dirent API on Windows via FindFirstFile/FindNextFile is pretty staightforward, however, most of the information provided in the WIN32_FIND_DATA structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to lstat. Make the dirent implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Define a base DIR structure with pointers to readdir/closedir that match the opendir implementation (i.e. similar to vtable pointers in OOP). Define readdir/closedir so that they call the function pointers in the DIR structure. This allows to choose the opendir implementation on a call-by-call basis. Move the fixed sized dirent.d_name buffer to the dirent-specific DIR structure, as d_name may be implementation specific (e.g. a caching implementation may just set d_name to point into the cache instead of copying the entire file name string). Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 27 +++++++++++++++++---------- compat/win32/dirent.h | 26 +++++++++++++++++++------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39..6b87042182 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,19 @@ #include "../../git-compat-util.h" -struct DIR { +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ struct dirent dd_dir; /* includes d_type */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + char dd_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ +} dirent_DIR; + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +22,7 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +49,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +61,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +88,12 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR)); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; + dir->dd_dir.d_name = dir->dd_name; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bf..6b3ddee51b 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,32 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char *d_name; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From fd69f1f32b78c058a67c68fca7d98c7a142971b6 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 4/7] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. Signed-off-by: Karsten Blees --- compat/mingw.c | 2 ++ compat/mingw.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index d4ed08e5e1..954009e067 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -651,6 +651,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + int mingw_lstat(const char *file_name, struct stat *buf) { return do_stat_internal(0, file_name, buf); diff --git a/compat/mingw.h b/compat/mingw.h index 5e00c6da25..e5bd433b2d 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -365,7 +365,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); #ifndef _stati64 # define _stati64(x,y) mingw_stat(x,y) From 844568f9594314da52ef7157606f3c361f9cc306 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 5/7] add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. Signed-off-by: Karsten Blees --- Documentation/config.txt | 6 ++++++ builtin/commit.c | 1 + cache.h | 2 ++ config.c | 5 +++++ environment.c | 1 + git-compat-util.h | 15 +++++++++++++++ preload-index.c | 2 ++ 7 files changed, 32 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 21fdddf240..e7a768eeb0 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -760,6 +760,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.createObject:: You can set this to 'link', in which case a hardlink followed by a delete of the source are used to make sure that object creation diff --git a/builtin/commit.c b/builtin/commit.c index 7a1ade0d27..4c616484de 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1374,6 +1374,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); read_cache_preload(&s.pathspec); refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); diff --git a/cache.h b/cache.h index 1ec9021a70..02777be500 100644 --- a/cache.h +++ b/cache.h @@ -712,6 +712,8 @@ enum hide_dotfiles_type { }; extern enum hide_dotfiles_type hide_dotfiles; +extern int core_fscache; + enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, BRANCH_TRACK_NEVER = 0, diff --git a/config.c b/config.c index 0dfed682b8..91ff686add 100644 --- a/config.c +++ b/config.c @@ -1007,6 +1007,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/environment.c b/environment.c index ca72464a98..d513d78484 100644 --- a/environment.c +++ b/environment.c @@ -65,6 +65,7 @@ int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; +int core_fscache; #ifndef PROTECT_HFS_DEFAULT #define PROTECT_HFS_DEFAULT 0 diff --git a/git-compat-util.h b/git-compat-util.h index b4d9c2aa58..d3f050ae39 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1090,6 +1090,21 @@ struct tm *git_gmtime_r(const time_t *, struct tm *); #define getc_unlocked(fh) getc(fh) #endif +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. + */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + #endif extern int cmd_main(int, const char **); diff --git a/preload-index.c b/preload-index.c index c1fe3a3ef9..c7970d7cb9 100644 --- a/preload-index.c +++ b/preload-index.c @@ -84,6 +84,7 @@ static void preload_index(struct index_state *index, offset = 0; work = DIV_ROUND_UP(index->cache_nr, threads); memset(&data, 0, sizeof(data)); + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; p->index = index; @@ -100,6 +101,7 @@ static void preload_index(struct index_state *index, if (pthread_join(p->pthread, NULL)) die("unable to join threaded lstat"); } + enable_fscache(0); } #endif From 013c3f6ad3e16be477d46508bedb7323b05a3e16 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 6/7] Win32: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow lstat emulation (git calls lstat once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an lstat implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent lstat calls for the same directory are served directly from the cache. Also implement opendir / readdir / closedir so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 458 +++++++++++++++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- git-compat-util.h | 2 + 4 files changed, 472 insertions(+), 2 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 0000000000..28d22791bd --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,458 @@ +#include "../../cache.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). + */ + const char *name; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + time_t st_atime; + time_t st_mtime; + time_t st_ctime; + }; + }; +}; + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(const struct fsentry *fse1, const struct fsentry *fse2) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && (res = fsentry_cmp( + fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return strnicmp(fse1->name, fse2->name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + fse->name = name; + fse->len = len; + hashmap_entry_init(fse, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + char *nm = ((char*) fse) + sizeof(struct fsentry); + memcpy(nm, name, len); + nm[len] = 0; + /* init the rest of the structure */ + fsentry_init(fse, list, nm, len); + fse->next = NULL; + fse->refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. + */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + fse->st_atime = filetime_to_time_t(&(fdata->ftLastAccessTime)); + fse->st_mtime = filetime_to_time_t(&(fdata->ftLastWriteTime)); + fse->st_ctime = filetime_to_time_t(&(fdata->ftCreationTime)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). + */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->name, MAX_PATH, dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, fse); +} + +/* + * Removes a directory listing from the cache. + */ +static void fscache_remove(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_remove(&map, fse, NULL); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + struct hashmap_iter iter; + struct fsentry *fse; + while ((fse = hashmap_iter_first(&map, &iter))) { + fscache_remove(fse); + fsentry_release(fse); + } +} + +/* + * Checks if the cache is enabled for the given path. + */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get(&map, key, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get(&map, key->list, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get(&map, key, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get(&map, key, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, 0); + initialized = 1; + } + + result = enable ? InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; + struct fsentry key[2], *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(key, NULL, filename, dirlen); + fsentry_init(key + 1, key, filename + base, len - base); + fse = fscache_get(key + 1); + if (!fse) + return -1; + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->st_size; + st->st_atime = fse->st_atime; + st->st_mtime = fse->st_mtime; + st->st_ctime = fse->st_ctime; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent dirent; +} fscache_DIR; + +/* + * Readdir replacement. + */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent.d_type = S_ISDIR(next->st_mode) ? DT_DIR : DT_REG; + dir->dirent.d_name = (char*) next->name; + return &(dir->dirent); +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct fsentry key, *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key, NULL, dirname, len); + list = fscache_get(&key); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 0000000000..ed518b422d --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index b232908f8c..c517b378cf 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -382,7 +382,7 @@ ifeq ($(uname_S),Windows) BASIC_CFLAGS = -nologo -I. -I../zlib -Icompat/vcbuild -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE COMPAT_OBJS = compat/msvc.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj @@ -528,7 +528,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/git-compat-util.h b/git-compat-util.h index d3f050ae39..787561a222 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -192,8 +192,10 @@ #if defined(__MINGW32__) /* pull in Windows compatibility stuff */ #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/msvc.h" +#include "compat/win32/fscache.h" #else #include #include From fcfdf122d3ea32532b86faab048e7fcc067b2253 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 7/7] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees --- compat/win32/fscache.c | 67 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 28d22791bd..cbfcb18474 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -33,6 +33,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -259,16 +261,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. + */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get(&map, key, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->hwait, INFINITE); + CloseHandle(key->hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get(&map, key, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get(&map, key, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -276,7 +305,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get(&map, key->list, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -285,16 +314,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->refcnt = 0; + hashmap_add(&map, future); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? key->list : key); - if (!fse) - return NULL; - + fse = fsentry_create_list(future); EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get(&map, key, NULL)) - fscache_add(fse); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, future, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); + return NULL; + } + + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list)