Files
git/preload-index.c
Ben Peart f3ea036ad3 fscache: update fscache to be thread specific instead of global
The threading model for fscache has been to have a single, global cache.
This puts requirements on it to be thread safe so that callers like
preload-index can call it from multiple threads.  This was implemented
with a single mutex and completion events, which introduces contention
between the calling threads.

Simplify the threading model by making fscache thread specific.  This allows
us to remove the global mutex and synchronization events entirely and instead
associate a fscache with every thread that requests one. This works well with
the current multi-threading which divides the cache entries into blocks with
a separate thread processing each block.

At the end of each worker thread, if there is a fscache on the primary
thread, merge the cached results from the worker into the primary thread
cache. This enables us to reuse the cache later, especially when scanning for
untracked files.

In testing, this reduced the time spent in preload_index() by about 25% and
also reduced the CPU utilization significantly.  On a repo with ~200K files,
it reduced overall status times by ~12%.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
2018-11-16 08:06:14 -05:00

124 lines
2.8 KiB
C

/*
* Copyright (C) 2008 Linus Torvalds
*/
#include "cache.h"
#include "pathspec.h"
#include "dir.h"
#include "fsmonitor.h"
#ifdef NO_PTHREADS
/*
 * Variant compiled when NO_PTHREADS is defined: preloading is a no-op
 * and both arguments are ignored.
 */
static void preload_index(struct index_state *index,
			  const struct pathspec *pathspec)
{
	(void)index;
	(void)pathspec;
}
#else
#include <pthread.h>
/*
 * Tuning constants for the parallel preload (mostly randomly chosen):
 *
 *  - MAX_PARALLEL caps the parallelism to 20 threads; it is also the
 *    size of the on-stack array of per-thread state.
 *  - THREAD_COST is the number of lstat's (index entries) we want to
 *    have per thread for it to be worth starting a thread; the thread
 *    count is computed as cache_nr / THREAD_COST.
 */
#define MAX_PARALLEL (20)
#define THREAD_COST (500)
/* Per-worker state handed to preload_thread() through pthread_create(). */
struct thread_data {
	pthread_t pthread;		/* thread handle, used to join the worker */
	struct index_state *index;	/* index whose entries are examined */
	struct pathspec pathspec;	/* copy of the caller's pathspec (left zeroed if none) */
	int offset;			/* position of the first entry in this worker's slice */
	int nr;				/* number of entries assigned to this worker */
};
/*
 * Thread entry point: examine one contiguous slice of the index and mark
 * the entries whose on-disk stat data matches as up to date.
 *
 * _data points to a struct thread_data that supplies the index, the
 * pathspec used to filter entries, and the slice to process (p->offset,
 * p->nr).  The slice is clamped to the end of the index.
 *
 * Always returns NULL.
 */
static void *preload_thread(void *_data)
{
	struct thread_data *p = _data;
	struct index_state *index = p->index;
	struct cache_def cache = CACHE_DEF_INIT;
	int nr = p->nr;
	int i;

	/* Clamp the slice so it does not extend past the end of the index. */
	if (nr + p->offset > index->cache_nr)
		nr = index->cache_nr - p->offset;

	/*
	 * Use a counted loop rather than do/while: an empty (or negative)
	 * slice must not touch any entry, otherwise we would read one
	 * element beyond the cache array.
	 */
	for (i = 0; i < nr; i++) {
		struct cache_entry *ce = index->cache[p->offset + i];
		struct stat st;

		/* Filters that only look at the entry itself. */
		if (ce_stage(ce))
			continue;
		if (S_ISGITLINK(ce->ce_mode))
			continue;
		if (ce_uptodate(ce))
			continue;
		if (ce_skip_worktree(ce))
			continue;
		if (ce->ce_flags & CE_FSMONITOR_VALID)
			continue;

		/* Entries that do not match the requested pathspec are skipped. */
		if (!ce_path_match(index, ce, &p->pathspec, NULL))
			continue;

		/* "cache" is private to this thread and reused across entries. */
		if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
			continue;

		/* If lstat() fails, leave the entry unmarked. */
		if (lstat(ce->name, &st))
			continue;

		/* A non-zero result from ie_match_stat() leaves the entry unmarked. */
		if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY|CE_MATCH_IGNORE_FSMONITOR))
			continue;

		ce_mark_uptodate(ce);
		mark_fsmonitor_valid(ce);
	}

	cache_def_clear(&cache);
	return NULL;
}
/*
 * Split the entries of "index" into equally sized slices, run
 * preload_thread() on each slice in its own thread, and wait for all of
 * them to finish.
 *
 * Does nothing when core_preload_index is unset or when fewer than two
 * threads would be started.  The thread count is cache_nr / THREAD_COST,
 * capped at MAX_PARALLEL; setting GIT_FORCE_PRELOAD_TEST in the
 * environment forces two threads for an index with more than one entry.
 *
 * "pathspec" may be NULL; otherwise each worker receives its own copy.
 * Dies if a thread cannot be created or joined.
 *
 * NOTE(review): the per-worker pathspec copies are not released after the
 * join -- confirm whether they should be cleared here.
 */
static void preload_index(struct index_state *index,
			  const struct pathspec *pathspec)
{
	struct thread_data data[MAX_PARALLEL];
	int threads, work, i;
	uint64_t start = getnanotime();

	if (!core_preload_index)
		return;

	threads = index->cache_nr / THREAD_COST;
	if (threads < 2) {
		/* Too little work, unless the test override asks for threads. */
		if (index->cache_nr <= 1 || !getenv("GIT_FORCE_PRELOAD_TEST"))
			return;
		threads = 2;
	} else if (threads > MAX_PARALLEL) {
		threads = MAX_PARALLEL;
	}

	work = DIV_ROUND_UP(index->cache_nr, threads);
	memset(data, 0, sizeof(data));

	for (i = 0; i < threads; i++) {
		struct thread_data *td = &data[i];

		td->index = index;
		if (pathspec)
			copy_pathspec(&td->pathspec, pathspec);
		/* Worker i handles entries [i * work, i * work + work). */
		td->offset = i * work;
		td->nr = work;
		if (pthread_create(&td->pthread, NULL, preload_thread, td))
			die("unable to create threaded lstat");
	}

	for (i = 0; i < threads; i++) {
		if (pthread_join(data[i].pthread, NULL))
			die("unable to join threaded lstat");
	}

	trace_performance_since(start, "preload index");
}
#endif
/*
 * Read the index into "index" with read_index(), then run
 * preload_index() on it with the given pathspec.
 *
 * Returns the value produced by read_index(); the preload step does not
 * change the result.
 */
int read_index_preload(struct index_state *index,
		       const struct pathspec *pathspec)
{
	const int read_result = read_index(index);

	preload_index(index, pathspec);
	return read_result;
}