From eb65b299ea0020fe3263d19af6f1cbdb04ba3740 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 31 Jul 2010 00:04:01 +0000 Subject: [PATCH 01/41] Support Unicode console output on Windows WriteConsoleW seems to be the only way to reliably print unicode to the console (without weird code page conversions). Also redirects vfprintf to the winansi.c version. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw.h | 2 ++ compat/winansi.c | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/compat/mingw.h b/compat/mingw.h index e033e720c9..95e289b1d7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -320,9 +320,11 @@ int mingw_raise(int sig); int winansi_fputs(const char *str, FILE *stream); int winansi_printf(const char *format, ...) __attribute__((format (printf, 1, 2))); int winansi_fprintf(FILE *stream, const char *format, ...) __attribute__((format (printf, 2, 3))); +int winansi_vfprintf(FILE *stream, const char *format, va_list list); #define fputs winansi_fputs #define printf(...) winansi_printf(__VA_ARGS__) #define fprintf(...) 
winansi_fprintf(__VA_ARGS__) +#define vfprintf winansi_vfprintf /* * git specific compatibility diff --git a/compat/winansi.c b/compat/winansi.c index dedce2104e..abe0feaa2c 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -3,6 +3,7 @@ */ #include "../git-compat-util.h" +#include /* Functions to be wrapped: @@ -10,6 +11,7 @@ #undef printf #undef fprintf #undef fputs +#undef vfprintf /* TODO: write */ /* @@ -46,6 +48,18 @@ static void init(void) initialized = 1; } +static int write_console(const char *str, size_t len) +{ + /* convert utf-8 to utf-16, write directly to console */ + int wlen = MultiByteToWideChar(CP_UTF8, 0, str, len, NULL, 0); + wchar_t *wbuf = (wchar_t *) alloca(wlen * sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, str, len, wbuf, wlen); + + WriteConsoleW(console, wbuf, wlen, NULL, NULL); + + /* return original (utf-8 encoded) length */ + return len; +} #define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE) #define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE) @@ -245,13 +259,15 @@ static int ansi_emulate(const char *str, FILE *stream) int rv = 0; const char *pos = str; + fflush(stream); + while (*pos) { pos = strstr(str, "\033["); if (pos) { size_t len = pos - str; if (len) { - size_t out_len = fwrite(str, 1, len, stream); + size_t out_len = write_console(str, len); rv += out_len; if (out_len < len) return rv; @@ -260,14 +276,12 @@ static int ansi_emulate(const char *str, FILE *stream) str = pos + 2; rv += 2; - fflush(stream); - pos = set_attr(str); rv += pos - str; str = pos; } else { - rv += strlen(str); - fputs(str, stream); + size_t len = strlen(str); + rv += write_console(str, len); return rv; } } @@ -294,7 +308,7 @@ int winansi_fputs(const char *str, FILE *stream) return EOF; } -static int winansi_vfprintf(FILE *stream, const char *format, va_list list) +int winansi_vfprintf(FILE *stream, const char *format, va_list list) { int len, rv; char small_buf[256]; From 
9bf5d37b2d0413b5f67af7a228641b0355f8deae Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 31 Jul 2010 00:04:02 +0000 Subject: [PATCH 02/41] Detect console streams more reliably on Windows GetStdHandle(STD_OUTPUT_HANDLE) doesn't work for stderr if stdout is redirected. Use _get_osfhandle of the FILE* instead. _isatty() is true for all character devices (including parallel and serial ports). Check return value of GetConsoleScreenBufferInfo instead to reliably detect console handles (also don't initialize internal state from an uninitialized CONSOLE_SCREEN_BUFFER_INFO structure if the function fails). Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/winansi.c | 50 +++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index abe0feaa2c..c4be401a6e 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -25,27 +25,39 @@ static HANDLE console; static WORD plain_attr; static WORD attr; static int negative; +static FILE *last_stream = NULL; -static void init(void) +static int is_console(FILE *stream) { CONSOLE_SCREEN_BUFFER_INFO sbi; + HANDLE hcon; static int initialized = 0; - if (initialized) - return; - console = GetStdHandle(STD_OUTPUT_HANDLE); - if (console == INVALID_HANDLE_VALUE) - console = NULL; + /* use cached value if stream hasn't changed */ + if (stream == last_stream) + return console != NULL; - if (!console) - return; + last_stream = stream; + console = NULL; - GetConsoleScreenBufferInfo(console, &sbi); - attr = plain_attr = sbi.wAttributes; - negative = 0; + /* get OS handle of the stream */ + hcon = (HANDLE) _get_osfhandle(_fileno(stream)); + if (hcon == INVALID_HANDLE_VALUE) + return 0; - initialized = 1; + /* check if its a handle to a console output screen buffer */ + if (!GetConsoleScreenBufferInfo(hcon, &sbi)) + return 0; + + if (!initialized) { + attr = plain_attr = sbi.wAttributes; + negative = 0; + initialized = 1; + } + + 
console = hcon; + return 1; } static int write_console(const char *str, size_t len) @@ -292,12 +304,7 @@ int winansi_fputs(const char *str, FILE *stream) { int rv; - if (!isatty(fileno(stream))) - return fputs(str, stream); - - init(); - - if (!console) + if (!is_console(stream)) return fputs(str, stream); rv = ansi_emulate(str, stream); @@ -315,12 +322,7 @@ int winansi_vfprintf(FILE *stream, const char *format, va_list list) char *buf = small_buf; va_list cp; - if (!isatty(fileno(stream))) - goto abort; - - init(); - - if (!console) + if (!is_console(stream)) goto abort; va_copy(cp, list); From 676e3b33adcf06efdbdc60ab2caae2182acba0a5 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 31 Jul 2010 00:04:03 +0000 Subject: [PATCH 03/41] Warn if the Windows console font doesn't support Unicode Unicode console output won't display correctly with default settings because the default console font ("Terminal") only supports the system's OEM charset. Unfortunately, this is a user specific setting, so it cannot be easily fixed by e.g. some registry tricks in the setup program. This change prints a warning on exit if console output contained non-ascii characters and the console font is supposedly not a TrueType font (which usually have decent Unicode support). 
Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/winansi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/compat/winansi.c b/compat/winansi.c index c4be401a6e..a5ca2d9be3 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -4,6 +4,8 @@ #include "../git-compat-util.h" #include +#include +#include /* Functions to be wrapped: @@ -26,6 +28,54 @@ static WORD plain_attr; static WORD attr; static int negative; static FILE *last_stream = NULL; +static int non_ascii_used = 0; + +typedef struct _CONSOLE_FONT_INFOEX { + ULONG cbSize; + DWORD nFont; + COORD dwFontSize; + UINT FontFamily; + UINT FontWeight; + WCHAR FaceName[LF_FACESIZE]; +} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX; + +typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL, + PCONSOLE_FONT_INFOEX); + +static void warn_if_raster_font(void) +{ + DWORD fontFamily = 0; + PGETCURRENTCONSOLEFONTEX pGetCurrentConsoleFontEx; + + /* don't bother if output was ascii only */ + if (!non_ascii_used) + return; + + /* GetCurrentConsoleFontEx is available since Vista */ + pGetCurrentConsoleFontEx = GetProcAddress(GetModuleHandle("kernel32.dll"), + "GetCurrentConsoleFontEx"); + if (pGetCurrentConsoleFontEx) { + CONSOLE_FONT_INFOEX cfi; + cfi.cbSize = sizeof(cfi); + if (pGetCurrentConsoleFontEx(console, 0, &cfi)) + fontFamily = cfi.FontFamily; + } else { + /* pre-Vista: check default console font in registry */ + HKEY hkey; + if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", 0, + KEY_READ, &hkey)) { + DWORD size = sizeof(fontFamily); + RegQueryValueExA(hkey, "FontFamily", NULL, NULL, + (LPVOID) &fontFamily, &size); + RegCloseKey(hkey); + } + } + + if (!(fontFamily & TMPF_TRUETYPE)) + warning("Your console font probably doesn\'t support " + "Unicode. 
If you experience strange characters in the output, " + "consider switching to a TrueType font such as Lucida Console!"); +} static int is_console(FILE *stream) { @@ -54,6 +104,8 @@ static int is_console(FILE *stream) attr = plain_attr = sbi.wAttributes; negative = 0; initialized = 1; + /* check console font on exit */ + atexit(warn_if_raster_font); } console = hcon; @@ -69,6 +121,10 @@ static int write_console(const char *str, size_t len) WriteConsoleW(console, wbuf, wlen, NULL, NULL); + /* remember if non-ascii characters are printed */ + if (wlen != len) + non_ascii_used = 1; + /* return original (utf-8 encoded) length */ return len; } From 5fd1d8a8731bbac394cbe977d3ce971597e702aa Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:34:33 +0100 Subject: [PATCH 04/41] Win32 dirent: remove unused dirent.d_ino member There are no proper inodes on Windows, so remove dirent.d_ino and #define NO_D_INO_IN_DIRENT in the Makefile (this skips e.g. an ineffective qsort in fsck.c). Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/win32/dirent.h | 1 - config.mak.uname | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 927a25ca76..b38973b051 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -9,7 +9,6 @@ typedef struct DIR DIR; #define DT_LNK 3 struct dirent { - long d_ino; /* Always zero. */ char d_name[FILENAME_MAX]; /* File name. */ union { unsigned short d_reclen; /* Always zero. 
*/ diff --git a/config.mak.uname b/config.mak.uname index efaed94d5d..36b755e085 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -356,6 +356,7 @@ ifeq ($(uname_S),Windows) NO_POSIX_GOODIES = UnfortunatelyYes NATIVE_CRLF = YesPlease DEFAULT_HELP_FORMAT = html + NO_D_INO_IN_DIRENT = YesPlease CC = compat/vcbuild/scripts/clink.pl AR = compat/vcbuild/scripts/lib.pl @@ -508,6 +509,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_INET_NTOP = YesPlease NO_POSIX_GOODIES = UnfortunatelyYes DEFAULT_HELP_FORMAT = html + NO_D_INO_IN_DIRENT = YesPlease COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -D_USE_32BIT_TIME_T -DNOGDI -Icompat -Icompat/win32 COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ From 8c046da76f3e2dd9c9a2762c6f9e6bb9882c9328 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:38:25 +0100 Subject: [PATCH 05/41] Win32 dirent: remove unused dirent.d_reclen member Remove the union around dirent.d_type and the unused dirent.d_reclen member (which was necessary for compatibility with the MinGW dirent runtime, which is no longer used). Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/win32/dirent.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index b38973b051..7f4e6c71d9 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -10,10 +10,7 @@ typedef struct DIR DIR; struct dirent { char d_name[FILENAME_MAX]; /* File name. */ - union { - unsigned short d_reclen; /* Always zero. 
*/ - unsigned char d_type; /* Reimplementation adds this */ - }; + unsigned char d_type; /* file type to prevent lstat after readdir */ }; DIR *opendir(const char *dirname); From c1ecc9ef18c64e206a49b53f6406abf1e77010d8 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:43:14 +0100 Subject: [PATCH 06/41] Win32 dirent: change FILENAME_MAX to MAX_PATH FILENAME_MAX and MAX_PATH are both 260 on Windows, however, MAX_PATH is used throughout the other Win32 code in Git, and also defines the length of file name buffers in the Win32 API (e.g. WIN32_FIND_DATA.cFileName, from which we're copying the dirent data). Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/win32/dirent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 7f4e6c71d9..8838cd61fc 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -9,8 +9,8 @@ typedef struct DIR DIR; #define DT_LNK 3 struct dirent { - char d_name[FILENAME_MAX]; /* File name. */ unsigned char d_type; /* file type to prevent lstat after readdir */ + char d_name[MAX_PATH]; /* file name */ }; DIR *opendir(const char *dirname); From 596ca9a80610f7e8264dd224bcae970815d1d31f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:47:41 +0100 Subject: [PATCH 07/41] Win32 dirent: clarify #include directives Git-compat-util.h is two dirs up, and already includes <dirent.h> (which is the same as "dirent.h" due to -Icompat/win32 in the Makefile). 
Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/win32/dirent.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 7a0debe51b..fac7f25047 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,5 +1,4 @@ -#include "../git-compat-util.h" -#include "dirent.h" +#include "../../git-compat-util.h" struct DIR { struct dirent dd_dir; /* includes d_type */ From a60a66e86cf0b84e6fb74cd2600ff7ee6e6d3005 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:57:02 +0100 Subject: [PATCH 08/41] Win32 dirent: improve dirent implementation Improve the dirent implementation by removing the relics that were once necessary to plug into the now unused MinGW runtime, in preparation for Unicode file name support. Move FindFirstFile to opendir, and FindClose to closedir, with the following implications: - DIR.dd_name is no longer needed - chdir(one); opendir(relative); chdir(two); readdir() works as expected (i.e. lists one/relative instead of two/relative) - DIR.dd_handle is a valid handle for the entire lifetime of the DIR struct - thus, all checks for dd_handle == INVALID_HANDLE_VALUE and dd_handle == 0 have been removed - the special case that the directory has been fully read (which was previously explicitly tracked with dd_handle == INVALID_HANDLE_VALUE && dd_stat != 0) is now handled implicitly by the FindNextFile error handling code (if a client continues to call readdir after receiving NULL, FindNextFile will continue to fail with ERROR_NO_MORE_FILES, to the same effect) - extracting dirent data from WIN32_FIND_DATA is needed in two places, so moved to its own method - GetFileAttributes is no longer needed. The same information can be obtained from the FindFirstFile error code, which is ERROR_DIRECTORY if the name is NOT a directory (-> ENOTDIR), otherwise we can use err_win_to_posix (e.g. ERROR_PATH_NOT_FOUND -> ENOENT). 
The ERROR_DIRECTORY case could be fixed in err_win_to_posix, but this probably breaks other functionality. Removes the ERROR_NO_MORE_FILES check after FindFirstFile (this was fortunately a NOOP (searching for '*' always finds '.' and '..'), otherwise the subsequent code would have copied data from an uninitialized buffer). Changes malloc to git support function xmalloc, so opendir will die() if out of memory, rather than failing with ENOMEM and letting git work on incomplete directory listings (error handling in dir.c is quite sparse). Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/win32/dirent.c | 111 ++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 58 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index fac7f25047..82a515c21b 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -4,92 +4,88 @@ struct DIR { struct dirent dd_dir; /* includes d_type */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ - char dd_name[1]; /* extend struct */ }; +static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAA *fdata) +{ + /* copy file name from WIN32_FIND_DATA to dirent */ + memcpy(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + + /* Set file type, based on WIN32_FIND_DATA */ + if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + ent->d_type = DT_DIR; + else + ent->d_type = DT_REG; +} + DIR *opendir(const char *name) { - DWORD attrs = GetFileAttributesA(name); + char pattern[MAX_PATH]; + WIN32_FIND_DATAA fdata; + HANDLE h; int len; - DIR *p; + DIR *dir; - /* check for valid path */ - if (attrs == INVALID_FILE_ATTRIBUTES) { - errno = ENOENT; + /* check that name is not NULL */ + if (!name) { + errno = EINVAL; return NULL; } - - /* check if it's a directory */ - if (!(attrs & FILE_ATTRIBUTE_DIRECTORY)) { - errno = ENOTDIR; - return NULL; - } - /* check that the pattern won't be too long for FindFirstFileA */ len = strlen(name); - if 
(is_dir_sep(name[len - 1])) - len--; if (len + 2 >= MAX_PATH) { errno = ENAMETOOLONG; return NULL; } + /* copy name to temp buffer */ + memcpy(pattern, name, len + 1); - p = malloc(sizeof(DIR) + len + 2); - if (!p) + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileA(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); return NULL; + } - memset(p, 0, sizeof(DIR) + len + 2); - strcpy(p->dd_name, name); - p->dd_name[len] = '/'; - p->dd_name[len+1] = '*'; - - p->dd_handle = INVALID_HANDLE_VALUE; - return p; + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; } struct dirent *readdir(DIR *dir) { - WIN32_FIND_DATAA buf; - HANDLE handle; - - if (!dir || !dir->dd_handle) { + if (!dir) { errno = EBADF; /* No set_errno for mingw */ return NULL; } - if (dir->dd_handle == INVALID_HANDLE_VALUE && dir->dd_stat == 0) { - DWORD lasterr; - handle = FindFirstFileA(dir->dd_name, &buf); - lasterr = GetLastError(); - dir->dd_handle = handle; - if (handle == INVALID_HANDLE_VALUE && (lasterr != ERROR_NO_MORE_FILES)) { - errno = err_win_to_posix(lasterr); + /* if first entry, dirent has already been set up by opendir */ + if (dir->dd_stat) { + /* get next entry and convert from WIN32_FIND_DATA to dirent */ + WIN32_FIND_DATAA fdata; + if (FindNextFileA(dir->dd_handle, &fdata)) { + finddata2dirent(&dir->dd_dir, &fdata); + } else { + DWORD lasterr = GetLastError(); + /* POSIX says you shouldn't set errno when readdir can't + find any more files; so, if another error we leave it set. 
*/ + if (lasterr != ERROR_NO_MORE_FILES) + errno = err_win_to_posix(lasterr); return NULL; } - } else if (dir->dd_handle == INVALID_HANDLE_VALUE) { - return NULL; - } else if (!FindNextFileA(dir->dd_handle, &buf)) { - DWORD lasterr = GetLastError(); - FindClose(dir->dd_handle); - dir->dd_handle = INVALID_HANDLE_VALUE; - /* POSIX says you shouldn't set errno when readdir can't - find any more files; so, if another error we leave it set. */ - if (lasterr != ERROR_NO_MORE_FILES) - errno = err_win_to_posix(lasterr); - return NULL; } - /* We get here if `buf' contains valid data. */ - strcpy(dir->dd_dir.d_name, buf.cFileName); ++dir->dd_stat; - - /* Set file type, based on WIN32_FIND_DATA */ - dir->dd_dir.d_type = 0; - if (buf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - dir->dd_dir.d_type |= DT_DIR; - else - dir->dd_dir.d_type |= DT_REG; - return &dir->dd_dir; } @@ -100,8 +96,7 @@ int closedir(DIR *dir) return -1; } - if (dir->dd_handle != INVALID_HANDLE_VALUE) - FindClose(dir->dd_handle); + FindClose(dir->dd_handle); free(dir); return 0; } From 18d41f75d5a8b642f9ff8c9d160381c5f45aafb2 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 18:04:16 +0100 Subject: [PATCH 09/41] Win32: fix potential multi-threading issue ...by removing a static buffer in do_stat_internal. 
Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/mingw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index e9892f8ee4..7692aa70cf 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -441,7 +441,7 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) static int do_stat_internal(int follow, const char *file_name, struct stat *buf) { int namelen; - static char alt_name[PATH_MAX]; + char alt_name[PATH_MAX]; if (!do_lstat(follow, file_name, buf)) return 0; From 9206e7fd3fa3a9296359cf1a8aa25a0a1c54cb2c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 19:47:23 +0100 Subject: [PATCH 10/41] Win32: move main macro to a function The code in the MinGW main macro is getting more and more complex, move to a separate initialization function for readability and extensibility. Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/mingw.c | 15 +++++++++++++++ compat/mingw.h | 16 +++++----------- http-fetch.c | 3 ++- remote-curl.c | 3 ++- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 7692aa70cf..b88956a758 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1823,3 +1823,18 @@ pid_t waitpid(pid_t pid, int *status, int options) errno = EINVAL; return -1; } + +void mingw_startup() +{ + /* copy executable name to argv[0] */ + __argv[0] = xstrdup(_pgmptr); + + /* initialize critical section for waitpid pinfo_t list */ + InitializeCriticalSection(&pinfo_cs); + + /* set up default file mode and file modes for stdin/out/err */ + _fmode = _O_BINARY; + _setmode(_fileno(stdin), _O_BINARY); + _setmode(_fileno(stdout), _O_BINARY); + _setmode(_fileno(stderr), _O_BINARY); +} diff --git a/compat/mingw.h b/compat/mingw.h index 95e289b1d7..dc9b0e5d8d 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -363,22 +363,16 @@ void free_environ(char **env); extern CRITICAL_SECTION pinfo_cs; /* - * A replacement of main() 
that ensures that argv[0] has a path - * and that default fmode and std(in|out|err) are in binary mode + * A replacement of main() that adds win32 specific initialization. */ +void mingw_startup(); #define main(c,v) dummy_decl_mingw_main(); \ static int mingw_main(c,v); \ -int main(int argc, char **argv) \ +int main(c,v) \ { \ - extern CRITICAL_SECTION pinfo_cs; \ - _fmode = _O_BINARY; \ - _setmode(_fileno(stdin), _O_BINARY); \ - _setmode(_fileno(stdout), _O_BINARY); \ - _setmode(_fileno(stderr), _O_BINARY); \ - argv[0] = xstrdup(_pgmptr); \ - InitializeCriticalSection(&pinfo_cs); \ - return mingw_main(argc, argv); \ + mingw_startup(); \ + return mingw_main(__argc, __argv); \ } \ static int mingw_main(c,v) diff --git a/http-fetch.c b/http-fetch.c index ba3ea10670..51b26d7af8 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -6,11 +6,12 @@ static const char http_fetch_usage[] = "git http-fetch " "[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url"; -int main(int argc, const char **argv) +int main(int argc, char **av) { struct walker *walker; int commits_on_stdin = 0; int commits; + const char **argv = (const char **)av; const char **write_ref = NULL; char **commit_id; char *url = NULL; diff --git a/remote-curl.c b/remote-curl.c index 10cb0114ea..0a2c718349 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -941,9 +941,10 @@ static void parse_push(struct strbuf *buf) free(specs); } -int main(int argc, const char **argv) +int main(int argc, char **av) { struct strbuf buf = STRBUF_INIT; + const char **argv = (const char **)av; int nongit; git_extract_argv0_path(argv[0]); From f70b59cedb6360a93d81678f8d7c528b1ff36910 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 19:52:20 +0100 Subject: [PATCH 11/41] MinGW: disable CRT command line globbing MingwRT listens to _CRT_glob to decide if __getmainargs should perform globbing, with the default being that it should. 
Unfortunately, __getmainargs globbing is sub-par; for instance patterns like "*.c" will only match c-sources in the current directory. Disable __getmainargs' command line wildcard expansion, so these patterns will be left untouched, and handled by Git's superior built-in globbing instead. MSVC defaults to no globbing, so we don't need to do anything in that case. This fixes t5505 and t7810. Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/mingw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index b88956a758..df7184d5e0 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1824,6 +1824,12 @@ pid_t waitpid(pid_t pid, int *status, int options) return -1; } +/* + * Disable MSVCRT command line wildcard expansion (__getmainargs called from + * mingw startup code, see init.c in mingw runtime). + */ +int _CRT_glob = 0; + void mingw_startup() { /* copy executable name to argv[0] */ From bbfb6d92b4cbcde083e09816f77618eaadf66e6d Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 5 Aug 2010 22:45:33 +0000 Subject: [PATCH 12/41] Unicode console: fix font warning on Vista and Win7 GetCurrentConsoleFontEx in an atexit routine doesn't work because git closes stdout before exit (which also closes the console handle). Check the console font when we first encounter a non-ascii character and only schedule the warning message to be printed at exit (warnings go to stderr, which is not closed by git). 
Signed-off-by: Karsten Blees Signed-off-by: Erik Faye-Lund --- compat/winansi.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index a5ca2d9be3..ab38ed95ab 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -28,7 +28,6 @@ static WORD plain_attr; static WORD attr; static int negative; static FILE *last_stream = NULL; -static int non_ascii_used = 0; typedef struct _CONSOLE_FONT_INFOEX { ULONG cbSize; @@ -42,14 +41,23 @@ typedef struct _CONSOLE_FONT_INFOEX { typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL, PCONSOLE_FONT_INFOEX); -static void warn_if_raster_font(void) +static void print_font_warning(void) { + warning("Your console font probably doesn\'t support Unicode. If " + "you experience strange characters in the output, consider " + "switching to a TrueType font such as Lucida Console!"); +} + +static void check_truetype_font(void) +{ + static int truetype_font_checked; DWORD fontFamily = 0; PGETCURRENTCONSOLEFONTEX pGetCurrentConsoleFontEx; - /* don't bother if output was ascii only */ - if (!non_ascii_used) + /* don't do this twice */ + if (truetype_font_checked) return; + truetype_font_checked = 1; /* GetCurrentConsoleFontEx is available since Vista */ pGetCurrentConsoleFontEx = GetProcAddress(GetModuleHandle("kernel32.dll"), @@ -72,9 +80,7 @@ static void warn_if_raster_font(void) } if (!(fontFamily & TMPF_TRUETYPE)) - warning("Your console font probably doesn\'t support " - "Unicode. 
If you experience strange characters in the output, " - "consider switching to a TrueType font such as Lucida Console!"); + atexit(print_font_warning); } static int is_console(FILE *stream) @@ -104,8 +110,6 @@ static int is_console(FILE *stream) attr = plain_attr = sbi.wAttributes; negative = 0; initialized = 1; - /* check console font on exit */ - atexit(warn_if_raster_font); } console = hcon; @@ -121,9 +125,12 @@ static int write_console(const char *str, size_t len) WriteConsoleW(console, wbuf, wlen, NULL, NULL); - /* remember if non-ascii characters are printed */ + /* + * if non-ascii characters are printed, check that the current console + * font supports this + */ if (wlen != len) - non_ascii_used = 1; + check_truetype_font(); /* return original (utf-8 encoded) length */ return len; From e788158f1bbebc7de014c0092fa94a047d14cddc Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 14 Jan 2012 19:06:41 +0100 Subject: [PATCH 13/41] Revert "Windows: teach getenv to do a case-sensitive search" This reverts commit df599e9612788b728ce43a03159b85f1fe624d6a. As of 5e9637c6 "i18n: add infrastructure for translating Git with gettext", eval_gettext uses MinGW envsubst.exe instead of git-sh-i18n--envsubst.exe for variable substitution. This breaks git-submodule.sh messages and tests, as envsubst.exe doesn't support case-sensitive environment lookup (the same is true for almost everything on Windows, including MSys and Cygwin tools). 30a615ac "Windows/i18n: rename $path to prevent clashes with $PATH" renames the conflicting variable in git-submodule.sh, so that it works on Windows (i.e. with case-insensitive environment, regardless of the toolset). Revert to the documented behaviour of case-insensitive environment on Windows. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index df7184d5e0..0ff893858c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1155,31 +1155,14 @@ char **make_augmented_environ(const char *const *vars) } #undef getenv - -/* - * The system's getenv looks up the name in a case-insensitive manner. - * This version tries a case-sensitive lookup and falls back to - * case-insensitive if nothing was found. This is necessary because, - * as a prominent example, CMD sets 'Path', but not 'PATH'. - * Warning: not thread-safe. - */ -static char *getenv_cs(const char *name) -{ - size_t len = strlen(name); - int i = lookup_env(environ, name, len); - if (i >= 0) - return environ[i] + len + 1; /* skip past name and '=' */ - return getenv(name); -} - char *mingw_getenv(const char *name) { - char *result = getenv_cs(name); + char *result = getenv(name); if (!result && !strcmp(name, "TMPDIR")) { /* on Windows it is TMP and TEMP */ - result = getenv_cs("TMP"); + result = getenv("TMP"); if (!result) - result = getenv_cs("TEMP"); + result = getenv("TEMP"); } return result; } From a5485e2c33d69f4461408c18cb575741d6c421f1 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 00:19:31 +0100 Subject: [PATCH 14/41] Revert "mingw.c: move definition of mingw_getenv down" This reverts commit 06bc4b796ad69ba93f0a8c451368602e0553c2d3. Signed-off-by: Karsten Blees --- compat/mingw.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 0ff893858c..5317071169 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -178,7 +178,7 @@ static int ask_yes_no_if_possible(const char *format, ...) 
vsnprintf(question, sizeof(question), format, args); va_end(args); - if ((retry_hook[0] = mingw_getenv("GIT_ASK_YESNO"))) { + if ((retry_hook[0] = getenv("GIT_ASK_YESNO"))) { retry_hook[1] = question; return !run_command_v_opt(retry_hook, 0); } @@ -630,6 +630,19 @@ char *mingw_getcwd(char *pointer, int len) return ret; } +#undef getenv +char *mingw_getenv(const char *name) +{ + char *result = getenv(name); + if (!result && !strcmp(name, "TMPDIR")) { + /* on Windows it is TMP and TEMP */ + result = getenv("TMP"); + if (!result) + result = getenv("TEMP"); + } + return result; +} + /* * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx * (Parsing C++ Command-Line Arguments) @@ -729,7 +742,7 @@ static const char *parse_interpreter(const char *cmd) */ static char **get_path_split(void) { - char *p, **path, *envpath = mingw_getenv("PATH"); + char *p, **path, *envpath = getenv("PATH"); int i, n = 0; if (!envpath || !*envpath) @@ -1154,19 +1167,6 @@ char **make_augmented_environ(const char *const *vars) return env; } -#undef getenv -char *mingw_getenv(const char *name) -{ - char *result = getenv(name); - if (!result && !strcmp(name, "TMPDIR")) { - /* on Windows it is TMP and TEMP */ - result = getenv("TMP"); - if (!result) - result = getenv("TEMP"); - } - return result; -} - /* * Note, this isn't a complete replacement for getaddrinfo. It assumes * that service contains a numerical port, or that it is null. It From f74a9f98039392d4e7f951420f442223532ee528 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 31 Jul 2010 00:04:00 +0000 Subject: [PATCH 15/41] Enable color output in Windows cmd.exe Git requires the TERM environment variable to be set for all color* settings. Simulate the TERM variable if it is not set (default on Windows). 
Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/mingw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 5317071169..27a81f769c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -640,6 +640,10 @@ char *mingw_getenv(const char *name) if (!result) result = getenv("TEMP"); } + else if (!result && !strcmp(name, "TERM")) { + /* simulate TERM to enable auto-color (see color.c) */ + result = "winansi"; + } return result; } From db425fcbbfc7453a6f47604dc87a5bc3ba582f1e Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 6 Oct 2011 17:40:56 +0000 Subject: [PATCH 16/41] MSVC: fix winansi.c compile errors Some constants (such as LF_FACESIZE) are undefined with -DNOGDI (set in the Makefile), and CONSOLE_FONT_INFOEX is available in MSVC, but not in MinGW. Cast FARPROC to PGETCURRENTCONSOLEFONTEX to suppress MSVC compiler warning. Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- compat/winansi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index ab38ed95ab..bec6713b74 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -2,6 +2,7 @@ * Copyright 2008 Peter Harris */ +#undef NOGDI #include "../git-compat-util.h" #include #include @@ -29,6 +30,7 @@ static WORD attr; static int negative; static FILE *last_stream = NULL; +#ifdef __MINGW32__ typedef struct _CONSOLE_FONT_INFOEX { ULONG cbSize; DWORD nFont; @@ -37,6 +39,7 @@ typedef struct _CONSOLE_FONT_INFOEX { UINT FontWeight; WCHAR FaceName[LF_FACESIZE]; } CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX; +#endif typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL, PCONSOLE_FONT_INFOEX); @@ -60,8 +63,8 @@ static void check_truetype_font(void) truetype_font_checked = 1; /* GetCurrentConsoleFontEx is available since Vista */ - pGetCurrentConsoleFontEx = GetProcAddress(GetModuleHandle("kernel32.dll"), - "GetCurrentConsoleFontEx"); + pGetCurrentConsoleFontEx = 
(PGETCURRENTCONSOLEFONTEX) GetProcAddress( + GetModuleHandle("kernel32.dll"), "GetCurrentConsoleFontEx"); if (pGetCurrentConsoleFontEx) { CONSOLE_FONT_INFOEX cfi; cfi.cbSize = sizeof(cfi); From 6b5e42eed9b12d268ad93583c1ccb2807d0e4536 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 14 Jan 2012 22:24:19 +0100 Subject: [PATCH 17/41] Win32: Thread-safe windows console output Winansi.c has many static variables that are accessed and modified from the [v][f]printf / fputs functions overridden in the file. This may cause multi threaded git commands that print to the console to produce corrupted output or even crash. Additionally, winansi.c doesn't override all functions that can be used to print to the console (e.g. fwrite, write, fputc are missing), so that ANSI escapes don't work properly for some git commands (e.g. git-grep). Instead of doing ANSI emulation in just a few wrapped functions on top of the IO API, let's plug into the IO system and take advantage of the thread safety inherent to the IO system. Redirect stdout and stderr to a pipe if they point to the console. A background thread reads from the pipe, handles ANSI escape sequences and UTF-8 to UTF-16 conversion, then writes to the console. The pipe-based stdout and stderr replacements must be set to unbuffered, as MSVCRT doesn't support line buffering and fully buffered streams are inappropriate for console output. Due to the byte-oriented pipe, ANSI escape sequences and multi-byte UTF-8 sequences can no longer be expected to arrive in one piece. Replace the string-based ansi_emulate() with a simple stateful parser (this also fixes colored diff hunk headers, which were broken as of commit 2efcc977). Override isatty to return true for the pipes redirecting to the console. Exec/spawn obtain the original console handle to pass to the next process via winansi_get_osfhandle(). All other overrides are gone, the default stdio implementations work as expected with the piped stdout/stderr descriptors. 
Global variables are either initialized on startup (single threaded) or exclusively modified by the background thread. Threads communicate through the pipe, no further synchronization is necessary. The background thread is terminated by disconnecting the pipe after flushing the stdio and pipe buffers. This doesn't work for anonymous pipes (created via CreatePipe), as DisconnectNamedPipe only works on the read end, which discards remaining data. Thus we have to set up the pipe manually, with the write end being the server (opened with CreateNamedPipe) and the read end the client (opened with CreateFile). Limitations: doesn't track reopened or duped file descriptors, i.e.: - fdopen(1/2) returns fully buffered streams - dup(1/2), dup2(1/2) returns normal pipe descriptors (i.e. isatty() = false, winansi_get_osfhandle won't return the original console handle) Currently, only the git-format-patch command uses xfdopen(xdup(1)) (see "realstdout" in builtin/log.c), but works well with these limitations. Many thanks to Atsushi Nakagawa for suggesting and reviewing the thread-exit-mechanism. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 9 +- compat/mingw.h | 12 +- compat/winansi.c | 420 ++++++++++++++++++++++++++++++----------------- 3 files changed, 282 insertions(+), 159 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 27a81f769c..1577335fb1 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -882,9 +882,9 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, memset(&si, 0, sizeof(si)); si.cb = sizeof(si); si.dwFlags = STARTF_USESTDHANDLES; - si.hStdInput = (HANDLE) _get_osfhandle(fhin); - si.hStdOutput = (HANDLE) _get_osfhandle(fhout); - si.hStdError = (HANDLE) _get_osfhandle(fherr); + si.hStdInput = winansi_get_osfhandle(fhin); + si.hStdOutput = winansi_get_osfhandle(fhout); + si.hStdError = winansi_get_osfhandle(fherr); /* concatenate argv, quoting args as we go */ strbuf_init(&args, 0); @@ -1830,4 +1830,7 @@ void mingw_startup() _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); _setmode(_fileno(stderr), _O_BINARY); + + /* initialize Unicode console */ + winansi_init(); } diff --git a/compat/mingw.h b/compat/mingw.h index dc9b0e5d8d..dfa5780397 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -317,14 +317,10 @@ int mingw_raise(int sig); * ANSI emulation wrappers */ -int winansi_fputs(const char *str, FILE *stream); -int winansi_printf(const char *format, ...) __attribute__((format (printf, 1, 2))); -int winansi_fprintf(FILE *stream, const char *format, ...) __attribute__((format (printf, 2, 3))); -int winansi_vfprintf(FILE *stream, const char *format, va_list list); -#define fputs winansi_fputs -#define printf(...) winansi_printf(__VA_ARGS__) -#define fprintf(...) 
winansi_fprintf(__VA_ARGS__) -#define vfprintf winansi_vfprintf +void winansi_init(void); +int winansi_isatty(int fd); +HANDLE winansi_get_osfhandle(int fd); +#define isatty winansi_isatty /* * git specific compatibility diff --git a/compat/winansi.c b/compat/winansi.c index bec6713b74..a3e4d88295 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -4,18 +4,13 @@ #undef NOGDI #include "../git-compat-util.h" -#include #include #include /* Functions to be wrapped: */ -#undef printf -#undef fprintf -#undef fputs -#undef vfprintf -/* TODO: write */ +#undef isatty /* ANSI codes used by git: m, K @@ -28,7 +23,10 @@ static HANDLE console; static WORD plain_attr; static WORD attr; static int negative; -static FILE *last_stream = NULL; +static int non_ascii_used = 0; +static HANDLE hthread, hread, hwrite; +static HANDLE hwrite1 = INVALID_HANDLE_VALUE, hwrite2 = INVALID_HANDLE_VALUE; +static HANDLE hconsole1, hconsole2; #ifdef __MINGW32__ typedef struct _CONSOLE_FONT_INFOEX { @@ -44,27 +42,19 @@ typedef struct _CONSOLE_FONT_INFOEX { typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL, PCONSOLE_FONT_INFOEX); -static void print_font_warning(void) +static void warn_if_raster_font(void) { - warning("Your console font probably doesn\'t support Unicode. 
If " - "you experience strange characters in the output, consider " - "switching to a TrueType font such as Lucida Console!"); -} - -static void check_truetype_font(void) -{ - static int truetype_font_checked; DWORD fontFamily = 0; PGETCURRENTCONSOLEFONTEX pGetCurrentConsoleFontEx; - /* don't do this twice */ - if (truetype_font_checked) + /* don't bother if output was ascii only */ + if (!non_ascii_used) return; - truetype_font_checked = 1; /* GetCurrentConsoleFontEx is available since Vista */ pGetCurrentConsoleFontEx = (PGETCURRENTCONSOLEFONTEX) GetProcAddress( - GetModuleHandle("kernel32.dll"), "GetCurrentConsoleFontEx"); + GetModuleHandle("kernel32.dll"), + "GetCurrentConsoleFontEx"); if (pGetCurrentConsoleFontEx) { CONSOLE_FONT_INFOEX cfi; cfi.cbSize = sizeof(cfi); @@ -73,8 +63,8 @@ static void check_truetype_font(void) } else { /* pre-Vista: check default console font in registry */ HKEY hkey; - if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", 0, - KEY_READ, &hkey)) { + if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", + 0, KEY_READ, &hkey)) { DWORD size = sizeof(fontFamily); RegQueryValueExA(hkey, "FontFamily", NULL, NULL, (LPVOID) &fontFamily, &size); @@ -82,61 +72,63 @@ static void check_truetype_font(void) } } - if (!(fontFamily & TMPF_TRUETYPE)) - atexit(print_font_warning); + if (!(fontFamily & TMPF_TRUETYPE)) { + const wchar_t *msg = L"\nWarning: Your console font probably " + L"doesn\'t support Unicode. 
If you experience strange " + L"characters in the output, consider switching to a " + L"TrueType font such as Lucida Console!\n"; + WriteConsoleW(console, msg, wcslen(msg), NULL, NULL); + } } -static int is_console(FILE *stream) +static int is_console(int fd) { CONSOLE_SCREEN_BUFFER_INFO sbi; HANDLE hcon; static int initialized = 0; - /* use cached value if stream hasn't changed */ - if (stream == last_stream) - return console != NULL; - - last_stream = stream; - console = NULL; - - /* get OS handle of the stream */ - hcon = (HANDLE) _get_osfhandle(_fileno(stream)); + /* get OS handle of the file descriptor */ + hcon = (HANDLE) _get_osfhandle(fd); if (hcon == INVALID_HANDLE_VALUE) return 0; + /* check if its a device (i.e. console, printer, serial port) */ + if (GetFileType(hcon) != FILE_TYPE_CHAR) + return 0; + /* check if its a handle to a console output screen buffer */ if (!GetConsoleScreenBufferInfo(hcon, &sbi)) return 0; + /* initialize attributes */ if (!initialized) { attr = plain_attr = sbi.wAttributes; negative = 0; initialized = 1; } - console = hcon; return 1; } -static int write_console(const char *str, size_t len) -{ - /* convert utf-8 to utf-16, write directly to console */ - int wlen = MultiByteToWideChar(CP_UTF8, 0, str, len, NULL, 0); - wchar_t *wbuf = (wchar_t *) alloca(wlen * sizeof(wchar_t)); - MultiByteToWideChar(CP_UTF8, 0, str, len, wbuf, wlen); +#define BUFFER_SIZE 4096 +#define MAX_PARAMS 16 +static void write_console(unsigned char *str, size_t len) +{ + /* only called from console_thread, so a static buffer will do */ + static wchar_t wbuf[2 * BUFFER_SIZE + 1]; + + /* convert utf-8 to utf-16 */ + int wlen = MultiByteToWideChar(CP_UTF8, 0, (char*) str, len, wbuf, + ARRAY_SIZE(wbuf)); + + /* write directly to console */ WriteConsoleW(console, wbuf, wlen, NULL, NULL); - /* - * if non-ascii characters are printed, check that the current console - * font supports this - */ + /* remember if non-ascii characters are printed */ if (wlen != len) - 
check_truetype_font(); - - /* return original (utf-8 encoded) length */ - return len; + non_ascii_used = 1; } #define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE) @@ -182,18 +174,13 @@ static void erase_in_line(void) &dummy); } - -static const char *set_attr(const char *str) +static void set_attr(char func, const int *params, int paramlen) { - const char *func; - size_t len = strspn(str, "0123456789;"); - func = str + len; - - switch (*func) { + int i; + switch (func) { case 'm': - do { - long val = strtol(str, (char **)&str, 10); - switch (val) { + for (i = 0; i < paramlen; i++) { + switch (params[i]) { case 0: /* reset */ attr = plain_attr; negative = 0; @@ -316,9 +303,7 @@ static const char *set_attr(const char *str) /* Unsupported code */ break; } - str++; - } while (*(str-1) == ';'); - + } set_console_attr(); break; case 'K': @@ -328,112 +313,251 @@ static const char *set_attr(const char *str) /* Unsupported code */ break; } - - return func + 1; } -static int ansi_emulate(const char *str, FILE *stream) +enum { + TEXT = 0, ESCAPE = 033, BRACKET = '[' +}; + +static DWORD WINAPI console_thread(LPVOID unused) { - int rv = 0; - const char *pos = str; + unsigned char buffer[BUFFER_SIZE]; + DWORD bytes; + int start, end = 0, c, parampos = 0, state = TEXT; + int params[MAX_PARAMS]; - fflush(stream); + while (1) { + /* read next chunk of bytes from the pipe */ + if (!ReadFile(hread, buffer + end, BUFFER_SIZE - end, &bytes, + NULL)) { + /* exit if pipe has been closed or disconnected */ + if (GetLastError() == ERROR_PIPE_NOT_CONNECTED || + GetLastError() == ERROR_BROKEN_PIPE) + break; + /* ignore other errors */ + continue; + } - while (*pos) { - pos = strstr(str, "\033["); - if (pos) { - size_t len = pos - str; + /* scan the bytes and handle ANSI control codes */ + bytes += end; + start = end = 0; + while (end < bytes) { + c = buffer[end++]; + switch (state) { + case TEXT: + if (c == ESCAPE) { + /* print text seen so far */ + if (end - 1 > 
start) + write_console(buffer + start, + end - 1 - start); - if (len) { - size_t out_len = write_console(str, len); - rv += out_len; - if (out_len < len) - return rv; + /* then start parsing escape sequence */ + start = end - 1; + memset(params, 0, sizeof(params)); + parampos = 0; + state = ESCAPE; + } + break; + + case ESCAPE: + /* continue if "\033[", otherwise bail out */ + state = (c == BRACKET) ? BRACKET : TEXT; + break; + + case BRACKET: + /* parse [0-9;]* into array of parameters */ + if (c >= '0' && c <= '9') { + params[parampos] *= 10; + params[parampos] += c - '0'; + } else if (c == ';') { + /* + * next parameter, bail out if out of + * bounds + */ + parampos++; + if (parampos >= MAX_PARAMS) + state = TEXT; + } else { + /* + * end of escape sequence, change + * console attributes + */ + set_attr(c, params, parampos + 1); + start = end; + state = TEXT; + } + break; + } + } + + /* print remaining text unless parsing an escape sequence */ + if (state == TEXT && end > start) { + /* check for incomplete UTF-8 sequences and fix end */ + if (buffer[end - 1] >= 0x80) { + if (buffer[end -1] >= 0xc0) + end--; + else if (end - 1 > start && + buffer[end - 2] >= 0xe0) + end -= 2; + else if (end - 2 > start && + buffer[end - 3] >= 0xf0) + end -= 3; } - str = pos + 2; - rv += 2; + /* print remaining complete UTF-8 sequences */ + if (end > start) + write_console(buffer + start, end - start); - pos = set_attr(str); - rv += pos - str; - str = pos; + /* move remaining bytes to the front */ + if (end < bytes) + memmove(buffer, buffer + end, bytes - end); + end = bytes - end; } else { - size_t len = strlen(str); - rv += write_console(str, len); - return rv; + /* all data has been consumed, mark buffer empty */ + end = 0; } } - return rv; + + /* check if the console font supports unicode */ + warn_if_raster_font(); + + CloseHandle(hread); + return 0; } -int winansi_fputs(const char *str, FILE *stream) +static void winansi_exit(void) { - int rv; + /* flush all streams */ + 
_flushall(); - if (!is_console(stream)) - return fputs(str, stream); + /* signal console thread to exit */ + FlushFileBuffers(hwrite); + DisconnectNamedPipe(hwrite); - rv = ansi_emulate(str, stream); + /* wait for console thread to copy remaining data */ + WaitForSingleObject(hthread, INFINITE); - if (rv >= 0) - return 0; + /* cleanup handles... */ + if (hwrite1 != INVALID_HANDLE_VALUE) + CloseHandle(hwrite1); + if (hwrite2 != INVALID_HANDLE_VALUE) + CloseHandle(hwrite2); + CloseHandle(hwrite); + CloseHandle(hthread); +} + +static void die_lasterr(const char *fmt, ...) +{ + va_list params; + va_start(params, fmt); + errno = err_win_to_posix(GetLastError()); + die_errno(fmt, params); + va_end(params); +} + +static HANDLE duplicate_handle(HANDLE hnd) +{ + HANDLE hresult, hproc = GetCurrentProcess(); + if (!DuplicateHandle(hproc, hnd, hproc, &hresult, 0, TRUE, + DUPLICATE_SAME_ACCESS)) + die_lasterr("DuplicateHandle(%li) failed", (long) hnd); + return hresult; +} + +static HANDLE redirect_console(FILE *stream, HANDLE *phcon, int new_fd) +{ + /* get original console handle */ + int fd = _fileno(stream); + HANDLE hcon = (HANDLE) _get_osfhandle(fd); + if (hcon == INVALID_HANDLE_VALUE) + die_errno("_get_osfhandle(%i) failed", fd); + + /* save a copy to phcon and console (used by the background thread) */ + console = *phcon = duplicate_handle(hcon); + + /* duplicate new_fd over fd (closes fd and associated handle (hcon)) */ + if (_dup2(new_fd, fd)) + die_errno("_dup2(%i, %i) failed", new_fd, fd); + + /* no buffering, or stdout / stderr will be out of sync */ + setbuf(stream, NULL); + return (HANDLE) _get_osfhandle(fd); +} + +void winansi_init(void) +{ + int con1, con2, hwrite_fd; + char name[32]; + + /* check if either stdout or stderr is a console output screen buffer */ + con1 = is_console(1); + con2 = is_console(2); + if (!con1 && !con2) + return; + + /* create a named pipe to communicate with the console thread */ + sprintf(name, "\\\\.\\pipe\\winansi%lu", 
GetCurrentProcessId()); + hwrite = CreateNamedPipe(name, PIPE_ACCESS_OUTBOUND, + PIPE_TYPE_BYTE | PIPE_WAIT, 1, BUFFER_SIZE, 0, 0, NULL); + if (hwrite == INVALID_HANDLE_VALUE) + die_lasterr("CreateNamedPipe failed"); + + hread = CreateFile(name, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0, NULL); + if (hread == INVALID_HANDLE_VALUE) + die_lasterr("CreateFile for named pipe failed"); + + /* start console spool thread on the pipe's read end */ + hthread = CreateThread(NULL, 0, console_thread, NULL, 0, NULL); + if (hthread == INVALID_HANDLE_VALUE) + die_lasterr("CreateThread(console_thread) failed"); + + /* schedule cleanup routine */ + if (atexit(winansi_exit)) + die_errno("atexit(winansi_exit) failed"); + + /* create a file descriptor for the write end of the pipe */ + hwrite_fd = _open_osfhandle((long) duplicate_handle(hwrite), _O_BINARY); + if (hwrite_fd == -1) + die_errno("_open_osfhandle(%li) failed", (long) hwrite); + + /* redirect stdout / stderr to the pipe */ + if (con1) + hwrite1 = redirect_console(stdout, &hconsole1, hwrite_fd); + if (con2) + hwrite2 = redirect_console(stderr, &hconsole2, hwrite_fd); + + /* close pipe file descriptor (also closes the duped hwrite) */ + close(hwrite_fd); +} + +static int is_same_handle(HANDLE hnd, int fd) +{ + return hnd != INVALID_HANDLE_VALUE && hnd == (HANDLE) _get_osfhandle(fd); +} + +/* + * Return true if stdout / stderr is a pipe redirecting to the console. + */ +int winansi_isatty(int fd) +{ + if (fd == 1 && is_same_handle(hwrite1, 1)) + return 1; + else if (fd == 2 && is_same_handle(hwrite2, 2)) + return 1; else - return EOF; + return isatty(fd); } -int winansi_vfprintf(FILE *stream, const char *format, va_list list) +/* + * Returns the real console handle if stdout / stderr is a pipe redirecting + * to the console. Allows spawn / exec to pass the console to the next process. 
+ */ +HANDLE winansi_get_osfhandle(int fd) { - int len, rv; - char small_buf[256]; - char *buf = small_buf; - va_list cp; - - if (!is_console(stream)) - goto abort; - - va_copy(cp, list); - len = vsnprintf(small_buf, sizeof(small_buf), format, cp); - va_end(cp); - - if (len > sizeof(small_buf) - 1) { - buf = malloc(len + 1); - if (!buf) - goto abort; - - len = vsnprintf(buf, len + 1, format, list); - } - - rv = ansi_emulate(buf, stream); - - if (buf != small_buf) - free(buf); - return rv; - -abort: - rv = vfprintf(stream, format, list); - return rv; -} - -int winansi_fprintf(FILE *stream, const char *format, ...) -{ - va_list list; - int rv; - - va_start(list, format); - rv = winansi_vfprintf(stream, format, list); - va_end(list); - - return rv; -} - -int winansi_printf(const char *format, ...) -{ - va_list list; - int rv; - - va_start(list, format); - rv = winansi_vfprintf(stdout, format, list); - va_end(list); - - return rv; + if (fd == 1 && is_same_handle(hwrite1, 1)) + return hconsole1; + else if (fd == 2 && is_same_handle(hwrite2, 2)) + return hconsole2; + else + return (HANDLE) _get_osfhandle(fd); } From a3044f7d60b3de7a23a1e64794083c3fb0c2b6de Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 25 Nov 2011 21:05:06 +0100 Subject: [PATCH 18/41] Win32: add Unicode conversion functions Add Unicode conversion functions to convert between Windows native UTF-16LE encoding to UTF-8 and back. To support repositories with legacy-encoded file names, the UTF-8 to UTF-16 conversion function tries to create valid, unique file names even for invalid UTF-8 byte sequences, so that these repositories can be checked out without error. The current implementation leaves invalid UTF-8 bytes in range 0xa0 - 0xff as is (producing printable Unicode chars \u00a0 - \u00ff, equivalent to ISO-8859-1), and converts 0x80 - 0x9f to hex-code (\u0080 - \u009f are control chars). 
The Windows MultiByteToWideChar API was not used as it either drops invalid UTF-8 sequences (on Win2k/XP; producing non-unique or even empty file names) or converts them to the replacement char \ufffd (Vista/7; causing ERROR_INVALID_NAME in subsequent calls to file system APIs). Signed-off-by: Karsten Blees --- compat/mingw.c | 85 ++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 104 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 1577335fb1..73e5a9805a 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1811,6 +1811,91 @@ pid_t waitpid(pid_t pid, int *status, int options) return -1; } +int xutftowcsn(wchar_t *wcs, const char *utfs, size_t wcslen, int utflen) +{ + int upos = 0, wpos = 0; + const unsigned char *utf = (const unsigned char*) utfs; + if (!utf || !wcs || wcslen < 1) { + errno = EINVAL; + return -1; + } + /* reserve space for \0 */ + wcslen--; + if (utflen < 0) + utflen = INT_MAX; + + while (upos < utflen) { + int c = utf[upos++] & 0xff; + if (utflen == INT_MAX && c == 0) + break; + + if (wpos >= wcslen) { + wcs[wpos] = 0; + errno = ERANGE; + return -1; + } + + if (c < 0x80) { + /* ASCII */ + wcs[wpos++] = c; + } else if (c >= 0xc2 && c < 0xe0 && upos < utflen && + (utf[upos] & 0xc0) == 0x80) { + /* 2-byte utf-8 */ + c = ((c & 0x1f) << 6); + c |= (utf[upos++] & 0x3f); + wcs[wpos++] = c; + } else if (c >= 0xe0 && c < 0xf0 && upos + 1 < utflen && + !(c == 0xe0 && utf[upos] < 0xa0) && /* over-long encoding */ + (utf[upos] & 0xc0) == 0x80 && + (utf[upos + 1] & 0xc0) == 0x80) { + /* 3-byte utf-8 */ + c = ((c & 0x0f) << 12); + c |= ((utf[upos++] & 0x3f) << 6); + c |= (utf[upos++] & 0x3f); + wcs[wpos++] = c; + } else if (c >= 0xf0 && c < 0xf5 && upos + 2 < utflen && + wpos + 1 < wcslen && + !(c == 0xf0 && utf[upos] < 0x90) && /* over-long encoding */ + !(c == 0xf4 && utf[upos] >= 0x90) && /* > \u10ffff */ + (utf[upos] & 0xc0) == 0x80 && + (utf[upos + 1] & 0xc0) == 
0x80 && + (utf[upos + 2] & 0xc0) == 0x80) { + /* 4-byte utf-8: convert to \ud8xx \udcxx surrogate pair */ + c = ((c & 0x07) << 18); + c |= ((utf[upos++] & 0x3f) << 12); + c |= ((utf[upos++] & 0x3f) << 6); + c |= (utf[upos++] & 0x3f); + c -= 0x10000; + wcs[wpos++] = 0xd800 | (c >> 10); + wcs[wpos++] = 0xdc00 | (c & 0x3ff); + } else if (c >= 0xa0) { + /* invalid utf-8 byte, printable unicode char: convert 1:1 */ + wcs[wpos++] = c; + } else { + /* invalid utf-8 byte, non-printable unicode: convert to hex */ + static const char *hex = "0123456789abcdef"; + wcs[wpos++] = hex[c >> 4]; + if (wpos < wcslen) + wcs[wpos++] = hex[c & 0x0f]; + } + } + wcs[wpos] = 0; + return wpos; +} + +int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen) +{ + if (!wcs || !utf || utflen < 1) { + errno = EINVAL; + return -1; + } + utflen = WideCharToMultiByte(CP_UTF8, 0, wcs, -1, utf, utflen, NULL, NULL); + if (utflen) + return utflen - 1; + errno = ERANGE; + return -1; +} + /* * Disable MSVCRT command line wildcard expansion (__getmainargs called from * mingw startup code, see init.c in mingw runtime). diff --git a/compat/mingw.h b/compat/mingw.h index dfa5780397..02bcb78674 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -351,6 +351,110 @@ void mingw_open_html(const char *path); char **make_augmented_environ(const char *const *vars); void free_environ(char **env); +/** + * Converts UTF-8 encoded string to UTF-16LE. + * + * To support repositories with legacy-encoded file names, invalid UTF-8 bytes + * 0xa0 - 0xff are converted to corresponding printable Unicode chars \u00a0 - + * \u00ff, and invalid UTF-8 bytes 0x80 - 0x9f (which would make non-printable + * Unicode) are converted to hex-code. + * + * Lead-bytes not followed by an appropriate number of trail-bytes, over-long + * encodings and 4-byte encodings > \u10ffff are detected as invalid UTF-8. 
+ * + * Maximum space requirement for the target buffer is two wide chars per UTF-8 + * char (((strlen(utf) * 2) + 1) [* sizeof(wchar_t)]). + * + * The maximum space is needed only if the entire input string consists of + * invalid UTF-8 bytes in range 0x80-0x9f, as per the following table: + * + * | | UTF-8 | UTF-16 | + * Code point | UTF-8 sequence | bytes | words | ratio + * --------------+-------------------+-------+--------+------- + * 000000-00007f | 0-7f | 1 | 1 | 1 + * 000080-0007ff | c2-df + 80-bf | 2 | 1 | 0.5 + * 000800-00ffff | e0-ef + 2 * 80-bf | 3 | 1 | 0.33 + * 010000-10ffff | f0-f4 + 3 * 80-bf | 4 | 2 (a) | 0.5 + * invalid | 80-9f | 1 | 2 (b) | 2 + * invalid | a0-ff | 1 | 1 | 1 + * + * (a) encoded as UTF-16 surrogate pair + * (b) encoded as two hex digits + * + * Note that, while the UTF-8 encoding scheme can be extended to 5-byte, 6-byte + * or even indefinite-byte sequences, the largest valid code point \u10ffff + * encodes as only 4 UTF-8 bytes. + * + * Parameters: + * wcs: wide char target buffer + * utf: string to convert + * wcslen: size of target buffer (in wchar_t's) + * utflen: size of string to convert, or -1 if 0-terminated + * + * Returns: + * length of converted string (_wcslen(wcs)), or -1 on failure + * + * Errors: + * EINVAL: one of the input parameters is invalid (e.g. NULL) + * ERANGE: the output buffer is too small + */ +int xutftowcsn(wchar_t *wcs, const char *utf, size_t wcslen, int utflen); + +/** + * Simplified variant of xutftowcsn, assumes input string is \0-terminated. + */ +static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen) +{ + return xutftowcsn(wcs, utf, wcslen, -1); +} + +/** + * Simplified file system specific variant of xutftowcsn, assumes output + * buffer size is MAX_PATH wide chars and input string is \0-terminated, + * fails with ENAMETOOLONG if input string is too long. 
+ */ +static inline int xutftowcs_path(wchar_t *wcs, const char *utf) +{ + int result = xutftowcsn(wcs, utf, MAX_PATH, -1); + if (result < 0 && errno == ERANGE) + errno = ENAMETOOLONG; + return result; +} + +/** + * Converts UTF-16LE encoded string to UTF-8. + * + * Maximum space requirement for the target buffer is three UTF-8 chars per + * wide char ((_wcslen(wcs) * 3) + 1). + * + * The maximum space is needed only if the entire input string consists of + * UTF-16 words in range 0x0800-0xd7ff or 0xe000-0xffff (i.e. \u0800-\uffff + * modulo surrogate pairs), as per the following table: + * + * | | UTF-16 | UTF-8 | + * Code point | UTF-16 sequence | words | bytes | ratio + * --------------+-----------------------+--------+-------+------- + * 000000-00007f | 0000-007f | 1 | 1 | 1 + * 000080-0007ff | 0080-07ff | 1 | 2 | 2 + * 000800-00ffff | 0800-d7ff / e000-ffff | 1 | 3 | 3 + * 010000-10ffff | d800-dbff + dc00-dfff | 2 | 4 | 2 + * + * Note that invalid code points > 10ffff cannot be represented in UTF-16. + * + * Parameters: + * utf: target buffer + * wcs: wide string to convert + * utflen: size of target buffer + * + * Returns: + * length of converted string, or -1 on failure + * + * Errors: + * EINVAL: one of the input parameters is invalid (e.g. NULL) + * ERANGE: the output buffer is too small + */ +int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen); + /* * A critical section used in the implementation of the spawn * functions (mingw_spawnv[p]e()) and waitpid(). Intialised in From 03a102ff23637fcdeb5ac6a85d9b5b125748729f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 15 Mar 2012 18:21:28 +0100 Subject: [PATCH 19/41] Win32: Unicode file name support (except dirent) Replaces Windows "ANSI" APIs dealing with file- or path names with their Unicode equivalent, adding UTF-8/UTF-16LE conversion as necessary. The dirent API (opendir/readdir/closedir) is updated in a separate commit. Adds trivial wrappers for access, chmod and chdir. 
Adds wrapper for mktemp (needed for both mkstemp and mkdtemp). The simplest way to convert a repository with legacy-encoded (e.g. Cp1252) file names to UTF-8 ist to checkout with an old msysgit version and "git add --all & git commit" with the new version. Signed-off-by: Karsten Blees --- compat/mingw.c | 195 ++++++++++++++++++++++++++++++++++++------------- compat/mingw.h | 18 ++++- 2 files changed, 157 insertions(+), 56 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 73e5a9805a..bc0df64a01 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1,6 +1,7 @@ #include "../git-compat-util.h" #include "win32.h" #include +#include #include "../strbuf.h" #include "../run-command.h" @@ -198,14 +199,16 @@ static int ask_yes_no_if_possible(const char *format, ...) } } -#undef unlink int mingw_unlink(const char *pathname) { int ret, tries = 0; + wchar_t wpathname[MAX_PATH]; + if (xutftowcs_path(wpathname, pathname) < 0) + return -1; /* read-only files cannot be removed */ - chmod(pathname, 0666); - while ((ret = unlink(pathname)) == -1 && tries < ARRAY_SIZE(delay)) { + _wchmod(wpathname, 0666); + while ((ret = _wunlink(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { if (!is_file_in_use_error(GetLastError())) break; /* @@ -221,45 +224,42 @@ int mingw_unlink(const char *pathname) while (ret == -1 && is_file_in_use_error(GetLastError()) && ask_yes_no_if_possible("Unlink of file '%s' failed. 
" "Should I try again?", pathname)) - ret = unlink(pathname); + ret = _wunlink(wpathname); return ret; } -static int is_dir_empty(const char *path) +static int is_dir_empty(const wchar_t *wpath) { - struct strbuf buf = STRBUF_INIT; - WIN32_FIND_DATAA findbuf; + WIN32_FIND_DATAW findbuf; HANDLE handle; - - strbuf_addf(&buf, "%s\\*", path); - handle = FindFirstFileA(buf.buf, &findbuf); - if (handle == INVALID_HANDLE_VALUE) { - strbuf_release(&buf); + wchar_t wbuf[MAX_PATH + 2]; + wcscpy(wbuf, wpath); + wcscat(wbuf, L"\\*"); + handle = FindFirstFileW(wbuf, &findbuf); + if (handle == INVALID_HANDLE_VALUE) return GetLastError() == ERROR_NO_MORE_FILES; - } - while (!strcmp(findbuf.cFileName, ".") || - !strcmp(findbuf.cFileName, "..")) - if (!FindNextFile(handle, &findbuf)) { - strbuf_release(&buf); + while (!wcscmp(findbuf.cFileName, L".") || + !wcscmp(findbuf.cFileName, L"..")) + if (!FindNextFileW(handle, &findbuf)) return GetLastError() == ERROR_NO_MORE_FILES; - } FindClose(handle); - strbuf_release(&buf); return 0; } -#undef rmdir int mingw_rmdir(const char *pathname) { int ret, tries = 0; + wchar_t wpathname[MAX_PATH]; + if (xutftowcs_path(wpathname, pathname) < 0) + return -1; - while ((ret = rmdir(pathname)) == -1 && tries < ARRAY_SIZE(delay)) { + while ((ret = _wrmdir(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { if (!is_file_in_use_error(GetLastError())) errno = err_win_to_posix(GetLastError()); if (errno != EACCES) break; - if (!is_dir_empty(pathname)) { + if (!is_dir_empty(wpathname)) { errno = ENOTEMPTY; break; } @@ -276,16 +276,26 @@ int mingw_rmdir(const char *pathname) while (ret == -1 && errno == EACCES && is_file_in_use_error(GetLastError()) && ask_yes_no_if_possible("Deletion of directory '%s' failed. 
" "Should I try again?", pathname)) - ret = rmdir(pathname); + ret = _wrmdir(wpathname); + return ret; +} + +int mingw_mkdir(const char *path, int mode) +{ + int ret; + wchar_t wpath[MAX_PATH]; + if (xutftowcs_path(wpath, path) < 0) + return -1; + ret = _wmkdir(wpath); return ret; } -#undef open int mingw_open (const char *filename, int oflags, ...) { va_list args; unsigned mode; int fd; + wchar_t wfilename[MAX_PATH]; va_start(args, oflags); mode = va_arg(args, int); @@ -294,10 +304,12 @@ int mingw_open (const char *filename, int oflags, ...) if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - fd = open(filename, oflags, mode); + if (xutftowcs_path(wfilename, filename) < 0) + return -1; + fd = _wopen(wfilename, oflags, mode); if (fd < 0 && (oflags & O_CREAT) && errno == EACCES) { - DWORD attrs = GetFileAttributes(filename); + DWORD attrs = GetFileAttributesW(wfilename); if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)) errno = EISDIR; } @@ -332,17 +344,28 @@ int mingw_fgetc(FILE *stream) #undef fopen FILE *mingw_fopen (const char *filename, const char *otype) { + FILE *file; + wchar_t wfilename[MAX_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - return fopen(filename, otype); + if (xutftowcs_path(wfilename, filename) < 0 || + xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) + return NULL; + file = _wfopen(wfilename, wotype); + return file; } -#undef freopen FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream) { + FILE *file; + wchar_t wfilename[MAX_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) filename = "nul"; - return freopen(filename, otype, stream); + if (xutftowcs_path(wfilename, filename) < 0 || + xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) + return NULL; + file = _wfreopen(wfilename, wotype, stream); + return file; } #undef fflush @@ -367,6 +390,31 @@ int mingw_fflush(FILE *stream) return ret; } +int mingw_access(const char 
*filename, int mode) +{ + wchar_t wfilename[MAX_PATH]; + if (xutftowcs_path(wfilename, filename) < 0) + return -1; + /* X_OK is not supported by the MSVCRT version */ + return _waccess(wfilename, mode & ~X_OK); +} + +int mingw_chdir(const char *dirname) +{ + wchar_t wdirname[MAX_PATH]; + if (xutftowcs_path(wdirname, dirname) < 0) + return -1; + return _wchdir(wdirname); +} + +int mingw_chmod(const char *filename, int mode) +{ + wchar_t wfilename[MAX_PATH]; + if (xutftowcs_path(wfilename, filename) < 0) + return -1; + return _wchmod(wfilename, mode); +} + /* * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. @@ -392,10 +440,12 @@ static inline time_t filetime_to_time_t(const FILETIME *ft) */ static int do_lstat(int follow, const char *file_name, struct stat *buf) { - int err; WIN32_FILE_ATTRIBUTE_DATA fdata; + wchar_t wfilename[MAX_PATH]; + if (xutftowcs_path(wfilename, file_name) < 0) + return -1; - if (!(err = get_file_attr(file_name, &fdata))) { + if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { buf->st_ino = 0; buf->st_gid = 0; buf->st_uid = 0; @@ -408,8 +458,8 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - WIN32_FIND_DATAA findbuf; - HANDLE handle = FindFirstFileA(file_name, &findbuf); + WIN32_FIND_DATAW findbuf; + HANDLE handle = FindFirstFileW(wfilename, &findbuf); if (handle != INVALID_HANDLE_VALUE) { if ((findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) && (findbuf.dwReserved0 == IO_REPARSE_TAG_SYMLINK)) { @@ -428,7 +478,23 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) } return 0; } - errno = err; + switch (GetLastError()) { + case ERROR_ACCESS_DENIED: + case ERROR_SHARING_VIOLATION: + case 
ERROR_LOCK_VIOLATION: + case ERROR_SHARING_BUFFER_EXCEEDED: + errno = EACCES; + break; + case ERROR_BUFFER_OVERFLOW: + errno = ENAMETOOLONG; + break; + case ERROR_NOT_ENOUGH_MEMORY: + errno = ENOMEM; + break; + default: + errno = ENOENT; + break; + } return -1; } @@ -516,16 +582,20 @@ int mingw_utime (const char *file_name, const struct utimbuf *times) { FILETIME mft, aft; int fh, rc; + DWORD attrs; + wchar_t wfilename[MAX_PATH]; + if (xutftowcs_path(wfilename, file_name) < 0) + return -1; /* must have write permission */ - DWORD attrs = GetFileAttributes(file_name); + attrs = GetFileAttributesW(wfilename); if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_READONLY)) { /* ignore errors here; open() will report them */ - SetFileAttributes(file_name, attrs & ~FILE_ATTRIBUTE_READONLY); + SetFileAttributesW(wfilename, attrs & ~FILE_ATTRIBUTE_READONLY); } - if ((fh = open(file_name, O_RDWR | O_BINARY)) < 0) { + if ((fh = _wopen(wfilename, O_RDWR | O_BINARY)) < 0) { rc = -1; goto revert_attrs; } @@ -548,7 +618,7 @@ revert_attrs: if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_READONLY)) { /* ignore errors again */ - SetFileAttributes(file_name, attrs); + SetFileAttributesW(wfilename, attrs); } return rc; } @@ -559,6 +629,18 @@ unsigned int sleep (unsigned int seconds) return 0; } +char *mingw_mktemp(char *template) +{ + wchar_t wtemplate[MAX_PATH]; + if (xutftowcs_path(wtemplate, template) < 0) + return NULL; + if (!_wmktemp(wtemplate)) + return NULL; + if (xwcstoutf(template, wtemplate, strlen(template) + 1) < 0) + return NULL; + return template; +} + int mkstemp(char *template) { char *filename = mktemp(template); @@ -617,17 +699,18 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) return result; } -#undef getcwd char *mingw_getcwd(char *pointer, int len) { int i; - char *ret = getcwd(pointer, len); - if (!ret) - return ret; + wchar_t wpointer[MAX_PATH]; + if (!_wgetcwd(wpointer, ARRAY_SIZE(wpointer))) + return NULL; + if 
(xwcstoutf(pointer, wpointer, len) < 0) + return NULL; for (i = 0; pointer[i]; i++) if (pointer[i] == '\\') pointer[i] = '/'; - return ret; + return pointer; } #undef getenv @@ -1468,33 +1551,36 @@ int mingw_rename(const char *pold, const char *pnew) { DWORD attrs, gle; int tries = 0; + wchar_t wpold[MAX_PATH], wpnew[MAX_PATH]; + if (xutftowcs_path(wpold, pold) < 0 || xutftowcs_path(wpnew, pnew) < 0) + return -1; /* * Try native rename() first to get errno right. * It is based on MoveFile(), which cannot overwrite existing files. */ - if (!rename(pold, pnew)) + if (!_wrename(wpold, wpnew)) return 0; if (errno != EEXIST) return -1; repeat: - if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING)) + if (MoveFileExW(wpold, wpnew, MOVEFILE_REPLACE_EXISTING)) return 0; /* TODO: translate more errors */ gle = GetLastError(); if (gle == ERROR_ACCESS_DENIED && - (attrs = GetFileAttributes(pnew)) != INVALID_FILE_ATTRIBUTES) { + (attrs = GetFileAttributesW(wpnew)) != INVALID_FILE_ATTRIBUTES) { if (attrs & FILE_ATTRIBUTE_DIRECTORY) { errno = EISDIR; return -1; } if ((attrs & FILE_ATTRIBUTE_READONLY) && - SetFileAttributes(pnew, attrs & ~FILE_ATTRIBUTE_READONLY)) { - if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING)) + SetFileAttributesW(wpnew, attrs & ~FILE_ATTRIBUTE_READONLY)) { + if (MoveFileExW(wpold, wpnew, MOVEFILE_REPLACE_EXISTING)) return 0; gle = GetLastError(); /* revert file attributes on failure */ - SetFileAttributes(pnew, attrs); + SetFileAttributesW(wpnew, attrs); } } if (tries < ARRAY_SIZE(delay) && gle == ERROR_ACCESS_DENIED) { @@ -1740,11 +1826,16 @@ void mingw_open_html(const char *unixpath) int link(const char *oldpath, const char *newpath) { - typedef BOOL (WINAPI *T)(const char*, const char*, LPSECURITY_ATTRIBUTES); + typedef BOOL (WINAPI *T)(LPCWSTR, LPCWSTR, LPSECURITY_ATTRIBUTES); static T create_hard_link = NULL; + wchar_t woldpath[MAX_PATH], wnewpath[MAX_PATH]; + if (xutftowcs_path(woldpath, oldpath) < 0 || + xutftowcs_path(wnewpath, newpath) < 0) 
+ return -1; + if (!create_hard_link) { create_hard_link = (T) GetProcAddress( - GetModuleHandle("kernel32.dll"), "CreateHardLinkA"); + GetModuleHandle("kernel32.dll"), "CreateHardLinkW"); if (!create_hard_link) create_hard_link = (T)-1; } @@ -1752,7 +1843,7 @@ int link(const char *oldpath, const char *newpath) errno = ENOSYS; return -1; } - if (!create_hard_link(newpath, oldpath, NULL)) { + if (!create_hard_link(wnewpath, woldpath, NULL)) { errno = err_win_to_posix(GetLastError()); return -1; } diff --git a/compat/mingw.h b/compat/mingw.h index 02bcb78674..3417796fe7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -118,10 +118,7 @@ static inline int fcntl(int fd, int cmd, ...) * simple adaptors */ -static inline int mingw_mkdir(const char *path, int mode) -{ - return mkdir(path); -} +int mingw_mkdir(const char *path, int mode); #define mkdir mingw_mkdir #define WNOHANG 1 @@ -192,6 +189,19 @@ FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream); int mingw_fflush(FILE *stream); #define fflush mingw_fflush +int mingw_access(const char *filename, int mode); +#undef access +#define access mingw_access + +int mingw_chdir(const char *dirname); +#define chdir mingw_chdir + +int mingw_chmod(const char *filename, int mode); +#define chmod mingw_chmod + +char *mingw_mktemp(char *template); +#define mktemp mingw_mktemp + char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd From d203214155634feb192b52ca64a389148ea145a1 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 14 Jan 2012 22:01:09 +0100 Subject: [PATCH 20/41] Win32: Unicode file name support (dirent) Changes opendir/readdir to use Windows Unicode APIs and convert between UTF-8/UTF-16. Removes parameter checks that are already covered by xutftowcs_path. This changes detection of ENAMETOOLONG from MAX_PATH - 2 to MAX_PATH (matching is_dir_empty in mingw.c). 
If name + "/*" or the resulting absolute path is too long, FindFirstFile fails and errno is set through err_win_to_posix. Increases the size of dirent.d_name to accommodate the full WIN32_FIND_DATA.cFileName converted to UTF-8 (UTF-16 to UTF-8 conversion may grow by factor three in the worst case). Signed-off-by: Karsten Blees --- compat/win32/dirent.c | 30 ++++++++++-------------------- compat/win32/dirent.h | 2 +- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 82a515c21b..52420ec7d4 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -6,10 +6,10 @@ struct DIR { int dd_stat; /* 0-based index */ }; -static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAA *fdata) +static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* copy file name from WIN32_FIND_DATA to dirent */ - memcpy(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 */ + xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -20,25 +20,15 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAA *fdata) DIR *opendir(const char *name) { - char pattern[MAX_PATH]; - WIN32_FIND_DATAA fdata; + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; HANDLE h; int len; DIR *dir; - /* check that name is not NULL */ - if (!name) { - errno = EINVAL; + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) return NULL; - } - /* check that the pattern won't be too long for FindFirstFileA */ - len = strlen(name); - if (len + 2 >= MAX_PATH) { - errno = ENAMETOOLONG; - return NULL; - } - /* copy name to temp buffer */ - memcpy(pattern, name, len + 1); /* append optional '/' and wildcard '*' */ if (len && !is_dir_sep(pattern[len - 1])) @@ -47,7 +37,7 @@ DIR 
*opendir(const char *name) pattern[len] = 0; /* open find handle */ - h = FindFirstFileA(pattern, &fdata); + h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { DWORD err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); @@ -72,8 +62,8 @@ struct dirent *readdir(DIR *dir) /* if first entry, dirent has already been set up by opendir */ if (dir->dd_stat) { /* get next entry and convert from WIN32_FIND_DATA to dirent */ - WIN32_FIND_DATAA fdata; - if (FindNextFileA(dir->dd_handle, &fdata)) { + WIN32_FIND_DATAW fdata; + if (FindNextFileW(dir->dd_handle, &fdata)) { finddata2dirent(&dir->dd_dir, &fdata); } else { DWORD lasterr = GetLastError(); diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 8838cd61fc..058207e4bf 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -10,7 +10,7 @@ typedef struct DIR DIR; struct dirent { unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH]; /* file name */ + char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ }; DIR *opendir(const char *dirname); From d3c33ed4602167323043483e4a51224788287fdd Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 4 Feb 2012 21:54:36 +0100 Subject: [PATCH 21/41] Unicode file name support (gitk and git-gui) Assumes file names in git tree objects are UTF-8 encoded. On most unix systems, the system encoding (and thus the TCL system encoding) will be UTF-8, so file names will be displayed correctly. On Windows, it is impossible to set the system encoding to UTF-8. Changing the TCL system encoding (via 'encoding system ...', e.g. in the startup code) is explicitly discouraged by the TCL docs. Change gitk and git-gui functions dealing with file names to always convert from and to UTF-8. 
Signed-off-by: Karsten Blees --- git-gui/git-gui.sh | 11 +++++++---- git-gui/lib/browser.tcl | 2 +- git-gui/lib/index.tcl | 6 +++--- gitk-git/gitk | 15 ++++++++------- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index cf2209b4f2..4b79862cab 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -548,6 +548,9 @@ proc git {args} { _trace_exec [concat $opt $cmdp $args] set result [eval exec $opt $cmdp $args] + if {[encoding system] != "utf-8"} { + set result [encoding convertfrom utf-8 [encoding convertto $result]] + } if {$::_trace} { puts stderr "< $result" } @@ -1104,7 +1107,7 @@ git-version proc _parse_config {arr_name args} { [list git_read config] \ $args \ [list --null --list]] - fconfigure $fd_rc -translation binary + fconfigure $fd_rc -translation binary -encoding utf-8 set buf [read $fd_rc] close $fd_rc } @@ -1678,7 +1681,7 @@ proc read_diff_index {fd after} { set i [split [string range $buf_rdi $c [expr {$z1 - 2}]] { }] set p [string range $buf_rdi $z1 [expr {$z2 - 1}]] merge_state \ - [encoding convertfrom $p] \ + [encoding convertfrom utf-8 $p] \ [lindex $i 4]? 
\ [list [lindex $i 0] [lindex $i 2]] \ [list] @@ -1711,7 +1714,7 @@ proc read_diff_files {fd after} { set i [split [string range $buf_rdf $c [expr {$z1 - 2}]] { }] set p [string range $buf_rdf $z1 [expr {$z2 - 1}]] merge_state \ - [encoding convertfrom $p] \ + [encoding convertfrom utf-8 $p] \ ?[lindex $i 4] \ [list] \ [list [lindex $i 0] [lindex $i 2]] @@ -1734,7 +1737,7 @@ proc read_ls_others {fd after} { set pck [split $buf_rlo "\0"] set buf_rlo [lindex $pck end] foreach p [lrange $pck 0 end-1] { - set p [encoding convertfrom $p] + set p [encoding convertfrom utf-8 $p] if {[string index $p end] eq {/}} { set p [string range $p 0 end-1] } diff --git a/git-gui/lib/browser.tcl b/git-gui/lib/browser.tcl index 0328338fda..555db896f4 100644 --- a/git-gui/lib/browser.tcl +++ b/git-gui/lib/browser.tcl @@ -197,7 +197,7 @@ method _ls {tree_id {name {}}} { $w conf -state disabled set fd [git_read ls-tree -z $tree_id] - fconfigure $fd -blocking 0 -translation binary -encoding binary + fconfigure $fd -blocking 0 -translation binary -encoding utf-8 fileevent $fd readable [cb _read $fd] } diff --git a/git-gui/lib/index.tcl b/git-gui/lib/index.tcl index 74a81a7b42..d10ffe9209 100644 --- a/git-gui/lib/index.tcl +++ b/git-gui/lib/index.tcl @@ -115,7 +115,7 @@ proc write_update_indexinfo {fd pathList totalCnt batch after} { set info [lindex $s 2] if {$info eq {}} continue - puts -nonewline $fd "$info\t[encoding convertto $path]\0" + puts -nonewline $fd "$info\t[encoding convertto utf-8 $path]\0" display_file $path $new } @@ -186,7 +186,7 @@ proc write_update_index {fd pathList totalCnt batch after} { ?M {set new M_} ?? 
{continue} } - puts -nonewline $fd "[encoding convertto $path]\0" + puts -nonewline $fd "[encoding convertto utf-8 $path]\0" display_file $path $new } @@ -247,7 +247,7 @@ proc write_checkout_index {fd pathList totalCnt batch after} { ?M - ?T - ?D { - puts -nonewline $fd "[encoding convertto $path]\0" + puts -nonewline $fd "[encoding convertto utf-8 $path]\0" display_file $path ?_ } } diff --git a/gitk-git/gitk b/gitk-git/gitk index 90764e8948..9232258249 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -7525,7 +7525,7 @@ proc gettreeline {gtf id} { if {[string index $fname 0] eq "\""} { set fname [lindex $fname 0] } - set fname [encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] lappend treefilelist($id) $fname } if {![eof $gtf]} { @@ -7784,7 +7784,7 @@ proc gettreediffline {gdtf ids} { if {[string index $file 0] eq "\""} { set file [lindex $file 0] } - set file [encoding convertfrom $file] + set file [encoding convertfrom utf-8 $file] if {$file ne [lindex $treediff end]} { lappend treediff $file lappend sublist $file @@ -7929,7 +7929,7 @@ proc makediffhdr {fname ids} { global ctext curdiffstart treediffs diffencoding global ctext_file_names jump_to_here targetline diffline - set fname [encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] set diffencoding [get_path_encoding $fname] set i [lsearch -exact $treediffs($ids) $fname] if {$i >= 0} { @@ -7986,7 +7986,7 @@ proc parseblobdiffline {ids line} { if {![string compare -length 5 "diff " $line]} { if {![regexp {^diff (--cc|--git) } $line m type]} { - set line [encoding convertfrom $line] + set line [encoding convertfrom utf-8 $line] $ctext insert end "$line\n" hunksep continue } @@ -8033,7 +8033,7 @@ proc parseblobdiffline {ids line} { makediffhdr $fname $ids } elseif {![string compare -length 16 "* Unmerged path " $line]} { - set fname [encoding convertfrom [string range $line 16 end]] + set fname [encoding convertfrom utf-8 [string range $line 16 end]] $ctext 
insert end "\n" set curdiffstart [$ctext index "end - 1c"] lappend ctext_file_names $fname @@ -8088,7 +8088,7 @@ proc parseblobdiffline {ids line} { if {[string index $fname 0] eq "\""} { set fname [lindex $fname 0] } - set fname [encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] set i [lsearch -exact $treediffs($ids) $fname] if {$i >= 0} { setinlist difffilestart $i $curdiffstart @@ -8107,6 +8107,7 @@ proc parseblobdiffline {ids line} { set diffinhdr 0 return } + set line [encoding convertfrom utf-8 $line] $ctext insert end "$line\n" filesep } else { @@ -11895,7 +11896,7 @@ proc cache_gitattr {attr pathlist} { foreach row [split $rlist "\n"] { if {[regexp "(.*): $attr: (.*)" $row m path value]} { if {[string index $path 0] eq "\""} { - set path [encoding convertfrom [lindex $path 0]] + set path [encoding convertfrom utf-8 [lindex $path 0]] } set path_attr_cache($attr,$path) $value } From ab319d0e3e2f206670492276a61168392eb7caec Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 16 Jan 2011 18:27:53 +0100 Subject: [PATCH 22/41] Win32: Unicode arguments (outgoing) Convert command line arguments from UTF-8 to UTF-16 when creating other processes. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index bc0df64a01..b598454fbb 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -931,9 +931,10 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, const char *dir, int prepend_cmd, int fhin, int fhout, int fherr) { - STARTUPINFO si; + STARTUPINFOW si; PROCESS_INFORMATION pi; struct strbuf envblk, args; + wchar_t wcmd[MAX_PATH], wdir[MAX_PATH], *wargs; unsigned flags; BOOL ret; @@ -969,6 +970,11 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, si.hStdOutput = winansi_get_osfhandle(fhout); si.hStdError = winansi_get_osfhandle(fherr); + if (xutftowcs_path(wcmd, cmd) < 0) + return -1; + if (dir && xutftowcs_path(wdir, dir) < 0) + return -1; + /* concatenate argv, quoting args as we go */ strbuf_init(&args, 0); if (prepend_cmd) { @@ -986,6 +992,10 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, free(quoted); } + wargs = xmalloc((2 * args.len + 1) * sizeof(wchar_t)); + xutftowcs(wargs, args.buf, 2 * args.len + 1); + strbuf_release(&args); + if (env) { int count = 0; char **e, **sorted_env; @@ -1007,12 +1017,12 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, } memset(&pi, 0, sizeof(pi)); - ret = CreateProcess(cmd, args.buf, NULL, NULL, TRUE, flags, - env ? envblk.buf : NULL, dir, &si, &pi); + ret = CreateProcessW(wcmd, wargs, NULL, NULL, TRUE, flags, + env ? envblk.buf : NULL, dir ? wdir : NULL, &si, &pi); if (env) strbuf_release(&envblk); - strbuf_release(&args); + free(wargs); if (!ret) { errno = ENOENT; From 9e3b36e079198568641c413a10b83b9d4ab1abc8 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 16 Jan 2011 18:28:27 +0100 Subject: [PATCH 23/41] Win32: Unicode arguments (incoming) Convert command line arguments from UTF-16 to UTF-8 on startup. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index b598454fbb..e66acd6e8e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2003,10 +2003,41 @@ int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen) */ int _CRT_glob = 0; +typedef struct { + int newmode; +} _startupinfo; + +extern int __wgetmainargs(int *argc, wchar_t ***argv, wchar_t ***env, int glob, + _startupinfo *si); + void mingw_startup() { - /* copy executable name to argv[0] */ - __argv[0] = xstrdup(_pgmptr); + int i, len, maxlen, argc; + char *buffer; + wchar_t **wenv, **wargv; + _startupinfo si; + + /* get wide char arguments and environment */ + si.newmode = 0; + __wgetmainargs(&argc, &wargv, &wenv, _CRT_glob, &si); + + /* determine size of argv and environ conversion buffer */ + maxlen = wcslen(_wpgmptr); + for (i = 1; i < argc; i++) + maxlen = max(maxlen, wcslen(wargv[i])); + + /* allocate buffer (wchar_t encodes to max 3 UTF-8 bytes) */ + maxlen = 3 * maxlen + 1; + buffer = xmalloc(maxlen); + + /* convert command line arguments and environment to UTF-8 */ + len = xwcstoutf(buffer, _wpgmptr, maxlen); + __argv[0] = xmemdupz(buffer, len); + for (i = 1; i < argc; i++) { + len = xwcstoutf(buffer, wargv[i], maxlen); + __argv[i] = xmemdupz(buffer, len); + } + free(buffer); /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); From 32a13e56ffc894adefebe8151c6d15ee182584ae Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 25 Nov 2011 21:17:49 +0100 Subject: [PATCH 24/41] Win32: sync Unicode console output and file system Use the same Unicode conversion functions for file names and console conversions so that the file system and console output are in sync when checking out legacy encoded repositories (i.e. with invalid UTF-8 file names). 
Signed-off-by: Karsten Blees --- compat/winansi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index a3e4d88295..9f95954390 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -120,8 +120,7 @@ static void write_console(unsigned char *str, size_t len) static wchar_t wbuf[2 * BUFFER_SIZE + 1]; /* convert utf-8 to utf-16 */ - int wlen = MultiByteToWideChar(CP_UTF8, 0, (char*) str, len, wbuf, - ARRAY_SIZE(wbuf)); + int wlen = xutftowcsn(wbuf, (char*) str, ARRAY_SIZE(wbuf), len); /* write directly to console */ WriteConsoleW(console, wbuf, wlen, NULL, NULL); From 87d2c7f25adbb6bc037521b77e09b50907232542 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 16 Jan 2012 00:07:46 +0100 Subject: [PATCH 25/41] Win32: Unicode environment (outgoing) Convert environment from UTF-8 to UTF-16 when creating other processes. Signed-off-by: Karsten Blees --- compat/mingw.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index e66acd6e8e..48a74d82af 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -933,9 +933,9 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, { STARTUPINFOW si; PROCESS_INFORMATION pi; - struct strbuf envblk, args; - wchar_t wcmd[MAX_PATH], wdir[MAX_PATH], *wargs; - unsigned flags; + struct strbuf args; + wchar_t wcmd[MAX_PATH], wdir[MAX_PATH], *wargs, *wenvblk = NULL; + unsigned flags = CREATE_UNICODE_ENVIRONMENT; BOOL ret; /* Determine whether or not we are associated to a console */ @@ -952,7 +952,7 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, * instead of CREATE_NO_WINDOW to make ssh * recognize that it has no console. */ - flags = DETACHED_PROCESS; + flags |= DETACHED_PROCESS; } else { /* There is already a console. 
If we specified * DETACHED_PROCESS here, too, Windows would @@ -960,7 +960,6 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, * The same is true for CREATE_NO_WINDOW. * Go figure! */ - flags = 0; CloseHandle(cons); } memset(&si, 0, sizeof(si)); @@ -999,6 +998,7 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, if (env) { int count = 0; char **e, **sorted_env; + int size = 0, wenvsz = 0, wenvpos = 0; for (e = env; *e; e++) count++; @@ -1008,20 +1008,22 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, memcpy(sorted_env, env, sizeof(*sorted_env) * (count + 1)); qsort(sorted_env, count, sizeof(*sorted_env), env_compare); - strbuf_init(&envblk, 0); + /* create environment block from temporary environment */ for (e = sorted_env; *e; e++) { - strbuf_addstr(&envblk, *e); - strbuf_addch(&envblk, '\0'); + size = 2 * strlen(*e) + 2; /* +2 for final \0 */ + ALLOC_GROW(wenvblk, (wenvpos + size) * sizeof(wchar_t), wenvsz); + wenvpos += xutftowcs(&wenvblk[wenvpos], *e, size) + 1; } + /* add final \0 terminator */ + wenvblk[wenvpos] = 0; free(sorted_env); } memset(&pi, 0, sizeof(pi)); ret = CreateProcessW(wcmd, wargs, NULL, NULL, TRUE, flags, - env ? envblk.buf : NULL, dir ? wdir : NULL, &si, &pi); + wenvblk, dir ? wdir : NULL, &si, &pi); - if (env) - strbuf_release(&envblk); + free(wenvblk); free(wargs); if (!ret) { From c92615ff28cb240eab21ea584aa955596914ea1a Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 25 Apr 2011 23:32:27 +0100 Subject: [PATCH 26/41] Win32: Unicode environment (incoming) Convert environment from UTF-16 to UTF-8 on startup. No changes to getenv() are necessary, as the MSVCRT version is implemented on top of char **environ. However, putenv / _wputenv from MSVCRT no longer work, for two reasons: 1. they try to keep environ, _wenviron and the Win32 process environment in sync, using the default system encoding instead of UTF-8 to convert between charsets 2. 
msysgit and MSVCRT use different allocators, memory allocated in git cannot be freed by the CRT and vice versa Implement mingw_putenv using the env_setenv helper function from the environment merge code. Note that in case of memory allocation failure, putenv now dies with error message (due to xrealloc) instead of failing with ENOMEM. As git assumes setenv / putenv to always succeed, this prevents it from continuing with incorrect settings. Signed-off-by: Karsten Blees --- compat/mingw.c | 15 +++++++++++++++ compat/mingw.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 48a74d82af..235d6244d5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1266,6 +1266,12 @@ char **make_augmented_environ(const char *const *vars) return env; } +int mingw_putenv(const char *namevalue) +{ + environ = env_setenv(environ, namevalue); + return 0; +} + /* * Note, this isn't a complete replacement for getaddrinfo. It assumes * that service contains a numerical port, or that it is null. 
It @@ -2027,6 +2033,11 @@ void mingw_startup() maxlen = wcslen(_wpgmptr); for (i = 1; i < argc; i++) maxlen = max(maxlen, wcslen(wargv[i])); + for (i = 0; wenv[i]; i++) + maxlen = max(maxlen, wcslen(wenv[i])); + + /* nedmalloc can't free CRT memory, allocate resizable environment list */ + environ = xcalloc(i + 1, sizeof(char*)); /* allocate buffer (wchar_t encodes to max 3 UTF-8 bytes) */ maxlen = 3 * maxlen + 1; @@ -2039,6 +2050,10 @@ void mingw_startup() len = xwcstoutf(buffer, wargv[i], maxlen); __argv[i] = xmemdupz(buffer, len); } + for (i = 0; wenv[i]; i++) { + len = xwcstoutf(buffer, wenv[i], maxlen); + environ[i] = xmemdupz(buffer, len); + } free(buffer); /* initialize critical section for waitpid pinfo_t list */ diff --git a/compat/mingw.h b/compat/mingw.h index 3417796fe7..1876a39391 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -207,6 +207,8 @@ char *mingw_getcwd(char *pointer, int len); char *mingw_getenv(const char *name); #define getenv mingw_getenv +int mingw_putenv(const char *namevalue); +#define putenv mingw_putenv int mingw_gethostname(char *host, int namelen); #define gethostname mingw_gethostname From a57a4caebc4cf99c00d117b4f7c29a55ba988700 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 25 Nov 2011 21:29:40 +0100 Subject: [PATCH 27/41] Win32: fix environment memory leaks All functions that modify the environment have memory leaks. Disable gitunsetenv in the Makefile and use env_setenv (via mingw_putenv) instead (this frees removed environment entries). Move xstrdup from env_setenv to make_augmented_environ, so that mingw_putenv no longer copies the environment entries (according to POSIX [1], "the string [...] shall become part of the environment"). This also fixes the memory leak in gitsetenv, which expects a POSIX compliant putenv. 
[1] http://pubs.opengroup.org/onlinepubs/009695399/functions/putenv.html Note: This patch depends on taking control of char **environ and having our own mingw_putenv (both introduced in "Win32: Unicode environment (incoming)"). Signed-off-by: Karsten Blees --- compat/mingw.c | 10 ++++++---- compat/mingw.h | 1 + config.mak.uname | 2 -- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 235d6244d5..b3ff81ecc1 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1239,14 +1239,14 @@ static char **env_setenv(char **env, const char *name) for (i = 0; env[i]; i++) ; env = xrealloc(env, (i+2)*sizeof(*env)); - env[i] = xstrdup(name); + env[i] = (char*) name; env[i+1] = NULL; } } else { free(env[i]); if (*eq) - env[i] = xstrdup(name); + env[i] = (char*) name; else for (; env[i]; i++) env[i] = env[i+1]; @@ -1261,8 +1261,10 @@ char **make_augmented_environ(const char *const *vars) { char **env = copy_environ(); - while (*vars) - env = env_setenv(env, *vars++); + while (*vars) { + const char *v = *vars++; + env = env_setenv(env, strchr(v, '=') ? 
xstrdup(v) : v); + } return env; } diff --git a/compat/mingw.h b/compat/mingw.h index 1876a39391..9f3e17a1d6 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -209,6 +209,7 @@ char *mingw_getenv(const char *name); #define getenv mingw_getenv int mingw_putenv(const char *namevalue); #define putenv mingw_putenv +#define unsetenv mingw_putenv int mingw_gethostname(char *host, int namelen); #define gethostname mingw_gethostname diff --git a/config.mak.uname b/config.mak.uname index 36b755e085..e9e7a60253 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -325,7 +325,6 @@ ifeq ($(uname_S),Windows) NO_IPV6 = YesPlease NO_UNIX_SOCKETS = YesPlease NO_SETENV = YesPlease - NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease NO_FNMATCH = YesPlease @@ -483,7 +482,6 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_SYMLINK_HEAD = YesPlease NO_UNIX_SOCKETS = YesPlease NO_SETENV = YesPlease - NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease NO_FNMATCH = YesPlease From 15106ae1579888dceca6240cb7b74c6d1100ece6 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 14 Jan 2012 23:42:09 +0100 Subject: [PATCH 28/41] Win32: unify environment case-sensitivity The environment on Windows is case-insensitive. Some environment functions (such as unsetenv and make_augmented_environ) have always used case-sensitive comparisons instead, while others (getenv, putenv, sorting in spawn*) were case-insensitive. Prevent potential inconsistencies by using case-insensitive comparison in lookup_env (used by putenv, unsetenv and make_augmented_environ). 
Signed-off-by: Karsten Blees --- compat/mingw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index b3ff81ecc1..f3aeb0289c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1218,8 +1218,7 @@ static int lookup_env(char **env, const char *name, size_t nmln) int i; for (i = 0; env[i]; i++) { - if (0 == strncmp(env[i], name, nmln) - && '=' == env[i][nmln]) + if (!strncasecmp(env[i], name, nmln) && '=' == env[i][nmln]) /* matches */ return i; } From deff26aca2fefbace4869f526c171c98e5205ea2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 28 May 2012 21:21:39 -0500 Subject: [PATCH 29/41] Let mingw_execve() return an int This is in the great tradition of POSIX. Original fix by Olivier Refalo. Signed-off-by: Johannes Schindelin --- compat/mingw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index f3aeb0289c..cb91416954 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1131,7 +1131,7 @@ static int try_shell_exec(const char *cmd, char *const *argv, char **env) return pid; } -static void mingw_execve(const char *cmd, char *const *argv, char *const *env) +static int mingw_execve(const char *cmd, char *const *argv, char *const *env) { /* check if git_command is a shell script */ if (!try_shell_exec(cmd, argv, (char **)env)) { @@ -1139,11 +1139,12 @@ static void mingw_execve(const char *cmd, char *const *argv, char *const *env) pid = mingw_spawnve(cmd, (const char **)argv, (char **)env, 0); if (pid < 0) - return; + return -1; if (waitpid(pid, &status, 0) < 0) status = 255; exit(status); } + return -1; } int mingw_execvp(const char *cmd, char *const *argv) From cef8aa8c83e28d7af90563a0a8d46d4d85be1e0c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 25 Nov 2011 21:33:17 +0100 Subject: [PATCH 30/41] Win32: simplify internal mingw_spawn* APIs The only public spawn function that needs to tweak the environment is mingw_spawnvpe (called from 
start_command). Nevertheless, all internal spawn* functions take an env parameter and needlessly pass the global char **environ around. Remove the env parameter where it's not needed. This removes the internal mingw_execve abstraction, which is no longer needed. Signed-off-by: Karsten Blees --- compat/mingw.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index cb91416954..cc36982253 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1053,10 +1053,9 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, return (pid_t)pi.dwProcessId; } -static pid_t mingw_spawnve(const char *cmd, const char **argv, char **env, - int prepend_cmd) +static pid_t mingw_spawnv(const char *cmd, const char **argv, int prepend_cmd) { - return mingw_spawnve_fd(cmd, argv, env, NULL, prepend_cmd, 0, 1, 2); + return mingw_spawnve_fd(cmd, argv, environ, NULL, prepend_cmd, 0, 1, 2); } pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, @@ -1098,7 +1097,7 @@ pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, return pid; } -static int try_shell_exec(const char *cmd, char *const *argv, char **env) +static int try_shell_exec(const char *cmd, char *const *argv) { const char *interpr = parse_interpreter(cmd); char **path; @@ -1116,7 +1115,7 @@ static int try_shell_exec(const char *cmd, char *const *argv, char **env) argv2 = xmalloc(sizeof(*argv) * (argc+1)); argv2[0] = (char *)cmd; /* full path to the script file */ memcpy(&argv2[1], &argv[1], sizeof(*argv) * argc); - pid = mingw_spawnve(prog, argv2, env, 1); + pid = mingw_spawnv(prog, argv2, 1); if (pid >= 0) { int status; if (waitpid(pid, &status, 0) < 0) @@ -1131,13 +1130,13 @@ static int try_shell_exec(const char *cmd, char *const *argv, char **env) return pid; } -static int mingw_execve(const char *cmd, char *const *argv, char *const *env) +int mingw_execv(const char *cmd, char *const *argv) { /* check if 
git_command is a shell script */ - if (!try_shell_exec(cmd, argv, (char **)env)) { + if (!try_shell_exec(cmd, argv)) { int pid, status; - pid = mingw_spawnve(cmd, (const char **)argv, (char **)env, 0); + pid = mingw_spawnv(cmd, (const char **)argv, 0); if (pid < 0) return -1; if (waitpid(pid, &status, 0) < 0) @@ -1153,7 +1152,7 @@ int mingw_execvp(const char *cmd, char *const *argv) char *prog = path_lookup(cmd, path, 0); if (prog) { - mingw_execve(prog, argv, environ); + mingw_execv(prog, argv); free(prog); } else errno = ENOENT; @@ -1162,12 +1161,6 @@ int mingw_execvp(const char *cmd, char *const *argv) return -1; } -int mingw_execv(const char *cmd, char *const *argv) -{ - mingw_execve(cmd, argv, environ); - return -1; -} - int mingw_kill(pid_t pid, int sig) { if (pid > 0 && sig == SIGTERM) { From d4b590d83244b1dc5bef824314db68829dfc5f92 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 00:05:04 +0100 Subject: [PATCH 31/41] Win32: move environment functions Move environment helper functions up so that they can be reused by mingw_getenv and mingw_spawnve_fd in subsequent patches. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 106 ++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index cc36982253..17345df68c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -713,6 +713,53 @@ char *mingw_getcwd(char *pointer, int len) return pointer; } +static int env_compare(const void *a, const void *b) +{ + char *const *ea = a; + char *const *eb = b; + return strcasecmp(*ea, *eb); +} + +static int lookup_env(char **env, const char *name, size_t nmln) +{ + int i; + + for (i = 0; env[i]; i++) { + if (!strncasecmp(env[i], name, nmln) && '=' == env[i][nmln]) + /* matches */ + return i; + } + return -1; +} + +/* + * If name contains '=', then sets the variable, otherwise it unsets it + */ +static char **env_setenv(char **env, const char *name) +{ + char *eq = strchrnul(name, '='); + int i = lookup_env(env, name, eq-name); + + if (i < 0) { + if (*eq) { + for (i = 0; env[i]; i++) + ; + env = xrealloc(env, (i+2)*sizeof(*env)); + env[i] = (char*) name; + env[i+1] = NULL; + } + } + else { + free(env[i]); + if (*eq) + env[i] = (char*) name; + else + for (; env[i]; i++) + env[i] = env[i+1]; + } + return env; +} + #undef getenv char *mingw_getenv(const char *name) { @@ -730,6 +777,12 @@ char *mingw_getenv(const char *name) return result; } +int mingw_putenv(const char *namevalue) +{ + environ = env_setenv(environ, namevalue); + return 0; +} + /* * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx * (Parsing C++ Command-Line Arguments) @@ -912,13 +965,6 @@ static char *path_lookup(const char *cmd, char **path, int exe_only) return prog; } -static int env_compare(const void *a, const void *b) -{ - char *const *ea = a; - char *const *eb = b; - return strcasecmp(*ea, *eb); -} - struct pinfo_t { struct pinfo_t *next; pid_t pid; @@ -1207,46 +1253,6 @@ void free_environ(char **env) free(env); } -static int lookup_env(char **env, const char *name, size_t nmln) -{ 
- int i; - - for (i = 0; env[i]; i++) { - if (!strncasecmp(env[i], name, nmln) && '=' == env[i][nmln]) - /* matches */ - return i; - } - return -1; -} - -/* - * If name contains '=', then sets the variable, otherwise it unsets it - */ -static char **env_setenv(char **env, const char *name) -{ - char *eq = strchrnul(name, '='); - int i = lookup_env(env, name, eq-name); - - if (i < 0) { - if (*eq) { - for (i = 0; env[i]; i++) - ; - env = xrealloc(env, (i+2)*sizeof(*env)); - env[i] = (char*) name; - env[i+1] = NULL; - } - } - else { - free(env[i]); - if (*eq) - env[i] = (char*) name; - else - for (; env[i]; i++) - env[i] = env[i+1]; - } - return env; -} - /* * Copies global environ and adjusts variables as specified by vars. */ @@ -1261,12 +1267,6 @@ char **make_augmented_environ(const char *const *vars) return env; } -int mingw_putenv(const char *namevalue) -{ - environ = env_setenv(environ, namevalue); - return 0; -} - /* * Note, this isn't a complete replacement for getaddrinfo. It assumes * that service contains a numerical port, or that it is null. It From fa7d1b61c7613ad7cf41b1395a2b7bd03e18c010 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Wed, 5 Oct 2011 22:01:46 +0200 Subject: [PATCH 32/41] Win32: unify environment function names Environment helper functions use random naming ('env' prefix or suffix or both, with or without '_'). Change to POSIX naming scheme ('env' suffix, no '_'). Env_setenv has more in common with putenv than setenv. Change to do_putenv. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 17345df68c..0a4278ef95 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -713,14 +713,14 @@ char *mingw_getcwd(char *pointer, int len) return pointer; } -static int env_compare(const void *a, const void *b) +static int compareenv(const void *a, const void *b) { char *const *ea = a; char *const *eb = b; return strcasecmp(*ea, *eb); } -static int lookup_env(char **env, const char *name, size_t nmln) +static int lookupenv(char **env, const char *name, size_t nmln) { int i; @@ -735,10 +735,10 @@ static int lookup_env(char **env, const char *name, size_t nmln) /* * If name contains '=', then sets the variable, otherwise it unsets it */ -static char **env_setenv(char **env, const char *name) +static char **do_putenv(char **env, const char *name) { char *eq = strchrnul(name, '='); - int i = lookup_env(env, name, eq-name); + int i = lookupenv(env, name, eq-name); if (i < 0) { if (*eq) { @@ -779,7 +779,7 @@ char *mingw_getenv(const char *name) int mingw_putenv(const char *namevalue) { - environ = env_setenv(environ, namevalue); + environ = do_putenv(environ, namevalue); return 0; } @@ -1052,7 +1052,7 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, /* environment must be sorted */ sorted_env = xmalloc(sizeof(*sorted_env) * (count + 1)); memcpy(sorted_env, env, sizeof(*sorted_env) * (count + 1)); - qsort(sorted_env, count, sizeof(*sorted_env), env_compare); + qsort(sorted_env, count, sizeof(*sorted_env), compareenv); /* create environment block from temporary environment */ for (e = sorted_env; *e; e++) { @@ -1262,7 +1262,7 @@ char **make_augmented_environ(const char *const *vars) while (*vars) { const char *v = *vars++; - env = env_setenv(env, strchr(v, '=') ? xstrdup(v) : v); + env = do_putenv(env, strchr(v, '=') ? 
xstrdup(v) : v); } return env; } From 504cb44463ac04f395f3b4481d9503c8a2ae11f6 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 00:31:57 +0100 Subject: [PATCH 33/41] Win32: factor out environment block creation Signed-off-by: Karsten Blees --- compat/mingw.c | 55 +++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 0a4278ef95..dfe1485860 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -965,6 +965,36 @@ static char *path_lookup(const char *cmd, char **path, int exe_only) return prog; } +/* + * Create environment block suitable for CreateProcess. + */ +static wchar_t *make_environment_block(char **env) +{ + wchar_t *wenvblk = NULL; + int count = 0; + char **e, **tmpenv; + int size = 0, wenvsz = 0, wenvpos = 0; + + for (e = env; *e; e++) + count++; + + /* environment must be sorted */ + tmpenv = xmalloc(sizeof(*tmpenv) * (count + 1)); + memcpy(tmpenv, env, sizeof(*tmpenv) * (count + 1)); + qsort(tmpenv, count, sizeof(*tmpenv), compareenv); + + /* create environment block from temporary environment */ + for (e = tmpenv; *e; e++) { + size = 2 * strlen(*e) + 2; /* +2 for final \0 */ + ALLOC_GROW(wenvblk, (wenvpos + size) * sizeof(wchar_t), wenvsz); + wenvpos += xutftowcs(&wenvblk[wenvpos], *e, size) + 1; + } + /* add final \0 terminator */ + wenvblk[wenvpos] = 0; + free(tmpenv); + return wenvblk; +} + struct pinfo_t { struct pinfo_t *next; pid_t pid; @@ -1041,29 +1071,8 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, xutftowcs(wargs, args.buf, 2 * args.len + 1); strbuf_release(&args); - if (env) { - int count = 0; - char **e, **sorted_env; - int size = 0, wenvsz = 0, wenvpos = 0; - - for (e = env; *e; e++) - count++; - - /* environment must be sorted */ - sorted_env = xmalloc(sizeof(*sorted_env) * (count + 1)); - memcpy(sorted_env, env, sizeof(*sorted_env) * (count + 1)); - qsort(sorted_env, count, sizeof(*sorted_env), 
compareenv); - - /* create environment block from temporary environment */ - for (e = sorted_env; *e; e++) { - size = 2 * strlen(*e) + 2; /* +2 for final \0 */ - ALLOC_GROW(wenvblk, (wenvpos + size) * sizeof(wchar_t), wenvsz); - wenvpos += xutftowcs(&wenvblk[wenvpos], *e, size) + 1; - } - /* add final \0 terminator */ - wenvblk[wenvpos] = 0; - free(sorted_env); - } + if (env) + wenvblk = make_environment_block(env); memset(&pi, 0, sizeof(pi)); ret = CreateProcessW(wcmd, wargs, NULL, NULL, TRUE, flags, From 81dc4ed8e946e8c55c623c2788563ee2d2634c4f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 00:57:14 +0100 Subject: [PATCH 34/41] Win32: don't copy the environment twice when spawning child processes When spawning child processes via start_command(), the environment and all environment entries are copied twice. First by make_augmented_environ / copy_environ to merge with child_process.env. Then a second time by make_environment_block to create a sorted environment block string as required by CreateProcess. Move the merge logic to make_environment_block so that we only need to copy the environment once. This changes semantics of the env parameter: it now expects a delta (such as child_process.env) rather than a full environment. This is not a problem as the parameter is only used by start_command() (all other callers previously passed char **environ, and now pass NULL). The merge logic no longer xstrdup()s the environment strings, so do_putenv must not free them. Add a parameter to distinguish this from normal putenv. Remove the now unused make_augmented_environ / free_environ API. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 74 ++++++++++++++++---------------------------------- compat/mingw.h | 8 ++---- run-command.c | 10 ++----- 3 files changed, 28 insertions(+), 64 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index dfe1485860..2b8d9b4da9 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -735,7 +735,7 @@ static int lookupenv(char **env, const char *name, size_t nmln) /* * If name contains '=', then sets the variable, otherwise it unsets it */ -static char **do_putenv(char **env, const char *name) +static char **do_putenv(char **env, const char *name, int free_old) { char *eq = strchrnul(name, '='); int i = lookupenv(env, name, eq-name); @@ -750,7 +750,8 @@ static char **do_putenv(char **env, const char *name) } } else { - free(env[i]); + if (free_old) + free(env[i]); if (*eq) env[i] = (char*) name; else @@ -779,7 +780,7 @@ char *mingw_getenv(const char *name) int mingw_putenv(const char *namevalue) { - environ = do_putenv(environ, namevalue); + environ = do_putenv(environ, namevalue, 1); return 0; } @@ -966,21 +967,30 @@ static char *path_lookup(const char *cmd, char **path, int exe_only) } /* - * Create environment block suitable for CreateProcess. + * Create environment block suitable for CreateProcess. Merges current + * process environment and the supplied environment changes. 
*/ -static wchar_t *make_environment_block(char **env) +static wchar_t *make_environment_block(char **deltaenv) { wchar_t *wenvblk = NULL; int count = 0; char **e, **tmpenv; int size = 0, wenvsz = 0, wenvpos = 0; - for (e = env; *e; e++) + while (environ[count]) count++; - /* environment must be sorted */ + /* copy the environment */ tmpenv = xmalloc(sizeof(*tmpenv) * (count + 1)); - memcpy(tmpenv, env, sizeof(*tmpenv) * (count + 1)); + memcpy(tmpenv, environ, sizeof(*tmpenv) * (count + 1)); + + /* merge supplied environment changes into the temporary environment */ + for (e = deltaenv; e && *e; e++) + tmpenv = do_putenv(tmpenv, *e, 0); + + /* environment must be sorted */ + for (count = 0; tmpenv[count]; ) + count++; qsort(tmpenv, count, sizeof(*tmpenv), compareenv); /* create environment block from temporary environment */ @@ -1003,7 +1013,7 @@ struct pinfo_t { static struct pinfo_t *pinfo = NULL; CRITICAL_SECTION pinfo_cs; -static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, +static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaenv, const char *dir, int prepend_cmd, int fhin, int fhout, int fherr) { @@ -1071,8 +1081,7 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, xutftowcs(wargs, args.buf, 2 * args.len + 1); strbuf_release(&args); - if (env) - wenvblk = make_environment_block(env); + wenvblk = make_environment_block(deltaenv); memset(&pi, 0, sizeof(pi)); ret = CreateProcessW(wcmd, wargs, NULL, NULL, TRUE, flags, @@ -1110,10 +1119,10 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **env, static pid_t mingw_spawnv(const char *cmd, const char **argv, int prepend_cmd) { - return mingw_spawnve_fd(cmd, argv, environ, NULL, prepend_cmd, 0, 1, 2); + return mingw_spawnve_fd(cmd, argv, NULL, NULL, prepend_cmd, 0, 1, 2); } -pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, +pid_t mingw_spawnvpe(const char *cmd, const char **argv, char 
**deltaenv, const char *dir, int fhin, int fhout, int fherr) { @@ -1137,14 +1146,14 @@ pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, pid = -1; } else { - pid = mingw_spawnve_fd(iprog, argv, env, dir, 1, + pid = mingw_spawnve_fd(iprog, argv, deltaenv, dir, 1, fhin, fhout, fherr); free(iprog); } argv[0] = argv0; } else - pid = mingw_spawnve_fd(prog, argv, env, dir, 0, + pid = mingw_spawnve_fd(prog, argv, deltaenv, dir, 0, fhin, fhout, fherr); free(prog); } @@ -1241,41 +1250,6 @@ int mingw_kill(pid_t pid, int sig) return -1; } -static char **copy_environ(void) -{ - char **env; - int i = 0; - while (environ[i]) - i++; - env = xmalloc((i+1)*sizeof(*env)); - for (i = 0; environ[i]; i++) - env[i] = xstrdup(environ[i]); - env[i] = NULL; - return env; -} - -void free_environ(char **env) -{ - int i; - for (i = 0; env[i]; i++) - free(env[i]); - free(env); -} - -/* - * Copies global environ and adjusts variables as specified by vars. - */ -char **make_augmented_environ(const char *const *vars) -{ - char **env = copy_environ(); - - while (*vars) { - const char *v = *vars++; - env = do_putenv(env, strchr(v, '=') ? xstrdup(v) : v); - } - return env; -} - /* * Note, this isn't a complete replacement for getaddrinfo. It assumes * that service contains a numerical port, or that it is null. It diff --git a/compat/mingw.h b/compat/mingw.h index 9f3e17a1d6..56f8eea423 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -357,12 +357,8 @@ static inline char *mingw_find_last_dir_sep(const char *path) void mingw_open_html(const char *path); #define open_html mingw_open_html -/* - * helpers - */ - -char **make_augmented_environ(const char *const *vars); -void free_environ(char **env); +void mingw_mark_as_git_dir(const char *dir); +#define mark_as_git_dir mingw_mark_as_git_dir /** * Converts UTF-8 encoded string to UTF-16LE. 
diff --git a/run-command.c b/run-command.c index 75abc478c6..f2626f5ba3 100644 --- a/run-command.c +++ b/run-command.c @@ -450,7 +450,6 @@ fail_pipe: { int fhin = 0, fhout = 1, fherr = 2; const char **sargv = cmd->argv; - char **env = environ; if (cmd->no_stdin) fhin = open("/dev/null", O_RDWR); @@ -475,24 +474,19 @@ fail_pipe: else if (cmd->out > 1) fhout = dup(cmd->out); - if (cmd->env) - env = make_augmented_environ(cmd->env); - if (cmd->git_cmd) cmd->argv = prepare_git_cmd(cmd->argv); else if (cmd->use_shell) cmd->argv = prepare_shell_cmd(cmd->argv); - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env, cmd->dir, - fhin, fhout, fherr); + cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, (char**) cmd->env, + cmd->dir, fhin, fhout, fherr); failed_errno = errno; if (cmd->pid < 0 && (!cmd->silent_exec_failure || errno != ENOENT)) error("cannot spawn %s: %s", cmd->argv[0], strerror(errno)); if (cmd->clean_on_exit && cmd->pid >= 0) mark_child_for_cleanup(cmd->pid); - if (cmd->env) - free_environ(env); if (cmd->git_cmd) free(cmd->argv); From 188b824cb9b9ca78966b2a528c9ecef4c3d68bfd Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 16 Jan 2012 00:00:35 +0100 Subject: [PATCH 35/41] Win32: reduce environment array reallocations Move environment array reallocation from do_putenv to the respective callers. Keep track of the environment size in a global variable. Use ALLOC_GROW in mingw_putenv to reduce reallocations. Allocate a sufficiently sized environment array in make_environment_block to prevent reallocations. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 60 ++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 2b8d9b4da9..201b355e25 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -734,19 +734,19 @@ static int lookupenv(char **env, const char *name, size_t nmln) /* * If name contains '=', then sets the variable, otherwise it unsets it + * Size includes the terminating NULL. Env must have room for size + 1 entries + * (in case of insert). Returns the new size. Optionally frees removed entries. */ -static char **do_putenv(char **env, const char *name, int free_old) +static int do_putenv(char **env, const char *name, int size, int free_old) { char *eq = strchrnul(name, '='); int i = lookupenv(env, name, eq-name); if (i < 0) { if (*eq) { - for (i = 0; env[i]; i++) - ; - env = xrealloc(env, (i+2)*sizeof(*env)); - env[i] = (char*) name; - env[i+1] = NULL; + env[size - 1] = (char*) name; + env[size] = NULL; + size++; } } else { @@ -754,13 +754,20 @@ static char **do_putenv(char **env, const char *name, int free_old) free(env[i]); if (*eq) env[i] = (char*) name; - else + else { for (; env[i]; i++) env[i] = env[i+1]; + size--; + } } - return env; + return size; } +/* used number of elements of environ array, including terminating NULL */ +static int environ_size = 0; +/* allocated size of environ array, in bytes */ +static int environ_alloc = 0; + #undef getenv char *mingw_getenv(const char *name) { @@ -780,7 +787,8 @@ char *mingw_getenv(const char *name) int mingw_putenv(const char *namevalue) { - environ = do_putenv(environ, namevalue, 1); + ALLOC_GROW(environ, (environ_size + 1) * sizeof(char*), environ_alloc); + environ_size = do_putenv(environ, namevalue, environ_size, 1); return 0; } @@ -973,31 +981,28 @@ static char *path_lookup(const char *cmd, char **path, int exe_only) static wchar_t *make_environment_block(char **deltaenv) { wchar_t *wenvblk = NULL; - int count = 
0; - char **e, **tmpenv; - int size = 0, wenvsz = 0, wenvpos = 0; + char **tmpenv; + int i = 0, size = environ_size, wenvsz = 0, wenvpos = 0; - while (environ[count]) - count++; + while (deltaenv && deltaenv[i]) + i++; - /* copy the environment */ - tmpenv = xmalloc(sizeof(*tmpenv) * (count + 1)); - memcpy(tmpenv, environ, sizeof(*tmpenv) * (count + 1)); + /* copy the environment, leaving space for changes */ + tmpenv = xmalloc((size + i) * sizeof(char*)); + memcpy(tmpenv, environ, size * sizeof(char*)); /* merge supplied environment changes into the temporary environment */ - for (e = deltaenv; e && *e; e++) - tmpenv = do_putenv(tmpenv, *e, 0); + for (i = 0; deltaenv && deltaenv[i]; i++) + size = do_putenv(tmpenv, deltaenv[i], size, 0); /* environment must be sorted */ - for (count = 0; tmpenv[count]; ) - count++; - qsort(tmpenv, count, sizeof(*tmpenv), compareenv); + qsort(tmpenv, size - 1, sizeof(char*), compareenv); /* create environment block from temporary environment */ - for (e = tmpenv; *e; e++) { - size = 2 * strlen(*e) + 2; /* +2 for final \0 */ + for (i = 0; tmpenv[i]; i++) { + size = 2 * strlen(tmpenv[i]) + 2; /* +2 for final \0 */ ALLOC_GROW(wenvblk, (wenvpos + size) * sizeof(wchar_t), wenvsz); - wenvpos += xutftowcs(&wenvblk[wenvpos], *e, size) + 1; + wenvpos += xutftowcs(&wenvblk[wenvpos], tmpenv[i], size) + 1; } /* add final \0 terminator */ wenvblk[wenvpos] = 0; @@ -2015,7 +2020,9 @@ void mingw_startup() maxlen = max(maxlen, wcslen(wenv[i])); /* nedmalloc can't free CRT memory, allocate resizable environment list */ - environ = xcalloc(i + 1, sizeof(char*)); + environ = NULL; + environ_size = i + 1; + ALLOC_GROW(environ, environ_size * sizeof(char*), environ_alloc); /* allocate buffer (wchar_t encodes to max 3 UTF-8 bytes) */ maxlen = 3 * maxlen + 1; @@ -2032,6 +2039,7 @@ void mingw_startup() len = xwcstoutf(buffer, wenv[i], maxlen); environ[i] = xmemdupz(buffer, len); } + environ[i] = NULL; free(buffer); /* initialize critical section for waitpid 
pinfo_t list */ From 260fee8d9650e0b1f8c96a57b64505ef4a090c36 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 15 Mar 2012 20:29:04 +0100 Subject: [PATCH 36/41] Win32: use low-level memory allocation during initialization As of d41489a6 "Add more large blob test cases", git's high-level memory allocation functions (xmalloc, xmemdupz etc.) access the environment to simulate limited memory in tests (see 'getenv("GIT_ALLOC_LIMIT")' in memory_limit_check()). These functions should not be used before the environment is fully initialized (particularly not to initialize the environment itself). The current solution ('environ = NULL; ALLOC_GROW(environ...)') only works because MSVCRT's getenv() reinitializes environ when it is NULL (i.e. it leaves us with two sets of unusable (non-UTF-8) and unfreeable (CRT-allocated) environments). Add our own set of malloc-or-die functions to be used in startup code. Also check the result of __wgetmainargs, which may fail if there's not enough memory for wide-char arguments and environment. This patch is in preparation of the sorted environment feature, which completely replaces MSVCRT's getenv() implementation. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 52 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 201b355e25..828da5d70f 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2001,16 +2001,37 @@ typedef struct { extern int __wgetmainargs(int *argc, wchar_t ***argv, wchar_t ***env, int glob, _startupinfo *si); +static NORETURN void die_startup() +{ + fputs("fatal: not enough memory for initialization", stderr); + exit(128); +} + +static void *malloc_startup(size_t size) +{ + void *result = malloc(size); + if (!result) + die_startup(); + return result; +} + +static char *wcstoutfdup_startup(char *buffer, const wchar_t *wcs, size_t len) +{ + len = xwcstoutf(buffer, wcs, len) + 1; + return memcpy(malloc_startup(len), buffer, len); +} + void mingw_startup() { - int i, len, maxlen, argc; + int i, maxlen, argc; char *buffer; wchar_t **wenv, **wargv; _startupinfo si; /* get wide char arguments and environment */ si.newmode = 0; - __wgetmainargs(&argc, &wargv, &wenv, _CRT_glob, &si); + if (__wgetmainargs(&argc, &wargv, &wenv, _CRT_glob, &si) < 0) + die_startup(); /* determine size of argv and environ conversion buffer */ maxlen = wcslen(_wpgmptr); @@ -2019,26 +2040,25 @@ void mingw_startup() for (i = 0; wenv[i]; i++) maxlen = max(maxlen, wcslen(wenv[i])); - /* nedmalloc can't free CRT memory, allocate resizable environment list */ - environ = NULL; + /* + * nedmalloc can't free CRT memory, allocate resizable environment + * list. Note that xmalloc / xmemdupz etc. call getenv, so we cannot + * use it while initializing the environment itself. 
+ */ environ_size = i + 1; - ALLOC_GROW(environ, environ_size * sizeof(char*), environ_alloc); + environ_alloc = alloc_nr(environ_size * sizeof(char*)); + environ = malloc_startup(environ_alloc); /* allocate buffer (wchar_t encodes to max 3 UTF-8 bytes) */ maxlen = 3 * maxlen + 1; - buffer = xmalloc(maxlen); + buffer = malloc_startup(maxlen); /* convert command line arguments and environment to UTF-8 */ - len = xwcstoutf(buffer, _wpgmptr, maxlen); - __argv[0] = xmemdupz(buffer, len); - for (i = 1; i < argc; i++) { - len = xwcstoutf(buffer, wargv[i], maxlen); - __argv[i] = xmemdupz(buffer, len); - } - for (i = 0; wenv[i]; i++) { - len = xwcstoutf(buffer, wenv[i], maxlen); - environ[i] = xmemdupz(buffer, len); - } + __argv[0] = wcstoutfdup_startup(buffer, _wpgmptr, maxlen); + for (i = 1; i < argc; i++) + __argv[i] = wcstoutfdup_startup(buffer, wargv[i], maxlen); + for (i = 0; wenv[i]; i++) + environ[i] = wcstoutfdup_startup(buffer, wenv[i], maxlen); environ[i] = NULL; free(buffer); From 71def2147530b89667844d370cec0decf25a873d Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 02:30:02 +0100 Subject: [PATCH 37/41] Win32: keep the environment sorted The Windows environment is sorted, keep it that way for O(log n) environment access. Change compareenv to compare only the keys, so that it can be used to find an entry irrespective of the value. Change lookupenv to binary search for an entry. Return one's complement of the insert position if not found (libc's bsearch returns NULL). Replace MSVCRT's getenv with a minimal do_getenv based on the binary search function. Change do_putenv to insert new entries at the correct position. Simplify the function by swapping if conditions and using memmove instead of for loops. Move qsort from make_environment_block to mingw_startup. 
We still need to sort on startup to make sure that the environment is sorted according to our compareenv function (while Win32 / CreateProcess requires the environment block to be sorted case-insensitively, CreateProcess currently doesn't enforce this, and some applications such as bash just don't care). Note that environment functions are _not_ thread-safe and are not required to be so by POSIX, the application is responsible for synchronizing access to the environment. MSVCRT's getenv and our new getenv implementation are better than that in that they are thread-safe with respect to other getenv calls as long as the environment is not modified. Git's indiscriminate use of getenv in background threads currently requires this property. Signed-off-by: Karsten Blees --- compat/mingw.c | 98 +++++++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 36 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 828da5d70f..7a275729e6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -713,23 +713,42 @@ char *mingw_getcwd(char *pointer, int len) return pointer; } -static int compareenv(const void *a, const void *b) +/* + * Compare environment entries by key (i.e. stopping at '=' or '\0'). + */ +static int compareenv(const void *v1, const void *v2) { - char *const *ea = a; - char *const *eb = b; - return strcasecmp(*ea, *eb); + const char *e1 = *(const char**)v1; + const char *e2 = *(const char**)v2; + + for (;;) { + int c1 = *e1++; + int c2 = *e2++; + c1 = (c1 == '=') ? 0 : tolower(c1); + c2 = (c2 == '=') ? 
0 : tolower(c2); + if (c1 > c2) + return 1; + if (c1 < c2) + return -1; + if (c1 == 0) + return 0; + } } -static int lookupenv(char **env, const char *name, size_t nmln) +static int bsearchenv(char **env, const char *name, size_t size) { - int i; - - for (i = 0; env[i]; i++) { - if (!strncasecmp(env[i], name, nmln) && '=' == env[i][nmln]) - /* matches */ - return i; + unsigned low = 0, high = size; + while (low < high) { + unsigned mid = low + ((high - low) >> 1); + int cmp = compareenv(&env[mid], &name); + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid; + else + return mid; } - return -1; + return ~low; /* not found, return 1's complement of insert position */ } /* @@ -739,26 +758,24 @@ static int lookupenv(char **env, const char *name, size_t nmln) */ static int do_putenv(char **env, const char *name, int size, int free_old) { - char *eq = strchrnul(name, '='); - int i = lookupenv(env, name, eq-name); + int i = bsearchenv(env, name, size - 1); - if (i < 0) { - if (*eq) { - env[size - 1] = (char*) name; - env[size] = NULL; + /* optionally free removed / replaced entry */ + if (i >= 0 && free_old) + free(env[i]); + + if (strchr(name, '=')) { + /* if new value ('key=value') is specified, insert or replace entry */ + if (i < 0) { + i = ~i; + memmove(&env[i + 1], &env[i], (size - i) * sizeof(char*)); size++; } - } - else { - if (free_old) - free(env[i]); - if (*eq) - env[i] = (char*) name; - else { - for (; env[i]; i++) - env[i] = env[i+1]; - size--; - } + env[i] = (char*) name; + } else if (i >= 0) { + /* otherwise ('key') remove existing entry */ + size--; + memmove(&env[i], &env[i + 1], (size - i) * sizeof(char*)); } return size; } @@ -768,15 +785,24 @@ static int environ_size = 0; /* allocated size of environ array, in bytes */ static int environ_alloc = 0; -#undef getenv +static char *do_getenv(const char *name) +{ + char *value; + int pos = bsearchenv(environ, name, environ_size - 1); + if (pos < 0) + return NULL; + value = strchr(environ[pos], 
'='); + return value ? &value[1] : NULL; +} + char *mingw_getenv(const char *name) { - char *result = getenv(name); + char *result = do_getenv(name); if (!result && !strcmp(name, "TMPDIR")) { /* on Windows it is TMP and TEMP */ - result = getenv("TMP"); + result = do_getenv("TMP"); if (!result) - result = getenv("TEMP"); + result = do_getenv("TEMP"); } else if (!result && !strcmp(name, "TERM")) { /* simulate TERM to enable auto-color (see color.c) */ @@ -995,9 +1021,6 @@ static wchar_t *make_environment_block(char **deltaenv) for (i = 0; deltaenv && deltaenv[i]; i++) size = do_putenv(tmpenv, deltaenv[i], size, 0); - /* environment must be sorted */ - qsort(tmpenv, size - 1, sizeof(char*), compareenv); - /* create environment block from temporary environment */ for (i = 0; tmpenv[i]; i++) { size = 2 * strlen(tmpenv[i]) + 2; /* +2 for final \0 */ @@ -2062,6 +2085,9 @@ void mingw_startup() environ[i] = NULL; free(buffer); + /* sort environment for O(log n) getenv / putenv */ + qsort(environ, i, sizeof(char*), compareenv); + /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); From 71a6de371920bbd3d38b4236c414d71d6f910feb Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sun, 15 Jan 2012 02:35:26 +0100 Subject: [PATCH 38/41] Win32: patch Windows environment on startup Fix Windows specific environment settings on startup rather than checking for special values on every getenv call. As a side effect, this makes the patched environment (i.e. with properly initialized TMPDIR and TERM) available to child processes. 
Signed-off-by: Karsten Blees --- compat/mingw.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 7a275729e6..b8bad7baf3 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -785,7 +785,7 @@ static int environ_size = 0; /* allocated size of environ array, in bytes */ static int environ_alloc = 0; -static char *do_getenv(const char *name) +char *mingw_getenv(const char *name) { char *value; int pos = bsearchenv(environ, name, environ_size - 1); @@ -795,22 +795,6 @@ static char *do_getenv(const char *name) return value ? &value[1] : NULL; } -char *mingw_getenv(const char *name) -{ - char *result = do_getenv(name); - if (!result && !strcmp(name, "TMPDIR")) { - /* on Windows it is TMP and TEMP */ - result = do_getenv("TMP"); - if (!result) - result = do_getenv("TEMP"); - } - else if (!result && !strcmp(name, "TERM")) { - /* simulate TERM to enable auto-color (see color.c) */ - result = "winansi"; - } - return result; -} - int mingw_putenv(const char *namevalue) { ALLOC_GROW(environ, (environ_size + 1) * sizeof(char*), environ_alloc); @@ -2088,6 +2072,21 @@ void mingw_startup() /* sort environment for O(log n) getenv / putenv */ qsort(environ, i, sizeof(char*), compareenv); + /* fix Windows specific environment settings */ + + /* on Windows it is TMP and TEMP */ + if (!getenv("TMPDIR")) { + const char *tmp = getenv("TMP"); + if (!tmp) + tmp = getenv("TEMP"); + if (tmp) + setenv("TMPDIR", tmp, 1); + } + + /* simulate TERM to enable auto-color (see color.c) */ + if (!getenv("TERM")) + setenv("TERM", "winansi", 1); + /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); From 6202932aad1c7cb5abcbab03f76283d703b1437b Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 15 Mar 2012 20:37:26 +0100 Subject: [PATCH 39/41] Win32: fix detection of empty directories in is_dir_empty On Windows XP (not Win7), directories cannot be deleted while a find 
handle is open, causing "Deletion of directory '...' failed. Should I try again?" prompts. Prior to 19d1e75d "Win32: Unicode file name support (except dirent)", these failures were silently ignored due to strbuf_free in is_dir_empty resetting GetLastError to ERROR_SUCCESS. Close the find handle in is_dir_empty so that git doesn't block deletion of the directory even after all other applications have released it. Reported-by: John Chen Signed-off-by: Karsten Blees --- compat/mingw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index b8bad7baf3..79bac118ba 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -241,8 +241,11 @@ static int is_dir_empty(const wchar_t *wpath) while (!wcscmp(findbuf.cFileName, L".") || !wcscmp(findbuf.cFileName, L"..")) - if (!FindNextFileW(handle, &findbuf)) - return GetLastError() == ERROR_NO_MORE_FILES; + if (!FindNextFileW(handle, &findbuf)) { + DWORD err = GetLastError(); + FindClose(handle); + return err == ERROR_NO_MORE_FILES; + } FindClose(handle); return 0; } From 17fc483f21e269d3ea74957355b22c86fce30345 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 1 Mar 2012 21:53:54 +0100 Subject: [PATCH 40/41] Win32: fix broken pipe detection As of "Win32: Thread-safe windows console output", git-log no longer terminates when the pager process dies. This is due to disabling buffering for the replaced stdout / stderr streams. Git-log will periodically fflush stdout (see write_or_die.c/maybe_flush_or_die()), but with no buffering, this is a NOP that always succeeds (so we never detect the EPIPE error). Exchange the original console handles with our console thread pipe handles by accessing the internal MSVCRT data structures directly (which are exposed via __pioinfo for some reason). Implement this with minimal assumptions about the actual data structure to make it work with different (hopefully even future) MSVCRT versions. 
While messing with internal data structures is ugly, this patch solves the problem at the source instead of adding more workarounds. We no longer need the special winansi_isatty override, and the limitations documented in "Win32: Thread-safe windows console output" are gone (i.e. fdopen(1/2) returns unbuffered streams now, and isatty() for duped console file descriptors works as expected). Signed-off-by: Karsten Blees --- compat/mingw.h | 2 - compat/winansi.c | 112 +++++++++++++++++++++++++++++------------------ 2 files changed, 69 insertions(+), 45 deletions(-) diff --git a/compat/mingw.h b/compat/mingw.h index 56f8eea423..e5006e92d9 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -331,9 +331,7 @@ int mingw_raise(int sig); */ void winansi_init(void); -int winansi_isatty(int fd); HANDLE winansi_get_osfhandle(int fd); -#define isatty winansi_isatty /* * git specific compatibility diff --git a/compat/winansi.c b/compat/winansi.c index 9f95954390..040ef5adca 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -7,11 +7,6 @@ #include #include -/* - Functions to be wrapped: -*/ -#undef isatty - /* ANSI codes used by git: m, K @@ -103,6 +98,7 @@ static int is_console(int fd) /* initialize attributes */ if (!initialized) { + console = hcon; attr = plain_attr = sbi.wAttributes; negative = 0; initialized = 1; @@ -463,29 +459,80 @@ static HANDLE duplicate_handle(HANDLE hnd) return hresult; } -static HANDLE redirect_console(FILE *stream, HANDLE *phcon, int new_fd) + +/* + * Make MSVCRT's internal file descriptor control structure accessible + * so that we can tweak OS handles and flags directly (we need MSVCRT + * to treat our pipe handle as if it were a console). + * + * We assume that the ioinfo structure (exposed by MSVCRT.dll via + * __pioinfo) starts with the OS handle and the flags. The exact size + * varies between MSVCRT versions, so we try different sizes until + * toggling the FDEV bit of _pioinfo(1)->osflags is reflected in + * isatty(1). 
+ */ +typedef struct { + HANDLE osfhnd; + char osflags; +} ioinfo; + +extern __declspec(dllimport) ioinfo *__pioinfo[]; + +static size_t sizeof_ioinfo = 0; + +#define IOINFO_L2E 5 +#define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E) + +#define FDEV 0x40 + +static inline ioinfo* _pioinfo(int fd) { - /* get original console handle */ - int fd = _fileno(stream); - HANDLE hcon = (HANDLE) _get_osfhandle(fd); - if (hcon == INVALID_HANDLE_VALUE) - die_errno("_get_osfhandle(%i) failed", fd); + return (ioinfo*)((char*)__pioinfo[fd >> IOINFO_L2E] + + (fd & (IOINFO_ARRAY_ELTS - 1)) * sizeof_ioinfo); +} - /* save a copy to phcon and console (used by the background thread) */ - console = *phcon = duplicate_handle(hcon); +static int init_sizeof_ioinfo() +{ + int istty, wastty; + /* don't init twice */ + if (sizeof_ioinfo) + return sizeof_ioinfo >= 256; - /* duplicate new_fd over fd (closes fd and associated handle (hcon)) */ - if (_dup2(new_fd, fd)) - die_errno("_dup2(%i, %i) failed", new_fd, fd); + sizeof_ioinfo = sizeof(ioinfo); + wastty = isatty(1); + while (sizeof_ioinfo < 256) { + /* toggle FDEV flag, check isatty, then toggle back */ + _pioinfo(1)->osflags ^= FDEV; + istty = isatty(1); + _pioinfo(1)->osflags ^= FDEV; + /* return if we found the correct size */ + if (istty != wastty) + return 0; + sizeof_ioinfo += sizeof(void*); + } + error("Tweaking file descriptors doesn't work with this MSVCRT.dll"); + return 1; +} - /* no buffering, or stdout / stderr will be out of sync */ - setbuf(stream, NULL); - return (HANDLE) _get_osfhandle(fd); +static HANDLE swap_osfhnd(int fd, HANDLE new_handle) +{ + ioinfo *pioinfo; + HANDLE old_handle; + + /* init ioinfo size if we haven't done so */ + if (init_sizeof_ioinfo()) + return INVALID_HANDLE_VALUE; + + /* get ioinfo pointer and change the handles */ + pioinfo = _pioinfo(fd); + old_handle = pioinfo->osfhnd; + pioinfo->osfhnd = new_handle; + return old_handle; } void winansi_init(void) { - int con1, con2, hwrite_fd; + int con1, con2; char 
name[32]; /* check if either stdout or stderr is a console output screen buffer */ @@ -514,19 +561,11 @@ void winansi_init(void) if (atexit(winansi_exit)) die_errno("atexit(winansi_exit) failed"); - /* create a file descriptor for the write end of the pipe */ - hwrite_fd = _open_osfhandle((long) duplicate_handle(hwrite), _O_BINARY); - if (hwrite_fd == -1) - die_errno("_open_osfhandle(%li) failed", (long) hwrite); - /* redirect stdout / stderr to the pipe */ if (con1) - hwrite1 = redirect_console(stdout, &hconsole1, hwrite_fd); + hconsole1 = swap_osfhnd(1, hwrite1 = duplicate_handle(hwrite)); if (con2) - hwrite2 = redirect_console(stderr, &hconsole2, hwrite_fd); - - /* close pipe file descriptor (also closes the duped hwrite) */ - close(hwrite_fd); + hconsole2 = swap_osfhnd(2, hwrite2 = duplicate_handle(hwrite)); } static int is_same_handle(HANDLE hnd, int fd) @@ -534,19 +573,6 @@ static int is_same_handle(HANDLE hnd, int fd) return hnd != INVALID_HANDLE_VALUE && hnd == (HANDLE) _get_osfhandle(fd); } -/* - * Return true if stdout / stderr is a pipe redirecting to the console. - */ -int winansi_isatty(int fd) -{ - if (fd == 1 && is_same_handle(hwrite1, 1)) - return 1; - else if (fd == 2 && is_same_handle(hwrite2, 2)) - return 1; - else - return isatty(fd); -} - /* * Returns the real console handle if stdout / stderr is a pipe redirecting * to the console. Allows spawn / exec to pass the console to the next process. From 49da5f7c768aca669f50194d722b2f1a092f9c5f Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 15 Mar 2012 14:45:49 +0100 Subject: [PATCH 41/41] Win32: fix segfault in WriteConsoleW when debugging in gdb On Windows XP (not Win7), WriteConsoleW and WriteFile seem to raise and catch SIGSEGV if the lpNumberOfCharsWritten parameter is NULL. This is not a problem when executed standalone, but gdb stops execution here (unless disabled via "handle SIGSEGV nostop"). Fix it by passing a dummy variable. 
Signed-off-by: Karsten Blees --- compat/winansi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index 040ef5adca..13bc28dd33 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -72,7 +72,8 @@ static void warn_if_raster_font(void) L"doesn\'t support Unicode. If you experience strange " L"characters in the output, consider switching to a " L"TrueType font such as Lucida Console!\n"; - WriteConsoleW(console, msg, wcslen(msg), NULL, NULL); + DWORD dummy; + WriteConsoleW(console, msg, wcslen(msg), &dummy, NULL); } } @@ -114,12 +115,13 @@ static void write_console(unsigned char *str, size_t len) { /* only called from console_thread, so a static buffer will do */ static wchar_t wbuf[2 * BUFFER_SIZE + 1]; + DWORD dummy; /* convert utf-8 to utf-16 */ int wlen = xutftowcsn(wbuf, (char*) str, ARRAY_SIZE(wbuf), len); /* write directly to console */ - WriteConsoleW(console, wbuf, wlen, NULL, NULL); + WriteConsoleW(console, wbuf, wlen, &dummy, NULL); /* remember if non-ascii characters are printed */ if (wlen != len)