diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 4a5837881d..e150c77cff 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp] [-v | --invert-match] [-h|-H] [--full-name] [-E | --extended-regexp] [-G | --basic-regexp] + [-P | --perl-regexp] [-F | --fixed-strings] [-n | --line-number] [-l | --files-with-matches] [-L | --files-without-match] [(-O | --open-files-in-pager) []] @@ -97,6 +98,11 @@ OPTIONS Use POSIX extended/basic regexp for patterns. Default is to use basic regexp. +-P:: +--perl-regexp:: + Use Perl-compatible regexp for patterns. Requires libpcre to be + compiled in. + -F:: --fixed-strings:: Use fixed strings for patterns (don't interpret pattern diff --git a/Makefile b/Makefile index 5379aaa2e0..cf4da1e190 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,12 @@ all:: # Define NO_OPENSSL environment variable if you do not have OpenSSL. # This also implies BLK_SHA1. # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -1251,6 +1257,15 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif +ifdef USE_LIBPCRE + BASIC_CFLAGS += -DUSE_LIBPCRE + ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) + endif + EXTLIBS += -lpcre +endif + ifdef NO_CURL BASIC_CFLAGS += -DNO_CURL REMOTE_CURL_PRIMARY = diff --git a/builtin/grep.c b/builtin/grep.c index 931eee0d75..871afaa3c7 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int i; int dummy; int use_index = 1; + enum { + pattern_type_unspecified = 0, + pattern_type_bre, + pattern_type_ere, + pattern_type_fixed, + pattern_type_pcre, + }; + int pattern_type = pattern_type_unspecified; + struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -774,13 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix) "descend at most levels", PARSE_OPT_NONEG, NULL, 1 }, OPT_GROUP(""), - OPT_BIT('E', "extended-regexp", &opt.regflags, - "use extended POSIX regular expressions", REG_EXTENDED), - OPT_NEGBIT('G', "basic-regexp", &opt.regflags, - "use basic POSIX regular expressions (default)", - REG_EXTENDED), - OPT_BOOLEAN('F', "fixed-strings", &opt.fixed, - "interpret patterns as fixed strings"), + OPT_SET_INT('E', "extended-regexp", &pattern_type, + "use extended POSIX regular expressions", + pattern_type_ere), + OPT_SET_INT('G', "basic-regexp", &pattern_type, + "use basic POSIX regular expressions (default)", + pattern_type_bre), + OPT_SET_INT('F', "fixed-strings", &pattern_type, + "interpret patterns as fixed strings", + pattern_type_fixed), + OPT_SET_INT('P', "perl-regexp", &pattern_type, + "use Perl-compatible regular expressions", + pattern_type_pcre), OPT_GROUP(""), OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"), OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1), @@ -886,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); + switch (pattern_type) { + case pattern_type_fixed: + opt.fixed = 1; + opt.pcre = 0; + break; + case pattern_type_bre: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags &= ~REG_EXTENDED; + break; + case pattern_type_ere: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags |= REG_EXTENDED; + break; + case pattern_type_pcre: + opt.fixed = 0; + opt.pcre = 1; + break; + default: + break; /* nothing */ + } if (use_index && !startup_info->have_repository) /* die the same way as if we did it at the beginning */ @@ -925,8 +961,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) die(_("no pattern given.")); if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; - if ((opt.regflags != REG_NEWLINE) && opt.fixed) - die(_("cannot mix --fixed-strings and regexp")); #ifndef NO_PTHREADS if (online_cpus() == 1 || !grep_threads_ok(&opt)) diff --git a/config.mak.in b/config.mak.in index e378534cbd..f30130b17a 100644 --- a/config.mak.in +++ b/config.mak.in @@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@ NO_ICONV=@NO_ICONV@ OLD_ICONV=@OLD_ICONV@ NO_REGEX=@NO_REGEX@ +USE_LIBPCRE=@USE_LIBPCRE@ NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@ INLINE=@INLINE@ SOCKLEN_T=@SOCKLEN_T@ diff --git a/configure.ac b/configure.ac index fafd81557c..048a1d4972 100644 --- a/configure.ac +++ b/configure.ac @@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)]) AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\ GIT_PARSE_WITH(openssl)) # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# +AC_ARG_WITH(libpcre, +AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), +if test "$withval" = "no"; then \ + USE_LIBPCRE=; \ +elif test "$withval" = "yes"; then \ + USE_LIBPCRE=YesPlease; \ +else + USE_LIBPCRE=YesPlease; \ + LIBPCREDIR=$withval; \ + AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \ + GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \ +fi \ +) +# # Define NO_CURL if you do not have curl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -434,6 +455,25 @@ GIT_UNSTASH_FLAGS($OPENSSLDIR) AC_SUBST(NEEDS_SSL_WITH_CRYPTO) AC_SUBST(NO_OPENSSL) +# +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# + +if test -n "$USE_LIBPCRE"; then + +GIT_STASH_FLAGS($LIBPCREDIR) + +AC_CHECK_LIB([pcre], [pcre_version], +[USE_LIBPCRE=YesPlease], +[USE_LIBPCRE=]) + +GIT_UNSTASH_FLAGS($LIBPCREDIR) + +AC_SUBST(USE_LIBPCRE) + +fi + # # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 049e218477..b36290fa60 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1443,6 +1443,7 @@ _git_grep () --text --ignore-case --word-regexp --invert-match --full-name --line-number --extended-regexp --basic-regexp --fixed-strings + --perl-regexp --files-with-matches --name-only --files-without-match --max-depth diff --git a/grep.c b/grep.c index d67baf9566..d03d9e24c2 100644 --- a/grep.c +++ b/grep.c @@ -59,6 +59,84 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) return ret; } +static NORETURN void compile_regexp_failed(const struct grep_pat *p, + const char *error) +{ + char where[1024]; + + if (p->no) + sprintf(where, "In '%s' at %d, ", p->origin, p->no); + else if (p->origin) + sprintf(where, "%s, ", p->origin); + else + where[0] = 0; + + die("%s'%s': %s", where, p->pattern, error); +} + +#ifdef USE_LIBPCRE +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + const char *error; + int erroffset; + int options = 0; + + if (opt->ignore_case) + options |= PCRE_CASELESS; + + p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, + NULL); + if (!p->pcre_regexp) + compile_regexp_failed(p, error); + + p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error); + if (!p->pcre_extra_info && error) + die("%s", error); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ovector[30], ret, flags = 0; + + if (eflags & REG_NOTBOL) + flags |= PCRE_NOTBOL; + + ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line, + 0, flags, ovector, ARRAY_SIZE(ovector)); + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) + die("pcre_exec failed with error code %d", ret); + if (ret > 0) { + ret = 0; + match->rm_so = ovector[0]; + match->rm_eo = ovector[1]; + } + + return ret; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ + pcre_free(p->pcre_regexp); + pcre_free(p->pcre_extra_info); +} +#else /* !USE_LIBPCRE */ +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + return 1; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE */ + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; @@ -70,20 +148,17 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (p->fixed) return; + if (opt->pcre) { + compile_pcre_regexp(p, opt); + return; + } + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; - char where[1024]; - if (p->no) - sprintf(where, "In '%s' at %d, ", - p->origin, p->no); - else if (p->origin) - sprintf(where, "%s, ", p->origin); - else - where[0] = 0; regerror(err, &p->regexp, errbuf, 1024); regfree(&p->regexp); - die("%s'%s': %s", where, p->pattern, errbuf); + compile_regexp_failed(p, errbuf); } } @@ -320,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - regfree(&p->regexp); + if (p->pcre_regexp) + free_pcre_regexp(p); + else + regfree(&p->regexp); break; default: break; @@ -419,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, if (p->fixed) hit = !fixmatch(p, line, eol, match); + else if (p->pcre_regexp) + hit = !pcrematch(p, line, eol, match, eflags); else hit = !regmatch(&p->regexp, line, eol, match, eflags); @@ -898,7 +978,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, int hit; /* - * look_ahead() skips quicly to the line that possibly + * look_ahead() skips quickly to the line that possibly * has the next hit; don't call it if we need to do * something more than just skipping the current line * in response to an unmatch for the current line. E.g. diff --git a/grep.h b/grep.h index 06621fe663..cd055cdfa8 100644 --- a/grep.h +++ b/grep.h @@ -1,6 +1,12 @@ #ifndef GREP_H #define GREP_H #include "color.h" +#ifdef USE_LIBPCRE +#include +#else +typedef int pcre; +typedef int pcre_extra; +#endif enum grep_pat_token { GREP_PATTERN, @@ -33,6 +39,8 @@ struct grep_pat { size_t patternlen; enum grep_header_field field; regex_t regexp; + pcre *pcre_regexp; + pcre_extra *pcre_extra_info; unsigned fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; @@ -83,6 +91,7 @@ struct grep_opt { #define GREP_BINARY_TEXT 2 int binary; int extended; + int pcre; int relative; int pathname; int null_following_name; diff --git a/t/README b/t/README index cad36dd750..c85abaffb3 100644 --- a/t/README +++ b/t/README @@ -588,6 +588,11 @@ use these, and "test_set_prereq" for how to define your own. Test is not run by root user, and an attempt to write to an unwritable file is expected to fail correctly. + - LIBPCRE + + Git was compiled with USE_LIBPCRE=YesPlease. Wrap any tests + that use git-grep --perl-regexp or git-grep -P in these. + Tips for Writing Tests ---------------------- diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 8184c264cf..2a31eca5f2 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -26,6 +26,12 @@ test_expect_success setup ' echo foo mmap bar_mmap echo foo_mmap bar mmap baz } >file && + { + echo Hello world + echo HeLLo world + echo Hello_world + echo HeLLo_world + } >hello_world && echo vvv >v && echo ww w >w && echo x x xx x >x && @@ -599,4 +605,52 @@ test_expect_success 'grep -e -- -- path' ' test_cmp expected actual ' +cat >expected <actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P pattern' ' + git grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -i pattern' ' + { + echo "hello.c: printf(\"Hello world.\n\");" + } >expected && + git grep -P -i "PRINTF\([^\d]+\)" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -w pattern' ' + { + echo "hello_world:Hello world" + echo "hello_world:HeLLo world" + } >expected && + git grep -P -w "He((?i)ll)o" hello_world >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -F returns error' ' + test_expect_code 128 git grep -P -F main +' + +test_expect_success LIBPCRE 'grep -P -E returns error' ' + test_expect_code 128 git grep -P -E main +' + +test_expect_failure LIBPCRE 'grep -P -G returns error' ' + test_expect_code 128 git grep -P -G main +' + +test_expect_failure LIBPCRE 'grep -P -E -G returns error' ' + test_expect_code 128 git grep -P -E -G main +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index b2ce2bc4b2..64a367a46d 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1072,6 +1072,7 @@ esac test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PYTHON" && test_set_prereq PYTHON +test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON"