Merge branch 'mc/tr2-process-ancestry-cleanup' into jch

Add process ancestry data to trace2 on macOS to match what we
already do on Linux and Windows.  Also adjust the way the Windows
implementation reports this information to match the other two.

* mc/tr2-process-ancestry-cleanup:
  t0213: add trace2 cmd_ancestry tests
  test-tool: extend trace2 helper with 400ancestry
  trace2: emit cmd_ancestry data for Windows
  trace2: refactor Windows process ancestry trace2 event
  build: include procinfo.c impl for macOS
  trace2: add macOS process ancestry tracing
This commit is contained in:
Junio C Hamano
2026-02-23 14:25:42 -08:00
9 changed files with 379 additions and 27 deletions

97
compat/darwin/procinfo.c Normal file
View File

@@ -0,0 +1,97 @@
#include "git-compat-util.h"
#include "strbuf.h"
#include "strvec.h"
#include "trace2.h"
#include <sys/sysctl.h>
/*
* An arbitrarily chosen value to limit the depth of the ancestor chain.
*/
#define NR_PIDS_LIMIT 10
/*
* Get the process name and parent PID for a given PID using sysctl().
* Returns 0 on success, -1 on failure.
*/
static int get_proc_info(pid_t pid, struct strbuf *name, pid_t *ppid)
{
int mib[4];
struct kinfo_proc proc;
size_t size = sizeof(proc);
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_PID;
mib[3] = pid;
if (sysctl(mib, 4, &proc, &size, NULL, 0) < 0)
return -1;
if (size == 0)
return -1;
strbuf_addstr(name, proc.kp_proc.p_comm);
*ppid = proc.kp_eproc.e_ppid;
return 0;
}
/*
* Recursively push process names onto the ancestry array.
* We guard against cycles by limiting the depth to NR_PIDS_LIMIT.
*/
static void push_ancestry_name(struct strvec *names, pid_t pid, int depth)
{
struct strbuf name = STRBUF_INIT;
pid_t ppid;
if (depth >= NR_PIDS_LIMIT)
return;
if (pid <= 0)
return;
if (get_proc_info(pid, &name, &ppid) < 0)
goto cleanup;
strvec_push(names, name.buf);
/*
* Recurse to the parent process. Stop if ppid not valid
* or if we've reached ourselves (cycle).
*/
if (ppid && ppid != pid)
push_ancestry_name(names, ppid, depth + 1);
cleanup:
strbuf_release(&name);
}
void trace2_collect_process_info(enum trace2_process_info_reason reason)
{
struct strvec names = STRVEC_INIT;
if (!trace2_is_enabled())
return;
switch (reason) {
case TRACE2_PROCESS_INFO_STARTUP:
push_ancestry_name(&names, getppid(), 0);
if (names.nr)
trace2_cmd_ancestry(names.v);
strvec_clear(&names);
break;
case TRACE2_PROCESS_INFO_EXIT:
/*
* The Windows version of this calls its
* get_peak_memory_info() here. We may want to insert
* similar process-end statistics here in the future.
*/
break;
default:
BUG("trace2_collect_process_info: unknown reason '%d'", reason);
}
}

View File

@@ -3,6 +3,7 @@
#include "../../git-compat-util.h"
#include "../../json-writer.h"
#include "../../repository.h"
#include "../../strvec.h"
#include "../../trace2.h"
#include "lazyload.h"
#include <psapi.h>
@@ -32,12 +33,7 @@ static int find_pid(DWORD pid, HANDLE hSnapshot, PROCESSENTRY32 *pe32)
}
/*
* Accumulate JSON array of our parent processes:
* [
* exe-name-parent,
* exe-name-grand-parent,
* ...
* ]
* Accumulate array of our parent process names.
*
* Note: we only report the filename of the process executable; the
* only way to get its full pathname is to use OpenProcess()
@@ -73,7 +69,7 @@ static int find_pid(DWORD pid, HANDLE hSnapshot, PROCESSENTRY32 *pe32)
* simple and avoid the alloc/realloc overhead. It is OK if we
* truncate the search and return a partial answer.
*/
static void get_processes(struct json_writer *jw, HANDLE hSnapshot)
static void get_processes(struct strvec *names, HANDLE hSnapshot)
{
PROCESSENTRY32 pe32;
DWORD pid;
@@ -82,19 +78,19 @@ static void get_processes(struct json_writer *jw, HANDLE hSnapshot)
pid = GetCurrentProcessId();
while (find_pid(pid, hSnapshot, &pe32)) {
/* Only report parents. Omit self from the JSON output. */
/* Only report parents. Omit self from the output. */
if (nr_pids)
jw_array_string(jw, pe32.szExeFile);
strvec_push(names, pe32.szExeFile);
/* Check for cycle in snapshot. (Yes, it happened.) */
for (k = 0; k < nr_pids; k++)
if (pid == pid_list[k]) {
jw_array_string(jw, "(cycle)");
strvec_push(names, "(cycle)");
return;
}
if (nr_pids == NR_PIDS_LIMIT) {
jw_array_string(jw, "(truncated)");
strvec_push(names, "(truncated)");
return;
}
@@ -105,24 +101,14 @@ static void get_processes(struct json_writer *jw, HANDLE hSnapshot)
}
/*
* Emit JSON data for the current and parent processes. Individual
* trace2 targets can decide how to actually print it.
* Collect the list of parent process names.
*/
static void get_ancestry(void)
static void get_ancestry(struct strvec *names)
{
HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
if (hSnapshot != INVALID_HANDLE_VALUE) {
struct json_writer jw = JSON_WRITER_INIT;
jw_array_begin(&jw, 0);
get_processes(&jw, hSnapshot);
jw_end(&jw);
trace2_data_json("process", the_repository, "windows/ancestry",
&jw);
jw_release(&jw);
get_processes(names, hSnapshot);
CloseHandle(hSnapshot);
}
}
@@ -176,13 +162,35 @@ static void get_peak_memory_info(void)
void trace2_collect_process_info(enum trace2_process_info_reason reason)
{
struct strvec names = STRVEC_INIT;
if (!trace2_is_enabled())
return;
switch (reason) {
case TRACE2_PROCESS_INFO_STARTUP:
get_is_being_debugged();
get_ancestry();
get_ancestry(&names);
if (names.nr) {
/*
Emit the ancestry data as a data_json event to
maintain compatibility for consumers of the older
"windows/ancestry" event.
*/
struct json_writer jw = JSON_WRITER_INIT;
jw_array_begin(&jw, 0);
for (size_t i = 0; i < names.nr; i++)
jw_array_string(&jw, names.v[i]);
jw_end(&jw);
trace2_data_json("process", the_repository,
"windows/ancestry", &jw);
jw_release(&jw);
/* Emit the ancestry data with the new event. */
trace2_cmd_ancestry(names.v);
}
strvec_clear(&names);
return;
case TRACE2_PROCESS_INFO_EXIT:

View File

@@ -149,6 +149,8 @@ ifeq ($(uname_S),Darwin)
HAVE_NS_GET_EXECUTABLE_PATH = YesPlease
CSPRNG_METHOD = arc4random
USE_ENHANCED_BASIC_REGULAR_EXPRESSIONS = YesPlease
HAVE_PLATFORM_PROCINFO = YesPlease
COMPAT_OBJS += compat/darwin/procinfo.o
ifeq ($(uname_M),arm64)
HOMEBREW_PREFIX = /opt/homebrew

View File

@@ -274,6 +274,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
add_compile_definitions(PROCFS_EXECUTABLE_PATH="/proc/self/exe" HAVE_DEV_TTY )
list(APPEND compat_SOURCES unix-socket.c unix-stream-server.c compat/linux/procinfo.c)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
list(APPEND compat_SOURCES compat/darwin/procinfo.c)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Windows")

View File

@@ -1294,6 +1294,8 @@ if host_machine.system() == 'linux'
libgit_sources += 'compat/linux/procinfo.c'
elif host_machine.system() == 'windows'
libgit_sources += 'compat/win32/trace2_win32_process_info.c'
elif host_machine.system() == 'darwin'
libgit_sources += 'compat/darwin/procinfo.c'
else
libgit_sources += 'compat/stub/procinfo.c'
endif

View File

@@ -466,6 +466,63 @@ static int ut_303redact_def_param(int argc, const char **argv)
return 0;
}
/*
* Run a child process with specific trace2 environment settings so that
* we can capture its trace2 output (including cmd_ancestry) in isolation.
*
* test-tool trace2 400ancestry <target> <output_file> [<child_command_line>]
*
* <target> is one of: normal, perf, event
*
* For example:
* test-tool trace2 400ancestry normal out.normal test-tool trace2 001return 0
*
* The child process inherits a controlled trace2 environment where only
* the specified target is directed to <output_file>. The parent's trace2
* environment variables are cleared in the child so that only the child's
* events are captured.
*
* This is used by t0213-trace2-ancestry.sh to test cmd_ancestry events.
* The child process will see "test-tool" as its immediate parent in the
* process ancestry, giving us a predictable value to verify.
*/
static int ut_400ancestry(int argc, const char **argv)
{
struct child_process cmd = CHILD_PROCESS_INIT;
const char *target;
const char *outfile;
int result;
if (argc < 3)
die("expect <target> <output_file> <child_command_line>");
target = argv[0];
outfile = argv[1];
argv += 2;
argc -= 2;
/* Clear all trace2 environment variables in the child. */
strvec_push(&cmd.env, "GIT_TRACE2=");
strvec_push(&cmd.env, "GIT_TRACE2_PERF=");
strvec_push(&cmd.env, "GIT_TRACE2_EVENT=");
strvec_push(&cmd.env, "GIT_TRACE2_BRIEF=1");
/* Set only the requested target. */
if (!strcmp(target, "normal"))
strvec_pushf(&cmd.env, "GIT_TRACE2=%s", outfile);
else if (!strcmp(target, "perf"))
strvec_pushf(&cmd.env, "GIT_TRACE2_PERF=%s", outfile);
else if (!strcmp(target, "event"))
strvec_pushf(&cmd.env, "GIT_TRACE2_EVENT=%s", outfile);
else
die("invalid target '%s', expected: normal, perf, event",
target);
strvec_pushv(&cmd.args, argv);
result = run_command(&cmd);
exit(result);
}
/*
* Usage:
* test-tool trace2 <ut_name_1> <ut_usage_1>
@@ -497,6 +554,8 @@ static struct unit_test ut_table[] = {
{ ut_301redact_child_start, "301redact_child_start", "<argv...>" },
{ ut_302redact_exec, "302redact_exec", "<exe> <argv...>" },
{ ut_303redact_def_param, "303redact_def_param", "<key> <value>" },
{ ut_400ancestry, "400ancestry", "<target> <output_file> [<child_command_line>]" },
};
/* clang-format on */

View File

@@ -134,6 +134,7 @@ integration_tests = [
't0210-trace2-normal.sh',
't0211-trace2-perf.sh',
't0212-trace2-event.sh',
't0213-trace2-ancestry.sh',
't0300-credentials.sh',
't0301-credential-cache.sh',
't0302-credential-store.sh',

View File

@@ -74,8 +74,9 @@ scrub_normal () {
# This line is only emitted when RUNTIME_PREFIX is defined,
# so just omit it for testing purposes.
#
# 4. 'cmd_ancestry' is not implemented everywhere, so for portability's
# sake, skip it when parsing normal.
# 4. 'cmd_ancestry' output depends on how the test is run and
# is not relevant to the features we are testing here.
# Ancestry tests are covered in t0213-trace2-ancestry.sh instead.
sed \
-e 's/elapsed:[0-9]*\.[0-9][0-9]*\([eE][-+]\{0,1\}[0-9][0-9]*\)\{0,1\}/elapsed:_TIME_/g' \
-e "s/^start '[^']*' \(.*\)/start _EXE_ \1/" \

180
t/t0213-trace2-ancestry.sh Executable file
View File

@@ -0,0 +1,180 @@
#!/bin/sh
test_description='test trace2 cmd_ancestry event'
. ./test-lib.sh
# Turn off any inherited trace2 settings for this test.
sane_unset GIT_TRACE2 GIT_TRACE2_PERF GIT_TRACE2_EVENT
sane_unset GIT_TRACE2_BRIEF
sane_unset GIT_TRACE2_CONFIG_PARAMS
# Add t/helper directory to PATH so that we can use a relative
# path to run nested instances of test-tool.exe (see 004child).
# This helps with HEREDOC comparisons later.
TTDIR="$GIT_BUILD_DIR/t/helper/" && export TTDIR
PATH="$TTDIR:$PATH" && export PATH
# The 400ancestry helper spawns a child process so that the child
# sees "test-tool" in its process ancestry. We capture only the
# child's trace2 output to a file.
#
# The tests use git commands that spawn child git processes (e.g.,
# alias resolution) to create a controlled multi-level process tree.
# Because cmd_ancestry walks the real process tree, processes will
# also report ancestors above "test-tool" that depend on the test
# runner environment (e.g., bash, make, tmux). The filter functions
# below truncate the ancestry at "test-tool", discarding anything
# above it, so only the controlled portion is verified.
#
# On platforms without a real procinfo implementation (the stub),
# no cmd_ancestry event is emitted. We detect this at runtime and
# skip the format-specific tests accordingly.
# Determine if cmd_ancestry is supported on this platform.
test_expect_success 'detect cmd_ancestry support' '
test_when_finished "rm -f trace.detect" &&
GIT_TRACE2_BRIEF=1 GIT_TRACE2="$(pwd)/trace.detect" \
test-tool trace2 001return 0 &&
if grep -q "^cmd_ancestry" trace.detect
then
test_set_prereq TRACE2_ANCESTRY
fi
'
# Filter functions for each trace2 target format.
#
# Each extracts cmd_ancestry events, strips format-specific syntax,
# and truncates the ancestor list at the outermost "test-tool"
# (or "test-tool.exe" on Windows), discarding any higher-level
# (uncontrolled) ancestors.
#
# Output is a space-separated list of ancestor names, one line per
# cmd_ancestry event, with the immediate parent listed first:
#
# test-tool (or: test-tool.exe)
# git test-tool (or: git.exe test-tool.exe)
# git test-tool test-tool (or: git.exe test-tool.exe test-tool.exe)
if test_have_prereq MINGW
then
TT=test-tool$X
else
TT=test-tool
fi
# Reduce "normal" target output (read from stdin) to one line per
# cmd_ancestry event, each a space-separated list of ancestor names.
#
# Only lines that start with "cmd_ancestry" are printed. For each one:
#   1. the leading "cmd_ancestry " is removed,
#   2. every " <- " separator becomes a single space,
#   3. everything after the last "$TT" that is followed by a space is
#      dropped (the match is greedy, so the last occurrence wins).
#
# $TT is expanded when the function runs and is spliced into the sed
# expression as a regular expression, not as a literal string.
filter_ancestry_normal () {
sed -n '/^cmd_ancestry/{
s/^cmd_ancestry //
s/ <- / /g
s/\(.*'"$TT"'\) .*/\1/
p
}'
}
# Reduce "perf" target output (read from stdin) to one line per
# cmd_ancestry event.
#
# Only lines containing "cmd_ancestry" are printed. For each one:
#   1. everything up to and including "ancestry:[" is removed,
#   2. the first remaining "]" is removed,
#   3. everything after the last "$TT" that is followed by a space is
#      dropped (the match is greedy, so the last occurrence wins).
#
# $TT is expanded when the function runs and is spliced into the sed
# expression as a regular expression, not as a literal string.
filter_ancestry_perf () {
sed -n '/cmd_ancestry/{
s/.*ancestry:\[//
s/\]//
s/\(.*'"$TT"'\) .*/\1/
p
}'
}
# Reduce "event" target output (read from stdin) to one line per
# cmd_ancestry event, each a space-separated list of ancestor names.
#
# Only lines containing "cmd_ancestry" (with its double quotes) are
# printed. For each one:
#   1. everything up to and including '"ancestry":[' is removed,
#   2. the first "]" and everything after it is removed,
#   3. all double quotes are deleted,
#   4. every comma becomes a space,
#   5. everything after the last "$TT" that is followed by a space is
#      dropped (the match is greedy, so the last occurrence wins).
#
# $TT is expanded when the function runs and is spliced into the sed
# expression as a regular expression, not as a literal string.
filter_ancestry_event () {
sed -n '/"cmd_ancestry"/{
s/.*"ancestry":\[//
s/\].*//
s/"//g
s/,/ /g
s/\(.*'"$TT"'\) .*/\1/
p
}'
}
# On Windows (MINGW) when running with the bin-wrappers, we also see "sh.exe" in
# the ancestry. We must therefore account for this expected ancestry element in
# the expected output of the tests.
if test_have_prereq MINGW && test -z "$no_bin_wrappers"; then
SH_TT="sh$X $TT"
else
SH_TT="$TT"
fi
# Git alias resolution spawns the target command as a child process.
# Using "git -c alias.xyz=version xyz" creates a two-level chain:
#
# test-tool (400ancestry)
# -> git (resolves alias xyz -> version)
# -> git (version)
#
# Both git processes are instrumented and emit cmd_ancestry. After
# filtering out ancestors above test-tool, we get:
#
# test-tool (from git alias resolver)
# git test-tool (from git version)
test_expect_success TRACE2_ANCESTRY 'normal: git alias chain, 2 levels' '
test_when_finished "rm -f trace.normal actual expect" &&
test-tool trace2 400ancestry normal "$(pwd)/trace.normal" \
git -c alias.xyz=version xyz &&
filter_ancestry_normal <trace.normal >actual &&
cat >expect <<-EOF &&
$SH_TT
git$X $SH_TT
EOF
test_cmp expect actual
'
test_expect_success TRACE2_ANCESTRY 'perf: git alias chain, 2 levels' '
test_when_finished "rm -f trace.perf actual expect" &&
test-tool trace2 400ancestry perf "$(pwd)/trace.perf" \
git -c alias.xyz=version xyz &&
filter_ancestry_perf <trace.perf >actual &&
cat >expect <<-EOF &&
$SH_TT
git$X $SH_TT
EOF
test_cmp expect actual
'
test_expect_success TRACE2_ANCESTRY 'event: git alias chain, 2 levels' '
test_when_finished "rm -f trace.event actual expect" &&
test-tool trace2 400ancestry event "$(pwd)/trace.event" \
git -c alias.xyz=version xyz &&
filter_ancestry_event <trace.event >actual &&
cat >expect <<-EOF &&
$SH_TT
git$X $SH_TT
EOF
test_cmp expect actual
'
# Use 004child to add a test-tool layer, creating a three-level chain:
#
# test-tool (400ancestry)
# -> test-tool (004child)
# -> git (resolves alias xyz -> version)
# -> git (version)
#
# Three instrumented processes emit cmd_ancestry. After filtering:
#
# test-tool (from test-tool 004child)
# test-tool test-tool (from git alias resolver)
# git test-tool test-tool (from git version)
test_expect_success TRACE2_ANCESTRY 'normal: deeper chain, 3 levels' '
test_when_finished "rm -f trace.normal actual expect" &&
test-tool trace2 400ancestry normal "$(pwd)/trace.normal" \
test-tool trace2 004child \
git -c alias.xyz=version xyz &&
filter_ancestry_normal <trace.normal >actual &&
cat >expect <<-EOF &&
$TT
$SH_TT $TT
git$X $SH_TT $TT
EOF
test_cmp expect actual
'
test_done