Files
git/hook.h
Adrian Ratiu c549a40547 hook: add jobs option
Allow the API callers to specify the number of jobs across which
hook execution can be parallelized. It defaults to 1 and no hook
currently changes it, so all hooks run sequentially as before.

This allows us to both pave the way for parallel hook execution
(that will be a follow-up patch series building upon this) and to
finish the API conversion of builtin/receive-pack.c, keeping the
output async sideband thread ("muxer") design as Peff suggested.

When .jobs==1 nothing changes, the "copy_to_sideband" async thread
still outputs directly via sideband channel 2, keeping the current
(mostly) real-time output characteristics, avoids unnecessary poll
delays or deadlock risks.

When .jobs > 1, a more complex muxer is needed to buffer the hook
output and avoid interleaving. After working on this mux I quickly
realized I was re-implementing run-command with ungroup=0 so that
idea was dropped in favor of run-command which outputs to stderr.

In other words, run-command itself already can buffer/deinterleave
pp child outputs (ungroup=0), so we can just connect its stderr to
the sideband async task when jobs > 1.

Maybe it helps to illustrate how it works with ascii graphics:

 [ Sequential (jobs = 1) ]             [ Parallel (jobs > 1) ]

 +--------------+                      +--------+   +--------+
 | Hook Process |                      | Hook 1 |   | Hook 2 |
 +--------------+                      +--------+   +--------+
        |                                  |             |
        | stderr (inherited)               | stderr pipe |
        |                                  | (captured)  |
        v                                  v             v
 +-------------------------------------------------------------+
 |                      Parent Process                         |
 |                                                             |
 |      (direct write)              [run-command (buffered)]   |
 |             |                                 |             |
 |             |                                 | writes      |
 |             v                                 v             |
 |      +-------------------------------------------+          |
 |      |             stderr (FD 2)                 |          |
 |      +-------------------------------------------+          |
 |                           |                                 |
 |                           | (dup2'd to pipe)                |
 |                           v                                 |
 |               +-----------------------+                     |
 |               | sideband async thread |                     |
 |               +-----------------------+                     |
 +-------------------------------------------------------------+

When use_sideband == 0, the sideband async thread is missing, so
this same architecture just outputs via the parent stderr stream.

See the following commits for the hook API conversions doing this,
using pre-existing sideband thread logic from `copy_to_sideband`.

Suggested-by: Jeff King <peff@peff.net>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2026-01-28 15:47:03 -08:00

152 lines
4.3 KiB
C

#ifndef HOOK_H
#define HOOK_H
#include "strvec.h"
#include "run-command.h"
struct repository;
struct run_hooks_opt
{
/* Environment vars to be set for each hook */
struct strvec env;
/* Args to be passed to each hook */
struct strvec args;
/* Emit an error if the hook is missing */
unsigned int error_if_missing:1;
/**
* Number of processes to parallelize across.
*
* If > 1, output will be buffered and de-interleaved (ungroup=0).
* If == 1, output will be real-time (ungroup=1).
*/
unsigned int jobs;
/**
* An optional initial working directory for the hook,
* translates to "struct child_process"'s "dir" member.
*/
const char *dir;
/**
* A pointer which if provided will be set to 1 or 0 depending
* on if a hook was started, regardless of whether or not that
* was successful. I.e. if the underlying start_command() was
* successful this will be set to 1.
*
* Used for avoiding TOCTOU races in code that would otherwise
* call hook_exist() after a "maybe hook run" to see if a hook
* was invoked.
*/
int *invoked_hook;
/**
* Send the hook's stdout to stderr.
*
* This is the default behavior for all hooks except pre-push,
* which has separate stdout and stderr streams for backwards
* compatibility reasons.
*/
unsigned int stdout_to_stderr:1;
/**
* Path to file which should be piped to stdin for each hook.
*/
const char *path_to_stdin;
/**
* Callback used to incrementally feed a child hook stdin pipe.
*
* Useful especially if a hook consumes large quantities of data
* (e.g. a list of all refs in a client push), so feeding it via
* in-memory strings or slurping to/from files is inefficient.
* While the callback allows piecemeal writing, it can also be
* used for smaller inputs, where it gets called only once.
*
* Add hook callback initalization context to `feed_pipe_ctx`.
* Add hook callback internal state to `feed_pipe_cb_data`.
*
*/
feed_pipe_fn feed_pipe;
/**
* Opaque data pointer used to pass context to `feed_pipe_fn`.
*
* It can be accessed via the second callback arg 'pp_cb':
* ((struct hook_cb_data *) pp_cb)->hook_cb->options->feed_pipe_ctx;
*
* The caller is responsible for managing the memory for this data.
* Only useful when using `run_hooks_opt.feed_pipe`, otherwise ignore it.
*/
void *feed_pipe_ctx;
/**
* Opaque data pointer used to keep internal state across callback calls.
*
* It can be accessed directly via the third callback arg 'pp_task_cb':
* struct ... *state = pp_task_cb;
*
* The caller is responsible for managing the memory for this data.
* Only useful when using `run_hooks_opt.feed_pipe`, otherwise ignore it.
*/
void *feed_pipe_cb_data;
};
#define RUN_HOOKS_OPT_INIT { \
.env = STRVEC_INIT, \
.args = STRVEC_INIT, \
.stdout_to_stderr = 1, \
.jobs = 1, \
}
struct hook_cb_data {
/* rc reflects the cumulative failure state */
int rc;
const char *hook_name;
const char *hook_path;
struct run_hooks_opt *options;
};
/*
* Returns the path to the hook file, or NULL if the hook is missing
* or disabled. Note that this points to static storage that will be
* overwritten by further calls to find_hook and run_hook_*.
*/
const char *find_hook(struct repository *r, const char *name);
/**
* A boolean version of find_hook()
*/
int hook_exists(struct repository *r, const char *hookname);
/**
* Takes a `hook_name`, resolves it to a path with find_hook(), and
* runs the hook for you with the options specified in "struct
* run_hooks opt". Will free memory associated with the "struct run_hooks_opt".
*
* Returns the status code of the run hook, or a negative value on
* error().
*/
int run_hooks_opt(struct repository *r, const char *hook_name,
struct run_hooks_opt *options);
/**
* A wrapper for run_hooks_opt() which provides a dummy "struct
* run_hooks_opt" initialized with "RUN_HOOKS_OPT_INIT".
*/
int run_hooks(struct repository *r, const char *hook_name);
/**
* Like run_hooks(), a wrapper for run_hooks_opt().
*
* In addition to the wrapping behavior provided by run_hooks(), this
* wrapper takes a list of strings terminated by a NULL
* argument. These things will be used as positional arguments to the
* hook. This function behaves like the old run_hook_le() API.
*/
LAST_ARG_MUST_BE_NULL
int run_hooks_l(struct repository *r, const char *hook_name, ...);
#endif