Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Continuous integration workflow.
# Triggers: pushes to master, pushes of tags matching v*, and pull requests
# targeting master.
name: CI

on:
push:
branches: [master]
tags: ["v*"]
pull_request:
branches: [master]

# Limit the workflow's GITHUB_TOKEN to read-only access to repository contents.
permissions:
contents: read

# Workflow-wide environment. ELIXIR_VERSION / OTP_VERSION are read through the
# env context by the non-matrix jobs below; the test job uses its own matrix.
env:
MIX_ENV: test
ELIXIR_VERSION: "1.17.3"
OTP_VERSION: "27.2"

jobs:
# Job: fetch and compile dependencies, then compile the project with
# warnings promoted to errors.
compile:
name: Compile & Warnings
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

# Install the Elixir/OTP pair pinned in the workflow-level env.
- uses: erlef/setup-beam@v1
with:
elixir-version: ${{ env.ELIXIR_VERSION }}
otp-version: ${{ env.OTP_VERSION }}

# Cache key = OS + Elixir + OTP + hash of mix.lock. restore-keys falls back
# to any cache for the same OS/toolchain when the lockfile hash differs.
- name: Cache deps & build
uses: actions/cache@v4
with:
path: |
deps
_build
key: ${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-

- run: mix deps.get
- run: mix deps.compile

- name: Compile with warnings as errors
run: mix compile --warnings-as-errors

# Job: verify that the code base is formatted (mix format --check-formatted).
format:
name: Formatting
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

# Install the Elixir/OTP pair pinned in the workflow-level env.
- uses: erlef/setup-beam@v1
with:
elixir-version: ${{ env.ELIXIR_VERSION }}
otp-version: ${{ env.OTP_VERSION }}

# Only deps is cached here (no _build); this job does not run a compile step.
- name: Cache deps
uses: actions/cache@v4
with:
path: deps
key: ${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-

- run: mix deps.get
- run: mix format --check-formatted

# Job: lint the project with Credo in strict mode.
credo:
name: Credo
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

# Install the Elixir/OTP pair pinned in the workflow-level env.
- uses: erlef/setup-beam@v1
with:
elixir-version: ${{ env.ELIXIR_VERSION }}
otp-version: ${{ env.OTP_VERSION }}

# Same cache key scheme as the compile job: OS + toolchain + mix.lock hash.
- name: Cache deps & build
uses: actions/cache@v4
with:
path: |
deps
_build
key: ${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-

- run: mix deps.get
- run: mix credo --strict

# Job: run the test suite once per Elixir/OTP pair listed in the matrix.
test:
name: Tests (OTP ${{ matrix.otp }} / Elixir ${{ matrix.elixir }})
runs-on: ubuntu-latest
strategy:
# Let the remaining matrix entries finish even when one of them fails.
fail-fast: false
matrix:
# Explicit pairs rather than a cross product of versions.
include:
- elixir: "1.17.3"
otp: "27.2"
- elixir: "1.18.3"
otp: "27.2"
- elixir: "1.20.0-rc.1"
otp: "28.3.3"
steps:
- uses: actions/checkout@v4

# Toolchain comes from the matrix entry, not from the workflow-level env.
- uses: erlef/setup-beam@v1
with:
elixir-version: ${{ matrix.elixir }}
otp-version: ${{ matrix.otp }}

# Cache is keyed per matrix entry (OS + Elixir + OTP + mix.lock hash), so
# builds from different toolchains are kept apart.
- name: Cache deps & build
uses: actions/cache@v4
with:
path: |
deps
_build
key: ${{ runner.os }}-mix-${{ matrix.elixir }}-${{ matrix.otp }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-mix-${{ matrix.elixir }}-${{ matrix.otp }}-

- run: mix deps.get
- run: mix deps.compile
- run: mix compile
- run: mix test

# Job: static analysis with Dialyzer on the toolchain pinned in the
# workflow-level env.
dialyzer:
name: Dialyzer
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- uses: erlef/setup-beam@v1
with:
elixir-version: ${{ env.ELIXIR_VERSION }}
otp-version: ${{ env.OTP_VERSION }}

# Dependency/build cache: OS + toolchain + mix.lock hash.
- name: Cache deps & build
uses: actions/cache@v4
with:
path: |
deps
_build
key: ${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-mix-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-

# PLTs are cached separately under their own "plt" key prefix.
# NOTE(review): assumes the project's dialyzer config writes its PLT files
# to priv/plts - confirm in mix.exs.
- name: Cache PLTs
uses: actions/cache@v4
with:
path: priv/plts
key: ${{ runner.os }}-plt-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-plt-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-

- run: mix deps.get
- run: mix deps.compile
- run: mix compile
- run: mix dialyzer

# Job: publish the package to Hex. Runs only for refs under refs/tags/v and
# only after every verification job has succeeded.
publish:
  name: Publish to Hex
  runs-on: ubuntu-latest
  needs: [compile, format, credo, test, dialyzer]
  if: startsWith(github.ref, 'refs/tags/v')
  env:
    # Override the workflow-wide MIX_ENV=test: the release should not be
    # built in the test environment, and `mix hex.publish` needs the docs
    # task to be available.
    # NOTE(review): assumes ex_doc is available in :dev - confirm in mix.exs.
    MIX_ENV: dev
  steps:
    - uses: actions/checkout@v4

    - uses: erlef/setup-beam@v1
      with:
        elixir-version: ${{ env.ELIXIR_VERSION }}
        otp-version: ${{ env.OTP_VERSION }}

    - name: Set version from tag
      run: |
        TAG_VERSION="${GITHUB_REF#refs/tags/v}"

        # The tag text is spliced into a sed expression below, so reject
        # anything that is not a plain semantic version (the v* trigger also
        # matches tags such as "vnext" or ones containing "/" or "&").
        if ! [[ "$TAG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z.-]+)?(\+[0-9A-Za-z.-]+)?$ ]]; then
          echo "::error::Tag '${GITHUB_REF}' is not a valid vMAJOR.MINOR.PATCH version"
          exit 1
        fi

        sed -i "s/@version \".*\"/@version \"${TAG_VERSION}\"/" mix.exs

        # sed exits 0 even when nothing matched; fail instead of silently
        # publishing whatever version mix.exs already contained.
        if ! grep -qF "@version \"${TAG_VERSION}\"" mix.exs; then
          echo "::error::Could not set @version to ${TAG_VERSION} in mix.exs"
          exit 1
        fi

        grep '@version' mix.exs

    - run: mix deps.get
    - run: mix compile

    - name: Publish to Hex
      run: mix hex.publish --yes
      env:
        HEX_API_KEY: ${{ secrets.HEX_API_KEY }}
77 changes: 77 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Build & Test Commands

```bash
mix deps.get # Fetch dependencies
mix compile # Compile Elixir + C code (via elixir_make)
mix test # Run full test suite (47 tests)
mix test test/process_test.exs # Run a single test file
mix test test/process_test.exs:10 # Run a single test at line
mix format # Auto-format code
mix format --check-formatted # Check formatting (CI)
mix compile --warnings-as-errors # Compile with strict warnings
mix credo --strict # Lint with credo
mix dialyzer # Static type analysis
make clean && make all # Rebuild C code only
```

## Architecture

NetRunner is a safe OS process execution library for Elixir with NIF-based backpressure, zero zombie guarantees, PTY support, and cgroup isolation.

### Three-Tier Design

**Elixir Layer** (`lib/`) — GenServer-based process management, stream API, daemon mode.

**NIF Layer** (`c_src/net_runner_nif.c`) — Wraps FDs in NIF resources with `enif_select` for async I/O on dirty IO schedulers. On EAGAIN, registers with BEAM's epoll/kqueue; the GenServer parks callers in an operations queue and retries when `{:select, _, _, :ready_input/:ready_output}` arrives.

**Shepherd Layer** (`c_src/shepherd.c`) — Persistent C binary spawned per command via `Port.open`. Forks the child, passes pipe FDs to the BEAM via SCM_RIGHTS over a Unix domain socket (UDS), then enters a `poll()` loop. Detects BEAM death via POLLHUP on that socket and escalates SIGTERM→SIGKILL on the child's process group.

### Zero Zombie Prevention (3 layers)

1. **Shepherd** — detects BEAM death (POLLHUP on UDS), kills child process group
2. **Watcher GenServer** — monitors Process GenServer, kills OS process on DOWN
3. **NIF destructor** — closes FDs on garbage collection

### Spawn Sequence

1. BEAM creates UDS listener at random temp path
2. `Port.open` launches shepherd with UDS path as argv[1]
3. Shepherd connects, forks child, sends pipe FDs via SCM_RIGHTS
4. Shepherd sends `MSG_CHILD_STARTED(pid)` over UDS
5. GenServer wraps FDs in NIF resources, registers with Watcher

### Key Module Relationships

- `NetRunner` — top-level API (`run/2`, `stream!/2`, `stream/2`)
- `NetRunner.Process` — GenServer owning the OS process lifecycle
- `NetRunner.Process.Exec` — spawn logic (UDS, Port, SCM_RIGHTS, Pipe creation)
- `NetRunner.Process.Nif` — NIF stubs (`nif_read`, `nif_write`, `nif_close`, `nif_create_fd`, `nif_kill`)
- `NetRunner.Process.Pipe` — struct wrapping a NIF resource with owner/type metadata
- `NetRunner.Process.Operations` — pending operation queue (park on EAGAIN, retry on select)
- `NetRunner.Stream` — `Stream.resource` wrapper with concurrent input writer Task
- `NetRunner.Daemon` — supervised long-running process with output draining
- `NetRunner.Watcher` — belt-and-suspenders process monitor
- `NetRunner.Signal` — signal atom to platform number resolution via NIF

### Shepherd Protocol (`c_src/protocol.h`)

BEAM→Shepherd: `CMD_KILL(signal)`, `CMD_CLOSE_STDIN`, `CMD_SET_WINSIZE(rows,cols)`
Shepherd→BEAM: `MSG_CHILD_STARTED(pid)`, `MSG_CHILD_EXITED(status)`, `MSG_ERROR(msg)`

## C Code

- C99, compiled with `-Wall -Wextra -Werror`
- Platform detection in Makefile: `-D_GNU_SOURCE` (Linux) or `-D_DARWIN_C_SOURCE` (macOS)
- Two build artifacts: `priv/shepherd` (executable) and `priv/net_runner_nif.so` (shared lib)
- NIF functions run on dirty IO schedulers to avoid blocking BEAM

## Conventions

- Elixir ~> 1.17; CI tests against Elixir 1.17.3 and 1.18.3 on OTP 27.2, plus Elixir 1.20.0-rc.1 on OTP 28.3.3 (see the matrix in `.github/workflows/ci.yml`)
- All public API functions have `@doc` and `@spec`
- Tests are async where possible (`async: true`)
- Credo strict mode enforced: max cyclomatic complexity 9, max nesting depth 2
7 changes: 4 additions & 3 deletions c_src/shepherd.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>
Expand All @@ -46,8 +48,8 @@ static int signal_pipe[2] = {-1, -1};
static void sigchld_handler(int sig) {
(void)sig;
int saved_errno = errno;
/* Write a single byte to wake up poll() */
(void)write(signal_pipe[1], "C", 1);
/* Write a single byte to wake up poll() — ignore failure in signal handler */
if (write(signal_pipe[1], "C", 1) < 0) { /* nothing to do */ }
errno = saved_errno;
}

Expand Down Expand Up @@ -189,7 +191,6 @@ static void cgroup_cleanup(void) {
if (cgroup_path[0] == '\0') return;

char full_path[512];
char procs_path[576];
char kill_path[576];

snprintf(full_path, sizeof(full_path), "/sys/fs/cgroup/%s", cgroup_path);
Expand Down
36 changes: 18 additions & 18 deletions lib/net_runner.ex
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,7 @@ defmodule NetRunner do

{:ok, pid} = Proc.start(cmd, args, process_opts)

# Run I/O in a task so we can enforce timeout via Task.yield
task =
Task.async(fn ->
if input do
write_all_input(pid, input)
else
Proc.close_stdin(pid)
end

case read_all_with_limits(pid, max_output_size) do
{:ok, output} ->
{:ok, exit_status} = Proc.await_exit(pid)
{output, exit_status}

{:error, _} = error ->
error
end
end)
task = Task.async(fn -> run_io(pid, input, max_output_size) end)

effective_timeout = timeout || :infinity

Expand Down Expand Up @@ -170,6 +153,23 @@ defmodule NetRunner do
end
end

# Performs the blocking I/O for one run: feeds `input` to the process's stdin
# (or closes stdin when no input was given), reads the output subject to
# `max_output_size`, and, when the read succeeds, waits for the exit status.
#
# Returns `{output, exit_status}` on success, or the `{:error, _}` tuple
# produced by `read_all_with_limits/2` unchanged.
defp run_io(pid, input, max_output_size) do
  if input, do: write_all_input(pid, input), else: Proc.close_stdin(pid)

  case read_all_with_limits(pid, max_output_size) do
    {:error, _reason} = failure ->
      failure

    {:ok, collected} ->
      {:ok, status} = Proc.await_exit(pid)
      {collected, status}
  end
end

defp kill_and_cleanup(pid) do
Proc.kill(pid, :sigterm)

Expand Down
Loading