Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion pkg/pid/pidfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -45,6 +48,36 @@ func generateToken() string {
return hex.EncodeToString(b)
}

// isGatewayAlive reports whether a picoclaw gateway owned by expectedPID is
// answering on host:port.
//
// It sends GET http://host:port/health with a 2-second timeout and returns
// true only when all of the following hold:
//   - the request succeeds and the status is 200 OK,
//   - the body decodes as JSON, and
//   - the JSON "pid" field equals expectedPID.
//
// Comparing PIDs keeps an unrelated service that happens to listen on the
// same port from being mistaken for a running gateway. Every failure mode
// (connection error, non-200 status, malformed body, PID mismatch) yields
// false.
func isGatewayAlive(host string, port int, expectedPID int) bool {
	// A non-positive PID never identifies a real process. Rejecting it here
	// also stops a health response that lacks a "pid" field (which decodes
	// to 0) from matching a zeroed or corrupt PID file entry.
	if expectedPID <= 0 {
		return false
	}

	// JoinHostPort brackets IPv6 literals so the resulting URL stays valid.
	healthURL := "http://" + net.JoinHostPort(host, strconv.Itoa(port)) + "/health"
	client := &http.Client{Timeout: 2 * time.Second}

	resp, err := client.Get(healthURL)
	if err != nil {
		// Nothing answered on the recorded address, so the recorded PID
		// does not belong to a reachable gateway.
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return false
	}

	var body struct {
		PID int `json:"pid"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return false
	}

	// Only treat the gateway as alive if the reported PID matches the PID
	// file entry.
	return body.PID == expectedPID
}

// WritePidFile creates (or overwrites) the PID file atomically.
// It returns an error if another gateway instance appears to be running
// (a valid PID file exists with a live process).
Expand All @@ -63,7 +96,7 @@ func WritePidFile(homePath, host string, port int) (*PidFileData, error) {
// PID file on a shared volume, the host's PID 1 (init) would
// pass the isProcessRunning check, blocking new gateway starts.
// Treat recorded PID 1 as always stale.
if data.PID != 1 && isProcessRunning(data.PID) {
if data.PID != 1 && isProcessRunning(data.PID) && isGatewayAlive(data.Host, data.Port, data.PID) {
return nil, fmt.Errorf("gateway is already running (PID: %d, version: %s)", data.PID, data.Version)
}
logger.Warnf("not running (PID: %d) so will remove the pid file: %s", data.PID, pidPath)
Expand Down
97 changes: 97 additions & 0 deletions pkg/pid/pidfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package pid

import (
"encoding/json"
"net"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strconv"
"testing"
)

Expand Down Expand Up @@ -51,6 +55,99 @@ func TestPidFilePath(t *testing.T) {
}
}

// TestWritePidFileHealthPIDMismatch verifies that an unrelated service on the
// recorded port is not mistaken for the gateway: the PID file names a live
// process, but the /health endpoint reports a different PID, so WritePidFile
// must treat the entry as stale and write a fresh PID file.
func TestWritePidFileHealthPIDMismatch(t *testing.T) {
	// Any value other than the PID stored in the PID file works; deriving it
	// from our own PID guarantees the two can never coincide.
	reportedPID := os.Getpid() + 1

	mux := http.NewServeMux()
	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]any{"pid": reportedPID, "status": "ok"}); err != nil {
			t.Errorf("encoding health response: %v", err)
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	host, portStr, err := net.SplitHostPort(srv.Listener.Addr().String())
	if err != nil {
		t.Fatalf("splitting test server address: %v", err)
	}
	port, err := strconv.Atoi(portStr)
	if err != nil {
		t.Fatalf("parsing test server port %q: %v", portStr, err)
	}

	dir := tmpDir(t)
	foreign := PidFileData{
		PID:   os.Getpid(), // real running PID so the check reaches isGatewayAlive
		Token: "deadbeef12345678deadbeef12345678",
		Port:  port,
		Host:  host,
	}
	raw, err := json.MarshalIndent(foreign, "", " ")
	if err != nil {
		t.Fatalf("marshaling PID file fixture: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, pidFileName), raw, 0o600); err != nil {
		t.Fatalf("writing PID file fixture: %v", err)
	}

	// Should succeed: the health PID differs from the PID file PID, so the
	// listener is not our gateway.
	data, err := WritePidFile(dir, "127.0.0.1", 18790)
	if err != nil {
		t.Fatalf("WritePidFile should treat health PID mismatch as stale, got error: %v", err)
	}
	if data.PID != os.Getpid() {
		t.Errorf("PID = %d, want %d", data.PID, os.Getpid())
	}
}

// TestWritePidFileNonLocalhostHost verifies that WritePidFile refuses to start
// when the PID file names a live process and the /health endpoint at the
// recorded host and port reports that same PID.
//
// NOTE(review): httptest.NewServer normally binds a loopback address, so
// despite its name this test confirms that the host and port stored in the
// PID file are the ones probed, not that a non-loopback host works.
func TestWritePidFileNonLocalhostHost(t *testing.T) {
	// Read the parent PID once so the fixture and the health handler agree
	// even if the process is re-parented while the test runs.
	parentPID := os.Getppid()
	if !isProcessRunning(parentPID) {
		t.Skip("skipping: parent process not running in this environment")
	}
	if parentPID == 1 {
		// WritePidFile always treats a recorded PID 1 as stale, so it would
		// never block startup here and the assertion below could not hold.
		t.Skip("skipping: parent process is PID 1, which WritePidFile always treats as stale")
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		// Report the parent PID, matching the PID file entry below.
		if err := json.NewEncoder(w).Encode(map[string]any{"pid": parentPID, "status": "ok"}); err != nil {
			t.Errorf("encoding health response: %v", err)
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	host, portStr, err := net.SplitHostPort(srv.Listener.Addr().String())
	if err != nil {
		t.Fatalf("splitting test server address: %v", err)
	}
	port, err := strconv.Atoi(portStr)
	if err != nil {
		t.Fatalf("parsing test server port %q: %v", portStr, err)
	}

	dir := tmpDir(t)
	foreign := PidFileData{
		PID:   parentPID, // real, running, but not this process
		Token: "deadbeef12345678deadbeef12345678",
		Port:  port,
		Host:  host,
	}
	raw, err := json.MarshalIndent(foreign, "", " ")
	if err != nil {
		t.Fatalf("marshaling PID file fixture: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, pidFileName), raw, 0o600); err != nil {
		t.Fatalf("writing PID file fixture: %v", err)
	}

	// Should block: the PID exists and the health endpoint at the recorded
	// address answers with the matching PID.
	if _, err := WritePidFile(dir, "127.0.0.1", 18790); err == nil {
		t.Fatal("WritePidFile should block startup when gateway is genuinely alive on non-localhost host")
	}
}

// TestWritePidFileForeignPIDReuse verifies that a live process which is not
// serving the gateway health endpoint (for example, a foreign process that
// reused a crashed gateway's PID) is treated as stale.
func TestWritePidFileForeignPIDReuse(t *testing.T) {
	dir := tmpDir(t)

	// Reserve an ephemeral port and release it immediately, so the health
	// probe is refused without depending on a fixed port being free.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("reserving a free port: %v", err)
	}
	port := ln.Addr().(*net.TCPAddr).Port
	if err := ln.Close(); err != nil {
		t.Fatalf("releasing reserved port: %v", err)
	}

	// Use this process's PID: it is certainly running, and unlike PID 1 it is
	// not short-circuited by WritePidFile's "PID 1 is always stale" guard, so
	// the health check is what has to reject it.
	foreign := PidFileData{
		PID:   os.Getpid(),
		Token: "deadbeef12345678deadbeef12345678",
		Port:  port, // nothing listening here
		Host:  "127.0.0.1",
	}
	raw, err := json.MarshalIndent(foreign, "", " ")
	if err != nil {
		t.Fatalf("marshaling PID file fixture: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, pidFileName), raw, 0o600); err != nil {
		t.Fatalf("writing PID file fixture: %v", err)
	}

	// Should succeed: a running PID with no gateway behind it is stale.
	data, err := WritePidFile(dir, "127.0.0.1", 18790)
	if err != nil {
		t.Fatalf("WritePidFile should treat foreign PID as stale, got error: %v", err)
	}
	if data.PID != os.Getpid() {
		t.Errorf("PID = %d, want %d", data.PID, os.Getpid())
	}
}

// TestWritePidFile creates a PID file and verifies its contents.
func TestWritePidFile(t *testing.T) {
dir := tmpDir(t)
Expand Down