Files
at-container-registry/deploy/upcloud/update.go
Evan Jarrett de02e1f046 remove distribution from hold, add vulnerability scanning in appview.
1. Removing distribution/distribution from the Hold Service (biggest change)
  The hold service previously used distribution's StorageDriver interface for all blob operations. This replaces it with direct AWS SDK v2 calls through ATCR's own pkg/s3.S3Service:
  - New S3Service methods: Stat(), PutBytes(), Move(), Delete(), WalkBlobs(), ListPrefix() added to pkg/s3/types.go
  - Pull zone fix: Presigned URLs are now generated against the real S3 endpoint, then the host is swapped to the CDN URL post-signing (previously the CDN URL was set as the endpoint, which
  broke SigV4 signatures)
  - All hold subsystems migrated: GC, OCI uploads, XRPC handlers, profile uploads, scan broadcaster, manifest posts — all now use *s3.S3Service instead of storagedriver.StorageDriver
  - Config simplified: Removed configuration.Storage type and buildStorageConfigFromFields(); replaced with a simple S3Params() method
  - Mock expanded: MockS3Client gains an in-memory object store + 5 new methods, replacing duplicate mockStorageDriver implementations in tests (~160 lines deleted from each test file)
2. Vulnerability Scan UI in AppView (new feature)
  Displays scan results from the hold's PDS on the repository page:
  - New lexicon: io/atcr/hold/scan.json with vulnReportBlob field for storing full Grype reports
  - Two new HTMX endpoints: /api/scan-result (badge) and /api/vuln-details (modal with CVE table)
  - New templates: vuln-badge.html (severity count chips) and vuln-details.html (full CVE table with NVD/GHSA links)
  - Repository page: Lazy-loads scan badges per manifest via HTMX
  - Tests: ~590 lines of test coverage for both handlers
3. S3 Diagnostic Tool
  New cmd/s3-test/main.go (418 lines) — tests S3 connectivity with both SDK v1 and v2, including presigned URL generation, pull zone host swapping, and verbose signing debug output.
4. Deployment Tooling
  - New syncServiceUnit() for comparing/updating systemd units on servers
  - Update command now syncs config keys (adds missing keys from template) and service units with daemon-reload
5. DB Migration
  0011_fix_captain_successor_column.yaml — rebuilds hold_captain_records to add the successor column that was missed in a previous migration.
6. Documentation
  - APPVIEW-UI-FUTURE.md rewritten as a status-tracked feature inventory
  - DISTRIBUTION.md renamed to CREDENTIAL_HELPER.md
  - New REMOVING_DISTRIBUTION.md — 480-line analysis of fully removing distribution from the appview side
7. go.mod
  aws-sdk-go v1 moved from indirect to direct (needed by cmd/s3-test).
2026-02-13 15:26:24 -06:00

263 lines
6.6 KiB
Go

package main
import (
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"strings"
	"time"

	"github.com/spf13/cobra"
)
// updateCmd deploys fresh builds to the provisioned servers. With no
// argument it updates everything; a single argument narrows the run to
// "appview" or "hold".
var updateCmd = &cobra.Command{
	Use:       "update [target]",
	Short:     "Deploy updates to servers",
	Args:      cobra.MaximumNArgs(1),
	ValidArgs: []string{"all", "appview", "hold"},
	RunE: func(_ *cobra.Command, args []string) error {
		// Default to "all" when the caller names no target.
		if len(args) == 0 {
			return cmdUpdate("all")
		}
		return cmdUpdate(args[0])
	},
}
// sshCmd opens an interactive SSH session to the named server
// ("appview" or "hold").
var sshCmd = &cobra.Command{
	Use:       "ssh <target>",
	Short:     "SSH into a server",
	Args:      cobra.ExactArgs(1),
	ValidArgs: []string{"appview", "hold"},
	RunE: func(_ *cobra.Command, args []string) error {
		target := args[0]
		return cmdSSH(target)
	},
}
// init registers the update and ssh subcommands on the root command.
func init() {
	rootCmd.AddCommand(updateCmd, sshCmd)
}
// cmdUpdate redeploys the given target ("all", "appview", or "hold").
// For each selected server it:
//  1. syncs config keys (adding keys missing from the server's config,
//     never overwriting existing values),
//  2. syncs the systemd service unit, scheduling a daemon-reload only
//     when the rendered unit actually changed,
//  3. runs a remote build-and-restart script over SSH (updating the Go
//     toolchain on the server first if its version differs from the one
//     this repo requires), and
//  4. reports the post-restart health-check result. A failed health
//     check is reported but does not abort the remaining targets.
func cmdUpdate(target string) error {
	state, err := loadState()
	if err != nil {
		return err
	}
	naming := state.Naming()
	branch := state.Branch()
	goVersion, err := requiredGoVersion()
	if err != nil {
		return err
	}
	vals := configValsFromState(state)

	// Per-target deployment parameters.
	targets := map[string]struct {
		ip          string
		binaryName  string
		buildCmd    string
		serviceName string
		healthURL   string
		configTmpl  string
		configPath  string
		unitTmpl    string
	}{
		"appview": {
			ip:          state.Appview.PublicIP,
			binaryName:  naming.Appview(),
			buildCmd:    "appview",
			serviceName: naming.Appview(),
			healthURL:   "http://localhost:5000/health",
			configTmpl:  appviewConfigTmpl,
			configPath:  naming.AppviewConfigPath(),
			unitTmpl:    appviewServiceTmpl,
		},
		"hold": {
			ip:          state.Hold.PublicIP,
			binaryName:  naming.Hold(),
			buildCmd:    "hold",
			serviceName: naming.Hold(),
			healthURL:   "http://localhost:8080/xrpc/_health",
			configTmpl:  holdConfigTmpl,
			configPath:  naming.HoldConfigPath(),
			unitTmpl:    holdServiceTmpl,
		},
	}

	var toUpdate []string
	switch target {
	case "all":
		toUpdate = []string{"appview", "hold"}
	case "appview", "hold":
		toUpdate = []string{target}
	default:
		return fmt.Errorf("unknown target: %s (use: all, appview, hold)", target)
	}

	for _, name := range toUpdate {
		t := targets[name]
		fmt.Printf("Updating %s (%s)...\n", name, t.ip)

		// Sync config keys (adds missing keys from template, never overwrites)
		configYAML, err := renderConfig(t.configTmpl, vals)
		if err != nil {
			return fmt.Errorf("render %s config: %w", name, err)
		}
		if err := syncConfigKeys(name, t.ip, t.configPath, configYAML); err != nil {
			return fmt.Errorf("%s config sync: %w", name, err)
		}

		// Sync systemd service unit
		renderedUnit, err := renderServiceUnit(t.unitTmpl, serviceUnitParams{
			DisplayName: naming.DisplayName(),
			User:        naming.SystemUser(),
			BinaryPath:  naming.InstallDir() + "/bin/" + t.binaryName,
			ConfigPath:  t.configPath,
			DataDir:     naming.BasePath(),
			ServiceName: t.serviceName,
		})
		if err != nil {
			return fmt.Errorf("render %s service unit: %w", name, err)
		}
		unitChanged, err := syncServiceUnit(name, t.ip, t.serviceName, renderedUnit)
		if err != nil {
			return fmt.Errorf("%s service unit sync: %w", name, err)
		}
		// Only reload systemd when the unit file actually changed.
		daemonReload := ""
		if unitChanged {
			daemonReload = "systemctl daemon-reload"
		}

		updateScript := fmt.Sprintf(`set -euo pipefail
export PATH=$PATH:/usr/local/go/bin
export GOTMPDIR=/var/tmp
# Update Go if needed
CURRENT_GO=$(go version 2>/dev/null | grep -oP 'go\K[0-9.]+' || echo "none")
REQUIRED_GO="%s"
if [ "$CURRENT_GO" != "$REQUIRED_GO" ]; then
echo "Updating Go: $CURRENT_GO -> $REQUIRED_GO"
rm -rf /usr/local/go
curl -fsSL https://go.dev/dl/go${REQUIRED_GO}.linux-amd64.tar.gz | tar -C /usr/local -xz
fi
cd %s
git pull origin %s
npm ci
go generate ./...
CGO_ENABLED=1 go build \
-ldflags="-s -w -linkmode external -extldflags '-static'" \
-tags sqlite_omit_load_extension -trimpath \
-o bin/%s ./cmd/%s
%s
systemctl restart %s
sleep 2
curl -sf %s > /dev/null && echo "HEALTH_OK" || echo "HEALTH_FAIL"
`, goVersion, naming.InstallDir(), branch, t.binaryName, t.buildCmd, daemonReload, t.serviceName, t.healthURL)

		output, err := runSSH(t.ip, updateScript, true)
		if err != nil {
			fmt.Printf(" ERROR: %v\n", err)
			fmt.Printf(" Output: %s\n", output)
			// Wrap the underlying SSH error instead of dropping it, so
			// callers can still inspect the cause via errors.Is/As.
			return fmt.Errorf("update %s failed: %w", name, err)
		}
		if strings.Contains(output, "HEALTH_OK") {
			fmt.Printf(" %s: updated and healthy\n", name)
		} else if strings.Contains(output, "HEALTH_FAIL") {
			fmt.Printf(" %s: updated but health check failed!\n", name)
			fmt.Printf(" Check: ssh root@%s journalctl -u %s -n 50\n", t.ip, t.serviceName)
		} else {
			fmt.Printf(" %s: updated (health check inconclusive)\n", name)
		}
	}
	return nil
}
// configValsFromState derives the ConfigValues used for config rendering
// from the persisted infrastructure state.
//
// S3SecretKey is deliberately blank: syncConfigKeys only fills in keys
// that are missing on the server and never overwrites existing ones, so
// the secret already present on the server survives untouched.
func configValsFromState(state *InfraState) *ConfigValues {
	naming := state.Naming()
	_, baseDomain, _, _ := extractFromAppviewTemplate()

	hold := state.Zone + ".cove." + baseDomain
	storage := state.ObjectStorage

	vals := &ConfigValues{
		S3Endpoint:  storage.Endpoint,
		S3Region:    storage.Region,
		S3Bucket:    storage.Bucket,
		S3AccessKey: storage.AccessKeyID,
		S3SecretKey: "", // not persisted in state; existing value on server is preserved
		Zone:        state.Zone,
		HoldDomain:  hold,
		HoldDid:     "did:web:" + hold,
		BasePath:    naming.BasePath(),
	}
	return vals
}
// cmdSSH opens an interactive SSH session (as root) to the server named
// by target, inheriting this process's stdin/stdout/stderr.
func cmdSSH(target string) error {
	state, err := loadState()
	if err != nil {
		return err
	}

	// Resolve the target name to its public IP.
	addrs := map[string]string{
		"appview": state.Appview.PublicIP,
		"hold":    state.Hold.PublicIP,
	}
	ip, ok := addrs[target]
	if !ok {
		return fmt.Errorf("unknown target: %s (use: appview, hold)", target)
	}

	fmt.Printf("Connecting to %s (%s)...\n", target, ip)
	session := exec.Command("ssh",
		"-o", "StrictHostKeyChecking=accept-new",
		"root@"+ip,
	)
	session.Stdin = os.Stdin
	session.Stdout = os.Stdout
	session.Stderr = os.Stderr
	return session.Run()
}
// runSSH executes script on the remote host via `bash -s` over SSH and
// returns the combined stdout+stderr output. When stream is true the
// output is additionally mirrored live to this process's stdout/stderr.
//
// The remote command is bounded by a 10-minute deadline (remote builds
// are slow); on timeout the process is killed and a timeout error is
// returned together with whatever output was captured so far.
func runSSH(ip, script string, stream bool) (string, error) {
	// Give builds up to 10 minutes. Using exec.CommandContext instead of
	// a hand-rolled kill goroutine fixes two data races in the previous
	// version: the timeout path read buf while the Run goroutine was
	// still writing to it, and it read cmd.Process concurrently with Run
	// setting it. With CommandContext, Run only returns after the
	// process is dead and all output copying has completed.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()

	cmd := exec.CommandContext(ctx, "ssh",
		"-o", "StrictHostKeyChecking=accept-new",
		"-o", "ConnectTimeout=10",
		"root@"+ip,
		"bash -s",
	)
	cmd.Stdin = strings.NewReader(script)
	// Bound the post-kill wait so Run can't hang on pipe FDs inherited
	// by lingering remote-spawned children.
	cmd.WaitDelay = 5 * time.Second

	var buf bytes.Buffer
	if stream {
		cmd.Stdout = io.MultiWriter(os.Stdout, &buf)
		cmd.Stderr = io.MultiWriter(os.Stderr, &buf)
	} else {
		cmd.Stdout = &buf
		cmd.Stderr = &buf
	}

	err := cmd.Run()
	if ctx.Err() == context.DeadlineExceeded {
		return buf.String(), fmt.Errorf("SSH command timed out after 10 minutes")
	}
	return buf.String(), err
}