Files
anchorage/internal/pkg/token/token.go
William Gill 12bf35caf8 anchorage v1.0 initial tree
Greenfield Go multi-tenant IPFS Pinning Service wire-compatible with the
IPFS Pinning Services API spec. Paired 1:1 with Kubo over localhost RPC,
clustered via embedded NATS JetStream, Postgres source-of-truth with
RLS-enforced tenancy, Fiber + huma v2 for the HTTP surface, Authentik
OIDC for session login with kid-rotated HS256 JWT API tokens.

Feature-complete against the 22-milestone build plan, including the
ship-it v1.0 gap items:

  * admin CLIs: drain/uncordon, maintenance, mint-token, rotate-key,
    prune-denylist, rebalance --dry-run, cache-stats, cluster-presences
  * TTL leader election via NATS KV, fence tokens, JetStream dedup
  * rebalancer (plan/apply split), reconciler, requeue sweeper
  * ristretto caches with NATS-backed cross-node invalidation
    (placements live-nodes + token denylist)
  * maintenance watchdog for stuck cluster-pause flag
  * Prometheus /metrics with CIDR ACL, HTTP/pin/scheduler/cache gauges
  * rate limiting: session (10/min) + anonymous global (120/min)
  * integration tests: rebalance, refcount multi-org, RLS belt
  * goreleaser (tar + deb/rpm/apk + Alpine Docker) targeting Gitea

Stack: Cobra/Viper, Fiber v2 + huma v2, embedded NATS JetStream,
pgx/sqlc/golang-migrate, ristretto, TypeID, prometheus/client_golang,
testcontainers-go.
2026-04-16 18:13:36 -05:00

296 lines
9.7 KiB
Go

// Package token mints and verifies anchorage API tokens.
//
// Tokens are short-lived JWTs signed with a symmetric HMAC key at v1
// (the plan leaves ed25519/RS256 as a future open item). The jti claim
// is a TypeID (tok_*) so revocation, logs, and audit rows can reference
// it directly.
//
// Every token carries a `kid` header that names the key that signed
// it. Tokens without a `kid` are rejected — anchorage is new software
// so there is no pre-rotation token population to carry forward.
//
// Rotation: Signer accepts multiple keys. Exactly one is flagged as
// Primary — Mint always signs with it and puts its ID in the JWT
// `kid` header. Parse reads `kid` and picks the matching key from the
// full set, so tokens signed with a retiring key keep verifying while
// the retiring key is still loaded. See deploy/README.md for the
// operational procedure.
package token
import (
"context"
"errors"
"fmt"
"time"
"github.com/golang-jwt/jwt/v5"
"anchorage/internal/pkg/ids"
"anchorage/internal/pkg/store"
)
// DevKeyID is the stable kid used by the built-in dev-fallback key.
// It appears ONLY when the operator has not configured any keys and
// anchorage is running against its insecure dev default — the loud
// warning in LoadSigningKeys fires then. Never assume a token with
// kid=DevKeyID is legitimate outside a local dev environment.
//
// NOTE(review): nothing in the visible part of this file references
// DevKeyID or defines LoadSigningKeys; the fallback key is presumably
// wired up by the key-loading code — confirm there.
const DevKeyID = "dev"
// Claims is anchorage's JWT payload: the tenant-specific fields below
// plus the embedded standard registered claims.
type Claims struct {
// Org is the organization the token was minted for (JSON "org").
Org ids.OrgID `json:"org"`
// User is the user the token was minted for. It is serialized under
// the private claim name "sub_id"; Mint leaves the registered "sub"
// claim unset.
User ids.UserID `json:"sub_id"`
// Role is copied verbatim from Mint's role argument. Nothing in this
// file interprets it.
Role string `json:"role"`
// Scopes is copied verbatim from Mint's scopes argument and is
// omitted from the JSON payload when empty.
Scopes []string `json:"scopes,omitempty"`
// RegisteredClaims carries the standard claims. Mint fills in ID
// (the jti), Issuer, Audience, IssuedAt, ExpiresAt, and NotBefore.
jwt.RegisteredClaims
}
// SigningKey binds an HMAC key to a stable identifier. Exactly one key
// in a Signer's set is flagged Primary — it's the one Mint uses.
// NewSigner validates every constraint documented on the fields below
// and returns an error when one is violated.
type SigningKey struct {
// ID is the stable label emitted as the JWT `kid` header and used
// by Parse to look up which key verified a given token. Treat like
// a version tag: "2026-04", "prod-v2", anything stable.
// Must be non-empty and unique within the set passed to NewSigner.
ID string
// Key is the raw HMAC bytes (>= 32 bytes). NewSigner rejects
// anything shorter.
Key []byte
// Primary marks the key used for minting. Exactly one key in the
// set must have this set; NewSigner rejects both zero and multiple
// primaries.
Primary bool
}
// DenyCache is the minimal surface the Signer needs from a cache layer
// to speed up denylist lookups. *cache.Cache[string,bool] satisfies it
// implicitly; tests can also pass a fake. nil = no caching, every
// Parse hits the TokenStore.
//
// Keys are jti strings; values are the "is denied" verdict.
type DenyCache interface {
// Get returns the cached verdict for key and whether an entry was
// found. The Signer uses the verdict only when the second result
// is true.
Get(key string) (bool, bool)
// Set stores verdict v under key. The Signer always passes cost 1
// and a 5-minute ttl, and ignores the returned bool.
Set(key string, v bool, cost int64, ttl time.Duration) bool
// Delete drops the entry for key. The Signer calls it from
// InvalidateDenyCache.
Delete(key string)
}
// Signer mints + parses JWTs and checks the Postgres-backed denylist.
// Construct one with NewSigner, which validates the key set.
type Signer struct {
// keys maps kid → raw HMAC key bytes for every loaded key (primary
// and retiring alike). Read by Mint, keyfunc, and AcceptsKey.
keys map[string][]byte
// primaryID is the kid of the key Mint signs with.
primaryID string
// issuer and audience are written into every minted token and
// passed to the JWT parser as expected values in Parse.
issuer string
audience string
// tokens is the revocation/denylist store. When nil, Parse skips
// the denylist check and Revoke is a no-op.
tokens store.TokenStore
// denyCache is an optional jti → isDenied short-TTL cache. Cuts
// the per-authenticated-request DB hit for the 99.99% of tokens
// that aren't revoked. Cross-node invalidation is the caller's
// responsibility — typically via cache.WatchEntity("token", cache.Delete).
denyCache DenyCache
// onRevoke is an optional callback invoked by Revoke after the
// store has been updated and the local cache entry dropped.
onRevoke RevokeHook
}
// SetDenyCache wires in the cache consulted ahead of the TokenStore on
// denylist lookups; passing nil turns caching off. Call it during
// setup, before the Signer handles any traffic: the field is written
// without synchronization, so replacing the cache while requests are
// in flight is not safe.
func (s *Signer) SetDenyCache(c DenyCache) {
	s.denyCache = c
}
// RevokeHook is called with the revoked jti after every successful
// Revoke. Typical hook publishes a NATS `cache.invalidate.token.<jti>`
// so peer nodes drop their local denylist cache entries.
//
// Revoke invokes the hook directly on the calling goroutine, after the
// store update and the local cache invalidation, and does not run it
// when either store call fails.
type RevokeHook func(jti string)
// SetRevokeHook registers the callback Revoke runs after a successful
// revocation, replacing any previously installed hook. Pass nil to
// remove it.
func (s *Signer) SetRevokeHook(h RevokeHook) {
	s.onRevoke = h
}
// NewSigner constructs a Signer with one or more keys for overlap-style
// rotation. Exactly one key must be flagged Primary; all IDs must be
// unique and non-empty; all keys must be >= 32 bytes.
//
// issuer and audience are stamped into minted tokens and enforced on
// parse. tokens may be nil, in which case Parse skips the denylist
// check and Revoke is a no-op.
//
// The key bytes are copied, so the caller may zero or reuse its own
// buffers after NewSigner returns without affecting the Signer.
//
// It returns an error when keys is empty or any of the constraints
// above is violated; validation stops at the first offending key.
func NewSigner(keys []SigningKey, issuer, audience string, tokens store.TokenStore) (*Signer, error) {
	if len(keys) == 0 {
		return nil, errors.New("token: at least one signing key is required")
	}
	s := &Signer{
		keys:     make(map[string][]byte, len(keys)),
		issuer:   issuer,
		audience: audience,
		tokens:   tokens,
	}
	for _, k := range keys {
		if k.ID == "" {
			return nil, errors.New("token: SigningKey.ID must not be empty")
		}
		if _, dup := s.keys[k.ID]; dup {
			return nil, fmt.Errorf("token: duplicate kid %q", k.ID)
		}
		if len(k.Key) < 32 {
			return nil, fmt.Errorf("token: key %q is too short (need >= 32 bytes)", k.ID)
		}
		// Take a private copy: storing the caller's slice would alias
		// its backing array, so wiping or reusing that buffer later
		// would silently change the key used to sign and verify.
		s.keys[k.ID] = append([]byte(nil), k.Key...)
		if k.Primary {
			if s.primaryID != "" {
				return nil, fmt.Errorf("token: multiple primary keys (%q and %q)", s.primaryID, k.ID)
			}
			s.primaryID = k.ID
		}
	}
	if s.primaryID == "" {
		return nil, errors.New("token: exactly one key must be flagged Primary")
	}
	return s, nil
}
// PrimaryKeyID reports the kid of the key Mint currently signs with.
// Handy in tests and in the admin rotation CLI.
func (s *Signer) PrimaryKeyID() string {
	return s.primaryID
}
// AcceptsKey reports whether a key with the given kid is loaded, i.e.
// whether Parse would find a verification key for tokens carrying it.
func (s *Signer) AcceptsKey(kid string) bool {
	if _, loaded := s.keys[kid]; loaded {
		return true
	}
	return false
}
// Mint issues a signed JWT for the given org, user, role, and scopes.
// It returns the compact token string together with the claims that
// were embedded in it.
//
// A fresh jti is generated on every call. ttl <= 0 falls back to 24h.
// iat and nbf are set to the current UTC time and exp to that time
// plus ttl. The token is signed with HS256 using the primary key, and
// that key's ID is written into the JWT `kid` header.
//
// ctx is accepted for interface stability but is not used here.
// An error is returned if jti generation or signing fails.
func (s *Signer) Mint(ctx context.Context, orgID ids.OrgID, userID ids.UserID, role string, scopes []string, ttl time.Duration) (string, *Claims, error) {
	lifetime := ttl
	if lifetime <= 0 {
		lifetime = 24 * time.Hour
	}

	tokenID, err := ids.NewToken()
	if err != nil {
		return "", nil, err
	}

	issuedAt := time.Now().UTC()
	registered := jwt.RegisteredClaims{
		ID:        tokenID.String(),
		Issuer:    s.issuer,
		Audience:  jwt.ClaimStrings{s.audience},
		IssuedAt:  jwt.NewNumericDate(issuedAt),
		ExpiresAt: jwt.NewNumericDate(issuedAt.Add(lifetime)),
		NotBefore: jwt.NewNumericDate(issuedAt),
	}
	claims := &Claims{
		Org:              orgID,
		User:             userID,
		Role:             role,
		Scopes:           scopes,
		RegisteredClaims: registered,
	}

	unsigned := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
	unsigned.Header["kid"] = s.primaryID

	signed, err := unsigned.SignedString(s.keys[s.primaryID])
	if err != nil {
		return "", nil, fmt.Errorf("sign jwt: %w", err)
	}
	return signed, claims, nil
}
// Parse validates the JWT signature, algorithm, issuer, audience,
// expiry, and denylist status, and returns the token's claims.
//
// The verifying key is chosen by the token's `kid` header. Tokens
// without a `kid`, or pointing at a kid that isn't loaded in this
// Signer's key set, are rejected — silently trying every key would
// defeat the point of rotation.
//
// Only what Mint can produce is accepted:
//   - the algorithm must be HS256 (other HMAC variants are refused
//     before the key is even looked up);
//   - an exp claim must be present. The JWT library does not require
//     one by default, and a token with no expiry would outlive its
//     denylist entry, which Revoke keeps only until the token's
//     natural expiry;
//   - the jti must be present and parse as a token ID.
//
// When the Signer has no TokenStore the denylist check is skipped.
// Errors from the JWT library are returned unwrapped so callers can
// match them with errors.Is.
func (s *Signer) Parse(ctx context.Context, raw string) (*Claims, error) {
	c := &Claims{}
	tok, err := jwt.ParseWithClaims(raw, c, s.keyfunc,
		// Mint only ever signs with HS256; pin it rather than accepting
		// the whole HMAC family that keyfunc lets through.
		jwt.WithValidMethods([]string{jwt.SigningMethodHS256.Alg()}),
		jwt.WithAudience(s.audience),
		jwt.WithIssuer(s.issuer),
	)
	if err != nil {
		return nil, err
	}
	if !tok.Valid {
		return nil, errors.New("token: invalid")
	}
	// The library validates exp only when the claim is present, so a
	// missing exp has to be rejected explicitly.
	if c.ExpiresAt == nil {
		return nil, errors.New("token: missing exp")
	}
	if c.ID == "" {
		return nil, errors.New("token: missing jti")
	}
	jti, err := ids.ParseToken(c.ID)
	if err != nil {
		return nil, fmt.Errorf("token: parse jti: %w", err)
	}
	if s.tokens != nil {
		denied, err := s.isDeniedCached(ctx, jti)
		if err != nil {
			return nil, fmt.Errorf("token: denylist check: %w", err)
		}
		if denied {
			return nil, errors.New("token: revoked")
		}
	}
	return c, nil
}
// isDeniedCached answers "is this jti on the denylist?", consulting the
// optional cache first and falling back to TokenStore.IsDenied on a
// miss.
//
// Store results — both denied and not-denied — are cached under the
// jti string for 5 minutes. That TTL bounds how long a stale verdict
// can survive without explicit invalidation; cross-node invalidation
// via cache.invalidate.token.<jti> normally removes stale entries much
// sooner. The not-denied hit is the common case and the reason the
// cache exists; a denied hit simply saves the store call until expiry.
//
// Store errors are returned as-is with a false verdict and are never
// cached. The caller must ensure s.tokens is non-nil.
func (s *Signer) isDeniedCached(ctx context.Context, jti ids.TokenID) (bool, error) {
	const verdictTTL = 5 * time.Minute

	cacheKey := jti.String()

	if s.denyCache != nil {
		if verdict, found := s.denyCache.Get(cacheKey); found {
			return verdict, nil
		}
	}

	verdict, err := s.tokens.IsDenied(ctx, jti)
	if err != nil {
		return false, err
	}

	if s.denyCache != nil {
		// Set's result is deliberately ignored: a dropped write only
		// means the next lookup goes to the store again.
		s.denyCache.Set(cacheKey, verdict, 1, verdictTTL)
	}
	return verdict, nil
}
// keyfunc is the jwt.Keyfunc used by Parse: it returns the HMAC key
// whose ID matches the token's `kid` header. It is a method rather
// than a closure inside Parse for testability + clarity.
//
// It fails when the token's signing method is not an HMAC variant,
// when `kid` is absent, empty, or not a string, and when no loaded key
// carries that kid.
func (s *Signer) keyfunc(t *jwt.Token) (any, error) {
	switch t.Method.(type) {
	case *jwt.SigningMethodHMAC:
		// Expected family; fall through to key selection.
	default:
		return nil, fmt.Errorf("unexpected signing method %q", t.Header["alg"])
	}

	// A failed assertion leaves kid empty, so a non-string kid is
	// reported the same way as a missing one.
	kid, _ := t.Header["kid"].(string)
	if kid == "" {
		return nil, errors.New("token: missing kid header")
	}

	if key, loaded := s.keys[kid]; loaded {
		return key, nil
	}
	return nil, fmt.Errorf("token: unknown kid %q", kid)
}
// Revoke marks the jti as denied until its natural expiry (expiresAt),
// recording reason alongside it. A store.ErrNotFound from the store's
// Revoke call is ignored, so revoking a jti with no token row still
// adds the denylist entry.
//
// On success the local cache entry is dropped right away, so the
// revoking node sees the change without waiting for the cache TTL, and
// then the revoke hook (if any) runs. Cross-node consistency is the
// caller's responsibility — typically via cache.Invalidator.Emit on
// the "token" entity, which other nodes pick up through
// cache.WatchEntity subscribers that call s.InvalidateDenyCache.
//
// When the Signer has no TokenStore, Revoke does nothing and returns
// nil. If either store call fails, the error is returned and neither
// the cache invalidation nor the hook runs.
func (s *Signer) Revoke(ctx context.Context, jti ids.TokenID, expiresAt time.Time, reason string) error {
	if s.tokens == nil {
		return nil
	}

	switch err := s.tokens.Revoke(ctx, jti); {
	case err == nil, errors.Is(err, store.ErrNotFound):
		// Nothing to report; continue to the denylist insert.
	default:
		return err
	}

	if err := s.tokens.AddDenylist(ctx, jti, expiresAt, reason); err != nil {
		return err
	}

	id := jti.String()
	s.InvalidateDenyCache(id)
	if hook := s.onRevoke; hook != nil {
		hook(id)
	}
	return nil
}
// InvalidateDenyCache evicts one jti from the local denylist cache so
// the next lookup for it goes back to the TokenStore. It is exported
// so the cross-node invalidation subscriber (see
// internal/pkg/cache.WatchEntity) can reach the cache without touching
// unexported state. With no cache attached it does nothing.
func (s *Signer) InvalidateDenyCache(jti string) {
	if s.denyCache != nil {
		s.denyCache.Delete(jti)
	}
}