Greenfield Go multi-tenant IPFS Pinning Service wire-compatible with the
IPFS Pinning Services API spec. Paired 1:1 with Kubo over localhost RPC,
clustered via embedded NATS JetStream, Postgres source-of-truth with
RLS-enforced tenancy, Fiber + huma v2 for the HTTP surface, Authentik
OIDC for session login with kid-rotated HS256 JWT API tokens.
Feature-complete against the 22-milestone build plan, including the
ship-it v1.0 gap items:
* admin CLIs: drain/uncordon, maintenance, mint-token, rotate-key,
prune-denylist, rebalance --dry-run, cache-stats, cluster-presences
* TTL leader election via NATS KV, fence tokens, JetStream dedup
* rebalancer (plan/apply split), reconciler, requeue sweeper
* ristretto caches with NATS-backed cross-node invalidation
(placements live-nodes + token denylist)
* maintenance watchdog for stuck cluster-pause flag
* Prometheus /metrics with CIDR ACL, HTTP/pin/scheduler/cache gauges
* rate limiting: session (10/min) + anonymous global (120/min)
* integration tests: rebalance, refcount multi-org, RLS belt
* goreleaser (tar + deb/rpm/apk + Alpine Docker) targeting Gitea
Stack: Cobra/Viper, Fiber v2 + huma v2, embedded NATS JetStream,
pgx/sqlc/golang-migrate, ristretto, TypeID, prometheus/client_golang,
testcontainers-go.
296 lines
9.7 KiB
Go
296 lines
9.7 KiB
Go
// Package token mints and verifies anchorage API tokens.
|
|
//
|
|
// Tokens are short-lived JWTs signed with a symmetric HMAC key at v1
|
|
// (the plan leaves ed25519/RS256 as a future open item). The jti claim
|
|
// is a TypeID (tok_*) so revocation, logs, and audit rows can reference
|
|
// it directly.
|
|
//
|
|
// Every token carries a `kid` header that names the key that signed
|
|
// it. Tokens without a `kid` are rejected — anchorage is new software
|
|
// so there is no pre-rotation token population to carry forward.
|
|
//
|
|
// Rotation: Signer accepts multiple keys. Exactly one is flagged as
|
|
// Primary — Mint always signs with it and puts its ID in the JWT
|
|
// `kid` header. Parse reads `kid` and picks the matching key from the
|
|
// full set, so tokens signed with a retiring key keep verifying while
|
|
// the retiring key is still loaded. See deploy/README.md for the
|
|
// operational procedure.
|
|
package token
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/golang-jwt/jwt/v5"
|
|
|
|
"anchorage/internal/pkg/ids"
|
|
"anchorage/internal/pkg/store"
|
|
)
|
|
|
|
const (
	// DevKeyID is the fixed kid attached to the built-in dev-fallback
	// signing key. That key is only in play when the operator has
	// configured no keys at all and anchorage falls back to its insecure
	// dev default (LoadSigningKeys emits a loud warning in that case).
	//
	// Outside a local dev environment a token carrying kid=DevKeyID
	// must never be treated as legitimate.
	DevKeyID = "dev"
)
|
|
|
|
// Claims is anchorage's JWT payload.
|
|
type Claims struct {
|
|
Org ids.OrgID `json:"org"`
|
|
User ids.UserID `json:"sub_id"`
|
|
Role string `json:"role"`
|
|
Scopes []string `json:"scopes,omitempty"`
|
|
jwt.RegisteredClaims
|
|
}
|
|
|
|
// SigningKey pairs raw HMAC key material with the stable identifier it
// is published under. Within the set handed to a Signer, exactly one
// key carries Primary — that is the key Mint signs with.
type SigningKey struct {
	// ID is the stable label written to the JWT `kid` header at mint
	// time and used by Parse to pick the verifying key. Think of it as
	// a version tag ("2026-04", "prod-v2", ...): any value works as
	// long as it never changes for a given key.
	ID string

	// Key holds the raw HMAC secret; it must be at least 32 bytes.
	Key []byte

	// Primary selects this key for minting. Exactly one key in the set
	// must set it.
	Primary bool
}
|
|
|
|
// DenyCache is the narrow cache contract the Signer relies on to avoid
// a store round-trip for every denylist lookup. It is satisfied
// implicitly by *cache.Cache[string,bool], and tests may supply a fake.
// When the Signer has no DenyCache (nil), every Parse consults the
// TokenStore directly.
type DenyCache interface {
	// Get returns the cached value for key and whether an entry was
	// present.
	Get(key string) (bool, bool)

	// Set stores v under key with the given cost and time-to-live. The
	// Signer ignores the returned bool.
	Set(key string, v bool, cost int64, ttl time.Duration) bool

	// Delete removes any entry stored under key.
	Delete(key string)
}
|
|
|
|
// Signer mints + parses JWTs and checks the Postgres-backed denylist.
|
|
type Signer struct {
|
|
keys map[string][]byte
|
|
primaryID string
|
|
issuer string
|
|
audience string
|
|
tokens store.TokenStore
|
|
// denyCache is an optional jti → isDenied short-TTL cache. Cuts
|
|
// the per-authenticated-request DB hit for the 99.99% of tokens
|
|
// that aren't revoked. Cross-node invalidation is the caller's
|
|
// responsibility — typically via cache.WatchEntity("token", cache.Delete).
|
|
denyCache DenyCache
|
|
onRevoke RevokeHook
|
|
}
|
|
|
|
// SetDenyCache attaches a cache implementation for denylist lookups.
|
|
// Safe to call once at construction time before the Signer starts
|
|
// serving requests; not safe to swap at runtime.
|
|
func (s *Signer) SetDenyCache(c DenyCache) { s.denyCache = c }
|
|
|
|
// RevokeHook is the callback signature invoked after each successful
// Revoke; it receives the jti that was just revoked. A typical
// implementation publishes a NATS `cache.invalidate.token.<jti>`
// message so that peer nodes evict the jti from their local denylist
// caches.
type RevokeHook func(jti string)
|
|
|
|
// SetRevokeHook installs a post-Revoke callback. Nil to clear.
|
|
func (s *Signer) SetRevokeHook(h RevokeHook) { s.onRevoke = h }
|
|
|
|
// NewSigner constructs a Signer with one or more keys for overlap-style
|
|
// rotation. Exactly one key must be flagged Primary; all IDs must be
|
|
// unique and non-empty; all keys must be >= 32 bytes.
|
|
func NewSigner(keys []SigningKey, issuer, audience string, tokens store.TokenStore) (*Signer, error) {
|
|
if len(keys) == 0 {
|
|
return nil, errors.New("token: at least one signing key is required")
|
|
}
|
|
s := &Signer{
|
|
keys: make(map[string][]byte, len(keys)),
|
|
issuer: issuer,
|
|
audience: audience,
|
|
tokens: tokens,
|
|
}
|
|
for _, k := range keys {
|
|
if k.ID == "" {
|
|
return nil, errors.New("token: SigningKey.ID must not be empty")
|
|
}
|
|
if _, dup := s.keys[k.ID]; dup {
|
|
return nil, fmt.Errorf("token: duplicate kid %q", k.ID)
|
|
}
|
|
if len(k.Key) < 32 {
|
|
return nil, fmt.Errorf("token: key %q is too short (need >= 32 bytes)", k.ID)
|
|
}
|
|
s.keys[k.ID] = k.Key
|
|
if k.Primary {
|
|
if s.primaryID != "" {
|
|
return nil, fmt.Errorf("token: multiple primary keys (%q and %q)", s.primaryID, k.ID)
|
|
}
|
|
s.primaryID = k.ID
|
|
}
|
|
}
|
|
if s.primaryID == "" {
|
|
return nil, errors.New("token: exactly one key must be flagged Primary")
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// PrimaryKeyID returns the kid of the current minting key. Useful for
|
|
// tests and for the admin rotation CLI.
|
|
func (s *Signer) PrimaryKeyID() string { return s.primaryID }
|
|
|
|
// AcceptsKey reports whether kid would verify incoming tokens.
|
|
func (s *Signer) AcceptsKey(kid string) bool {
|
|
_, ok := s.keys[kid]
|
|
return ok
|
|
}
|
|
|
|
// Mint signs a new JWT with the given claims. jti is generated if empty;
|
|
// ttl <= 0 falls back to 24h. The primary key's ID is written into the
|
|
// JWT `kid` header.
|
|
func (s *Signer) Mint(ctx context.Context, orgID ids.OrgID, userID ids.UserID, role string, scopes []string, ttl time.Duration) (string, *Claims, error) {
|
|
if ttl <= 0 {
|
|
ttl = 24 * time.Hour
|
|
}
|
|
jti, err := ids.NewToken()
|
|
if err != nil {
|
|
return "", nil, err
|
|
}
|
|
now := time.Now().UTC()
|
|
c := &Claims{
|
|
Org: orgID,
|
|
User: userID,
|
|
Role: role,
|
|
Scopes: scopes,
|
|
RegisteredClaims: jwt.RegisteredClaims{
|
|
ID: jti.String(),
|
|
Issuer: s.issuer,
|
|
Audience: jwt.ClaimStrings{s.audience},
|
|
IssuedAt: jwt.NewNumericDate(now),
|
|
ExpiresAt: jwt.NewNumericDate(now.Add(ttl)),
|
|
NotBefore: jwt.NewNumericDate(now),
|
|
},
|
|
}
|
|
tok := jwt.NewWithClaims(jwt.SigningMethodHS256, c)
|
|
tok.Header["kid"] = s.primaryID
|
|
signed, err := tok.SignedString(s.keys[s.primaryID])
|
|
if err != nil {
|
|
return "", nil, fmt.Errorf("sign jwt: %w", err)
|
|
}
|
|
return signed, c, nil
|
|
}
|
|
|
|
// Parse validates the JWT signature, expiry, and denylist status.
|
|
//
|
|
// The verifying key is chosen by the token's `kid` header. Tokens
|
|
// without a `kid`, or pointing at a kid that isn't loaded in this
|
|
// Signer's key set, are rejected — silently trying every key would
|
|
// defeat the point of rotation.
|
|
func (s *Signer) Parse(ctx context.Context, raw string) (*Claims, error) {
|
|
c := &Claims{}
|
|
tok, err := jwt.ParseWithClaims(raw, c, s.keyfunc,
|
|
jwt.WithAudience(s.audience),
|
|
jwt.WithIssuer(s.issuer),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !tok.Valid {
|
|
return nil, errors.New("token: invalid")
|
|
}
|
|
if c.ID == "" {
|
|
return nil, errors.New("token: missing jti")
|
|
}
|
|
jti, err := ids.ParseToken(c.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("token: parse jti: %w", err)
|
|
}
|
|
if s.tokens != nil {
|
|
denied, err := s.isDeniedCached(ctx, jti)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("token: denylist check: %w", err)
|
|
}
|
|
if denied {
|
|
return nil, errors.New("token: revoked")
|
|
}
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
// isDeniedCached wraps TokenStore.IsDenied with a short-TTL cache
|
|
// keyed on the jti string. 5 minutes strikes a balance: a freshly
|
|
// revoked jti stops verifying within (at most) 5 minutes without
|
|
// explicit invalidation, but cross-node invalidation via
|
|
// cache.invalidate.token.<jti> usually drops stale entries within
|
|
// one NATS RTT.
|
|
//
|
|
// Both true and false outcomes are cached. A false (not-denied) hit
|
|
// is the common case and the reason this cache exists; a true hit
|
|
// still short-circuits the DB call until TTL expires.
|
|
func (s *Signer) isDeniedCached(ctx context.Context, jti ids.TokenID) (bool, error) {
|
|
key := jti.String()
|
|
if s.denyCache != nil {
|
|
if v, ok := s.denyCache.Get(key); ok {
|
|
return v, nil
|
|
}
|
|
}
|
|
denied, err := s.tokens.IsDenied(ctx, jti)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if s.denyCache != nil {
|
|
s.denyCache.Set(key, denied, 1, 5*time.Minute)
|
|
}
|
|
return denied, nil
|
|
}
|
|
|
|
// keyfunc selects the HMAC key matching the token's kid. Kept as a
|
|
// method (not a closure inside Parse) for testability + clarity.
|
|
func (s *Signer) keyfunc(t *jwt.Token) (any, error) {
|
|
if _, ok := t.Method.(*jwt.SigningMethodHMAC); !ok {
|
|
return nil, fmt.Errorf("unexpected signing method %q", t.Header["alg"])
|
|
}
|
|
kid, _ := t.Header["kid"].(string)
|
|
if kid == "" {
|
|
return nil, errors.New("token: missing kid header")
|
|
}
|
|
key, ok := s.keys[kid]
|
|
if !ok {
|
|
return nil, fmt.Errorf("token: unknown kid %q", kid)
|
|
}
|
|
return key, nil
|
|
}
|
|
|
|
// Revoke marks the jti as denied until its natural expiry. Idempotent.
|
|
// Drops the local cache entry immediately so the revoking node sees
|
|
// the change without waiting for TTL; cross-node consistency is the
|
|
// caller's responsibility — typically via cache.Invalidator.Emit on
|
|
// the "token" entity, which other nodes pick up through
|
|
// cache.WatchEntity subscribers that call s.InvalidateDenyCache.
|
|
func (s *Signer) Revoke(ctx context.Context, jti ids.TokenID, expiresAt time.Time, reason string) error {
|
|
if s.tokens == nil {
|
|
return nil
|
|
}
|
|
if err := s.tokens.Revoke(ctx, jti); err != nil && !errors.Is(err, store.ErrNotFound) {
|
|
return err
|
|
}
|
|
if err := s.tokens.AddDenylist(ctx, jti, expiresAt, reason); err != nil {
|
|
return err
|
|
}
|
|
s.InvalidateDenyCache(jti.String())
|
|
if s.onRevoke != nil {
|
|
s.onRevoke(jti.String())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// InvalidateDenyCache drops a single jti from the local denylist cache.
|
|
// Exported so the cross-node invalidation subscriber (see
|
|
// internal/pkg/cache.WatchEntity) can plug into the Signer without
|
|
// reaching into unexported state. A no-op when no cache is attached.
|
|
func (s *Signer) InvalidateDenyCache(jti string) {
|
|
if s.denyCache == nil {
|
|
return
|
|
}
|
|
s.denyCache.Delete(jti)
|
|
}
|