From 7fc81d3d976437bab3b52d16582d8132808cd093 Mon Sep 17 00:00:00 2001 From: Catherine Date: Wed, 17 Sep 2025 03:33:53 +0000 Subject: [PATCH] [breaking-change] Rearchitect for better object store compatibility. Co-authored-by: bin --- README.md | 30 +++- config.toml.example | 15 +- go.mod | 34 +++-- go.sum | 53 +++++-- src/auth.go | 5 + src/backend.go | 347 ++++++++++++++++++++++++++++++++++++++++++++ src/caddy.go | 6 +- src/config.go | 15 +- src/fetch.go | 204 ++++++++------------------ src/main.go | 29 +++- src/manifest.go | 140 ++++++++++++++++++ src/pages.go | 219 +++++++++++++++------------- src/schema.pb.go | 293 +++++++++++++++++++++++++++++++++++++ src/schema.proto | 29 ++++ src/update.go | 86 +++++++++++ 15 files changed, 1226 insertions(+), 279 deletions(-) create mode 100644 src/backend.go create mode 100644 src/manifest.go create mode 100644 src/schema.pb.go create mode 100644 src/schema.proto create mode 100644 src/update.go diff --git a/README.md b/README.md index 56cc630..197155e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This is a simple Go service implemented as a strawman proposal of how https://co Features -------- -* In response to a `PUT` or `POST` request, performs a shallow in-memory clone of a git repository, checks out a tree to the filesystem, and atomically updates the version of content being served. +* In response to a `PUT` or `POST` request, performs a shallow in-memory clone of a git repository, checks out a tree to the storage backend, and atomically updates the version of content being served. - `PUT` method is a custom REST endpoint, `POST` method is a Forgejo webhook endpoint. * In response to a `GET` or `HEAD` request, selects an appropriate tree and serves files from it. Supported URL patterns: - `https://domain.tld/project/` (routed to project-specific tree) @@ -56,19 +56,39 @@ Authorization DNS is used for authorization of content updates. 
- If a `[wildcard]` configuration section is specified, and if the suffix of a hostname in a `POST` request is equal to `[wildcard].domain`, then the request is authorized when and only when the repository URL in the event body matches the repository URL computed from the configuration file. Otherwise the next rule is used. - - If a `PUT` or `POST` request is received at `` with an `Authorization: Pages ` header (or, in absence of such, with an `Authorization: Basic ` header, where `` is equal to `Base64("Pages ")`), then the request is authorized when any of the the TXT records at `_git-pages-challenge.` are equal to `SHA256(" ")`. + - During development, set the environment variable `INSECURE=1` to bypass these checks. -Architecture ------------- +Architecture (v2) +----------------- + +An object store (filesystem, S3, ...) is used as the sole mechanism for state storage. The object store is expected to provide atomic operations; where necessary, the backend adapter ensures atomicity. + +- Repositories themselves are never stored on disk; they are cloned in-memory and discarded immediately after their contents are extracted. +- The `blob/` prefix contains file data organized by hash of their contents (indiscriminately of the repository they belong to). + - Very small files are stored inline in the manifest. +- The `site/` prefix contains site manifests organized by domain and project name (e.g. `site/example.org/myproject` or `site/example.org/.index`). + - The manifest is a Protobuf object containing a flat mapping of paths to entries. An entry consists of a type (file, directory, symlink, etc.) and data, which may be stored inline or refer to a blob. + - A small amount of internal metadata within a manifest allows attributing deployments to their source and computing quotas. +- Additionally, the object store contains *staged manifests*, representing an in-progress update operation. 
+ - An update first creates a staged manifest, then uploads blobs, then replaces the deployed manifest with the staged one. This avoids TOCTTOU race conditions during garbage collection. + - Stable marshalling allows addressing staged manifests by the hash of their contents. + +This approach, unlike the v1 one, cannot be easily introspected with normal Unix commands, but is very friendly to S3-style object storage services, as it does not rely on operations these services cannot support (subtree rename, directory stat, symlink/readlink). + + +Architecture (v1) +----------------- + +*This was the original architecture and it is no longer used.* Filesystem is used as the sole mechanism for state storage. - The `data/tree/` directory contains working trees organized by commit hash (indiscriminately of the repository they belong to). Repositories themselves are never stored on disk; they are cloned in-memory and discarded immediately after their contents is extracted. - The presence of a working tree directory under the appropriate commit hash is considered an indicator of its completeness. Checkouts are first done into a temporary directory and then atomically moved into place. - Currently a working tree is never removed, but a practical system would need to have a way to discard orphaned ones. -- The `data/www/` directory contains symlinks to working trees organized by domain and project name (e.g. `data/www/example.org/myproject`, or `data/www/example.org/.index`). +- The `data/www/` directory contains symlinks to working trees organized by domain and project name (e.g. `data/www/example.org/myproject` or `data/www/example.org/.index`). - The presence of a symlink at the appropriate location is considered an indicator of completeness as well. Updating to a new content version is done by creating a new symlink at a temporary location and then atomically moving it into place. - This structure is simple enough that it may be served by e.g. 
Nginx instead of the Go application. - `openat2(RESOLVE_IN_ROOT)` is used to confine GET requests strictly under the `data/` directory. diff --git a/config.toml.example b/config.toml.example index 3ad1d40..e36aad4 100644 --- a/config.toml.example +++ b/config.toml.example @@ -1,5 +1,3 @@ -data-dir = "./data" - [pages] protocol = "tcp" address = ":3333" @@ -12,3 +10,16 @@ address = ":3334" domain = "codeberg.page" clone-url = "https://codeberg.org/%s/%s.git" index-repo = "%s.codeberg.page" + +[backend] +type = "fs" + +[backend.fs] +root = "data" + +[backend.s3] +endpoint = "play.min.io" +access-key-id = "Q3AM3UQ867SPQQA43P2F" +secret-access-key = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" +region = "us-east-1" +bucket = "git-pages-demo" diff --git a/go.mod b/go.mod index e27b1d3..39db435 100644 --- a/go.mod +++ b/go.mod @@ -1,29 +1,41 @@ module whitequark.org/git-pages -go 1.24.0 - -toolchain go1.24.4 +go 1.25.0 require ( - github.com/cyphar/filepath-securejoin v0.4.1 - github.com/go-git/go-billy/v6 v6.0.0-20250902094905-c2c3cf4b2510 - github.com/go-git/go-git/v6 v6.0.0-20250831162718-34f273445e00 - golang.org/x/sys v0.35.0 + github.com/go-git/go-git/v6 v6.0.0-20250910120214-3a68d0404116 + github.com/minio/minio-go/v7 v7.0.95 + github.com/pelletier/go-toml/v2 v2.2.4 + google.golang.org/protobuf v1.36.9 ) require ( - dario.cat/mergo v1.0.2 // indirect + dario.cat/mergo v1.0.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.3.0 // indirect github.com/cloudflare/circl v1.6.1 // indirect + github.com/cyphar/filepath-securejoin v0.4.1 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/go-git/gcfg/v2 v2.0.2 // indirect + github.com/go-git/go-billy/v6 v6.0.0-20250627091229-31e2a16eef30 // indirect + github.com/go-ini/ini v1.67.0 // indirect + github.com/goccy/go-json v0.10.5 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // 
indirect + github.com/google/uuid v1.6.0 // indirect github.com/kevinburke/ssh_config v1.4.0 // indirect - github.com/pelletier/go-toml/v2 v2.2.4 // indirect - github.com/pjbgf/sha1cd v0.4.0 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/minio/crc64nvme v1.0.2 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/philhofer/fwd v1.2.0 // indirect + github.com/pjbgf/sha1cd v0.5.0 // indirect + github.com/rs/xid v1.6.0 // indirect github.com/sergi/go-diff v1.4.0 // indirect + github.com/tinylib/msgp v1.3.0 // indirect golang.org/x/crypto v0.41.0 // indirect - golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect + golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect golang.org/x/net v0.43.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/text v0.28.0 // indirect ) diff --git a/go.sum b/go.sum index 95d11ca..ad87d7c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= -dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= +dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw= @@ -15,6 +15,8 @@ github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGL github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 
h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= @@ -23,35 +25,60 @@ github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-git/gcfg/v2 v2.0.2 h1:MY5SIIfTGGEMhdA7d7JePuVVxtKL7Hp+ApGDJAJ7dpo= github.com/go-git/gcfg/v2 v2.0.2/go.mod h1:/lv2NsxvhepuMrldsFilrgct6pxzpGdSRC13ydTLSLs= -github.com/go-git/go-billy/v6 v6.0.0-20250902094905-c2c3cf4b2510 h1:OENVwI63hXDi8Lg8xzP+at+04zlRSG/JZMPxLy44c40= -github.com/go-git/go-billy/v6 v6.0.0-20250902094905-c2c3cf4b2510/go.mod h1:lKJxR4cJDv25TFfQTQ0zXWrKjd48IuGzNPqL7duMEQA= +github.com/go-git/go-billy/v6 v6.0.0-20250627091229-31e2a16eef30 h1:4KqVJTL5eanN8Sgg3BV6f2/QzfZEFbCd+rTak1fGRRA= +github.com/go-git/go-billy/v6 v6.0.0-20250627091229-31e2a16eef30/go.mod h1:snwvGrbywVFy2d6KJdQ132zapq4aLyzLMgpo79XdEfM= github.com/go-git/go-git-fixtures/v5 v5.1.0 h1:b8cWxDLTk0s09Ihm9x1HvNGUzxUVlRwIH7EAM0gGDKg= github.com/go-git/go-git-fixtures/v5 v5.1.0/go.mod h1:CdmU0oQeDuy4Xh8V0i9Ym+vsTkgDDPKEiofBFEVT+aE= -github.com/go-git/go-git/v6 v6.0.0-20250831162718-34f273445e00 h1:eW0gxk9rk3jv7mf4r+sKNLXNgex2LMReedRCRJewQhw= -github.com/go-git/go-git/v6 v6.0.0-20250831162718-34f273445e00/go.mod h1:O7tkz+vcaOSOSRqAGC+MG6evNI8NsTmyH98ey4BTYwk= +github.com/go-git/go-git/v6 v6.0.0-20250910120214-3a68d0404116 h1:YtWRF4qTPh9YEh2m6lhaKDQDyErFLp/UfWJC9MjLEvk= +github.com/go-git/go-git/v6 v6.0.0-20250910120214-3a68d0404116/go.mod h1:qikYwcUCOy1+Pq2SPaUmoubCmJ2PT+Lg9ti8sgwtmJg= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod 
h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/kevinburke/ssh_config v1.4.0 h1:6xxtP5bZ2E4NF5tuQulISpTO2z8XbtH8cg1PWkxoFkQ= github.com/kevinburke/ssh_config v1.4.0/go.mod h1:q2RIzfka+BXARoNexmF9gkxEX7DmvbW9P4hIVx2Kg4M= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/minio/crc64nvme v1.0.2 h1:6uO1UxGAD+kwqWWp7mBFsi5gAse66C4NXO8cmcVculg= +github.com/minio/crc64nvme v1.0.2/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod 
h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.95 h1:ywOUPg+PebTMTzn9VDsoFJy32ZuARN9zhB+K3IYEvYU= +github.com/minio/minio-go/v7 v7.0.95/go.mod h1:wOOX3uxS334vImCNRVyIDdXX9OsXDm89ToynKgqUKlo= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= -github.com/pjbgf/sha1cd v0.4.0 h1:NXzbL1RvjTUi6kgYZCX3fPwwl27Q1LJndxtUDVfJGRY= -github.com/pjbgf/sha1cd v0.4.0/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= +github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= +github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= +github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0= +github.com/pjbgf/sha1cd v0.5.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tinylib/msgp v1.3.0 
h1:ULuf7GPooDaIlbyvgAxBV/FI7ynli6LZ1/nVUNu+0ww= +github.com/tinylib/msgp v1.3.0/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0= golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= -golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0= -golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= +golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= +golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= @@ -60,6 +87,8 @@ golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= +google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/src/auth.go b/src/auth.go index 6f71023..0c895e8 100644 --- a/src/auth.go +++ b/src/auth.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/http" + "os" "slices" "strings" ) @@ -23,6 +24,10 @@ func GetHost(r *http.Request) string { func Authorize(w 
http.ResponseWriter, r *http.Request) error { host := GetHost(r) + if os.Getenv("INSECURE") != "" { + return nil // for testing only + } + authorization := r.Header.Get("Authorization") if authorization == "" { http.Error(w, "missing Authorization header", http.StatusUnauthorized) diff --git a/src/backend.go b/src/backend.go new file mode 100644 index 0000000..3dc3bcd --- /dev/null +++ b/src/backend.go @@ -0,0 +1,347 @@ +// Abstract interface for storage backends; filesystem backend. + +package main + +import ( + "bytes" + "context" + "crypto/sha256" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "strings" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" +) + +type Backend interface { + // Retrieve a blob. Returns `reader, mtime, err`. + GetBlob(name string) (io.ReadSeekCloser, time.Time, error) + + // Store a blob. If a blob called `name` already exists, this function returns `nil` without + // regards to the old or new contents. It is expected that blobs are content-addressed, i.e. + // the `name` contains a cryptographic hash of `data`, but the backend is ignorant of this. + PutBlob(name string, data []byte) error + + // Delete a blob. This is an unconditional operation that can break integrity of manifests. + DeleteBlob(name string) error + + // Retrieve a manifest. + GetManifest(name string) (*Manifest, error) + + // Stage a manifest. This operation stores a new version of a manifest, locking any blobs + // referenced from it in place (for garbage collection purposes) but without any other side + // effects. + StageManifest(manifest *Manifest) error + + // Commit a manifest. This is an atomic operation; `GetManifest` calls will return either + // the old version or the new version of the manifest, never anything else. + CommitManifest(name string, manifest *Manifest) error + + // Delete a manifest. 
+ DeleteManifest(name string) error +} + +type FSBackend struct { + blobRoot *os.Root + siteRoot *os.Root +} + +func maybeCreateOpenRoot(dir string, name string) (*os.Root, error) { + dirName := filepath.Join(dir, name) + + if err := os.Mkdir(dirName, 0o755); err != nil && !errors.Is(err, os.ErrExist) { + return nil, fmt.Errorf("mkdir: %s", err) + } + + root, err := os.OpenRoot(dirName) + if err != nil { + return nil, fmt.Errorf("open: %s", err) + } + + return root, nil +} + +func createTempInRoot(root *os.Root, name string, data []byte) (string, error) { + tempFile, err := os.CreateTemp(root.Name(), name) + if err != nil { + return "", fmt.Errorf("mktemp: %s", err) + } + _, err = tempFile.Write(data) + tempFile.Close() + if err != nil { + return "", fmt.Errorf("write: %s", err) + } + + tempPath, err := filepath.Rel(root.Name(), tempFile.Name()) + if err != nil { + return "", fmt.Errorf("relpath: %s", err) + } + + return tempPath, nil +} + +func NewFSBackend(dir string) (*FSBackend, error) { + blobRoot, err := maybeCreateOpenRoot(dir, "blob") + if err != nil { + return nil, fmt.Errorf("blob: %s", err) + } + siteRoot, err := maybeCreateOpenRoot(dir, "site") + if err != nil { + return nil, fmt.Errorf("site: %s", err) + } + return &FSBackend{blobRoot, siteRoot}, nil +} + +func (fs *FSBackend) Backend() Backend { + return fs +} + +func splitBlobName(name string) []string { + algo, hash, found := strings.Cut(name, "-") + if found { + return slices.Concat([]string{algo}, splitBlobName(hash)) + } else { + return []string{name[0:2], name[2:4], name[4:]} + } +} + +func (fs *FSBackend) GetBlob(name string) (io.ReadSeekCloser, time.Time, error) { + blobPath := filepath.Join(splitBlobName(name)...) 
+ stat, err := fs.blobRoot.Stat(blobPath) + if err != nil { + return nil, time.Time{}, fmt.Errorf("stat: %s", err) + } + file, err := fs.blobRoot.Open(blobPath) + if err != nil { + return nil, time.Time{}, fmt.Errorf("open: %s", err) + } + return file, stat.ModTime(), nil +} + +func (fs *FSBackend) PutBlob(name string, data []byte) error { + blobPath := filepath.Join(splitBlobName(name)...) + blobDir := filepath.Dir(blobPath) + + tempPath, err := createTempInRoot(fs.blobRoot, name, data) + if err != nil { + return err + } + + if err := fs.blobRoot.Chmod(tempPath, 0o444); err != nil { + return fmt.Errorf("chmod: %s", err) + } + + if err := fs.blobRoot.MkdirAll(blobDir, 0o755); err != nil { + return fmt.Errorf("mkdir: %s", err) + } + + if err := fs.blobRoot.Rename(tempPath, blobPath); err != nil { + return fmt.Errorf("rename: %s", err) + } + + return nil +} + +func (fs *FSBackend) DeleteBlob(name string) error { + blobPath := filepath.Join(splitBlobName(name)...) + return fs.blobRoot.Remove(blobPath) +} + +func (fs *FSBackend) GetManifest(name string) (*Manifest, error) { + data, err := fs.siteRoot.ReadFile(name) + if err != nil { + return nil, err + } + + return DecodeManifest(data) +} + +func stagedManifestName(manifestData []byte) string { + return fmt.Sprintf(".%x", sha256.Sum256(manifestData)) +} + +func (fs *FSBackend) StageManifest(manifest *Manifest) error { + manifestData := EncodeManifest(manifest) + + tempPath, err := createTempInRoot(fs.siteRoot, ".manifest", manifestData) + if err != nil { + return err + } + + if err := fs.siteRoot.Rename(tempPath, stagedManifestName(manifestData)); err != nil { + return fmt.Errorf("rename: %s", err) + } + + return nil +} + +func (fs *FSBackend) CommitManifest(name string, manifest *Manifest) error { + manifestData := EncodeManifest(manifest) + manifestHashName := stagedManifestName(manifestData) + + if _, err := fs.siteRoot.Stat(manifestHashName); err != nil { + return fmt.Errorf("manifest not staged") + } + + if err := 
fs.siteRoot.MkdirAll(filepath.Dir(name), 0o755); err != nil { + return fmt.Errorf("mkdir: %s", err) + } + + if err := fs.siteRoot.Rename(manifestHashName, name); err != nil { + return fmt.Errorf("rename: %s", err) + } + + return nil +} + +func (fs *FSBackend) DeleteManifest(name string) error { + return fs.siteRoot.Remove(name) +} + +type S3Backend struct { + ctx context.Context + client *minio.Client + bucket string +} + +func NewS3Backend( + endpoint string, + insecure bool, + accessKeyID string, + secretAccessKey string, + region string, + bucket string, +) (*S3Backend, error) { + ctx := context.Background() + + client, err := minio.New(config.Backend.S3.Endpoint, &minio.Options{ + Creds: credentials.NewStaticV4( + config.Backend.S3.AccessKeyID, + config.Backend.S3.SecretAccessKey, + "", + ), + Secure: !config.Backend.S3.Insecure, + }) + if err != nil { + return nil, err + } + + exists, err := client.BucketExists(ctx, config.Backend.S3.Bucket) + if err != nil { + return nil, err + } else if !exists { + err = client.MakeBucket(ctx, config.Backend.S3.Bucket, + minio.MakeBucketOptions{Region: config.Backend.S3.Region}) + if err != nil { + return nil, err + } + } + + return &S3Backend{ctx, client, bucket}, nil +} + +func (s3 *S3Backend) Backend() Backend { + return s3 +} + +func blobObjectName(name string) string { + return fmt.Sprintf("blob/%s", name) +} + +func (s3 *S3Backend) GetBlob(name string) (io.ReadSeekCloser, time.Time, error) { + object, err := s3.client.GetObject(s3.ctx, s3.bucket, blobObjectName(name), + minio.GetObjectOptions{}) + if err != nil { + return nil, time.Time{}, err + } + + stat, err := object.Stat() + if err != nil { + return nil, time.Time{}, err + } + + return object, stat.LastModified, nil +} + +func (s3 *S3Backend) PutBlob(name string, data []byte) error { + _, err := s3.client.StatObject(s3.ctx, s3.bucket, blobObjectName(name), + minio.GetObjectOptions{}) + if err != nil { + errResp := minio.ToErrorResponse(err) + if errResp.Code == 
"NoSuchKey" { + _, err := s3.client.PutObject(s3.ctx, s3.bucket, blobObjectName(name), + bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{}) + if err != nil { + return err + } + } else { + return err + } + } + return nil // already exists or was created +} + +func (s3 *S3Backend) DeleteBlob(name string) error { + return s3.client.RemoveObject(s3.ctx, s3.bucket, blobObjectName(name), + minio.RemoveObjectOptions{}) +} + +func manifestObjectName(name string) string { + return fmt.Sprintf("site/%s", name) +} + +func stagedManifestObjectName(manifestData []byte) string { + return fmt.Sprintf("dirty/%x", sha256.Sum256(manifestData)) +} + +func (s3 *S3Backend) GetManifest(name string) (*Manifest, error) { + object, err := s3.client.GetObject(s3.ctx, s3.bucket, manifestObjectName(name), + minio.GetObjectOptions{}) + if err != nil { + return nil, err + } + + data, err := io.ReadAll(object) + if err != nil { + return nil, err + } + + return DecodeManifest(data) +} + +func (s3 *S3Backend) StageManifest(manifest *Manifest) error { + data := EncodeManifest(manifest) + + _, err := s3.client.PutObject(s3.ctx, s3.bucket, stagedManifestObjectName(data), + bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{}) + return err +} + +func (s3 *S3Backend) CommitManifest(name string, manifest *Manifest) error { + data := EncodeManifest(manifest) + + // Remove staged object unconditionally (whether commit succeeded or failed), since + // the upper layer has to retry the complete operation anyway. 
+ _, putErr := s3.client.PutObject(s3.ctx, s3.bucket, manifestObjectName(name), + bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{}) + removeErr := s3.client.RemoveObject(s3.ctx, s3.bucket, stagedManifestObjectName(data), + minio.RemoveObjectOptions{}) + if putErr != nil { + return putErr + } else if removeErr != nil { + return removeErr + } else { + return nil + } +} + +func (s3 *S3Backend) DeleteManifest(name string) error { + return s3.client.RemoveObject(s3.ctx, s3.bucket, manifestObjectName(name), + minio.RemoveObjectOptions{}) +} diff --git a/src/caddy.go b/src/caddy.go index ff7bb50..14bcb14 100644 --- a/src/caddy.go +++ b/src/caddy.go @@ -1,10 +1,9 @@ package main import ( + "fmt" "log" "net/http" - "os" - "path/filepath" ) func ServeCaddy(w http.ResponseWriter, r *http.Request) { @@ -14,8 +13,7 @@ func ServeCaddy(w http.ResponseWriter, r *http.Request) { return } - wwwRoot := filepath.Join(config.DataDir, "www", domain) - if stat, err := os.Stat(wwwRoot); err == nil && stat.IsDir() { + if manifest, _ := backend.GetManifest(fmt.Sprintf("%s/.index", domain)); manifest != nil { log.Println("caddy:", domain, 200) w.WriteHeader(http.StatusOK) } else { diff --git a/src/config.go b/src/config.go index 91264cf..1ae24de 100644 --- a/src/config.go +++ b/src/config.go @@ -12,7 +12,6 @@ type Listen struct { } type Config struct { - DataDir string `toml:"data-dir"` Pages Listen `toml:"pages"` Caddy Listen `toml:"caddy"` Wildcard struct { @@ -20,6 +19,20 @@ type Config struct { CloneURL string `toml:"clone-url"` IndexRepo string `toml:"index-repo"` } `toml:"wildcard"` + Backend struct { + Type string `toml:"type"` + FS struct { + Root string `toml:"root"` + } `toml:"fs"` + S3 struct { + Endpoint string `toml:"endpoint"` + Insecure bool `toml:"insecure"` + AccessKeyID string `toml:"access-key-id"` + SecretAccessKey string `toml:"secret-access-key"` + Region string `toml:"region"` + Bucket string `toml:"bucket"` + } + } `toml:"backend"` } func 
readConfig(path string, config *Config) error { diff --git a/src/fetch.go b/src/fetch.go index 0c325bd..afd1029 100644 --- a/src/fetch.go +++ b/src/fetch.go @@ -1,49 +1,21 @@ package main import ( - "errors" "fmt" - "log" - "os" - "path/filepath" - "strings" - "time" + "io" - "github.com/go-git/go-billy/v6/osfs" "github.com/go-git/go-git/v6" "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/filemode" + "github.com/go-git/go-git/v6/plumbing/object" "github.com/go-git/go-git/v6/storage/memory" ) -type FetchOutcome int - -const ( - FetchError FetchOutcome = iota - FetchTimeout - FetchCreated - FetchUpdated - FetchNoChange -) - -type FetchResult struct { - outcome FetchOutcome - head string - err error -} - -func splitHash(hash plumbing.Hash) string { - head := hash.String() - return filepath.Join(head[:2], head[2:]) -} - -func fetch( - webRoot string, - repoURL string, - branch string, -) FetchResult { +func FetchRepository(repoURL string, branch string) (*Manifest, error) { storer := memory.NewStorage() repo, err := git.Clone(storer, nil, &git.CloneOptions{ + Bare: true, URL: repoURL, ReferenceName: plumbing.ReferenceName(branch), SingleBranch: true, @@ -51,124 +23,72 @@ func fetch( Tags: git.NoTags, }) if err != nil { - return FetchResult{err: fmt.Errorf("git clone: %s", err)} + return nil, fmt.Errorf("git clone: %s", err) } ref, err := repo.Head() if err != nil { - return FetchResult{err: fmt.Errorf("git head: %s", err)} - } - head := ref.Hash() - - destDir := filepath.Join(config.DataDir, "tree", splitHash(head)) - if _, err := os.Stat(destDir); errors.Is(err, os.ErrNotExist) { - // check out to a temporary directory to avoid TOCTTOU race on destDir - tempDir, err := os.MkdirTemp(config.DataDir, ".tree") - if err != nil { - return FetchResult{err: fmt.Errorf("mkdir temp: %s", err)} - } - defer os.RemoveAll(tempDir) - - repo, err = git.Open(storer, osfs.New(tempDir, osfs.WithBoundOS())) - if err != nil { - return FetchResult{err: 
fmt.Errorf("git open: %s", err)} - } - - worktree, err := repo.Worktree() - if err != nil { - return FetchResult{err: fmt.Errorf("git worktree: %s", err)} - } - - if err := worktree.Checkout(&git.CheckoutOptions{ - Hash: head, - }); err != nil { - return FetchResult{err: fmt.Errorf("git checkout: %s", err)} - } - - if err := os.MkdirAll(filepath.Dir(destDir), 0o755); err != nil { - return FetchResult{err: fmt.Errorf("mkdir parent dest: %s", err)} - } - - // commit atomically; assume another fetch has won the race if directory exists - if err := os.Rename(tempDir, destDir); err != nil && !errors.Is(err, os.ErrExist) { - return FetchResult{err: fmt.Errorf("rename dest: %s", err)} - } + return nil, fmt.Errorf("git head: %s", err) } - webLink := filepath.Join(config.DataDir, "www", webRoot) - destDirRel, _ := filepath.Rel(filepath.Dir(webLink), destDir) - - tempLink := filepath.Join(config.DataDir, - fmt.Sprintf(".link.%s.%s", strings.ReplaceAll(webRoot, "/", ".."), head.String())) - if err := os.Symlink(destDirRel, tempLink); err != nil { - return FetchResult{err: fmt.Errorf("symlink temp: %s", err)} - } - defer os.Remove(tempLink) - - if err := os.MkdirAll(filepath.Dir(webLink), 0o755); err != nil { - return FetchResult{err: fmt.Errorf("mkdir parent web: %s", err)} + commit, err := repo.CommitObject(ref.Hash()) + if err != nil { + return nil, fmt.Errorf("git commit: %s", err) } - // this status is advisory only (is subject to race conditions); it's used only - // to return the correct HTTP status per the spec - outcome := FetchCreated - if existingLink, err := os.Readlink(webLink); err == nil { - if existingLink != destDirRel { - outcome = FetchUpdated + tree, err := repo.TreeObject(commit.TreeHash) + if err != nil { + return nil, fmt.Errorf("git tree: %s", err) + } + + walker := object.NewTreeWalker(tree, true, make(map[plumbing.Hash]bool)) + defer walker.Close() + + manifest := Manifest{ + RepoURL: repoURL, + Branch: branch, + Commit: ref.Hash().String(), + Tree: 
make(map[string]*Entry), + } + manifest.Tree[""] = &Entry{Type: Type_Directory, Size: 0, Data: []byte{}} + for { + name, entry, err := walker.Next() + if err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("git walker: %s", err) } else { - outcome = FetchNoChange + manifestEntry := Entry{} + if entry.Mode.IsFile() { + blob, err := repo.BlobObject(entry.Hash) + if err != nil { + return nil, fmt.Errorf("git blob %s: %s", name, err) + } + + reader, err := blob.Reader() + if err != nil { + return nil, fmt.Errorf("git blob open: %s", err) + } + + data, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("git blob read: %s", err) + } + + if entry.Mode == filemode.Symlink { + manifestEntry.Type = Type_Symlink + } else { + manifestEntry.Type = Type_InlineFile + } + manifestEntry.Size = blob.Size + manifestEntry.Data = data + } else if entry.Mode == filemode.Dir { + manifestEntry.Type = Type_Directory + } else { + manifestEntry.Type = Type_Invalid + } + manifest.Tree[name] = &manifestEntry } } - - // commit atomically; assume another fetch has won the race if symlink exists - // FIXME: might not have the same target - if err := os.Rename(tempLink, webLink); err != nil && !errors.Is(err, os.ErrExist) { - return FetchResult{err: fmt.Errorf("rename web: %s", err)} - } - - return FetchResult{outcome: outcome, head: head.String(), err: nil} -} - -func Fetch( - webRoot string, - repoURL string, - branch string, -) FetchResult { - log.Println("fetch:", webRoot, repoURL, branch) - result := fetch(webRoot, repoURL, branch) - if result.err == nil { - status := "" - switch result.outcome { - case FetchCreated: - status = "created" - case FetchUpdated: - status = "updated" - case FetchNoChange: - status = "unchanged" - } - log.Println("fetch ok:", webRoot, result.head, status) - } else { - log.Println("fetch err:", fmt.Errorf("%s: %s", webRoot, result.err)) - } - return result -} - -func FetchWithTimeout( - webRoot string, - repoURL string, - 
branch string, - timeout time.Duration, -) FetchResult { - // fetch the updated content with a timeout - c := make(chan FetchResult, 1) - go func() { - result := Fetch(webRoot, repoURL, branch) - c <- result - }() - select { - case result := <-c: - return result - case <-time.After(timeout): - return FetchResult{outcome: FetchTimeout, err: fmt.Errorf("fetch timeout")} - } + return &manifest, nil } diff --git a/src/main.go b/src/main.go index e50c505..b81f57a 100644 --- a/src/main.go +++ b/src/main.go @@ -8,6 +8,7 @@ import ( ) var config Config +var backend Backend func serveHandler(name string, listen Listen, serve func(http.ResponseWriter, *http.Request)) { listener, err := net.Listen(listen.Protocol, listen.Address) @@ -23,13 +24,39 @@ func serveHandler(name string, listen Listen, serve func(http.ResponseWriter, *h } func main() { + var err error + configPath := flag.String("config", "config.toml", "path to configuration file") flag.Parse() - if err := readConfig(*configPath, &config); err != nil { + if err = readConfig(*configPath, &config); err != nil { log.Fatalln("configuration:", err) } + switch config.Backend.Type { + case "fs": + if backend, err = NewFSBackend(config.Backend.FS.Root); err != nil { + log.Fatalln("fs backend:", err) + } + + case "s3": + if backend, err = NewS3Backend( + config.Backend.S3.Endpoint, + config.Backend.S3.Insecure, + config.Backend.S3.AccessKeyID, + config.Backend.S3.SecretAccessKey, + config.Backend.S3.Region, + config.Backend.S3.Bucket, + ); err != nil { + log.Fatalln("s3 backend:", err) + } + + default: + log.Fatalln("unknown backend:", config.Backend.Type) + } + + log.Println("ready") + if config.Caddy != (Listen{}) { go serveHandler("caddy", config.Caddy, ServeCaddy) } diff --git a/src/manifest.go b/src/manifest.go new file mode 100644 index 0000000..9ff58d1 --- /dev/null +++ b/src/manifest.go @@ -0,0 +1,140 @@ +package main + +import ( + "bytes" + "crypto/sha256" + "errors" + "fmt" + "path" + "strings" + "sync" + + 
"google.golang.org/protobuf/proto" +) + +// Returns `true` if `left` and `right` contain the same files with the same types and data. +func CompareManifest(left *Manifest, right *Manifest) bool { + if len(left.Tree) != len(right.Tree) { + return false + } + for name, leftEntry := range left.Tree { + rightEntry := right.Tree[name] + if rightEntry == nil { + return false + } + if leftEntry.Type != rightEntry.Type { + return false + } + if bytes.Compare(leftEntry.Data, rightEntry.Data) != 0 { + return false + } + } + return true +} + +func EncodeManifest(manifest *Manifest) []byte { + result, err := proto.MarshalOptions{Deterministic: true}.Marshal(manifest) + if err != nil { + panic(err) + } + return result +} + +func DecodeManifest(data []byte) (*Manifest, error) { + manifest := Manifest{} + err := proto.Unmarshal(data, &manifest) + return &manifest, err +} + +const maxSymlinkLevels int = 128 + +var symlinkLoop = errors.New("symbolic link loop") + +func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) { + var levels int +again: + for levels = 0; levels < maxSymlinkLevels; levels += 1 { + parts := strings.Split(inPath, "/") + for i := 1; i <= len(parts); i++ { + linkPath := path.Join(parts[:i]...) 
+ entry := manifest.Tree[linkPath] + if entry != nil && entry.Type == Type_Symlink { + inPath = path.Join( + path.Dir(linkPath), + string(entry.Data), + path.Join(parts[i:]...), + ) + continue again + } + } + break + } + if levels < maxSymlinkLevels { + return inPath, nil + } else { + return "", symlinkLoop + } +} + +const ExternalSizeMin int64 = 256 + +func ExternalizeFiles(manifest *Manifest) *Manifest { + newManifest := Manifest{ + RepoURL: manifest.RepoURL, + Branch: manifest.Branch, + Commit: manifest.Commit, + Tree: make(map[string]*Entry), + } + for name, entry := range manifest.Tree { + if entry.Type == Type_InlineFile && entry.Size > ExternalSizeMin { + newManifest.Tree[name] = &Entry{ + Type: Type_ExternalFile, + Size: entry.Size, + Data: fmt.Appendf(nil, "sha256-%x", sha256.Sum256(entry.Data)), + } + } else { + newManifest.Tree[name] = entry + } + } + return &newManifest +} + +const ManifestSizeMax int = 1048576 + +// Accepts a manifest with inline files, returns a manifest with external files after writing +// file contents and the manifest itself to the storage. 
+func StoreManifest(backend Backend, name string, manifest *Manifest) (*Manifest, error) { + extManifest := ExternalizeFiles(manifest) + extManifestData := EncodeManifest(extManifest) + if len(extManifestData) > ManifestSizeMax { + return nil, fmt.Errorf("manifest too big: %d > %d bytes", len(extManifestData), ManifestSizeMax) + } + + if err := backend.StageManifest(extManifest); err != nil { + return nil, fmt.Errorf("stage: %s", err) + } + + wg := sync.WaitGroup{} + ch := make(chan error, len(extManifest.Tree)) + for name, entry := range extManifest.Tree { + if entry.Type == Type_ExternalFile { + wg.Go(func() { + err := backend.PutBlob(string(entry.Data), manifest.Tree[name].Data) + if err != nil { + ch <- fmt.Errorf("put blob %s: %s", name, err) + } + }) + } + } + wg.Wait() + close(ch) + for err := range ch { + return nil, err // currently ignores all but 1st error + } + + if err := backend.CommitManifest(name, extManifest); err != nil { + return nil, fmt.Errorf("commit: %s", err) + } + + return extManifest, nil +} diff --git a/src/pages.go b/src/pages.go index a070461..4d2681b 100644 --- a/src/pages.go +++ b/src/pages.go @@ -2,97 +2,118 @@ package main import ( "bytes" - "crypto/sha1" "encoding/json" - "errors" "fmt" "io" "log" "net/http" "os" - "path/filepath" + "path" "slices" "strings" "time" - securejoin "github.com/cyphar/filepath-securejoin" - "golang.org/x/sys/unix" + "github.com/minio/minio-go/v7" ) -const fetchTimeout = 30 * time.Second +const notFoundPage = "404.html" +const updateTimeout = 60 * time.Second func getPage(w http.ResponseWriter, r *http.Request) error { + var err error + var urlPath string + var manifest *Manifest + host := GetHost(r) - // if the first directory of the path exists under `www/$host`, use it as the root, - // else use `www/$host/.index` - path, _ := strings.CutPrefix(r.URL.Path, "/") - wwwRoot := filepath.Join("www", host, ".index") - requestPath := path - if projectName, projectPath, found := strings.Cut(path, "/"); found { - 
projectRoot := filepath.Join("www", host, projectName) - if file, _ := securejoin.OpenInRoot(config.DataDir, projectRoot); file != nil { - file.Close() - wwwRoot, requestPath = projectRoot, projectPath - } - } - - // try to serve `$root/$path` first - file, err := securejoin.OpenInRoot(config.DataDir, filepath.Join(wwwRoot, requestPath)) - if err == nil { - // if it's a directory, serve `$root/$path/index.html` - stat, statErr := file.Stat() - if statErr == nil && stat.IsDir() { - defer file.Close() - if !strings.HasSuffix(r.URL.Path, "/") { - // redirect from `$root/$dir` to `$root/$dir/` or links in the document won't work - // correctly - newPath := r.URL.Path + "/" - w.Header().Set("Location", newPath) - w.WriteHeader(http.StatusFound) - fmt.Fprintf(w, "see %s\n", newPath) - return nil - } else { - // serve `$root/$dir/index.html` under `$root/$dir/` - file, err = securejoin.OpenInRoot(config.DataDir, - filepath.Join(wwwRoot, requestPath, "index.html")) - } - } - } - - // if whatever we were serving doesn't exist, try to serve `$root/404.html` - if errors.Is(err, os.ErrNotExist) { - file, _ = securejoin.OpenInRoot(config.DataDir, filepath.Join(wwwRoot, "404.html")) - } - - // acquire read capability to the file being served (if possible) - reader := io.ReadSeeker(nil) - if file != nil { - defer file.Close() - file, err = securejoin.Reopen(file, unix.O_RDONLY) - if file != nil { - defer file.Close() - reader = file - } - } - // allow JavaScript code to access responses (including errors) even across origins w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Max-Age", "86400") - // decide on the HTTP status - if err != nil { - if errors.Is(err, os.ErrNotExist) { - w.WriteHeader(http.StatusNotFound) - if reader == nil { - reader = bytes.NewReader([]byte("not found\n")) - } - } else { - w.WriteHeader(http.StatusInternalServerError) - reader = bytes.NewReader([]byte("internal server error\n")) + urlPath, _ = 
strings.CutPrefix(r.URL.Path, "/") + manifest, err = backend.GetManifest(fmt.Sprintf("%s/.index", host)) + if projectName, projectPath, found := strings.Cut(urlPath, "/"); found { + var projectManifest *Manifest + projectManifest, err = backend.GetManifest(fmt.Sprintf("%s/%s", host, projectName)) + if err == nil { + urlPath = projectPath + manifest = projectManifest + } + } + if manifest == nil { + w.WriteHeader(http.StatusNotFound) + fmt.Fprintf(w, "site not found\n") + return err + } + + entryPath := urlPath + entry := (*Entry)(nil) + is404 := false + reader := io.ReadSeeker(nil) + mtime := time.Time{} + for { + entryPath, _ = strings.CutSuffix(entryPath, "/") + entryPath, err = ExpandSymlinks(manifest, entryPath) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintln(w, err) + return err + } + entry = manifest.Tree[entryPath] + if entry == nil || entry.Type == Type_Invalid { + is404 = true + if entryPath == notFoundPage { + break + } + entryPath = notFoundPage + continue + } else if entry.Type == Type_InlineFile { + reader = bytes.NewReader(entry.Data) + } else if entry.Type == Type_ExternalFile { + etag := fmt.Sprintf(`"%x"`, entry.Data) + if r.Header.Get("If-None-Match") == etag { + w.WriteHeader(http.StatusNotModified) + return nil + } else { + var blob io.ReadSeekCloser + blob, mtime, err = backend.GetBlob(string(entry.Data)) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintf(w, "internal server error\n") + return err + } + defer blob.Close() + + reader = blob + w.Header().Set("ETag", etag) + } + } else if entry.Type == Type_Directory { + if strings.HasSuffix(r.URL.Path, "/") { + entryPath = path.Join(entryPath, "index.html") + continue + } else { + // redirect from `dir` to `dir/`, otherwise when `dir/index.html` is served, + // links in it will have the wrong base URL + newPath := r.URL.Path + "/" + w.Header().Set("Location", newPath) + w.WriteHeader(http.StatusFound) + fmt.Fprintf(w, "see %s\n", 
newPath) + return nil + } + } else if entry.Type == Type_Symlink { + return fmt.Errorf("unexpected symlink") + } + break + } + + // decide on the HTTP status + if is404 { + w.WriteHeader(http.StatusNotFound) + if entry == nil { + fmt.Fprintf(w, "not found\n") + } else { + io.Copy(w, reader) } - // serve custom 404 page (if any) - io.Copy(w, reader) } else { // allow the use of multi-threading in WebAssembly w.Header().Set("Cross-Origin-Embedder-Policy", "credentialless") @@ -102,19 +123,10 @@ func getPage(w http.ResponseWriter, r *http.Request) error { // ETag or If-Modified-Since queries and it avoids stale content being served w.Header().Set("Cache-Control", "public, max-age=0, must-revalidate") - // `www/$host` should be a symlink pointing to an immutable directory under `tree/...`; - // if it's not, assume the server administrator did it on purpose and degrade gracefully - wwwRootDest, err := os.Readlink(filepath.Join(config.DataDir, wwwRoot)) - if err == nil { - w.Header().Set("ETag", fmt.Sprintf(`"%x"`, sha1.Sum([]byte(wwwRootDest)))) - } - // http.ServeContent handles content type and caching - stat, _ := file.Stat() - http.ServeContent(w, r, path, stat.ModTime(), reader) + http.ServeContent(w, r, urlPath, mtime, reader) } - - return err + return nil } func getProjectName(w http.ResponseWriter, r *http.Request) (string, error) { @@ -164,29 +176,31 @@ func putPage(w http.ResponseWriter, r *http.Request) error { branch = "pages" } - result := FetchWithTimeout(webRoot, repoURL, branch, fetchTimeout) - if result.err == nil { + result := UpdateWithTimeout(webRoot, repoURL, branch, updateTimeout) + if result.manifest != nil { w.Header().Add("Content-Location", r.URL.String()) } switch result.outcome { - case FetchError: + case UpdateError: w.WriteHeader(http.StatusServiceUnavailable) - case FetchTimeout: + case UpdateTimeout: w.WriteHeader(http.StatusGatewayTimeout) // HTTP prescribes these response codes to be used - case FetchNoChange: + case UpdateNoChange: 
w.WriteHeader(http.StatusNoContent) - case FetchCreated: + case UpdateCreated: w.WriteHeader(http.StatusCreated) - case FetchUpdated: + case UpdateReplaced: w.WriteHeader(http.StatusOK) } - if result.err != nil { + if result.manifest != nil { + fmt.Fprintln(w, result.manifest.Commit) + } else if result.err != nil { fmt.Fprintln(w, result.err) } else { - fmt.Fprintln(w, result.head) + fmt.Fprintln(w, "internal error") } - return result.err + return nil } func postPage(w http.ResponseWriter, r *http.Request) error { @@ -271,25 +285,25 @@ func postPage(w http.ResponseWriter, r *http.Request) error { return fmt.Errorf("invalid clone URL") } - result := FetchWithTimeout(webRoot, repoURL, "pages", fetchTimeout) + result := UpdateWithTimeout(webRoot, repoURL, "pages", updateTimeout) switch result.outcome { - case FetchError: + case UpdateError: w.WriteHeader(http.StatusServiceUnavailable) - fmt.Fprintf(w, "fetch error: %s\n", result.err) - case FetchTimeout: + fmt.Fprintf(w, "update error: %s\n", result.err) + case UpdateTimeout: w.WriteHeader(http.StatusGatewayTimeout) - fmt.Fprintln(w, "fetch timeout") - case FetchNoChange: + fmt.Fprintln(w, "update timeout") + case UpdateNoChange: w.WriteHeader(http.StatusOK) fmt.Fprintln(w, "unchanged") - case FetchCreated: + case UpdateCreated: w.WriteHeader(http.StatusOK) fmt.Fprintln(w, "created") - case FetchUpdated: + case UpdateReplaced: w.WriteHeader(http.StatusOK) - fmt.Fprintln(w, "updated") + fmt.Fprintln(w, "replaced") } - return result.err + return nil } func ServePages(w http.ResponseWriter, r *http.Request) { @@ -311,6 +325,9 @@ func ServePages(w http.ResponseWriter, r *http.Request) { if pathErr, ok := err.(*os.PathError); ok { err = fmt.Errorf("not found: %s", pathErr.Path) } + if minioErr, ok := err.(minio.ErrorResponse); ok && minioErr.Code == "NoSuchKey" { + err = fmt.Errorf("not found: %s", minioErr.Key) + } log.Println("pages err:", err) } } diff --git a/src/schema.pb.go b/src/schema.pb.go new file mode 100644 
index 0000000..c0ab0e3 --- /dev/null +++ b/src/schema.pb.go @@ -0,0 +1,293 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.9 +// protoc v3.21.12 +// source: src/schema.proto + +package main + +import ( + reflect "reflect" + sync "sync" + unsafe "unsafe" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Type int32 + +const ( + // Invalid entry. + Type_Invalid Type = 0 + // Directory. + Type_Directory Type = 1 + // Inline file. `Blob.Data` contains file contents. + Type_InlineFile Type = 2 + // External file. `Blob.Data` contains object reference. + Type_ExternalFile Type = 3 + // Symlink. `Blob.Data` contains relative path. + Type_Symlink Type = 4 +) + +// Enum value maps for Type. +var ( + Type_name = map[int32]string{ + 0: "Invalid", + 1: "Directory", + 2: "InlineFile", + 3: "ExternalFile", + 4: "Symlink", + } + Type_value = map[string]int32{ + "Invalid": 0, + "Directory": 1, + "InlineFile": 2, + "ExternalFile": 3, + "Symlink": 4, + } +) + +func (x Type) Enum() *Type { + p := new(Type) + *p = x + return p +} + +func (x Type) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Type) Descriptor() protoreflect.EnumDescriptor { + return file_src_schema_proto_enumTypes[0].Descriptor() +} + +func (Type) Type() protoreflect.EnumType { + return &file_src_schema_proto_enumTypes[0] +} + +func (x Type) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Type.Descriptor instead. 
+func (Type) EnumDescriptor() ([]byte, []int) { + return file_src_schema_proto_rawDescGZIP(), []int{0} +} + +type Entry struct { + state protoimpl.MessageState `protogen:"open.v1"` + Type Type `protobuf:"varint,1,opt,name=type,proto3,enum=main.Type" json:"type,omitempty"` + Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` + Data []byte `protobuf:"bytes,3,opt,name=data,proto3" json:"data,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Entry) Reset() { + *x = Entry{} + mi := &file_src_schema_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Entry) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Entry) ProtoMessage() {} + +func (x *Entry) ProtoReflect() protoreflect.Message { + mi := &file_src_schema_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Entry.ProtoReflect.Descriptor instead. 
+func (*Entry) Descriptor() ([]byte, []int) { + return file_src_schema_proto_rawDescGZIP(), []int{0} +} + +func (x *Entry) GetType() Type { + if x != nil { + return x.Type + } + return Type_Invalid +} + +func (x *Entry) GetSize() int64 { + if x != nil { + return x.Size + } + return 0 +} + +func (x *Entry) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +type Manifest struct { + state protoimpl.MessageState `protogen:"open.v1"` + RepoURL string `protobuf:"bytes,1,opt,name=repoURL,proto3" json:"repoURL,omitempty"` + Branch string `protobuf:"bytes,2,opt,name=branch,proto3" json:"branch,omitempty"` + Commit string `protobuf:"bytes,3,opt,name=commit,proto3" json:"commit,omitempty"` + Tree map[string]*Entry `protobuf:"bytes,4,rep,name=tree,proto3" json:"tree,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Manifest) Reset() { + *x = Manifest{} + mi := &file_src_schema_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Manifest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Manifest) ProtoMessage() {} + +func (x *Manifest) ProtoReflect() protoreflect.Message { + mi := &file_src_schema_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Manifest.ProtoReflect.Descriptor instead. 
+func (*Manifest) Descriptor() ([]byte, []int) { + return file_src_schema_proto_rawDescGZIP(), []int{1} +} + +func (x *Manifest) GetRepoURL() string { + if x != nil { + return x.RepoURL + } + return "" +} + +func (x *Manifest) GetBranch() string { + if x != nil { + return x.Branch + } + return "" +} + +func (x *Manifest) GetCommit() string { + if x != nil { + return x.Commit + } + return "" +} + +func (x *Manifest) GetTree() map[string]*Entry { + if x != nil { + return x.Tree + } + return nil +} + +var File_src_schema_proto protoreflect.FileDescriptor + +const file_src_schema_proto_rawDesc = "" + + "\n" + + "\x10src/schema.proto\x12\x04main\"O\n" + + "\x05Entry\x12\x1e\n" + + "\x04type\x18\x01 \x01(\x0e2\n" + + ".main.TypeR\x04type\x12\x12\n" + + "\x04size\x18\x02 \x01(\x03R\x04size\x12\x12\n" + + "\x04data\x18\x03 \x01(\fR\x04data\"\xc8\x01\n" + + "\bManifest\x12\x18\n" + + "\arepoURL\x18\x01 \x01(\tR\arepoURL\x12\x16\n" + + "\x06branch\x18\x02 \x01(\tR\x06branch\x12\x16\n" + + "\x06commit\x18\x03 \x01(\tR\x06commit\x12,\n" + + "\x04tree\x18\x04 \x03(\v2\x18.main.Manifest.TreeEntryR\x04tree\x1aD\n" + + "\tTreeEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12!\n" + + "\x05value\x18\x02 \x01(\v2\v.main.EntryR\x05value:\x028\x01*Q\n" + + "\x04Type\x12\v\n" + + "\aInvalid\x10\x00\x12\r\n" + + "\tDirectory\x10\x01\x12\x0e\n" + + "\n" + + "InlineFile\x10\x02\x12\x10\n" + + "\fExternalFile\x10\x03\x12\v\n" + + "\aSymlink\x10\x04b\x06proto3" + +var ( + file_src_schema_proto_rawDescOnce sync.Once + file_src_schema_proto_rawDescData []byte +) + +func file_src_schema_proto_rawDescGZIP() []byte { + file_src_schema_proto_rawDescOnce.Do(func() { + file_src_schema_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_src_schema_proto_rawDesc), len(file_src_schema_proto_rawDesc))) + }) + return file_src_schema_proto_rawDescData +} + +var file_src_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_src_schema_proto_msgTypes = 
make([]protoimpl.MessageInfo, 3) +var file_src_schema_proto_goTypes = []any{ + (Type)(0), // 0: main.Type + (*Entry)(nil), // 1: main.Entry + (*Manifest)(nil), // 2: main.Manifest + nil, // 3: main.Manifest.TreeEntry +} +var file_src_schema_proto_depIdxs = []int32{ + 0, // 0: main.Entry.type:type_name -> main.Type + 3, // 1: main.Manifest.tree:type_name -> main.Manifest.TreeEntry + 1, // 2: main.Manifest.TreeEntry.value:type_name -> main.Entry + 3, // [3:3] is the sub-list for method output_type + 3, // [3:3] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name +} + +func init() { file_src_schema_proto_init() } +func file_src_schema_proto_init() { + if File_src_schema_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_src_schema_proto_rawDesc), len(file_src_schema_proto_rawDesc)), + NumEnums: 1, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_src_schema_proto_goTypes, + DependencyIndexes: file_src_schema_proto_depIdxs, + EnumInfos: file_src_schema_proto_enumTypes, + MessageInfos: file_src_schema_proto_msgTypes, + }.Build() + File_src_schema_proto = out.File + file_src_schema_proto_goTypes = nil + file_src_schema_proto_depIdxs = nil +} diff --git a/src/schema.proto b/src/schema.proto new file mode 100644 index 0000000..b214727 --- /dev/null +++ b/src/schema.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +package main; + +enum Type { + // Invalid entry. + Invalid = 0; + // Directory. + Directory = 1; + // Inline file. `Blob.Data` contains file contents. + InlineFile = 2; + // External file. `Blob.Data` contains object reference. + ExternalFile = 3; + // Symlink. `Blob.Data` contains relative path. 
+ Symlink = 4; +} + +message Entry { + Type type = 1; + int64 size = 2; + bytes data = 3; +} + +message Manifest { + string repoURL = 1; + string branch = 2; + string commit = 3; + map<string, Entry> tree = 4; +} diff --git a/src/update.go b/src/update.go new file mode 100644 index 0000000..2221fd2 --- /dev/null +++ b/src/update.go @@ -0,0 +1,86 @@ +package main + +import ( + "fmt" + "log" + "time" +) + +type UpdateOutcome int + +const ( + UpdateError UpdateOutcome = iota + UpdateTimeout + UpdateCreated + UpdateReplaced + UpdateNoChange +) + +type UpdateResult struct { + outcome UpdateOutcome + manifest *Manifest + err error +} + +func Update( + webRoot string, + repoURL string, + branch string, +) UpdateResult { + var fetchManifest, oldManifest, newManifest *Manifest + var err error + + log.Println("update:", webRoot, repoURL, branch) + + outcome := UpdateError + fetchManifest, err = FetchRepository(repoURL, branch) + if err == nil { + oldManifest, _ = backend.GetManifest(webRoot) + newManifest, err = StoreManifest(backend, webRoot, fetchManifest) + if err == nil { + if oldManifest == nil { + outcome = UpdateCreated + } else if CompareManifest(oldManifest, newManifest) { + outcome = UpdateNoChange + } else { + outcome = UpdateReplaced + } + } + } + + if err == nil { + status := "" + switch outcome { + case UpdateCreated: + status = "created" + case UpdateReplaced: + status = "replaced" + case UpdateNoChange: + status = "unchanged" + } + log.Printf("update ok: %s %s %s", webRoot, newManifest.Commit, status) + } else { + log.Printf("update err: %s %s", webRoot, err) + } + + return UpdateResult{outcome, newManifest, err} +} + +func UpdateWithTimeout( + webRoot string, + repoURL string, + branch string, + timeout time.Duration, +) UpdateResult { + c := make(chan UpdateResult, 1) + go func() { + result := Update(webRoot, repoURL, branch) + c <- result + }() + select { + case result := <-c: + return result + case <-time.After(timeout): + return UpdateResult{outcome: UpdateTimeout, 
err: fmt.Errorf("update timeout")} + } +}