Add fetching via PUT request.

This commit is contained in:
Catherine
2025-09-05 06:33:29 +00:00
parent 364f4392c3
commit 81d795923f
3 changed files with 174 additions and 51 deletions

View File

@@ -14,6 +14,15 @@ import (
"github.com/go-git/go-git/v6/storage/memory"
)
// FetchResult classifies the outcome of a fetch so that callers can report it
// (for example as an HTTP status).
type FetchResult int

// Possible fetch outcomes. The zero value is FetchError, which is what the
// error paths return alongside a non-nil error.
const (
	// FetchError means the fetch failed.
	FetchError = FetchResult(iota)
	// FetchCreated means no symlink existed at the web root before the fetch.
	FetchCreated
	// FetchUpdated means the web root symlink existed and pointed elsewhere.
	FetchUpdated
	// FetchNoChange means the web root symlink already pointed at the fetched tree.
	FetchNoChange
)
func splitHash(hash plumbing.Hash) string {
head := hash.String()
return filepath.Join(head[:2], head[2:])
@@ -21,26 +30,26 @@ func splitHash(hash plumbing.Hash) string {
func fetch(
dataDir string,
webroot string,
url string,
branch plumbing.ReferenceName,
) (*plumbing.Hash, error) {
webRoot string,
repoURL string,
branch string,
) (*plumbing.Hash, FetchResult, error) {
storer := memory.NewStorage()
repo, err := git.Clone(storer, nil, &git.CloneOptions{
URL: url,
ReferenceName: branch,
URL: repoURL,
ReferenceName: plumbing.ReferenceName(branch),
SingleBranch: true,
Depth: 1,
Tags: git.NoTags,
})
if err != nil {
return nil, fmt.Errorf("git clone: %s", err)
return nil, 0, fmt.Errorf("git clone: %s", err)
}
ref, err := repo.Head()
if err != nil {
return nil, fmt.Errorf("git head: %s", err)
return nil, 0, fmt.Errorf("git head: %s", err)
}
head := ref.Hash()
@@ -49,73 +58,92 @@ func fetch(
// check out to a temporary directory to avoid TOCTTOU race on destDir
tempDir, err := os.MkdirTemp(dataDir, ".tree")
if err != nil {
return nil, fmt.Errorf("mkdir temp: %s", err)
return nil, 0, fmt.Errorf("mkdir temp: %s", err)
}
defer os.RemoveAll(tempDir)
repo, err = git.Open(storer, osfs.New(tempDir))
if err != nil {
return nil, fmt.Errorf("git open: %s", err)
return nil, 0, fmt.Errorf("git open: %s", err)
}
worktree, err := repo.Worktree()
if err != nil {
return nil, fmt.Errorf("git worktree: %s", err)
return nil, 0, fmt.Errorf("git worktree: %s", err)
}
if err := worktree.Checkout(&git.CheckoutOptions{
Hash: head,
}); err != nil {
return nil, fmt.Errorf("git checkout: %s", err)
return nil, 0, fmt.Errorf("git checkout: %s", err)
}
if err := os.MkdirAll(filepath.Dir(destDir), 0o755); err != nil {
return nil, fmt.Errorf("mkdir parent dest: %s", err)
return nil, 0, fmt.Errorf("mkdir parent dest: %s", err)
}
// commit atomically; assume another fetch has won the race if directory exists
if err := os.Rename(tempDir, destDir); err != nil && !errors.Is(err, os.ErrExist) {
return nil, fmt.Errorf("rename dest: %s", err)
return nil, 0, fmt.Errorf("rename dest: %s", err)
}
}
webLink := filepath.Join(dataDir, "www", webroot)
webLink := filepath.Join(dataDir, "www", webRoot)
destDirRel, _ := filepath.Rel(filepath.Dir(webLink), destDir)
tempLink := filepath.Join(dataDir,
fmt.Sprintf(".link.%s.%s", strings.ReplaceAll(webroot, "/", ".."), head.String()))
fmt.Sprintf(".link.%s.%s", strings.ReplaceAll(webRoot, "/", ".."), head.String()))
if err := os.Symlink(destDirRel, tempLink); err != nil {
return nil, fmt.Errorf("symlink temp: %s", err)
return nil, 0, fmt.Errorf("symlink temp: %s", err)
}
defer os.Remove(tempLink)
if err := os.MkdirAll(filepath.Dir(webLink), 0o755); err != nil {
return nil, fmt.Errorf("mkdir parent web: %s", err)
return nil, 0, fmt.Errorf("mkdir parent web: %s", err)
}
// this status is advisory only (is subject to race conditions); it's used only
// to return the correct HTTP status per the spec
fetchResult := FetchCreated
if existingLink, err := os.Readlink(webLink); err == nil {
if existingLink != destDirRel {
fetchResult = FetchUpdated
} else {
fetchResult = FetchNoChange
}
}
// commit atomically; assume another fetch has won the race if symlink exists
// FIXME: might not have the same target
if err := os.Rename(tempLink, webLink); err != nil && !errors.Is(err, os.ErrExist) {
return nil, fmt.Errorf("rename web: %s", err)
return nil, 0, fmt.Errorf("rename web: %s", err)
}
return &head, nil
return &head, fetchResult, nil
}
// Fetch is the logging wrapper around fetch: it logs the request, runs fetch,
// and logs the outcome.
//
// On success it returns the head commit hash as a string together with the
// FetchResult reported by fetch; the log line carries a human-readable status
// ("created", "updated" or "unchanged"). On failure it returns an empty string,
// FetchError and the error.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of the commit (both parameter lists and both bodies are
// present); the description above follows the three-value signature.
func Fetch(
dataDir string,
webroot string,
url string,
branch plumbing.ReferenceName,
) error {
log.Println("fetch:", webroot, url, branch)
head, err := fetch(dataDir, webroot, url, branch)
if err != nil {
log.Println("fetch err:", fmt.Errorf("%s: %s", webroot, err))
return err
webRoot string,
repoURL string,
branch string,
) (string, FetchResult, error) {
log.Println("fetch:", webRoot, repoURL, branch)
head, result, err := fetch(dataDir, webRoot, repoURL, branch)
if err == nil {
status := ""
switch result {
case FetchCreated:
status = "created"
case FetchUpdated:
status = "updated"
case FetchNoChange:
status = "unchanged"
}
log.Println("fetch ok:", webRoot, head, status)
return head.String(), result, err
} else {
log.Println("fetch err:", fmt.Errorf("%s: %s", webRoot, err))
return "", FetchError, err
}
log.Println("fetch ok:", webroot, head)
return nil
}

View File

@@ -1,16 +1,18 @@
package main
import (
"log"
"net/http"
"os"
)
// main reads the data directory from os.Args[1] and the listen address from
// os.Args[2], registers Serve(dataDir) as the handler for "/", and starts the
// HTTP server; if ListenAndServe returns an error the process exits through
// log.Fatalln.
//
// NOTE(review): os.Args is indexed without a length check, so a missing
// argument panics with an index-out-of-range error.
// NOTE(review): this listing appears to interleave the pre-change lines (the
// startup Fetch call and the ServeMux on ":3333") with the post-change lines;
// the description above follows the listenAddr variant.
func main() {
dataDir := os.Args[1]
listenAddr := os.Args[2]
Fetch(dataDir, "codeberg.page/.index", "https://codeberg.org/Codeberg/pages-server/", "pages")
mux := http.NewServeMux()
mux.HandleFunc("/", Serve(dataDir))
http.ListenAndServe(":3333", mux)
http.HandleFunc("/", Serve(dataDir))
err := http.ListenAndServe(listenAddr, nil)
if err != nil {
log.Fatalln("failed to listen:", err)
}
}

View File

@@ -1,7 +1,9 @@
package main
import (
"bytes"
"errors"
"fmt"
"io"
"log"
"net"
@@ -9,18 +11,26 @@ import (
"os"
"path/filepath"
"strings"
"time"
securejoin "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
func getPage(dataDir string, w http.ResponseWriter, r *http.Request) error {
// fetchTimeout is how long a PUT request waits for the fetch to finish before
// the handler gives up and answers with a gateway timeout.
const fetchTimeout = 30 * time.Second
// getHost returns the host the request was addressed to, without the port.
//
// The result is normalized so that equivalent spellings of a host map to the
// same value (it is used to build directory names): it is lowercased, since
// host names are case-insensitive, and the brackets around an IPv6 literal are
// removed whether or not a port is present.
func getHost(r *http.Request) string {
	// FIXME: handle IDNA
	host, _, err := net.SplitHostPort(r.Host)
	if err != nil {
		// dirty but the go stdlib doesn't have a "split port if present" function;
		// SplitHostPort rejects a host without a port, so use the raw value
		host = r.Host
		// SplitHostPort would have unwrapped `[::1]:80` to `::1`; do the same for
		// a bare `[::1]` so both forms agree
		if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
			host = host[1 : len(host)-1]
		}
	}
	return strings.ToLower(host)
}
func getPage(dataDir string, w http.ResponseWriter, r *http.Request) error {
host := getHost(r)
// if the first directory of the path exists under `www/$host`, use it as the root,
// else use `www/$host/.index`
@@ -41,6 +51,7 @@ func getPage(dataDir string, w http.ResponseWriter, r *http.Request) error {
// if it's a directory, serve `$root/$path/index.html`
stat, statErr := file.Stat()
if statErr == nil && stat.IsDir() {
defer file.Close()
file, err = securejoin.OpenInRoot(dataDir,
filepath.Join(wwwRoot, requestPath, "index.html"))
}
@@ -50,43 +61,125 @@ func getPage(dataDir string, w http.ResponseWriter, r *http.Request) error {
file, _ = securejoin.OpenInRoot(dataDir, filepath.Join(wwwRoot, "404.html"))
}
data := []byte(nil)
// acquire read capability to the file being served (if possible)
reader := io.ReadSeeker(nil)
if file != nil {
defer file.Close()
file, err = securejoin.Reopen(file, unix.O_RDONLY)
if file != nil {
defer file.Close()
data, err = io.ReadAll(file)
reader = file
}
}
// decide on the HTTP status
if err != nil {
if errors.Is(err, os.ErrNotExist) {
w.WriteHeader(http.StatusNotFound)
if data == nil {
data = []byte("404 not found\n")
if reader == nil {
reader = bytes.NewReader([]byte("not found\n"))
}
} else {
w.WriteHeader(http.StatusInternalServerError)
reader = bytes.NewReader([]byte("internal server error\n"))
}
// serve custom 404 page (if any)
io.Copy(w, reader)
} else {
w.WriteHeader(http.StatusOK)
}
if data != nil {
w.Write(data)
stat, _ := file.Stat()
http.ServeContent(w, r, path, stat.ModTime(), reader)
}
return err
}
// putResult bundles the three values returned by Fetch so that they can be
// sent over a single channel from the fetch goroutine to the PUT handler.
// It is constructed positionally, so the field order must match Fetch's
// return order.
type putResult struct {
// head is the head commit hash string returned by Fetch
head string
// result is the FetchResult returned by Fetch
result FetchResult
// err is the error returned by Fetch (nil on success)
err error
}
// putPage handles a PUT request that (re)publishes a site from a git repository.
//
// The request path selects the project: `/` maps to the pseudo-project `.index`
// and `/foo/` (or `/foo`) maps to the project `foo`; names starting with `.` and
// nested paths are rejected with 400. The request body carries the git
// repository URL and the optional `X-Pages-Branch` header carries the branch
// (`pages` by default).
//
// The fetch runs in a goroutine and is awaited for at most fetchTimeout.
// Responses:
//   - 201 Created, 200 OK: the site was created/updated; the body is the head hash
//   - 204 No Content: the site already pointed at this head (no body)
//   - 400 / 413: malformed request (bad path, unreadable, empty or oversized body)
//   - 503 Service Unavailable: the fetch failed; the body is the error text
//   - 504 Gateway Timeout: the fetch did not finish within fetchTimeout
//
// The returned error is nil on success and describes the failure otherwise; a
// response has always been written by the time putPage returns.
func putPage(dataDir string, w http.ResponseWriter, r *http.Request) error {
	// the body only carries a repository URL, so cap it to keep a client from
	// making the server buffer an arbitrarily large upload
	const maxBodyBytes = 8 << 10

	host := getHost(r)

	// path must be either `/` or `/foo/` (`/foo` is accepted as an alias)
	path := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/"), "/")
	switch {
	case strings.HasPrefix(path, "."):
		http.Error(w, "this directory name is reserved for system use", http.StatusBadRequest)
		return fmt.Errorf("reserved name")
	case strings.Contains(path, "/"):
		http.Error(w, "only one level of nesting is allowed", http.StatusBadRequest)
		return fmt.Errorf("nesting too deep")
	}

	// path `/` corresponds to pseudo-project `.index`
	projectName := ".index"
	if path != "" {
		projectName = path
	}

	requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxBodyBytes))
	if err != nil {
		// always answer the client; returning without writing anything would
		// result in an implicit 200 OK
		status := http.StatusBadRequest
		var tooLarge *http.MaxBytesError
		if errors.As(err, &tooLarge) {
			status = http.StatusRequestEntityTooLarge
		}
		http.Error(w, "cannot read request body", status)
		return fmt.Errorf("body read: %w", err)
	}

	// request body contains git repository URL `https://codeberg.org/...`
	// (surrounding whitespace, e.g. a trailing newline, is not part of the URL)
	// request header X-Pages-Branch contains git branch, `pages` by default
	webRoot := fmt.Sprintf("%s/%s", host, projectName)
	repoURL := strings.TrimSpace(string(requestBody))
	if repoURL == "" {
		http.Error(w, "request body must contain the repository URL", http.StatusBadRequest)
		return fmt.Errorf("empty repository URL")
	}
	branch := r.Header.Get("X-Pages-Branch")
	if branch == "" {
		branch = "pages"
	}

	// fetch the updated content with a timeout; the channel is buffered so that
	// the goroutine can still deliver its result and exit after a timeout
	// (the fetch itself is not cancelled and keeps running in the background)
	results := make(chan putResult, 1)
	go func() {
		head, result, err := Fetch(dataDir, webRoot, repoURL, branch)
		results <- putResult{head, result, err}
	}()

	timer := time.NewTimer(fetchTimeout)
	defer timer.Stop()

	select {
	case fetched := <-results:
		if fetched.err != nil {
			http.Error(w, fetched.err.Error(), http.StatusServiceUnavailable)
			return fetched.err
		}

		w.Header().Add("Content-Location", r.URL.String())
		// HTTP prescribes these response codes to be used
		switch fetched.result {
		case FetchNoChange:
			// a 204 response must not carry a body, so stop here
			w.WriteHeader(http.StatusNoContent)
			return nil
		case FetchCreated:
			w.WriteHeader(http.StatusCreated)
		case FetchUpdated:
			w.WriteHeader(http.StatusOK)
		}
		fmt.Fprintln(w, fetched.head)
		return nil

	case <-timer.C:
		http.Error(w, "fetch timeout", http.StatusGatewayTimeout)
		return fmt.Errorf("fetch timeout")
	}
}
// Serve returns the HTTP handler for the site rooted at dataDir.
//
// The handler logs every request, dispatches GET to getPage and PUT to putPage,
// answers any other method with 405 Method Not Allowed, and finally logs
// whether the request ended with an error.
//
// NOTE(review): this listing appears to interleave the pre-change lines (the
// first log.Println and the unconditional getPage call) with the post-change
// lines; the description above follows the method-dispatching variant.
func Serve(dataDir string) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
log.Println("serve:", r.Host, r.URL)
err := getPage(dataDir, w, r)
log.Println("serve:", r.Method, r.Host, r.URL)
err := error(nil)
switch r.Method {
case http.MethodGet:
err = getPage(dataDir, w, r)
case http.MethodPut:
err = putPage(dataDir, w, r)
default:
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
err = fmt.Errorf("method %s not allowed", r.Method)
}
if err != nil {
log.Println("serve err:", err)
} else {
log.Println("serve ok")
}
}
}