From 22a894c813bab20665ed5830e253d967aefb2e8b Mon Sep 17 00:00:00 2001 From: Jordan Date: Sun, 21 Jan 2024 12:09:39 -0700 Subject: initial commit --- .gitignore | 4 + Makefile | 20 +++ README | 35 ++++ UNLICENSE | 24 +++ go.mod | 3 + go.sum | 0 log.go | 51 ++++++ pipkin.go | 438 +++++++++++++++++++++++++++++++++++++++++++++++++++ pipkin.json.example | 28 ++++ templates/index.html | 42 +++++ 10 files changed, 645 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README create mode 100644 UNLICENSE create mode 100644 go.mod create mode 100644 go.sum create mode 100644 log.go create mode 100644 pipkin.go create mode 100644 pipkin.json.example create mode 100644 templates/index.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..705a8d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.swp +*.swo +pipkin +pipkin.json diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c177b2e --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +.POSIX: +.SUFFIXES: + +GO = go +RM = rm +GOFLAGS = -o pipkin +PREFIX = /usr/local + +goflags = $(GOFLAGS) + +all: pipkin + +pipkin: + $(GO) build $(goflags) -ldflags "-X main.buildPrefix=$(PREFIX)" + +clean: + $(RM) -f pipkin + +test: + $(GO) test -v ./... diff --git a/README b/README new file mode 100644 index 0000000..7cdf588 --- /dev/null +++ b/README @@ -0,0 +1,35 @@ +pipkin is an S3 HTTP gateway, designed to support the hosting of static websites +from S3 platforms which do not natively provide first-class support for custom +HTTP/TLS termination (e.g. Backblaze B2), and support the use of less-popular +(or even self-managed) content delivery networks. + +pipkin transparently pipes responses received from S3 directly to clients; it +consumes ~15MB of RAM and supports concurrent downloads of over 10Gbit/s, and +has no dependencies. + +pipkin maps host headers to unique buckets/paths, so it's possible to host +multiple websites from a single bucket. 
client requests and S3 responses are +filtered against sets of authorized headers. webserver-style indexes and +directory listings are supported. HTTP sockets are re-used between requests. + +to improve performance and reduce redundant API transactions/financial DoS, it's +recommended to host pipkin behind a caching proxy (e.g. NGINX) and refresh stale +caches in the background between requests: + +location / { + proxy_pass http://127.0.0.1:8085; + proxy_cache proxycache; + proxy_cache_revalidate on; + proxy_cache_background_update on; + proxy_cache_lock on; + proxy_cache_lock_timeout 1s; + proxy_cache_use_stale updating error timeout http_404 http_500; + proxy_cache_valid 200 302 10m; + proxy_cache_valid 404 1m; +} + +Usage of ./pipkin: + -conf string + path to JSON config (default "pipkin.json") + -templates string + path to template directory (default "./templates") diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..efb957a --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module main + +go 1.21.5 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/log.go b/log.go new file mode 100644 index 0000000..7de73e7 --- /dev/null +++ b/log.go @@ -0,0 +1,51 @@ +package main + +import ( + "log" + "os" +) + +const ( + infoLevel = 1 + errLevel = 2 +) + +type Log struct { + level int +} + +func NewLogger(logLevel string) *Log { + + if logLevel == "err" { + return &Log{1} + } + if logLevel == "info" { + return &Log{2} + } + return &Log{0} +} + +func (l *Log) Infof(format string, args ...interface{}) { + + if l.level >= infoLevel { + log.SetPrefix("INFO: ") + log.SetOutput(os.Stdout) + log.Printf(format, args...) + } +} + +func (l *Log) Errorf(format string, args ...interface{}) { + + if l.level >= errLevel { + log.SetPrefix("ERROR: ") + log.SetOutput(os.Stderr) + log.Printf(format, args...) + } +} + +func (l *Log) Fatalf(format string, args ...interface{}) { + + log.SetPrefix("FATAL: ") + log.SetOutput(os.Stderr) + log.Fatalf(format, args...) 
+}
diff --git a/pipkin.go b/pipkin.go
new file mode 100644
index 0000000..8996f56
--- /dev/null
+++ b/pipkin.go
@@ -0,0 +1,438 @@
+package main
+
+import (
+	"bufio"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"encoding/xml"
+	"flag"
+	"fmt"
+	"html/template"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"path"
+	"strings"
+	"time"
+)
+
+// Pipkin bundles the shared HTTP client, parsed configuration, logger, and
+// template directory used by the request handler.
+type Pipkin struct {
+	client    *http.Client
+	config    Config
+	log       *Log
+	templates string
+}
+
+// Config mirrors pipkin.json: listen address, bucket credentials, and the
+// host-header-to-bucket mapping.
+type Config struct {
+	Host     string            `json:"host"`
+	Port     string            `json:"port"`
+	Buckets  map[string]Bucket `json:"buckets"`
+	Hosts    map[string]Host   `json:"hosts"`
+	Proxy    bool              `json:"proxied"`
+	LogLevel string            `json:"loglevel"`
+}
+
+// Bucket holds the S3 credentials and endpoint for one configured bucket.
+type Bucket struct {
+	KeyID    string `json:"keyID"`
+	AppKey   string `json:"appKey"`
+	Region   string `json:"region"`
+	Endpoint string `json:"endpoint"`
+	Host     string `json:"host"`
+}
+
+// Host describes how one inbound Host header maps into a bucket: the key
+// prefix, optional index file, and whether directory listings are rendered.
+type Host struct {
+	AutoIndex bool   `json:"autoindex"`
+	Bucket    string `json:"bucket"`
+	Index     string `json:"index"`
+	Path      string `json:"path"`
+}
+
+// Index is the XML-decoded object listing handed to the directory-index
+// template; Path is the request path it was generated for.
+type Index struct {
+	Path           string
+	CommonPrefixes []Prefixes
+	Contents       []Contents
+	Name           string
+	MaxKeys        uint
+	KeyCount       uint
+}
+
+// Prefixes is one <CommonPrefixes> entry (a "subdirectory") in a listing.
+type Prefixes struct {
+	Prefix string
+}
+
+// Contents is one <Contents> entry (an object) in a listing.
+type Contents struct {
+	ETag         string
+	Key          string
+	LastModified string
+	Size         string
+}
+
+// Headers we receive from clients and forward on to S3 endpoints.
+var ALLOWED_REQUEST_HEADERS = []string{
+	"accept",
+	"accept-encoding",
+	"accept-language",
+	"date",
+	"if-match",
+	"if-modified-since",
+	"if-none-match",
+	"if-unmodified-since",
+	"range",
+	"x-amz-content-sha256",
+	"x-amz-date",
+	"x-amz-server-side-encryption-customer-algorithm",
+	"x-amz-server-side-encryption-customer-key",
+	"x-amz-server-side-encryption-customer-key-md5",
+}
+
+// Headers we receive from S3 and send back to requesting clients.
+var ALLOWED_RESPONSE_HEADERS = []string{
+	"accept-ranges",
+	"content-encoding",
+	"content-length",
+	"content-range",
+	"content-type",
+	"last-modified",
+}
+
+// fetch transparently forwards HTTP GET and HEAD requests to S3 services.
+//
+// Host header values are indexed to buckets and may specify a unique path,
+// index file, and directory listing behavior on a host-by-host basis.
+func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) {
+
+	// We only support GET and HEAD methods; anything that can make S3 updates
+	// (DELETE, PUT...) is undesired here.
+	if r.Method != "GET" && r.Method != "HEAD" {
+		http.Error(w, http.StatusText(http.StatusMethodNotAllowed),
+			http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Host header is only useful when we're fronted by a proxy, otherwise we
+	// should use r.Host. In most cases, setting Proxied to false will produce
+	// the desired behavior.
+	//
+	// NOTE(review): r.URL.Host is only populated for absolute-form request
+	// lines; a typical reverse proxy sends origin-form requests, so this is
+	// usually empty — confirm whether an X-Forwarded-Host lookup was intended.
+	var hostStr string
+	if pk.config.Proxy {
+		hostStr = r.URL.Host
+	} else {
+		hostStr = strings.Split(r.Host, ":")[0]
+	}
+	host, ok := pk.config.Hosts[hostStr]
+	if !ok {
+		pk.log.Errorf("Host %s not found in configuration\n", hostStr)
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+		return
+	}
+
+	// If an Index is configured and our key ends in "/", update our path to
+	// include it, regardless of whether it exists in S3; takes precedence over
+	// AutoIndex, and its absence results in a 404 response.
+	key := r.URL.Path
+	var err error
+	if strings.HasSuffix(key, "/") && host.Index != "" {
+		key = path.Join(key, host.Index)
+	}
+
+	// Pre-sign! We roll our own HMAC-SHA256 signature routines to avoid relying
+	// on Amazon's (large) SDK; our memory footprint should be low.
+	presigned, err := pk.presignQuery(r.Method, host, key)
+	if err != nil {
+		pk.log.Errorf("Failed to presign: %s %s %s %s: %s\n", r.Method, hostStr,
+			host.Bucket, key, err.Error())
+		http.Error(w, http.StatusText(http.StatusInternalServerError),
+			http.StatusInternalServerError)
+		return
+	}
+	req, err := http.NewRequest(r.Method, presigned, nil)
+	if err != nil {
+		pk.log.Errorf("Failed to create new request: %s %s %s\n", r.Method,
+			presigned, err.Error())
+		http.Error(w, http.StatusText(http.StatusInternalServerError),
+			http.StatusInternalServerError)
+		return
+	}
+
+	// Filter spurious headers; untrusted clients should not be messing with S3.
+	for name, headers := range r.Header {
+		for _, v := range headers {
+			for _, a := range ALLOWED_REQUEST_HEADERS {
+				if strings.ToLower(name) == a {
+					req.Header.Set(name, v)
+				}
+			}
+		}
+	}
+
+	// Perform outbound request to S3; status codes 200-399 are considered
+	// valid.
+	resp, err := pk.client.Do(req)
+	if err != nil {
+		pk.log.Errorf("Failed to perform request: %s %s %s\n", req.Method,
+			req.URL.String(), err.Error())
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+		return
+	}
+	// Always release the response body so the transport can re-use the
+	// underlying connection; without this every request leaks a socket and
+	// keep-alive never kicks in.
+	defer resp.Body.Close()
+	statusOK := resp.StatusCode >= 200 && resp.StatusCode < 400
+	pk.log.Infof("%s %d %s %s %s\n", r.Method, resp.StatusCode, hostStr,
+		host.Bucket, key)
+	if !statusOK {
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+		return
+	}
+
+	// If our key ends in "/" and AutoIndex is set to true, try to decode any
+	// received XML as a directory index and return that instead, unless an
+	// Index is configured, which takes precedence over AutoIndex and is handled
+	// above.
+	//
+	// The downstream webserver should be configured to uniquely map directory
+	// paths to indexes if such behavior is desired; index discovery logic and
+	// state maintenance increases complexity, when we really want to be a
+	// minimal request shim with few behavioral assumptions.
+	//
+	// NGINX, for example, can map paths directly to index files, so we can use
+	// directory index logic *and* point to index files for paths declared in
+	// advance, if desired
+	//
+	// location = / {
+	//     proxy_pass http://127.0.0.1:8085/index.html;
+	//     ....
+	// }
+	if strings.HasSuffix(key, "/") {
+		if host.AutoIndex {
+			// NOTE(review): this exact match misses parameterized media types
+			// such as "application/xml; charset=utf-8" — confirm the S3
+			// endpoints in use never send parameters on listing responses.
+			if mime, ok := resp.Header["Content-Type"]; ok && mime[0] ==
+				"application/xml" {
+				reader := bufio.NewReader(resp.Body)
+				decoder := xml.NewDecoder(reader)
+
+				// Decode the bucket-listing XML into Index for template
+				// rendering; Path carries the request path for display.
+				var index Index
+				index.Path = r.URL.Path
+				if err := decoder.Decode(&index); err != nil {
+					pk.log.Errorf("Failed to decode XML response: %s: %s\n",
+						req.URL.String(), err.Error())
+					http.Error(w, http.StatusText(http.StatusNotFound),
+						http.StatusNotFound)
+					return
+				}
+
+				// We receive the same (empty) XML response from S3 when
+				// requesting non-existent directories and empty directories, so
+				// we'll return 404 for both cases.
+				if index.KeyCount == 0 {
+					pk.log.Infof("No keys received: %s %s %s %s\n", r.Method,
+						hostStr, host.Bucket, key)
+					http.Error(w, http.StatusText(http.StatusNotFound),
+						http.StatusNotFound)
+					return
+				}
+
+				// FIXME: There is probably a more efficient way to prepare
+				// paths for rendering, here.
+				// Strip the bucket-side prefix so rendered listing entries are
+				// relative to the requested directory.
+				relativeDir := path.Join(host.Path, key)
+				if strings.HasSuffix(key, "/") {
+					relativeDir += "/"
+				}
+				for i := range index.CommonPrefixes {
+					prefixTrim := strings.TrimPrefix(index.CommonPrefixes[i].Prefix,
+						strings.TrimPrefix(relativeDir, "/"),
+					)
+					index.CommonPrefixes[i].Prefix = prefixTrim
+				}
+				for i := range index.Contents {
+					prefixTrim := strings.TrimPrefix(index.Contents[i].Key,
+						strings.TrimPrefix(relativeDir, "/"),
+					)
+					index.Contents[i].Key = prefixTrim
+				}
+
+				// Check the parse error: the previous code discarded it and
+				// would nil-deref on Execute if index.html were missing or
+				// malformed.
+				t, err := template.ParseFiles(path.Join(pk.templates,
+					"index.html"))
+				if err != nil {
+					pk.log.Errorf("Failed to parse template: %s %s %s: %s\n",
+						hostStr, host.Bucket, key, err.Error())
+					http.Error(w, http.StatusText(http.StatusInternalServerError),
+						http.StatusInternalServerError)
+					return
+				}
+				if err := t.Execute(w, index); err != nil {
+					pk.log.Errorf("Failed to render template: %s %s %s: %s\n",
+						hostStr, host.Bucket, key, err.Error())
+					http.Error(w, http.StatusText(http.StatusNotFound),
+						http.StatusNotFound)
+				}
+				return
+			}
+		} else {
+			http.Error(w, http.StatusText(http.StatusUnauthorized),
+				http.StatusUnauthorized)
+			return
+		}
+	}
+
+	// Filter any S3-specific response headers; potentially sensitive
+	// information exposure via IDs, tags...
+	for name, headers := range resp.Header {
+		for _, v := range headers {
+			for _, a := range ALLOWED_RESPONSE_HEADERS {
+				if strings.ToLower(name) == a {
+					w.Header().Set(name, v)
+				}
+			}
+		}
+	}
+
+	// Stream response buffer directly to client; memory-efficient over
+	// io.ReadAll().
+	w.WriteHeader(resp.StatusCode)
+	_, err = io.Copy(w, resp.Body)
+	if err != nil {
+		pk.log.Errorf("Error writing response body: %s %s %s: %s\n", hostStr,
+			host.Bucket, key, err.Error())
+	}
+}
+
+// presignQuery returns the request's signed URL containing query parameters
+// used to authenticate against S3 services (Amazon S3, Backblaze B2 ...).
+func (pk *Pipkin) presignQuery(method string, host Host, key string) (string, error) {
+
+	amzBucket := pk.config.Buckets[host.Bucket]
+	amzDateShort := time.Now().UTC().Format("20060102")
+
+	// The date needs to follow the ISO 8601 standard and must be formatted with
+	// the "yyyyMMddTHHmmssZ" format.
+	// NOTE(review): time.Now() is read here and again above for amzDateShort;
+	// across a UTC midnight rollover the two reads can disagree, producing a
+	// credential scope that no longer matches X-Amz-Date — consider capturing
+	// a single time.Time and deriving both formats from it.
+	amzDateLong := time.Now().UTC().Format("20060102T150405Z")
+
+	// amzAlgorithm identifies the version of AWS Signature and the algorithm
+	// that you used to calculate the signature; for AWS Signature Version 4,
+	// this value is AWS4-HMAC-SHA256--AWS Signature Version 4 (AWS4) and the
+	// HMAC-SHA256 algorithm (HMAC-SHA256).
+	amzAlgorithm := "AWS4-HMAC-SHA256"
+	amzService := "s3"
+
+	// Credential scope: <keyID>/<date>/<region>/<service>/aws4_request
+	amzCred := fmt.Sprintf("%s/%s/%s/%s/aws4_request", amzBucket.KeyID,
+		amzDateShort, amzBucket.Region, amzService)
+
+	// Prefix current key with path value from pipkin.json; we support virtual
+	// hosts, e.g. /example.com/index.html
+	//
+	// url.JoinPath() encodes the path, so we have to call url.PathUnescape()
+	// on amzResource when we create our Prefix and the final signed URL.
+	amzResource, err := url.JoinPath(host.Path, key)
+	if err != nil {
+		return "", err
+	}
+	// Directory request: sign the bucket root and list via a prefix query
+	// instead of fetching a key.
+	var amzPrefix string
+	if strings.HasSuffix(amzResource, "/") {
+		if len(amzResource) > 1 {
+			amzPrefix = strings.TrimPrefix(amzResource, "/")
+			amzPrefix, _ = url.PathUnescape(amzPrefix)
+		}
+		amzResource = "/"
+	}
+
+	// Seconds for which the generated presigned URL is valid; for example,
+	// 86400 (24 hours). The minimum value we can set is 1, and the maximum is
+	// 604800 (seven days).
+	amzExpires := "86400"
+
+	// The headers that we used to calculate the signature. The following
+	// headers are required in the signature calculations:
+	// - The HTTP host header.
+	// - Any x-amz-* headers that we plan to add to the request.
+	amzSignedHeaders := "host"
+
+	// Populate URI key/value pairs; used in canonical request signature and in
+	// the final signed URL we use to make our request to S3, with the
+	// X-Amz-Signature header below added once the signature is calculated.
+	uri := url.Values{}
+	uri.Add("X-Amz-Algorithm", amzAlgorithm)
+	uri.Add("X-Amz-Credential", amzCred)
+	uri.Add("X-Amz-Date", amzDateLong)
+	uri.Add("X-Amz-Expires", amzExpires)
+	uri.Add("X-Amz-SignedHeaders", amzSignedHeaders)
+	if amzPrefix != "" {
+		uri.Add("list-type", "2")
+		uri.Add("max-keys", "50000")
+		uri.Add("prefix", amzPrefix)
+		uri.Add("delimiter", "/")
+	}
+
+	// Canonical request layout, e.g.:
+	// GET
+	// /test.txt
+	// $encoded_canonicalURI
+	// host:examplebucket.s3.amazonaws.com
+	//
+	// host
+	// UNSIGNED-PAYLOAD
+	uriEncoded := strings.Replace(uri.Encode(), "+", "%20", -1)
+	canonicalRequest := fmt.Sprintf("%s\n%s\n%s\nhost:%s\n\n%s\n%s", method,
+		amzResource, uriEncoded, amzBucket.Host, amzSignedHeaders,
+		"UNSIGNED-PAYLOAD")
+
+	// Scope: <date>/<region>/s3/aws4_request
+	amzScope := fmt.Sprintf("%s/%s/%s/aws4_request", amzDateShort,
+		amzBucket.Region, amzService)
+
+	canonicalRequestSHA256 := sha256.Sum256([]byte(canonicalRequest))
+	canonicalRequestSHA256Hex := hex.EncodeToString(canonicalRequestSHA256[:])
+
+	// String to sign, e.g.:
+	// AWS4-HMAC-SHA256
+	// 20130524T000000Z
+	// 20130524/us-east-1/s3/aws4_request
+	// 3bfa292879f6447bbcda7001decf97f4a54dc650c8942174ae0a9121cf58ad04
+	strToSign := fmt.Sprintf("%s\n%s\n%s\n%s", amzAlgorithm, amzDateLong,
+		amzScope, canonicalRequestSHA256Hex)
+
+	// Derive the SigV4 signing key: an HMAC chain over date, region, service,
+	// and the literal "aws4_request".
+	dateKey := hmac.New(sha256.New, []byte("AWS4"+amzBucket.AppKey))
+	dateKey.Write([]byte(amzDateShort))
+
+	dateRegionKey := hmac.New(sha256.New, dateKey.Sum(nil))
+	dateRegionKey.Write([]byte(amzBucket.Region))
+
+	dateRegionServiceKey := hmac.New(sha256.New, dateRegionKey.Sum(nil))
+	dateRegionServiceKey.Write([]byte(amzService))
+
+	signingKey := hmac.New(sha256.New, dateRegionServiceKey.Sum(nil))
+	signingKey.Write([]byte("aws4_request"))
+
+	amzSignatureSha256 := hmac.New(sha256.New, signingKey.Sum(nil))
+	amzSignatureSha256.Write([]byte(strToSign))
+
+	// Provides the signature to authenticate our request. This signature must
+	// match the signature S3 calculates; otherwise, S3 denies the request.
+ amzSignatureSha256Hex := hex.EncodeToString(amzSignatureSha256.Sum(nil)) + signedURL, err := url.Parse(amzBucket.Endpoint) + if err != nil { + return "", err + } + + // Build & encode signed URL! We've calculated everything we need, now. + uri.Add("X-Amz-Signature", amzSignatureSha256Hex) + signedURL.Path, _ = url.PathUnescape(amzResource) + uriEncoded = strings.Replace(uri.Encode(), "+", "%20", -1) + signedURL.RawQuery = uriEncoded + return signedURL.String(), nil +} + +func main() { + + // We should re-use socket connections between requests. + client := &http.Client{} + pk := &Pipkin{client: client} + + var confPath string + flag.StringVar(&confPath, "conf", "pipkin.json", "path to JSON config") + flag.StringVar(&pk.templates, "templates", "./templates", "path to template directory") + flag.Parse() + confFile, err := os.ReadFile(confPath) + if err != nil { + log.Fatal(err) + } + err = json.Unmarshal([]byte(confFile), &pk.config) + if err != nil { + log.Fatal(err) + } + pk.log = NewLogger(pk.config.LogLevel) + + http.HandleFunc("/", pk.fetch) + http.ListenAndServe(pk.config.Host+":"+pk.config.Port, nil) +} diff --git a/pipkin.json.example b/pipkin.json.example new file mode 100644 index 0000000..56db546 --- /dev/null +++ b/pipkin.json.example @@ -0,0 +1,28 @@ +{ + "host": "127.0.0.1", + "port": "8085", + "proxied": false, + "buckets": { + "example-bucket": { + "keyID": "XXXXXXXXXXXXXXXXXXXXXXXXX", + "appKey": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + "region": "us-west-001", + "endpoint": "https://example-bucket.s3.us-west-001.backblazeb2.com", + "host": "example-bucket.s3.us-west-001.backblazeb2.com" + } + }, + "hosts": { + "media.example.org": { + "bucket": "example-bucket", + "path": "/media.example.org/", + "index": "", + "autoindex": true + }, + "example.org": { + "bucket": "example-bucket", + "path": "/example.org/", + "index": "index.html", + "autoindex": false + } + } +} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 
0000000..691de8e
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,34 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Index of {{ .Path }}</title>
+</head>
+<body>
+<h1>Index of {{ .Path }}</h1>
+<hr>
+<table>
+<tr>
+<th>Name</th>
+<th>Last Modified</th>
+<th>Size</th>
+</tr>
+{{- range $f := .Contents -}}
+<tr>
+<td><a href="{{ $f.Key }}">{{- $f.Key -}}</a></td>
+<td>{{- $f.LastModified -}}</td>
+<td>{{- $f.Size -}}</td>
+</tr>
+{{- end -}}
+{{- range $f := .CommonPrefixes -}}
+<tr>
+<td><a href="{{ $f.Prefix }}">{{- $f.Prefix -}}</a></td>
+<td></td>
+<td></td>
+</tr>
+{{- end -}}
+</table>
+<hr>
+</body>
+</html>
-- 
cgit v1.2.3-54-g00ecf