diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | README | 35 | ||||
-rw-r--r-- | UNLICENSE | 24 | ||||
-rw-r--r-- | go.mod | 3 | ||||
-rw-r--r-- | go.sum | 0 | ||||
-rw-r--r-- | log.go | 51 | ||||
-rw-r--r-- | pipkin.go | 438 | ||||
-rw-r--r-- | pipkin.json.example | 28 | ||||
-rw-r--r-- | templates/index.html | 42 |
10 files changed, 645 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..705a8d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.swp +*.swo +pipkin +pipkin.json diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c177b2e --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +.POSIX: +.SUFFIXES: + +GO = go +RM = rm +GOFLAGS = -o pipkin +PREFIX = /usr/local + +goflags = $(GOFLAGS) + +all: pipkin + +pipkin: + $(GO) build $(goflags) -ldflags "-X main.buildPrefix=$(PREFIX)" + +clean: + $(RM) -f pipkin + +test: + $(GO) test -v ./... @@ -0,0 +1,35 @@ +pipkin is an S3 HTTP gateway, designed to support the hosting of static websites +from S3 platforms which do not natively provide first-class support for custom +HTTP/TLS termination (e.g. Backblaze B2), and support the use of less-popular +(or even self-managed) content delivery networks. + +pipkin transparently pipes responses received from S3 directly to clients; it +consumes ~15MB of RAM and supports concurrent downloads of over 10Gbit/s, and +has no dependencies. + +pipkin maps host headers to unique buckets/paths, so it's possible to host +multiple websites from a single bucket. client requests and S3 responses are +filtered against sets of authorized headers. webserver-style indexes and +directory listings are supported. HTTP sockets are re-used between requests. + +to improve performance and reduce redundant API transactions/financial DoS, it's +recommended to host pipkin behind a caching proxy (e.g. NGINX) and refresh stale +caches in the background between requests: + +location / { + proxy_pass http://127.0.0.1:8085; + proxy_cache proxycache; + proxy_cache_revalidate on; + proxy_cache_background_update on; + proxy_cache_lock on; + proxy_cache_lock_timeout 1s; + proxy_cache_use_stale updating error timeout http_404 http_500; + proxy_cache_valid 200 302 10m; + proxy_cache_valid 404 1m; +} + +Usage of ./pipkin: + -conf string + path to JSON config (default "pipkin.json") + -templates string + path to template directory (default "./templates") diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> @@ -0,0 +1,3 @@ +module main + +go 1.21.5 @@ -0,0 +1,51 @@ +package main + +import ( + "log" + "os" +) + +const ( + infoLevel = 1 + errLevel = 2 +) + +type Log struct { + level int +} + +func NewLogger(logLevel string) *Log { + + if logLevel == "err" { + return &Log{1} + } + if logLevel == "info" { + return &Log{2} + } + return &Log{0} +} + +func (l *Log) Infof(format string, args ...interface{}) { + + if l.level >= infoLevel { + log.SetPrefix("INFO: ") + log.SetOutput(os.Stdout) + log.Printf(format, args...) + } +} + +func (l *Log) Errorf(format string, args ...interface{}) { + + if l.level >= errLevel { + log.SetPrefix("ERROR: ") + log.SetOutput(os.Stderr) + log.Printf(format, args...) + } +} + +func (l *Log) Fatalf(format string, args ...interface{}) { + + log.SetPrefix("FATAL: ") + log.SetOutput(os.Stderr) + log.Fatalf(format, args...) +} diff --git a/pipkin.go b/pipkin.go new file mode 100644 index 0000000..8996f56 --- /dev/null +++ b/pipkin.go @@ -0,0 +1,438 @@ +package main + +import ( + "bufio" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "encoding/xml" + "flag" + "fmt" + "html/template" + "io" + "log" + "net/http" + "net/url" + "os" + "path" + "strings" + "time" +) + +type Pipkin struct { + client *http.Client + config Config + log *Log + templates string +} + +type Config struct { + Host string `json:"host"` + Port string `json:"port"` + Buckets map[string]Bucket `json:"buckets"` + Hosts map[string]Host `json:"hosts"` + Proxy bool `json:"proxied"` + LogLevel string `json:"loglevel"` +} + +type Bucket struct { + KeyID string `json:"keyID"` + AppKey string `json:"appKey"` + Region string `json:"region"` + Endpoint string `json:"endpoint"` + Host string `json:"host"` +} + +type Host struct { + AutoIndex bool `json:"autoindex"` + Bucket string `json:"bucket"` + Index string `json:"index"` + Path string `json:"path"` +} + +type Index struct { + Path string + CommonPrefixes []Prefixes + Contents []Contents + Name string + MaxKeys uint + KeyCount uint +} + +type Prefixes struct { + Prefix string +} + +type Contents struct { + ETag string + Key string + LastModified string + Size string +} + +// Headers we receive from clients and forward on to S3 endpoints. +var ALLOWED_REQUEST_HEADERS = []string{ + "accept", + "accept-encoding", + "accept-language", + "date", + "if-match", + "if-modified-since", + "if-none-match", + "if-unmodified-since", + "range", + "x-amz-content-sha256", + "x-amz-date", + "x-amz-server-side-encryption-customer-algorithm", + "x-amz-server-side-encryption-customer-key", + "x-amz-server-side-encryption-customer-key-md5", +} + +// Headers we receive from S3 and send back to requesting clients. +var ALLOWED_RESPONSE_HEADERS = []string{ + "accept-ranges", + "content-encoding", + "content-length", + "content-range", + "content-type", + "last-modified", +} + +// Fetch transparently forwards HTTP GET and HEAD requests to S3 services. +// +// Host header values are indexed to buckets may specify unique path, index +// file, and directory listing behavior on a host-by-host basis. +func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) { + + // We only support GET and HEAD methods; anything that can make S3 udpates + // (DELETE, PUT...) is undesired here. + if r.Method != "GET" && r.Method != "HEAD" { + http.Error(w, http.StatusText(http.StatusMethodNotAllowed), + http.StatusMethodNotAllowed) + return + } + + // Host header is only useful when we're fronted by a proxy, otherwise we + // should use r.Host. In most cases, setting Proxied to false will produce + // the desired behavior. + var hostStr string + if pk.config.Proxy { + hostStr = r.URL.Host + } else { + hostStr = strings.Split(r.Host, ":")[0] + } + host, ok := pk.config.Hosts[hostStr] + if !ok { + pk.log.Errorf("Host %s not found in configuration\n", hostStr) + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + + // If an Index is configured and our key ends in "/", update our path to + // include it, regardless of whether it exists in S3; takes precedence over + // AutoIndex, and its absence results in a 404 response. + key := r.URL.Path + var err error + if strings.HasSuffix(key, "/") && host.Index != "" { + key = path.Join(key, host.Index) + } + + // Pre-sign! We roll our own HMAC-SHA256 signature routines to avoid relying + // on Amazon's (large) SDK; our memory footprint should be low. + presigned, err := pk.presignQuery(r.Method, host, key) + if err != nil { + pk.log.Errorf("Failed to presign: %s %s %s %s: %s\n", r.Method, hostStr, + host.Bucket, key, err.Error()) + http.Error(w, http.StatusText(http.StatusInternalServerError), + http.StatusInternalServerError) + return + } + req, err := http.NewRequest(r.Method, presigned, nil) + if err != nil { + pk.log.Errorf("Failed to create new request: %s %s %s\n", r.Method, + presigned, err.Error()) + http.Error(w, http.StatusText(http.StatusInternalServerError), + http.StatusInternalServerError) + return + } + + // Filter spurious headers; untrusted clients should not be messing with S3. + for name, headers := range r.Header { + for _, v := range headers { + for _, a := range ALLOWED_REQUEST_HEADERS { + if strings.ToLower(name) == a { + req.Header.Set(name, v) + } + } + } + } + + // Perform outbound request to S3; status codes 200-399 are considered + // valid. + resp, err := pk.client.Do(req) + if err != nil { + pk.log.Errorf("Failed to perform request: %s %s %s\n", req.Method, + req.URL.String(), err.Error()) + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + statusOK := resp.StatusCode >= 200 && resp.StatusCode < 400 + pk.log.Infof("%s %d %s %s %s\n", r.Method, resp.StatusCode, hostStr, + host.Bucket, key) + if !statusOK { + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + + // If our key ends in "/" and AutoIndex is set to true, try to decode any + // received XML as a directory index and return that instead, unless an + // Index is configured, which takes precedence over AutoIndex and is handled + // above. + // + // The downstream webserver should be configured to uniquely map directory + // paths to indexes if such behavior is desired; index discovery logic and + // state maintenance increases complexity, when we really want to be a + // a minimal request shim with few behavioral assumptions. + // + // NGINX, for example, can map paths directly to index files, so we can use + // directory index logic *and* point to index files for paths declared in + // advance, if desired + // + // location = / { + // proxy_pass http://127.0.0.1:8085/index.html; + // .... + // } + if strings.HasSuffix(key, "/") { + if host.AutoIndex { + if mime, ok := resp.Header["Content-Type"]; ok && mime[0] == + "application/xml" { + reader := bufio.NewReader(resp.Body) + decoder := xml.NewDecoder(reader) + + var index Index + index.Path = r.URL.Path + if err := decoder.Decode(&index); err != nil { + pk.log.Errorf("Failed to decode XML response: %s: %s\n", + req.URL.String(), err.Error()) + http.Error(w, http.StatusText(http.StatusNotFound), + http.StatusNotFound) + return + } + + // We receive the same (empty) XML response from S3 when + // requesting non-existent directories and empty directories, so + // we'll return 404 for both cases. + if index.KeyCount == 0 { + pk.log.Infof("No keys received: %s %s %s %s\n", r.Method, + hostStr, host.Bucket, key) + http.Error(w, http.StatusText(http.StatusNotFound), + http.StatusNotFound) + return + } + + // FIXME: There is probably a more efficient way to prepare + // paths for rendering, here. + relativeDir := path.Join(host.Path, key) + if strings.HasSuffix(key, "/") { + relativeDir += "/" + } + for i := range index.CommonPrefixes { + prefixTrim := strings.TrimPrefix(index.CommonPrefixes[i].Prefix, + strings.TrimPrefix(relativeDir, "/"), + ) + index.CommonPrefixes[i].Prefix = prefixTrim + } + for i := range index.Contents { + prefixTrim := strings.TrimPrefix(index.Contents[i].Key, + strings.TrimPrefix(relativeDir, "/"), + ) + index.Contents[i].Key = prefixTrim + } + + t, _ := template.ParseFiles(path.Join(pk.templates, + "index.html")) + if err := t.Execute(w, index); err != nil { + pk.log.Errorf("Failed to render template: %s %s %s: %s\n", + hostStr, host.Bucket, key, err.Error()) + http.Error(w, http.StatusText(http.StatusNotFound), + http.StatusNotFound) + } + return + } + } else { + http.Error(w, http.StatusText(http.StatusUnauthorized), + http.StatusUnauthorized) + return + } + } + + // Filter any S3-specific response headers; potentially sensitive + // information exposure via IDs, tags... + for name, headers := range resp.Header { + for _, v := range headers { + for _, a := range ALLOWED_RESPONSE_HEADERS { + if strings.ToLower(name) == a { + w.Header().Set(name, v) + } + } + } + } + + // Stream response buffer directly to client; memory-efficient over + // io.ReadAll(). + w.WriteHeader(resp.StatusCode) + _, err = io.Copy(w, resp.Body) + if err != nil { + pk.log.Errorf("Error writing response body: %s %s %s: %s\n", hostStr, + host.Bucket, key, err.Error()) + } +} + +// presignQuery returns the request's signed URL containing query parameters +// used to authenticate against S3 services (Amazon S3, Backblaze B2 ...). +func (pk *Pipkin) presignQuery(method string, host Host, key string) (string, error) { + + amzBucket := pk.config.Buckets[host.Bucket] + amzDateShort := time.Now().UTC().Format("20060102") + + // The date needs to follow the ISO 8601 standard and must be formatted with + // the "yyyyMMddTHHmmssZ" format. + amzDateLong := time.Now().UTC().Format("20060102T150405Z") + + // amzAlgorithm identifies the version of AWS Signature and the algorithm + // that you used to calculate the signature; for AWS Signature Version 4, + // this value is AWS4-HMAC-SHA256--AWS Signature Version 4 (AWS4) and the + // HMAC-SHA256 algorithm (HMAC-SHA256). + amzAlgorithm := "AWS4-HMAC-SHA256" + amzService := "s3" + + // <your-access-key-id>/<date>/<region>/<service>/aws4_request + amzCred := fmt.Sprintf("%s/%s/%s/%s/aws4_request", amzBucket.KeyID, + amzDateShort, amzBucket.Region, amzService) + + // Prefix current key with path value from pipkin.json; we support virtual + // hosts, e.g. /example.com/index.html + // + // url.JoinPath() encodes the path, so we have to call url.PathUnescape() + // on amzResource when we create our Prefix and the final signed URL. + amzResource, err := url.JoinPath(host.Path, key) + if err != nil { + return "", err + } + var amzPrefix string + if strings.HasSuffix(amzResource, "/") { + if len(amzResource) > 1 { + amzPrefix = strings.TrimPrefix(amzResource, "/") + amzPrefix, _ = url.PathUnescape(amzPrefix) + } + amzResource = "/" + } + + // Seconds for which the generated presigned URL is valid; for example, + // 86400 (24 hours). The minimum value we can set is 1, and the maximum is + // 604800 (seven days). + amzExpires := "86400" + + // The headers that we used to calculate the signature. The following + // headers are required in the signature calculations: + // - The HTTP host header. + // - Any x-amz-* headers that we plan to add to the request. + amzSignedHeaders := "host" + + // Populate URI key/value pairs; used in canonical request signature and in + // the final signed URL we use to make our request to S3, with the + // X-Amz-Signature header below added once the signature is calculated. + uri := url.Values{} + uri.Add("X-Amz-Algorithm", amzAlgorithm) + uri.Add("X-Amz-Credential", amzCred) + uri.Add("X-Amz-Date", amzDateLong) + uri.Add("X-Amz-Expires", amzExpires) + uri.Add("X-Amz-SignedHeaders", amzSignedHeaders) + if amzPrefix != "" { + uri.Add("list-type", "2") + uri.Add("max-keys", "50000") + uri.Add("prefix", amzPrefix) + uri.Add("delimiter", "/") + } + + // GET + // /test.txt + // $encoded_canonicalURI + // host:examplebucket.s3.amazonaws.com + // + // host + // UNSIGNED-PAYLOAD + uriEncoded := strings.Replace(uri.Encode(), "+", "%20", -1) + canonicalRequest := fmt.Sprintf("%s\n%s\n%s\nhost:%s\n\n%s\n%s", method, + amzResource, uriEncoded, amzBucket.Host, amzSignedHeaders, + "UNSIGNED-PAYLOAD") + + // <yyyymmdd>/<AWS Region>/s3/aws4_request + amzScope := fmt.Sprintf("%s/%s/%s/aws4_request", amzDateShort, + amzBucket.Region, amzService) + + canonicalRequestSHA256 := sha256.Sum256([]byte(canonicalRequest)) + canonicalRequestSHA256Hex := hex.EncodeToString(canonicalRequestSHA256[:]) + + // AWS4-HMAC-SHA256 + // 20130524T000000Z + // 20130524/us-east-1/s3/aws4_request + // 3bfa292879f6447bbcda7001decf97f4a54dc650c8942174ae0a9121cf58ad04 + strToSign := fmt.Sprintf("%s\n%s\n%s\n%s", amzAlgorithm, amzDateLong, + amzScope, canonicalRequestSHA256Hex) + + dateKey := hmac.New(sha256.New, []byte("AWS4"+amzBucket.AppKey)) + dateKey.Write([]byte(amzDateShort)) + + dateRegionKey := hmac.New(sha256.New, dateKey.Sum(nil)) + dateRegionKey.Write([]byte(amzBucket.Region)) + + dateRegionServiceKey := hmac.New(sha256.New, dateRegionKey.Sum(nil)) + dateRegionServiceKey.Write([]byte(amzService)) + + signingKey := hmac.New(sha256.New, dateRegionServiceKey.Sum(nil)) + signingKey.Write([]byte("aws4_request")) + + amzSignatureSha256 := hmac.New(sha256.New, signingKey.Sum(nil)) + amzSignatureSha256.Write([]byte(strToSign)) + + // Provides the signature to authenticate our request. This signature must + // match the signature S3 calculates; otherwise, S3 denies the request. + amzSignatureSha256Hex := hex.EncodeToString(amzSignatureSha256.Sum(nil)) + signedURL, err := url.Parse(amzBucket.Endpoint) + if err != nil { + return "", err + } + + // Build & encode signed URL! We've calculated everything we need, now. + uri.Add("X-Amz-Signature", amzSignatureSha256Hex) + signedURL.Path, _ = url.PathUnescape(amzResource) + uriEncoded = strings.Replace(uri.Encode(), "+", "%20", -1) + signedURL.RawQuery = uriEncoded + return signedURL.String(), nil +} + +func main() { + + // We should re-use socket connections between requests. + client := &http.Client{} + pk := &Pipkin{client: client} + + var confPath string + flag.StringVar(&confPath, "conf", "pipkin.json", "path to JSON config") + flag.StringVar(&pk.templates, "templates", "./templates", "path to template directory") + flag.Parse() + confFile, err := os.ReadFile(confPath) + if err != nil { + log.Fatal(err) + } + err = json.Unmarshal([]byte(confFile), &pk.config) + if err != nil { + log.Fatal(err) + } + pk.log = NewLogger(pk.config.LogLevel) + + http.HandleFunc("/", pk.fetch) + http.ListenAndServe(pk.config.Host+":"+pk.config.Port, nil) +} diff --git a/pipkin.json.example b/pipkin.json.example new file mode 100644 index 0000000..56db546 --- /dev/null +++ b/pipkin.json.example @@ -0,0 +1,28 @@ +{ + "host": "127.0.0.1", + "port": "8085", + "proxied": false, + "buckets": { + "example-bucket": { + "keyID": "XXXXXXXXXXXXXXXXXXXXXXXXX", + "appKey": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + "region": "us-west-001", + "endpoint": "https://example-bucket.s3.us-west-001.backblazeb2.com", + "host": "example-bucket.s3.us-west-001.backblazeb2.com" + } + }, + "hosts": { + "media.example.org": { + "bucket": "example-bucket", + "path": "/media.example.org/", + "index": "", + "autoindex": true + }, + "example.org": { + "bucket": "example-bucket", + "path": "/example.org/", + "index": "index.html", + "autoindex": false + } + } +} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..691de8e --- /dev/null +++ b/templates/index.html @@ -0,0 +1,42 @@ +<!DOCTYPE html> +<html> +<head> +<style> +table { + font-family: monospace; + border-collapse: collapse; +} + +td, th { + border: 0px solid #dddddd; + text-align: left; + padding: 2px; + padding-right: 1em; +} +</style> +</head> +<body> + +<h2>Index of {{ .Path }}</h2> + +<table> + <tr> + <th>Name</th> + <th>Last Modified</th> + <th>Size</th> + </tr> + {{- range $f := .Contents -}} + <tr> + <td><a href="{{- $f.Key -}}">{{- $f.Key -}}</a></td> + <td>{{- $f.LastModified -}}</td> + <td>{{- $f.Size -}}</td> + </tr> + {{- end -}} + {{- range $f := .CommonPrefixes -}} + <tr> + <td><a href="{{- $f.Prefix -}}">{{- $f.Prefix -}}</a></td> + </tr> + {{- end -}} +</table> +</body> +</html> |