From c0ae643ad6b4ede0e09248f014d750160c6d2340 Mon Sep 17 00:00:00 2001 From: Jordan Date: Mon, 11 Nov 2024 20:39:58 -0700 Subject: pipkin: use continuation tokens to build indexes completely --- pipkin.go | 113 +++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 27 deletions(-) (limited to 'pipkin.go') diff --git a/pipkin.go b/pipkin.go index d1a494f..79836e5 100644 --- a/pipkin.go +++ b/pipkin.go @@ -1,7 +1,6 @@ package main import ( - "bufio" "crypto/hmac" "crypto/sha256" "encoding/hex" @@ -11,9 +10,9 @@ import ( "flag" "fmt" "html/template" - "math" "io" "log" + "math" "net/http" "os" "path" @@ -145,7 +144,7 @@ func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) { // Pre-sign! We roll our own HMAC-SHA256 signature routines to avoid relying // on Amazon's (large) SDK; our memory footprint should be low. - presigned, err := pk.presignQuery(r.Method, host, key) + presigned, err := pk.presignQuery(r.Method, host, key, "") if err != nil { pk.log.Errorf("Failed to presign: %s %s %s %s: %s\n", r.Method, hostStr, host.Bucket, key, err.Error()) @@ -213,14 +212,10 @@ func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) { if host.AutoIndex { if mime, ok := resp.Header["Content-Type"]; ok && mime[0] == "application/xml" { - reader := bufio.NewReader(resp.Body) - decoder := xml.NewDecoder(reader) - - var index Index - index.Path = r.URL.Path - if err := decoder.Decode(&index); err != nil { - pk.log.Errorf("Failed to decode XML response: %s: %s\n", - req.URL.String(), err.Error()) + index, err := pk.fetchAllKeys(r.Method, host, key) + if err != nil { + pk.log.Errorf("Failed to fetch directory listing: %s: %s\n", + key, err.Error()) http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) return @@ -237,40 +232,42 @@ func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) { return } - // FIXME: There is probably a more efficient way to prepare - // paths for rendering, here. + // Process paths for rendering relativeDir := path.Join(host.Path, key) if strings.HasSuffix(key, "/") { relativeDir += "/" } + + // Process prefixes and contents paths for i := range index.CommonPrefixes { prefixTrim := strings.TrimPrefix(index.CommonPrefixes[i].Prefix, - strings.TrimPrefix(relativeDir, "/"), - ) + strings.TrimPrefix(relativeDir, "/")) index.CommonPrefixes[i].Prefix = prefixTrim } + for i := range index.Contents { prefixTrim := strings.TrimPrefix(index.Contents[i].Key, - strings.TrimPrefix(relativeDir, "/"), - ) + strings.TrimPrefix(relativeDir, "/")) index.Contents[i].Key = prefixTrim } + // Render template t, err := template.New("index.html").Funcs(template.FuncMap{ - "prettyByteSize": prettyByteSize, + "prettyByteSize": prettyByteSize, "formatLastModified": formatLastModified, }).ParseFiles(path.Join(pk.templates, "index.html")) if err != nil { - pk.log.Errorf("Failed to render template: %s %s %s: %s\n", - hostStr, host.Bucket, key, err.Error()) - http.Error(w, http.StatusText(http.StatusNotFound), - http.StatusNotFound) + pk.log.Errorf("Failed to render template: %s\n", err.Error()) + http.Error(w, http.StatusText(http.StatusInternalServerError), + http.StatusInternalServerError) + return } + if err := t.Execute(w, index); err != nil { - pk.log.Errorf("Failed to render template: %s %s %s: %s\n", - hostStr, host.Bucket, key, err.Error()) - http.Error(w, http.StatusText(http.StatusNotFound), - http.StatusNotFound) + pk.log.Errorf("Failed to execute template: %s\n", err.Error()) + http.Error(w, http.StatusText(http.StatusInternalServerError), + http.StatusInternalServerError) + return } return } @@ -310,9 +307,68 @@ func (pk *Pipkin) fetch(w http.ResponseWriter, r *http.Request) { } } +// fetchAllKeys recursively fetches all keys for a directory listing using continuation tokens +func (pk *Pipkin) fetchAllKeys(method string, host Host, key string) (*Index, error) { + var index Index + var continuationToken string + + for { + // Get presigned URL with continuation token if we have one + presigned, err := pk.presignQuery(method, host, key, continuationToken) + if err != nil { + return nil, fmt.Errorf("failed to presign: %w", err) + } + + req, err := http.NewRequest(method, presigned, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := pk.client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to perform request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return nil, fmt.Errorf("received status code %d", resp.StatusCode) + } + + // Parse the XML response + var listResult struct { + IsTruncated bool + NextContinuationToken string + CommonPrefixes []Prefixes + Contents []Contents + Name string + MaxKeys uint + KeyCount uint + } + + if err := xml.NewDecoder(resp.Body).Decode(&listResult); err != nil { + return nil, fmt.Errorf("failed to decode XML: %w", err) + } + + // Append results to our index + index.CommonPrefixes = append(index.CommonPrefixes, listResult.CommonPrefixes...) + index.Contents = append(index.Contents, listResult.Contents...) + index.Name = listResult.Name + index.MaxKeys += listResult.MaxKeys + index.KeyCount += listResult.KeyCount + + // Check if we need to continue + if !listResult.IsTruncated { + break + } + continuationToken = listResult.NextContinuationToken + } + + return &index, nil +} + // presignQuery returns the request's signed URL containing query parameters // used to authenticate against S3 services (Amazon S3, Backblaze B2 ...). -func (pk *Pipkin) presignQuery(method string, host Host, key string) (string, error) { +func (pk *Pipkin) presignQuery(method string, host Host, key string, token string) (string, error) { amzBucket := pk.config.Buckets[host.Bucket] amzDateShort := time.Now().UTC().Format("20060102") @@ -367,6 +423,9 @@ func (pk *Pipkin) presignQuery(method string, host Host, key string) (string, er uri.Add("prefix", amzPrefix) uri.Add("delimiter", "/") } + if token != "" { + uri.Add("continuation-token", token) + } // GET // /test.txt -- cgit v1.2.3-54-g00ecf