aboutsummaryrefslogtreecommitdiff
path: root/queue.go
diff options
context:
space:
mode:
author ale <ale@incal.net> 2020-02-17 21:39:06 +0000
committer ale <ale@incal.net> 2020-02-17 21:40:29 +0000
commit 533f472553d6db42a1ae704285e33f53cf90f81d (patch)
tree 122c472cc685e567d25794357c90ff92b7165b1c /queue.go
parent fec78595f9986cb908ef1ff61cfb3a5828986456 (diff)
download crawl-533f472553d6db42a1ae704285e33f53cf90f81d.tar.gz
crawl-533f472553d6db42a1ae704285e33f53cf90f81d.zip
Propagate the link tag through redirects
To do this we have to plumb the tag through the queue and the Handler interface, but it should allow the resources associated with a page to be fetched via the IncludeRelatedScope even when they are behind a redirect.
Diffstat (limited to 'queue.go')
-rw-r--r-- queue.go 7
1 files changed, 4 insertions, 3 deletions
diff --git a/queue.go b/queue.go
index ee0e7ed..cd4143c 100644
--- a/queue.go
+++ b/queue.go
@@ -28,6 +28,7 @@ type queuePair struct {
URL string
Depth int
+ Tag int
}
// Scan the pending queue and send items on 'ch'. Returns an error
@@ -58,10 +59,10 @@ func (q *queue) Scan(ch chan<- queuePair) error {
}
// Add an item to the pending work queue.
-func (q *queue) Add(wb *leveldb.Batch, urlStr string, depth int, when time.Time) error {
+func (q *queue) Add(wb *leveldb.Batch, urlStr string, tag, depth int, when time.Time) error {
t := uint64(when.UnixNano())
qkey := bytes.Join([][]byte{queuePrefix, encodeUint64(t), encodeUint64(uint64(rand.Int63()))}, queueKeySep)
- return q.db.PutObjBatch(wb, qkey, &queuePair{URL: urlStr, Depth: depth})
+ return q.db.PutObjBatch(wb, qkey, &queuePair{URL: urlStr, Tag: tag, Depth: depth})
}
func (q *queue) acquire(qp queuePair) error {
@@ -87,7 +88,7 @@ func (q *queue) Release(wb *leveldb.Batch, qp queuePair) {
// Retry processing this item at a later time.
func (q *queue) Retry(wb *leveldb.Batch, qp queuePair, delay time.Duration) error {
wb.Delete(activeQueueKey(qp.key))
- if err := q.Add(wb, qp.URL, qp.Depth, time.Now().Add(delay)); err != nil {
+ if err := q.Add(wb, qp.URL, qp.Tag, qp.Depth, time.Now().Add(delay)); err != nil {
return err
}
atomic.AddInt32(&q.numActive, -1)