aboutsummaryrefslogtreecommitdiff
path: root/archive.go
blob: 7c38c515fccf3e5ff84f1c6523a2077cf51ef538 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package main

import (
	"encoding/json"
	"log"
	"net"
	"net/http"
	"regexp"
	"strconv"
	"time"
)

// Wayback Machine endpoints. In both cases the target URL is appended
// directly to the end of the string.
var (
	// API_AVAILABILITY is the prefix of the snapshot-availability query.
	API_AVAILABILITY = "http://archive.org/wayback/available?url="
	// API_SAVE is the prefix of the save request.
	API_SAVE = "https://web.archive.org/save/"
)

// HTTP settings shared by every request made from this file.
var (
	// TIMEOUT is a plain count stored in a time.Duration; it is scaled by
	// time.Second where it is used, giving a 10 second client timeout.
	TIMEOUT time.Duration = 10

	// client is the single HTTP client reused for all requests.
	client = &http.Client{Timeout: TIMEOUT * time.Second}
)

// Wayback is the top-level object that isArchived decodes the availability
// response body into.
type Wayback struct {
	// Snapshots maps the "archived_snapshots" JSON object.
	Snapshots Snapshot `json:"archived_snapshots,omitempty"`
}

// Snapshot is the type of the Wayback.Snapshots field, i.e. the decoded
// "archived_snapshots" JSON object.
type Snapshot struct {
	// Recent maps the "closest" JSON object.
	Recent Closest `json:"closest"`
}

// Closest holds the fields read from the "closest" JSON object of an
// availability response.
type Closest struct {
	// Available maps the "available" JSON boolean.
	Available bool   `json:"available"`
	// Status maps the "status" JSON string; isArchived converts it to an int.
	// NOTE(review): presumably the HTTP status recorded for the snapshot — confirm.
	Status    string `json:"status"`
}

// isIgnored reports whether url matches at least one of the regular
// expressions in regex. It returns false when regex is empty.
//
// The patterns are supplied by the caller, so a pattern that does not
// compile is logged and skipped rather than panicking; the remaining
// patterns are still evaluated.
func isIgnored(regex []string, url string) bool {
	for _, pattern := range regex {
		matched, err := regexp.MatchString(pattern, url)
		if err != nil {
			log.Printf("isIgnored: skipping invalid pattern %q: %v", pattern, err)
			continue
		}
		if matched {
			return true
		}
	}
	return false
}

// isArchived queries the availability endpoint for url.
//
// It returns the "available" flag of the closest snapshot together with that
// snapshot's status string converted to an int. If the request cannot be
// built or sent, or the response body cannot be decoded, the error is logged
// and (false, 0) is returned.
func isArchived(url string) (bool, int) {
	req, err := http.NewRequest("GET", API_AVAILABILITY+url, nil)
	if err != nil {
		// Without this check a malformed url would pass a nil request to
		// client.Do.
		log.Println("isArchived:", err)
		return false, 0
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Println("isArchived: ", err)
		return false, 0
	}
	// The body must be closed or the underlying connection is leaked.
	defer resp.Body.Close()

	av := &Wayback{}
	if err := json.NewDecoder(resp.Body).Decode(av); err != nil {
		log.Println("isArchived:", err)
		return false, 0
	}

	// The conversion error is deliberately ignored: a missing or non-numeric
	// status is reported as 0.
	status, _ := strconv.Atoi(av.Snapshots.Recent.Status)
	return av.Snapshots.Recent.Available, status
}

// archive sends a GET request for url to the save endpoint.
//
// It returns the HTTP status code of the response, or 0 when the request
// could not be built or sent. Failures are logged, except timeouts, which
// are silently ignored.
func archive(url string) int {
	req, err := http.NewRequest("GET", API_SAVE+url, nil)
	if err != nil {
		// Without this check a malformed url would pass a nil request to
		// client.Do.
		log.Println("archive:", err)
		return 0
	}

	resp, err := client.Do(req)
	if err != nil {
		// The checked assertion matters: an unchecked one yields a nil
		// interface for errors that are not net.Error, and calling Timeout
		// on it would panic.
		if ne, ok := err.(net.Error); !ok || !ne.Timeout() {
			log.Println("archive:", err)
		}
		return 0
	}
	// Only the status code is needed; close the body so the connection is
	// not leaked.
	defer resp.Body.Close()

	return resp.StatusCode
}