aboutsummaryrefslogtreecommitdiff
path: root/koya
blob: 04f85d4e80018c6d18fd67f5e8ee6f366871cd4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
"""koya

Usage:
  koya --token <token> user1 user2...

Options:
  -h, --help       show this help message and exit
  --origin ORIGIN  sourcehut origin URL (default: https://git.sr.ht/)
  --token TOKEN    account oauth access token (default: None)
  --delay DELAY    delay between origin requests (seconds) (default: 1)
"""

import argparse
import json
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
from urllib.parse import quote, quote_plus, urljoin, urlparse

import requests

# Root logging goes to stdout with a timestamp in front of every message.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    stream=sys.stdout,
)

# Module-wide logger; DEBUG lets every level emitted through `log` pass.
log = logging.getLogger('koya')
log.setLevel(logging.DEBUG)

class Koya:
    """Small client that lists a sourcehut user's git repositories.

    Constructor arguments are stored as attributes:
      origin -- base URL of the git instance; the GraphQL endpoint is
                resolved as "/query" against it
      token  -- OAuth access token, sent as a Bearer credential
      delay  -- seconds to sleep between paginated requests
    """

    # Upper bound (seconds) for a single HTTP request, so that a stalled
    # connection cannot block the archiver indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, origin, token, delay):
        self.origin = origin
        self.token = token
        self.delay = delay

        # A single session so every request carries the same User-Agent.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "koya/1.0 git.jordan.im/koya"})

    def list_repos(self, user):
        """Return the repositories of *user* as a list of result dicts.

        Pages through the ``repositories`` field of the GraphQL endpoint,
        following the returned cursor until none is given and sleeping
        ``self.delay`` seconds between pages. Each element is the object
        requested from the API, i.e. ``{"name": ...}``.

        The list is empty when the user is unknown or the response carries
        no data. When a request fails or the body is not valid JSON, the
        error is logged and the repositories collected so far (possibly
        none) are returned, so callers always receive a list.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.token}"}
        endpoint = urljoin(self.origin, "/query")

        # json.dumps() yields a double-quoted literal with quotes, backslashes
        # and control characters escaped, so the username cannot terminate
        # the string early and change the query.
        user_literal = json.dumps(user)

        repos = []
        # Empty on the first page: the argument list is omitted entirely
        # because the GraphQL grammar does not allow empty parentheses.
        cursor_arg = ""
        while True:
            graphql = f"""
                query {{
                  user(username: {user_literal}) {{
                    repositories{cursor_arg} {{
                      cursor
                      results {{
                        name
                      }}
                    }}
                  }}
                }}"""
            try:
                r = self.session.post(
                    endpoint, headers=headers, json={"query": graphql},
                    timeout=self.REQUEST_TIMEOUT)
                r.raise_for_status()
                res = json.loads(r.text)
            except (requests.RequestException, ValueError) as err:
                log.error(err)
                return repos

            if not isinstance(res, dict):
                log.error(f"Unexpected response while listing {user}")
                return repos
            if res.get("errors"):
                log.error(f"GraphQL errors for {user}: {res['errors']}")

            # "data" can be absent or null when the query was rejected.
            account = (res.get("data") or {}).get("user")
            if not account:
                return repos

            data = account.get("repositories") or {}
            results = data.get("results") or []
            if results:
                log.info(f"Discovered {len(results)} repositories...")
                repos.extend(results)

            cursor = data.get("cursor")
            if not cursor:
                return repos
            cursor_arg = f"(cursor: {json.dumps(cursor)})"

            time.sleep(self.delay)

def multi_urljoin(*parts):
    """Join a base URL with any number of path segments.

    ``parts[0]`` is the base URL; every further part has its leading and
    trailing slashes stripped, is percent-encoded as a URL path (slashes
    inside a part are kept) and the parts are joined with "/". The result
    is resolved against the base with ``urljoin``, so the base should end
    in "/" (or have no path) for the segments to be appended to it.

    Path encoding is done with ``quote`` rather than ``quote_plus``: the
    latter is meant for form/query values and would turn a space into "+",
    which denotes a literal plus sign inside a URL path.
    """
    path = "/".join(quote(part.strip("/"), safe="/") for part in parts[1:])
    return urljoin(parts[0], path)

if __name__ == "__main__":
    # Command-line entry point: for every given user, list their
    # repositories and clone each one (or fetch it when a clone already
    # exists) into <origin hostname>/<user>/<repo> below the working
    # directory.
    DESC = "koya: archive git repositories from sourcehut user accounts"
    parser = argparse.ArgumentParser(
        description=DESC,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # nargs="+" already requires at least one username, so a default value
    # could never take effect and is not given.
    parser.add_argument(
        "users", nargs="+", help="username(s) to archive")
    parser.add_argument(
        "--origin", dest="origin", type=str, action="store",
        default="https://git.sr.ht/", required=False,
        help="sourcehut origin URL")
    parser.add_argument(
        "--token", dest="token", type=str, action="store",
        help="account oauth access token", required=True)
    parser.add_argument(
        "--delay", dest="delay", type=int, action="store", default=1,
        help="delay between origin requests (seconds)")
    args = parser.parse_args()

    host = urlparse(args.origin).hostname
    if not host:
        log.error("Unable to parse hostname from origin, exiting...")
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    koya = Koya(args.origin, args.token, args.delay)
    for user in args.users:
        repos = koya.list_repos(user)
        if not repos:
            log.error(f"No repositories discovered, skipping {user}...")
            continue

        for repo in repos:
            repo_path = os.path.join(host, user, repo["name"])
            # Only treat the directory as an existing clone when it really
            # contains git metadata. A bare directory (e.g. left behind by
            # an interrupted run) would otherwise make `git fetch` fail or
            # act on an enclosing repository.
            if os.path.exists(os.path.join(repo_path, ".git")):
                action = "fetch"
                fetch = ["git", "fetch", "--all", "--force", "--tags", "--prune"]
                result = subprocess.run(fetch, cwd=repo_path)
            else:
                action = "clone"
                # Create only the parent; the target itself is left to
                # `git clone`, so a failed clone is not later mistaken for
                # an existing repository.
                Path(repo_path).parent.mkdir(parents=True, exist_ok=True)

                remote_url = multi_urljoin(args.origin, "~"+user, repo["name"])
                clone = ["git", "clone", remote_url, repo_path]
                result = subprocess.run(clone)

            if result.returncode != 0:
                log.error(
                    f"git {action} failed for {repo_path} "
                    f"(exit status {result.returncode})")

            time.sleep(args.delay)