1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
#!/usr/bin/env python3
"""koya
Usage:
koya --token <token> user1 user2...
Options:
-h, --help show this help message and exit
--origin ORIGIN sourcehut origin URL (default: https://git.sr.ht/)
--token TOKEN account oauth access token (default: None)
--delay DELAY delay between origin requests (seconds) (default: 1)
"""
import argparse
import json
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
from urllib.parse import quote, quote_plus, urljoin, urlparse

import requests
# Send every log record to stdout, prefixed with a month/day/year timestamp.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    stream=sys.stdout,
)

# Module-wide logger; DEBUG and above are emitted.
log = logging.getLogger('koya')
log.setLevel(logging.DEBUG)
class Koya:
    """Client that lists a sourcehut user's git repositories over GraphQL.

    The constructor stores its arguments unchanged:

    origin -- base URL of the git instance; requests go to its "/query" path
    token  -- OAuth access token, sent as a Bearer credential
    delay  -- seconds slept between successive paginated requests
    """

    # Seconds to wait for the origin before giving up on a request. Without
    # a timeout, requests blocks indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self, origin, token, delay):
        self.origin = origin
        self.token = token
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "koya/1.0 git.jordan.im/koya"})

    def list_repos(self, user):
        """Return every repository entry the origin reports for *user*.

        Pages through the user's repositories, following the cursor the
        server returns until there is none, sleeping ``self.delay`` seconds
        between pages.

        Returns a list of result dicts (each carrying a "name" key, as
        requested by the query). The list is empty when the server reports
        no such user. Returns None when a request fails or the reply is not
        a usable GraphQL response; the failure is logged.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.token}"}
        endpoint = urljoin(self.origin, "/query")
        # json.dumps produces a double-quoted literal with quotes, backslashes
        # and control characters escaped, so the value cannot terminate the
        # string early and alter the query.
        username = json.dumps(user, ensure_ascii=False)
        repos = []
        # Argument list for the repositories field. It is omitted entirely on
        # the first page because empty parentheses are not valid GraphQL.
        repo_args = ""
        while True:
            graphql = f"""
            query {{
                user(username: {username}) {{
                    repositories{repo_args} {{
                        cursor
                        results {{
                            name
                        }}
                    }}
                }}
            }}"""
            try:
                r = self.session.post(
                    endpoint, headers=headers, json={"query": graphql},
                    timeout=self.REQUEST_TIMEOUT)
                r.raise_for_status()
                # Decoding stays inside the try so a non-JSON body is logged
                # like any other failed request instead of escaping.
                payload = r.json()
            except Exception as err:
                log.error(err)
                return None

            # A GraphQL-level failure can arrive with HTTP 200 and
            # "data": null; treat that like a failed request.
            if (not isinstance(payload, dict)
                    or not isinstance(payload.get("data"), dict)):
                log.error(f"Unexpected GraphQL response for {user}: {payload!r}")
                return None

            account = payload["data"].get("user")
            if not account:
                return repos

            page = account["repositories"]
            results = page["results"]
            repos.extend(results)
            if results:
                log.info(f"Discovered {len(results)} repositories...")

            cursor = page.get("cursor")
            if not cursor:
                return repos
            repo_args = f"(cursor: {json.dumps(cursor, ensure_ascii=False)})"
            time.sleep(self.delay)
def multi_urljoin(*parts):
    """Join a base URL with any number of path segments.

    ``parts[0]`` is the base URL. Each later part has its leading and
    trailing slashes stripped and is percent-encoded (keeping "/"), then
    the parts are joined with "/" and resolved against the base with
    ``urljoin``.

    Segments are encoded with ``quote`` rather than ``quote_plus``: in a
    URL path a "+" is a literal plus sign, so a space must become "%20".
    """
    path = "/".join(
        quote(part.strip("/"), safe="/") for part in parts[1:])
    return urljoin(parts[0], path)
def _build_parser():
    """Return the argument parser describing koya's command line."""
    DESC = "koya: archive git repositories from sourcehut user accounts"
    parser = argparse.ArgumentParser(
        description=DESC,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # nargs="+" already requires at least one username, so a default value
    # could never take effect and none is supplied.
    parser.add_argument(
        "users", nargs="+", help="username(s) to archive")
    parser.add_argument(
        "--origin", dest="origin", type=str, action="store",
        default="https://git.sr.ht/", required=False,
        help="sourcehut origin URL")
    parser.add_argument(
        "--token", dest="token", type=str, action="store",
        help="account oauth access token", required=True)
    parser.add_argument(
        "--delay", dest="delay", type=int, action="store", default=1,
        help="delay between origin requests (seconds)")
    return parser


def _run_git(command, cwd=None):
    """Run a git command, logging an error if it exits non-zero."""
    result = subprocess.run(command, cwd=cwd)
    if result.returncode != 0:
        log.error(
            f"{' '.join(command)} exited with status {result.returncode}")
    return result.returncode == 0


def _archive_repo(origin, host, user, name):
    """Update the local copy of one repository, cloning it if absent.

    The copy lives at <host>/<user>/<name> relative to the working directory.
    """
    repo_path = os.path.join(host, user, name)
    # Look for .git rather than the bare path: a directory left behind by a
    # failed clone is not a repository, and running `git fetch` inside it
    # would fail or act on an enclosing repository instead.
    if os.path.exists(os.path.join(repo_path, ".git")):
        _run_git(
            ["git", "fetch", "--all", "--force", "--tags", "--prune"],
            cwd=repo_path)
        return
    # Only the parent is created here; git clone creates the target itself.
    Path(repo_path).parent.mkdir(parents=True, exist_ok=True)
    remote_url = multi_urljoin(origin, "~" + user, name)
    _run_git(["git", "clone", remote_url, repo_path])


def main():
    """Archive every repository of each username given on the command line."""
    args = _build_parser().parse_args()
    host = urlparse(args.origin).hostname
    if not host:
        log.error("Unable to parse hostname from origin, exiting...")
        # Exit non-zero so callers and shells can detect the failure.
        sys.exit(1)
    koya = Koya(args.origin, args.token, args.delay)
    for user in args.users:
        repos = koya.list_repos(user)
        if not repos:
            log.error(f"No repositories discovered, skipping {user}...")
            continue
        for repo in repos:
            _archive_repo(args.origin, host, user, repo["name"])
            # Pause between repositories; each fetch or clone hits the origin.
            time.sleep(args.delay)


if __name__ == "__main__":
    main()
|