summaryrefslogtreecommitdiff
path: root/scripts/ia-upload-warc
blob: 789d1a864c43c0265300000e8ea51038b2cdb38a (plain)
1
2
3
4
5
6
7
8
#!/usr/bin/env bash
#
# Upload the WARC (*.gz) and CDX (*.cdx) files of a crawl directory to the
# Internet Archive with the `ia` command-line tool.
#
# Usage: ia-upload-warc CRAWL_DIR
#
# The last path component of CRAWL_DIR must contain a date of the form
# "-YYYY-M-D" (e.g. "example.com-2020-01-15-abcdef"). Everything before the
# first such date is used as the item title, and "<title>-<date>" is used as
# the item identifier. CRAWL_DIR must also contain a "start_url" file, whose
# contents are embedded in the item description.

set -euo pipefail

die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

if (( $# != 1 )); then
  printf 'Usage: %s CRAWL_DIR\n' "${0##*/}" >&2
  exit 2
fi

command -v ia >/dev/null || die "the 'ia' command was not found in PATH"

# Drop trailing slashes so that "dir/" and "dir" are treated alike.
crawl_dir=$1
while [[ "$crawl_dir" == */ ]]; do
  crawl_dir=${crawl_dir%/}
done
[[ -d "$crawl_dir" ]] || die "not a directory: $1"

# Keep file arguments from being parsed as options by `ia`.
if [[ "$crawl_dir" == -* ]]; then
  crawl_dir=./$crawl_dir
fi

# Parse only the directory's own name, so that leading path components
# (e.g. "./" or "/data/crawls/") never leak into the title or identifier.
name=${crawl_dir##*/}

# Title: the name with everything from the first "-YYYY-M-D" onwards removed.
title="$(printf '%s\n' "$name" \
  | sed 's/\(-[0-9][0-9][0-9][0-9]-[0-9][0-9]*-[0-9][0-9]*.*\)//g')"
# Date: the last "YYYY-M-D" occurrence in the name (the leading .* is greedy).
date="$(printf '%s\n' "$name" \
  | sed -n 's/.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]*-[0-9][0-9]*\).*/\1/p')"

[[ -n "$date" ]] || die "no YYYY-M-D date found in directory name: $name"
[[ -n "$title" ]] || die "no title found before the date in directory name: $name"

id=$title-$date

[[ -r "$crawl_dir/start_url" ]] || die "cannot read $crawl_dir/start_url"
start_url="$(cat -- "$crawl_dir/start_url")"

# With nullglob an unmatched pattern expands to nothing instead of being
# handed to `ia` as a literal "*.gz" / "*.cdx" file name.
shopt -s nullglob
files=("$crawl_dir"/*.gz "$crawl_dir"/*.cdx)
shopt -u nullglob
(( ${#files[@]} > 0 )) || die "no *.gz or *.cdx files found in $crawl_dir"

ia upload "$id" "${files[@]}" \
  --metadata="mediatype:web" \
  --metadata="date:$date" \
  --metadata="title:$title" \
  --metadata="description:recursive crawl of $start_url using grab-site/wpull, taken on $date" \
  --metadata="source:$title"