#!/usr/bin/env bash
#
# ia-upload-warc - upload the archives of one crawl directory with `ia upload`.
#
# Usage: ia-upload-warc CRAWL_DIR
#
# The last path component of CRAWL_DIR must look like
#   <title>-<YYYY>-<M>-<D>[anything]
# e.g. "example.com-2021-12-28-a1b2c3d4". From it the script derives:
#   title       = <title>
#   date        = <YYYY>-<M>-<D>
#   identifier  = <title>-<date>
# CRAWL_DIR must contain a "start_url" file (its content is quoted in the
# item description) and at least one *.gz or *.cdx file; all such files are
# uploaded.
#
# Exit status: 0 on success, 2 on usage error, 127 if `ia` is missing,
# 1 on any other validation failure, otherwise the status of `ia upload`.
#
# Provenance: added as scripts/ia-upload-warc in commit 51aa333b4f5b
# ("add ia-upload-warc", Jordan, 2021-12-28).

# Failures are checked explicitly below instead of relying on `set -e`,
# whose behaviour inside functions used in conditionals is unreliable.
set -uo pipefail

# Splits a crawl directory name into (1) title and (2) date. When a name
# contains several date-like runs, the last one is treated as the date.
readonly CRAWL_NAME_RE='^(.+)-([0-9]{4}-[0-9]+-[0-9]+)'

# Print a diagnostic, prefixed with the script name, to stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print the one-line synopsis to stderr.
usage() {
  printf 'Usage: %s CRAWL_DIR\n' "${0##*/}" >&2
}

# crawl_title NAME - print the <title> part of NAME; return 1 if NAME does
# not contain a "-YYYY-M-D" date.
crawl_title() {
  [[ $1 =~ $CRAWL_NAME_RE ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

# crawl_date NAME - print the "YYYY-M-D" part of NAME; return 1 if absent.
crawl_date() {
  [[ $1 =~ $CRAWL_NAME_RE ]] || return 1
  printf '%s\n' "${BASH_REMATCH[2]}"
}

# main CRAWL_DIR - validate the directory, derive the metadata and run
# `ia upload`. Returns a status instead of exiting so the caller decides.
main() {
  if (( $# != 1 )); then
    usage
    return 2
  fi

  if [[ ! -d $1 ]]; then
    err "not a directory: $1"
    return 1
  fi

  if ! command -v ia >/dev/null 2>&1; then
    err "the 'ia' command was not found in PATH"
    return 127
  fi

  # Drop trailing slashes so "crawl/" and "crawl" behave the same, then
  # parse only the last path component: the directory's parents must not
  # leak into the title or the identifier.
  local dir=${1%"${1##*[!/]}"}
  local name=${dir##*/}

  local title date
  if ! title=$(crawl_title "$name") || ! date=$(crawl_date "$name"); then
    err "cannot find a <title>-YYYY-M-D pattern in directory name: $name"
    return 1
  fi
  local id="${title}-${date}"

  if [[ ! -r $dir/start_url ]]; then
    err "missing or unreadable file: $dir/start_url"
    return 1
  fi
  local start_url
  start_url=$(<"$dir/start_url") || return 1

  # Collect the archives explicitly so that a pattern with no match is
  # skipped instead of being handed to `ia` as a literal "*.gz" string.
  local -a files=()
  local f
  for f in "$dir"/*.gz "$dir"/*.cdx; do
    if [[ -e $f ]]; then
      files+=("$f")
    fi
  done
  if (( ${#files[@]} == 0 )); then
    err "no *.gz or *.cdx files in $dir"
    return 1
  fi

  ia upload "$id" "${files[@]}" \
    --metadata="mediatype:web" \
    --metadata="date:$date" \
    --metadata="title:$title" \
    --metadata="description:recursive crawl of $start_url using grab-site/wpull, taken on $date" \
    --metadata="source:$title"
}

main "$@"