aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jordan <me@jordan.im> 2020-12-02 20:56:20 -0700
committer Jordan <me@jordan.im> 2020-12-02 20:56:20 -0700
commit af2228cf3f3c50dab7f4705e09f546a53f2ee999 (patch)
tree dbbef17ad96f4bd353e2e3fee5db08d1e23db13b
download pouch-af2228cf3f3c50dab7f4705e09f546a53f2ee999.tar.gz
pouch-af2228cf3f3c50dab7f4705e09f546a53f2ee999.zip
initial commit
-rw-r--r-- README 7
-rwxr-xr-x pouch.sh 47
2 files changed, 54 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..39dac0f
--- /dev/null
+++ b/README
@@ -0,0 +1,7 @@
+Usage: pouch.sh HTML_FILE_PATH
+
+ HTML_FILE_PATH is the path to a Pocket-exported document containing the set of URLs to save
+
+Dependencies:
+
+ google-chrome 59+ (headless mode support)
diff --git a/pouch.sh b/pouch.sh
new file mode 100755
index 0000000..500a3a5
--- /dev/null
+++ b/pouch.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+PROGRAM="${0##*/}" # basename of the invoked script; interpolated into the usage text
+
+cmd_usage() {
+  # Print the help text to stderr; printf (unlike <<-, which strips only tabs) survives re-indentation.
+  printf '%s\n' \
+    "Usage: $PROGRAM HTML_FILE_PATH" \
+    "" \
+    "    HTML_FILE_PATH is the path to a Pocket-exported document containing the set of URLs to save" \
+    "" \
+    "Dependencies:" \
+    "" \
+    "    google-chrome 59+ (headless mode support)" >&2
+}
+
+get_urls() { # $1: path to the HTML export -> prints every href="..." value, one per line
+  grep -o 'href="[^"]*' -- "$1" | cut -d '"' -f 2
+}
+
+url_to_filename() { # $1: URL -> prints a filename-safe slug of it (no trailing newline)
+  local LC_ALL=C name=${1#*://} # C locale: ASCII-only classes; '#' strips just the leading scheme
+  name=${name%/}                # remove trailing slash
+  name=${name//[\/.]/-}         # replace / and . characters with -
+  printf '%s' "${name//[^[:alnum:]-]/}" # remove non-alphanumeric/"-" chars ([ and ] included)
+}
+
+save() { # save URL $1 (or one line of stdin when no argument is given) as <name>.pdf
+  local url=${1-} pdfname
+  [[ -n $url ]] || { IFS= read -r url; [[ -n $url ]]; } || return 0 # nothing to save
+  pdfname=$(url_to_filename "$url").pdf
+  if [[ ! -f ./$pdfname ]]; then # skip URLs whose PDF already exists
+    printf '[+] %s...\n' "$url"
+    google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url" </dev/null # keep chrome off the caller's URL pipe
+  fi
+}
+
+if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then
+  cmd_usage # explicit help request: print usage and fall through to exit 0
+elif [[ $# -eq 1 ]]; then
+  # IFS= and -r keep every URL byte-for-byte; the quotes stop word splitting and globbing
+  get_urls "$1" | while IFS= read -r line; do save "$line"; done
+else
+  cmd_usage
+  exit 1
+fi
+exit 0