From af2228cf3f3c50dab7f4705e09f546a53f2ee999 Mon Sep 17 00:00:00 2001
From: Jordan
Date: Wed, 2 Dec 2020 20:56:20 -0700
Subject: initial commit

---
 README   |  7 +++++++
 pouch.sh | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 README
 create mode 100755 pouch.sh

diff --git a/README b/README
new file mode 100644
index 0000000..39dac0f
--- /dev/null
+++ b/README
@@ -0,0 +1,7 @@
+Usage: pouch.sh HTML_FILE_PATH
+
+    HTML_FILE_PATH is a pocket-exported document containing the set of URLs to save
+
+Dependencies:
+
+    google-chrome 59+ (headless mode support)
diff --git a/pouch.sh b/pouch.sh
new file mode 100755
index 0000000..500a3a5
--- /dev/null
+++ b/pouch.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+#
+# pouch.sh - save every URL found in a pocket-exported HTML file as a PDF,
+# using headless google-chrome. PDFs are written to the current directory;
+# a URL whose PDF already exists there is skipped.
+
+PROGRAM="${0##*/}"
+
+# Print usage information to stderr.
+# NOTE: <<- strips leading TABs only, so the here-document body below must
+# stay indented with tabs, not spaces.
+cmd_usage() {
+	cat >&2 <<-_EOF
+	Usage: $PROGRAM HTML_FILE_PATH
+
+	    HTML_FILE_PATH is a pocket-exported document containing the set of URLs to save
+
+	Dependencies:
+
+	    google-chrome 59+ (headless mode support)
+	_EOF
+}
+
+# Print the value of the href="..." attribute found on each line of file $1.
+# The leading .* is greedy, so only the last href on a line is emitted.
+get_urls() {
+	sed -n 's/.*href="\([^"]*\).*/\1/p' "$1"
+}
+
+# Turn URL $1 into a filename-safe string (printed without a trailing newline).
+url_to_filename() {
+	printf '%s\n' "${1#*://}" | # remove protocol (https://...) only
+		sed 's/\/$//' | # remove trailing slash
+		tr /. - | # replace /. characters with -
+		tr -cd '[:alnum:]-' # remove non-alphanumeric/"-" chars
+}
+
+# Render URL $1 to <url_to_filename>.pdf in the current directory, unless
+# that file already exists. URLs that yield an empty filename are ignored.
+save() {
+	local url=$1 pdfname
+
+	pdfname=$(url_to_filename "$url")
+	[[ -n $pdfname ]] || return 0 # nothing usable to name the PDF after
+	pdfname+=.pdf
+
+	if [[ ! -f ./$pdfname ]]; then
+		echo "[+] $url..."
+		# stdin is redirected so chrome cannot swallow the caller's URL list.
+		google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url" </dev/null ||
+			echo "[!] failed to save $url" >&2
+	fi
+}
+
+if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then
+	cmd_usage
+elif [[ $# -eq 1 ]]; then
+	if [[ ! -r $1 ]]; then
+		echo "$PROGRAM: cannot read '$1'" >&2
+		exit 1
+	fi
+	# Each URL is handed to save as an argument; save must not read stdin,
+	# or it would consume lines meant for this loop.
+	get_urls "$1" | while IFS= read -r line; do save "$line"; done
+else
+	cmd_usage
+	exit 1
+fi
+
+exit 0
-- 
cgit v1.2.3-54-g00ecf