#!/usr/bin/env bash
#
# pouch.sh - print every URL found in a Pocket HTML export to a PDF file.
#
# Usage: pouch.sh HTML_FILE_PATH
#
# Every href="..." value in HTML_FILE_PATH is rendered with headless
# google-chrome into "<sanitised-url>.pdf" in the current directory. A URL
# whose PDF already exists is skipped, so a run can be repeated safely.
#
# Provenance: reconstructed from commit
# af2228cf3f3c50dab7f4705e09f546a53f2ee999 ("initial commit", Jordan,
# 2020-12-02), which created pouch.sh.

set -u

PROGRAM="${0##*/}"
readonly PROGRAM

#######################################
# Print the usage text.
# Globals:   PROGRAM (read)
# Arguments: none
# Outputs:   usage text on stderr
#######################################
cmd_usage() {
  cat >&2 <<_EOF
Usage: $PROGRAM HTML_FILE_PATH

HTML_FILE_PATH is a pocket-exported document containing the set of URLs to save

Dependencies:

google-chrome 59+ (headless mode support)
_EOF
}

#######################################
# Extract href values from an HTML file, one per output line.
# Only the last href on any given input line is reported (the leading ".*"
# in the sed expression is greedy).
# Arguments: $1 - path of the HTML file
# Outputs:   URLs on stdout
# Returns:   exit status of sed
#######################################
get_urls() {
  # -e and -- keep a file name that starts with "-" from being parsed as
  # an option.
  sed -n -e 's/.*href="\([^"]*\).*/\1/p' -- "$1"
}

#######################################
# Turn a URL into a string that is safe to use as a file name.
# Arguments: $1 - URL
# Outputs:   sanitised name on stdout, without a trailing newline
#######################################
url_to_filename() {
  local name=${1-}
  local -r scheme_re='^[[:alpha:]][[:alnum:]+.-]*://'

  # Remove only a leading "scheme://" (or a protocol-relative "//").
  # Stripping up to the LAST "//" would map "http://a.b/x//y" to "y" and
  # make unrelated URLs collide on the same file name.
  if [[ $name =~ $scheme_re ]]; then
    name=${name#"${BASH_REMATCH[0]}"}
  elif [[ $name == //* ]]; then
    name=${name#//}
  fi

  name=${name%/}      # remove one trailing slash
  name=${name//\//-}  # replace "/" with "-"
  name=${name//./-}   # replace "." with "-"

  # Delete every byte that is not an ASCII letter, a digit or "-".
  # The set is '[:alnum:]-' rather than '[[:alnum:]-]': the outer brackets
  # would be taken literally by tr and "[" / "]" would survive.
  printf '%s' "$name" | LC_ALL=C tr -cd '[:alnum:]-'
}

#######################################
# Print one URL to "<sanitised-url>.pdf" unless that file already exists.
# Globals:   PROGRAM (read)
# Arguments: $1 - URL. When no argument is given, one line is read from
#                 stdin instead (kept for backward compatibility).
# Outputs:   progress line on stdout, diagnostics on stderr
# Returns:   0 when the PDF exists or nothing was to be done, 1 when no file
#            name can be derived, otherwise the status of google-chrome
#######################################
save() {
  local url='' pdfname=''

  if (($# > 0)); then
    url=$1
  else
    IFS= read -r url || [[ -n $url ]] || return 0
  fi
  [[ -n $url ]] || return 0

  pdfname="$(url_to_filename "$url").pdf"
  if [[ $pdfname == .pdf ]]; then
    printf '%s: cannot derive a file name from URL: %s\n' "$PROGRAM" "$url" >&2
    return 1
  fi

  if [[ ! -f ./$pdfname ]]; then
    printf '[+] %s...\n' "$url"
    # stdin is redirected so the browser can never consume input that
    # belongs to the caller's read loop.
    google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url" \
      </dev/null
  fi
}

#######################################
# Entry point: validate the command line and save every URL of the export.
# Arguments: $1 - HTML file path, or one of --help / -h / help
# Returns:   0 on success or help, 1 on a usage error, an unreadable input
#            file or when at least one URL could not be saved
#######################################
main() {
  local url='' status=0

  if (($# != 1)); then
    cmd_usage
    return 1
  fi

  case $1 in
    --help | -h | help)
      cmd_usage
      return 0
      ;;
  esac

  if [[ ! -r $1 || -d $1 ]]; then
    printf '%s: cannot read %s\n' "$PROGRAM" "$1" >&2
    return 1
  fi

  # The URL is handed to save as an argument. Letting save read from the
  # loop's stdin would swallow the following URL on every iteration.
  while IFS= read -r url || [[ -n $url ]]; do
    save "$url" || status=1
  done < <(get_urls "$1")

  return "$status"
}

main "$@"