about | summary | refs | log | tree | commit | diff
path: root/pouch.sh
diff options
context:
space:
mode:
author: Jordan <me@jordan.im> 2020-12-24 10:34:56 -0700
committer: Jordan <me@jordan.im> 2020-12-24 10:34:56 -0700
commit: a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3 (patch)
tree: 9df3a8382a35c72b7dd80b0da32704bb7c5d83ae /pouch.sh
parent: af2228cf3f3c50dab7f4705e09f546a53f2ee999 (diff)
download: pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.tar.gz
download: pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.zip
use curl for PDF downloads
Diffstat (limited to 'pouch.sh')
-rwxr-xr-x pouch.sh 26
1 files changed, 19 insertions, 7 deletions
diff --git a/pouch.sh b/pouch.sh
index 500a3a5..341e846 100755
--- a/pouch.sh
+++ b/pouch.sh
@@ -1,17 +1,19 @@
#!/usr/bin/env bash
PROGRAM="${0##*/}"
+AGENT="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
cmd_usage() {
cat >&2 <<-_EOF
- Usage: $PROGRAM HTML_FILE_PATH
+Usage: $PROGRAM HTML_FILE_PATH
- HTML_FILE is a pocket-exported document containing the set of URLs to save
+ HTML_FILE is a pocket-exported document containing the set of URLs to save
- Dependencies:
+Dependencies:
- google-chrome 59+ (headless mode support)
- _EOF
+ google-chrome 59+ (headless mode support)
+ curl
+_EOF
}
get_urls() {
@@ -27,12 +29,22 @@ url_to_filename() {
save() {
read url
+
+ if [[ -f "$(url_to_filename $url).pdf" ]]; then
+ return 0
+ fi
+
+ content_type=$(curl -s -I -A "$AGENT" "$url" | grep --ignore-case '^content-type:' | cut -d' ' -f2)
pdfname=$(url_to_filename $url).pdf
- if [[ ! -f ./$pdfname ]]; then
+ if [[ $content_type == *"application/pdf"* ]]; then
echo [+] $url...
- google-chrome --headless --disable-gpu --print-to-pdf=$pdfname $url
+ curl -A "$AGENT" "$url" -o "$pdfname"
+ else
+ echo [+] $url...
+ google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url"
fi
+
}
if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then