about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jordan <me@jordan.im> 2020-12-24 10:34:56 -0700
committer Jordan <me@jordan.im> 2020-12-24 10:34:56 -0700
commit a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3 (patch)
tree 9df3a8382a35c72b7dd80b0da32704bb7c5d83ae
parent af2228cf3f3c50dab7f4705e09f546a53f2ee999 (diff)
download pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.tar.gz
download pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.zip
use curl for PDF downloads
-rw-r--r-- README 5
-rwxr-xr-x pouch.sh 26
2 files changed, 22 insertions, 9 deletions
diff --git a/README b/README
index 39dac0f..059684b 100644
--- a/README
+++ b/README
@@ -1,7 +1,8 @@
Usage: pouch.sh HTML_FILE_PATH
- HTML_FILE is a pocket-exported document containing the set of URLs to save
+ HTML_FILE is a pocket-exported document containing the set of URLs to save
Dependencies:
- google-chrome 59+ (headless mode support)
+ google-chrome 59+ (headless mode support)
+ curl
diff --git a/pouch.sh b/pouch.sh
index 500a3a5..341e846 100755
--- a/pouch.sh
+++ b/pouch.sh
@@ -1,17 +1,19 @@
#!/usr/bin/env bash
PROGRAM="${0##*/}"
+AGENT="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
cmd_usage() {
cat >&2 <<-_EOF
- Usage: $PROGRAM HTML_FILE_PATH
+Usage: $PROGRAM HTML_FILE_PATH
- HTML_FILE is a pocket-exported document containing the set of URLs to save
+ HTML_FILE is a pocket-exported document containing the set of URLs to save
- Dependencies:
+Dependencies:
- google-chrome 59+ (headless mode support)
- _EOF
+ google-chrome 59+ (headless mode support)
+ curl
+_EOF
}
get_urls() {
@@ -27,12 +29,22 @@ url_to_filename() {
save() {
read url
+
+ if [[ -f "$(url_to_filename $url).pdf" ]]; then
+ return 0
+ fi
+
+ content_type=$(curl -s -I -A "$AGENT" "$url" | grep --ignore-case '^content-type:' | cut -d' ' -f2)
pdfname=$(url_to_filename $url).pdf
- if [[ ! -f ./$pdfname ]]; then
+ if [[ $content_type == *"application/pdf"* ]]; then
echo [+] $url...
- google-chrome --headless --disable-gpu --print-to-pdf=$pdfname $url
+ curl -A "$AGENT" "$url" -o "$pdfname"
+ else
+ echo [+] $url...
+ google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url"
fi
+
}
if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then