diff options
author | Jordan <me@jordan.im> | 2020-12-24 10:34:56 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2020-12-24 10:34:56 -0700 |
commit | a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3 (patch) | |
tree | 9df3a8382a35c72b7dd80b0da32704bb7c5d83ae | |
parent | af2228cf3f3c50dab7f4705e09f546a53f2ee999 (diff) | |
download | pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.tar.gz pouch-a5efd75f7954a64d2b8e1f3d4ecf6434774f6af3.zip |
use curl for PDF downloads
-rw-r--r-- | README | 5 | ||||
-rwxr-xr-x | pouch.sh | 26 |
2 files changed, 22 insertions, 9 deletions
@@ -1,7 +1,8 @@
 Usage: pouch.sh HTML_FILE_PATH
- HTML_FILE is a pocket-exported document containing the set of URLs to save
+ HTML_FILE is a pocket-exported document containing the set of URLs to save
 Dependencies:
- google-chrome 59+ (headless mode support)
+ google-chrome 59+ (headless mode support)
+ curl
@@ -1,17 +1,19 @@
 #!/usr/bin/env bash
 PROGRAM="${0##*/}"
+AGENT="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
 cmd_usage() {
 cat >&2 <<-_EOF
- Usage: $PROGRAM HTML_FILE_PATH
+Usage: $PROGRAM HTML_FILE_PATH
- HTML_FILE is a pocket-exported document containing the set of URLs to save
+ HTML_FILE is a pocket-exported document containing the set of URLs to save
- Dependencies:
+Dependencies:
- google-chrome 59+ (headless mode support)
- _EOF
+ google-chrome 59+ (headless mode support)
+ curl
+_EOF
 }
 get_urls() {
@@ -27,12 +29,22 @@ url_to_filename() {
 save() {
 read url
+
+ if [[ -f "$(url_to_filename $url).pdf" ]]; then
+ return 0
+ fi
+
+ content_type=$(curl -s -I -A "$AGENT" "$url" | grep --ignore-case '^content-type:' | cut -d' ' -f2)
 pdfname=$(url_to_filename $url).pdf
- if [[ ! -f ./$pdfname ]]; then
+ if [[ $content_type == *"application/pdf"* ]]; then
 echo [+] $url...
- google-chrome --headless --disable-gpu --print-to-pdf=$pdfname $url
+ curl -A "$AGENT" "$url" -o "$pdfname"
+ else
+ echo [+] $url...
+ google-chrome --headless --disable-gpu --print-to-pdf="$pdfname" "$url"
 fi
+
 }
 if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then |