blob: 500a3a506fa22b79f67d6ccce6c7e550f0f26a44 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
#!/usr/bin/env bash
# Saves each URL found in an HTML file as a PDF in the current directory,
# using google-chrome in headless mode.
#
# Basename of the path this script was invoked with; shown in the usage text.
PROGRAM="${0##*/}"
#######################################
# Print the help text for this script.
# Globals:   PROGRAM (read) - name shown in the "Usage:" line
# Arguments: none
# Outputs:   writes the help text to stderr (nothing on stdout)
#######################################
cmd_usage() {
  local -a help_lines=(
    "Usage: $PROGRAM HTML_FILE_PATH"
    "HTML_FILE is a pocket-exported document containing the set of URLs to save"
    "Dependencies:"
    "google-chrome 59+ (headless mode support)"
  )
  printf '%s\n' "${help_lines[@]}" >&2
}
#######################################
# Extract link targets from an HTML file.
# Arguments: $1 - path of the HTML file to scan
# Outputs:   one href="..." value per matching input line on stdout; lines
#            without an href are dropped. Because the leading `.*` is greedy,
#            only the last href on a given line is emitted.
# Returns:   exit status of sed
#######################################
get_urls() {
  # Quote the path (spaces/globs) and end option parsing so a file name
  # beginning with "-" is not taken as a sed option.
  sed -n 's/.*href="\([^"]*\).*/\1/p' -- "$1"
}
#######################################
# Turn a URL into a flat, filesystem-friendly name.
# Arguments: $1 - the URL
# Outputs:   the derived name on stdout (no trailing newline), made up only
#            of alphanumerics and "-"
#######################################
url_to_filename() {
  local name=$1
  local scheme_re='^[[:alpha:]][[:alnum:]+.-]*://'

  # Remove only the leading protocol (https://...). Stripping up to the
  # *last* "//" would discard everything before a URL embedded in the path
  # and make distinct pages map to the same file name.
  if [[ $name =~ $scheme_re ]]; then
    name=${name#*://}
  else
    name=${name#//} # protocol-relative URL
  fi

  name=${name%/}     # remove trailing slash
  name=${name//\//-} # replace "/" with "-"
  name=${name//./-}  # replace "." with "-"

  # Remove everything that is not alphanumeric or "-". The set must not be
  # wrapped in an extra [ ]: tr would treat those brackets as literal members
  # and let "[" and "]" through. printf is used because echo would swallow
  # inputs such as "-n".
  printf '%s' "$name" | tr -cd '[:alnum:]-'
}
#######################################
# Print one URL to a PDF in the current directory, unless that PDF exists.
# Arguments: $1 - URL to save (optional; when no argument is given a single
#                 line is read from stdin instead)
# Outputs:   a "[+] URL..." progress line on stdout for each URL printed
# Returns:   0 when the URL is empty or the PDF already exists, otherwise
#            the exit status of google-chrome
#######################################
save() {
  local url pdfname

  # Prefer the argument. Reading stdin unconditionally would, when called
  # from a `while read` loop, consume the *next* line of the loop's input,
  # so every other URL would be skipped.
  if (( $# > 0 )); then
    url=$1
  else
    read -r url || [[ -n $url ]] || return 0
  fi
  [[ -n $url ]] || return 0

  pdfname="$(url_to_filename "$url").pdf"
  if [[ ! -f ./$pdfname ]]; then
    # printf with quoted data: an unquoted "[+]" is a glob pattern.
    printf '[+] %s...\n' "$url"
    # Detach stdin so the browser cannot consume input meant for the
    # caller's read loop.
    google-chrome --headless --disable-gpu \
      --print-to-pdf="$pdfname" "$url" </dev/null
  fi
}
# Entry point.
#   exactly one argument, --help / -h / help : print usage, exit 0
#   exactly one argument, anything else      : treat it as the HTML file and
#                                              save every URL found in it
#   any other argument count                 : print usage, exit 1
if [[ $# -eq 1 && ( $1 == --help || $1 == -h || $1 == help ) ]]; then
  cmd_usage
elif [[ $# -eq 1 ]]; then
  # -r keeps backslashes intact; quoting stops URLs containing "?" or "*"
  # from being word-split or expanded as file name patterns.
  get_urls "$1" | while read -r line; do
    # Hand the URL to save both as $1 and on a private stdin, so nothing
    # inside save can consume the remaining lines of the URL list.
    save "$line" <<<"$line"
  done
else
  cmd_usage
  exit 1
fi
exit 0
|