aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/ci/ci-driver.sh12
-rwxr-xr-xscripts/git/git-list-tor-branches.sh6
-rwxr-xr-xscripts/maint/gen_ccls_file.sh11
-rw-r--r--scripts/maint/geoip/README.geoip25
-rw-r--r--scripts/maint/geoip/geoip-db-tool/.gitignore1
-rw-r--r--scripts/maint/geoip/geoip-db-tool/Cargo.lock110
-rw-r--r--scripts/maint/geoip/geoip-db-tool/Cargo.toml14
-rw-r--r--scripts/maint/geoip/geoip-db-tool/src/db.rs138
-rw-r--r--scripts/maint/geoip/geoip-db-tool/src/main.rs239
-rwxr-xr-xscripts/maint/geoip/update_and_commit_geoip.sh34
-rwxr-xr-xscripts/maint/geoip/update_geoip.sh16
11 files changed, 602 insertions, 4 deletions
diff --git a/scripts/ci/ci-driver.sh b/scripts/ci/ci-driver.sh
index e83b3d6f46..f37c009666 100755
--- a/scripts/ci/ci-driver.sh
+++ b/scripts/ci/ci-driver.sh
@@ -293,6 +293,7 @@ TOR_VERSION=$(grep -m 1 AC_INIT configure.ac | sed -e 's/.*\[//; s/\].*//;')
# Tor version. Only create the variables we need.
TOR_VER_AT_LEAST_043=no
TOR_VER_AT_LEAST_044=no
+TOR_VER_AT_LEAST_046=no
# These are the currently supported Tor versions; no need to work with anything
# ancient in this script.
@@ -309,10 +310,19 @@ case "$TOR_VERSION" in
TOR_VER_AT_LEAST_043=yes
TOR_VER_AT_LEAST_044=no
;;
- *)
+ 0.4.4.*)
TOR_VER_AT_LEAST_043=yes
TOR_VER_AT_LEAST_044=yes
;;
+ 0.4.5.*)
+ TOR_VER_AT_LEAST_043=yes
+ TOR_VER_AT_LEAST_044=yes
+ ;;
+ 0.4.6.*)
+ TOR_VER_AT_LEAST_043=yes
+ TOR_VER_AT_LEAST_044=yes
+ TOR_VER_AT_LEAST_046=yes
+ ;;
esac
#############################################################################
diff --git a/scripts/git/git-list-tor-branches.sh b/scripts/git/git-list-tor-branches.sh
index 5a527ffc05..2bcd4722b4 100755
--- a/scripts/git/git-list-tor-branches.sh
+++ b/scripts/git/git-list-tor-branches.sh
@@ -139,12 +139,12 @@ finish() {
branch maint-0.3.5
branch release-0.3.5
-branch maint-0.4.3
-branch release-0.4.3
-
branch maint-0.4.4
branch release-0.4.4
+branch maint-0.4.5
+branch release-0.4.5
+
branch master
finish
diff --git a/scripts/maint/gen_ccls_file.sh b/scripts/maint/gen_ccls_file.sh
index 899e4e9603..b1fa55c973 100755
--- a/scripts/maint/gen_ccls_file.sh
+++ b/scripts/maint/gen_ccls_file.sh
@@ -15,6 +15,17 @@ CCLS_FILE=".ccls"
PRIVATE_DEFS=$(grep -r --include \*.h "_PRIVATE" | grep "#ifdef" | cut -d' ' -f2 | sort | uniq)
echo "clang" > "$CCLS_FILE"
+
+# Add these includes so the ccls server can properly check new files that are
+# not in the compile_commands.json yet
+{
+ echo "-I."
+ echo "-I./src"
+ echo "-I./src/ext"
+ echo "-I./src/ext/trunnel"
+} >> "$CCLS_FILE"
+
+# Add all defines (-D).
for p in $PRIVATE_DEFS; do
echo "-D$p" >> "$CCLS_FILE"
done
diff --git a/scripts/maint/geoip/README.geoip b/scripts/maint/geoip/README.geoip
new file mode 100644
index 0000000000..0ed94b2276
--- /dev/null
+++ b/scripts/maint/geoip/README.geoip
@@ -0,0 +1,25 @@
+To generate new geoip files, you'll need to install the
+libloc/"location" tool provided by https://location.ipfire.org/.
+I personally build it with:
+
+ ./configure CFLAGS='-g -O2' --disable-perl --without-systemd --prefix=/opt/libloc
+ make
+ make install
+
+Then (after adjusting PATH and PYTHONPATH) you can get the latest
+dump with:
+
+ location update
+ location dump geoip-dump.txt
+
+And transform it into geoip files with
+
+ cargo run --release -- -i geoip-dump.txt
+
+
+==============================
+
+Note that the current version "0.1.9" of rangemap has a performance
+bug, making this tool quite slow. Previous versions had a
+correctness bug that made the output needlessly long. With luck,
+there will soon be a fast, correct rangemap version. \ No newline at end of file
diff --git a/scripts/maint/geoip/geoip-db-tool/.gitignore b/scripts/maint/geoip/geoip-db-tool/.gitignore
new file mode 100644
index 0000000000..eb5a316cbd
--- /dev/null
+++ b/scripts/maint/geoip/geoip-db-tool/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/scripts/maint/geoip/geoip-db-tool/Cargo.lock b/scripts/maint/geoip/geoip-db-tool/Cargo.lock
new file mode 100644
index 0000000000..ba610d4fc3
--- /dev/null
+++ b/scripts/maint/geoip/geoip-db-tool/Cargo.lock
@@ -0,0 +1,110 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "argh"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91792f088f87cdc7a2cfb1d617fa5ea18d7f1dc22ef0e1b5f82f3157cdc522be"
+dependencies = [
+ "argh_derive",
+ "argh_shared",
+]
+
+[[package]]
+name = "argh_derive"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4eb0c0c120ad477412dc95a4ce31e38f2113e46bd13511253f79196ca68b067"
+dependencies = [
+ "argh_shared",
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "argh_shared"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "781f336cc9826dbaddb9754cb5db61e64cab4f69668bd19dcc4a0394a86f4cb1"
+
+[[package]]
+name = "geoip-db-tool"
+version = "0.1.0"
+dependencies = [
+ "argh",
+ "ipnetwork",
+ "rangemap",
+]
+
+[[package]]
+name = "heck"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "ipnetwork"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02c3eaab3ac0ede60ffa41add21970a7df7d91772c03383aac6c2c3d53cc716b"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rangemap"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90531bef860f96441c4cb74a1e43c281cd1366143928f944546ef0b1c60392b0"
+
+[[package]]
+name = "serde"
+version = "1.0.123"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
+
+[[package]]
+name = "syn"
+version = "1.0.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
diff --git a/scripts/maint/geoip/geoip-db-tool/Cargo.toml b/scripts/maint/geoip/geoip-db-tool/Cargo.toml
new file mode 100644
index 0000000000..8c794ca058
--- /dev/null
+++ b/scripts/maint/geoip/geoip-db-tool/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "geoip-db-tool"
+version = "0.1.0"
+authors = ["Nick Mathewson <nickm@torproject.org>"]
+edition = "2018"
+license = "MIT OR Apache-2.0"
+publish = false
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ipnetwork= "0.17.0"
+rangemap = "0.1.10"
+argh = "0.1.4"
diff --git a/scripts/maint/geoip/geoip-db-tool/src/db.rs b/scripts/maint/geoip/geoip-db-tool/src/db.rs
new file mode 100644
index 0000000000..316182d823
--- /dev/null
+++ b/scripts/maint/geoip/geoip-db-tool/src/db.rs
@@ -0,0 +1,138 @@
+/// Code to parse a dump file
+use std::collections::HashMap;
+use std::convert::TryInto;
+use std::iter::Peekable;
+
+use super::{AsBlock, NetBlock};
+
+pub struct BlockReader<I>
+where
+ I: Iterator<Item = std::io::Result<String>>,
+{
+ iter: Peekable<I>,
+}
+
+pub enum AnyBlock {
+ NetBlock(NetBlock),
+ AsBlock(AsBlock),
+ OtherBlock,
+}
+
+impl<I> BlockReader<I>
+where
+ I: Iterator<Item = std::io::Result<String>>,
+{
+ pub fn new(iter: I) -> Self {
+ BlockReader {
+ iter: iter.peekable(),
+ }
+ }
+
+ /// Extract the initial header from the file.
+ pub fn extract_header(&mut self) -> String {
+ let mut res: String = "".to_string();
+
+ while let Some(Ok(line)) = self.iter.peek() {
+ if !line.starts_with('#') {
+ break;
+ }
+ res.push_str(line.as_str());
+ res.push('\n');
+ let _ = self.iter.next();
+ }
+
+ res
+ }
+
+ /// Extract the next empty-line-delimited block from the file.
+ ///
+ /// This isn't terribly efficient, but it's "fast enough".
+ fn get_block(&mut self) -> Option<std::io::Result<AnyBlock>> {
+ let mut kv = HashMap::new();
+
+ while let Some(line) = self.iter.next() {
+ //dbg!(&line);
+ if let Err(e) = line {
+ return Some(Err(e));
+ }
+ let line_orig = line.unwrap();
+ let line = line_orig.splitn(2, '#').next().unwrap().trim();
+ if line.is_empty() {
+ if kv.is_empty() {
+ continue;
+ } else {
+ break;
+ }
+ }
+ let kwds: Vec<_> = line.splitn(2, ':').collect();
+ if kwds.len() != 2 {
+ return None; // XXXX handle the error better.
+ }
+ kv.insert(kwds[0].trim().to_string(), kwds[1].trim().to_string());
+ }
+
+ if kv.is_empty() {
+ return None;
+ }
+
+ if let Some(name) = kv.remove("name") {
+ // This is an AS block.
+ let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
+ assert!(asn.starts_with("AS"));
+ let asn = asn[2..].parse().unwrap();
+ return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
+ }
+
+ let net = if let Some(net) = kv.get("net") {
+ net.parse().unwrap() //XXXX handle the error better.
+ } else {
+ return Some(Ok(AnyBlock::OtherBlock));
+ };
+
+ let asn = if let Some(asn) = kv.get("aut-num") {
+ asn.parse().ok()
+ } else {
+ None
+ };
+
+ let cc = if let Some(country) = kv.get("country") {
+ assert!(country.as_bytes().len() == 2);
+ country.as_bytes()[0..2].try_into().unwrap()
+ } else {
+ *b"??"
+ };
+
+ fn is_true(v: Option<&String>) -> bool {
+ match v {
+ Some(s) => s == "true",
+ None => false,
+ }
+ }
+
+ let is_anon_proxy = is_true(kv.get("is-anonymous-proxy"));
+ let is_anycast = is_true(kv.get("is-anycast-proxy"));
+ let is_satellite = is_true(kv.get("is-satellite-provider"));
+
+ Some(Ok(AnyBlock::NetBlock(NetBlock {
+ net,
+ asn,
+ cc,
+ is_anon_proxy,
+ is_anycast,
+ is_satellite,
+ })))
+ }
+}
+
+impl<I> Iterator for BlockReader<I>
+where
+ I: Iterator<Item = std::io::Result<String>>,
+{
+ type Item = AnyBlock;
+ fn next(&mut self) -> Option<Self::Item> {
+ match self.get_block() {
+ Some(Ok(b)) => Some(b),
+ _ => None,
+ }
+ }
+}
diff --git a/scripts/maint/geoip/geoip-db-tool/src/main.rs b/scripts/maint/geoip/geoip-db-tool/src/main.rs
new file mode 100644
index 0000000000..9a22598a35
--- /dev/null
+++ b/scripts/maint/geoip/geoip-db-tool/src/main.rs
@@ -0,0 +1,239 @@
+/// A basic tool to convert IPFire Location dumps into the CSV formats that Tor
+/// expects.
+mod db;
+
+use argh::FromArgs;
+use ipnetwork::IpNetwork;
+use rangemap::RangeInclusiveMap;
+
+use std::fs::File;
+use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::net::{IpAddr, Ipv6Addr};
+use std::num::NonZeroU32;
+use std::path::PathBuf;
+
+fn default_ipv4_path() -> PathBuf {
+ "./geoip".into()
+}
+fn default_ipv6_path() -> PathBuf {
+ "./geoip6".into()
+}
+
+#[derive(FromArgs)]
+/// Convert an IPFire Location dump into CSV geoip files.
+struct Args {
+ /// where to store the IPv4 geoip output
+ #[argh(option, default = "default_ipv4_path()", short = '4')]
+ output_ipv4: PathBuf,
+
+ /// where to store the IPv6 geoip6 output
+ #[argh(option, default = "default_ipv6_path()", short = '6')]
+ output_ipv6: PathBuf,
+
+ /// where to find the dump file
+ #[argh(option, short = 'i')]
+ input: PathBuf,
+
+ /// whether to include AS information in our output
+ #[argh(switch)]
+ include_asn: bool,
+
+ /// where to store the AS map.
+ #[argh(option)]
+ output_asn: Option<PathBuf>,
+}
+
+/// Represents a network block from running `location dump`.
+#[derive(Debug, Clone)]
+pub struct NetBlock {
+ pub net: IpNetwork,
+ pub cc: [u8; 2],
+ pub asn: Option<NonZeroU32>,
+ pub is_anon_proxy: bool,
+ pub is_anycast: bool,
+ pub is_satellite: bool,
+}
+
+/// Represents an AS definition from running `location dump`.
+#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct AsBlock {
+ pub asn: NonZeroU32,
+ pub name: String,
+}
+
+impl PartialEq for NetBlock {
+ fn eq(&self, other: &Self) -> bool {
+ self.net == other.net
+ }
+}
+
+/// We define network blocks as being sorted first from largest to smallest,
+/// then by address.
+impl Ord for NetBlock {
+ fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+ self.net
+ .prefix()
+ .cmp(&other.net.prefix())
+ .then_with(|| self.net.network().cmp(&other.net.network()))
+ }
+}
+
+impl PartialOrd for NetBlock {
+ fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Eq for NetBlock {}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+struct NetDefn {
+ cc: [u8; 2],
+ asn: Option<NonZeroU32>,
+}
+
+impl NetBlock {
+ fn into_defn(self, include_asn: bool) -> NetDefn {
+ if include_asn {
+ NetDefn {
+ cc: self.cc,
+ asn: self.asn,
+ }
+ } else {
+ NetDefn {
+ cc: self.cc,
+ asn: None,
+ }
+ }
+ }
+}
+
+impl NetDefn {
+ fn cc(&self) -> &str {
+ std::str::from_utf8(&self.cc).unwrap()
+ }
+ fn asn(&self) -> u32 {
+ match self.asn {
+ Some(v) => v.into(),
+ None => 0,
+ }
+ }
+}
+
+const PROLOGUE: &str = "\
+# This file has been converted from the IPFire Location database
+# using Tor's geoip-db-tool. For more information on the data, see
+# https://location.ipfire.org/.
+#
+# Below is the header from the original export:
+#
+";
+
+/// Read an input file in the `location dump` format, and write CSV ipv4 and ipv6 files.
+///
+/// This code tries to be "efficient enough"; most of the logic is handled by
+/// using the rangemap crate.
+fn convert(args: Args) -> std::io::Result<()> {
+ let input = args.input.as_path();
+ let output_v4 = args.output_ipv4.as_path();
+ let output_v6 = args.output_ipv6.as_path();
+ let include_asn = args.include_asn;
+
+ let f = File::open(input)?;
+ let f = BufReader::new(f);
+ let mut blocks = Vec::new();
+ let mut networks = Vec::new();
+
+ let mut reader = db::BlockReader::new(f.lines());
+ let hdr = reader.extract_header();
+ // Read blocks, and then sort them by specificity and address.
+ for nb in reader {
+ match nb {
+ db::AnyBlock::AsBlock(a) => networks.push(a),
+ db::AnyBlock::NetBlock(n) => blocks.push(n),
+ _ => {}
+ }
+ }
+ blocks.sort();
+
+ // Convert the sorted blocks into a map from address ranges into
+ // country codes.
+ //
+ // Note that since we have sorted the blocks from least to most specific,
+    // we will be putting them into the maps in the right order, so that the
+ // most specific rule "wins".
+ //
+ // We use u32 and u128 as the index types for these RangeInclusiveMaps,
+ // so that we don't need to implement a step function for IpAddr.
+ let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
+ let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
+
+ let mut n = 0usize;
+ let num_blocks = blocks.len();
+ for nb in blocks {
+ n += 1;
+ if n % 100000 == 0 {
+ println!("{}/{}", n, num_blocks);
+ }
+ let start = nb.net.network();
+ let end = nb.net.broadcast();
+ match (start, end) {
+ (IpAddr::V4(a), IpAddr::V4(b)) => {
+ v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
+ }
+ (IpAddr::V6(a), IpAddr::V6(b)) => {
+ v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
+ }
+ (_, _) => panic!("network started and ended in different families!?"),
+ }
+ }
+
+ // Write the ranges out to the appropriate files, in order.
+ let mut v4 = BufWriter::new(File::create(output_v4)?);
+ let mut v6 = BufWriter::new(File::create(output_v6)?);
+
+ v4.write_all(PROLOGUE.as_bytes())?;
+ v4.write_all(hdr.as_bytes())?;
+ for (r, defn) in v4map.iter() {
+ let a: u32 = *r.start();
+ let b: u32 = *r.end();
+ if include_asn {
+ writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
+ } else {
+ writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
+ }
+ }
+
+ v6.write_all(PROLOGUE.as_bytes())?;
+ v6.write_all(hdr.as_bytes())?;
+ for (r, defn) in v6map.iter() {
+ let a: Ipv6Addr = (*r.start()).into();
+ let b: Ipv6Addr = (*r.end()).into();
+ if include_asn {
+ writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
+ } else {
+ writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
+ }
+ }
+
+ // The documentation says you should always flush a BufWriter.
+ v4.flush()?;
+ v6.flush()?;
+
+ if let Some(output_asn) = args.output_asn {
+ networks.sort();
+ let mut asn = BufWriter::new(File::create(output_asn)?);
+ for net in networks {
+ writeln!(&mut asn, "{},{}", net.asn, net.name)?;
+ }
+ asn.flush()?;
+ }
+
+ Ok(())
+}
+
+fn main() -> std::io::Result<()> {
+ let args: Args = argh::from_env();
+
+ convert(args)
+}
diff --git a/scripts/maint/geoip/update_and_commit_geoip.sh b/scripts/maint/geoip/update_and_commit_geoip.sh
new file mode 100755
index 0000000000..a83f82433e
--- /dev/null
+++ b/scripts/maint/geoip/update_and_commit_geoip.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+set -e
+
+CMDDIR=$(cd "$(dirname "$0")" && pwd)
+
+if [ ! -e "./src/config/geoip" ] ; then
+ echo "Run this from inside the root dir of your oldest LTS repository"
+ exit 1
+fi
+
+if [ -n "$(git status --untracked-files=no --porcelain)" ]; then
+ echo "Working directory is not clean."
+ exit 1
+fi
+
+TOPDIR=$(pwd)
+cd "./src/config/"
+"${CMDDIR}/update_geoip.sh"
+cd "${TOPDIR}"
+
+DASH_DATE=$(date -u +"%Y-%m-%d")
+SLASH_DATE=$(date -u +"%Y/%m/%d")
+CHANGESFILE="changes/geoip-$DASH_DATE"
+
+cat > "$CHANGESFILE" <<EOF
+ o Minor features (geoip data):
+ - Update the geoip files to match the IPFire Location Database,
+ as retrieved on ${SLASH_DATE}.
+EOF
+
+git add "$CHANGESFILE"
+
+git commit -a -m "Update geoip files to match ipfire location db, $SLASH_DATE."
diff --git a/scripts/maint/geoip/update_geoip.sh b/scripts/maint/geoip/update_geoip.sh
new file mode 100755
index 0000000000..9289e7a969
--- /dev/null
+++ b/scripts/maint/geoip/update_geoip.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -e
+
+DIR=$(cd "$(dirname "$0")" && pwd)
+TMP=$(mktemp -d)
+
+location update
+location dump "$TMP/geoip-dump.txt"
+
+OLDDIR=$(pwd)
+cd "$DIR/geoip-db-tool/"
+cargo build --release
+cd "$OLDDIR"
+
+"$DIR/geoip-db-tool/target/release/geoip-db-tool" -i "$TMP/geoip-dump.txt"