diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/ci/ci-driver.sh | 21 | ||||
-rwxr-xr-x | scripts/git/git-list-tor-branches.sh | 6 | ||||
-rwxr-xr-x | scripts/maint/gen_ccls_file.sh | 11 | ||||
-rw-r--r-- | scripts/maint/geoip/README.geoip | 25 | ||||
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/.gitignore | 1 | ||||
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/Cargo.lock | 110 | ||||
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/Cargo.toml | 14 | ||||
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/src/db.rs | 138 | ||||
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/src/main.rs | 239 | ||||
-rwxr-xr-x | scripts/maint/geoip/update_and_commit_geoip.sh | 34 | ||||
-rwxr-xr-x | scripts/maint/geoip/update_geoip.sh | 16 |
11 files changed, 611 insertions, 4 deletions
diff --git a/scripts/ci/ci-driver.sh b/scripts/ci/ci-driver.sh index c07d99188b..3e47f68187 100755 --- a/scripts/ci/ci-driver.sh +++ b/scripts/ci/ci-driver.sh @@ -293,6 +293,7 @@ TOR_VERSION=$(grep -m 1 AC_INIT configure.ac | sed -e 's/.*\[//; s/\].*//;') # Tor version. Only create the variables we need. TOR_VER_AT_LEAST_043=no TOR_VER_AT_LEAST_044=no +TOR_VER_AT_LEAST_046=no # These are the currently supported Tor versions; no need to work with anything # ancient in this script. @@ -309,10 +310,19 @@ case "$TOR_VERSION" in TOR_VER_AT_LEAST_043=yes TOR_VER_AT_LEAST_044=no ;; - *) + 0.4.4.*) TOR_VER_AT_LEAST_043=yes TOR_VER_AT_LEAST_044=yes ;; + 0.4.5.*) + TOR_VER_AT_LEAST_043=yes + TOR_VER_AT_LEAST_044=yes + ;; + 0.4.6.*) + TOR_VER_AT_LEAST_043=yes + TOR_VER_AT_LEAST_044=yes + TOR_VER_AT_LEAST_046=yes + ;; esac ############################################################################# @@ -460,12 +470,21 @@ fi if [[ "${STEM}" = "yes" ]]; then start_section "Stem" + EXCLUDE_TESTS="" + if [[ "${TOR_VER_AT_LEAST_046}" = 'yes' ]]; then + EXCLUDE_TESTS="--exclude-test control.controller.test_ephemeral_hidden_services_v2 --exclude-test control.controller.test_hidden_services_conf --exclude-test control.controller.test_with_ephemeral_hidden_services_basic_auth --exclude-test control.controller.test_without_ephemeral_hidden_services --exclude-test control.controller.test_with_ephemeral_hidden_services_basic_auth_no_credentials" + fi if [[ "${TOR_VER_AT_LEAST_044}" = 'yes' ]]; then # XXXX This should probably be part of some test-stem make target. + + # Disable the check around EXCLUDE_TESTS that requires double quote. We + # need it to be expanded. 
+ # shellcheck disable=SC2086 if runcmd timelimit -p -t 520 -s USR1 -T 30 -S ABRT \ python3 "${STEM_PATH}/run_tests.py" \ --tor src/app/tor \ --integ --test control.controller \ + $EXCLUDE_TESTS \ --test control.base_controller \ --test process \ --log TRACE \ diff --git a/scripts/git/git-list-tor-branches.sh b/scripts/git/git-list-tor-branches.sh index 5a527ffc05..2bcd4722b4 100755 --- a/scripts/git/git-list-tor-branches.sh +++ b/scripts/git/git-list-tor-branches.sh @@ -139,12 +139,12 @@ finish() { branch maint-0.3.5 branch release-0.3.5 -branch maint-0.4.3 -branch release-0.4.3 - branch maint-0.4.4 branch release-0.4.4 +branch maint-0.4.5 +branch release-0.4.5 + branch master finish diff --git a/scripts/maint/gen_ccls_file.sh b/scripts/maint/gen_ccls_file.sh index 899e4e9603..b1fa55c973 100755 --- a/scripts/maint/gen_ccls_file.sh +++ b/scripts/maint/gen_ccls_file.sh @@ -15,6 +15,17 @@ CCLS_FILE=".ccls" PRIVATE_DEFS=$(grep -r --include \*.h "_PRIVATE" | grep "#ifdef" | cut -d' ' -f2 | sort | uniq) echo "clang" > "$CCLS_FILE" + +# Add these include so the ccls server can properly check new files that are +# not in the compile_commands.json yet +{ + echo "-I." + echo "-I./src" + echo "-I./src/ext" + echo "-I./src/ext/trunnel" +} >> "$CCLS_FILE" + +# Add all defines (-D). for p in $PRIVATE_DEFS; do echo "-D$p" >> "$CCLS_FILE" done diff --git a/scripts/maint/geoip/README.geoip b/scripts/maint/geoip/README.geoip new file mode 100644 index 0000000000..0ed94b2276 --- /dev/null +++ b/scripts/maint/geoip/README.geoip @@ -0,0 +1,25 @@ +To generate new geoip files, you'll need to install the +libloc/"location" tool provided by https://location.ipfire.org/. 
+I personally build it with: + + ./configure CFLAGS='-g -O2' --disable-perl --without-systemd --prefix=/opt/libloc + make + make install + +Then (after adjusting PATH and PYTHONPATH) you can get the latest +dump with: + + location update + location dump geoip-dump.txt + +And transform it into geoip files with + + cargo run --release -- -i geoip-dump.txt + + +============================== + +Note that the current version "0.1.9" of rangemap has a performance +bug, making this tool quite slow. Previous versions had a +correctness bug that made the output needlessly long. With luck, +there will soon be a fast correct rangemap version.
\ No newline at end of file diff --git a/scripts/maint/geoip/geoip-db-tool/.gitignore b/scripts/maint/geoip/geoip-db-tool/.gitignore new file mode 100644 index 0000000000..eb5a316cbd --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/.gitignore @@ -0,0 +1 @@ +target diff --git a/scripts/maint/geoip/geoip-db-tool/Cargo.lock b/scripts/maint/geoip/geoip-db-tool/Cargo.lock new file mode 100644 index 0000000000..ba610d4fc3 --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/Cargo.lock @@ -0,0 +1,110 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "argh" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91792f088f87cdc7a2cfb1d617fa5ea18d7f1dc22ef0e1b5f82f3157cdc522be" +dependencies = [ + "argh_derive", + "argh_shared", +] + +[[package]] +name = "argh_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4eb0c0c120ad477412dc95a4ce31e38f2113e46bd13511253f79196ca68b067" +dependencies = [ + "argh_shared", + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "argh_shared" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "781f336cc9826dbaddb9754cb5db61e64cab4f69668bd19dcc4a0394a86f4cb1" + +[[package]] +name = "geoip-db-tool" +version = "0.1.0" +dependencies = [ + "argh", + "ipnetwork", + "rangemap", +] + +[[package]] +name = "heck" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "ipnetwork" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02c3eaab3ac0ede60ffa41add21970a7df7d91772c03383aac6c2c3d53cc716b" +dependencies = [ + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rangemap" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90531bef860f96441c4cb74a1e43c281cd1366143928f944546ef0b1c60392b0" + +[[package]] +name = "serde" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" + +[[package]] +name = "syn" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "unicode-segmentation" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" diff --git a/scripts/maint/geoip/geoip-db-tool/Cargo.toml b/scripts/maint/geoip/geoip-db-tool/Cargo.toml new file mode 100644 index 0000000000..8c794ca058 --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "geoip-db-tool" +version = "0.1.0" +authors = ["Nick Mathewson <nickm@torproject.org>"] +edition = "2018" +license = "MIT OR Apache-2.0" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + 
+[dependencies] +ipnetwork= "0.17.0" +rangemap = "0.1.10" +argh = "0.1.4" diff --git a/scripts/maint/geoip/geoip-db-tool/src/db.rs b/scripts/maint/geoip/geoip-db-tool/src/db.rs new file mode 100644 index 0000000000..316182d823 --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/src/db.rs @@ -0,0 +1,138 @@ +/// Code to parse a dump file +use std::collections::HashMap; +use std::convert::TryInto; +use std::iter::Peekable; + +use super::{AsBlock, NetBlock}; + +pub struct BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + iter: Peekable<I>, +} + +pub enum AnyBlock { + NetBlock(NetBlock), + AsBlock(AsBlock), + OtherBlock, +} + +impl<I> BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + pub fn new(iter: I) -> Self { + BlockReader { + iter: iter.peekable(), + } + } + + /// Extract the initial header from the file. + pub fn extract_header(&mut self) -> String { + let mut res: String = "".to_string(); + + while let Some(Ok(line)) = self.iter.peek() { + if !line.starts_with('#') { + break; + } + res.push_str(line.as_str()); + res.push('\n'); + let _ = self.iter.next(); + } + + res + } + + /// Extract the next empty-line-delimited block from the file. + /// + /// This isn't terribly efficient, but it's "fast enough". + fn get_block(&mut self) -> Option<std::io::Result<AnyBlock>> { + let mut kv = HashMap::new(); + + while let Some(line) = self.iter.next() { + //dbg!(&line); + if let Err(e) = line { + return Some(Err(e)); + } + let line_orig = line.unwrap(); + let line = line_orig.splitn(2, '#').next().unwrap().trim(); + if line.is_empty() { + if kv.is_empty() { + continue; + } else { + break; + } + } + let kwds: Vec<_> = line.splitn(2, ':').collect(); + if kwds.len() != 2 { + return None; // XXXX handle the error better. + } + kv.insert(kwds[0].trim().to_string(), kwds[1].trim().to_string()); + } + + if kv.is_empty() { + return None; + } + + if let Some(name) = kv.remove("name") { + // This is an AS block. 
+ let asn = kv.get("aut-num").unwrap(); // XXXX handle error better + assert!(asn.starts_with("AS")); + let asn = asn[2..].parse().unwrap(); + return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn }))); + } + + let net = if let Some(net) = kv.get("net") { + net.parse().unwrap() //XXXX handle the error better. + } else { + return Some(Ok(AnyBlock::OtherBlock)); + }; + + let asn = if let Some(asn) = kv.get("aut-num") { + asn.parse().ok() + } else { + None + }; + + let cc = if let Some(country) = kv.get("country") { + assert!(country.as_bytes().len() == 2); + country.as_bytes()[0..2].try_into().unwrap() + } else { + *b"??" + }; + + fn is_true(v: Option<&String>) -> bool { + match v { + Some(s) => s == "true", + None => false, + } + } + + let is_anon_proxy = is_true(kv.get("is-anonymous-proxy")); + let is_anycast = is_true(kv.get("is-anycast-proxy")); + let is_satellite = is_true(kv.get("is-satellite-provider")); + + Some(Ok(AnyBlock::NetBlock(NetBlock { + net, + asn, + cc, + is_anon_proxy, + is_anycast, + is_satellite, + }))) + } +} + +impl<I> Iterator for BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + type Item = AnyBlock; + fn next(&mut self) -> Option<Self::Item> { + match self.get_block() { + Some(Ok(b)) => Some(b), + _ => None, + } + } +} diff --git a/scripts/maint/geoip/geoip-db-tool/src/main.rs b/scripts/maint/geoip/geoip-db-tool/src/main.rs new file mode 100644 index 0000000000..9a22598a35 --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/src/main.rs @@ -0,0 +1,239 @@ +/// A basic tool to convert IPFire Location dumps into the CSV formats that Tor +/// expects. 
+mod db; + +use argh::FromArgs; +use ipnetwork::IpNetwork; +use rangemap::RangeInclusiveMap; + +use std::fs::File; +use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::net::{IpAddr, Ipv6Addr}; +use std::num::NonZeroU32; +use std::path::PathBuf; + +fn default_ipv4_path() -> PathBuf { + "./geoip".into() +} +fn default_ipv6_path() -> PathBuf { + "./geoip6".into() +} + +#[derive(FromArgs)] +/// Convert an IPFire Location dump into CSV geoip files. +struct Args { + /// where to store the IPv4 geoip output + #[argh(option, default = "default_ipv4_path()", short = '4')] + output_ipv4: PathBuf, + + /// where to store the IPv6 geoip6 output + #[argh(option, default = "default_ipv6_path()", short = '6')] + output_ipv6: PathBuf, + + /// where to find the dump file + #[argh(option, short = 'i')] + input: PathBuf, + + /// whether to include AS information in our output + #[argh(switch)] + include_asn: bool, + + /// where to store the AS map. + #[argh(option)] + output_asn: Option<PathBuf>, +} + +/// Represents a network block from running `location dump`. +#[derive(Debug, Clone)] +pub struct NetBlock { + pub net: IpNetwork, + pub cc: [u8; 2], + pub asn: Option<NonZeroU32>, + pub is_anon_proxy: bool, + pub is_anycast: bool, + pub is_satellite: bool, +} + +/// Represents an AS definition from running `location dump`. +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct AsBlock { + pub asn: NonZeroU32, + pub name: String, +} + +impl PartialEq for NetBlock { + fn eq(&self, other: &Self) -> bool { + self.net == other.net + } +} + +/// We define network blocks as being sorted first from largest to smallest, +/// then by address. 
+impl Ord for NetBlock { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.net + .prefix() + .cmp(&other.net.prefix()) + .then_with(|| self.net.network().cmp(&other.net.network())) + } +} + +impl PartialOrd for NetBlock { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Eq for NetBlock {} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +struct NetDefn { + cc: [u8; 2], + asn: Option<NonZeroU32>, +} + +impl NetBlock { + fn into_defn(self, include_asn: bool) -> NetDefn { + if include_asn { + NetDefn { + cc: self.cc, + asn: self.asn, + } + } else { + NetDefn { + cc: self.cc, + asn: None, + } + } + } +} + +impl NetDefn { + fn cc(&self) -> &str { + std::str::from_utf8(&self.cc).unwrap() + } + fn asn(&self) -> u32 { + match self.asn { + Some(v) => v.into(), + None => 0, + } + } +} + +const PROLOGUE: &str = "\ +# This file has been converted from the IPFire Location database +# using Tor's geoip-db-tool. For more information on the data, see +# https://location.ipfire.org/. +# +# Below is the header from the original export: +# +"; + +/// Read an input file in the `location dump` format, and write CSV ipv4 and ipv6 files. +/// +/// This code tries to be "efficient enough"; most of the logic is handled by +/// using the rangemap crate. +fn convert(args: Args) -> std::io::Result<()> { + let input = args.input.as_path(); + let output_v4 = args.output_ipv4.as_path(); + let output_v6 = args.output_ipv6.as_path(); + let include_asn = args.include_asn; + + let f = File::open(input)?; + let f = BufReader::new(f); + let mut blocks = Vec::new(); + let mut networks = Vec::new(); + + let mut reader = db::BlockReader::new(f.lines()); + let hdr = reader.extract_header(); + // Read blocks, and then sort them by specificity and address. 
+ for nb in reader { + match nb { + db::AnyBlock::AsBlock(a) => networks.push(a), + db::AnyBlock::NetBlock(n) => blocks.push(n), + _ => {} + } + } + blocks.sort(); + + // Convert the sorted blocks into a map from address ranges into + // country codes. + // + // Note that since we have sorted the blocks from least to most specific, + // we will be putting them into the maps in the right order, so that the + // most specific rule "wins". + // + // We use u32 and u128 as the index types for these RangeInclusiveMaps, + // so that we don't need to implement a step function for IpAddr. + let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new(); + let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new(); + + let mut n = 0usize; + let num_blocks = blocks.len(); + for nb in blocks { + n += 1; + if n % 100000 == 0 { + println!("{}/{}", n, num_blocks); + } + let start = nb.net.network(); + let end = nb.net.broadcast(); + match (start, end) { + (IpAddr::V4(a), IpAddr::V4(b)) => { + v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); + } + (IpAddr::V6(a), IpAddr::V6(b)) => { + v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); + } + (_, _) => panic!("network started and ended in different families!?"), + } + } + + // Write the ranges out to the appropriate files, in order. 
+ let mut v4 = BufWriter::new(File::create(output_v4)?); + let mut v6 = BufWriter::new(File::create(output_v6)?); + + v4.write_all(PROLOGUE.as_bytes())?; + v4.write_all(hdr.as_bytes())?; + for (r, defn) in v4map.iter() { + let a: u32 = *r.start(); + let b: u32 = *r.end(); + if include_asn { + writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; + } else { + writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?; + } + } + + v6.write_all(PROLOGUE.as_bytes())?; + v6.write_all(hdr.as_bytes())?; + for (r, defn) in v6map.iter() { + let a: Ipv6Addr = (*r.start()).into(); + let b: Ipv6Addr = (*r.end()).into(); + if include_asn { + writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; + } else { + writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?; + } + } + + // The documentation says you should always flush a BufWriter. + v4.flush()?; + v6.flush()?; + + if let Some(output_asn) = args.output_asn { + networks.sort(); + let mut asn = BufWriter::new(File::create(output_asn)?); + for net in networks { + writeln!(&mut asn, "{},{}", net.asn, net.name)?; + } + asn.flush()?; + } + + Ok(()) +} + +fn main() -> std::io::Result<()> { + let args: Args = argh::from_env(); + + convert(args) +} diff --git a/scripts/maint/geoip/update_and_commit_geoip.sh b/scripts/maint/geoip/update_and_commit_geoip.sh new file mode 100755 index 0000000000..a83f82433e --- /dev/null +++ b/scripts/maint/geoip/update_and_commit_geoip.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +set -e + +CMDDIR=$(cd "$(dirname "$0")" && pwd) + +if [ ! -e "./src/config/geoip" ] ; then + echo "Run this from inside the root dir of your oldest LTS repository" + exit 1 +fi + +if [ -n "$(git status --untracked-files=no --porcelain)" ]; then + echo "Working directory is not clean." 
+ exit 1 +fi + +TOPDIR=$(pwd) +cd "./src/config/" +"${CMDDIR}/update_geoip.sh" +cd "${TOPDIR}" + +DASH_DATE=$(date -u +"%Y-%m-%d") +SLASH_DATE=$(date -u +"%Y/%m/%d") +CHANGESFILE="changes/geoip-$DASH_DATE" + +cat > "$CHANGESFILE" <<EOF + o Minor features (geoip data): + - Update the geoip files to match the IPFire Location Database, + as retrieved on ${SLASH_DATE}. +EOF + +git add "$CHANGESFILE" + +git commit -a -m "Update geoip files to match ipfire location db, $SLASH_DATE." diff --git a/scripts/maint/geoip/update_geoip.sh b/scripts/maint/geoip/update_geoip.sh new file mode 100755 index 0000000000..9289e7a969 --- /dev/null +++ b/scripts/maint/geoip/update_geoip.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +DIR=$(cd "$(dirname "$0")" && pwd) +TMP=$(mktemp -d) + +location update +location dump "$TMP/geoip-dump.txt" + +OLDDIR=$(pwd) +cd "$DIR/geoip-db-tool/" +cargo build --release +cd "$OLDDIR" + +"$DIR/geoip-db-tool/target/release/geoip-db-tool" -i "$TMP/geoip-dump.txt" |