diff options
author | Nick Mathewson <nickm@torproject.org> | 2021-02-22 08:30:11 -0500 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2021-02-22 12:25:18 -0500 |
commit | 0d4237839b21b466526a01147538d09c117cc884 (patch) | |
tree | f13c7c70aed9ae3645c0eb171348f035e468ad8b /scripts/maint/geoip/geoip-db-tool/src/db.rs | |
parent | 8ccfd4a51ad55e9834cffcc91cbaa13e1f19c8ff (diff) | |
download | tor-0d4237839b21b466526a01147538d09c117cc884.tar.gz tor-0d4237839b21b466526a01147538d09c117cc884.zip |
Rust tool to convert IPFire Location dump into CSV format.
The IPFire people provide a tool that collects data from several
top-level sources, combines it into a single database, and annotates
it with optional overrides. This tool transforms the "dump" format
of their database into the form Tor expects.
Diffstat (limited to 'scripts/maint/geoip/geoip-db-tool/src/db.rs')
-rw-r--r-- | scripts/maint/geoip/geoip-db-tool/src/db.rs | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/scripts/maint/geoip/geoip-db-tool/src/db.rs b/scripts/maint/geoip/geoip-db-tool/src/db.rs new file mode 100644 index 0000000000..eaadd4c612 --- /dev/null +++ b/scripts/maint/geoip/geoip-db-tool/src/db.rs @@ -0,0 +1,126 @@ +/// Code to parse a dump file +use std::collections::HashMap; +use std::convert::TryInto; +use std::iter::Peekable; + +use super::NetBlock; + +pub struct BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + iter: Peekable<I>, +} + +enum AnyBlock { + NotNet, + NetBlock(NetBlock), +} + +impl<I> BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + pub fn new(iter: I) -> Self { + BlockReader { + iter: iter.peekable(), + } + } + + /// Extract the initial header from the file. + pub fn extract_header(&mut self) -> String { + let mut res: String = "".to_string(); + + while let Some(Ok(line)) = self.iter.peek() { + if !line.starts_with('#') { + break; + } + res.push_str(line.as_str()); + res.push('\n'); + let _ = self.iter.next(); + } + + res + } + + /// Extract the next empty-line-delimited block from the file. + /// + /// This isn't terribly efficient, but it's "fast enough". + fn get_block(&mut self) -> Option<std::io::Result<AnyBlock>> { + let mut kv = HashMap::new(); + + while let Some(line) = self.iter.next() { + //dbg!(&line); + if let Err(e) = line { + return Some(Err(e)); + } + let line_orig = line.unwrap(); + let line = line_orig.splitn(2, '#').next().unwrap().trim(); + if line.is_empty() { + if kv.is_empty() { + continue; + } else { + break; + } + } + let kwds: Vec<_> = line.splitn(2, ':').collect(); + if kwds.len() != 2 { + return None; // XXXX handle the error better. + } + kv.insert(kwds[0].trim().to_string(), kwds[1].trim().to_string()); + } + + if kv.is_empty() { + return None; + } + + let net = if let Some(net) = kv.get("net") { + net.parse().unwrap() //XXXX handle the error better. + } else { + return Some(Ok(AnyBlock::NotNet)); + }; + + let cc = if let Some(country) = kv.get("country") { + assert!(country.as_bytes().len() == 2); + country.as_bytes()[0..2].try_into().unwrap() + } else { + return Some(Ok(AnyBlock::NotNet)); + }; + + fn is_true(v: Option<&String>) -> bool { + match v { + Some(s) => s == "true", + None => false, + } + } + + let is_anon_proxy = is_true(kv.get("is-anonymous-proxy")); + let is_anycast = is_true(kv.get("is-anycast-proxy")); + let is_satellite = is_true(kv.get("is-satellite-provider")); + + Some(Ok(AnyBlock::NetBlock(NetBlock { + net, + cc, + is_anon_proxy, + is_anycast, + is_satellite, + }))) + } +} + +impl<I> Iterator for BlockReader<I> +where + I: Iterator<Item = std::io::Result<String>>, +{ + type Item = NetBlock; + fn next(&mut self) -> Option<Self::Item> { + loop { + match self.get_block() { + None => return None, + Some(Err(_)) => return None, + Some(Ok(AnyBlock::NotNet)) => continue, + Some(Ok(AnyBlock::NetBlock(n))) => return Some(n), + } + } + } +} |