replace regex use with nom

This reduces the binary size noticeably on my macOS machine (#70):

                             unstripped  stripped
1  before switching to clap    11.1 MiB   6.7 MiB
2  after switching to clap     11.4 MiB   6.9 MiB
3  without regex               10.1 MiB   5.9 MiB
This commit is contained in:
Scott Lamb
2020-04-17 23:02:02 -07:00
parent e8eb764b90
commit af9e568344
10 changed files with 476 additions and 367 deletions

View File

@@ -30,7 +30,6 @@ openssl = "0.10"
parking_lot = { version = "0.9", features = [] }
prettydiff = "0.3.1"
protobuf = { git = "https://github.com/stepancheg/rust-protobuf" }
regex = "1.0"
rusqlite = "0.21.0"
smallvec = "1.0"
tempdir = "0.3"

View File

@@ -30,199 +30,17 @@
use crate::coding::{append_varint32, decode_varint32, unzigzag32, zigzag32};
use crate::db;
use failure::{Error, bail, format_err};
use lazy_static::lazy_static;
use failure::{Error, bail};
use log::trace;
use regex::Regex;
use std::ops;
use std::fmt;
use std::ops::Range;
use std::str::FromStr;
use time;
pub const TIME_UNITS_PER_SEC: i64 = 90000;
pub use base::time::TIME_UNITS_PER_SEC;
pub const DESIRED_RECORDING_DURATION: i64 = 60 * TIME_UNITS_PER_SEC;
pub const MAX_RECORDING_DURATION: i64 = 5 * 60 * TIME_UNITS_PER_SEC;
/// A time specified as 90,000ths of a second since 1970-01-01 00:00:00 UTC.
#[derive(Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct Time(pub i64);
impl Time {
pub fn new(tm: time::Timespec) -> Self {
Time(tm.sec * TIME_UNITS_PER_SEC + tm.nsec as i64 * TIME_UNITS_PER_SEC / 1_000_000_000)
}
pub const fn min_value() -> Self { Time(i64::min_value()) }
pub const fn max_value() -> Self { Time(i64::max_value()) }
/// Parses a time as either 90,000ths of a second since epoch or a RFC 3339-like string.
///
/// The former is 90,000ths of a second since 1970-01-01T00:00:00 UTC, excluding leap seconds.
///
/// The latter is a string such as `2006-01-02T15:04:05`, followed by an optional 90,000ths of
/// a second such as `:00001`, followed by an optional time zone offset such as `Z` or
/// `-07:00`. A missing fraction is assumed to be 0. A missing time zone offset implies the
/// local time zone.
pub fn parse(s: &str) -> Result<Self, Error> {
lazy_static! {
static ref RE: Regex = Regex::new(r#"(?x)
^
([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})
(?::([0-9]{5}))?
(Z|[+-]([0-9]{2}):([0-9]{2}))?
$"#).unwrap();
}
// First try parsing as 90,000ths of a second since epoch.
match i64::from_str(s) {
Ok(i) => return Ok(Time(i)),
Err(_) => {},
}
// If that failed, parse as a time string or bust.
let c = RE.captures(s).ok_or_else(|| format_err!("unparseable time {:?}", s))?;
let mut tm = time::Tm{
tm_sec: i32::from_str(c.get(6).unwrap().as_str()).unwrap(),
tm_min: i32::from_str(c.get(5).unwrap().as_str()).unwrap(),
tm_hour: i32::from_str(c.get(4).unwrap().as_str()).unwrap(),
tm_mday: i32::from_str(c.get(3).unwrap().as_str()).unwrap(),
tm_mon: i32::from_str(c.get(2).unwrap().as_str()).unwrap(),
tm_year: i32::from_str(c.get(1).unwrap().as_str()).unwrap(),
tm_wday: 0,
tm_yday: 0,
tm_isdst: -1,
tm_utcoff: 0,
tm_nsec: 0,
};
if tm.tm_mon == 0 {
bail!("time {:?} has month 0", s);
}
tm.tm_mon -= 1;
if tm.tm_year < 1900 {
bail!("time {:?} has year before 1900", s);
}
tm.tm_year -= 1900;
// The time crate doesn't use tm_utcoff properly; it just calls timegm() if tm_utcoff == 0,
// mktime() otherwise. If a zone is specified, use the timegm path and a manual offset.
// If no zone is specified, use the tm_utcoff path. This is pretty lame, but follow the
// chrono crate's lead and just use 0 or 1 to choose between these functions.
let sec = if let Some(zone) = c.get(8) {
tm.to_timespec().sec + if zone.as_str() == "Z" {
0
} else {
let off = i64::from_str(c.get(9).unwrap().as_str()).unwrap() * 3600 +
i64::from_str(c.get(10).unwrap().as_str()).unwrap() * 60;
if zone.as_str().as_bytes()[0] == b'-' { off } else { -off }
}
} else {
tm.tm_utcoff = 1;
tm.to_timespec().sec
};
let fraction = if let Some(f) = c.get(7) { i64::from_str(f.as_str()).unwrap() } else { 0 };
Ok(Time(sec * TIME_UNITS_PER_SEC + fraction))
}
/// Convert to unix seconds by floor method (rounding down).
pub fn unix_seconds(&self) -> i64 { self.0 / TIME_UNITS_PER_SEC }
}
impl ops::Sub for Time {
type Output = Duration;
fn sub(self, rhs: Time) -> Duration { Duration(self.0 - rhs.0) }
}
impl ops::AddAssign<Duration> for Time {
fn add_assign(&mut self, rhs: Duration) { self.0 += rhs.0 }
}
impl ops::Add<Duration> for Time {
type Output = Time;
fn add(self, rhs: Duration) -> Time { Time(self.0 + rhs.0) }
}
impl ops::Sub<Duration> for Time {
type Output = Time;
fn sub(self, rhs: Duration) -> Time { Time(self.0 - rhs.0) }
}
impl fmt::Debug for Time {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Write both the raw and display forms.
write!(f, "{} /* {} */", self.0, self)
}
}
impl fmt::Display for Time {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let tm = time::at(time::Timespec{sec: self.0 / TIME_UNITS_PER_SEC, nsec: 0});
let zone_minutes = tm.tm_utcoff.abs() / 60;
write!(f, "{}:{:05}{}{:02}:{:02}", tm.strftime("%FT%T").or_else(|_| Err(fmt::Error))?,
self.0 % TIME_UNITS_PER_SEC,
if tm.tm_utcoff > 0 { '+' } else { '-' }, zone_minutes / 60, zone_minutes % 60)
}
}
/// A duration specified in 1/90,000ths of a second.
/// Durations are typically non-negative, but a `db::CameraDayValue::duration` may be negative.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct Duration(pub i64);
impl Duration {
pub fn to_tm_duration(&self) -> time::Duration {
time::Duration::nanoseconds(self.0 * 100000 / 9)
}
}
impl fmt::Display for Duration {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut seconds = self.0 / TIME_UNITS_PER_SEC;
const MINUTE_IN_SECONDS: i64 = 60;
const HOUR_IN_SECONDS: i64 = 60 * MINUTE_IN_SECONDS;
const DAY_IN_SECONDS: i64 = 24 * HOUR_IN_SECONDS;
let days = seconds / DAY_IN_SECONDS;
seconds %= DAY_IN_SECONDS;
let hours = seconds / HOUR_IN_SECONDS;
seconds %= HOUR_IN_SECONDS;
let minutes = seconds / MINUTE_IN_SECONDS;
seconds %= MINUTE_IN_SECONDS;
let mut have_written = if days > 0 {
write!(f, "{} day{}", days, if days == 1 { "" } else { "s" })?;
true
} else {
false
};
if hours > 0 {
write!(f, "{}{} hour{}", if have_written { " " } else { "" },
hours, if hours == 1 { "" } else { "s" })?;
have_written = true;
}
if minutes > 0 {
write!(f, "{}{} minute{}", if have_written { " " } else { "" },
minutes, if minutes == 1 { "" } else { "s" })?;
have_written = true;
}
if seconds > 0 || !have_written {
write!(f, "{}{} second{}", if have_written { " " } else { "" },
seconds, if seconds == 1 { "" } else { "s" })?;
}
Ok(())
}
}
impl ops::Add for Duration {
type Output = Duration;
fn add(self, rhs: Duration) -> Duration { Duration(self.0 + rhs.0) }
}
impl ops::AddAssign for Duration {
fn add_assign(&mut self, rhs: Duration) { self.0 += rhs.0 }
}
impl ops::SubAssign for Duration {
fn sub_assign(&mut self, rhs: Duration) { self.0 -= rhs.0 }
}
pub use base::time::Time;
pub use base::time::Duration;
/// An iterator through a sample index.
/// Initially invalid; call `next()` before each read.
@@ -533,52 +351,6 @@ mod tests {
use super::*;
use crate::testutil::{self, TestDb};
#[test]
fn test_parse_time() {
testutil::init();
let tests = &[
("2006-01-02T15:04:05-07:00", 102261550050000),
("2006-01-02T15:04:05:00001-07:00", 102261550050001),
("2006-01-02T15:04:05-08:00", 102261874050000),
("2006-01-02T15:04:05", 102261874050000), // implied -08:00
("2006-01-02T15:04:05:00001", 102261874050001), // implied -08:00
("2006-01-02T15:04:05-00:00", 102259282050000),
("2006-01-02T15:04:05Z", 102259282050000),
("102261550050000", 102261550050000),
];
for test in tests {
assert_eq!(test.1, Time::parse(test.0).unwrap().0, "parsing {}", test.0);
}
}
#[test]
fn test_format_time() {
testutil::init();
assert_eq!("2006-01-02T15:04:05:00000-08:00", format!("{}", Time(102261874050000)));
}
#[test]
fn test_display_duration() {
testutil::init();
let tests = &[
// (output, seconds)
("0 seconds", 0),
("1 second", 1),
("1 minute", 60),
("1 minute 1 second", 61),
("2 minutes", 120),
("1 hour", 3600),
("1 hour 1 minute", 3660),
("2 hours", 7200),
("1 day", 86400),
("1 day 1 hour", 86400 + 3600),
("2 days", 2 * 86400),
];
for test in tests {
assert_eq!(test.0, format!("{}", Duration(test.1 * TIME_UNITS_PER_SEC)));
}
}
/// Tests encoding the example from design/schema.md.
#[test]
fn test_encode_example() {