replace regex use with nom

This reduces the binary size noticeably on my macOS machine (#70):

                             unstripped  stripped
1  before switching to clap    11.1 MiB   6.7 MiB
2  after switching to clap     11.4 MiB   6.9 MiB
3  without regex               10.1 MiB   5.9 MiB
This commit is contained in:
Scott Lamb
2020-04-17 23:02:02 -07:00
parent e8eb764b90
commit af9e568344
10 changed files with 476 additions and 367 deletions

View File

@@ -42,8 +42,6 @@
use byteorder::{BigEndian, WriteBytesExt};
use failure::{Error, bail};
use lazy_static::lazy_static;
use regex::bytes::Regex;
// See ISO/IEC 14496-10 table 7-1 - NAL unit type codes, syntax element categories, and NAL unit
// type classes.
@@ -60,15 +58,28 @@ const NAL_UNIT_TYPE_MASK: u8 = 0x1F; // bottom 5 bits of first byte of unit.
///
/// TODO: detect invalid byte streams. For example, several 0x00s not followed by a 0x01, a stream
/// not starting with 0x00 0x00 0x00 0x01, or an empty NAL unit.
fn decode_h264_annex_b<'a, F>(data: &'a [u8], mut f: F) -> Result<(), Error>
fn decode_h264_annex_b<'a, F>(mut data: &'a [u8], mut f: F) -> Result<(), Error>
where F: FnMut(&'a [u8]) -> Result<(), Error> {
lazy_static! {
static ref START_CODE: Regex = Regex::new(r"(\x00{2,}\x01)").unwrap();
}
for unit in START_CODE.split(data) {
if !unit.is_empty() {
f(unit)?;
// Search for the three-byte sequence 0x00 0x00 0x01. Any additional 0x00 bytes in front of
// it end up at the tail of the preceding unit and are stripped below.
let start_code = &b"\x00\x00\x01"[..];
use nom::FindSubstring;
'outer: while let Some(pos) = data.find_substring(start_code) {
// Everything before the start code is a candidate unit; the search resumes just past the
// start code on the next iteration.
let mut unit = &data[0..pos];
data = &data[pos + start_code.len() ..];
// Have zero or more bytes that end in a start code. Strip out any trailing 0x00s and
// process the unit if there's anything left.
loop {
match unit.last() {
// Nothing left (empty, or consisted only of 0x00s): skip without calling `f`.
None => continue 'outer,
Some(b) if *b == 0 => { unit = &unit[..unit.len()-1]; },
Some(_) => break,
}
}
f(unit)?;
}
// No remaining start codes; likely a unit left.
// Note that this final unit is passed to `f` as-is, without stripping trailing 0x00s.
if !data.is_empty() {
f(data)?;
}
Ok(())
}

View File

@@ -34,7 +34,6 @@ use bytes::Bytes;
use crate::body::{Body, BoxedError};
use crate::json;
use crate::mp4;
use base64;
use bytes::{BufMut, BytesMut};
use core::borrow::Borrow;
use core::str::FromStr;
@@ -46,12 +45,12 @@ use futures::sink::SinkExt;
use futures::future::{self, Future, TryFutureExt};
use futures::stream::StreamExt;
use http::{Request, Response, status::StatusCode};
use http_serve;
use http::header::{self, HeaderValue};
use lazy_static::lazy_static;
use log::{debug, info, warn};
use regex::Regex;
use serde_json;
use nom::IResult;
use nom::bytes::complete::{take_while1, tag};
use nom::combinator::{all_consuming, map, map_res, opt};
use nom::sequence::{preceded, tuple};
use std::collections::HashMap;
use std::cmp;
use std::fs;
@@ -64,14 +63,6 @@ use tokio_tungstenite::tungstenite;
use url::form_urlencoded;
use uuid::Uuid;
lazy_static! {
/// Regex used to parse the `s` query parameter to `view.mp4`.
/// As described in `design/api.md`, this is of the form
/// `START_ID[-END_ID][@OPEN_ID][.[REL_START_TIME]-[REL_END_TIME]]`.
static ref SEGMENTS_RE: Regex =
Regex::new(r"^(\d+)(-\d+)?(@\d+)?(?:\.(\d+)?-(\d+)?)?$").unwrap();
}
type BoxedFuture = Box<dyn Future<Output = Result<Response<Body>, BoxedError>> +
Sync + Send + 'static>;
@@ -202,41 +193,48 @@ struct Segments {
end_time: Option<i64>,
}
/// Builds a parser which consumes one or more leading ASCII digits and converts them to `T`
/// via `T`'s `FromStr` implementation. The parser fails if there are no leading digits or if
/// the conversion returns an error.
fn num<'a, T: FromStr>() -> impl Fn(&'a str) -> IResult<&'a str, T> {
    let digits = take_while1(|c: char| c.is_ascii_digit());
    map_res(digits, T::from_str)
}
impl Segments {
pub fn parse(input: &str) -> Result<Segments, ()> {
let caps = SEGMENTS_RE.captures(input).ok_or(())?;
let ids_start = i32::from_str(caps.get(1).unwrap().as_str()).map_err(|_| ())?;
let ids_end = match caps.get(2) {
Some(m) => i32::from_str(&m.as_str()[1..]).map_err(|_| ())?,
None => ids_start,
} + 1;
let open_id = match caps.get(3) {
Some(m) => Some(u32::from_str(&m.as_str()[1..]).map_err(|_| ())?),
None => None,
};
if ids_start < 0 || ids_end <= ids_start {
/// Parses the `s` query parameter to `view.mp4` as described in `design/api.md`.
/// Doesn't do any validation.
fn parse(i: &str) -> IResult<&str, Segments> {
// Parse START_ID[-END_ID] into Range<i32>.
// Note that END_ID is inclusive, but Ranges are half-open.
let (i, ids) = map(tuple((num::<i32>(), opt(preceded(tag("-"), num::<i32>())))),
|(start, end)| start .. end.unwrap_or(start) + 1)(i)?;
// Parse [@OPEN_ID] into Option<u32>.
let (i, open_id) = opt(preceded(tag("@"), num::<u32>()))(i)?;
// Parse [.[REL_START_TIME]-[REL_END_TIME]] into (i64, Option<i64>).
let (i, (start_time, end_time)) = map(
opt(preceded(tag("."), tuple((opt(num::<i64>()), tag("-"), opt(num::<i64>()))))),
|t| {
t.map(|(s, _, e)| (s.unwrap_or(0), e))
.unwrap_or((0, None))
})(i)?;
Ok((i, Segments { ids, open_id, start_time, end_time, }))
}
}
/// Parses and validates the `s` query parameter; any failure is reported as `Err(())`.
impl FromStr for Segments {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
// `all_consuming` makes the parse fail if `Segments::parse` leaves any input unconsumed.
let (_, s) = all_consuming(Segments::parse)(s).map_err(|_| ())?;
// Reject an empty or inverted id range.
if s.ids.end <= s.ids.start {
return Err(());
}
let start_time = caps.get(4).map_or(Ok(0), |m| i64::from_str(m.as_str())).map_err(|_| ())?;
if start_time < 0 {
return Err(());
// When an end time is given, it must not be before the start time.
if let Some(e) = s.end_time {
if e < s.start_time {
return Err(());
}
}
let end_time = match caps.get(5) {
Some(v) => {
let e = i64::from_str(v.as_str()).map_err(|_| ())?;
if e <= start_time {
return Err(());
}
Some(e)
},
None => None
};
Ok(Segments {
ids: ids_start .. ids_end,
open_id,
start_time,
end_time,
})
Ok(s)
}
}
@@ -422,7 +420,7 @@ impl ServiceInner {
let (key, value) = (key.borrow(), value.borrow());
match key {
"s" => {
let s = Segments::parse(value).map_err(
let s = Segments::from_str(value).map_err(
|()| plain_response(StatusCode::BAD_REQUEST,
format!("invalid s parameter: {}", value)))?;
debug!("stream_view_mp4: appending s={:?}", s);
@@ -1078,6 +1076,7 @@ mod tests {
use futures::future::FutureExt;
use log::info;
use std::collections::HashMap;
use std::str::FromStr;
use super::Segments;
struct Server {
@@ -1233,25 +1232,25 @@ mod tests {
// Checks the `Segments` produced for each accepted form of the `s` parameter: a single id or
// an id range, with or without `@OPEN_ID`, and with an open or closed relative time range.
fn test_segments() {
testutil::init();
assert_eq!(Segments{ids: 1..2, open_id: None, start_time: 0, end_time: None},
Segments::parse("1").unwrap());
Segments::from_str("1").unwrap());
assert_eq!(Segments{ids: 1..2, open_id: Some(42), start_time: 0, end_time: None},
Segments::parse("1@42").unwrap());
Segments::from_str("1@42").unwrap());
assert_eq!(Segments{ids: 1..2, open_id: None, start_time: 26, end_time: None},
Segments::parse("1.26-").unwrap());
Segments::from_str("1.26-").unwrap());
assert_eq!(Segments{ids: 1..2, open_id: Some(42), start_time: 26, end_time: None},
Segments::parse("1@42.26-").unwrap());
Segments::from_str("1@42.26-").unwrap());
assert_eq!(Segments{ids: 1..2, open_id: None, start_time: 0, end_time: Some(42)},
Segments::parse("1.-42").unwrap());
Segments::from_str("1.-42").unwrap());
assert_eq!(Segments{ids: 1..2, open_id: None, start_time: 26, end_time: Some(42)},
Segments::parse("1.26-42").unwrap());
Segments::from_str("1.26-42").unwrap());
// An inclusive END_ID of 5 yields the half-open range 1..6.
assert_eq!(Segments{ids: 1..6, open_id: None, start_time: 0, end_time: None},
Segments::parse("1-5").unwrap());
Segments::from_str("1-5").unwrap());
assert_eq!(Segments{ids: 1..6, open_id: None, start_time: 26, end_time: None},
Segments::parse("1-5.26-").unwrap());
Segments::from_str("1-5.26-").unwrap());
assert_eq!(Segments{ids: 1..6, open_id: None, start_time: 0, end_time: Some(42)},
Segments::parse("1-5.-42").unwrap());
Segments::from_str("1-5.-42").unwrap());
assert_eq!(Segments{ids: 1..6, open_id: None, start_time: 26, end_time: Some(42)},
Segments::parse("1-5.26-42").unwrap());
Segments::from_str("1-5.26-42").unwrap());
}
#[tokio::test]