shutdown better

After a frustrating search for a suitable channel to use for shutdown
(neither tokio::sync::watch::Receiver nor
futures::future::Shared<tokio::sync::oneshot::Receiver> looked quite
right), during which I rethought my life decisions, I finally just made
my own (server/base/shutdown.rs). It can easily be polled or waited on
from either async or sync contexts. Most importantly, it's convenient;
it's also efficient, not that it really matters here.

We now do a slightly better job of propagating a "graceful" shutdown
signal, and this channel will give us tools to improve it over time.

* Shut down even when writer or syncer operations are stuck. Fixes #117
* Not done yet: streamers should instantly shut down without waiting for
  a connection attempt or frame or something. I'll probably
  implement that when removing --rtsp-library=ffmpeg. The code should be
  cleaner then.
* Not done yet: fix a couple of places that sleep for up to a second when
  they could shut down immediately. That just needs the plumbing for
  mock clocks to work.

I also implemented an immediate shutdown mode, activated by a second
signal. I think this will mitigate the streamer wait situation.
This commit is contained in:
Scott Lamb
2021-09-23 15:55:53 -07:00
parent 66f76079c0
commit b41a6c43da
21 changed files with 487 additions and 151 deletions

View File

@@ -46,7 +46,7 @@ pub struct Args {
trash_corrupt_rows: bool,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
check::run(
&mut conn,

View File

@@ -31,7 +31,7 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let clocks = clock::RealClocks {};
let db = Arc::new(db::Database::new(clocks, conn, true)?);

View File

@@ -19,7 +19,7 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::Create)?;
// Check if the database has already been initialized.

View File

@@ -53,7 +53,7 @@ pub struct Args {
username: String,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let db = std::sync::Arc::new(db::Database::new(clocks, conn, true).unwrap());

View File

@@ -8,12 +8,10 @@ use base::clock;
use db::{dir, writer};
use failure::{bail, Error, ResultExt};
use fnv::FnvHashMap;
use futures::future::FutureExt;
use hyper::service::{make_service_fn, service_fn};
use log::error;
use log::{info, warn};
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use structopt::StructOpt;
@@ -171,16 +169,55 @@ struct Syncer {
join: thread::JoinHandle<()>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let mut builder = tokio::runtime::Builder::new_multi_thread();
builder.enable_all();
if let Some(worker_threads) = args.worker_threads {
builder.worker_threads(worker_threads);
}
builder.build().unwrap().block_on(async_run(args))
let rt = builder.build()?;
let r = rt.block_on(async_run(args));
// tokio normally waits for all spawned tasks to complete, but:
// * in the graceful shutdown path, we wait for specific tasks with logging.
// * in the immediate shutdown path, we don't want to wait.
rt.shutdown_background();
r
}
async fn async_run(args: &Args) -> Result<i32, Error> {
/// Runs the server to completion, translating Unix signals into shutdown requests.
///
/// A shutdown channel is created here; the receiving half is handed to `inner`, which does the
/// real work. Signal handling happens in two phases:
///
/// 1. Until the first SIGINT/SIGTERM arrives, `inner` runs normally. If it finishes on its own,
///    its result is returned directly.
/// 2. On the first signal the sender is dropped (requesting a graceful shutdown) and `inner` keeps
///    being polled so it can wind down. A second SIGINT/SIGTERM during this phase aborts the wait
///    and returns an error instead.
///
/// # Errors
///
/// Returns an error if the signal handlers can't be installed, if `inner` fails, or if a second
/// signal arrives while the graceful shutdown is still in progress.
async fn async_run(args: Args) -> Result<i32, Error> {
    let (shutdown_tx, shutdown_rx) = base::shutdown::channel();

    // Held in an `Option` so either signal branch below can drop the sender via `take()`.
    let mut shutdown_tx = Some(shutdown_tx);

    tokio::pin! {
        let int = signal(SignalKind::interrupt())?;
        let term = signal(SignalKind::terminate())?;
        let inner = inner(args, shutdown_rx);
    }

    // Phase 1: wait for the first signal, or for `inner` to finish by itself.
    tokio::select! {
        _ = int.recv() => {
            info!("Received SIGINT; shutting down gracefully. \
                   Send another SIGINT or SIGTERM to shut down immediately.");
            shutdown_tx.take();
        },
        _ = term.recv() => {
            info!("Received SIGTERM; shutting down gracefully. \
                   Send another SIGINT or SIGTERM to shut down immediately.");
            shutdown_tx.take();
        },
        result = &mut inner => return result,
    }

    // Phase 2: graceful shutdown is underway; a second signal turns it into an immediate one.
    tokio::select! {
        _ = int.recv() => bail!("immediate shutdown due to second signal (SIGINT)"),
        _ = term.recv() => bail!("immediate shutdown due to second signal (SIGTERM)"),
        result = &mut inner => result,
    }
}
async fn inner(args: Args, shutdown_rx: base::shutdown::Receiver) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(
&args.db_dir,
@@ -215,7 +252,6 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
})?);
// Start a streamer for each stream.
let shutdown_streamers = Arc::new(AtomicBool::new(false));
let mut streamers = Vec::new();
let mut session_groups_by_camera: FnvHashMap<i32, Arc<retina::client::SessionGroup>> =
FnvHashMap::default();
@@ -230,7 +266,7 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
db: &db,
opener: args.rtsp_library.opener(),
transport: args.rtsp_transport,
shutdown: &shutdown_streamers,
shutdown_rx: &shutdown_rx,
};
// Get the directories that need syncers.
@@ -248,7 +284,7 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
drop(l);
let mut syncers = FnvHashMap::with_capacity_and_hasher(dirs.len(), Default::default());
for (id, dir) in dirs.drain() {
let (channel, join) = writer::start_syncer(db.clone(), id)?;
let (channel, join) = writer::start_syncer(db.clone(), shutdown_rx.clone(), id)?;
syncers.insert(id, Syncer { dir, channel, join });
}
@@ -319,34 +355,31 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
.with_context(|_| format!("unable to bind --http-addr={}", &args.http_addr))?
.tcp_nodelay(true)
.serve(make_svc);
let mut int = signal(SignalKind::interrupt())?;
let mut term = signal(SignalKind::terminate())?;
let shutdown = futures::future::select(Box::pin(int.recv()), Box::pin(term.recv()));
let (shutdown_tx, shutdown_rx) = futures::channel::oneshot::channel();
let server = server.with_graceful_shutdown(shutdown_rx.map(|_| ()));
let server = server.with_graceful_shutdown(shutdown_rx.future());
let server_handle = tokio::spawn(server);
info!("Ready to serve HTTP requests");
shutdown.await;
shutdown_tx.send(()).unwrap();
let _ = shutdown_rx.as_future().await;
info!("Shutting down streamers.");
shutdown_streamers.store(true, Ordering::SeqCst);
for streamer in streamers.drain(..) {
streamer.join().unwrap();
}
if let Some(mut ss) = syncers {
// The syncers shut down when all channels to them have been dropped.
// The database maintains one; and `ss` holds one. Drop both.
db.lock().clear_on_flush();
for (_, s) in ss.drain() {
drop(s.channel);
s.join.join().unwrap();
info!("Shutting down streamers and syncers.");
tokio::task::spawn_blocking({
let db = db.clone();
move || {
for streamer in streamers.drain(..) {
streamer.join().unwrap();
}
if let Some(mut ss) = syncers {
// The syncers shut down when all channels to them have been dropped.
// The database maintains one; and `ss` holds one. Drop both.
db.lock().clear_on_flush();
for (_, s) in ss.drain() {
drop(s.channel);
s.join.join().unwrap();
}
}
}
}
})
.await?;
db.lock().clear_watches();

View File

@@ -37,7 +37,7 @@ pub struct Args {
arg: Vec<OsString>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let mode = if args.read_only {
OpenMode::ReadOnly
} else {

View File

@@ -17,7 +17,7 @@ pub struct Args {
timestamps: Vec<String>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
for timestamp in &args.timestamps {
let t = db::recording::Time::parse(timestamp)?;
println!("{} == {}", t, t.0);

View File

@@ -40,7 +40,7 @@ pub struct Args {
no_vacuum: bool,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
db::upgrade::run(

View File

@@ -59,16 +59,16 @@ enum Args {
}
impl Args {
fn run(&self) -> Result<i32, failure::Error> {
fn run(self) -> Result<i32, failure::Error> {
match self {
Args::Check(ref a) => cmds::check::run(a),
Args::Config(ref a) => cmds::config::run(a),
Args::Init(ref a) => cmds::init::run(a),
Args::Login(ref a) => cmds::login::run(a),
Args::Run(ref a) => cmds::run::run(a),
Args::Sql(ref a) => cmds::sql::run(a),
Args::Ts(ref a) => cmds::ts::run(a),
Args::Upgrade(ref a) => cmds::upgrade::run(a),
Args::Check(a) => cmds::check::run(a),
Args::Config(a) => cmds::config::run(a),
Args::Init(a) => cmds::init::run(a),
Args::Login(a) => cmds::login::run(a),
Args::Run(a) => cmds::run::run(a),
Args::Sql(a) => cmds::sql::run(a),
Args::Ts(a) => cmds::ts::run(a),
Args::Upgrade(a) => cmds::upgrade::run(a),
}
}
}

View File

@@ -2277,7 +2277,7 @@ mod tests {
}
}
fn copy_mp4_to_db(db: &TestDb<RealClocks>) {
fn copy_mp4_to_db(db: &mut TestDb<RealClocks>) {
let (extra_data, mut input) = stream::FFMPEG
.open(
"test".to_owned(),
@@ -2322,7 +2322,13 @@ mod tests {
};
frame_time += recording::Duration(i64::from(pkt.duration));
output
.write(pkt.data, frame_time, pkt.pts, pkt.is_key)
.write(
&mut db.shutdown_rx,
pkt.data,
frame_time,
pkt.pts,
pkt.is_key,
)
.unwrap();
end_pts = Some(pkt.pts + i64::from(pkt.duration));
}
@@ -2811,8 +2817,8 @@ mod tests {
#[tokio::test]
async fn test_round_trip() {
testutil::init();
let db = TestDb::new(RealClocks {});
copy_mp4_to_db(&db);
let mut db = TestDb::new(RealClocks {});
copy_mp4_to_db(&mut db);
let mp4 = create_mp4_from_db(&db, 0, 0, false);
traverse(mp4.clone()).await;
let new_filename = write_mp4(&mp4, db.tmpdir.path()).await;
@@ -2840,8 +2846,8 @@ mod tests {
#[tokio::test]
async fn test_round_trip_with_subtitles() {
testutil::init();
let db = TestDb::new(RealClocks {});
copy_mp4_to_db(&db);
let mut db = TestDb::new(RealClocks {});
copy_mp4_to_db(&mut db);
let mp4 = create_mp4_from_db(&db, 0, 0, true);
traverse(mp4.clone()).await;
let new_filename = write_mp4(&mp4, db.tmpdir.path()).await;
@@ -2869,8 +2875,8 @@ mod tests {
#[tokio::test]
async fn test_round_trip_with_edit_list() {
testutil::init();
let db = TestDb::new(RealClocks {});
copy_mp4_to_db(&db);
let mut db = TestDb::new(RealClocks {});
copy_mp4_to_db(&mut db);
let mp4 = create_mp4_from_db(&db, 1, 0, false);
traverse(mp4.clone()).await;
let new_filename = write_mp4(&mp4, db.tmpdir.path()).await;
@@ -2898,8 +2904,8 @@ mod tests {
#[tokio::test]
async fn test_round_trip_with_edit_list_and_subtitles() {
testutil::init();
let db = TestDb::new(RealClocks {});
copy_mp4_to_db(&db);
let mut db = TestDb::new(RealClocks {});
copy_mp4_to_db(&mut db);
let off = 2 * TIME_UNITS_PER_SEC;
let mp4 = create_mp4_from_db(&db, i32::try_from(off).unwrap(), 0, true);
traverse(mp4.clone()).await;
@@ -2928,8 +2934,8 @@ mod tests {
#[tokio::test]
async fn test_round_trip_with_shorten() {
testutil::init();
let db = TestDb::new(RealClocks {});
copy_mp4_to_db(&db);
let mut db = TestDb::new(RealClocks {});
copy_mp4_to_db(&mut db);
let mp4 = create_mp4_from_db(&db, 0, 1, false);
traverse(mp4.clone()).await;
let new_filename = write_mp4(&mp4, db.tmpdir.path()).await;

View File

@@ -8,7 +8,6 @@ use db::{dir, recording, writer, Camera, Database, Stream};
use failure::{bail, Error};
use log::{debug, info, trace, warn};
use std::result::Result;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use url::Url;
@@ -22,7 +21,7 @@ where
pub opener: &'a dyn stream::Opener,
pub transport: retina::client::Transport,
pub db: &'tmp Arc<Database<C>>,
pub shutdown: &'tmp Arc<AtomicBool>,
pub shutdown_rx: &'tmp base::shutdown::Receiver,
}
/// Connects to a given RTSP stream and writes recordings to the database via [`writer::Writer`].
@@ -31,7 +30,7 @@ pub struct Streamer<'a, C>
where
C: Clocks + Clone,
{
shutdown: Arc<AtomicBool>,
shutdown_rx: base::shutdown::Receiver,
// State below is only used by the thread in Run.
rotate_offset_sec: i64,
@@ -69,7 +68,7 @@ where
bail!("RTSP URL shouldn't include credentials");
}
Ok(Streamer {
shutdown: env.shutdown.clone(),
shutdown_rx: env.shutdown_rx.clone(),
rotate_offset_sec,
rotate_interval_sec,
db: env.db.clone(),
@@ -94,7 +93,7 @@ where
/// Note that when using Retina as the RTSP library, this must be called
/// within a tokio runtime context; see [tokio::runtime::Handle].
pub fn run(&mut self) {
while !self.shutdown.load(Ordering::SeqCst) {
while self.shutdown_rx.check().is_ok() {
if let Err(e) = self.run_once() {
let sleep_time = time::Duration::seconds(1);
warn!(
@@ -124,7 +123,7 @@ where
d,
status.num_sessions
);
std::thread::sleep(d);
self.shutdown_rx.wait_for(d)?;
waited = true;
}
} else {
@@ -164,7 +163,7 @@ where
self.stream_id,
video_sample_entry_id,
);
while !self.shutdown.load(Ordering::SeqCst) {
while self.shutdown_rx.check().is_ok() {
let pkt = {
let _t = TimerGuard::new(&clocks, || "getting next packet");
stream.next()?
@@ -214,7 +213,13 @@ where
}
};
let _t = TimerGuard::new(&clocks, || format!("writing {} bytes", pkt.data.len()));
w.write(pkt.data, local_time, pkt.pts, pkt.is_key)?;
w.write(
&mut self.shutdown_rx,
pkt.data,
local_time,
pkt.pts,
pkt.is_key,
)?;
rotate = Some(r);
}
if rotate.is_some() {
@@ -236,7 +241,6 @@ mod tests {
use parking_lot::Mutex;
use std::cmp;
use std::convert::TryFrom;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use time;
@@ -312,7 +316,7 @@ mod tests {
struct MockOpener {
expected_url: url::Url,
streams: Mutex<Vec<(h264::ExtraData, Box<dyn stream::Stream>)>>,
shutdown: Arc<AtomicBool>,
shutdown_tx: Mutex<Option<base::shutdown::Sender>>,
}
impl stream::Opener for MockOpener {
@@ -333,7 +337,7 @@ mod tests {
}
None => {
trace!("MockOpener shutting down");
self.shutdown.store(true, Ordering::SeqCst);
self.shutdown_tx.lock().take();
bail!("done")
}
}
@@ -380,16 +384,17 @@ mod tests {
stream.ts_offset = 123456; // starting pts of the input should be irrelevant
stream.ts_offset_pkts_left = u32::max_value();
stream.pkts_left = u32::max_value();
let (shutdown_tx, shutdown_rx) = base::shutdown::channel();
let opener = MockOpener {
expected_url: url::Url::parse("rtsp://test-camera/main").unwrap(),
streams: Mutex::new(vec![(extra_data, Box::new(stream))]),
shutdown: Arc::new(AtomicBool::new(false)),
shutdown_tx: Mutex::new(Some(shutdown_tx)),
};
let db = testutil::TestDb::new(clocks.clone());
let env = super::Environment {
opener: &opener,
db: &db.db,
shutdown: &opener.shutdown,
shutdown_rx: &shutdown_rx,
transport: retina::client::Transport::Tcp,
};
let mut stream;