shutdown better

After a frustrating search for a suitable channel to use for shutdown
(tokio::sync::watch::Receiver and
futures::future::Shared<tokio::sync::oneshot::Receiver> didn't look
quite right) in which I rethought my life decisions, I finally just made
my own (server/base/shutdown.rs). We can easily poll it or wait for it
in async or sync contexts. Most importantly, it's convenient; not that
it really matters here, but it's also efficient.

We now do a slightly better job of propagating a "graceful" shutdown
signal, and this channel will give us tools to improve it over time.

* Shut down even when writer or syncer operations are stuck. Fixes #117
* Not done yet: streamers should instantly shut down without waiting for
  a connection attempt or frame or something. I'll probably
  implement that when removing --rtsp-library=ffmpeg. The code should be
  cleaner then.
* Not done yet: fix a couple places that sleep for up to a second when
  they could shut down immediately. I just need to do the plumbing for
  mock clocks to work.

I also implemented an immediate shutdown mode, activated by a second
signal. I think this will mitigate the streamer wait situation.
This commit is contained in:
Scott Lamb
2021-09-23 15:55:53 -07:00
parent 66f76079c0
commit b41a6c43da
21 changed files with 487 additions and 151 deletions

View File

@@ -46,7 +46,7 @@ pub struct Args {
trash_corrupt_rows: bool,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
check::run(
&mut conn,

View File

@@ -31,7 +31,7 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let clocks = clock::RealClocks {};
let db = Arc::new(db::Database::new(clocks, conn, true)?);

View File

@@ -19,7 +19,7 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::Create)?;
// Check if the database has already been initialized.

View File

@@ -53,7 +53,7 @@ pub struct Args {
username: String,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let db = std::sync::Arc::new(db::Database::new(clocks, conn, true).unwrap());

View File

@@ -8,12 +8,10 @@ use base::clock;
use db::{dir, writer};
use failure::{bail, Error, ResultExt};
use fnv::FnvHashMap;
use futures::future::FutureExt;
use hyper::service::{make_service_fn, service_fn};
use log::error;
use log::{info, warn};
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use structopt::StructOpt;
@@ -171,16 +169,55 @@ struct Syncer {
join: thread::JoinHandle<()>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let mut builder = tokio::runtime::Builder::new_multi_thread();
builder.enable_all();
if let Some(worker_threads) = args.worker_threads {
builder.worker_threads(worker_threads);
}
builder.build().unwrap().block_on(async_run(args))
let rt = builder.build()?;
let r = rt.block_on(async_run(args));
// tokio normally waits for all spawned tasks to complete, but:
// * in the graceful shutdown path, we wait for specific tasks with logging.
// * in the immediate shutdown path, we don't want to wait.
rt.shutdown_background();
r
}
async fn async_run(args: &Args) -> Result<i32, Error> {
async fn async_run(args: Args) -> Result<i32, Error> {
let (shutdown_tx, shutdown_rx) = base::shutdown::channel();
let mut shutdown_tx = Some(shutdown_tx);
tokio::pin! {
let int = signal(SignalKind::interrupt())?;
let term = signal(SignalKind::terminate())?;
let inner = inner(args, shutdown_rx);
}
tokio::select! {
_ = int.recv() => {
info!("Received SIGINT; shutting down gracefully. \
Send another SIGINT or SIGTERM to shut down immediately.");
shutdown_tx.take();
},
_ = term.recv() => {
info!("Received SIGTERM; shutting down gracefully. \
Send another SIGINT or SIGTERM to shut down immediately.");
shutdown_tx.take();
},
result = &mut inner => return result,
}
tokio::select! {
_ = int.recv() => bail!("immediate shutdown due to second signal (SIGINT)"),
_ = term.recv() => bail!("immediate shutdown due to second singal (SIGTERM)"),
result = &mut inner => result,
}
}
async fn inner(args: Args, shutdown_rx: base::shutdown::Receiver) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(
&args.db_dir,
@@ -215,7 +252,6 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
})?);
// Start a streamer for each stream.
let shutdown_streamers = Arc::new(AtomicBool::new(false));
let mut streamers = Vec::new();
let mut session_groups_by_camera: FnvHashMap<i32, Arc<retina::client::SessionGroup>> =
FnvHashMap::default();
@@ -230,7 +266,7 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
db: &db,
opener: args.rtsp_library.opener(),
transport: args.rtsp_transport,
shutdown: &shutdown_streamers,
shutdown_rx: &shutdown_rx,
};
// Get the directories that need syncers.
@@ -248,7 +284,7 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
drop(l);
let mut syncers = FnvHashMap::with_capacity_and_hasher(dirs.len(), Default::default());
for (id, dir) in dirs.drain() {
let (channel, join) = writer::start_syncer(db.clone(), id)?;
let (channel, join) = writer::start_syncer(db.clone(), shutdown_rx.clone(), id)?;
syncers.insert(id, Syncer { dir, channel, join });
}
@@ -319,34 +355,31 @@ async fn async_run(args: &Args) -> Result<i32, Error> {
.with_context(|_| format!("unable to bind --http-addr={}", &args.http_addr))?
.tcp_nodelay(true)
.serve(make_svc);
let mut int = signal(SignalKind::interrupt())?;
let mut term = signal(SignalKind::terminate())?;
let shutdown = futures::future::select(Box::pin(int.recv()), Box::pin(term.recv()));
let (shutdown_tx, shutdown_rx) = futures::channel::oneshot::channel();
let server = server.with_graceful_shutdown(shutdown_rx.map(|_| ()));
let server = server.with_graceful_shutdown(shutdown_rx.future());
let server_handle = tokio::spawn(server);
info!("Ready to serve HTTP requests");
shutdown.await;
shutdown_tx.send(()).unwrap();
let _ = shutdown_rx.as_future().await;
info!("Shutting down streamers.");
shutdown_streamers.store(true, Ordering::SeqCst);
for streamer in streamers.drain(..) {
streamer.join().unwrap();
}
if let Some(mut ss) = syncers {
// The syncers shut down when all channels to them have been dropped.
// The database maintains one; and `ss` holds one. Drop both.
db.lock().clear_on_flush();
for (_, s) in ss.drain() {
drop(s.channel);
s.join.join().unwrap();
info!("Shutting down streamers and syncers.");
tokio::task::spawn_blocking({
let db = db.clone();
move || {
for streamer in streamers.drain(..) {
streamer.join().unwrap();
}
if let Some(mut ss) = syncers {
// The syncers shut down when all channels to them have been dropped.
// The database maintains one; and `ss` holds one. Drop both.
db.lock().clear_on_flush();
for (_, s) in ss.drain() {
drop(s.channel);
s.join.join().unwrap();
}
}
}
}
})
.await?;
db.lock().clear_watches();

View File

@@ -37,7 +37,7 @@ pub struct Args {
arg: Vec<OsString>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let mode = if args.read_only {
OpenMode::ReadOnly
} else {

View File

@@ -17,7 +17,7 @@ pub struct Args {
timestamps: Vec<String>,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
for timestamp in &args.timestamps {
let t = db::recording::Time::parse(timestamp)?;
println!("{} == {}", t, t.0);

View File

@@ -40,7 +40,7 @@ pub struct Args {
no_vacuum: bool,
}
pub fn run(args: &Args) -> Result<i32, Error> {
pub fn run(args: Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
db::upgrade::run(