overhaul error messages

Inspired by the poor error message here:
https://github.com/scottlamb/moonfire-nvr/issues/107#issuecomment-777587727

*   print the friendlier Display version of the error rather than Debug.
    Eg, "EROFS: Read-only filesystem" rather than "Sys(EROFS)". Do this
    everywhere: on command exit, on syncer retries, and on stream
    retries.
*   print the most immediate problem and additional lines for each
    cause.
*   print the backtrace or an advertisement for RUST_BACKTRACE=1 if it's
    unavailable.
*   also mention RUST_BACKTRACE=1 in the troubleshooting guide.
*   add context in various places, including pathnames. There are surely
    many places more it'd be helpful, but this is a start.
*   allow subcommands to return failure without an Error.
    In particular, "moonfire-nvr check" does its own error printing
    because it wants to print all the errors it finds. Printing "see
    earlier errors" with a meaningless stack trace seems like it'd just
    confuse. But I also want to get rid of the misleading "Success" at
    the end and 0 return to the OS.
This commit is contained in:
Scott Lamb 2021-02-11 10:45:56 -08:00
parent ff1615a0b4
commit 9a5957d5ef
19 changed files with 101 additions and 55 deletions

View File

@ -15,7 +15,7 @@ While Moonfire NVR is running, logs will be written to stderr.
likely want to set `MOONFIRE_FORMAT=google-systemd` to format logs as
expected by systemd.
Logging options are controlled by environmental variables:
Logging options are controlled by environment variables:
* `MOONFIRE_LOG` controls the log level. Its format is similar to the
`RUST_LOG` variable used by the
@ -27,6 +27,7 @@ Logging options are controlled by environmental variables:
accepted are `google` (the default, like the Google
[glog](https://github.com/google/glog) package) and `google-systemd` (a
variation for better systemd compatibility).
* Errors include a backtrace if `RUST_BACKTRACE=1` is set.
If you use Docker, set these via Docker's `--env` argument.

View File

@ -64,7 +64,7 @@ where C: Clocks, E: Into<Error> {
Err(e) => e.into(),
};
let sleep_time = Duration::seconds(1);
warn!("sleeping for {:?} after error: {:?}", sleep_time, e);
warn!("sleeping for {:?} after error: {}", sleep_time, crate::error::prettify_failure(&e));
clocks.sleep(sleep_time);
}
}

View File

@ -29,7 +29,22 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use failure::{Backtrace, Context, Fail};
use std::fmt;
use std::fmt::{self, Write};
/// Returns a pretty-and-informative version of `e`.
pub fn prettify_failure(e: &failure::Error) -> String {
let mut msg = e.to_string();
for cause in e.iter_causes() {
write!(&mut msg, "caused by: {}", cause).unwrap();
}
if e.backtrace().is_empty() {
write!(&mut msg, "\n(set environment variable RUST_BACKTRACE=1 to see backtraces)")
.unwrap();
} else {
write!(&mut msg, "\nBacktrace:\n{}", e.backtrace()).unwrap();
}
msg
}
#[derive(Debug)]
pub struct Error {

View File

@ -33,4 +33,4 @@ pub mod time;
mod error;
pub mod strutil;
pub use crate::error::{Error, ErrorKind, ResultExt};
pub use crate::error::{Error, ErrorKind, ResultExt, prettify_failure};

View File

@ -47,13 +47,16 @@ pub struct Options {
pub compare_lens: bool,
}
pub fn run(conn: &rusqlite::Connection, opts: &Options) -> Result<(), Error> {
pub fn run(conn: &rusqlite::Connection, opts: &Options) -> Result<i32, Error> {
let mut printed_error = false;
// Compare schemas.
{
let mut expected = rusqlite::Connection::open_in_memory()?;
db::init(&mut expected)?;
if let Some(diffs) = compare::get_diffs("actual", conn, "expected", &expected)? {
println!("{}", &diffs);
error!("Schema is not as expected:\n{}", &diffs);
printed_error = true;
} else {
println!("Schema is as expected.");
}
@ -88,7 +91,8 @@ pub fn run(conn: &rusqlite::Connection, opts: &Options) -> Result<(), Error> {
}
// Open the directory (checking its metadata) and hold it open (for the lock).
let dir = dir::SampleFileDir::open(&dir_path, &meta)?;
let dir = dir::SampleFileDir::open(&dir_path, &meta)
.map_err(|e| e.context(format!("unable to open dir {}", dir_path)))?;
let mut streams = read_dir(&dir, opts)?;
let mut rows = garbage_stmt.query(params![dir_id])?;
while let Some(row) = rows.next()? {
@ -113,7 +117,7 @@ pub fn run(conn: &rusqlite::Connection, opts: &Options) -> Result<(), Error> {
None => Stream::default(),
Some(d) => d.remove(&stream_id).unwrap_or_else(Stream::default),
};
compare_stream(conn, stream_id, opts, stream)?;
printed_error |= compare_stream(conn, stream_id, opts, stream)?;
}
}
@ -125,12 +129,13 @@ pub fn run(conn: &rusqlite::Connection, opts: &Options) -> Result<(), Error> {
if r.recording_row.is_some() || r.playback_row.is_some() ||
r.integrity_row || !r.garbage_row {
error!("dir {} recording {} for unknown stream: {:#?}", dir_id, id, r);
printed_error = true;
}
}
}
}
Ok(())
Ok(if printed_error { 1 } else { 0 })
}
#[derive(Debug, Eq, PartialEq)]
@ -217,9 +222,10 @@ fn read_dir(d: &dir::SampleFileDir, opts: &Options) -> Result<Dir, Error> {
/// Looks through a known stream for errors.
fn compare_stream(conn: &rusqlite::Connection, stream_id: i32, opts: &Options,
mut stream: Stream) -> Result<(), Error> {
mut stream: Stream) -> Result<bool, Error> {
let start = CompositeId::new(stream_id, 0);
let end = CompositeId::new(stream_id, i32::max_value());
let mut printed_error = false;
// recording row.
{
@ -271,6 +277,7 @@ fn compare_stream(conn: &rusqlite::Connection, stream_id: i32, opts: &Options,
Ok(s) => s,
Err(e) => {
error!("id {} has bad video_index: {}", id, e);
printed_error = true;
continue;
},
};
@ -307,6 +314,7 @@ fn compare_stream(conn: &rusqlite::Connection, stream_id: i32, opts: &Options,
if !recording.garbage_row || recording.playback_row.is_some() ||
recording.integrity_row {
error!("Missing recording row for {}: {:#?}", id, recording);
printed_error = true;
}
continue;
},
@ -315,17 +323,25 @@ fn compare_stream(conn: &rusqlite::Connection, stream_id: i32, opts: &Options,
Some(ref p) => {
if r != p {
error!("Recording {} summary doesn't match video_index: {:#?}", id, recording);
printed_error = true;
}
},
None => error!("Recording {} missing playback row: {:#?}", id, recording),
None => {
error!("Recording {} missing playback row: {:#?}", id, recording);
printed_error = true;
},
}
match recording.file {
Some(len) => if opts.compare_lens && r.bytes != len {
error!("Recording {} length mismatch: {:#?}", id, recording);
printed_error = true;
},
None => {
error!("Recording {} missing file: {:#?}", id, recording);
printed_error = true;
},
None => error!("Recording {} missing file: {:#?}", id, recording),
}
}
Ok(())
Ok(printed_error)
}

View File

@ -1213,7 +1213,8 @@ impl LockedDatabase {
open.id = o.id;
open.uuid.extend_from_slice(&o.uuid.as_bytes()[..]);
}
let d = dir::SampleFileDir::open(&dir.path, &meta)?;
let d = dir::SampleFileDir::open(&dir.path, &meta)
.map_err(|e| e.context(format!("Failed to open dir {}", dir.path)))?;
if self.open.is_none() { // read-only mode; it's already fully opened.
dir.dir = Some(d);
} else { // read-write mode; there are more steps to do.

View File

@ -151,7 +151,7 @@ pub(crate) fn read_meta(dir: &Fd) -> Result<schema::DirMeta, Error> {
}
let data = &data[pos..pos+len as usize];
let mut s = protobuf::CodedInputStream::from_bytes(&data);
meta.merge_from(&mut s).map_err(|e| e.context("Unable to parse metadata proto: {}"))?;
meta.merge_from(&mut s).map_err(|e| e.context("Unable to parse metadata proto"))?;
Ok(meta)
}
@ -164,17 +164,18 @@ pub(crate) fn write_meta(dirfd: RawFd, meta: &schema::DirMeta) -> Result<(), Err
}
data.resize(FIXED_DIR_META_LEN, 0); // pad to required length.
let mut f = crate::fs::openat(dirfd, cstr!("meta"), OFlag::O_CREAT | OFlag::O_WRONLY,
Mode::S_IRUSR | Mode::S_IWUSR)?;
let stat = f.metadata()?;
Mode::S_IRUSR | Mode::S_IWUSR)
.map_err(|e| e.context("Unable to open meta file"))?;
let stat = f.metadata().map_err(|e| e.context("Unable to stat meta file"))?;
if stat.len() == 0 {
// Need to sync not only the data but also the file metadata and dirent.
f.write_all(&data)?;
f.sync_all()?;
nix::unistd::fsync(dirfd)?;
f.write_all(&data).map_err(|e| e.context("Unable to write to meta file"))?;
f.sync_all().map_err(|e| e.context("Unable to sync meta file"))?;
nix::unistd::fsync(dirfd).map_err(|e| e.context("Unable to sync dir"))?;
} else if stat.len() == FIXED_DIR_META_LEN as u64 {
// Just syncing the data will suffice; existing metadata and dirent are fine.
f.write_all(&data)?;
f.sync_data()?;
f.write_all(&data).map_err(|e| e.context("Unable to write to meta file"))?;
f.sync_data().map_err(|e| e.context("Unable to sync meta file"))?;
} else {
bail!("Existing meta file is {}-byte; expected {}", stat.len(), FIXED_DIR_META_LEN);
}
@ -194,8 +195,8 @@ impl SampleFileDir {
FlockArg::LockExclusiveNonblock
} else {
FlockArg::LockSharedNonblock
})?;
let dir_meta = read_meta(&s.fd)?;
}).map_err(|e| e.context(format!("unable to lock dir {}", path)))?;
let dir_meta = read_meta(&s.fd).map_err(|e| e.context("unable to read meta file"))?;
if !SampleFileDir::consistent(db_meta, &dir_meta) {
let serialized =
db_meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible");
@ -230,7 +231,8 @@ impl SampleFileDir {
pub(crate) fn create(path: &str, db_meta: &schema::DirMeta)
-> Result<Arc<SampleFileDir>, Error> {
let s = SampleFileDir::open_self(path, true)?;
s.fd.lock(FlockArg::LockExclusiveNonblock)?;
s.fd.lock(FlockArg::LockExclusiveNonblock)
.map_err(|e| e.context(format!("unable to lock dir {}", path)))?;
let old_meta = read_meta(&s.fd)?;
// Verify metadata. We only care that it hasn't been completely opened.
@ -265,8 +267,7 @@ impl SampleFileDir {
}
fn open_self(path: &str, create: bool) -> Result<Arc<SampleFileDir>, Error> {
let fd = Fd::open(path, create)
.map_err(|e| format_err!("unable to open sample file dir {}: {}", path, e))?;
let fd = Fd::open(path, create)?;
Ok(Arc::new(SampleFileDir {
fd,
}))

View File

@ -146,7 +146,8 @@ pub fn run(_args: &super::Args, tx: &rusqlite::Transaction) -> Result<(), Error>
}
let dir = dir::Fd::open(path, false)?;
dir.lock(FlockArg::LockExclusiveNonblock)?;
dir.lock(FlockArg::LockExclusiveNonblock)
.map_err(|e| e.context(format!("unable to lock dir {}", path)))?;
let mut need_sync = maybe_upgrade_meta(&dir, &db_meta)?;
if maybe_cleanup_garbage_uuids(&dir)? {

View File

@ -47,7 +47,7 @@ pub struct Args {
compare_lens: bool,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
// TODO: ReadOnly should be sufficient but seems to fail.
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
check::run(&conn, &check::Options {

View File

@ -54,7 +54,7 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let clocks = clock::RealClocks {};
let db = Arc::new(db::Database::new(clocks, conn, true)?);
@ -77,5 +77,5 @@ pub fn run(args: &Args) -> Result<(), Error> {
siv.run();
Ok(())
Ok(0)
}

View File

@ -41,14 +41,14 @@ pub struct Args {
db_dir: PathBuf,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::Create)?;
// Check if the database has already been initialized.
let cur_ver = db::get_schema_version(&conn)?;
if let Some(v) = cur_ver {
info!("Database is already initialized with schema version {}.", v);
return Ok(());
return Ok(0);
}
// Use WAL mode (which is the most efficient way to preserve database integrity) with a large
@ -63,5 +63,5 @@ pub fn run(args: &Args) -> Result<(), Error> {
"#)?;
db::init(&mut conn)?;
info!("Database initialized.");
Ok(())
Ok(0)
}

View File

@ -71,7 +71,7 @@ pub struct Args {
username: String,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
let db = std::sync::Arc::new(db::Database::new(clocks.clone(), conn, true).unwrap());
@ -116,7 +116,7 @@ pub fn run(args: &Args) -> Result<(), Error> {
} else {
println!("s={}", encoded);
}
Ok(())
Ok(0)
}
fn curl_cookie(cookie: &str, flags: i32, domain: &str) -> String {

View File

@ -59,12 +59,12 @@ fn open_dir(db_dir: &Path, mode: OpenMode) -> Result<dir::Fd, Error> {
format!("db dir {} not found; try running moonfire-nvr init",
db_dir.display())
} else {
format!("unable to open db dir {}: {}", db_dir.display(), &e)
format!("unable to open db dir {}", db_dir.display())
}))?;
let ro = mode == OpenMode::ReadOnly;
dir.lock(if ro { FlockArg::LockSharedNonblock } else { FlockArg::LockExclusiveNonblock })
.map_err(|e| e.context(format!("db dir {} already in use; can't get {} lock",
db_dir.display(), if ro { "shared" } else { "exclusive" })))?;
.map_err(|e| e.context(format!("unable to get {} lock on db dir {} ",
if ro { "shared" } else { "exclusive" }, db_dir.display())))?;
Ok(dir)
}

View File

@ -175,7 +175,7 @@ struct Syncer {
}
#[tokio::main]
pub async fn run(args: &Args) -> Result<(), Error> {
pub async fn run(args: &Args) -> Result<i32, Error> {
let clocks = clock::RealClocks {};
let (_db_dir, conn) = super::open_conn(
&args.db_dir,
@ -323,5 +323,5 @@ pub async fn run(args: &Args) -> Result<(), Error> {
info!("Waiting for HTTP requests to finish.");
server_handle.await??;
info!("Exiting.");
Ok(())
Ok(0)
}

View File

@ -32,6 +32,7 @@
use failure::Error;
use std::ffi::OsString;
use std::os::unix::process::CommandExt;
use std::path::PathBuf;
use std::process::Command;
use super::OpenMode;
@ -58,7 +59,7 @@ pub struct Args {
arg: Vec<OsString>,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
let mode = if args.read_only { OpenMode::ReadOnly } else { OpenMode::ReadWrite };
let _db_dir = super::open_dir(&args.db_dir, mode)?;
let mut db = OsString::new();
@ -68,6 +69,5 @@ pub fn run(args: &Args) -> Result<(), Error> {
if args.read_only {
db.push("?mode=ro");
}
Command::new("sqlite3").arg(&db).args(&args.arg).status()?;
Ok(())
Err(Command::new("sqlite3").arg(&db).args(&args.arg).exec().into())
}

View File

@ -43,10 +43,10 @@ pub struct Args {
timestamps: Vec<String>,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
for timestamp in &args.timestamps {
let t = db::recording::Time::parse(timestamp)?;
println!("{} == {}", t, t.0);
}
Ok(())
Ok(0)
}

View File

@ -59,12 +59,13 @@ pub struct Args {
no_vacuum: bool,
}
pub fn run(args: &Args) -> Result<(), Error> {
pub fn run(args: &Args) -> Result<i32, Error> {
let (_db_dir, mut conn) = super::open_conn(&args.db_dir, super::OpenMode::ReadWrite)?;
db::upgrade::run(&db::upgrade::Args {
sample_file_dir: args.sample_file_dir.as_ref().map(std::path::PathBuf::as_path),
preset_journal: &args.preset_journal,
no_vacuum: args.no_vacuum,
}, &mut conn)
}, &mut conn)?;
Ok(0)
}

View File

@ -30,7 +30,7 @@
#![cfg_attr(all(feature="nightly", test), feature(test))]
use log::{error, info};
use log::{debug, error};
use std::str::FromStr;
use structopt::StructOpt;
@ -111,7 +111,7 @@ enum Args {
}
impl Args {
fn run(&self) -> Result<(), failure::Error> {
fn run(&self) -> Result<i32, failure::Error> {
match self {
Args::Check(ref a) => cmds::check::run(a),
Args::Config(ref a) => cmds::config::run(a),
@ -136,9 +136,18 @@ fn main() {
.build();
h.clone().install().unwrap();
if let Err(e) = { let _a = h.async_scope(); args.run() } {
error!("{:?}", e);
let r = {
let _a = h.async_scope();
args.run()
};
match r {
Err(e) => {
error!("Exiting due to error: {}", base::prettify_failure(&e));
::std::process::exit(1);
},
Ok(rv) => {
debug!("Exiting with status {}", rv);
std::process::exit(rv)
},
}
info!("Success.");
}

View File

@ -103,7 +103,8 @@ impl<'a, C, S> Streamer<'a, C, S> where C: 'a + Clocks + Clone, S: 'a + stream::
while !self.shutdown.load(Ordering::SeqCst) {
if let Err(e) = self.run_once() {
let sleep_time = time::Duration::seconds(1);
warn!("{}: sleeping for {:?} after error: {:?}", self.short_name, sleep_time, e);
warn!("{}: sleeping for {:?} after error: {}",
self.short_name, sleep_time, base::prettify_failure(&e));
self.db.clocks().sleep(sleep_time);
}
}