From 9099d07dfa6dc6436f8af44972a1d5e43b3e57da Mon Sep 17 00:00:00 2001 From: Scott Lamb Date: Wed, 10 Mar 2021 08:12:49 -0800 Subject: [PATCH] improve panic messages and docs (#112) --- guide/troubleshooting.md | 23 +++++++++++++++++++---- server/src/main.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/guide/troubleshooting.md b/guide/troubleshooting.md index 8b2e949..f4a52df 100644 --- a/guide/troubleshooting.md +++ b/guide/troubleshooting.md @@ -7,7 +7,7 @@ need more help. * [Troubleshooting](#troubleshooting) * [Viewing Moonfire NVR's logs](#viewing-moonfire-nvrs-logs) * [Flushes](#flushes) - * [`thread '...' panicked` errors](#thread--panicked-errors) + * [Panic errors](#panic-errors) * [Slow operations](#slow-operations) * [Camera stream errors](#camera-stream-errors) * [Problems](#problems) @@ -76,6 +76,17 @@ PPPP = module path ... = message body ``` +Moonfire NVR names a few important thread types as follows: + +* `main`: during `moonfire-nvr run`, the main thread does initial setup then + just waits for the other threads. In other subcommands, it does everything. +* `s-CAMERA-TYPE`: there is one of these threads for every recorded stream + (up to two per camera, where `TYPE` is `main` or `sub`). These threads read + frames from the cameras via RTSP and write them to disk. +* `sync-PATH`: there is one of these threads for every sample file directory. + These threads call `fsync` to commit sample files to disk, delete old sample + files, and flush the database. + You can use the following command to teach [`lnav`](http://lnav.org/) Moonfire NVR's log format: @@ -149,17 +160,21 @@ This log message is packed with debugging information: of recordings, and the IDs of each recording. For GCed recordings, the sizes are omitted (as this information is not stored). -### `thread '...' panicked` errors +### Panic errors Errors like the one below indicate a serious bug in Moonfire NVR. 
Please file a bug if you see one. It's helpful to set the `RUST_BACKTRACE` environment variable to include more information. ``` -thread 's-peck_west-main' panicked at 'should always be an unindexed sample', /usr/local/src/moonfire-nvr/server/db/writer.rs:750:54 -note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace +E20210304 11:09:29.230 main s-peck_west-main] panic at 'src/moonfire-nvr/server/db/writer.rs:750:54': should always be an unindexed sample + +(set environment variable RUST_BACKTRACE=1 to see backtraces) ``` +In this case, a stream thread (one starting with `s-`) panicked. That stream +won't record again until Moonfire NVR is restarted. + ### Slow operations Warnings like the following indicate that some operation took more than 1 diff --git a/server/src/main.rs b/server/src/main.rs index cc81553..16ed058 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -5,6 +5,7 @@ #![cfg_attr(all(feature = "nightly", test), feature(test))] use log::{debug, error}; +use std::fmt::Write; use std::str::FromStr; use structopt::StructOpt; @@ -107,6 +108,34 @@ impl Args { } } +/// Custom panic hook that logs instead of directly writing to stderr. +/// +/// This means it includes a timestamp and is more recognizable as a serious +/// error (including console color coding by default, a format `lnav` will +/// recognize, etc.).
+///
+/// The panic message is logged for both `&str` and `String` payloads.
+fn panic_hook(p: &std::panic::PanicInfo) {
+    let mut msg = match p.location() {
+        Some(l) => format!("panic at '{}'", l),
+        None => "panic".to_owned(),
+    };
+    // `panic!("literal")` carries a `&str`; `panic!("{}", x)` carries a `String`.
+    let payload = p.payload();
+    if let Some(s) = payload.downcast_ref::<&str>() {
+        write!(&mut msg, ": {}", s).unwrap();
+    } else if let Some(s) = payload.downcast_ref::<String>() {
+        write!(&mut msg, ": {}", s).unwrap();
+    }
+    let b = failure::Backtrace::new();
+    if b.is_empty() {
+        msg.push_str("\n\n(set environment variable RUST_BACKTRACE=1 to see backtraces)");
+    } else {
+        write!(&mut msg, "\n\nBacktrace:\n{}", b).unwrap();
+    }
+    error!("{}", msg);
+}
+
 fn main() {
     let args = Args::from_args();
     let mut h = mylog::Builder::new()
@@ -126,6 +155,8 @@ fn main() {
         .build();
     h.clone().install().unwrap();
 
+    std::panic::set_hook(Box::new(&panic_hook));
+
     let r = {
         let _a = h.async_scope();
         args.run()