From d61b5e1bdd4c46d8326a5732a39732cde2f5a508 Mon Sep 17 00:00:00 2001 From: Scott Lamb Date: Thu, 4 Jul 2019 23:22:45 -0500 Subject: [PATCH] Use fixed-size directory meta files Add a new schema version 5; now 4 means the directory meta may or may not be upgraded. Fixes #65: now it's possible to open the directory even if it lies on a completely full disk. --- db/check.rs | 2 +- db/db.rs | 10 ++-- db/dir.rs | 76 ++++++++++++++++++++++------ db/proto/schema.proto | 20 ++++++++ db/schema.sql | 2 +- db/upgrade/mod.rs | 2 + db/upgrade/v3_to_v4.rs | 4 +- db/upgrade/v4_to_v5.rs | 110 +++++++++++++++++++++++++++++++++++++++++ design/schema.md | 29 ++++++++--- guide/schema.md | 11 ++++- src/stream.rs | 3 +- 11 files changed, 236 insertions(+), 33 deletions(-) create mode 100644 db/upgrade/v4_to_v5.rs diff --git a/db/check.rs b/db/check.rs index a3935e5..72c244a 100644 --- a/db/check.rs +++ b/db/check.rs @@ -183,7 +183,7 @@ fn read_dir(path: &str, opts: &Options) -> Result { let f = e.file_name(); let f = f.as_bytes(); match f { - b"meta" | b"meta-tmp" => continue, + b"meta" => continue, _ => {}, }; let id = match dir::parse_id(f) { diff --git a/db/db.rs b/db/db.rs index b4911b3..7ebea03 100644 --- a/db/db.rs +++ b/db/db.rs @@ -83,7 +83,7 @@ use time; use uuid::Uuid; /// Expected schema version. See `guide/schema.md` for more information. 
-pub const EXPECTED_VERSION: i32 = 4; +pub const EXPECTED_VERSION: i32 = 5; const GET_RECORDING_PLAYBACK_SQL: &'static str = r#" select @@ -2189,20 +2189,20 @@ mod tests { fn test_version_too_old() { testutil::init(); let c = setup_conn(); - c.execute_batch("delete from version; insert into version values (3, 0, '');").unwrap(); + c.execute_batch("delete from version; insert into version values (4, 0, '');").unwrap(); let e = Database::new(clock::RealClocks {}, c, false).err().unwrap(); assert!(e.to_string().starts_with( - "Database schema version 3 is too old (expected 4)"), "got: {:?}", e); + "Database schema version 4 is too old (expected 5)"), "got: {:?}", e); } #[test] fn test_version_too_new() { testutil::init(); let c = setup_conn(); - c.execute_batch("delete from version; insert into version values (5, 0, '');").unwrap(); + c.execute_batch("delete from version; insert into version values (6, 0, '');").unwrap(); let e = Database::new(clock::RealClocks {}, c, false).err().unwrap(); assert!(e.to_string().starts_with( - "Database schema version 5 is too new (expected 4)"), "got: {:?}", e); + "Database schema version 6 is too new (expected 5)"), "got: {:?}", e); } /// Basic test of running some queries on a fresh database. diff --git a/db/dir.rs b/db/dir.rs index 2280f7e..fb8fd96 100644 --- a/db/dir.rs +++ b/db/dir.rs @@ -32,6 +32,7 @@ //! //! This includes opening files for serving, rotating away old files, and saving new files. +use crate::coding; use crate::db::CompositeId; use cstr::*; use failure::{Error, Fail, bail, format_err}; @@ -47,6 +48,11 @@ use std::os::unix::ffi::OsStrExt; use std::os::unix::io::FromRawFd; use std::sync::Arc; +/// The fixed length of a directory's `meta` file. +/// +/// See DirMeta comments within proto/schema.proto for more explanation. +const FIXED_DIR_META_LEN: usize = 512; + /// A sample file directory. Typically one per physical disk drive. 
/// /// If the directory is used for writing, the `start_syncer` function should be called to start @@ -100,8 +106,8 @@ impl Fd { } /// Opens a sample file within this directory with the given flags and (if creating) mode. - unsafe fn openat(&self, p: *const c_char, flags: libc::c_int, mode: libc::c_int) - -> Result { + pub(crate) unsafe fn openat(&self, p: *const c_char, flags: libc::c_int, mode: libc::c_int) + -> Result { let fd = libc::openat(self.0, p, flags, mode); if fd < 0 { return Err(io::Error::last_os_error()) @@ -153,6 +159,13 @@ pub(crate) fn read_meta(dir: &Fd) -> Result { }; let mut data = Vec::new(); f.read_to_end(&mut data)?; + let (len, pos) = coding::decode_varint32(&data, 0) + .map_err(|_| format_err!("Unable to decode varint length in meta file"))?; + if data.len() != FIXED_DIR_META_LEN || len as usize + pos > FIXED_DIR_META_LEN { + bail!("Expected a {}-byte file with a varint length of a DirMeta message; got \ + a {}-byte file with length {}", FIXED_DIR_META_LEN, data.len(), len); + } + let data = &data[pos..pos+len as usize]; let mut s = protobuf::CodedInputStream::from_bytes(&data); meta.merge_from(&mut s).map_err(|e| e.context("Unable to parse metadata proto: {}"))?; Ok(meta) @@ -160,14 +173,28 @@ pub(crate) fn read_meta(dir: &Fd) -> Result { /// Write `dir`'s metadata, clobbering existing data. pub(crate) fn write_meta(dir: &Fd, meta: &schema::DirMeta) -> Result<(), Error> { - let tmp_path = cstr!("meta.tmp"); - let final_path = cstr!("meta"); - let mut f = unsafe { dir.openat(tmp_path.as_ptr(), - libc::O_CREAT | libc::O_TRUNC | libc::O_WRONLY, 0o600)? }; - meta.write_to_writer(&mut f)?; - f.sync_all()?; - unsafe { renameat(&dir, tmp_path.as_ptr(), &dir, final_path.as_ptr())? 
}; - dir.sync()?; + let mut data = meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible"); + if data.len() > FIXED_DIR_META_LEN { + bail!("Length-delimited DirMeta message requires {} bytes, over limit of {}", + data.len(), FIXED_DIR_META_LEN); + } + data.resize(FIXED_DIR_META_LEN, 0); // pad to required length. + let path = cstr!("meta"); + let mut f = unsafe { dir.openat(path.as_ptr(), + libc::O_CREAT | libc::O_WRONLY, 0o600)? }; + let stat = f.metadata()?; + if stat.len() == 0 { + // Need to sync not only the data but also the file metadata and dirent. + f.write_all(&data)?; + f.sync_all()?; + dir.sync()?; + } else if stat.len() == FIXED_DIR_META_LEN as u64 { + // Just syncing the data will suffice; existing metadata and dirent are fine. + f.write_all(&data)?; + f.sync_data()?; + } else { + bail!("Existing meta file is {}-byte; expected {}", stat.len(), FIXED_DIR_META_LEN); + } Ok(()) } @@ -183,7 +210,10 @@ impl SampleFileDir { s.fd.lock(if read_write { libc::LOCK_EX } else { libc::LOCK_SH } | libc::LOCK_NB)?; let dir_meta = read_meta(&s.fd)?; if !SampleFileDir::consistent(db_meta, &dir_meta) { - bail!("metadata mismatch.\ndb: {:#?}\ndir: {:#?}", db_meta, &dir_meta); + let serialized = + db_meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible"); + bail!("metadata mismatch.\ndb: {:#?}\ndir: {:#?}\nserialized db: {:#?}", + db_meta, &dir_meta, &serialized); } if db_meta.in_progress_open.is_some() { s.write_meta(db_meta)?; @@ -193,7 +223,7 @@ impl SampleFileDir { /// Returns true if the existing directory and database metadata are consistent; the directory /// is then openable. 
- fn consistent(db_meta: &schema::DirMeta, dir_meta: &schema::DirMeta) -> bool { + pub(crate) fn consistent(db_meta: &schema::DirMeta, dir_meta: &schema::DirMeta) -> bool { if dir_meta.db_uuid != db_meta.db_uuid { return false; } if dir_meta.dir_uuid != db_meta.dir_uuid { return false; } @@ -234,7 +264,7 @@ impl SampleFileDir { let e = e?; match e.file_name().as_bytes() { b"." | b".." => continue, - b"meta" | b"meta-tmp" => continue, // existing metadata is fine. + b"meta" => continue, // existing metadata is fine. _ => return Ok(false), } } @@ -291,7 +321,7 @@ impl SampleFileDir { } } -/// Parse a composite id filename. +/// Parses a composite id filename. /// /// These are exactly 16 bytes, lowercase hex. pub(crate) fn parse_id(id: &[u8]) -> Result { @@ -311,6 +341,9 @@ pub(crate) fn parse_id(id: &[u8]) -> Result { #[cfg(test)] mod tests { + use protobuf::prelude::MessageField; + use super::*; + #[test] fn parse_id() { use super::parse_id; @@ -321,4 +354,19 @@ mod tests { parse_id(b"0").unwrap_err(); parse_id(b"000000010000000x").unwrap_err(); } + + /// Ensures that a DirMeta with all fields filled fits within the maximum size. 
+ #[test] + fn max_len_meta() { + let mut meta = schema::DirMeta::new(); + let fake_uuid = &[0u8; 16][..]; + meta.db_uuid.extend_from_slice(fake_uuid); + meta.dir_uuid.extend_from_slice(fake_uuid); + meta.last_complete_open.mut_message().id = u32::max_value(); + meta.last_complete_open.mut_message().id = u32::max_value(); + meta.in_progress_open.mut_message().uuid.extend_from_slice(fake_uuid); + meta.in_progress_open.mut_message().uuid.extend_from_slice(fake_uuid); + let data = meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible"); + assert!(data.len() <= FIXED_DIR_META_LEN, "{} vs {}", data.len(), FIXED_DIR_META_LEN); + } } diff --git a/db/proto/schema.proto b/db/proto/schema.proto index 796ac53..d819433 100644 --- a/db/proto/schema.proto +++ b/db/proto/schema.proto @@ -34,6 +34,26 @@ syntax = "proto3"; // against the metadata stored within the database to detect inconsistencies // between the directory and database, such as those described in // design/schema.md. +// +// As of schema version 4, the overall file format is as follows: a +// varint-encoded length, followed by a serialized DirMeta message, followed +// by NUL bytes padding to a total length of 512 bytes. This message never +// exceeds that length. +// +// The goal of this format is to allow atomically rewriting a meta file +// in-place. I hope that on modern OSs and hardware, a single-sector +// rewrite is atomic, though POSIX frustratingly doesn't seem to guarantee +// this. There's some discussion of that here: +// . At worst, there's a short +// window during which the meta file can be corrupted. As the file's purpose +// is to check for inconsistencies, it can be reconstructed if you assume no +// inconsistency exists. +// +// Schema version 3 wrote a serialized DirMeta message with no length or +// padding, and renamed new meta files over the top of old. This scheme +// requires extra space while opening the directory. 
If the filesystem is +// completely full, it requires freeing space manually, an undocumented and +// error-prone administrator procedure. message DirMeta { // A uuid associated with the database, in binary form. dir_uuid is strictly // more powerful, but it improves diagnostics to know if the directory diff --git a/db/schema.sql b/db/schema.sql index 19546de..95d50bb 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -489,4 +489,4 @@ create table signal_change ( ); insert into version (id, unix_time, notes) - values (4, cast(strftime('%s', 'now') as int), 'db creation'); + values (5, cast(strftime('%s', 'now') as int), 'db creation'); diff --git a/db/upgrade/mod.rs b/db/upgrade/mod.rs index 7b2f3c5..322cc7f 100644 --- a/db/upgrade/mod.rs +++ b/db/upgrade/mod.rs @@ -41,6 +41,7 @@ mod v0_to_v1; mod v1_to_v2; mod v2_to_v3; mod v3_to_v4; +mod v4_to_v5; const UPGRADE_NOTES: &'static str = concat!("upgraded using moonfire-db ", env!("CARGO_PKG_VERSION")); @@ -66,6 +67,7 @@ pub fn run(args: &Args, conn: &mut rusqlite::Connection) -> Result<(), Error> { v1_to_v2::run, v2_to_v3::run, v3_to_v4::run, + v4_to_v5::run, ]; { diff --git a/db/upgrade/v3_to_v4.rs b/db/upgrade/v3_to_v4.rs index 1ee44eb..9bb32bf 100644 --- a/db/upgrade/v3_to_v4.rs +++ b/db/upgrade/v3_to_v4.rs @@ -1,5 +1,5 @@ -// This file is part of Moonfire NVR, a security camera digital video recorder. -// Copyright (C) 2018 Scott Lamb +// This file is part of Moonfire NVR, a security camera network video recorder. +// Copyright (C) 2019 Scott Lamb // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by diff --git a/db/upgrade/v4_to_v5.rs b/db/upgrade/v4_to_v5.rs new file mode 100644 index 0000000..3dca674 --- /dev/null +++ b/db/upgrade/v4_to_v5.rs @@ -0,0 +1,110 @@ +// This file is part of Moonfire NVR, a security camera network video recorder. 
+// Copyright (C) 2019 Scott Lamb +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// In addition, as a special exception, the copyright holders give +// permission to link the code of portions of this program with the +// OpenSSL library under certain conditions as described in each +// individual source file, and distribute linked combinations including +// the two. +// +// You must obey the GNU General Public License in all respects for all +// of the code used other than OpenSSL. If you modify file(s) with this +// exception, you may extend this exception to your version of the +// file(s), but you are not obligated to do so. If you do not wish to do +// so, delete this exception statement from your version. If you delete +// this exception statement from all source files in the program, then +// also delete it here. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +/// Upgrades a version 4 schema to a version 5 schema. +/// +/// This just handles the directory meta files. If they're already in the new format, great. +/// Otherwise, verify they are consistent with the database then upgrade them. 
+ +use crate::db::FromSqlUuid; +use crate::{dir, schema}; +use cstr::*; +use failure::{Error, Fail, bail}; +use protobuf::{Message, prelude::MessageField}; +use rusqlite::params; +use std::io::{Read, Write}; + +const FIXED_DIR_META_LEN: usize = 512; + +pub fn run(_args: &super::Args, tx: &rusqlite::Transaction) -> Result<(), Error> { + let db_uuid: FromSqlUuid = + tx.query_row_and_then(r"select uuid from meta", params![], |row| row.get(0))?; + let mut stmt = tx.prepare(r#" + select + d.path, + d.uuid, + d.last_complete_open_id, + o.uuid + from + sample_file_dir d + left join open o on (d.last_complete_open_id = o.id); + "#)?; + let mut rows = stmt.query(params![])?; + while let Some(row) = rows.next()? { + let path = row.get_raw_checked(0)?.as_str()?; + let dir_uuid: FromSqlUuid = row.get(1)?; + let open_id: Option = row.get(2)?; + let open_uuid: Option = row.get(3)?; + let mut db_meta = schema::DirMeta::new(); + db_meta.db_uuid.extend_from_slice(&db_uuid.0.as_bytes()[..]); + db_meta.dir_uuid.extend_from_slice(&dir_uuid.0.as_bytes()[..]); + match (open_id, open_uuid) { + (Some(id), Some(uuid)) => { + let mut o = db_meta.last_complete_open.mut_message(); + o.id = id; + o.uuid.extend_from_slice(&uuid.0.as_bytes()[..]); + }, + (None, None) => {}, + _ => bail!("open table missing id"), + } + + let dir = dir::Fd::open(path, false)?; + dir.lock(libc::LOCK_EX)?; + let tmp_path = cstr!("meta.tmp"); + let path = cstr!("meta"); + let mut f = unsafe { dir.openat(path.as_ptr(), libc::O_RDONLY, 0) }?; + let mut data = Vec::new(); + f.read_to_end(&mut data)?; + if data.len() == FIXED_DIR_META_LEN { + continue; // already upgraded. 
+ } + let mut s = protobuf::CodedInputStream::from_bytes(&data); + let mut dir_meta = schema::DirMeta::new(); + dir_meta.merge_from(&mut s) + .map_err(|e| e.context("Unable to parse metadata proto: {}"))?; + if !dir::SampleFileDir::consistent(&db_meta, &dir_meta) { + bail!("Inconsistent db_meta={:?} dir_meta={:?}", &db_meta, &dir_meta); + } + let mut f = unsafe { dir.openat(tmp_path.as_ptr(), + libc::O_CREAT | libc::O_TRUNC | libc::O_WRONLY, 0o600)? }; + let mut data = + dir_meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible"); + if data.len() > FIXED_DIR_META_LEN { + bail!("Length-delimited DirMeta message requires {} bytes, over limit of {}", + data.len(), FIXED_DIR_META_LEN); + } + data.resize(FIXED_DIR_META_LEN, 0); // pad to required length. + f.write_all(&data)?; + f.sync_all()?; + unsafe { dir::renameat(&dir, tmp_path.as_ptr(), &dir, path.as_ptr())? }; + dir.sync()?; + } + Ok(()) +} diff --git a/design/schema.md b/design/schema.md index 1e41fe1..704747d 100644 --- a/design/schema.md +++ b/design/schema.md @@ -268,6 +268,24 @@ create table sample_file_dir ( ``` ```proto +// Metadata stored in sample file dirs as "/meta". This is checked +// against the metadata stored within the database to detect inconsistencies +// between the directory and database, such as those described in +// design/schema.md. +// +// As of schema version 4, the overall file format is as follows: a +// varint-encoded length, followed by a serialized DirMeta message, followed +// by NUL bytes padding to a total length of 512 bytes. This message never +// exceeds that length. +// +// The goal of this format is to allow atomically rewriting a meta file +// in-place. I hope that on modern OSs and hardware, a single-sector +// rewrite is atomic, though POSIX frustratingly doesn't seem to guarantee +// this. There's some discussion of that here: +// . At worst, there's a short +// window during which the meta file can be corrupted. 
As the file's purpose +// is to check for inconsistencies, it can be reconstructed if you assume no +// inconsistency exists. message DirMeta { // A uuid associated with the database, in binary form. dir_uuid is strictly // more powerful, but it improves diagnostics to know if the directory @@ -302,13 +320,12 @@ These are updated through procedures below: This is a sub-procedure used in several places below. -Precondition: the directory's lock is held with `LOCK_EX` (exclusive). +Precondition: the directory's lock is held with `LOCK_EX` (exclusive) and +there is an existing metadata file. - 1. Write a new `meta.tmp` (opened with `O_CREAT|O_TRUNC` to discard an - existing temporary file if any). - 2. `fsync` the `meta.tmp` file descriptor. - 3. `rename` `meta.tmp` to `meta`. - 4. `fsync` the directory. + 1. Open the metadata file. + 2. Rewrite the fixed-length data atomically. + 3. `fdatasync` the file. *Open the database as read-only* diff --git a/guide/schema.md b/guide/schema.md index c996c2c..b89d29f 100644 --- a/guide/schema.md +++ b/guide/schema.md @@ -223,11 +223,18 @@ Version 3 adds over version 1: * additional timestamp fields which may be useful in diagnosing/correcting time jumps/inconsistencies. -### Version 3 to version 4 +### Version 3 to version 4 to version 5 -This upgrade affects only the SQLite database. Version 4 adds over version 3: +This upgrade affects the SQLite database and the `meta` file within each +sample file directory. + +Version 4 represents a half-finished upgrade from version 3 to version 5; it +is never used. + +Version 5 adds over version 3: * permissions for users and sessions. Existing users will have only the `view_video` permission, matching their previous behavior. * the `signals` schema, used to store status of signals such as camera motion detection, security system zones, etc. +* the ability to recover from a completely full sample file directory (#65) + without manual intervention. 
diff --git a/src/stream.rs b/src/stream.rs index e955229..e8a3440 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -34,8 +34,7 @@ use failure::{Error, bail}; use ffmpeg; use lazy_static::lazy_static; use log::{debug, info, warn}; -use std::os::raw::c_char; -use std::ffi::{CStr, CString}; +use std::ffi::CString; use std::result::Result; use std::sync;