Use fixed-size directory meta files

Add a new schema version 5; now 4 means the directory meta may or may
not be upgraded.

Fixes #65: now it's possible to open the directory even if it lies on a
completely full disk.
This commit is contained in:
Scott Lamb 2019-07-04 23:22:45 -05:00
parent 13b192949d
commit d61b5e1bdd
11 changed files with 236 additions and 33 deletions

View File

@ -183,7 +183,7 @@ fn read_dir(path: &str, opts: &Options) -> Result<Dir, Error> {
let f = e.file_name();
let f = f.as_bytes();
match f {
b"meta" | b"meta-tmp" => continue,
b"meta" => continue,
_ => {},
};
let id = match dir::parse_id(f) {

View File

@ -83,7 +83,7 @@ use time;
use uuid::Uuid;
/// Expected schema version. See `guide/schema.md` for more information.
pub const EXPECTED_VERSION: i32 = 4;
pub const EXPECTED_VERSION: i32 = 5;
const GET_RECORDING_PLAYBACK_SQL: &'static str = r#"
select
@ -2189,20 +2189,20 @@ mod tests {
fn test_version_too_old() {
testutil::init();
let c = setup_conn();
c.execute_batch("delete from version; insert into version values (3, 0, '');").unwrap();
c.execute_batch("delete from version; insert into version values (4, 0, '');").unwrap();
let e = Database::new(clock::RealClocks {}, c, false).err().unwrap();
assert!(e.to_string().starts_with(
"Database schema version 3 is too old (expected 4)"), "got: {:?}", e);
"Database schema version 4 is too old (expected 5)"), "got: {:?}", e);
}
#[test]
fn test_version_too_new() {
testutil::init();
let c = setup_conn();
c.execute_batch("delete from version; insert into version values (5, 0, '');").unwrap();
c.execute_batch("delete from version; insert into version values (6, 0, '');").unwrap();
let e = Database::new(clock::RealClocks {}, c, false).err().unwrap();
assert!(e.to_string().starts_with(
"Database schema version 5 is too new (expected 4)"), "got: {:?}", e);
"Database schema version 6 is too new (expected 5)"), "got: {:?}", e);
}
/// Basic test of running some queries on a fresh database.

View File

@ -32,6 +32,7 @@
//!
//! This includes opening files for serving, rotating away old files, and saving new files.
use crate::coding;
use crate::db::CompositeId;
use cstr::*;
use failure::{Error, Fail, bail, format_err};
@ -47,6 +48,11 @@ use std::os::unix::ffi::OsStrExt;
use std::os::unix::io::FromRawFd;
use std::sync::Arc;
/// The fixed length of a directory's `meta` file.
///
/// See DirMeta comments within proto/schema.proto for more explanation.
const FIXED_DIR_META_LEN: usize = 512;
/// A sample file directory. Typically one per physical disk drive.
///
/// If the directory is used for writing, the `start_syncer` function should be called to start
@ -100,8 +106,8 @@ impl Fd {
}
/// Opens a sample file within this directory with the given flags and (if creating) mode.
unsafe fn openat(&self, p: *const c_char, flags: libc::c_int, mode: libc::c_int)
-> Result<fs::File, io::Error> {
pub(crate) unsafe fn openat(&self, p: *const c_char, flags: libc::c_int, mode: libc::c_int)
-> Result<fs::File, io::Error> {
let fd = libc::openat(self.0, p, flags, mode);
if fd < 0 {
return Err(io::Error::last_os_error())
@ -153,6 +159,13 @@ pub(crate) fn read_meta(dir: &Fd) -> Result<schema::DirMeta, Error> {
};
let mut data = Vec::new();
f.read_to_end(&mut data)?;
let (len, pos) = coding::decode_varint32(&data, 0)
.map_err(|_| format_err!("Unable to decode varint length in meta file"))?;
if data.len() != FIXED_DIR_META_LEN || len as usize + pos > FIXED_DIR_META_LEN {
bail!("Expected a {}-byte file with a varint length of a DirMeta message; got \
a {}-byte file with length {}", FIXED_DIR_META_LEN, data.len(), len);
}
let data = &data[pos..pos+len as usize];
let mut s = protobuf::CodedInputStream::from_bytes(&data);
meta.merge_from(&mut s).map_err(|e| e.context("Unable to parse metadata proto: {}"))?;
Ok(meta)
@ -160,14 +173,28 @@ pub(crate) fn read_meta(dir: &Fd) -> Result<schema::DirMeta, Error> {
/// Write `dir`'s metadata, clobbering existing data.
pub(crate) fn write_meta(dir: &Fd, meta: &schema::DirMeta) -> Result<(), Error> {
let tmp_path = cstr!("meta.tmp");
let final_path = cstr!("meta");
let mut f = unsafe { dir.openat(tmp_path.as_ptr(),
libc::O_CREAT | libc::O_TRUNC | libc::O_WRONLY, 0o600)? };
meta.write_to_writer(&mut f)?;
f.sync_all()?;
unsafe { renameat(&dir, tmp_path.as_ptr(), &dir, final_path.as_ptr())? };
dir.sync()?;
let mut data = meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible");
if data.len() > FIXED_DIR_META_LEN {
bail!("Length-delimited DirMeta message requires {} bytes, over limit of {}",
data.len(), FIXED_DIR_META_LEN);
}
data.resize(FIXED_DIR_META_LEN, 0); // pad to required length.
let path = cstr!("meta");
let mut f = unsafe { dir.openat(path.as_ptr(),
libc::O_CREAT | libc::O_WRONLY, 0o600)? };
let stat = f.metadata()?;
if stat.len() == 0 {
// Need to sync not only the data but also the file metadata and dirent.
f.write_all(&data)?;
f.sync_all()?;
dir.sync()?;
} else if stat.len() == FIXED_DIR_META_LEN as u64 {
// Just syncing the data will suffice; existing metadata and dirent are fine.
f.write_all(&data)?;
f.sync_data()?;
} else {
bail!("Existing meta file is {}-byte; expected {}", stat.len(), FIXED_DIR_META_LEN);
}
Ok(())
}
@ -183,7 +210,10 @@ impl SampleFileDir {
s.fd.lock(if read_write { libc::LOCK_EX } else { libc::LOCK_SH } | libc::LOCK_NB)?;
let dir_meta = read_meta(&s.fd)?;
if !SampleFileDir::consistent(db_meta, &dir_meta) {
bail!("metadata mismatch.\ndb: {:#?}\ndir: {:#?}", db_meta, &dir_meta);
let serialized =
db_meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible");
bail!("metadata mismatch.\ndb: {:#?}\ndir: {:#?}\nserialized db: {:#?}",
db_meta, &dir_meta, &serialized);
}
if db_meta.in_progress_open.is_some() {
s.write_meta(db_meta)?;
@ -193,7 +223,7 @@ impl SampleFileDir {
/// Returns true if the existing directory and database metadata are consistent; the directory
/// is then openable.
fn consistent(db_meta: &schema::DirMeta, dir_meta: &schema::DirMeta) -> bool {
pub(crate) fn consistent(db_meta: &schema::DirMeta, dir_meta: &schema::DirMeta) -> bool {
if dir_meta.db_uuid != db_meta.db_uuid { return false; }
if dir_meta.dir_uuid != db_meta.dir_uuid { return false; }
@ -234,7 +264,7 @@ impl SampleFileDir {
let e = e?;
match e.file_name().as_bytes() {
b"." | b".." => continue,
b"meta" | b"meta-tmp" => continue, // existing metadata is fine.
b"meta" => continue, // existing metadata is fine.
_ => return Ok(false),
}
}
@ -291,7 +321,7 @@ impl SampleFileDir {
}
}
/// Parse a composite id filename.
/// Parses a composite id filename.
///
/// These are exactly 16 bytes, lowercase hex.
pub(crate) fn parse_id(id: &[u8]) -> Result<CompositeId, ()> {
@ -311,6 +341,9 @@ pub(crate) fn parse_id(id: &[u8]) -> Result<CompositeId, ()> {
#[cfg(test)]
mod tests {
use protobuf::prelude::MessageField;
use super::*;
#[test]
fn parse_id() {
use super::parse_id;
@ -321,4 +354,19 @@ mod tests {
parse_id(b"0").unwrap_err();
parse_id(b"000000010000000x").unwrap_err();
}
/// Ensures that a DirMeta with all fields filled fits within the maximum size.
///
/// Both uuids and both `Open` sub-messages are populated: each open gets the largest possible
/// `id` (the longest varint encoding) and a full 16-byte `uuid`, so the length-delimited
/// encoding measured here is the largest one `write_meta` should produce.
#[test]
fn max_len_meta() {
    let mut meta = schema::DirMeta::new();
    let fake_uuid = &[0u8; 16][..];
    meta.db_uuid.extend_from_slice(fake_uuid);
    meta.dir_uuid.extend_from_slice(fake_uuid);

    // Fill in both the id and the uuid of each open. Setting one field twice would leave the
    // other empty and make the measured size unrepresentative.
    meta.last_complete_open.mut_message().id = u32::max_value();
    meta.last_complete_open.mut_message().uuid.extend_from_slice(fake_uuid);
    meta.in_progress_open.mut_message().id = u32::max_value();
    meta.in_progress_open.mut_message().uuid.extend_from_slice(fake_uuid);

    let data = meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible");
    assert!(data.len() <= FIXED_DIR_META_LEN, "{} vs {}", data.len(), FIXED_DIR_META_LEN);
}
}

View File

@ -34,6 +34,26 @@ syntax = "proto3";
// against the metadata stored within the database to detect inconsistencies
// between the directory and database, such as those described in
// design/schema.md.
//
// As of schema version 4, the overall file format is as follows: a
// varint-encoded length, followed by a serialized DirMeta message, followed
// by NUL bytes padding to a total length of 512 bytes. This message never
// exceeds that length.
//
// The goal of this format is to allow atomically rewriting a meta file
// in-place. I hope that on modern OSs and hardware, a single-sector
// rewrite is atomic, though POSIX frustratingly doesn't seem to guarantee
// this. There's some discussion of that here:
// <https://stackoverflow.com/a/2068608/23584>. At worst, there's a short
// window during which the meta file can be corrupted. As the file's purpose
// is to check for inconsistencies, it can be reconstructed if you assume no
// inconsistency exists.
//
// Schema version 3 wrote a serialized DirMeta message with no length or
// padding, and renamed new meta files over the top of old. This scheme
// requires extra space while opening the directory. If the filesystem is
// completely full, it requires freeing space manually, an undocumented and
// error-prone administrator procedure.
message DirMeta {
// A uuid associated with the database, in binary form. dir_uuid is strictly
// more powerful, but it improves diagnostics to know if the directory

View File

@ -489,4 +489,4 @@ create table signal_change (
);
insert into version (id, unix_time, notes)
values (4, cast(strftime('%s', 'now') as int), 'db creation');
values (5, cast(strftime('%s', 'now') as int), 'db creation');

View File

@ -41,6 +41,7 @@ mod v0_to_v1;
mod v1_to_v2;
mod v2_to_v3;
mod v3_to_v4;
mod v4_to_v5;
const UPGRADE_NOTES: &'static str =
concat!("upgraded using moonfire-db ", env!("CARGO_PKG_VERSION"));
@ -66,6 +67,7 @@ pub fn run(args: &Args, conn: &mut rusqlite::Connection) -> Result<(), Error> {
v1_to_v2::run,
v2_to_v3::run,
v3_to_v4::run,
v4_to_v5::run,
];
{

View File

@ -1,5 +1,5 @@
// This file is part of Moonfire NVR, a security camera digital video recorder.
// Copyright (C) 2018 Scott Lamb <slamb@slamb.org>
// This file is part of Moonfire NVR, a security camera network video recorder.
// Copyright (C) 2019 Scott Lamb <slamb@slamb.org>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by

110
db/upgrade/v4_to_v5.rs Normal file
View File

@ -0,0 +1,110 @@
// This file is part of Moonfire NVR, a security camera network video recorder.
// Copyright (C) 2019 Scott Lamb <slamb@slamb.org>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// In addition, as a special exception, the copyright holders give
// permission to link the code of portions of this program with the
// OpenSSL library under certain conditions as described in each
// individual source file, and distribute linked combinations including
// the two.
//
// You must obey the GNU General Public License in all respects for all
// of the code used other than OpenSSL. If you modify file(s) with this
// exception, you may extend this exception to your version of the
// file(s), but you are not obligated to do so. If you do not wish to do
// so, delete this exception statement from your version. If you delete
// this exception statement from all source files in the program, then
// also delete it here.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Upgrades a version 4 schema to a version 5 schema.
//!
//! This just handles the directory meta files. If they're already in the new format, great.
//! Otherwise, verifies that they are consistent with the database, then upgrades them.
use crate::db::FromSqlUuid;
use crate::{dir, schema};
use cstr::*;
use failure::{Error, Fail, bail};
use protobuf::{Message, prelude::MessageField};
use rusqlite::params;
use std::io::{Read, Write};
const FIXED_DIR_META_LEN: usize = 512;
/// Rewrites each sample file directory's `meta` file in the fixed-length format.
///
/// For every row of `sample_file_dir`, this builds the `DirMeta` the database expects, takes an
/// exclusive lock on the directory, and reads the existing `meta` file:
///
/// * If the file is already exactly `FIXED_DIR_META_LEN` bytes, it is treated as upgraded and
///   left untouched.
/// * Otherwise it is parsed as a bare `DirMeta` message and checked against the database's
///   view. It is then re-encoded as a length-delimited message, padded with NUL bytes to
///   `FIXED_DIR_META_LEN`, written to `meta.tmp`, synced, and renamed over `meta`. Finally the
///   directory itself is synced.
///
/// # Errors
///
/// Returns an error if a query fails, if a directory or its `meta` file can't be opened,
/// locked, read, or written, if the old `meta` file can't be parsed or is inconsistent with
/// the database, or if the re-encoded message doesn't fit in `FIXED_DIR_META_LEN` bytes.
pub fn run(_args: &super::Args, tx: &rusqlite::Transaction) -> Result<(), Error> {
    let db_uuid: FromSqlUuid =
        tx.query_row_and_then(r"select uuid from meta", params![], |row| row.get(0))?;
    let mut stmt = tx.prepare(r#"
        select
          d.path,
          d.uuid,
          d.last_complete_open_id,
          o.uuid
        from
          sample_file_dir d
          left join open o on (d.last_complete_open_id = o.id);
    "#)?;
    let mut rows = stmt.query(params![])?;
    while let Some(row) = rows.next()? {
        let path = row.get_raw_checked(0)?.as_str()?;
        let dir_uuid: FromSqlUuid = row.get(1)?;
        let open_id: Option<u32> = row.get(2)?;
        let open_uuid: Option<FromSqlUuid> = row.get(3)?;

        // Build the metadata the database believes this directory should have.
        let mut db_meta = schema::DirMeta::new();
        db_meta.db_uuid.extend_from_slice(&db_uuid.0.as_bytes()[..]);
        db_meta.dir_uuid.extend_from_slice(&dir_uuid.0.as_bytes()[..]);
        match (open_id, open_uuid) {
            (Some(id), Some(uuid)) => {
                let o = db_meta.last_complete_open.mut_message();
                o.id = id;
                o.uuid.extend_from_slice(&uuid.0.as_bytes()[..]);
            },
            (None, None) => {},
            // The left join found no `open` row for a non-null last_complete_open_id.
            _ => bail!("open table missing id"),
        }

        let dir = dir::Fd::open(path, false)?;
        dir.lock(libc::LOCK_EX)?;
        let tmp_path = cstr!("meta.tmp");
        let meta_path = cstr!("meta");
        let mut f = unsafe { dir.openat(meta_path.as_ptr(), libc::O_RDONLY, 0) }?;
        let mut data = Vec::new();
        f.read_to_end(&mut data)?;
        if data.len() == FIXED_DIR_META_LEN {
            continue;  // already upgraded.
        }

        // Old format: the whole file is one serialized DirMeta, with no length prefix or padding.
        let mut s = protobuf::CodedInputStream::from_bytes(&data);
        let mut dir_meta = schema::DirMeta::new();
        dir_meta.merge_from(&mut s)
            .map_err(|e| e.context("Unable to parse metadata proto"))?;
        if !dir::SampleFileDir::consistent(&db_meta, &dir_meta) {
            bail!("Inconsistent db_meta={:?} dir_meta={:?}", &db_meta, &dir_meta);
        }

        // Write the replacement to a temporary file and rename it into place, so `meta` is
        // always either the complete old file or the complete new one.
        let mut f = unsafe { dir.openat(tmp_path.as_ptr(),
                                        libc::O_CREAT | libc::O_TRUNC | libc::O_WRONLY, 0o600)? };
        let mut data =
            dir_meta.write_length_delimited_to_bytes().expect("proto3->vec is infallible");
        if data.len() > FIXED_DIR_META_LEN {
            bail!("Length-delimited DirMeta message requires {} bytes, over limit of {}",
                  data.len(), FIXED_DIR_META_LEN);
        }
        data.resize(FIXED_DIR_META_LEN, 0);  // pad to required length.
        f.write_all(&data)?;
        f.sync_all()?;
        unsafe { dir::renameat(&dir, tmp_path.as_ptr(), &dir, meta_path.as_ptr())? };
        dir.sync()?;
    }
    Ok(())
}

View File

@ -268,6 +268,24 @@ create table sample_file_dir (
```
```proto
// Metadata stored in sample file dirs as "<dir>/meta". This is checked
// against the metadata stored within the database to detect inconsistencies
// between the directory and database, such as those described in
// design/schema.md.
//
// As of schema version 4, the overall file format is as follows: a
// varint-encoded length, followed by a serialized DirMeta message, followed
// by NUL bytes padding to a total length of 512 bytes. This message never
// exceeds that length.
//
// The goal of this format is to allow atomically rewriting a meta file
// in-place. I hope that on modern OSs and hardware, a single-sector
// rewrite is atomic, though POSIX frustratingly doesn't seem to guarantee
// this. There's some discussion of that here:
// <https://stackoverflow.com/a/2068608/23584>. At worst, there's a short
// window during which the meta file can be corrupted. As the file's purpose
// is to check for inconsistencies, it can be reconstructed if you assume no
// inconsistency exists.
message DirMeta {
// A uuid associated with the database, in binary form. dir_uuid is strictly
// more powerful, but it improves diagnostics to know if the directory
@ -302,13 +320,12 @@ These are updated through procedures below:
This is a sub-procedure used in several places below.
Precondition: the directory's lock is held with `LOCK_EX` (exclusive).
Precondition: the directory's lock is held with `LOCK_EX` (exclusive) and
there is an existing metadata file.
1. Write a new `meta.tmp` (opened with `O_CREAT|O_TRUNC` to discard an
existing temporary file if any).
2. `fsync` the `meta.tmp` file descriptor.
3. `rename` `meta.tmp` to `meta`.
4. `fsync` the directory.
1. Open the metadata file.
2. Rewrite the fixed-length data atomically.
3. `fdatasync` the file.
*Open the database as read-only*

View File

@ -223,11 +223,18 @@ Version 3 adds over version 1:
* additional timestamp fields which may be useful in diagnosing/correcting
time jumps/inconsistencies.
### Version 3 to version 4
### Version 3 to version 4 to version 5
This upgrade affects only the SQLite database. Version 4 adds over version 3:
This upgrade affects the SQLite database and the `meta` file in each sample file directory.
Version 4 represents a half-finished upgrade from version 3 to version 5; it
is never used.
Version 5 adds over version 3:
* permissions for users and sessions. Existing users will have only the
`view_video` permission, matching their previous behavior.
* the `signals` schema, used to store status of signals such as camera
motion detection, security system zones, etc.
* the ability to recover from a completely full sample file directory (#65)
without manual intervention.

View File

@ -34,8 +34,7 @@ use failure::{Error, bail};
use ffmpeg;
use lazy_static::lazy_static;
use log::{debug, info, warn};
use std::os::raw::c_char;
use std::ffi::{CStr, CString};
use std::ffi::CString;
use std::result::Result;
use std::sync;