diff --git a/src/coding.h b/src/coding.h
index 5318134..ff983c7 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -154,6 +154,16 @@ inline void Append32(int32_t in, std::string *out) {
   out->append(reinterpret_cast<const char *>(&net), sizeof(int32_t));
 }
 
+inline void AppendU64(uint64_t in, std::string *out) {
+  uint64_t net = ToNetworkU64(in);
+  out->append(reinterpret_cast<const char *>(&net), sizeof(uint64_t));
+}
+
+inline void Append64(int64_t in, std::string *out) {
+  int64_t net = ToNetwork64(in);
+  out->append(reinterpret_cast<const char *>(&net), sizeof(int64_t));
+}
+
 }  // namespace moonfire_nvr
 
 #endif  // MOONFIRE_NVR_CODING_H
diff --git a/src/ffmpeg.h b/src/ffmpeg.h
index ed82b58..ca6a588 100644
--- a/src/ffmpeg.h
+++ b/src/ffmpeg.h
@@ -72,6 +72,7 @@ class VideoPacket {
   int64_t pts() const { return pkt_.pts; }
 
   AVPacket *pkt() { return &pkt_; }
+  const AVPacket *pkt() const { return &pkt_; }
 
  private:
   AVPacket pkt_;
diff --git a/src/http.cc b/src/http.cc
index 6afe1f4..1a6b553 100644
--- a/src/http.cc
+++ b/src/http.cc
@@ -67,7 +67,6 @@ class RealFile : public VirtualFile {
   int64_t size() const final { return stat_.st_size; }
   time_t last_modified() const final { return stat_.st_mtime; }
   std::string mime_type() const final { return mime_type_; }
-  std::string filename() const final { return slice_.filename(); }
   std::string etag() const final {
     return StrCat("\"", stat_.st_ino, ":", stat_.st_size, ":",
@@ -317,8 +316,7 @@ void HttpServe(const VirtualFile &file, evhttp_request *req) {
       evhttp_add_header(out_hdrs, "Content-Range", range_hdr.c_str());
       http_status = 416;
       http_status_str = "Range Not Satisfiable";
-      LOG(INFO) << "Replying to non-satisfiable range request for "
-                << file.filename() << ": " << range_hdr;
+      LOG(INFO) << "Replying to non-satisfiable range request: " << range_hdr;
       break;
     }
@@ -328,27 +326,26 @@ void HttpServe(const VirtualFile &file, evhttp_request *req) {
       std::string range_hdr = StrCat("bytes ", ranges[0].begin, "-",
                                      ranges[0].end - 1, "/", file.size());
       if (!file.AddRange(ranges[0], &buf, &error_message)) {
-        LOG(ERROR) << "Unable to serve " << file.filename() << " "
-                   << ranges[0] << ": " << error_message;
+        LOG(ERROR) << "Unable to serve range " << ranges[0] << ": "
+                   << error_message;
         return evhttp_send_error(req, HTTP_INTERNAL,
                                  EscapeHtml(error_message).c_str());
       }
       evhttp_add_header(out_hdrs, "Content-Range", range_hdr.c_str());
       http_status = 206;
       http_status_str = "Partial Content";
-      LOG(INFO) << "Replying to range request for " << file.filename();
+      LOG(INFO) << "Replying to range request";
       break;
     }
     // FALLTHROUGH
     case internal::RangeHeaderType::kAbsentOrInvalid:
       if (!file.AddRange(ByteRange(0, file.size()), &buf, &error_message)) {
-        LOG(ERROR) << "Unable to serve " << file.filename() << ": "
-                   << error_message;
+        LOG(ERROR) << "Unable to serve file: " << error_message;
         return evhttp_send_error(req, HTTP_INTERNAL,
                                  EscapeHtml(error_message).c_str());
       }
-      LOG(INFO) << "Replying to whole-file request for " << file.filename();
+      LOG(INFO) << "Replying to whole-file request";
       http_status = HTTP_OK;
       http_status_str = "OK";
   }
diff --git a/src/http.h b/src/http.h
index 32bf0e6..9736a77 100644
--- a/src/http.h
+++ b/src/http.h
@@ -131,15 +131,12 @@ class VirtualFile : public FileSlice {
   virtual time_t last_modified() const = 0;
   virtual std::string etag() const = 0;
   virtual std::string mime_type() const = 0;
-  virtual std::string filename() const = 0;  // for logging.
}; class RealFileSlice : public FileSlice { public: void Init(re2::StringPiece filename, ByteRange range); - const std::string filename() const { return filename_; } - int64_t size() const final { return range_.size(); } bool AddRange(ByteRange range, EvBuffer *buf, diff --git a/src/mp4-test.cc b/src/mp4-test.cc index b16643a..f192302 100644 --- a/src/mp4-test.cc +++ b/src/mp4-test.cc @@ -30,13 +30,20 @@ // // mp4_test.cc: tests of the mp4.h interface. +#include +#include +#include + #include #include #include +#include "ffmpeg.h" +#include "h264.h" #include "http.h" #include "mp4.h" #include "string.h" +#include "testutil.h" DECLARE_bool(alsologtostderr); @@ -146,6 +153,132 @@ TEST(Mp4SampleTablePiecesTest, Stsz) { EXPECT_EQ(kExpectedEntries, ToHex(pieces.stsz_entries())); } +class IntegrationTest : public testing::Test { + protected: + IntegrationTest() { + tmpdir_path_ = PrepareTempDirOrDie("mp4-integration-test"); + int ret = + GetRealFilesystem()->Open(tmpdir_path_.c_str(), O_RDONLY, &tmpdir_); + CHECK_EQ(0, ret) << strerror(ret); + } + + void CopyMp4ToSingleRecording() { + std::string error_message; + SampleIndexEncoder index; + SampleFileWriter writer(tmpdir_.get()); + recording_.sample_file_path = StrCat(tmpdir_path_, "/clip.sample"); + if (!writer.Open("clip.sample", &error_message)) { + ADD_FAILURE() << "open clip.sample: " << error_message; + return; + } + auto in = GetRealVideoSource()->OpenFile("../src/testdata/clip.mp4", + &error_message); + if (in == nullptr) { + ADD_FAILURE() << "open clip.mp4" << error_message; + return; + } + + video_sample_entry_.width = in->stream()->codec->width; + video_sample_entry_.height = in->stream()->codec->height; + if (!GetH264SampleEntry(GetExtradata(in.get()), in->stream()->codec->width, + in->stream()->codec->height, + &video_sample_entry_.data, &error_message)) { + ADD_FAILURE() << "GetH264SampleEntry: " << error_message; + return; + } + + while (true) { + VideoPacket pkt; + if (!in->GetNext(&pkt, &error_message)) { + if (!error_message.empty()) { + ADD_FAILURE() << "GetNext: " << error_message; + return; + } + break; + } + if (!writer.Write(GetData(pkt), &error_message)) { + ADD_FAILURE() << "Write: " << error_message; + return; + } + index.AddSample(pkt.pkt()->duration, pkt.pkt()->size, pkt.is_key()); + } + + if (!writer.Close(&recording_.sample_file_sha1, &error_message)) { + ADD_FAILURE() << "Close: " << error_message; + } + recording_.video_index = index.data().as_string(); + } + + void CopySingleRecordingToNewMp4() { + Mp4FileBuilder builder; + builder.SetSampleEntry(video_sample_entry_); + builder.Append(Recording(recording_), 0, + std::numeric_limits::max()); + std::string error_message; + auto mp4 = builder.Build(&error_message); + ASSERT_TRUE(mp4 != nullptr) << error_message; + EvBuffer buf; + ASSERT_TRUE(mp4->AddRange(ByteRange(0, mp4->size()), &buf, &error_message)) + << error_message; + WriteFileOrDie(StrCat(tmpdir_path_, "/clip.new.mp4"), &buf); + } + + void CompareMp4s() { + std::string error_message; + auto original = GetRealVideoSource()->OpenFile("../src/testdata/clip.mp4", + &error_message); + ASSERT_TRUE(original != nullptr) << error_message; + auto copied = GetRealVideoSource()->OpenFile( + StrCat(tmpdir_path_, "/clip.new.mp4"), &error_message); + ASSERT_TRUE(copied != nullptr) << error_message; + + EXPECT_EQ(GetExtradata(original.get()), GetExtradata(copied.get())); + EXPECT_EQ(original->stream()->codec->width, copied->stream()->codec->width); + EXPECT_EQ(original->stream()->codec->height, + 
copied->stream()->codec->height); + + while (true) { + VideoPacket original_pkt; + VideoPacket copied_pkt; + + bool original_has_next = original->GetNext(&original_pkt, &error_message); + ASSERT_TRUE(original_has_next || error_message.empty()) << error_message; + bool copied_has_next = copied->GetNext(&copied_pkt, &error_message); + ASSERT_TRUE(copied_has_next || error_message.empty()) << error_message; + if (!original_has_next && !copied_has_next) { + break; + } + ASSERT_TRUE(original_has_next); + ASSERT_TRUE(copied_has_next); + EXPECT_EQ(original_pkt.pkt()->pts, copied_pkt.pkt()->pts); + EXPECT_EQ(original_pkt.pkt()->duration, copied_pkt.pkt()->duration); + EXPECT_EQ(GetData(original_pkt), GetData(copied_pkt)); + } + } + + re2::StringPiece GetExtradata(InputVideoPacketStream *stream) { + return re2::StringPiece( + reinterpret_cast(stream->stream()->codec->extradata), + stream->stream()->codec->extradata_size); + } + + re2::StringPiece GetData(const VideoPacket &pkt) { + return re2::StringPiece(reinterpret_cast(pkt.pkt()->data), + pkt.pkt()->size); + } + + std::string tmpdir_path_; + std::unique_ptr tmpdir_; + Recording recording_; + VideoSampleEntry video_sample_entry_; +}; + +TEST_F(IntegrationTest, RoundTrip) { + CopyMp4ToSingleRecording(); + CopySingleRecordingToNewMp4(); + CompareMp4s(); +} + } // namespace } // namespace moonfire_nvr diff --git a/src/mp4.cc b/src/mp4.cc index c17fedf..c33ca80 100644 --- a/src/mp4.cc +++ b/src/mp4.cc @@ -29,13 +29,516 @@ // along with this program. If not, see . // // mp4.cc: implementation of mp4.h interface. +// +// This implementation will make the most sense when read side-by-side with +// ISO/IEC 14496-12:2015, available at the following URL: +// +// +// mp4.cc generates VirtualFiles via an array of FileSlices. Each FileSlice +// is responsible for some portion of the .mp4 file, generally some subset of +// a single .mp4 "box". Slices fall into these categories: +// +// 1. entirely static data from a const char kConstant[]. This is preferred in +// the interest of simplicity and efficiency when there is only one useful +// value for all the fields in the box, including its length. +// +// These slices are represented using the StaticStringPieceSlice class. +// +// 2. a box's fixed-length fields. In some cases a slice represents the entire +// contents of a FullBox type; in others a slice represents only the +// "length" and "type" fields of a container Box, while other contents +// (such as child boxes) are appended to the box as a separate slice. +// +// These slices are represented using a specific "struct ...Box" type for +// type safety and simplicity. The structs match the actual wire format---in +// particular, they are packed and store fields in network byte order. +// sizeof(...Box) is meaningful and structure data can be simply written +// with memcpy, as opposed to via manually-written or generated serialization +// code. (This approach could be revisited if there's ever a need to run +// on a compiler that doesn't support __attribute__((packed)) or a +// processor that doesn't support unaligned access.) The structs are +// wrapped with the Mp4Box<> template class which manages child slices and +// fills in the box's length field automatically. +// +// 3. variable-length data generated using the Mp4SampleTablePieces class, +// representing part of one box dealing with a single recording. These +// are the largest portion of a typical .mp4's metadata. +// +// These slices are generated using the FillerFileSlice class. 
They +// determine their sizes eagerly (so that the size of the file is known and +// so that later byte ranges can be served correctly) but only generate +// their contents when the requested byte range overlaps with the slice +// (for memory/CPU efficiency). +// +// 4. file-backed variable-length data, representing actual video samples. +// +// These are represented using the FileSlice class and are mmap()ed via +// libevent, letting the kernel decide how much to page in at once. +// +// The box hierarchy is constructed through append operations on the Mp4Box +// subclasses. Most of the static data is always in RAM when the VirtualFile +// is, but the file-backed and sample table portions are not. This should be +// a reasonable compromise between simplicity of implementation and memory +// efficiency. #include "mp4.h" #include "coding.h" +#define NET_UINT64_C(x) ::moonfire_nvr::ToNetworkU64(UINT64_C(x)) +#define NET_INT64_C(x) ::moonfire_nvr::ToNetwork64(UINT64_C(x)) +#define NET_UINT32_C(x) ::moonfire_nvr::ToNetworkU32(UINT32_C(x)) +#define NET_INT32_C(x) ::moonfire_nvr::ToNetwork32(UINT32_C(x)) +#define NET_UINT16_C(x) ::moonfire_nvr::ToNetworkU16(UINT16_C(x)) +#define NET_INT16_C(x) ::moonfire_nvr::ToNetwork16(UINT16_C(x)) + +using ::moonfire_nvr::internal::Mp4FileSegment; + namespace moonfire_nvr { +namespace { + +// ISO/IEC 14496-12 section 4.3, ftyp. +const char kFtypBox[] = { + 0x00, 0x00, 0x00, 0x20, // length = 32, sizeof(kFtypBox) + 'f', 't', 'y', 'p', // type + 'i', 's', 'o', 'm', // major_brand + 0x00, 0x00, 0x02, 0x00, // minor_version + 'i', 's', 'o', 'm', // compatible_brands[0] + 'i', 's', 'o', '2', // compatible_brands[1] + 'a', 'v', 'c', '1', // compatible_brands[2] + 'm', 'p', '4', '1', // compatible_brands[3] +}; + +// vmhd and dinf boxes. These are both completely static and adjacent in the +// structure, so they're in a single constant. +const char kVmhdAndDinfBoxes[] = { + // A vmhd box; the "graphicsmode" and "opcolor" values don't have any + // meaningful use. + 0x00, 0x00, 0x00, 0x14, // length == sizeof(kVmhdBox) + 'v', 'm', 'h', 'd', // type = vmhd, ISO/IEC 14496-12 section 12.1.2. + 0x00, 0x00, 0x00, 0x01, // version + flags(1) + 0x00, 0x00, 0x00, 0x00, // graphicsmode (copy), opcolor[0] + 0x00, 0x00, 0x00, 0x00, // opcolor[1], opcolor[2] + + // A dinf box suitable for a "self-contained" .mp4 file (no URL/URN + // references to external data). + 0x00, 0x00, 0x00, 0x24, // length == sizeof(kDinfBox) + 'd', 'i', 'n', 'f', // type = dinf, ISO/IEC 14496-12 section 8.7.1. + 0x00, 0x00, 0x00, 0x1c, // length + 'd', 'r', 'e', 'f', // type = dref, ISO/IEC 14496-12 section 8.7.2. + 0x00, 0x00, 0x00, 0x00, // version and flags + 0x00, 0x00, 0x00, 0x01, // entry_count + 0x00, 0x00, 0x00, 0x0c, // length + 'u', 'r', 'l', ' ', // type = url, ISO/IEC 14496-12 section 8.7.2. + 0x00, 0x00, 0x00, 0x01, // version=0, flags=self-contained +}; + +// A hdlr box suitable for a video track. +const char kHdlrBox[] = { + 0x00, 0x00, 0x00, 0x21, // length == sizeof(kHdlrBox) + 'h', 'd', 'l', 'r', // type == hdlr, ISO/IEC 14496-12 section 8.4.3. + 0x00, 0x00, 0x00, 0x00, // version + flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 'v', 'i', 'd', 'e', // handler = vide + 0x00, 0x00, 0x00, 0x00, // reserved[0] + 0x00, 0x00, 0x00, 0x00, // reserved[1] + 0x00, 0x00, 0x00, 0x00, // reserved[2] + 0x00, // name, zero-terminated (empty) +}; + +// Convert from 90kHz units since 1970-01-01 00:00:00 UTC to +// seconds since 1904-01-01 00:00:00 UTC. 
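+// (24107 is the number of days between 1904-01-01 and 1970-01-01:
+// 66 years of 365 days plus 17 leap days.)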
+uint32_t ToIso14496Timestamp(uint64_t time_90k) { + return time_90k / kTimeUnitsPerSecond + 24107 * 86400; +} + +struct MovieBox { // ISO/IEC 14496-12 section 8.2.1, moov. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'o', 'o', 'v'}; +}; + +struct MovieHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.2.2, mvhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'v', 'h', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t timescale = ToNetworkU32(kTimeUnitsPerSecond); + uint32_t duration = NET_UINT32_C(0); + const int32_t rate = NET_UINT32_C(0x00010000); + const int16_t volume = NET_INT16_C(0x0100); + const int16_t reserved = NET_UINT16_C(0); + const uint32_t more_reserved[2] = {NET_UINT32_C(0), NET_UINT32_C(0)}; + const int32_t matrix[9] = { + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x40000000)}; + const uint32_t pre_defined[6] = {NET_UINT32_C(0), NET_UINT32_C(0), + NET_UINT32_C(0), NET_UINT32_C(0), + NET_UINT32_C(0), NET_UINT32_C(0)}; + uint32_t next_track_id = NET_UINT32_C(2); +} __attribute__((packed)); + +struct TrackBox { // ISO/IEC 14496-12 section 8.3.1, trak. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'t', 'r', 'a', 'k'}; +} __attribute__((packed)); + +struct TrackHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.3.2, tkhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'t', 'k', 'h', 'd'}; + // flags 7 = track_enabled | track_in_movie | track_in_preview + const uint32_t version_and_flags = NET_UINT32_C(7); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t track_id = NET_UINT32_C(0); + const uint32_t reserved1 = NET_UINT64_C(0); + uint32_t duration = NET_UINT32_C(0); + const uint32_t reserved2[2] = {NET_UINT32_C(0), NET_UINT32_C(0)}; + const uint16_t layer = NET_UINT16_C(0); + const uint16_t alternate_group = NET_UINT16_C(0); + const uint16_t volume = NET_UINT16_C(0); + const uint16_t reserved3 = NET_UINT16_C(0); + const int32_t matrix[9] = { + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x40000000)}; + uint32_t width = NET_UINT32_C(0); + uint32_t height = NET_UINT32_C(0); +} __attribute__((packed)); + +struct MediaBox { // ISO/IEC 14496-12 section 8.4.1, mdia. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'd', 'i', 'a'}; +} __attribute__((packed)); + +struct MediaHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.4.2, mdhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'd', 'h', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t timescale = ToNetworkU32(kTimeUnitsPerSecond); + uint32_t duration = NET_UINT32_C(0); + uint16_t languages = NET_UINT16_C(0x55c4); // undetermined + const uint16_t pre_defined = NET_UINT32_C(0); +} __attribute__((packed)); + +struct MediaInformationBox { // ISO/IEC 14496-12 section 8.4.4, minf. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'i', 'n', 'f'}; +} __attribute__((packed)); + +struct SampleTableBox { // ISO/IEC 14496-12 section 8.5.1, stbl. 
+ uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 'b', 'l'}; +} __attribute__((packed)); + +struct SampleDescriptionBoxVersion0 { // ISO/IEC 14496-12 section 8.5.2, stsd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0 << 24); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct TimeToSampleBoxVersion0 { // ISO/IEC 14496-12 section 8.6.1.2, stts. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 't', 's'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SampleToChunkBoxVersion0 { // ISO/IEC 14496-12 section 8.7.4, stsc. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'c'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SampleSizeBoxVersion0 { // ISO/IEC 14496-12 section 8.7.3, stsz. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'z'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t sample_size = NET_UINT32_C(0); + uint32_t sample_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct ChunkLargeOffsetBoxVersion0 { // ISO/IEC 14496-12 section 8.7.5, co64. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'c', 'o', '6', '4'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SyncSampleBoxVersion0 { // ISO/IEC 14496-12 section 8.6.2, stss. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 's'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct LargeMediaDataBox { // ISO/IEC 14496-12 section 8.1.1, mdat. + const uint32_t size = NET_UINT32_C(1); + const char type[4] = {'m', 'd', 'a', 't'}; + uint64_t largesize = NET_UINT64_C(0); +}; + +// Grouping of a box's header and the slice representing the header. +// See also ScopedMp4Box, which calculates the length. +template +class Mp4Box { + public: + Mp4Box() + : header_slice_(re2::StringPiece(reinterpret_cast(&header_), + sizeof(header_))) {} + + Header &header() { return header_; } + const FileSlice *header_slice() const { return &header_slice_; } + + private: + Header header_; + CopyingStringPieceSlice header_slice_; +}; + +// Helper for adding a mp4 box which calculates the header's size field. +// Construction appends the box to the FileSlices; destruction automatically +// calculates the length including any other slices added in the meantime. +// See also CONSTRUCT_BOX macro. +template +class ScopedMp4Box { + public: + explicit ScopedMp4Box(FileSlices *slices, Box *box) + : starting_size_(slices->size()), slices_(slices), box_(box) { + slices_->Append(box->header_slice()); + } + + ScopedMp4Box(const ScopedMp4Box &) = delete; + void operator=(const ScopedMp4Box &) = delete; + + ~ScopedMp4Box() { + box_->header().size = ToNetwork32(slices_->size() - starting_size_); + } + + private: + int64_t starting_size_; + FileSlices *slices_; + Box *box_; +}; + +// Macro for less verbose ScopedMp4Box instantiation. +// For use only within Mp4File. 
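+// For example, CONSTRUCT_BOX(moov_) in AppendMoov() below appends moov_'s
+// header slice right away; when the enclosing scope ends, the ScopedMp4Box
+// destructor sets moov_'s size field to cover every slice appended within
+// that scope (mvhd, trak, and their children).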
+#define CONSTRUCT_BOX(box) \ + ScopedMp4Box _scoped_##box(&slices_, &box); + +// .mp4 file, constructed from boxes arranged in the order suggested by +// ISO/IEC 14496-12 section 6.2.3 (see Table 1): +// * ftyp (file type and compatibility) +// * moov (container for all the metadata) +// ** mvhd (movie header, overall declarations) +// ** trak (container for an individual track or stream) +// *** tkhd (track header, overall information about the track) +// *** mdia (container for the media information in a track) +// **** mdhd (media header, overall information about the media) +// *** minf (media information container) +// **** vmhd (video media header, overall information (video track only)) +// **** dinf (data information box, container) +// ***** dref (data reference box, declares source(s) of media data in track) +// **** stbl (sample table box, container for the time/space map) +// ***** stsd (sample descriptions (codec types, initilization etc.) +// ***** stts ((decoding) time-to-sample) +// ***** stsc (sample-to-chunk, partial data-offset information) +// ***** stsz (samples sizes (framing)) +// ***** co64 (64-bit chunk offset) +// ***** stss (sync sample table) +// * mdat (media data container) +class Mp4File : public VirtualFile { + public: + Mp4File(std::vector> segments, + VideoSampleEntry &&video_sample_entry) + : segments_(std::move(segments)), + video_sample_entry_(std::move(video_sample_entry)), + ftyp_(re2::StringPiece(kFtypBox, sizeof(kFtypBox))), + moov_trak_mdia_hdlr_(re2::StringPiece(kHdlrBox, sizeof(kHdlrBox))), + moov_trak_mdia_minf_vmhddinf_( + re2::StringPiece(kVmhdAndDinfBoxes, sizeof(kVmhdAndDinfBoxes))), + moov_trak_mdia_minf_stbl_stsd_entry_(video_sample_entry_.data) { + uint32_t duration = 0; + int64_t max_time_90k = 0; + for (const auto &segment : segments_) { + duration += segment->pieces.duration_90k(); + max_time_90k = std::max(max_time_90k, segment->recording.start_time_90k + + segment->rel_end_90k); + } + auto net_duration = ToNetworkU32(duration); + auto net_creation_ts = ToNetworkU32(ToIso14496Timestamp(max_time_90k)); + + slices_.Append(&ftyp_); + AppendMoov(net_duration, net_creation_ts); + + // Add the mdat_ without using CONSTRUCT_BOX. + // mdat_ is special because it uses largesize rather than size. 
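+    // (Setting the 32-bit size field to 1 signals that the actual length
+    // follows in the 64-bit largesize field, per ISO/IEC 14496-12 section
+    // 8.1.1, so the sample data is not limited to 4 GiB.)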
+ slices_.Append(mdat_.header_slice()); + initial_sample_byte_pos_ = slices_.size(); + for (const auto &segment : segments_) { + segment->sample_file_slice.Init(segment->recording.sample_file_path, + segment->pieces.sample_pos()); + slices_.Append(&segment->sample_file_slice); + } + mdat_.header().largesize = + ToNetworkU64(slices_.size() - initial_sample_byte_pos_); + } + + time_t last_modified() const final { return 0; } // TODO + std::string etag() const final { return ""; } // TODO + std::string mime_type() const final { return "video/mp4"; } + int64_t size() const final { return slices_.size(); } + bool AddRange(ByteRange range, EvBuffer *buf, + std::string *error_message) const final { + return slices_.AddRange(range, buf, error_message); + } + + private: + void AppendMoov(uint32_t net_duration, uint32_t net_creation_ts) { + CONSTRUCT_BOX(moov_); + { + CONSTRUCT_BOX(moov_mvhd_); + moov_mvhd_.header().creation_time = net_creation_ts; + moov_mvhd_.header().modification_time = net_creation_ts; + moov_mvhd_.header().duration = net_duration; + moov_mvhd_.header().duration = net_duration; + } + { + CONSTRUCT_BOX(moov_trak_); + { + CONSTRUCT_BOX(moov_trak_tkhd_); + moov_trak_tkhd_.header().creation_time = net_creation_ts; + moov_trak_tkhd_.header().modification_time = net_creation_ts; + moov_trak_tkhd_.header().track_id = NET_UINT32_C(1); + moov_trak_tkhd_.header().duration = net_duration; + moov_trak_tkhd_.header().width = + NET_UINT32_C(video_sample_entry_.width << 16); + moov_trak_tkhd_.header().height = + NET_UINT32_C(video_sample_entry_.height << 16); + } + { + CONSTRUCT_BOX(moov_trak_mdia_); + { + CONSTRUCT_BOX(moov_trak_mdia_mdhd_); + moov_trak_mdia_mdhd_.header().creation_time = net_creation_ts; + moov_trak_mdia_mdhd_.header().modification_time = net_creation_ts; + moov_trak_mdia_mdhd_.header().duration = net_duration; + } + slices_.Append(&moov_trak_mdia_hdlr_); + { + CONSTRUCT_BOX(moov_trak_mdia_minf_); + slices_.Append(&moov_trak_mdia_minf_vmhddinf_); + AppendStbl(); + } + } + } + } + + void AppendStbl() { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_); + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsd_); + moov_trak_mdia_minf_stbl_stsd_.header().entry_count = NET_UINT32_C(1); + slices_.Append(&moov_trak_mdia_minf_stbl_stsd_entry_); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stts_); + int32_t stts_entry_count = 0; + for (const auto &segment : segments_) { + stts_entry_count += segment->pieces.stts_entry_count(); + slices_.Append(segment->pieces.stts_entries()); + } + moov_trak_mdia_minf_stbl_stts_.header().entry_count = + ToNetwork32(stts_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsc_); + uint32_t stsc_entry_count = 0; + for (const auto &segment : segments_) { + stsc_entry_count += segment->pieces.stsc_entry_count(); + slices_.Append(segment->pieces.stsc_entries()); + } + moov_trak_mdia_minf_stbl_stsc_.header().entry_count = + ToNetwork32(stsc_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsz_); + uint32_t stsz_entry_count = 0; + for (const auto &segment : segments_) { + stsz_entry_count += segment->pieces.stsz_entry_count(); + slices_.Append(segment->pieces.stsz_entries()); + } + moov_trak_mdia_minf_stbl_stsz_.header().sample_count = + ToNetwork32(stsz_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_co64_); + moov_trak_mdia_minf_stbl_co64_entries_.Init( + sizeof(uint64_t) * segments_.size(), + [this](std::string *s, std::string *error_message) { + return FillCo64Entries(s, error_message); + }); + 
moov_trak_mdia_minf_stbl_co64_.header().entry_count = + ToNetwork32(segments_.size()); + slices_.Append(&moov_trak_mdia_minf_stbl_co64_entries_); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stss_); + uint32_t stss_entry_count = 0; + for (const auto &segment : segments_) { + stss_entry_count += segment->pieces.stss_entry_count(); + slices_.Append(segment->pieces.stss_entries()); + } + moov_trak_mdia_minf_stbl_stss_.header().entry_count = + ToNetwork32(stss_entry_count); + } + } + + bool FillCo64Entries(std::string *s, std::string *error_message) { + int64_t pos = initial_sample_byte_pos_; + for (const auto &segment : segments_) { + AppendU64(pos, s); + pos += segment->sample_file_slice.size(); + } + return true; + } + + int64_t initial_sample_byte_pos_ = 0; + std::vector> segments_; + VideoSampleEntry video_sample_entry_; + FileSlices slices_; + + StaticStringPieceSlice ftyp_; + Mp4Box moov_; + Mp4Box moov_mvhd_; + Mp4Box moov_trak_; + Mp4Box moov_trak_tkhd_; + Mp4Box moov_trak_mdia_; + Mp4Box moov_trak_mdia_mdhd_; + StaticStringPieceSlice moov_trak_mdia_hdlr_; + Mp4Box moov_trak_mdia_minf_; + StaticStringPieceSlice moov_trak_mdia_minf_vmhddinf_; + Mp4Box moov_trak_mdia_minf_stbl_; + Mp4Box moov_trak_mdia_minf_stbl_stsd_; + CopyingStringPieceSlice moov_trak_mdia_minf_stbl_stsd_entry_; + Mp4Box moov_trak_mdia_minf_stbl_stts_; + Mp4Box moov_trak_mdia_minf_stbl_stsc_; + Mp4Box moov_trak_mdia_minf_stbl_stsz_; + Mp4Box moov_trak_mdia_minf_stbl_co64_; + FillerFileSlice moov_trak_mdia_minf_stbl_co64_entries_; + Mp4Box moov_trak_mdia_minf_stbl_stss_; + Mp4Box mdat_; +}; + +#undef CONSTRUCT_BOX + +} // namespace + namespace internal { bool Mp4SampleTablePieces::Init(re2::StringPiece video_index_blob, @@ -162,4 +665,50 @@ bool Mp4SampleTablePieces::FillStszEntries(std::string *s, } // namespace internal +Mp4FileBuilder &Mp4FileBuilder::Append(Recording &&recording, + int32_t rel_start_90k, + int32_t rel_end_90k) { + std::unique_ptr s(new Mp4FileSegment); + s->recording = std::move(recording); + s->rel_start_90k = rel_start_90k; + s->rel_end_90k = rel_end_90k; + segments_.push_back(std::move(s)); + return *this; +} + +Mp4FileBuilder &Mp4FileBuilder::SetSampleEntry(const VideoSampleEntry &entry) { + video_sample_entry_ = entry; + return *this; +} + +std::unique_ptr Mp4FileBuilder::Build(std::string *error_message) { + int32_t sample_offset = 1; + for (auto &segment : segments_) { + if (segment->recording.video_sample_entry_sha1 != + video_sample_entry_.sha1) { + *error_message = + StrCat("inconsistent video sample entries. 
builder has: ", + ToHex(video_sample_entry_.sha1), ", segment has: ", + ToHex(segment->recording.video_sample_entry_sha1)); + return std::unique_ptr(); + } + + if (!segment->pieces.Init(segment->recording.video_index, + 1, // sample entry index + sample_offset, segment->rel_start_90k, + segment->rel_end_90k, error_message)) { + return std::unique_ptr(); + } + sample_offset += segment->pieces.samples(); + } + + if (segments_.empty()) { + *error_message = "Can't construct empty .mp4"; + return std::unique_ptr(); + } + + return std::unique_ptr( + new Mp4File(std::move(segments_), std::move(video_sample_entry_))); +} + } // namespace moonfire_nvr diff --git a/src/mp4.h b/src/mp4.h index 8e11515..61cc810 100644 --- a/src/mp4.h +++ b/src/mp4.h @@ -36,6 +36,9 @@ #ifndef MOONFIRE_NVR_MP4_H #define MOONFIRE_NVR_MP4_H +#include +#include + #include "recording.h" #include "http.h" @@ -118,8 +121,52 @@ class Mp4SampleTablePieces { int32_t key_frames_ = 0; }; +struct Mp4FileSegment { + Recording recording; + Mp4SampleTablePieces pieces; + RealFileSlice sample_file_slice; + int32_t rel_start_90k = 0; + int32_t rel_end_90k = std::numeric_limits::max(); +}; + } // namespace internal +// Builder for a virtual .mp4 file. +class Mp4FileBuilder { + public: + // Append part or all of a recording. + // Note that |recording.video_sample_entry_sha1| must be added via + // AddSampleEntry. + Mp4FileBuilder &Append(Recording &&recording, int32_t rel_start_300ths, + int32_t rel_end_300ths); + + // TODO: support multiple sample entries? + Mp4FileBuilder &SetSampleEntry(const VideoSampleEntry &entry); + + // Set if a subtitle track should be added with timestamps. + // TODO: unimplemented. + Mp4FileBuilder &include_timestamp_subtitle_track(bool); + + // TODO: MPEG-DASH / ISO BMFF Byte Stream Format support. + + // Build the .mp4 file, returning it to the caller. + // The Mp4FileBuilder is left in an undefined state; it should not + // be used afterward. On error, nullptr is returned, with |error_message| + // populated. + // + // Errors include: + // * TODO: width/height mismatch? or is this okay? + // * No segments. + // * Non-final segment has zero duration of last sample. + // * Data error in one of the recording sample indexes. + // * Invalid start/end. + std::unique_ptr Build(std::string *error_message); + + private: + std::vector> segments_; + VideoSampleEntry video_sample_entry_; +}; + } // namespace moonfire_nvr #endif // MOONFIRE_NVR_MP4_H diff --git a/src/recording.h b/src/recording.h index 2d2cb75..2f074b0 100644 --- a/src/recording.h +++ b/src/recording.h @@ -47,6 +47,8 @@ namespace moonfire_nvr { +constexpr uint32_t kTimeUnitsPerSecond = 90000; + // Encodes a sample index. class SampleIndexEncoder { public: @@ -167,6 +169,26 @@ class SampleFileWriter { bool corrupt_ = false; }; +struct VideoSampleEntry { + std::string sha1; + std::string data; + uint16_t width = 0; + uint16_t height = 0; +}; + +// Various fields from the "recording" table which are useful when viewing +// recordings. 
+struct Recording { + int64_t start_time_90k = -1; + int64_t end_time_90k = -1; + int64_t sample_file_bytes = -1; + std::string sample_file_path; + std::string sample_file_uuid; + std::string sample_file_sha1; + std::string video_sample_entry_sha1; + std::string video_index; +}; + } // namespace moonfire_nvr #endif // MOONFIRE_NVR_RECORDING_H diff --git a/src/schema.sql b/src/schema.sql index 80570aa..1238b5f 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -73,7 +73,7 @@ create table recording ( sample_file_uuid blob unique not null, sample_file_sha1 blob, - sample_file_size integer, + sample_file_bytes integer, -- The starting and ending time of the recording, in 90 kHz units since -- 1970-01-01 00:00:00 UTC. diff --git a/src/testutil.cc b/src/testutil.cc index 997f84b..ce28e79 100644 --- a/src/testutil.cc +++ b/src/testutil.cc @@ -39,6 +39,7 @@ #include #include +#include #include #include "filesystem.h" @@ -118,6 +119,16 @@ void WriteFileOrDie(const std::string &path, re2::StringPiece contents) { CHECK_EQ(ret, 0) << "close " << path << ": " << strerror(ret); } +void WriteFileOrDie(const std::string &path, EvBuffer *buf) { + int fd = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0600); + PCHECK(fd >= 0) << "open: " << path; + size_t buf_len = evbuffer_get_length(buf->get()); + int written = evbuffer_write(buf->get(), fd); + PCHECK(written >= 0 && buf_len == static_cast(written)) + << "buf_len: " << buf_len << ", written: " << written; + PCHECK(close(fd) == 0) << "close"; +} + std::string ReadFileOrDie(const std::string &path) { std::unique_ptr f; int ret = GetRealFilesystem()->Open(path.c_str(), O_RDONLY, &f); diff --git a/src/testutil.h b/src/testutil.h index 08c839c..cad816b 100644 --- a/src/testutil.h +++ b/src/testutil.h @@ -37,6 +37,8 @@ #include #include +#include "http.h" + namespace moonfire_nvr { // Create or empty the given test directory, or die. @@ -45,6 +47,7 @@ std::string PrepareTempDirOrDie(const std::string &test_name); // Write the given file contents to the given path, or die. void WriteFileOrDie(const std::string &path, re2::StringPiece contents); +void WriteFileOrDie(const std::string &path, EvBuffer *buf); // Read the contents of the given path, or die. std::string ReadFileOrDie(const std::string &path);
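For reference, a rough sketch of how the new Mp4FileBuilder is meant to be driven, following the usage in CopySingleRecordingToNewMp4() above. The |recording| and |entry| values are placeholders assumed to have been loaded elsewhere (e.g. from the recording table), and error handling is abbreviated:

    // Build a virtual .mp4 from one recording and emit its bytes.
    Mp4FileBuilder builder;
    builder.SetSampleEntry(entry);             // VideoSampleEntry for the track.
    builder.Append(std::move(recording), 0,    // whole recording, from rel_start_90k
                   std::numeric_limits<int32_t>::max());  // ... to rel_end_90k.
    std::string error_message;
    auto mp4 = builder.Build(&error_message);  // nullptr + error_message on failure.
    if (mp4 != nullptr) {
      EvBuffer buf;
      mp4->AddRange(ByteRange(0, mp4->size()), &buf, &error_message);
      // ...or serve it directly over HTTP: HttpServe(*mp4, req);
    }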