diff --git a/src/ffmpeg.h b/src/ffmpeg.h index ca6a588..e22a656 100644 --- a/src/ffmpeg.h +++ b/src/ffmpeg.h @@ -46,6 +46,7 @@ #include #include +#include extern "C" { #include @@ -74,6 +75,11 @@ class VideoPacket { AVPacket *pkt() { return &pkt_; } const AVPacket *pkt() const { return &pkt_; } + re2::StringPiece data() { + return re2::StringPiece(reinterpret_cast(pkt_.data), + pkt_.size); + } + private: AVPacket pkt_; }; @@ -99,6 +105,12 @@ class InputVideoPacketStream { // Returns the video stream. virtual const AVStream *stream() const = 0; + + re2::StringPiece extradata() const { + return re2::StringPiece( + reinterpret_cast(stream()->codec->extradata), + stream()->codec->extradata_size); + } }; // A class which opens streams. diff --git a/src/h264-test.cc b/src/h264-test.cc index df04dd6..9fa7a2c 100644 --- a/src/h264-test.cc +++ b/src/h264-test.cc @@ -82,29 +82,63 @@ TEST(H264Test, DecodeOnly) { "68 ee 3c 80")); } -TEST(H264Test, SampleDataFromAnnexBExtraData) { +TEST(H264Test, SampleEntryFromAnnexBExtraData) { re2::StringPiece test_input(reinterpret_cast(kAnnexBTestInput), sizeof(kAnnexBTestInput)); std::string sample_entry; std::string error_message; - ASSERT_TRUE( - GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message)) + bool need_transform; + ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry, + &need_transform, &error_message)) << error_message; EXPECT_EQ(kTestOutput, ToHex(sample_entry, true)); + EXPECT_TRUE(need_transform); } -TEST(H264Test, SampleDataFromAvcDecoderConfigExtraData) { +TEST(H264Test, SampleEntryFromAvcDecoderConfigExtraData) { re2::StringPiece test_input( reinterpret_cast(kAvcDecoderConfigTestInput), sizeof(kAvcDecoderConfigTestInput)); std::string sample_entry; std::string error_message; - ASSERT_TRUE( - GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message)) + bool need_transform; + ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry, + &need_transform, &error_message)) << error_message; EXPECT_EQ(kTestOutput, ToHex(sample_entry, true)); + EXPECT_FALSE(need_transform); +} + +TEST(H264Test, TransformSampleEntry) { + const uint8_t kInput[] = { + 0x00, 0x00, 0x00, 0x01, 0x67, 0x4d, 0x00, 0x1f, 0x9a, 0x66, + 0x02, 0x80, 0x2d, 0xff, 0x35, 0x01, 0x01, 0x01, 0x40, 0x00, + 0x00, 0xfa, 0x00, 0x00, 0x1d, 0x4c, 0x01, + + 0x00, 0x00, 0x00, 0x01, 0x68, 0xee, 0x3c, 0x80, + + 0x00, 0x00, 0x00, 0x01, 0x06, 0x06, 0x01, 0xc4, 0x80, + + 0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x80, 0x10, 0x00, 0x08, + 0x7f, 0x00, 0x5d, 0x27, 0xb5, 0xc1, 0xff, 0x8c, 0xd6, 0x35, + // (truncated) + }; + const char kExpectedOutput[] = + "00 00 00 17 " + "67 4d 00 1f 9a 66 02 80 2d ff 35 01 01 01 40 00 00 fa 00 00 1d 4c 01 " + "00 00 00 04 68 ee 3c 80 " + "00 00 00 05 06 06 01 c4 80 " + "00 00 00 10 " + "65 88 80 10 00 08 7f 00 5d 27 b5 c1 ff 8c d6 35"; + re2::StringPiece input(reinterpret_cast(kInput), + sizeof(kInput)); + std::string out; + std::string error_message; + ASSERT_TRUE(TransformSampleData(input, &out, &error_message)) + << error_message; + EXPECT_EQ(kExpectedOutput, ToHex(out, true)); } } // namespace diff --git a/src/h264.cc b/src/h264.cc index 6369869..882c61f 100644 --- a/src/h264.cc +++ b/src/h264.cc @@ -46,15 +46,16 @@ namespace { const int kNalUnitSeqParameterSet = 7; const int kNalUnitPicParameterSet = 8; +const uint8_t kNalUnitTypeMask = 0x1F; // bottom 5 bits of first byte of unit. + // Parse sequence parameter set and picture parameter set from ffmpeg's // "extra_data". bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps, re2::StringPiece *pps, std::string *error_message) { bool ok = true; - internal::NalUnitFunction fn = [&ok, sps, pps, - error_message](re2::StringPiece nal_unit) { + internal::NalUnitFunction fn = [&](re2::StringPiece nal_unit) { // See ISO/IEC 14496-10 section 7.3.1, which defines nal_unit. - uint8_t nal_type = nal_unit[0] & 0x1F; // bottom 5 bits of first byte. + uint8_t nal_type = nal_unit[0] & kNalUnitTypeMask; switch (nal_type) { case kNalUnitSeqParameterSet: *sps = nal_unit; @@ -85,8 +86,7 @@ bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps, namespace internal { // See ISO/IEC 14496-10 section B.2: Byte stream NAL unit decoding process. -// This is a relatively simple, unoptimized implementation given that it -// only processes a few dozen bytes per recording. +// This is a relatively simple, unoptimized implementation. bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit, std::string *error_message) { static const RE2 kStartCode("(\\x00{2,}\\x01)"); @@ -125,9 +125,9 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit, } // namespace internal -bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width, - uint16_t height, std::string *out, - std::string *error_message) { +bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height, + std::string *sample_entry, bool *need_transform, + std::string *error_message) { uint32_t avcc_len; re2::StringPiece sps; re2::StringPiece pps; @@ -140,42 +140,44 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width, // This magic value is checked at the end. avcc_len = 19 + sps.size() + pps.size(); + *need_transform = true; } else { // Assume "extradata" holds an AVCDecoderConfiguration. avcc_len = 8 + extradata.size(); + *need_transform = false; } // This magic value is also checked at the end. uint32_t avc1_len = 86 + avcc_len; - out->clear(); - out->reserve(avc1_len); + sample_entry->clear(); + sample_entry->reserve(avc1_len); // This is a concatenation of the following boxes/classes. // SampleEntry, ISO/IEC 14496-10 section 8.5.2. - uint32_t avc1_len_pos = out->size(); - AppendU32(avc1_len, out); // length - out->append("avc1"); // type - out->append(6, '\x00'); // reserved - AppendU16(1, out); // data_reference_index = 1 + uint32_t avc1_len_pos = sample_entry->size(); + AppendU32(avc1_len, sample_entry); // length + sample_entry->append("avc1"); // type + sample_entry->append(6, '\x00'); // reserved + AppendU16(1, sample_entry); // data_reference_index = 1 // VisualSampleEntry, ISO/IEC 14496-12 section 12.1.3. - out->append(16, '\x00'); // pre_defined + reserved - AppendU16(width, out); - AppendU16(height, out); - AppendU32(UINT32_C(0x00480000), out); // horizresolution - AppendU32(UINT32_C(0x00480000), out); // vertresolution - AppendU32(0, out); // reserved - AppendU16(1, out); // frame count - out->append(32, '\x00'); // compressorname - AppendU16(0x0018, out); // depth - Append16(-1, out); // pre_defined + sample_entry->append(16, '\x00'); // pre_defined + reserved + AppendU16(width, sample_entry); + AppendU16(height, sample_entry); + AppendU32(UINT32_C(0x00480000), sample_entry); // horizresolution + AppendU32(UINT32_C(0x00480000), sample_entry); // vertresolution + AppendU32(0, sample_entry); // reserved + AppendU16(1, sample_entry); // frame count + sample_entry->append(32, '\x00'); // compressorname + AppendU16(0x0018, sample_entry); // depth + Append16(-1, sample_entry); // pre_defined // AVCSampleEntry, ISO/IEC 14496-15 section 5.3.4.1. // AVCConfigurationBox, ISO/IEC 14496-15 section 5.3.4.1. - uint32_t avcc_len_pos = out->size(); - AppendU32(avcc_len, out); // length - out->append("avcC"); // type + uint32_t avcc_len_pos = sample_entry->size(); + AppendU32(avcc_len, sample_entry); // length + sample_entry->append("avcC"); // type if (!sps.empty() && !pps.empty()) { // Create the AVCDecoderConfiguration, ISO/IEC 14496-15 section 5.2.4.1. @@ -186,43 +188,42 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width, // "emulation_prevention_three_byte" in ISO/IEC 14496-10 section 7.4. // It looks like 00 is not a valid value of profile_idc, so this distinction // shouldn't be relevant here. And ffmpeg seems to ignore it. - out->push_back(1); // configurationVersion - out->push_back(sps[1]); // profile_idc -> AVCProfileIndication - out->push_back(sps[2]); // ...misc bits... -> profile_compatibility - out->push_back(sps[3]); // level_idc -> AVCLevelIndication + sample_entry->push_back(1); // configurationVersion + sample_entry->push_back(sps[1]); // profile_idc -> AVCProfileIndication + sample_entry->push_back( + sps[2]); // ...misc bits... -> profile_compatibility + sample_entry->push_back(sps[3]); // level_idc -> AVCLevelIndication - // Hardcode lengthSizeMinusOne to 3. This needs to match what ffmpeg uses - // when generating AVCParameterSamples (ISO/IEC 14496-15 section 5.3.2). - // There doesn't seem to be a clean way to get this from ffmpeg, but it's - // always 3. - out->push_back(static_cast(0xff)); + // Hardcode lengthSizeMinusOne to 3, matching TransformSampleData's 4-byte + // lengths. + sample_entry->push_back(static_cast(0xff)); // Only support one SPS and PPS. // ffmpeg's ff_isom_write_avcc has the same limitation, so it's probably // fine. This next byte is a reserved 0b111 + a 5-bit # of SPSs (1). - out->push_back(static_cast(0xe1)); - AppendU16(sps.size(), out); - out->append(sps.data(), sps.size()); - out->push_back(1); // # of PPSs. - AppendU16(pps.size(), out); - out->append(pps.data(), pps.size()); + sample_entry->push_back(static_cast(0xe1)); + AppendU16(sps.size(), sample_entry); + sample_entry->append(sps.data(), sps.size()); + sample_entry->push_back(1); // # of PPSs. + AppendU16(pps.size(), sample_entry); + sample_entry->append(pps.data(), pps.size()); - if (out->size() - avcc_len_pos != avcc_len) { - *error_message = - StrCat("internal error: anticipated AVCConfigurationBox length ", - avcc_len, ", but was actually ", out->size() - avcc_len_pos, - "; sps length ", sps.size(), ", pps length ", pps.size()); + if (sample_entry->size() - avcc_len_pos != avcc_len) { + *error_message = StrCat( + "internal error: anticipated AVCConfigurationBox length ", avcc_len, + ", but was actually ", sample_entry->size() - avcc_len_pos, + "; sps length ", sps.size(), ", pps length ", pps.size()); return false; } } else { - out->append(extradata.data(), extradata.size()); + sample_entry->append(extradata.data(), extradata.size()); } - if (out->size() - avc1_len_pos != avc1_len) { + if (sample_entry->size() - avc1_len_pos != avc1_len) { *error_message = StrCat("internal error: anticipated AVCSampleEntry length ", avc1_len, - ", but was actually ", out->size() - avc1_len_pos, + ", but was actually ", sample_entry->size() - avc1_len_pos, "; sps length ", sps.size(), ", pps length ", pps.size()); return false; } @@ -230,4 +231,21 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width, return true; } +bool TransformSampleData(re2::StringPiece annexb_sample, + std::string *avc_sample, std::string *error_message) { + // See AVCParameterSamples, ISO/IEC 14496-15 section 5.3.2. + avc_sample->clear(); + auto fn = [&](re2::StringPiece nal_unit) { + // 4-byte length; this must be in sync with ParseExtraData's + // lengthSizeMinusOne == 3. + AppendU32(nal_unit.size(), avc_sample); + avc_sample->append(nal_unit.data(), nal_unit.size()); + return IterationControl::kContinue; + }; + if (!internal::DecodeH264AnnexB(annexb_sample, fn, error_message)) { + return false; + } + return true; +} + } // namespace moonfire_nvr diff --git a/src/h264.h b/src/h264.h index d0a160a..d0e888a 100644 --- a/src/h264.h +++ b/src/h264.h @@ -29,20 +29,16 @@ // along with this program. If not, see . // // h264.h: H.264 decoding. For the most part, Moonfire NVR does not try to -// understand the video codec. There's one exception. It must construct the -// .mp4 sample description table, and for AVC, this includes the ISO/IEC -// 14496-15 section 5.2.4.1 AVCDecoderConfigurationRecord. +// understand the video codec. However, H.264 has two byte stream encodings: +// ISO/IEC 14496-10 Annex B, and ISO/IEC 14496-15 AVC access units. +// When streaming from RTSP, ffmpeg supplies the former. We need the latter +// to stick into .mp4 files. This file manages the conversion, both for +// the ffmpeg "extra data" (which should become the ISO/IEC 14496-15 +// section 5.2.4.1 AVCDecoderConfigurationRecord) and the actual samples. // -// When handling a RTSP input source, ffmpeg supplies as "extradata" an -// ISO/IEC 14496-10 Annex B byte stream containing SPS (sequence parameter -// set) and PPS (picture parameter set) NAL units from which this can be -// constructed. ffmpeg of course also has logic for converting "extradata" -// to the AVCDecoderConfigurationRecord, but unfortunately it is not exposed -// except through ffmpeg's own generated .mp4 file. Extracting just this part -// of their .mp4 files would be more trouble than it's worth. -// -// Just to make things interesting, when handling a .mp4 file, ffmpeg supplies -// as "extradata" an AVCDecoderConfiguration. +// ffmpeg of course has logic to do the same thing, but unfortunately it is +// not exposed except through ffmpeg's own generated .mp4 file. Extracting +// just this part of their .mp4 files would be more trouble than it's worth. #ifndef MOONFIRE_NVR_H264_H #define MOONFIRE_NVR_H264_H @@ -76,9 +72,12 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit, // Gets a H.264 sample entry (AVCSampleEntry, which extends // VisualSampleEntry), given the "extradata", width, and height supplied by // ffmpeg. -bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width, - uint16_t height, std::string *out, - std::string *error_message); +bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height, + std::string *sample_entry, bool *need_transform, + std::string *error_message); + +bool TransformSampleData(re2::StringPiece annexb_sample, + std::string *avc_sample, std::string *error_message); } // namespace moonfire_nvr diff --git a/src/mp4-test.cc b/src/mp4-test.cc index ffd38de..df32144 100644 --- a/src/mp4-test.cc +++ b/src/mp4-test.cc @@ -224,12 +224,14 @@ class IntegrationTest : public testing::Test { video_sample_entry_.width = in->stream()->codec->width; video_sample_entry_.height = in->stream()->codec->height; - if (!GetH264SampleEntry(GetExtradata(in.get()), in->stream()->codec->width, - in->stream()->codec->height, - &video_sample_entry_.data, &error_message)) { + bool need_transform; + if (!ParseExtraData(in->extradata(), in->stream()->codec->width, + in->stream()->codec->height, &video_sample_entry_.data, + &need_transform, &error_message)) { ADD_FAILURE() << "GetH264SampleEntry: " << error_message; return recording; } + EXPECT_FALSE(need_transform); while (true) { VideoPacket pkt; @@ -286,7 +288,7 @@ class IntegrationTest : public testing::Test { StrCat(tmpdir_path_, "/clip.new.mp4"), &error_message); ASSERT_TRUE(copied != nullptr) << error_message; - EXPECT_EQ(GetExtradata(original.get()), GetExtradata(copied.get())); + EXPECT_EQ(original->extradata(), copied->extradata()); EXPECT_EQ(original->stream()->codec->width, copied->stream()->codec->width); EXPECT_EQ(original->stream()->codec->height, copied->stream()->codec->height); @@ -310,12 +312,6 @@ class IntegrationTest : public testing::Test { } } - re2::StringPiece GetExtradata(InputVideoPacketStream *stream) { - return re2::StringPiece( - reinterpret_cast(stream->stream()->codec->extradata), - stream->stream()->codec->extradata_size); - } - re2::StringPiece GetData(const VideoPacket &pkt) { return re2::StringPiece(reinterpret_cast(pkt.pkt()->data), pkt.pkt()->size);