diff --git a/src/coding.h b/src/coding.h index f0e8513..5318134 100644 --- a/src/coding.h +++ b/src/coding.h @@ -139,6 +139,11 @@ inline void AppendU16(uint16_t in, std::string *out) { out->append(reinterpret_cast<const char *>(&net), sizeof(uint16_t)); } +inline void Append16(int16_t in, std::string *out) { + int16_t net = ToNetwork16(in); + out->append(reinterpret_cast<const char *>(&net), sizeof(int16_t)); +} + inline void AppendU32(uint32_t in, std::string *out) { uint32_t net = ToNetworkU32(in); out->append(reinterpret_cast<const char *>(&net), sizeof(uint32_t)); diff --git a/src/h264-test.cc b/src/h264-test.cc index 5cb72f6..5411b81 100644 --- a/src/h264-test.cc +++ b/src/h264-test.cc @@ -66,23 +66,26 @@ TEST(H264Test, DecodeOnly) { } TEST(H264Test, SampleData) { - const uint8_t kTestOutput[] = {0x01, 0x4d, 0x00, 0x1f, 0xff, 0xe1, 0x00, 0x17, - 0x67, 0x4d, 0x00, 0x1f, 0x9a, 0x66, 0x02, 0x80, - 0x2d, 0xff, 0x35, 0x01, 0x01, 0x01, 0x40, 0x00, - 0x00, 0xfa, 0x00, 0x00, 0x1d, 0x4c, 0x01, 0x01, - 0x00, 0x04, 0x68, 0xee, 0x3c, 0x80}; + const char kTestOutput[] = + "00 00 00 84 61 76 63 31 00 00 00 00 00 00 00 01 " + "00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + "05 00 02 d0 00 48 00 00 00 48 00 00 00 00 00 00 " + "00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + "00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + "00 00 00 18 ff ff 00 00 00 2e 61 76 63 43 01 4d " + "00 1f ff e1 00 17 67 4d 00 1f 9a 66 02 80 2d ff " + "35 01 01 01 40 00 00 fa 00 00 1d 4c 01 01 00 04 " + "68 ee 3c 80"; re2::StringPiece test_input(reinterpret_cast<const char *>(kTestInput), sizeof(kTestInput)); - re2::StringPiece test_output(reinterpret_cast<const char *>(kTestOutput), - sizeof(kTestOutput)); - std::string avc_decoder_config; + std::string sample_entry; std::string error_message; ASSERT_TRUE( - ParseH264ExtraData(test_input, &avc_decoder_config, &error_message)) + GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message)) << error_message; - EXPECT_EQ(ToHex(test_output), ToHex(avc_decoder_config)); + EXPECT_EQ(kTestOutput, 
ToHex(sample_entry)); } } // namespace diff --git a/src/h264.cc b/src/h264.cc index dd454ef..d51f530 100644 --- a/src/h264.cc +++ b/src/h264.cc @@ -41,17 +41,47 @@ namespace moonfire_nvr { namespace { -// See ISO/IEC 14496-10 section 7.1. +// See ISO/IEC 14496-10 table 7-1 - NAL unit type codes, syntax element +// categories, and NAL unit type classes. const int kNalUnitSeqParameterSet = 7; const int kNalUnitPicParameterSet = 8; +// Parse sequence parameter set and picture parameter set from ffmpeg's +// "extra_data". +bool ParseExtraData(re2::StringPiece extra_data, std::string *sps, + std::string *pps, std::string *error_message) { + bool ok = true; + internal::NalUnitFunction fn = [&ok, sps, pps, + error_message](re2::StringPiece nal_unit) { + // See ISO/IEC 14496-10 section 7.3.1, which defines nal_unit. + uint8_t nal_type = nal_unit[0] & 0x1F; // bottom 5 bits of first byte. + switch (nal_type) { + case kNalUnitSeqParameterSet: + *sps = nal_unit.as_string(); + break; + case kNalUnitPicParameterSet: + *pps = nal_unit.as_string(); + break; + default: + *error_message = + StrCat("Expected only SPS and PPS; got type ", nal_type); + ok = false; + return IterationControl::kBreak; + } + return IterationControl::kContinue; + }; + if (!internal::DecodeH264AnnexB(extra_data, fn, error_message) || !ok) { + return false; + } + if (sps->empty() || pps->empty()) { + *error_message = "SPS and PPS must be specified."; + return false; + } + return true; +} + } // namespace -// See T-REC-H.264-201003-S||PDF-E.PDF page 325 for byte stream NAL unit -// syntax - -// See page 42 for nal_unit. - namespace internal { // See ISO/IEC 14496-10 section B.2: Byte stream NAL unit decoding process.
@@ -94,47 +124,57 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit, } // namespace internal -bool ParseH264ExtraData(re2::StringPiece extra_data, - std::string *avc_decoder_config, +bool GetH264SampleEntry(re2::StringPiece extra_data, uint16_t width, + uint16_t height, std::string *out, std::string *error_message) { std::string sps; std::string pps; - bool ok = true; - internal::NalUnitFunction fn = [&ok, &sps, &pps, - error_message](re2::StringPiece nal_unit) { - uint8_t nal_type = nal_unit[0] & 0x1F; // bottom 5 bits of first byte. - switch (nal_type) { - case kNalUnitSeqParameterSet: - sps = nal_unit.as_string(); - break; - case kNalUnitPicParameterSet: - pps = nal_unit.as_string(); - break; - default: - *error_message = - StrCat("Expected only SPS and PPS; got type ", nal_type); - ok = false; - return IterationControl::kBreak; - } - return IterationControl::kContinue; - }; - if (!internal::DecodeH264AnnexB(extra_data, fn, error_message) || !ok) { - return false; - } - if (sps.empty() || pps.empty()) { - *error_message = "SPS and PPS must be specified."; + if (!ParseExtraData(extra_data, &sps, &pps, error_message)) { return false; } + // sps[1..3] are copied below and both lengths are stored as 16-bit fields. if (sps.size() < 4) { *error_message = "SPS record is too short."; return false; } if (sps.size() > std::numeric_limits<uint16_t>::max() || pps.size() > std::numeric_limits<uint16_t>::max()) { *error_message = "SPS or PPS is too long."; return false; } + // These match the size of all fields below. + // Don't panic; they're verified at the end. + uint32_t avcc_len = 19 + sps.size() + pps.size(); + uint32_t avc1_len = 86 + avcc_len; + + // This is a concatenation of the following boxes/classes. + // SampleEntry, ISO/IEC 14496-12 section 8.5.2. + uint32_t avc1_len_pos = out->size(); + AppendU32(avc1_len, out); // length + out->append("avc1"); // type + out->append(6, '\x00'); // reserved + AppendU16(1, out); // data_reference_index = 1 + + // VisualSampleEntry, ISO/IEC 14496-12 section 12.1.3.
+ out->append(16, '\x00'); // pre_defined + reserved + AppendU16(width, out); + AppendU16(height, out); + AppendU32(UINT32_C(0x00480000), out); // horizresolution + AppendU32(UINT32_C(0x00480000), out); // vertresolution + AppendU32(0, out); // reserved + AppendU16(1, out); // frame count + out->append(32, '\x00'); // compressorname + AppendU16(0x0018, out); // depth + Append16(-1, out); // pre_defined + + // AVCSampleEntry, ISO/IEC 14496-15 section 5.3.4.1. + // AVCConfigurationBox, ISO/IEC 14496-15 section 5.3.4.1. + uint32_t avcc_len_pos = out->size(); + AppendU32(avcc_len, out); // length + out->append("avcC"); // type + + // AVCDecoderConfiguration, ISO/IEC 14496-15 section 5.2.4.1. // The beginning of the AVCDecoderConfiguration takes a few values from // the SPS (ISO/IEC 14496-10 section 7.3.2.1.1). One caveat: that section // defines the syntax in terms of RBSP, not NAL. The difference is the @@ -142,27 +182,41 @@ bool ParseH264ExtraData(re2::StringPiece extra_data, // "emulation_prevention_three_byte" in ISO/IEC 14496-10 section 7.4. // It looks like 00 is not a valid value of profile_idc, so this distinction // shouldn't be relevant here. And ffmpeg seems to ignore it. - avc_decoder_config->clear(); - avc_decoder_config->push_back(1); // configurationVersion - avc_decoder_config->push_back(sps[1]); // profile_idc -> AVCProfileIndication - avc_decoder_config->push_back(sps[2]); // ... -> profile_compatibility - avc_decoder_config->push_back(sps[3]); // level_idc -> AVCLevelIndication + out->push_back(1); // configurationVersion + out->push_back(sps[1]); // profile_idc -> AVCProfileIndication + out->push_back(sps[2]); // ...misc bits... -> profile_compatibility + out->push_back(sps[3]); // level_idc -> AVCLevelIndication // Hardcode lengthSizeMinusOne to 3. This needs to match what ffmpeg uses // when generating AVCParameterSamples (ISO/IEC 14496-15 section 5.3.2). // There doesn't seem to be a clean way to get this from ffmpeg, but it's // always 3.
- avc_decoder_config->push_back(static_cast<char>(0xff)); + out->push_back(static_cast<char>(0xff)); // Only support one SPS and PPS. // ffmpeg's ff_isom_write_avcc has the same limitation, so it's probably fine. // This next byte is a reserved 0b111 + a 5-bit # of SPSs (1). - avc_decoder_config->push_back(static_cast<char>(0xe1)); - AppendU16(sps.size(), avc_decoder_config); - avc_decoder_config->append(sps.data(), sps.size()); - avc_decoder_config->push_back(1); // # of PPSs. - AppendU16(pps.size(), avc_decoder_config); - avc_decoder_config->append(pps.data(), pps.size()); + out->push_back(static_cast<char>(0xe1)); + AppendU16(sps.size(), out); + out->append(sps.data(), sps.size()); + out->push_back(1); // # of PPSs. + AppendU16(pps.size(), out); + out->append(pps.data(), pps.size()); + + if (out->size() - avcc_len_pos != avcc_len) { + *error_message = + StrCat("internal error: anticipated AVCConfigurationBox length ", + avcc_len, ", but was actually ", out->size() - avcc_len_pos, + "; sps length ", sps.size(), ", pps length ", pps.size()); + return false; + } + if (out->size() - avc1_len_pos != avc1_len) { + *error_message = + StrCat("internal error: anticipated AVCSampleEntry length ", avc1_len, + ", but was actually ", out->size() - avc1_len_pos, + "; sps length ", sps.size(), ", pps length ", pps.size()); + return false; + } return true; } diff --git a/src/h264.h b/src/h264.h index f2ba81a..a5ce0b0 100644 --- a/src/h264.h +++ b/src/h264.h @@ -59,7 +59,7 @@ using NalUnitFunction = std::function; // Decode a H.264 Annex B byte stream into NAL units. -// For ParseH264ExtraData; exposed for testing. +// For GetH264SampleEntry; exposed for testing. // Calls |process_nal_unit| for each NAL unit in the byte stream. // // Note: this won't spot all invalid byte streams.
For example, several 0x00s @@ -70,12 +70,11 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit, } // namespace -// Parse H.264 "extra data" (as supplied by ffmpeg, an Annex B byte stream -// containing SPS and PPS NAL units). On success, fills |avc_decoder_config| -// with an AVCDecoderConfigurationRecord as in ISO/IEC 14496-15 section -// 5.2.4.1. -bool ParseH264ExtraData(re2::StringPiece extra_data, - std::string *avc_decoder_config, +// Appends an H.264 sample entry (AVCSampleEntry, which extends +// VisualSampleEntry) to |out|, given the "extra_data", width, and height +// supplied by ffmpeg. On failure, returns false and fills |error_message|. +bool GetH264SampleEntry(re2::StringPiece extra_data, uint16_t width, + uint16_t height, std::string *out, std::string *error_message); } // namespace moonfire_nvr