mirror of
https://github.com/scottlamb/moonfire-nvr.git
synced 2025-01-27 06:33:20 -05:00
AnnexB->AVC sample data converter
I discovered that the mp4 files I was writing were viewable in VLC and in Chrome-on-desktop (ffmpeg-based) but not in Chrome-on-Android (libstagefright-based). It turns out that I was writing Annex B sample data rather than the correct AVCParameterSample format. ffmpeg gives both the "extradata" and the actual frames in Annex B format when reading from rtsp. This is still my simple, unoptimized implementation of the Annex B parser. My Raspberry Pi 2 is still able to record my six streams using about 30% of 1 core, so it will do for the moment at least.
This commit is contained in:
parent
8ee1ab1c7b
commit
ca368944ec
12
src/ffmpeg.h
12
src/ffmpeg.h
@ -46,6 +46,7 @@
|
||||
#include <string>
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <re2/stringpiece.h>
|
||||
|
||||
extern "C" {
|
||||
#include <libavformat/avformat.h>
|
||||
@ -74,6 +75,11 @@ class VideoPacket {
|
||||
AVPacket *pkt() { return &pkt_; }
|
||||
const AVPacket *pkt() const { return &pkt_; }
|
||||
|
||||
re2::StringPiece data() {
|
||||
return re2::StringPiece(reinterpret_cast<const char *>(pkt_.data),
|
||||
pkt_.size);
|
||||
}
|
||||
|
||||
private:
|
||||
AVPacket pkt_;
|
||||
};
|
||||
@ -99,6 +105,12 @@ class InputVideoPacketStream {
|
||||
|
||||
// Returns the video stream.
|
||||
virtual const AVStream *stream() const = 0;
|
||||
|
||||
re2::StringPiece extradata() const {
|
||||
return re2::StringPiece(
|
||||
reinterpret_cast<const char *>(stream()->codec->extradata),
|
||||
stream()->codec->extradata_size);
|
||||
}
|
||||
};
|
||||
|
||||
// A class which opens streams.
|
||||
|
@ -82,29 +82,63 @@ TEST(H264Test, DecodeOnly) {
|
||||
"68 ee 3c 80"));
|
||||
}
|
||||
|
||||
TEST(H264Test, SampleDataFromAnnexBExtraData) {
|
||||
TEST(H264Test, SampleEntryFromAnnexBExtraData) {
|
||||
re2::StringPiece test_input(reinterpret_cast<const char *>(kAnnexBTestInput),
|
||||
sizeof(kAnnexBTestInput));
|
||||
std::string sample_entry;
|
||||
std::string error_message;
|
||||
ASSERT_TRUE(
|
||||
GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message))
|
||||
bool need_transform;
|
||||
ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry,
|
||||
&need_transform, &error_message))
|
||||
<< error_message;
|
||||
|
||||
EXPECT_EQ(kTestOutput, ToHex(sample_entry, true));
|
||||
EXPECT_TRUE(need_transform);
|
||||
}
|
||||
|
||||
TEST(H264Test, SampleDataFromAvcDecoderConfigExtraData) {
|
||||
TEST(H264Test, SampleEntryFromAvcDecoderConfigExtraData) {
|
||||
re2::StringPiece test_input(
|
||||
reinterpret_cast<const char *>(kAvcDecoderConfigTestInput),
|
||||
sizeof(kAvcDecoderConfigTestInput));
|
||||
std::string sample_entry;
|
||||
std::string error_message;
|
||||
ASSERT_TRUE(
|
||||
GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message))
|
||||
bool need_transform;
|
||||
ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry,
|
||||
&need_transform, &error_message))
|
||||
<< error_message;
|
||||
|
||||
EXPECT_EQ(kTestOutput, ToHex(sample_entry, true));
|
||||
EXPECT_FALSE(need_transform);
|
||||
}
|
||||
|
||||
TEST(H264Test, TransformSampleEntry) {
|
||||
const uint8_t kInput[] = {
|
||||
0x00, 0x00, 0x00, 0x01, 0x67, 0x4d, 0x00, 0x1f, 0x9a, 0x66,
|
||||
0x02, 0x80, 0x2d, 0xff, 0x35, 0x01, 0x01, 0x01, 0x40, 0x00,
|
||||
0x00, 0xfa, 0x00, 0x00, 0x1d, 0x4c, 0x01,
|
||||
|
||||
0x00, 0x00, 0x00, 0x01, 0x68, 0xee, 0x3c, 0x80,
|
||||
|
||||
0x00, 0x00, 0x00, 0x01, 0x06, 0x06, 0x01, 0xc4, 0x80,
|
||||
|
||||
0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x80, 0x10, 0x00, 0x08,
|
||||
0x7f, 0x00, 0x5d, 0x27, 0xb5, 0xc1, 0xff, 0x8c, 0xd6, 0x35,
|
||||
// (truncated)
|
||||
};
|
||||
const char kExpectedOutput[] =
|
||||
"00 00 00 17 "
|
||||
"67 4d 00 1f 9a 66 02 80 2d ff 35 01 01 01 40 00 00 fa 00 00 1d 4c 01 "
|
||||
"00 00 00 04 68 ee 3c 80 "
|
||||
"00 00 00 05 06 06 01 c4 80 "
|
||||
"00 00 00 10 "
|
||||
"65 88 80 10 00 08 7f 00 5d 27 b5 c1 ff 8c d6 35";
|
||||
re2::StringPiece input(reinterpret_cast<const char *>(kInput),
|
||||
sizeof(kInput));
|
||||
std::string out;
|
||||
std::string error_message;
|
||||
ASSERT_TRUE(TransformSampleData(input, &out, &error_message))
|
||||
<< error_message;
|
||||
EXPECT_EQ(kExpectedOutput, ToHex(out, true));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
120
src/h264.cc
120
src/h264.cc
@ -46,15 +46,16 @@ namespace {
|
||||
const int kNalUnitSeqParameterSet = 7;
|
||||
const int kNalUnitPicParameterSet = 8;
|
||||
|
||||
const uint8_t kNalUnitTypeMask = 0x1F; // bottom 5 bits of first byte of unit.
|
||||
|
||||
// Parse sequence parameter set and picture parameter set from ffmpeg's
|
||||
// "extra_data".
|
||||
bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps,
|
||||
re2::StringPiece *pps, std::string *error_message) {
|
||||
bool ok = true;
|
||||
internal::NalUnitFunction fn = [&ok, sps, pps,
|
||||
error_message](re2::StringPiece nal_unit) {
|
||||
internal::NalUnitFunction fn = [&](re2::StringPiece nal_unit) {
|
||||
// See ISO/IEC 14496-10 section 7.3.1, which defines nal_unit.
|
||||
uint8_t nal_type = nal_unit[0] & 0x1F; // bottom 5 bits of first byte.
|
||||
uint8_t nal_type = nal_unit[0] & kNalUnitTypeMask;
|
||||
switch (nal_type) {
|
||||
case kNalUnitSeqParameterSet:
|
||||
*sps = nal_unit;
|
||||
@ -85,8 +86,7 @@ bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps,
|
||||
namespace internal {
|
||||
|
||||
// See ISO/IEC 14496-10 section B.2: Byte stream NAL unit decoding process.
|
||||
// This is a relatively simple, unoptimized implementation given that it
|
||||
// only processes a few dozen bytes per recording.
|
||||
// This is a relatively simple, unoptimized implementation.
|
||||
bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
|
||||
std::string *error_message) {
|
||||
static const RE2 kStartCode("(\\x00{2,}\\x01)");
|
||||
@ -125,9 +125,9 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
|
||||
|
||||
} // namespace internal
|
||||
|
||||
bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
|
||||
uint16_t height, std::string *out,
|
||||
std::string *error_message) {
|
||||
bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height,
|
||||
std::string *sample_entry, bool *need_transform,
|
||||
std::string *error_message) {
|
||||
uint32_t avcc_len;
|
||||
re2::StringPiece sps;
|
||||
re2::StringPiece pps;
|
||||
@ -140,42 +140,44 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
|
||||
|
||||
// This magic value is checked at the end.
|
||||
avcc_len = 19 + sps.size() + pps.size();
|
||||
*need_transform = true;
|
||||
} else {
|
||||
// Assume "extradata" holds an AVCDecoderConfiguration.
|
||||
avcc_len = 8 + extradata.size();
|
||||
*need_transform = false;
|
||||
}
|
||||
|
||||
// This magic value is also checked at the end.
|
||||
uint32_t avc1_len = 86 + avcc_len;
|
||||
|
||||
out->clear();
|
||||
out->reserve(avc1_len);
|
||||
sample_entry->clear();
|
||||
sample_entry->reserve(avc1_len);
|
||||
|
||||
// This is a concatenation of the following boxes/classes.
|
||||
// SampleEntry, ISO/IEC 14496-12 section 8.5.2.
|
||||
uint32_t avc1_len_pos = out->size();
|
||||
AppendU32(avc1_len, out); // length
|
||||
out->append("avc1"); // type
|
||||
out->append(6, '\x00'); // reserved
|
||||
AppendU16(1, out); // data_reference_index = 1
|
||||
uint32_t avc1_len_pos = sample_entry->size();
|
||||
AppendU32(avc1_len, sample_entry); // length
|
||||
sample_entry->append("avc1"); // type
|
||||
sample_entry->append(6, '\x00'); // reserved
|
||||
AppendU16(1, sample_entry); // data_reference_index = 1
|
||||
|
||||
// VisualSampleEntry, ISO/IEC 14496-12 section 12.1.3.
|
||||
out->append(16, '\x00'); // pre_defined + reserved
|
||||
AppendU16(width, out);
|
||||
AppendU16(height, out);
|
||||
AppendU32(UINT32_C(0x00480000), out); // horizresolution
|
||||
AppendU32(UINT32_C(0x00480000), out); // vertresolution
|
||||
AppendU32(0, out); // reserved
|
||||
AppendU16(1, out); // frame count
|
||||
out->append(32, '\x00'); // compressorname
|
||||
AppendU16(0x0018, out); // depth
|
||||
Append16(-1, out); // pre_defined
|
||||
sample_entry->append(16, '\x00'); // pre_defined + reserved
|
||||
AppendU16(width, sample_entry);
|
||||
AppendU16(height, sample_entry);
|
||||
AppendU32(UINT32_C(0x00480000), sample_entry); // horizresolution
|
||||
AppendU32(UINT32_C(0x00480000), sample_entry); // vertresolution
|
||||
AppendU32(0, sample_entry); // reserved
|
||||
AppendU16(1, sample_entry); // frame count
|
||||
sample_entry->append(32, '\x00'); // compressorname
|
||||
AppendU16(0x0018, sample_entry); // depth
|
||||
Append16(-1, sample_entry); // pre_defined
|
||||
|
||||
// AVCSampleEntry, ISO/IEC 14496-15 section 5.3.4.1.
|
||||
// AVCConfigurationBox, ISO/IEC 14496-15 section 5.3.4.1.
|
||||
uint32_t avcc_len_pos = out->size();
|
||||
AppendU32(avcc_len, out); // length
|
||||
out->append("avcC"); // type
|
||||
uint32_t avcc_len_pos = sample_entry->size();
|
||||
AppendU32(avcc_len, sample_entry); // length
|
||||
sample_entry->append("avcC"); // type
|
||||
|
||||
if (!sps.empty() && !pps.empty()) {
|
||||
// Create the AVCDecoderConfiguration, ISO/IEC 14496-15 section 5.2.4.1.
|
||||
@ -186,43 +188,42 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
|
||||
// "emulation_prevention_three_byte" in ISO/IEC 14496-10 section 7.4.
|
||||
// It looks like 00 is not a valid value of profile_idc, so this distinction
|
||||
// shouldn't be relevant here. And ffmpeg seems to ignore it.
|
||||
out->push_back(1); // configurationVersion
|
||||
out->push_back(sps[1]); // profile_idc -> AVCProfileIndication
|
||||
out->push_back(sps[2]); // ...misc bits... -> profile_compatibility
|
||||
out->push_back(sps[3]); // level_idc -> AVCLevelIndication
|
||||
sample_entry->push_back(1); // configurationVersion
|
||||
sample_entry->push_back(sps[1]); // profile_idc -> AVCProfileIndication
|
||||
sample_entry->push_back(
|
||||
sps[2]); // ...misc bits... -> profile_compatibility
|
||||
sample_entry->push_back(sps[3]); // level_idc -> AVCLevelIndication
|
||||
|
||||
// Hardcode lengthSizeMinusOne to 3. This needs to match what ffmpeg uses
|
||||
// when generating AVCParameterSamples (ISO/IEC 14496-15 section 5.3.2).
|
||||
// There doesn't seem to be a clean way to get this from ffmpeg, but it's
|
||||
// always 3.
|
||||
out->push_back(static_cast<char>(0xff));
|
||||
// Hardcode lengthSizeMinusOne to 3, matching TransformSampleData's 4-byte
|
||||
// lengths.
|
||||
sample_entry->push_back(static_cast<char>(0xff));
|
||||
|
||||
// Only support one SPS and PPS.
|
||||
// ffmpeg's ff_isom_write_avcc has the same limitation, so it's probably
|
||||
// fine. This next byte is a reserved 0b111 + a 5-bit # of SPSs (1).
|
||||
out->push_back(static_cast<char>(0xe1));
|
||||
AppendU16(sps.size(), out);
|
||||
out->append(sps.data(), sps.size());
|
||||
out->push_back(1); // # of PPSs.
|
||||
AppendU16(pps.size(), out);
|
||||
out->append(pps.data(), pps.size());
|
||||
sample_entry->push_back(static_cast<char>(0xe1));
|
||||
AppendU16(sps.size(), sample_entry);
|
||||
sample_entry->append(sps.data(), sps.size());
|
||||
sample_entry->push_back(1); // # of PPSs.
|
||||
AppendU16(pps.size(), sample_entry);
|
||||
sample_entry->append(pps.data(), pps.size());
|
||||
|
||||
if (out->size() - avcc_len_pos != avcc_len) {
|
||||
*error_message =
|
||||
StrCat("internal error: anticipated AVCConfigurationBox length ",
|
||||
avcc_len, ", but was actually ", out->size() - avcc_len_pos,
|
||||
"; sps length ", sps.size(), ", pps length ", pps.size());
|
||||
if (sample_entry->size() - avcc_len_pos != avcc_len) {
|
||||
*error_message = StrCat(
|
||||
"internal error: anticipated AVCConfigurationBox length ", avcc_len,
|
||||
", but was actually ", sample_entry->size() - avcc_len_pos,
|
||||
"; sps length ", sps.size(), ", pps length ", pps.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
} else {
|
||||
out->append(extradata.data(), extradata.size());
|
||||
sample_entry->append(extradata.data(), extradata.size());
|
||||
}
|
||||
|
||||
if (out->size() - avc1_len_pos != avc1_len) {
|
||||
if (sample_entry->size() - avc1_len_pos != avc1_len) {
|
||||
*error_message =
|
||||
StrCat("internal error: anticipated AVCSampleEntry length ", avc1_len,
|
||||
", but was actually ", out->size() - avc1_len_pos,
|
||||
", but was actually ", sample_entry->size() - avc1_len_pos,
|
||||
"; sps length ", sps.size(), ", pps length ", pps.size());
|
||||
return false;
|
||||
}
|
||||
@ -230,4 +231,21 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TransformSampleData(re2::StringPiece annexb_sample,
|
||||
std::string *avc_sample, std::string *error_message) {
|
||||
// Rewrites an Annex B sample into *avc_sample (cleared first) as a sequence of length-prefixed NAL units. See AVCParameterSamples, ISO/IEC 14496-15 section 5.3.2.
|
||||
avc_sample->clear();
|
||||
auto fn = [&](re2::StringPiece nal_unit) {
|
||||
// 4-byte length prefix (AppendU32) ahead of each NAL unit's bytes; this must be in sync with ParseExtraData's
|
||||
// lengthSizeMinusOne == 3 (i.e. 3 + 1 = 4 bytes per length field).
|
||||
AppendU32(nal_unit.size(), avc_sample);
|
||||
avc_sample->append(nal_unit.data(), nal_unit.size());
|
||||
return IterationControl::kContinue;
|
||||
};
|
||||
if (!internal::DecodeH264AnnexB(annexb_sample, fn, error_message)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace moonfire_nvr
|
||||
|
31
src/h264.h
31
src/h264.h
@ -29,20 +29,16 @@
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// h264.h: H.264 decoding. For the most part, Moonfire NVR does not try to
|
||||
// understand the video codec. There's one exception. It must construct the
|
||||
// .mp4 sample description table, and for AVC, this includes the ISO/IEC
|
||||
// 14496-15 section 5.2.4.1 AVCDecoderConfigurationRecord.
|
||||
// understand the video codec. However, H.264 has two byte stream encodings:
|
||||
// ISO/IEC 14496-10 Annex B, and ISO/IEC 14496-15 AVC access units.
|
||||
// When streaming from RTSP, ffmpeg supplies the former. We need the latter
|
||||
// to stick into .mp4 files. This file manages the conversion, both for
|
||||
// the ffmpeg "extra data" (which should become the ISO/IEC 14496-15
|
||||
// section 5.2.4.1 AVCDecoderConfigurationRecord) and the actual samples.
|
||||
//
|
||||
// When handling a RTSP input source, ffmpeg supplies as "extradata" an
|
||||
// ISO/IEC 14496-10 Annex B byte stream containing SPS (sequence parameter
|
||||
// set) and PPS (picture parameter set) NAL units from which this can be
|
||||
// constructed. ffmpeg of course also has logic for converting "extradata"
|
||||
// to the AVCDecoderConfigurationRecord, but unfortunately it is not exposed
|
||||
// except through ffmpeg's own generated .mp4 file. Extracting just this part
|
||||
// of their .mp4 files would be more trouble than it's worth.
|
||||
//
|
||||
// Just to make things interesting, when handling a .mp4 file, ffmpeg supplies
|
||||
// as "extradata" an AVCDecoderConfiguration.
|
||||
// ffmpeg of course has logic to do the same thing, but unfortunately it is
|
||||
// not exposed except through ffmpeg's own generated .mp4 file. Extracting
|
||||
// just this part of their .mp4 files would be more trouble than it's worth.
|
||||
|
||||
#ifndef MOONFIRE_NVR_H264_H
|
||||
#define MOONFIRE_NVR_H264_H
|
||||
@ -76,9 +72,12 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
|
||||
// Gets a H.264 sample entry (AVCSampleEntry, which extends
|
||||
// VisualSampleEntry), given the "extradata", width, and height supplied by
|
||||
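// ffmpeg. Also sets *need_transform: true if "extradata" was an Annex B byte stream (samples then presumably need TransformSampleData), false if it was already an AVCDecoderConfiguration.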
|
||||
bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
|
||||
uint16_t height, std::string *out,
|
||||
std::string *error_message);
|
||||
bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height,
|
||||
std::string *sample_entry, bool *need_transform,
|
||||
std::string *error_message);
|
||||
|
||||
bool TransformSampleData(re2::StringPiece annexb_sample,
|
||||
std::string *avc_sample, std::string *error_message);
|
||||
|
||||
} // namespace moonfire_nvr
|
||||
|
||||
|
@ -224,12 +224,14 @@ class IntegrationTest : public testing::Test {
|
||||
|
||||
video_sample_entry_.width = in->stream()->codec->width;
|
||||
video_sample_entry_.height = in->stream()->codec->height;
|
||||
if (!GetH264SampleEntry(GetExtradata(in.get()), in->stream()->codec->width,
|
||||
in->stream()->codec->height,
|
||||
&video_sample_entry_.data, &error_message)) {
|
||||
bool need_transform;
|
||||
if (!ParseExtraData(in->extradata(), in->stream()->codec->width,
|
||||
in->stream()->codec->height, &video_sample_entry_.data,
|
||||
&need_transform, &error_message)) {
|
||||
ADD_FAILURE() << "GetH264SampleEntry: " << error_message;
|
||||
return recording;
|
||||
}
|
||||
EXPECT_FALSE(need_transform);
|
||||
|
||||
while (true) {
|
||||
VideoPacket pkt;
|
||||
@ -286,7 +288,7 @@ class IntegrationTest : public testing::Test {
|
||||
StrCat(tmpdir_path_, "/clip.new.mp4"), &error_message);
|
||||
ASSERT_TRUE(copied != nullptr) << error_message;
|
||||
|
||||
EXPECT_EQ(GetExtradata(original.get()), GetExtradata(copied.get()));
|
||||
EXPECT_EQ(original->extradata(), copied->extradata());
|
||||
EXPECT_EQ(original->stream()->codec->width, copied->stream()->codec->width);
|
||||
EXPECT_EQ(original->stream()->codec->height,
|
||||
copied->stream()->codec->height);
|
||||
@ -310,12 +312,6 @@ class IntegrationTest : public testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
re2::StringPiece GetExtradata(InputVideoPacketStream *stream) {
|
||||
return re2::StringPiece(
|
||||
reinterpret_cast<const char *>(stream->stream()->codec->extradata),
|
||||
stream->stream()->codec->extradata_size);
|
||||
}
|
||||
|
||||
re2::StringPiece GetData(const VideoPacket &pkt) {
|
||||
return re2::StringPiece(reinterpret_cast<const char *>(pkt.pkt()->data),
|
||||
pkt.pkt()->size);
|
||||
|
Loading…
x
Reference in New Issue
Block a user