AnnexB->AVC sample data converter

I discovered that the mp4 files I was writing were viewable in VLC and in
Chrome-on-desktop (ffmpeg-based) but not in Chrome-on-Android
(libstagefright-based). It turns out that I was writing Annex B sample data
rather than the correct AVCParameterSample format. ffmpeg gives both the
"extradata" and the actual frames in Annex B format when reading from rtsp.

This is still my simple, unoptimized implementation of the Annex B parser. My
Raspberry Pi 2 is still able to record my six streams using about 30% of 1
core, so it will do for the moment at least.
This commit is contained in:
Scott Lamb 2016-02-02 20:18:41 -08:00
parent 8ee1ab1c7b
commit ca368944ec
5 changed files with 142 additions and 83 deletions

View File

@@ -46,6 +46,7 @@
#include <string>
#include <glog/logging.h>
#include <re2/stringpiece.h>
extern "C" {
#include <libavformat/avformat.h>
@@ -74,6 +75,11 @@ class VideoPacket {
AVPacket *pkt() { return &pkt_; }
const AVPacket *pkt() const { return &pkt_; }
// Returns a borrowed view of this packet's payload bytes. The view is
// only valid while the underlying AVPacket is alive and unmodified.
re2::StringPiece data() {
  const char *bytes = reinterpret_cast<const char *>(pkt_.data);
  return re2::StringPiece(bytes, pkt_.size);
}
private:
AVPacket pkt_;
};
@@ -99,6 +105,12 @@ class InputVideoPacketStream {
// Returns the video stream.
virtual const AVStream *stream() const = 0;
// Returns a borrowed view of the codec "extradata" ffmpeg associates with
// the video stream (for H.264 from RTSP, an Annex B stream of SPS/PPS).
re2::StringPiece extradata() const {
  const auto *codec = stream()->codec;
  return re2::StringPiece(reinterpret_cast<const char *>(codec->extradata),
                          codec->extradata_size);
}
};
// A class which opens streams.

View File

@@ -82,29 +82,63 @@ TEST(H264Test, DecodeOnly) {
"68 ee 3c 80"));
}
TEST(H264Test, SampleDataFromAnnexBExtraData) {
TEST(H264Test, SampleEntryFromAnnexBExtraData) {
re2::StringPiece test_input(reinterpret_cast<const char *>(kAnnexBTestInput),
sizeof(kAnnexBTestInput));
std::string sample_entry;
std::string error_message;
ASSERT_TRUE(
GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message))
bool need_transform;
ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry,
&need_transform, &error_message))
<< error_message;
EXPECT_EQ(kTestOutput, ToHex(sample_entry, true));
EXPECT_TRUE(need_transform);
}
TEST(H264Test, SampleDataFromAvcDecoderConfigExtraData) {
TEST(H264Test, SampleEntryFromAvcDecoderConfigExtraData) {
re2::StringPiece test_input(
reinterpret_cast<const char *>(kAvcDecoderConfigTestInput),
sizeof(kAvcDecoderConfigTestInput));
std::string sample_entry;
std::string error_message;
ASSERT_TRUE(
GetH264SampleEntry(test_input, 1280, 720, &sample_entry, &error_message))
bool need_transform;
ASSERT_TRUE(ParseExtraData(test_input, 1280, 720, &sample_entry,
&need_transform, &error_message))
<< error_message;
EXPECT_EQ(kTestOutput, ToHex(sample_entry, true));
EXPECT_FALSE(need_transform);
}
// Verifies Annex B -> AVC sample conversion: each 00 00 00 01 start code in
// the input must be replaced by a 4-byte big-endian NAL unit length prefix.
// NOTE(review): the test name says "SampleEntry" but it exercises
// TransformSampleData — consider renaming for clarity.
TEST(H264Test, TransformSampleEntry) {
// Annex B byte stream: SPS (0x67), PPS (0x68), SEI (0x06), then a
// (truncated) IDR slice (0x65), each preceded by a 4-byte start code.
const uint8_t kInput[] = {
0x00, 0x00, 0x00, 0x01, 0x67, 0x4d, 0x00, 0x1f, 0x9a, 0x66,
0x02, 0x80, 0x2d, 0xff, 0x35, 0x01, 0x01, 0x01, 0x40, 0x00,
0x00, 0xfa, 0x00, 0x00, 0x1d, 0x4c, 0x01,
0x00, 0x00, 0x00, 0x01, 0x68, 0xee, 0x3c, 0x80,
0x00, 0x00, 0x00, 0x01, 0x06, 0x06, 0x01, 0xc4, 0x80,
0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x80, 0x10, 0x00, 0x08,
0x7f, 0x00, 0x5d, 0x27, 0xb5, 0xc1, 0xff, 0x8c, 0xd6, 0x35,
// (truncated)
};
// The same NAL units, each prefixed with its byte length
// (0x17 = 23, 0x04, 0x05, 0x10 = 16) instead of a start code.
const char kExpectedOutput[] =
"00 00 00 17 "
"67 4d 00 1f 9a 66 02 80 2d ff 35 01 01 01 40 00 00 fa 00 00 1d 4c 01 "
"00 00 00 04 68 ee 3c 80 "
"00 00 00 05 06 06 01 c4 80 "
"00 00 00 10 "
"65 88 80 10 00 08 7f 00 5d 27 b5 c1 ff 8c d6 35";
re2::StringPiece input(reinterpret_cast<const char *>(kInput),
sizeof(kInput));
std::string out;
std::string error_message;
ASSERT_TRUE(TransformSampleData(input, &out, &error_message))
<< error_message;
EXPECT_EQ(kExpectedOutput, ToHex(out, true));
}
} // namespace

View File

@@ -46,15 +46,16 @@ namespace {
const int kNalUnitSeqParameterSet = 7;
const int kNalUnitPicParameterSet = 8;
const uint8_t kNalUnitTypeMask = 0x1F; // bottom 5 bits of first byte of unit.
// Parse sequence parameter set and picture parameter set from ffmpeg's
// "extra_data".
bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps,
re2::StringPiece *pps, std::string *error_message) {
bool ok = true;
internal::NalUnitFunction fn = [&ok, sps, pps,
error_message](re2::StringPiece nal_unit) {
internal::NalUnitFunction fn = [&](re2::StringPiece nal_unit) {
// See ISO/IEC 14496-10 section 7.3.1, which defines nal_unit.
uint8_t nal_type = nal_unit[0] & 0x1F; // bottom 5 bits of first byte.
uint8_t nal_type = nal_unit[0] & kNalUnitTypeMask;
switch (nal_type) {
case kNalUnitSeqParameterSet:
*sps = nal_unit;
@@ -85,8 +86,7 @@ bool ParseAnnexBExtraData(re2::StringPiece extradata, re2::StringPiece *sps,
namespace internal {
// See ISO/IEC 14496-10 section B.2: Byte stream NAL unit decoding process.
// This is a relatively simple, unoptimized implementation given that it
// only processes a few dozen bytes per recording.
// This is a relatively simple, unoptimized implementation.
bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
std::string *error_message) {
static const RE2 kStartCode("(\\x00{2,}\\x01)");
@@ -125,9 +125,9 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
} // namespace internal
bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
uint16_t height, std::string *out,
std::string *error_message) {
bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height,
std::string *sample_entry, bool *need_transform,
std::string *error_message) {
uint32_t avcc_len;
re2::StringPiece sps;
re2::StringPiece pps;
@@ -140,42 +140,44 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
// This magic value is checked at the end.
avcc_len = 19 + sps.size() + pps.size();
*need_transform = true;
} else {
// Assume "extradata" holds an AVCDecoderConfiguration.
avcc_len = 8 + extradata.size();
*need_transform = false;
}
// This magic value is also checked at the end.
uint32_t avc1_len = 86 + avcc_len;
out->clear();
out->reserve(avc1_len);
sample_entry->clear();
sample_entry->reserve(avc1_len);
// This is a concatenation of the following boxes/classes.
// SampleEntry, ISO/IEC 14496-10 section 8.5.2.
uint32_t avc1_len_pos = out->size();
AppendU32(avc1_len, out); // length
out->append("avc1"); // type
out->append(6, '\x00'); // reserved
AppendU16(1, out); // data_reference_index = 1
uint32_t avc1_len_pos = sample_entry->size();
AppendU32(avc1_len, sample_entry); // length
sample_entry->append("avc1"); // type
sample_entry->append(6, '\x00'); // reserved
AppendU16(1, sample_entry); // data_reference_index = 1
// VisualSampleEntry, ISO/IEC 14496-12 section 12.1.3.
out->append(16, '\x00'); // pre_defined + reserved
AppendU16(width, out);
AppendU16(height, out);
AppendU32(UINT32_C(0x00480000), out); // horizresolution
AppendU32(UINT32_C(0x00480000), out); // vertresolution
AppendU32(0, out); // reserved
AppendU16(1, out); // frame count
out->append(32, '\x00'); // compressorname
AppendU16(0x0018, out); // depth
Append16(-1, out); // pre_defined
sample_entry->append(16, '\x00'); // pre_defined + reserved
AppendU16(width, sample_entry);
AppendU16(height, sample_entry);
AppendU32(UINT32_C(0x00480000), sample_entry); // horizresolution
AppendU32(UINT32_C(0x00480000), sample_entry); // vertresolution
AppendU32(0, sample_entry); // reserved
AppendU16(1, sample_entry); // frame count
sample_entry->append(32, '\x00'); // compressorname
AppendU16(0x0018, sample_entry); // depth
Append16(-1, sample_entry); // pre_defined
// AVCSampleEntry, ISO/IEC 14496-15 section 5.3.4.1.
// AVCConfigurationBox, ISO/IEC 14496-15 section 5.3.4.1.
uint32_t avcc_len_pos = out->size();
AppendU32(avcc_len, out); // length
out->append("avcC"); // type
uint32_t avcc_len_pos = sample_entry->size();
AppendU32(avcc_len, sample_entry); // length
sample_entry->append("avcC"); // type
if (!sps.empty() && !pps.empty()) {
// Create the AVCDecoderConfiguration, ISO/IEC 14496-15 section 5.2.4.1.
@@ -186,43 +188,42 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
// "emulation_prevention_three_byte" in ISO/IEC 14496-10 section 7.4.
// It looks like 00 is not a valid value of profile_idc, so this distinction
// shouldn't be relevant here. And ffmpeg seems to ignore it.
out->push_back(1); // configurationVersion
out->push_back(sps[1]); // profile_idc -> AVCProfileIndication
out->push_back(sps[2]); // ...misc bits... -> profile_compatibility
out->push_back(sps[3]); // level_idc -> AVCLevelIndication
sample_entry->push_back(1); // configurationVersion
sample_entry->push_back(sps[1]); // profile_idc -> AVCProfileIndication
sample_entry->push_back(
sps[2]); // ...misc bits... -> profile_compatibility
sample_entry->push_back(sps[3]); // level_idc -> AVCLevelIndication
// Hardcode lengthSizeMinusOne to 3. This needs to match what ffmpeg uses
// when generating AVCParameterSamples (ISO/IEC 14496-15 section 5.3.2).
// There doesn't seem to be a clean way to get this from ffmpeg, but it's
// always 3.
out->push_back(static_cast<char>(0xff));
// Hardcode lengthSizeMinusOne to 3, matching TransformSampleData's 4-byte
// lengths.
sample_entry->push_back(static_cast<char>(0xff));
// Only support one SPS and PPS.
// ffmpeg's ff_isom_write_avcc has the same limitation, so it's probably
// fine. This next byte is a reserved 0b111 + a 5-bit # of SPSs (1).
out->push_back(static_cast<char>(0xe1));
AppendU16(sps.size(), out);
out->append(sps.data(), sps.size());
out->push_back(1); // # of PPSs.
AppendU16(pps.size(), out);
out->append(pps.data(), pps.size());
sample_entry->push_back(static_cast<char>(0xe1));
AppendU16(sps.size(), sample_entry);
sample_entry->append(sps.data(), sps.size());
sample_entry->push_back(1); // # of PPSs.
AppendU16(pps.size(), sample_entry);
sample_entry->append(pps.data(), pps.size());
if (out->size() - avcc_len_pos != avcc_len) {
*error_message =
StrCat("internal error: anticipated AVCConfigurationBox length ",
avcc_len, ", but was actually ", out->size() - avcc_len_pos,
"; sps length ", sps.size(), ", pps length ", pps.size());
if (sample_entry->size() - avcc_len_pos != avcc_len) {
*error_message = StrCat(
"internal error: anticipated AVCConfigurationBox length ", avcc_len,
", but was actually ", sample_entry->size() - avcc_len_pos,
"; sps length ", sps.size(), ", pps length ", pps.size());
return false;
}
} else {
out->append(extradata.data(), extradata.size());
sample_entry->append(extradata.data(), extradata.size());
}
if (out->size() - avc1_len_pos != avc1_len) {
if (sample_entry->size() - avc1_len_pos != avc1_len) {
*error_message =
StrCat("internal error: anticipated AVCSampleEntry length ", avc1_len,
", but was actually ", out->size() - avc1_len_pos,
", but was actually ", sample_entry->size() - avc1_len_pos,
"; sps length ", sps.size(), ", pps length ", pps.size());
return false;
}
@@ -230,4 +231,21 @@ bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
return true;
}
// Converts an Annex B sample (start-code-delimited NAL units) into an
// AVCParameterSample (ISO/IEC 14496-15 section 5.3.2), in which every NAL
// unit is instead preceded by a 4-byte big-endian length.
// Returns false and fills |error_message| if the Annex B stream is invalid.
bool TransformSampleData(re2::StringPiece annexb_sample,
                         std::string *avc_sample, std::string *error_message) {
  avc_sample->clear();
  auto append_nal = [avc_sample](re2::StringPiece nal_unit) {
    // The 4-byte length here must stay in sync with the
    // lengthSizeMinusOne == 3 written by ParseExtraData.
    AppendU32(nal_unit.size(), avc_sample);
    avc_sample->append(nal_unit.data(), nal_unit.size());
    return IterationControl::kContinue;
  };
  return internal::DecodeH264AnnexB(annexb_sample, append_nal, error_message);
}
} // namespace moonfire_nvr

View File

@@ -29,20 +29,16 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// h264.h: H.264 decoding. For the most part, Moonfire NVR does not try to
// understand the video codec. There's one exception. It must construct the
// .mp4 sample description table, and for AVC, this includes the ISO/IEC
// 14496-15 section 5.2.4.1 AVCDecoderConfigurationRecord.
// understand the video codec. However, H.264 has two byte stream encodings:
// ISO/IEC 14496-10 Annex B, and ISO/IEC 14496-15 AVC access units.
// When streaming from RTSP, ffmpeg supplies the former. We need the latter
// to stick into .mp4 files. This file manages the conversion, both for
// the ffmpeg "extra data" (which should become the ISO/IEC 14496-15
// section 5.2.4.1 AVCDecoderConfigurationRecord) and the actual samples.
//
// When handling a RTSP input source, ffmpeg supplies as "extradata" an
// ISO/IEC 14496-10 Annex B byte stream containing SPS (sequence parameter
// set) and PPS (picture parameter set) NAL units from which this can be
// constructed. ffmpeg of course also has logic for converting "extradata"
// to the AVCDecoderConfigurationRecord, but unfortunately it is not exposed
// except through ffmpeg's own generated .mp4 file. Extracting just this part
// of their .mp4 files would be more trouble than it's worth.
//
// Just to make things interesting, when handling a .mp4 file, ffmpeg supplies
// as "extradata" an AVCDecoderConfiguration.
// ffmpeg of course has logic to do the same thing, but unfortunately it is
// not exposed except through ffmpeg's own generated .mp4 file. Extracting
// just this part of their .mp4 files would be more trouble than it's worth.
#ifndef MOONFIRE_NVR_H264_H
#define MOONFIRE_NVR_H264_H
@@ -76,9 +72,12 @@ bool DecodeH264AnnexB(re2::StringPiece data, NalUnitFunction process_nal_unit,
// Gets a H.264 sample entry (AVCSampleEntry, which extends
// VisualSampleEntry), given the "extradata", width, and height supplied by
// ffmpeg.
bool GetH264SampleEntry(re2::StringPiece extradata, uint16_t width,
uint16_t height, std::string *out,
std::string *error_message);
bool ParseExtraData(re2::StringPiece extradata, uint16_t width, uint16_t height,
std::string *sample_entry, bool *need_transform,
std::string *error_message);
bool TransformSampleData(re2::StringPiece annexb_sample,
std::string *avc_sample, std::string *error_message);
} // namespace moonfire_nvr

View File

@@ -224,12 +224,14 @@ class IntegrationTest : public testing::Test {
video_sample_entry_.width = in->stream()->codec->width;
video_sample_entry_.height = in->stream()->codec->height;
if (!GetH264SampleEntry(GetExtradata(in.get()), in->stream()->codec->width,
in->stream()->codec->height,
&video_sample_entry_.data, &error_message)) {
bool need_transform;
if (!ParseExtraData(in->extradata(), in->stream()->codec->width,
in->stream()->codec->height, &video_sample_entry_.data,
&need_transform, &error_message)) {
ADD_FAILURE() << "GetH264SampleEntry: " << error_message;
return recording;
}
EXPECT_FALSE(need_transform);
while (true) {
VideoPacket pkt;
@@ -286,7 +288,7 @@ class IntegrationTest : public testing::Test {
StrCat(tmpdir_path_, "/clip.new.mp4"), &error_message);
ASSERT_TRUE(copied != nullptr) << error_message;
EXPECT_EQ(GetExtradata(original.get()), GetExtradata(copied.get()));
EXPECT_EQ(original->extradata(), copied->extradata());
EXPECT_EQ(original->stream()->codec->width, copied->stream()->codec->width);
EXPECT_EQ(original->stream()->codec->height,
copied->stream()->codec->height);
@@ -310,12 +312,6 @@ class IntegrationTest : public testing::Test {
}
}
// Returns a borrowed view of the stream's codec "extradata".
// NOTE(review): duplicates InputVideoPacketStream::extradata(); callers
// could use that accessor directly and this helper could be removed.
re2::StringPiece GetExtradata(InputVideoPacketStream *stream) {
return re2::StringPiece(
reinterpret_cast<const char *>(stream->stream()->codec->extradata),
stream->stream()->codec->extradata_size);
}
re2::StringPiece GetData(const VideoPacket &pkt) {
return re2::StringPiece(reinterpret_cast<const char *>(pkt.pkt()->data),
pkt.pkt()->size);