Support trimming video segments.

* If the end of a segment is between samples, the last included sample will
  have a shortened duration.

* If the beginning of a segment not on a key frame (aka sync sample), the
  prefix will be included but trimmed using an edit list. (It seems like a
  ctts box might be able to accomplish the same thing, fwiw.)
This commit is contained in:
Scott Lamb 2016-05-01 09:16:14 -07:00
parent 713d7863de
commit 3030e3fb32
4 changed files with 153 additions and 36 deletions

View File

@ -256,11 +256,11 @@ class IntegrationTest : public testing::Test {
}
std::shared_ptr<VirtualFile> CreateMp4FromSingleRecording(
const Recording &recording, bool include_ts) {
const Recording &recording, int32_t rel_start_90k, int32_t rel_end_90k,
bool include_ts) {
Mp4FileBuilder builder(tmpdir_.get());
builder.SetSampleEntry(video_sample_entry_);
builder.Append(Recording(recording), 0,
std::numeric_limits<int32_t>::max());
builder.Append(Recording(recording), rel_start_90k, rel_end_90k);
builder.include_timestamp_subtitle_track(include_ts);
std::string error_message;
auto mp4 = builder.Build(&error_message);
@ -280,7 +280,7 @@ class IntegrationTest : public testing::Test {
WriteFileOrDie(StrCat(tmpdir_path_, "/clip.new.mp4"), &buf);
}
void CompareMp4s() {
void CompareMp4s(int64_t pts_offset) {
std::string error_message;
auto original = GetRealVideoSource()->OpenFile("../src/testdata/clip.mp4",
&error_message);
@ -294,22 +294,33 @@ class IntegrationTest : public testing::Test {
EXPECT_EQ(original->stream()->codec->height,
copied->stream()->codec->height);
int pkt = 0;
while (true) {
VideoPacket original_pkt;
VideoPacket copied_pkt;
bool original_has_next = original->GetNext(&original_pkt, &error_message);
ASSERT_TRUE(original_has_next || error_message.empty()) << error_message;
ASSERT_TRUE(original_has_next || error_message.empty())
<< "pkt " << pkt << ": " << error_message;
bool copied_has_next = copied->GetNext(&copied_pkt, &error_message);
ASSERT_TRUE(copied_has_next || error_message.empty()) << error_message;
ASSERT_TRUE(copied_has_next || error_message.empty())
<< "pkt " << pkt << ": " << error_message;
if (!original_has_next && !copied_has_next) {
break;
}
ASSERT_TRUE(original_has_next);
ASSERT_TRUE(copied_has_next);
EXPECT_EQ(original_pkt.pkt()->pts, copied_pkt.pkt()->pts);
EXPECT_EQ(original_pkt.pkt()->duration, copied_pkt.pkt()->duration);
EXPECT_EQ(GetData(original_pkt), GetData(copied_pkt));
ASSERT_TRUE(original_has_next) << "pkt " << pkt;
ASSERT_TRUE(copied_has_next) << "pkt " << pkt;
EXPECT_EQ(original_pkt.pkt()->pts + pts_offset, copied_pkt.pkt()->pts)
<< "pkt " << pkt;
// One would normally expect the duration to be exactly the same, but
// when using an edit list, ffmpeg appears to extend the last packet's
// duration by the amount skipped at the beginning. I think this is a
// bug on their side.
EXPECT_LE(original_pkt.pkt()->duration, copied_pkt.pkt()->duration)
<< "pkt " << pkt;
EXPECT_EQ(GetData(original_pkt), GetData(copied_pkt)) << "pkt " << pkt;
++pkt;
}
}
@ -329,9 +340,10 @@ TEST_F(IntegrationTest, RoundTrip) {
if (HasFailure()) {
return;
}
auto f = CreateMp4FromSingleRecording(recording, false);
auto f = CreateMp4FromSingleRecording(
recording, 0, std::numeric_limits<int32_t>::max(), false);
WriteMp4(f.get());
CompareMp4s();
CompareMp4s(0);
}
TEST_F(IntegrationTest, RoundTripWithSubtitle) {
@ -339,9 +351,21 @@ TEST_F(IntegrationTest, RoundTripWithSubtitle) {
if (HasFailure()) {
return;
}
auto f = CreateMp4FromSingleRecording(recording, true);
auto f = CreateMp4FromSingleRecording(
recording, 0, std::numeric_limits<int32_t>::max(), true);
WriteMp4(f.get());
CompareMp4s();
CompareMp4s(0);
}
TEST_F(IntegrationTest, RoundTripWithEditList) {
Recording recording = CopyMp4ToSingleRecording();
if (HasFailure()) {
return;
}
auto f = CreateMp4FromSingleRecording(
recording, 1, std::numeric_limits<int32_t>::max(), false);
WriteMp4(f.get());
CompareMp4s(-1);
}
TEST_F(IntegrationTest, Metadata) {
@ -349,14 +373,15 @@ TEST_F(IntegrationTest, Metadata) {
if (HasFailure()) {
return;
}
auto f = CreateMp4FromSingleRecording(recording, false);
auto f = CreateMp4FromSingleRecording(
recording, 0, std::numeric_limits<int32_t>::max(), false);
// This test is brittle, which is the point. Any time the digest comparison
// here fails, it can be updated, but the etag must change as well!
// Otherwise clients may combine ranges from the new format with ranges
// from the old format!
EXPECT_EQ("1e5331e8371bd97ac3158b3a86494abc87cdc70e", Digest(f.get()));
EXPECT_EQ("\"62f5e00a6e1e6dd893add217b1bf7ed7446b8b9d\"", f->etag());
EXPECT_EQ("\"a9ce99a83a177516e7f862fed405e2b47b7d4ae3\"", f->etag());
// 10 seconds later than the segment's start time.
EXPECT_EQ(1430006410, f->last_modified());

View File

@ -106,7 +106,7 @@ const size_t kSubtitleLength = strlen("2015-07-02 17:10:00 -0700");
// that causes the different bytes to be output for a particular set of
// Mp4Builder options. Incrementing this value will cause the etag to change
// as well.
const char kFormatVersion[] = {0x00};
const char kFormatVersion[] = {0x01};
// ISO/IEC 14496-12 section 4.3, ftyp.
const char kFtypBox[] = {
@ -293,6 +293,18 @@ struct TrackHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.3.2, tkhd.
uint32_t height = NET_UINT32_C(0);
} __attribute__((packed));
struct EditBox { // ISO/IEC 14496-12 section 8.6.5, edts.
uint32_t size = NET_UINT32_C(0);
const char type[4] = {'e', 'd', 't', 's'};
} __attribute__((packed));
struct EditListBoxVersion0 { // ISO/IEC 14496-12 section 8.6.6, elst.
uint32_t size = NET_UINT32_C(0);
const char type[4] = {'e', 'l', 's', 't'};
const uint32_t version_and_flags = NET_UINT32_C(0);
uint32_t entry_count = NET_UINT32_C(0);
};
struct MediaBox { // ISO/IEC 14496-12 section 8.4.1, mdia.
uint32_t size = NET_UINT32_C(0);
const char type[4] = {'m', 'd', 'i', 'a'};
@ -425,6 +437,8 @@ class ScopedMp4Box {
//
// ** trak (video: container for an individual track or stream)
// *** tkhd (track header, overall information about the track)
// *** (optional) edts (edit list container)
// **** elst (an edit list)
// *** mdia (container for the media information in a track)
// **** mdhd (media header, overall information about the media)
// *** minf (media information container)
@ -439,7 +453,7 @@ class ScopedMp4Box {
// ***** co64 (64-bit chunk offset)
// ***** stss (sync sample table)
//
// ** trak (subtitle: container for an individual track or stream)
// ** (optional) trak (subtitle: container for an individual track or stream)
// *** tkhd (track header, overall information about the track)
// *** mdia (container for the media information in a track)
// **** mdhd (media header, overall information about the media)
@ -480,9 +494,9 @@ class Mp4File : public VirtualFile {
uint32_t duration = 0;
int64_t max_time_90k = 0;
for (const auto &segment : segments_) {
duration += segment->pieces.duration_90k();
duration += segment->pieces.end_90k() - segment->rel_start_90k;
int64_t start_90k =
segment->recording.start_time_90k + segment->pieces.start_90k();
segment->recording.start_time_90k + segment->rel_start_90k;
int64_t end_90k =
segment->recording.start_time_90k + segment->pieces.end_90k();
int64_t start_ts = start_90k / kTimeUnitsPerSecond;
@ -568,6 +582,7 @@ class Mp4File : public VirtualFile {
moov_video_trak_tkhd_.header().height =
NET_UINT32_C(video_sample_entry_.height << 16);
}
MaybeAppendVideoEdts();
{
CONSTRUCT_BOX(moov_video_trak_mdia_);
{
@ -590,6 +605,53 @@ class Mp4File : public VirtualFile {
}
}
void MaybeAppendVideoEdts() {
struct Entry {
Entry(int32_t segment_duration, int32_t media_time)
: segment_duration(segment_duration), media_time(media_time) {}
int32_t segment_duration = 0;
int32_t media_time = 0;
int32_t end() const { return segment_duration + media_time; }
};
std::vector<Entry> entries;
int64_t cur_media_time = 0;
for (const auto &segment : segments_) {
auto skip = segment->rel_start_90k - segment->pieces.start_90k();
auto keep = segment->pieces.end_90k() - segment->rel_start_90k;
DCHECK_GE(skip, 0);
DCHECK_GT(keep, 0);
cur_media_time += skip;
if (!entries.empty() && entries.back().end() == cur_media_time) {
entries.back().segment_duration += keep;
} else {
entries.emplace_back(keep, cur_media_time);
}
DCHECK_GT(segment->pieces.duration_90k(), 0);
cur_media_time += keep;
}
if (entries.size() == 1 && entries[0].media_time == 0) {
return; // use implicit one-to-one mapping.
}
VLOG(1) << "Using edit list with " << entries.size() << " entries.";
std::string *s = &moov_video_trak_edts_elst_entries_str_;
for (const auto &entry : entries) {
VLOG(2) << "...duration=" << entry.segment_duration
<< ", time=" << entry.media_time;
AppendU32(entry.segment_duration, s);
AppendU32(entry.media_time, s);
AppendU16(1, s); // media_rate_integer
AppendU16(1, s); // media_rate_fraction
}
CONSTRUCT_BOX(moov_video_trak_edts_);
CONSTRUCT_BOX(moov_video_trak_edts_elst_);
moov_video_trak_edts_elst_.header().entry_count =
ToNetworkU32(entries.size());
moov_video_trak_edts_elst_entries_.Init(
moov_video_trak_edts_elst_entries_str_);
slices_.Append(&moov_video_trak_edts_elst_entries_);
}
void AppendVideoStbl() {
CONSTRUCT_BOX(moov_video_trak_mdia_minf_stbl_);
{
@ -763,7 +825,7 @@ class Mp4File : public VirtualFile {
std::string &s = moov_subtitle_trak_mdia_minf_stbl_stts_entries_str_;
for (const auto &segment : segments_) {
int64_t start_90k =
segment->recording.start_time_90k + segment->pieces.start_90k();
segment->recording.start_time_90k + segment->rel_start_90k;
int64_t end_90k =
segment->recording.start_time_90k + segment->pieces.end_90k();
int64_t start_next_90k =
@ -802,7 +864,7 @@ class Mp4File : public VirtualFile {
memset(&mytm, 0, sizeof(mytm));
for (const auto &segment : segments_) {
int64_t start_90k =
segment->recording.start_time_90k + segment->pieces.start_90k();
segment->recording.start_time_90k + segment->rel_start_90k;
int64_t end_90k =
segment->recording.start_time_90k + segment->pieces.end_90k();
int64_t start_ts = start_90k / kTimeUnitsPerSecond;
@ -839,6 +901,10 @@ class Mp4File : public VirtualFile {
Mp4Box<TrackBox> moov_video_trak_;
Mp4Box<TrackHeaderBoxVersion0> moov_video_trak_tkhd_;
Mp4Box<EditBox> moov_video_trak_edts_;
Mp4Box<EditListBoxVersion0> moov_video_trak_edts_elst_;
StringPieceSlice moov_video_trak_edts_elst_entries_;
std::string moov_video_trak_edts_elst_entries_str_;
Mp4Box<MediaBox> moov_video_trak_mdia_;
Mp4Box<MediaHeaderBoxVersion0> moov_video_trak_mdia_mdhd_;
StringPieceSlice moov_video_trak_mdia_hdlr_;
@ -906,6 +972,7 @@ bool Mp4SampleTablePieces::Init(const Recording *recording,
key_frames_ = recording->video_sync_samples;
actual_end_90k_ = recording_duration_90k;
} else {
VLOG(1) << "Slow path.";
if (!it.done() && !it.is_key()) {
*error_message = "First frame must be a key frame.";
return false;
@ -941,6 +1008,7 @@ bool Mp4SampleTablePieces::Init(const Recording *recording,
*error_message = it.error();
return false;
}
actual_end_90k_ = std::min(actual_end_90k_, desired_end_90k_);
VLOG(1) << "requested ts [" << start_90k << ", " << end_90k << "), got ts ["
<< begin_.start_90k() << ", " << actual_end_90k_ << "), " << frames_
<< " frames (" << key_frames_
@ -967,8 +1035,18 @@ bool Mp4SampleTablePieces::FillSttsEntries(std::string *s,
for (it = begin_; !it.done() && it.start_90k() < desired_end_90k_;
it.Next()) {
AppendU32(1, s);
// The final sample may be shortened to the desired end.
if (it.end_90k() > desired_end_90k_) {
auto new_duration = desired_end_90k_ - it.start_90k();
VLOG(1) << "Shortening final sample duration from " << it.duration_90k()
<< " to " << new_duration;
AppendU32(new_duration, s);
break;
} else {
AppendU32(it.duration_90k(), s);
}
}
if (it.has_error()) {
*error_message = it.error();
return false;

View File

@ -66,10 +66,10 @@ class Mp4SampleTablePieces {
// samples() values.
//
// |start_90k| and |end_90k| should be relative to the start of the recording.
// They indicate the *desired* time range. The *actual* time range will be
// from the last sync sample <= |start_90k| to the last sample with start time
// <= |end_90k|. TODO: support edit lists and duration trimming to produce
// the exact correct time range.
// They indicate the *desired* time range. The *actual* time range will
// start at the last sync sample <= |start_90k|. (The caller is responsible
// for creating an edit list to skip the undesired portion.) It will end at
// the desired range, or the end of the recording, whichever is sooner.
bool Init(const Recording *recording, int sample_entry_index,
int32_t sample_offset, int32_t start_90k, int32_t end_90k,
std::string *error_message);
@ -88,8 +88,8 @@ class Mp4SampleTablePieces {
// Return the byte range in the sample file of the frames represented here.
ByteRange sample_pos() const { return sample_pos_; }
// As described above, these may differ from the desired range.
uint64_t duration_90k() const { return actual_end_90k_ - begin_.start_90k(); }
int32_t start_90k() const { return begin_.start_90k(); }
int32_t end_90k() const { return actual_end_90k_; }
@ -121,7 +121,15 @@ struct Mp4FileSegment {
Recording recording;
Mp4SampleTablePieces pieces;
RealFileSlice sample_file_slice;
// Requested start time, relative to recording.start_90k.
// If there is no key frame at exactly this position, |pieces| will actually
// start sooner, and an edit list should be used to skip the undesired
// prefix.
int32_t rel_start_90k = 0;
// Requested end time, relative to recording.end_90k.
// This will be clamped to the actual duration of the recording.
int32_t rel_end_90k = std::numeric_limits<int32_t>::max();
};

View File

@ -375,14 +375,15 @@ std::shared_ptr<VirtualFile> WebInterface::BuildMp4(
bool ok = true;
auto row_cb = [&](Recording &recording,
const VideoSampleEntry &sample_entry) {
if (rows == 0 && recording.start_time_90k != next_row_start_time_90k) {
if (rows == 0 && recording.start_time_90k > next_row_start_time_90k) {
*error_message = StrCat(
"recording starts late: ", PrettyTimestamp(recording.start_time_90k),
" (", recording.start_time_90k, ") rather than requested: ",
PrettyTimestamp(start_time_90k), " (", start_time_90k, ")");
ok = false;
return IterationControl::kBreak;
} else if (recording.start_time_90k != next_row_start_time_90k) {
} else if (rows > 0 &&
recording.start_time_90k != next_row_start_time_90k) {
*error_message = StrCat("gap/overlap in recording: ",
PrettyTimestamp(next_row_start_time_90k), " (",
next_row_start_time_90k, ") to: ",
@ -405,10 +406,15 @@ std::shared_ptr<VirtualFile> WebInterface::BuildMp4(
builder.SetSampleEntry(sample_entry);
}
// TODO: correct bounds within recording.
// Currently this can return too much data.
builder.Append(std::move(recording), 0,
std::numeric_limits<int32_t>::max());
int32_t rel_start_90k = 0;
int32_t rel_end_90k = std::numeric_limits<int32_t>::max();
if (recording.start_time_90k < start_time_90k) {
rel_start_90k = start_time_90k - recording.start_time_90k;
}
if (recording.end_time_90k > end_time_90k) {
rel_end_90k = end_time_90k - recording.start_time_90k;
}
builder.Append(std::move(recording), rel_start_90k, rel_end_90k);
++rows;
return IterationControl::kContinue;
};
@ -421,7 +427,7 @@ std::shared_ptr<VirtualFile> WebInterface::BuildMp4(
*error_message = StrCat("no recordings in range");
return false;
}
if (next_row_start_time_90k != end_time_90k) {
if (next_row_start_time_90k < end_time_90k) {
*error_message = StrCat("recording ends early: ",
PrettyTimestamp(next_row_start_time_90k), " (",
next_row_start_time_90k, "), not requested: ",