diff --git a/src/coding.h b/src/coding.h
index 5318134..ff983c7 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -154,6 +154,16 @@ inline void Append32(int32_t in, std::string *out) {
   out->append(reinterpret_cast<const char *>(&net), sizeof(int32_t));
 }
 
+inline void AppendU64(uint64_t in, std::string *out) {
+  uint64_t net = ToNetworkU64(in);
+  out->append(reinterpret_cast<const char *>(&net), sizeof(uint64_t));
+}
+
+inline void Append64(int64_t in, std::string *out) {
+  int64_t net = ToNetwork64(in);
+  out->append(reinterpret_cast<const char *>(&net), sizeof(int64_t));
+}
+
 }  // namespace moonfire_nvr
 
 #endif  // MOONFIRE_NVR_CODING_H
diff --git a/src/ffmpeg.h b/src/ffmpeg.h
index ed82b58..ca6a588 100644
--- a/src/ffmpeg.h
+++ b/src/ffmpeg.h
@@ -72,6 +72,7 @@ class VideoPacket {
   int64_t pts() const { return pkt_.pts; }
 
   AVPacket *pkt() { return &pkt_; }
+  const AVPacket *pkt() const { return &pkt_; }
 
  private:
   AVPacket pkt_;
diff --git a/src/http.cc b/src/http.cc
index 6afe1f4..1a6b553 100644
--- a/src/http.cc
+++ b/src/http.cc
@@ -67,7 +67,6 @@ class RealFile : public VirtualFile {
   int64_t size() const final { return stat_.st_size; }
   time_t last_modified() const final { return stat_.st_mtime; }
   std::string mime_type() const final { return mime_type_; }
-  std::string filename() const final { return slice_.filename(); }
   std::string etag() const final {
     return StrCat("\"", stat_.st_ino, ":", stat_.st_size, ":",
@@ -317,8 +316,7 @@ void HttpServe(const VirtualFile &file, evhttp_request *req) {
       evhttp_add_header(out_hdrs, "Content-Range", range_hdr.c_str());
       http_status = 416;
       http_status_str = "Range Not Satisfiable";
-      LOG(INFO) << "Replying to non-satisfiable range request for "
-                << file.filename() << ": " << range_hdr;
+      LOG(INFO) << "Replying to non-satisfiable range request: " << range_hdr;
       break;
     }
@@ -328,27 +326,26 @@ void HttpServe(const VirtualFile &file, evhttp_request *req) {
       std::string range_hdr = StrCat("bytes ", ranges[0].begin, "-",
                                      ranges[0].end - 1, "/", file.size());
       if (!file.AddRange(ranges[0], &buf, &error_message)) {
-        LOG(ERROR) << "Unable to serve " << file.filename() << " "
-                   << ranges[0] << ": " << error_message;
+        LOG(ERROR) << "Unable to serve range " << ranges[0] << ": "
+                   << error_message;
         return evhttp_send_error(req, HTTP_INTERNAL,
                                  EscapeHtml(error_message).c_str());
       }
       evhttp_add_header(out_hdrs, "Content-Range", range_hdr.c_str());
       http_status = 206;
       http_status_str = "Partial Content";
-      LOG(INFO) << "Replying to range request for " << file.filename();
+      LOG(INFO) << "Replying to range request";
       break;
     }
     // FALLTHROUGH
     case internal::RangeHeaderType::kAbsentOrInvalid:
       if (!file.AddRange(ByteRange(0, file.size()), &buf, &error_message)) {
-        LOG(ERROR) << "Unable to serve " << file.filename() << ": "
-                   << error_message;
+        LOG(ERROR) << "Unable to serve file: " << error_message;
         return evhttp_send_error(req, HTTP_INTERNAL,
                                  EscapeHtml(error_message).c_str());
       }
-      LOG(INFO) << "Replying to whole-file request for " << file.filename();
+      LOG(INFO) << "Replying to whole-file request";
       http_status = HTTP_OK;
       http_status_str = "OK";
   }
diff --git a/src/http.h b/src/http.h
index 32bf0e6..9736a77 100644
--- a/src/http.h
+++ b/src/http.h
@@ -131,15 +131,12 @@ class VirtualFile : public FileSlice {
   virtual time_t last_modified() const = 0;
   virtual std::string etag() const = 0;
   virtual std::string mime_type() const = 0;
-  virtual std::string filename() const = 0;  // for logging.
}; class RealFileSlice : public FileSlice { public: void Init(re2::StringPiece filename, ByteRange range); - const std::string filename() const { return filename_; } - int64_t size() const final { return range_.size(); } bool AddRange(ByteRange range, EvBuffer *buf, diff --git a/src/mp4-test.cc b/src/mp4-test.cc index b16643a..f192302 100644 --- a/src/mp4-test.cc +++ b/src/mp4-test.cc @@ -30,13 +30,20 @@ // // mp4_test.cc: tests of the mp4.h interface. +#include +#include +#include + #include #include #include +#include "ffmpeg.h" +#include "h264.h" #include "http.h" #include "mp4.h" #include "string.h" +#include "testutil.h" DECLARE_bool(alsologtostderr); @@ -146,6 +153,132 @@ TEST(Mp4SampleTablePiecesTest, Stsz) { EXPECT_EQ(kExpectedEntries, ToHex(pieces.stsz_entries())); } +class IntegrationTest : public testing::Test { + protected: + IntegrationTest() { + tmpdir_path_ = PrepareTempDirOrDie("mp4-integration-test"); + int ret = + GetRealFilesystem()->Open(tmpdir_path_.c_str(), O_RDONLY, &tmpdir_); + CHECK_EQ(0, ret) << strerror(ret); + } + + void CopyMp4ToSingleRecording() { + std::string error_message; + SampleIndexEncoder index; + SampleFileWriter writer(tmpdir_.get()); + recording_.sample_file_path = StrCat(tmpdir_path_, "/clip.sample"); + if (!writer.Open("clip.sample", &error_message)) { + ADD_FAILURE() << "open clip.sample: " << error_message; + return; + } + auto in = GetRealVideoSource()->OpenFile("../src/testdata/clip.mp4", + &error_message); + if (in == nullptr) { + ADD_FAILURE() << "open clip.mp4" << error_message; + return; + } + + video_sample_entry_.width = in->stream()->codec->width; + video_sample_entry_.height = in->stream()->codec->height; + if (!GetH264SampleEntry(GetExtradata(in.get()), in->stream()->codec->width, + in->stream()->codec->height, + &video_sample_entry_.data, &error_message)) { + ADD_FAILURE() << "GetH264SampleEntry: " << error_message; + return; + } + + while (true) { + VideoPacket pkt; + if (!in->GetNext(&pkt, &error_message)) { + if (!error_message.empty()) { + ADD_FAILURE() << "GetNext: " << error_message; + return; + } + break; + } + if (!writer.Write(GetData(pkt), &error_message)) { + ADD_FAILURE() << "Write: " << error_message; + return; + } + index.AddSample(pkt.pkt()->duration, pkt.pkt()->size, pkt.is_key()); + } + + if (!writer.Close(&recording_.sample_file_sha1, &error_message)) { + ADD_FAILURE() << "Close: " << error_message; + } + recording_.video_index = index.data().as_string(); + } + + void CopySingleRecordingToNewMp4() { + Mp4FileBuilder builder; + builder.SetSampleEntry(video_sample_entry_); + builder.Append(Recording(recording_), 0, + std::numeric_limits::max()); + std::string error_message; + auto mp4 = builder.Build(&error_message); + ASSERT_TRUE(mp4 != nullptr) << error_message; + EvBuffer buf; + ASSERT_TRUE(mp4->AddRange(ByteRange(0, mp4->size()), &buf, &error_message)) + << error_message; + WriteFileOrDie(StrCat(tmpdir_path_, "/clip.new.mp4"), &buf); + } + + void CompareMp4s() { + std::string error_message; + auto original = GetRealVideoSource()->OpenFile("../src/testdata/clip.mp4", + &error_message); + ASSERT_TRUE(original != nullptr) << error_message; + auto copied = GetRealVideoSource()->OpenFile( + StrCat(tmpdir_path_, "/clip.new.mp4"), &error_message); + ASSERT_TRUE(copied != nullptr) << error_message; + + EXPECT_EQ(GetExtradata(original.get()), GetExtradata(copied.get())); + EXPECT_EQ(original->stream()->codec->width, copied->stream()->codec->width); + EXPECT_EQ(original->stream()->codec->height, + 
copied->stream()->codec->height); + + while (true) { + VideoPacket original_pkt; + VideoPacket copied_pkt; + + bool original_has_next = original->GetNext(&original_pkt, &error_message); + ASSERT_TRUE(original_has_next || error_message.empty()) << error_message; + bool copied_has_next = copied->GetNext(&copied_pkt, &error_message); + ASSERT_TRUE(copied_has_next || error_message.empty()) << error_message; + if (!original_has_next && !copied_has_next) { + break; + } + ASSERT_TRUE(original_has_next); + ASSERT_TRUE(copied_has_next); + EXPECT_EQ(original_pkt.pkt()->pts, copied_pkt.pkt()->pts); + EXPECT_EQ(original_pkt.pkt()->duration, copied_pkt.pkt()->duration); + EXPECT_EQ(GetData(original_pkt), GetData(copied_pkt)); + } + } + + re2::StringPiece GetExtradata(InputVideoPacketStream *stream) { + return re2::StringPiece( + reinterpret_cast(stream->stream()->codec->extradata), + stream->stream()->codec->extradata_size); + } + + re2::StringPiece GetData(const VideoPacket &pkt) { + return re2::StringPiece(reinterpret_cast(pkt.pkt()->data), + pkt.pkt()->size); + } + + std::string tmpdir_path_; + std::unique_ptr tmpdir_; + Recording recording_; + VideoSampleEntry video_sample_entry_; +}; + +TEST_F(IntegrationTest, RoundTrip) { + CopyMp4ToSingleRecording(); + CopySingleRecordingToNewMp4(); + CompareMp4s(); +} + } // namespace } // namespace moonfire_nvr diff --git a/src/mp4.cc b/src/mp4.cc index c17fedf..c33ca80 100644 --- a/src/mp4.cc +++ b/src/mp4.cc @@ -29,13 +29,516 @@ // along with this program. If not, see . // // mp4.cc: implementation of mp4.h interface. +// +// This implementation will make the most sense when read side-by-side with +// ISO/IEC 14496-12:2015, available at the following URL: +// +// +// mp4.cc generates VirtualFiles via an array of FileSlices. Each FileSlice +// is responsible for some portion of the .mp4 file, generally some subset of +// a single .mp4 "box". Slices fall into these categories: +// +// 1. entirely static data from a const char kConstant[]. This is preferred in +// the interest of simplicity and efficiency when there is only one useful +// value for all the fields in the box, including its length. +// +// These slices are represented using the StaticStringPieceSlice class. +// +// 2. a box's fixed-length fields. In some cases a slice represents the entire +// contents of a FullBox type; in others a slice represents only the +// "length" and "type" fields of a container Box, while other contents +// (such as child boxes) are appended to the box as a separate slice. +// +// These slices are represented using a specific "struct ...Box" type for +// type safety and simplicity. The structs match the actual wire format---in +// particular, they are packed and store fields in network byte order. +// sizeof(...Box) is meaningful and structure data can be simply written +// with memcpy, as opposed to via manually-written or generated serialization +// code. (This approach could be revisited if there's ever a need to run +// on a compiler that doesn't support __attribute__((packed)) or a +// processor that doesn't support unaligned access.) The structs are +// wrapped with the Mp4Box<> template class which manages child slices and +// fills in the box's length field automatically. +// +// 3. variable-length data generated using the Mp4SampleTablePieces class, +// representing part of one box dealing with a single recording. These +// are the largest portion of a typical .mp4's metadata. +// +// These slices are generated using the FillerFileSlice class. 
They +// determine their sizes eagerly (so that the size of the file is known and +// so that later byte ranges can be served correctly) but only generate +// their contents when the requested byte range overlaps with the slice +// (for memory/CPU efficiency). +// +// 4. file-backed variable-length data, representing actual video samples. +// +// These are represented using the FileSlice class and are mmap()ed via +// libevent, letting the kernel decide how much to page in at once. +// +// The box hierarchy is constructed through append operations on the Mp4Box +// subclasses. Most of the static data is always in RAM when the VirtualFile +// is, but the file-backed and sample table portions are not. This should be +// a reasonable compromise between simplicity of implementation and memory +// efficiency. #include "mp4.h" #include "coding.h" +#define NET_UINT64_C(x) ::moonfire_nvr::ToNetworkU64(UINT64_C(x)) +#define NET_INT64_C(x) ::moonfire_nvr::ToNetwork64(UINT64_C(x)) +#define NET_UINT32_C(x) ::moonfire_nvr::ToNetworkU32(UINT32_C(x)) +#define NET_INT32_C(x) ::moonfire_nvr::ToNetwork32(UINT32_C(x)) +#define NET_UINT16_C(x) ::moonfire_nvr::ToNetworkU16(UINT16_C(x)) +#define NET_INT16_C(x) ::moonfire_nvr::ToNetwork16(UINT16_C(x)) + +using ::moonfire_nvr::internal::Mp4FileSegment; + namespace moonfire_nvr { +namespace { + +// ISO/IEC 14496-12 section 4.3, ftyp. +const char kFtypBox[] = { + 0x00, 0x00, 0x00, 0x20, // length = 32, sizeof(kFtypBox) + 'f', 't', 'y', 'p', // type + 'i', 's', 'o', 'm', // major_brand + 0x00, 0x00, 0x02, 0x00, // minor_version + 'i', 's', 'o', 'm', // compatible_brands[0] + 'i', 's', 'o', '2', // compatible_brands[1] + 'a', 'v', 'c', '1', // compatible_brands[2] + 'm', 'p', '4', '1', // compatible_brands[3] +}; + +// vmhd and dinf boxes. These are both completely static and adjacent in the +// structure, so they're in a single constant. +const char kVmhdAndDinfBoxes[] = { + // A vmhd box; the "graphicsmode" and "opcolor" values don't have any + // meaningful use. + 0x00, 0x00, 0x00, 0x14, // length == sizeof(kVmhdBox) + 'v', 'm', 'h', 'd', // type = vmhd, ISO/IEC 14496-12 section 12.1.2. + 0x00, 0x00, 0x00, 0x01, // version + flags(1) + 0x00, 0x00, 0x00, 0x00, // graphicsmode (copy), opcolor[0] + 0x00, 0x00, 0x00, 0x00, // opcolor[1], opcolor[2] + + // A dinf box suitable for a "self-contained" .mp4 file (no URL/URN + // references to external data). + 0x00, 0x00, 0x00, 0x24, // length == sizeof(kDinfBox) + 'd', 'i', 'n', 'f', // type = dinf, ISO/IEC 14496-12 section 8.7.1. + 0x00, 0x00, 0x00, 0x1c, // length + 'd', 'r', 'e', 'f', // type = dref, ISO/IEC 14496-12 section 8.7.2. + 0x00, 0x00, 0x00, 0x00, // version and flags + 0x00, 0x00, 0x00, 0x01, // entry_count + 0x00, 0x00, 0x00, 0x0c, // length + 'u', 'r', 'l', ' ', // type = url, ISO/IEC 14496-12 section 8.7.2. + 0x00, 0x00, 0x00, 0x01, // version=0, flags=self-contained +}; + +// A hdlr box suitable for a video track. +const char kHdlrBox[] = { + 0x00, 0x00, 0x00, 0x21, // length == sizeof(kHdlrBox) + 'h', 'd', 'l', 'r', // type == hdlr, ISO/IEC 14496-12 section 8.4.3. + 0x00, 0x00, 0x00, 0x00, // version + flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 'v', 'i', 'd', 'e', // handler = vide + 0x00, 0x00, 0x00, 0x00, // reserved[0] + 0x00, 0x00, 0x00, 0x00, // reserved[1] + 0x00, 0x00, 0x00, 0x00, // reserved[2] + 0x00, // name, zero-terminated (empty) +}; + +// Convert from 90kHz units since 1970-01-01 00:00:00 UTC to +// seconds since 1904-01-01 00:00:00 UTC. 
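+// (24107 is the number of days between 1904-01-01 and 1970-01-01:
+// 66 years of 365 days plus 17 leap days.)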
+uint32_t ToIso14496Timestamp(uint64_t time_90k) { + return time_90k / kTimeUnitsPerSecond + 24107 * 86400; +} + +struct MovieBox { // ISO/IEC 14496-12 section 8.2.1, moov. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'o', 'o', 'v'}; +}; + +struct MovieHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.2.2, mvhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'v', 'h', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t timescale = ToNetworkU32(kTimeUnitsPerSecond); + uint32_t duration = NET_UINT32_C(0); + const int32_t rate = NET_UINT32_C(0x00010000); + const int16_t volume = NET_INT16_C(0x0100); + const int16_t reserved = NET_UINT16_C(0); + const uint32_t more_reserved[2] = {NET_UINT32_C(0), NET_UINT32_C(0)}; + const int32_t matrix[9] = { + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x40000000)}; + const uint32_t pre_defined[6] = {NET_UINT32_C(0), NET_UINT32_C(0), + NET_UINT32_C(0), NET_UINT32_C(0), + NET_UINT32_C(0), NET_UINT32_C(0)}; + uint32_t next_track_id = NET_UINT32_C(2); +} __attribute__((packed)); + +struct TrackBox { // ISO/IEC 14496-12 section 8.3.1, trak. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'t', 'r', 'a', 'k'}; +} __attribute__((packed)); + +struct TrackHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.3.2, tkhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'t', 'k', 'h', 'd'}; + // flags 7 = track_enabled | track_in_movie | track_in_preview + const uint32_t version_and_flags = NET_UINT32_C(7); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t track_id = NET_UINT32_C(0); + const uint32_t reserved1 = NET_UINT64_C(0); + uint32_t duration = NET_UINT32_C(0); + const uint32_t reserved2[2] = {NET_UINT32_C(0), NET_UINT32_C(0)}; + const uint16_t layer = NET_UINT16_C(0); + const uint16_t alternate_group = NET_UINT16_C(0); + const uint16_t volume = NET_UINT16_C(0); + const uint16_t reserved3 = NET_UINT16_C(0); + const int32_t matrix[9] = { + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x00010000), NET_INT32_C(0), NET_INT32_C(0), NET_INT32_C(0), + NET_INT32_C(0x40000000)}; + uint32_t width = NET_UINT32_C(0); + uint32_t height = NET_UINT32_C(0); +} __attribute__((packed)); + +struct MediaBox { // ISO/IEC 14496-12 section 8.4.1, mdia. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'd', 'i', 'a'}; +} __attribute__((packed)); + +struct MediaHeaderBoxVersion0 { // ISO/IEC 14496-12 section 8.4.2, mdhd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'd', 'h', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t creation_time = NET_UINT32_C(0); + uint32_t modification_time = NET_UINT32_C(0); + uint32_t timescale = ToNetworkU32(kTimeUnitsPerSecond); + uint32_t duration = NET_UINT32_C(0); + uint16_t languages = NET_UINT16_C(0x55c4); // undetermined + const uint16_t pre_defined = NET_UINT32_C(0); +} __attribute__((packed)); + +struct MediaInformationBox { // ISO/IEC 14496-12 section 8.4.4, minf. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'m', 'i', 'n', 'f'}; +} __attribute__((packed)); + +struct SampleTableBox { // ISO/IEC 14496-12 section 8.5.1, stbl. 
+ uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 'b', 'l'}; +} __attribute__((packed)); + +struct SampleDescriptionBoxVersion0 { // ISO/IEC 14496-12 section 8.5.2, stsd. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'd'}; + const uint32_t version_and_flags = NET_UINT32_C(0 << 24); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct TimeToSampleBoxVersion0 { // ISO/IEC 14496-12 section 8.6.1.2, stts. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 't', 's'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SampleToChunkBoxVersion0 { // ISO/IEC 14496-12 section 8.7.4, stsc. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'c'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SampleSizeBoxVersion0 { // ISO/IEC 14496-12 section 8.7.3, stsz. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 'z'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t sample_size = NET_UINT32_C(0); + uint32_t sample_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct ChunkLargeOffsetBoxVersion0 { // ISO/IEC 14496-12 section 8.7.5, co64. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'c', 'o', '6', '4'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct SyncSampleBoxVersion0 { // ISO/IEC 14496-12 section 8.6.2, stss. + uint32_t size = NET_UINT32_C(0); + const char type[4] = {'s', 't', 's', 's'}; + const uint32_t version_and_flags = NET_UINT32_C(0); + uint32_t entry_count = NET_UINT32_C(0); +} __attribute__((packed)); + +struct LargeMediaDataBox { // ISO/IEC 14496-12 section 8.1.1, mdat. + const uint32_t size = NET_UINT32_C(1); + const char type[4] = {'m', 'd', 'a', 't'}; + uint64_t largesize = NET_UINT64_C(0); +}; + +// Grouping of a box's header and the slice representing the header. +// See also ScopedMp4Box, which calculates the length. +template +class Mp4Box { + public: + Mp4Box() + : header_slice_(re2::StringPiece(reinterpret_cast(&header_), + sizeof(header_))) {} + + Header &header() { return header_; } + const FileSlice *header_slice() const { return &header_slice_; } + + private: + Header header_; + CopyingStringPieceSlice header_slice_; +}; + +// Helper for adding a mp4 box which calculates the header's size field. +// Construction appends the box to the FileSlices; destruction automatically +// calculates the length including any other slices added in the meantime. +// See also CONSTRUCT_BOX macro. +template +class ScopedMp4Box { + public: + explicit ScopedMp4Box(FileSlices *slices, Box *box) + : starting_size_(slices->size()), slices_(slices), box_(box) { + slices_->Append(box->header_slice()); + } + + ScopedMp4Box(const ScopedMp4Box &) = delete; + void operator=(const ScopedMp4Box &) = delete; + + ~ScopedMp4Box() { + box_->header().size = ToNetwork32(slices_->size() - starting_size_); + } + + private: + int64_t starting_size_; + FileSlices *slices_; + Box *box_; +}; + +// Macro for less verbose ScopedMp4Box instantiation. +// For use only within Mp4File. 
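+// For example, CONSTRUCT_BOX(moov_) in AppendMoov() below appends moov_'s
+// header slice right away; when the enclosing scope ends, the ScopedMp4Box
+// destructor sets moov_'s size field to cover every slice appended within
+// that scope (mvhd, trak, and their children).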
+#define CONSTRUCT_BOX(box) \ + ScopedMp4Box _scoped_##box(&slices_, &box); + +// .mp4 file, constructed from boxes arranged in the order suggested by +// ISO/IEC 14496-12 section 6.2.3 (see Table 1): +// * ftyp (file type and compatibility) +// * moov (container for all the metadata) +// ** mvhd (movie header, overall declarations) +// ** trak (container for an individual track or stream) +// *** tkhd (track header, overall information about the track) +// *** mdia (container for the media information in a track) +// **** mdhd (media header, overall information about the media) +// *** minf (media information container) +// **** vmhd (video media header, overall information (video track only)) +// **** dinf (data information box, container) +// ***** dref (data reference box, declares source(s) of media data in track) +// **** stbl (sample table box, container for the time/space map) +// ***** stsd (sample descriptions (codec types, initilization etc.) +// ***** stts ((decoding) time-to-sample) +// ***** stsc (sample-to-chunk, partial data-offset information) +// ***** stsz (samples sizes (framing)) +// ***** co64 (64-bit chunk offset) +// ***** stss (sync sample table) +// * mdat (media data container) +class Mp4File : public VirtualFile { + public: + Mp4File(std::vector> segments, + VideoSampleEntry &&video_sample_entry) + : segments_(std::move(segments)), + video_sample_entry_(std::move(video_sample_entry)), + ftyp_(re2::StringPiece(kFtypBox, sizeof(kFtypBox))), + moov_trak_mdia_hdlr_(re2::StringPiece(kHdlrBox, sizeof(kHdlrBox))), + moov_trak_mdia_minf_vmhddinf_( + re2::StringPiece(kVmhdAndDinfBoxes, sizeof(kVmhdAndDinfBoxes))), + moov_trak_mdia_minf_stbl_stsd_entry_(video_sample_entry_.data) { + uint32_t duration = 0; + int64_t max_time_90k = 0; + for (const auto &segment : segments_) { + duration += segment->pieces.duration_90k(); + max_time_90k = std::max(max_time_90k, segment->recording.start_time_90k + + segment->rel_end_90k); + } + auto net_duration = ToNetworkU32(duration); + auto net_creation_ts = ToNetworkU32(ToIso14496Timestamp(max_time_90k)); + + slices_.Append(&ftyp_); + AppendMoov(net_duration, net_creation_ts); + + // Add the mdat_ without using CONSTRUCT_BOX. + // mdat_ is special because it uses largesize rather than size. 
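+    // (Setting the 32-bit size field to 1 signals that the actual length
+    // follows in the 64-bit largesize field, per ISO/IEC 14496-12 section
+    // 8.1.1, so the sample data is not limited to 4 GiB.)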
+ slices_.Append(mdat_.header_slice()); + initial_sample_byte_pos_ = slices_.size(); + for (const auto &segment : segments_) { + segment->sample_file_slice.Init(segment->recording.sample_file_path, + segment->pieces.sample_pos()); + slices_.Append(&segment->sample_file_slice); + } + mdat_.header().largesize = + ToNetworkU64(slices_.size() - initial_sample_byte_pos_); + } + + time_t last_modified() const final { return 0; } // TODO + std::string etag() const final { return ""; } // TODO + std::string mime_type() const final { return "video/mp4"; } + int64_t size() const final { return slices_.size(); } + bool AddRange(ByteRange range, EvBuffer *buf, + std::string *error_message) const final { + return slices_.AddRange(range, buf, error_message); + } + + private: + void AppendMoov(uint32_t net_duration, uint32_t net_creation_ts) { + CONSTRUCT_BOX(moov_); + { + CONSTRUCT_BOX(moov_mvhd_); + moov_mvhd_.header().creation_time = net_creation_ts; + moov_mvhd_.header().modification_time = net_creation_ts; + moov_mvhd_.header().duration = net_duration; + moov_mvhd_.header().duration = net_duration; + } + { + CONSTRUCT_BOX(moov_trak_); + { + CONSTRUCT_BOX(moov_trak_tkhd_); + moov_trak_tkhd_.header().creation_time = net_creation_ts; + moov_trak_tkhd_.header().modification_time = net_creation_ts; + moov_trak_tkhd_.header().track_id = NET_UINT32_C(1); + moov_trak_tkhd_.header().duration = net_duration; + moov_trak_tkhd_.header().width = + NET_UINT32_C(video_sample_entry_.width << 16); + moov_trak_tkhd_.header().height = + NET_UINT32_C(video_sample_entry_.height << 16); + } + { + CONSTRUCT_BOX(moov_trak_mdia_); + { + CONSTRUCT_BOX(moov_trak_mdia_mdhd_); + moov_trak_mdia_mdhd_.header().creation_time = net_creation_ts; + moov_trak_mdia_mdhd_.header().modification_time = net_creation_ts; + moov_trak_mdia_mdhd_.header().duration = net_duration; + } + slices_.Append(&moov_trak_mdia_hdlr_); + { + CONSTRUCT_BOX(moov_trak_mdia_minf_); + slices_.Append(&moov_trak_mdia_minf_vmhddinf_); + AppendStbl(); + } + } + } + } + + void AppendStbl() { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_); + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsd_); + moov_trak_mdia_minf_stbl_stsd_.header().entry_count = NET_UINT32_C(1); + slices_.Append(&moov_trak_mdia_minf_stbl_stsd_entry_); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stts_); + int32_t stts_entry_count = 0; + for (const auto &segment : segments_) { + stts_entry_count += segment->pieces.stts_entry_count(); + slices_.Append(segment->pieces.stts_entries()); + } + moov_trak_mdia_minf_stbl_stts_.header().entry_count = + ToNetwork32(stts_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsc_); + uint32_t stsc_entry_count = 0; + for (const auto &segment : segments_) { + stsc_entry_count += segment->pieces.stsc_entry_count(); + slices_.Append(segment->pieces.stsc_entries()); + } + moov_trak_mdia_minf_stbl_stsc_.header().entry_count = + ToNetwork32(stsc_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stsz_); + uint32_t stsz_entry_count = 0; + for (const auto &segment : segments_) { + stsz_entry_count += segment->pieces.stsz_entry_count(); + slices_.Append(segment->pieces.stsz_entries()); + } + moov_trak_mdia_minf_stbl_stsz_.header().sample_count = + ToNetwork32(stsz_entry_count); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_co64_); + moov_trak_mdia_minf_stbl_co64_entries_.Init( + sizeof(uint64_t) * segments_.size(), + [this](std::string *s, std::string *error_message) { + return FillCo64Entries(s, error_message); + }); + 
moov_trak_mdia_minf_stbl_co64_.header().entry_count = + ToNetwork32(segments_.size()); + slices_.Append(&moov_trak_mdia_minf_stbl_co64_entries_); + } + { + CONSTRUCT_BOX(moov_trak_mdia_minf_stbl_stss_); + uint32_t stss_entry_count = 0; + for (const auto &segment : segments_) { + stss_entry_count += segment->pieces.stss_entry_count(); + slices_.Append(segment->pieces.stss_entries()); + } + moov_trak_mdia_minf_stbl_stss_.header().entry_count = + ToNetwork32(stss_entry_count); + } + } + + bool FillCo64Entries(std::string *s, std::string *error_message) { + int64_t pos = initial_sample_byte_pos_; + for (const auto &segment : segments_) { + AppendU64(pos, s); + pos += segment->sample_file_slice.size(); + } + return true; + } + + int64_t initial_sample_byte_pos_ = 0; + std::vector> segments_; + VideoSampleEntry video_sample_entry_; + FileSlices slices_; + + StaticStringPieceSlice ftyp_; + Mp4Box moov_; + Mp4Box moov_mvhd_; + Mp4Box moov_trak_; + Mp4Box moov_trak_tkhd_; + Mp4Box moov_trak_mdia_; + Mp4Box moov_trak_mdia_mdhd_; + StaticStringPieceSlice moov_trak_mdia_hdlr_; + Mp4Box moov_trak_mdia_minf_; + StaticStringPieceSlice moov_trak_mdia_minf_vmhddinf_; + Mp4Box moov_trak_mdia_minf_stbl_; + Mp4Box moov_trak_mdia_minf_stbl_stsd_; + CopyingStringPieceSlice moov_trak_mdia_minf_stbl_stsd_entry_; + Mp4Box moov_trak_mdia_minf_stbl_stts_; + Mp4Box moov_trak_mdia_minf_stbl_stsc_; + Mp4Box moov_trak_mdia_minf_stbl_stsz_; + Mp4Box moov_trak_mdia_minf_stbl_co64_; + FillerFileSlice moov_trak_mdia_minf_stbl_co64_entries_; + Mp4Box moov_trak_mdia_minf_stbl_stss_; + Mp4Box mdat_; +}; + +#undef CONSTRUCT_BOX + +} // namespace + namespace internal { bool Mp4SampleTablePieces::Init(re2::StringPiece video_index_blob, @@ -162,4 +665,50 @@ bool Mp4SampleTablePieces::FillStszEntries(std::string *s, } // namespace internal +Mp4FileBuilder &Mp4FileBuilder::Append(Recording &&recording, + int32_t rel_start_90k, + int32_t rel_end_90k) { + std::unique_ptr s(new Mp4FileSegment); + s->recording = std::move(recording); + s->rel_start_90k = rel_start_90k; + s->rel_end_90k = rel_end_90k; + segments_.push_back(std::move(s)); + return *this; +} + +Mp4FileBuilder &Mp4FileBuilder::SetSampleEntry(const VideoSampleEntry &entry) { + video_sample_entry_ = entry; + return *this; +} + +std::unique_ptr Mp4FileBuilder::Build(std::string *error_message) { + int32_t sample_offset = 1; + for (auto &segment : segments_) { + if (segment->recording.video_sample_entry_sha1 != + video_sample_entry_.sha1) { + *error_message = + StrCat("inconsistent video sample entries. 
builder has: ", + ToHex(video_sample_entry_.sha1), ", segment has: ", + ToHex(segment->recording.video_sample_entry_sha1)); + return std::unique_ptr(); + } + + if (!segment->pieces.Init(segment->recording.video_index, + 1, // sample entry index + sample_offset, segment->rel_start_90k, + segment->rel_end_90k, error_message)) { + return std::unique_ptr(); + } + sample_offset += segment->pieces.samples(); + } + + if (segments_.empty()) { + *error_message = "Can't construct empty .mp4"; + return std::unique_ptr(); + } + + return std::unique_ptr( + new Mp4File(std::move(segments_), std::move(video_sample_entry_))); +} + } // namespace moonfire_nvr diff --git a/src/mp4.h b/src/mp4.h index 8e11515..61cc810 100644 --- a/src/mp4.h +++ b/src/mp4.h @@ -36,6 +36,9 @@ #ifndef MOONFIRE_NVR_MP4_H #define MOONFIRE_NVR_MP4_H +#include +#include + #include "recording.h" #include "http.h" @@ -118,8 +121,52 @@ class Mp4SampleTablePieces { int32_t key_frames_ = 0; }; +struct Mp4FileSegment { + Recording recording; + Mp4SampleTablePieces pieces; + RealFileSlice sample_file_slice; + int32_t rel_start_90k = 0; + int32_t rel_end_90k = std::numeric_limits::max(); +}; + } // namespace internal +// Builder for a virtual .mp4 file. +class Mp4FileBuilder { + public: + // Append part or all of a recording. + // Note that |recording.video_sample_entry_sha1| must be added via + // AddSampleEntry. + Mp4FileBuilder &Append(Recording &&recording, int32_t rel_start_300ths, + int32_t rel_end_300ths); + + // TODO: support multiple sample entries? + Mp4FileBuilder &SetSampleEntry(const VideoSampleEntry &entry); + + // Set if a subtitle track should be added with timestamps. + // TODO: unimplemented. + Mp4FileBuilder &include_timestamp_subtitle_track(bool); + + // TODO: MPEG-DASH / ISO BMFF Byte Stream Format support. + + // Build the .mp4 file, returning it to the caller. + // The Mp4FileBuilder is left in an undefined state; it should not + // be used afterward. On error, nullptr is returned, with |error_message| + // populated. + // + // Errors include: + // * TODO: width/height mismatch? or is this okay? + // * No segments. + // * Non-final segment has zero duration of last sample. + // * Data error in one of the recording sample indexes. + // * Invalid start/end. + std::unique_ptr Build(std::string *error_message); + + private: + std::vector> segments_; + VideoSampleEntry video_sample_entry_; +}; + } // namespace moonfire_nvr #endif // MOONFIRE_NVR_MP4_H diff --git a/src/recording.h b/src/recording.h index 2d2cb75..2f074b0 100644 --- a/src/recording.h +++ b/src/recording.h @@ -47,6 +47,8 @@ namespace moonfire_nvr { +constexpr uint32_t kTimeUnitsPerSecond = 90000; + // Encodes a sample index. class SampleIndexEncoder { public: @@ -167,6 +169,26 @@ class SampleFileWriter { bool corrupt_ = false; }; +struct VideoSampleEntry { + std::string sha1; + std::string data; + uint16_t width = 0; + uint16_t height = 0; +}; + +// Various fields from the "recording" table which are useful when viewing +// recordings. 
+struct Recording { + int64_t start_time_90k = -1; + int64_t end_time_90k = -1; + int64_t sample_file_bytes = -1; + std::string sample_file_path; + std::string sample_file_uuid; + std::string sample_file_sha1; + std::string video_sample_entry_sha1; + std::string video_index; +}; + } // namespace moonfire_nvr #endif // MOONFIRE_NVR_RECORDING_H diff --git a/src/schema.sql b/src/schema.sql index 80570aa..1238b5f 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -73,7 +73,7 @@ create table recording ( sample_file_uuid blob unique not null, sample_file_sha1 blob, - sample_file_size integer, + sample_file_bytes integer, -- The starting and ending time of the recording, in 90 kHz units since -- 1970-01-01 00:00:00 UTC. diff --git a/src/testutil.cc b/src/testutil.cc index 997f84b..ce28e79 100644 --- a/src/testutil.cc +++ b/src/testutil.cc @@ -39,6 +39,7 @@ #include #include +#include #include #include "filesystem.h" @@ -118,6 +119,16 @@ void WriteFileOrDie(const std::string &path, re2::StringPiece contents) { CHECK_EQ(ret, 0) << "close " << path << ": " << strerror(ret); } +void WriteFileOrDie(const std::string &path, EvBuffer *buf) { + int fd = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0600); + PCHECK(fd >= 0) << "open: " << path; + size_t buf_len = evbuffer_get_length(buf->get()); + int written = evbuffer_write(buf->get(), fd); + PCHECK(written >= 0 && buf_len == static_cast(written)) + << "buf_len: " << buf_len << ", written: " << written; + PCHECK(close(fd) == 0) << "close"; +} + std::string ReadFileOrDie(const std::string &path) { std::unique_ptr f; int ret = GetRealFilesystem()->Open(path.c_str(), O_RDONLY, &f); diff --git a/src/testutil.h b/src/testutil.h index 08c839c..cad816b 100644 --- a/src/testutil.h +++ b/src/testutil.h @@ -37,6 +37,8 @@ #include #include +#include "http.h" + namespace moonfire_nvr { // Create or empty the given test directory, or die. @@ -45,6 +47,7 @@ std::string PrepareTempDirOrDie(const std::string &test_name); // Write the given file contents to the given path, or die. void WriteFileOrDie(const std::string &path, re2::StringPiece contents); +void WriteFileOrDie(const std::string &path, EvBuffer *buf); // Read the contents of the given path, or die. std::string ReadFileOrDie(const std::string &path);
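For reference, a rough sketch of how the new Mp4FileBuilder is meant to be driven, following the usage in CopySingleRecordingToNewMp4() above. The |recording| and |entry| values are placeholders assumed to have been loaded elsewhere (e.g. from the recording table), and error handling is abbreviated:

    // Build a virtual .mp4 from one recording and emit its bytes.
    Mp4FileBuilder builder;
    builder.SetSampleEntry(entry);             // VideoSampleEntry for the track.
    builder.Append(std::move(recording), 0,    // whole recording, from rel_start_90k
                   std::numeric_limits<int32_t>::max());  // ... to rel_end_90k.
    std::string error_message;
    auto mp4 = builder.Build(&error_message);  // nullptr + error_message on failure.
    if (mp4 != nullptr) {
      EvBuffer buf;
      mp4->AddRange(ByteRange(0, mp4->size()), &buf, &error_message);
      // ...or serve it directly over HTTP: HttpServe(*mp4, req);
    }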