diff --git a/owntone.conf.in b/owntone.conf.in
index 1c1ad7b3..30d67b19 100644
--- a/owntone.conf.in
+++ b/owntone.conf.in
@@ -187,19 +187,24 @@ library {
 	# Should we import the content of iTunes smart playlists?
 #	itunes_smartpl = false
 
-	# Decoding options for DAAP and RSP clients
+	# Transcoding options for DAAP and RSP clients
 	# Since iTunes has native support for mpeg, mp4a, mp4v, alac and wav,
-	# such files will be sent as they are. Any other formats will be decoded
-	# to raw wav. If OwnTone detects a non-iTunes DAAP client, it is
-	# assumed to only support mpeg and wav, other formats will be decoded.
-	# Here you can change when to decode. Note that these settings only
-	# affect serving media to DAAP and RSP clients, they have no effect on
+	# such files will be sent as they are. Any other formats will be
+	# transcoded. Some other clients, including Roku/RSP, announce what
+	# formats they support, and the server will transcode to one of those if
+	# necessary. Clients that don't announce supported formats are assumed
+	# to support mpeg (mp3), wav and alac.
+	# Here you can change when and how to transcode. The settings *only*
+	# affect serving audio to DAAP and RSP clients, they have no effect on
 	# direct AirPlay, Chromecast and local audio playback.
 	# Formats: mp4a, mp4v, mpeg, alac, flac, mpc, ogg, wma, wmal, wmav, aif, wav
-	# Formats that should never be decoded
+	# Formats that should never be transcoded
 #	no_decode = { "format", "format" }
-	# Formats that should always be decoded
+	# Formats that should always be transcoded
 #	force_decode = { "format", "format" }
+	# Preferred format to transcode to: wav (default), alac or mpeg (mp3 with
+	# the bit rate configured below in the streaming section)
+#	prefer_format = "format"
 
 	# Set ffmpeg filters (similar to 'ffmpeg -af xxx') that you want the
 	# server to use when decoding files from your library. Examples:
diff --git a/src/httpd.c b/src/httpd.c
index be7f2301..65a50720 100644
--- a/src/httpd.c
+++ b/src/httpd.c
@@ -105,18 +105,19 @@ struct stream_ctx {
 
 static const struct content_type_map ext2ctype[] =
   {
-    { ".html", XCODE_NONE, "text/html; charset=utf-8" },
-    { ".xml",  XCODE_NONE, "text/xml; charset=utf-8" },
-    { ".css",  XCODE_NONE, "text/css; charset=utf-8" },
-    { ".txt",  XCODE_NONE, "text/plain; charset=utf-8" },
-    { ".js",   XCODE_NONE, "application/javascript; charset=utf-8" },
-    { ".gif",  XCODE_NONE, "image/gif" },
-    { ".ico",  XCODE_NONE, "image/x-ico" },
-    { ".png",  XCODE_PNG,  "image/png" },
-    { ".jpg",  XCODE_JPEG, "image/jpeg" },
-    { ".mp3",  XCODE_MP3,  "audio/mpeg" },
-    { ".wav",  XCODE_WAV,  "audio/wav" },
-    { NULL,    XCODE_NONE, NULL }
+    { ".html", XCODE_NONE,      "text/html; charset=utf-8" },
+    { ".xml",  XCODE_NONE,      "text/xml; charset=utf-8" },
+    { ".css",  XCODE_NONE,      "text/css; charset=utf-8" },
+    { ".txt",  XCODE_NONE,      "text/plain; charset=utf-8" },
+    { ".js",   XCODE_NONE,      "application/javascript; charset=utf-8" },
+    { ".gif",  XCODE_NONE,      "image/gif" },
+    { ".ico",  XCODE_NONE,      "image/x-ico" },
+    { ".png",  XCODE_PNG,       "image/png" },
+    { ".jpg",  XCODE_JPEG,      "image/jpeg" },
+    { ".mp3",  XCODE_MP3,       "audio/mpeg" },
+    { ".m4a",  XCODE_MP4_ALAC,  "audio/mp4" },
+    { ".wav",  XCODE_WAV,       "audio/wav" },
+    { NULL,    XCODE_NONE,      NULL }
   };
 
 static char webroot_directory[PATH_MAX];
@@ -672,8 +673,8 @@ static struct stream_ctx *
 stream_new_transcode(struct media_file_info *mfi, enum transcode_profile profile, struct httpd_request *hreq,
                      int64_t offset, int64_t end_offset, event_callback_fn stream_cb)
 {
+  struct media_quality quality = { 0 };
   struct stream_ctx *st;
-  struct media_quality quality = { HTTPD_STREAM_SAMPLE_RATE, HTTPD_STREAM_BPS, HTTPD_STREAM_CHANNELS, HTTPD_STREAM_BIT_RATE };
 
   st = stream_new(mfi, hreq, stream_cb);
   if (!st)
@@ -681,6 +682,8 @@ stream_new_transcode(struct media_file_info *mfi, enum transcode_profile profile
       goto error;
     }
 
+  // We use source sample rate etc, but for MP3 we must set a bit rate
+  quality.bit_rate = cfg_getint(cfg_getsec(cfg, "streaming"), "bit_rate");
   st->xcode = transcode_setup(profile, &quality, mfi->data_kind, mfi->path, mfi->song_length);
   if (!st->xcode)
     {
diff --git a/src/httpd_daap.c b/src/httpd_daap.c
index e5238938..20f753cf 100644
--- a/src/httpd_daap.c
+++ b/src/httpd_daap.c
@@ -1151,7 +1151,7 @@ daap_reply_songlist_generic(struct httpd_request *hreq, int playlist)
   size_t len;
   enum transcode_profile profile;
   struct transcode_metadata_string xcode_metadata;
-  struct media_quality quality = { HTTPD_STREAM_SAMPLE_RATE, HTTPD_STREAM_BPS, HTTPD_STREAM_CHANNELS, HTTPD_STREAM_BIT_RATE };
+  struct media_quality quality = { 0 };
   uint32_t len_ms;
   int nmeta = 0;
   int sort_headers;
@@ -1239,6 +1239,11 @@ daap_reply_songlist_generic(struct httpd_request *hreq, int playlist)
 	  if (safe_atou32(dbmfi.song_length, &len_ms) < 0)
 	    len_ms = 3 * 60 * 1000; // just a fallback default
 
+	  safe_atoi32(dbmfi.samplerate, &quality.sample_rate);
+	  safe_atoi32(dbmfi.bits_per_sample, &quality.bits_per_sample);
+	  safe_atoi32(dbmfi.channels, &quality.channels);
+	  quality.bit_rate = cfg_getint(cfg_getsec(cfg, "streaming"), "bit_rate");
+
 	  transcode_metadata_strings_set(&xcode_metadata, profile, &quality, len_ms);
 	  dbmfi.type        = xcode_metadata.type;
 	  dbmfi.codectype   = xcode_metadata.codectype;
diff --git a/src/httpd_internal.h b/src/httpd_internal.h
index 62e285b2..28262572 100644
--- a/src/httpd_internal.h
+++ b/src/httpd_internal.h
@@ -36,11 +36,6 @@
 #define HTTP_BADGATEWAY        502	/**< received an invalid response from the upstream */
 #define HTTP_SERVUNAVAIL       503	/**< the server is not available */
 
-#define HTTPD_STREAM_SAMPLE_RATE 44100
-#define HTTPD_STREAM_BPS         16
-#define HTTPD_STREAM_CHANNELS    2
-#define HTTPD_STREAM_BIT_RATE    320000
-
 
 struct httpd_request;
 
diff --git a/src/httpd_rsp.c b/src/httpd_rsp.c
index 2ea40213..4e53b067 100644
--- a/src/httpd_rsp.c
+++ b/src/httpd_rsp.c
@@ -417,7 +417,7 @@ rsp_reply_db(struct httpd_request *hreq)
 static int
 item_add(xml_node *parent, struct query_params *qp, const char *user_agent, const char *client_codecs, int mode)
 {
-  struct media_quality quality = { HTTPD_STREAM_SAMPLE_RATE, HTTPD_STREAM_BPS, HTTPD_STREAM_CHANNELS, HTTPD_STREAM_BIT_RATE };
+  struct media_quality quality = { 0 };
   struct db_media_file_info dbmfi;
   struct transcode_metadata_string xcode_metadata;
   enum transcode_profile profile;
@@ -444,6 +444,11 @@ item_add(xml_node *parent, struct query_params *qp, const char *user_agent, cons
       if (safe_atou32(dbmfi.song_length, &len_ms) < 0)
         len_ms = 3 * 60 * 1000; // just a fallback default
 
+      safe_atoi32(dbmfi.samplerate, &quality.sample_rate);
+      safe_atoi32(dbmfi.bits_per_sample, &quality.bits_per_sample);
+      safe_atoi32(dbmfi.channels, &quality.channels);
+      quality.bit_rate = cfg_getint(cfg_getsec(cfg, "streaming"), "bit_rate");
+
       transcode_metadata_strings_set(&xcode_metadata, profile, &quality, len_ms);
       dbmfi.type        = xcode_metadata.type;
       dbmfi.codectype   = xcode_metadata.codectype;
@@ -715,6 +720,16 @@ rsp_stream(struct httpd_request *hreq)
 //  /rsp/stream/36364
 //  /rsp/db/0?query=id%3D36365&type=full
 //  /rsp/stream/36365
+//
+// Headers sent from Roku M2000 and M1001 in stream requests (and other?):
+//
+// 'User-Agent': 'Roku SoundBridge/3.0'
+// 'Host': '192.168.1.119:3689'
+// 'Accept': '*/*'
+// 'Pragma': 'no-cache'
+// 'accept-codecs': 'wma,mpeg,wav,mp4a,alac'
+// 'rsp-version': '0.1'
+// 'transcode-codecs': 'wav,mp3'
 static struct httpd_uri_map rsp_handlers[] =
   {
     {
diff --git a/src/transcode.c b/src/transcode.c
index 6dc9308d..632df7df 100644
--- a/src/transcode.c
+++ b/src/transcode.c
@@ -20,6 +20,8 @@
 # include <config.h>
 #endif
 
+#define _GNU_SOURCE // For memmem()
+
 #include <stdio.h>
 #include <stdbool.h>
 #include <string.h>
@@ -64,14 +66,19 @@
 #define WAV_HEADER_LEN 44
 // Max filters in a filtergraph
 #define MAX_FILTERS 9
+// Set to same size as in httpd.c (but can be set to something else)
+#define STREAM_CHUNK_SIZE (64 * 1024)
 
-static const char *default_codecs = "mpeg,wav";
+static const char *default_codecs = "mpeg,alac,wav";
 static const char *roku_codecs = "mpeg,mp4a,wma,alac,wav";
 static const char *itunes_codecs = "mpeg,mp4a,mp4v,alac,wav";
 
 // Used for passing errors to DPRINTF (can't count on av_err2str being present)
 static char errbuf[64];
 
+// Used by dummy_seek to mark a seek requested by ffmpeg
+static const uint8_t xcode_seek_marker[8] = { 0x0D, 0x0E, 0x0A, 0x0D, 0x0B, 0x0E, 0x0E, 0x0F };
+
 // The settings struct will be filled out based on the profile enum
 struct settings_ctx
 {
@@ -94,12 +101,15 @@ struct settings_ctx
   AVChannelLayout channel_layout;
 #else
   uint64_t channel_layout;
-  int channels;
 #endif
+  int nb_channels;
   int bit_rate;
   int frame_size;
   enum AVSampleFormat sample_format;
+  bool with_mp4_header;
   bool with_wav_header;
+  bool without_libav_header;
+  bool without_libav_trailer;
   bool with_icy;
   bool with_user_filters;
 
@@ -180,6 +190,9 @@ struct encode_ctx
   // The ffmpeg muxer writes to this buffer using the avio_evbuffer interface
   struct evbuffer *obuf;
 
+  // IO Context for non-file output
+  struct transcode_evbuf_io evbuf_io;
+
   // Contains the most recent packet from av_buffersink_get_frame()
   AVFrame *filt_frame;
 
@@ -195,9 +208,6 @@ struct encode_ctx
   // Used to check for ICY metadata changes at certain intervals
   uint32_t icy_interval;
   uint32_t icy_hash;
-
-  // WAV header
-  uint8_t wav_header[WAV_HEADER_LEN];
 };
 
 enum probe_type
@@ -290,6 +300,21 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
 	settings->frame_size = 352;
 	break;
 
+      case XCODE_MP4_ALAC:
+	settings->with_mp4_header = true;
+	settings->encode_audio = true;
+	settings->format = "data";
+	settings->audio_codec = AV_CODEC_ID_ALAC;
+	break;
+
+      case XCODE_MP4_ALAC_HEADER:
+	settings->without_libav_header = true;
+	settings->without_libav_trailer = true;
+	settings->encode_audio = true;
+	settings->format = "ipod"; // ffmpeg default mp4 variant ("mp4" doesn't work with SoundBridge because of the btrt atom in the header)
+	settings->audio_codec = AV_CODEC_ID_ALAC;
+	break;
+
       case XCODE_OGG:
 	settings->encode_audio = true;
 	settings->in_format = "ogg";
@@ -354,7 +379,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
       av_channel_layout_default(&settings->channel_layout, quality->channels);
 #else
       settings->channel_layout = av_get_default_channel_layout(quality->channels);
-      settings->channels       = quality->channels;
+      settings->nb_channels    = quality->channels;
 #endif
     }
 
@@ -372,6 +397,66 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
   return 0;
 }
 
+static int
+init_settings_from_video(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, int width, int height)
+{
+  settings->width = width; // Taken as given by the caller; profile and src_ctx are not used here
+  settings->height = height;
+
+  return 0; // Cannot fail
+}
+
+static int
+init_settings_from_audio(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, struct media_quality *quality)
+{
+  int src_bytes_per_sample = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt); // NOTE(review): quality is not used in this function
+
+  // Initialize unset settings that are source-dependent, not profile-dependent
+  if (!settings->sample_rate)
+    settings->sample_rate = src_ctx->audio_stream.codec->sample_rate;
+
+#if USE_CH_LAYOUT
+  if (!av_channel_layout_check(&settings->channel_layout))
+    av_channel_layout_copy(&settings->channel_layout, &src_ctx->audio_stream.codec->ch_layout);
+
+  settings->nb_channels = settings->channel_layout.nb_channels; // Keep the plain channel count in sync with the layout
+#else
+  if (settings->nb_channels == 0)
+    {
+      settings->nb_channels = src_ctx->audio_stream.codec->channels;
+      settings->channel_layout = src_ctx->audio_stream.codec->channel_layout;
+    }
+#endif
+
+  // Initialize settings that are both source-dependent and profile-dependent
+  switch (profile)
+    {
+      case XCODE_MP4_ALAC:
+      case XCODE_MP4_ALAC_HEADER:
+	if (!settings->sample_format)
+	  settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32P : AV_SAMPLE_FMT_S16P; // 4-byte source samples map to S32P, anything else to S16P
+	break;
+
+      case XCODE_PCM_NATIVE:
+	if (!settings->sample_format)
+	  settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
+	if (!settings->audio_codec)
+	  settings->audio_codec = (src_bytes_per_sample == 4) ? AV_CODEC_ID_PCM_S32LE : AV_CODEC_ID_PCM_S16LE;
+	if (!settings->format)
+	  settings->format = (src_bytes_per_sample == 4) ? "s32le" : "s16le";
+	break;
+
+      default:
+	if (settings->sample_format && settings->audio_codec && settings->format) // Nothing is derived from the source for other profiles, they must already be complete
+	  return 0;
+
+	DPRINTF(E_LOG, L_XCODE, "Bug! Profile %d has unset encoding parameters\n", profile);
+	return -1;
+    }
+
+  return 0;
+}
+
 static void
 stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AVMediaType type)
 {
@@ -382,7 +467,7 @@ stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AV
       av_channel_layout_copy(&s->codec->ch_layout, &(settings->channel_layout));
 #else
       s->codec->channel_layout = settings->channel_layout;
-      s->codec->channels       = settings->channels;
+      s->codec->channels       = settings->nb_channels;
 #endif
       s->codec->sample_fmt     = settings->sample_format;
       s->codec->time_base      = (AVRational){1, settings->sample_rate};
@@ -436,26 +521,47 @@ add_le32(uint8_t *dst, uint32_t val)
   dst[3] = (val >> 24) & 0xff;
 }
 
-/*
- * header must have size WAV_HEADER_LEN (44 bytes)
- */
-static void
-make_wav_header(uint8_t *header, int sample_rate, int bytes_per_sample, int channels, off_t bytes_total)
+// Copies src to position pos of the dst buffer, expanding dst if needed to fit
+// src. Can be called with *dst = NULL and *dst_len = 0. Returns the number of
+// bytes dst was expanded with, or -1 if pos is negative or beyond *dst_len.
+static int
+copy_buffer_to_position(uint8_t **dst, size_t *dst_len, uint8_t *src, size_t src_len, int64_t pos)
 {
-  uint32_t wav_size = bytes_total - WAV_HEADER_LEN;
+  int bytes_added = 0;
 
-  memcpy(header, "RIFF", 4);
-  add_le32(header + 4, 36 + wav_size);
-  memcpy(header + 8, "WAVEfmt ", 8);
-  add_le32(header + 16, 16);
-  add_le16(header + 20, 1);
-  add_le16(header + 22, channels);     /* channels */
-  add_le32(header + 24, sample_rate);  /* samplerate */
-  add_le32(header + 28, sample_rate * channels * bytes_per_sample); /* byte rate */
-  add_le16(header + 32, channels * bytes_per_sample);               /* block align */
-  add_le16(header + 34, 8 * bytes_per_sample);                      /* bits per sample */
-  memcpy(header + 36, "data", 4);
-  add_le32(header + 40, wav_size);
+  if (pos < 0 || pos > *dst_len)
+    return -1; // Out of bounds
+  if (src_len == 0)
+    return 0; // Nothing to do
+
+  if (pos + src_len > *dst_len) // src would run past the current end, so grow dst to fit exactly
+    {
+      bytes_added = pos + src_len - *dst_len;
+      *dst_len += bytes_added;
+      CHECK_NULL(L_XCODE, *dst = realloc(*dst, *dst_len));
+    }
+
+  memcpy(*dst + pos, src, src_len); // Overwrites any bytes already at pos
+  return bytes_added;
+}
+
+// Doesn't actually seek, just inserts a marker in the obuf
+static int64_t
+dummy_seek(void *arg, int64_t offset, enum transcode_seek_type type)
+{
+  struct transcode_ctx *ctx = arg;
+  struct encode_ctx *enc_ctx = ctx->encode_ctx;
+
+  if (type == XCODE_SEEK_SET)
+    {
+      evbuffer_add(enc_ctx->obuf, xcode_seek_marker, sizeof(xcode_seek_marker));
+      evbuffer_add(enc_ctx->obuf, &offset, sizeof(offset)); // Raw int64_t in host byte order, read back by mp4_header_trailer_from_evbuf()
+      return offset; // Report the requested position as if the seek had happened
+    }
+  else if (type == XCODE_SEEK_SIZE)
+    return enc_ctx->bytes_total;
+
+  return -1; // Any other seek type is rejected
 }
 
 static off_t
@@ -475,6 +581,8 @@ size_estimate(enum transcode_profile profile, int bit_rate, int sample_rate, int
     bytes = (int64_t)len_ms * channels * bytes_per_sample * sample_rate / 1000 + WAV_HEADER_LEN;
   else if (profile == XCODE_MP3)
     bytes = (int64_t)len_ms * bit_rate / 8000;
+  else if (profile == XCODE_MP4_ALAC)
+    bytes = (int64_t)len_ms * channels * bytes_per_sample * sample_rate / 1000 / 2; // FIXME
   else
     bytes = -1;
 
@@ -841,6 +949,7 @@ static int
 read_decode_filter_encode_write(struct transcode_ctx *ctx)
 {
   struct decode_ctx *dec_ctx = ctx->decode_ctx;
+  struct encode_ctx *enc_ctx = ctx->encode_ctx;
   enum AVMediaType type;
   int ret;
 
@@ -855,12 +964,10 @@ read_decode_filter_encode_write(struct transcode_ctx *ctx)
       if (dec_ctx->video_stream.stream)
 	decode_filter_encode_write(ctx, &dec_ctx->video_stream, NULL, AVMEDIA_TYPE_VIDEO);
 
-      // Flush muxer
-      if (ctx->encode_ctx)
-	{
-	  av_interleaved_write_frame(ctx->encode_ctx->ofmt_ctx, NULL);
-	  av_write_trailer(ctx->encode_ctx->ofmt_ctx);
-	}
+      if (enc_ctx)
+	av_interleaved_write_frame(enc_ctx->ofmt_ctx, NULL); // Flush muxer
+      if (enc_ctx && !enc_ctx->settings.without_libav_trailer)
+	av_write_trailer(enc_ctx->ofmt_ctx);
 
       return ret;
     }
@@ -951,7 +1058,7 @@ avio_evbuffer_open(struct transcode_evbuf_io *evbuf_io, int is_output)
   ae->seekfn_arg = evbuf_io->seekfn_arg;
 
   if (is_output)
-    s = avio_alloc_context(ae->buffer, AVIO_BUFFER_SIZE, 1, ae, NULL, avio_evbuffer_write, NULL);
+    s = avio_alloc_context(ae->buffer, AVIO_BUFFER_SIZE, 1, ae, NULL, avio_evbuffer_write, (evbuf_io->seekfn ? avio_evbuffer_seek : NULL));
   else
     s = avio_alloc_context(ae->buffer, AVIO_BUFFER_SIZE, 0, ae, avio_evbuffer_read, NULL, (evbuf_io->seekfn ? avio_evbuffer_seek : NULL));
 
@@ -969,22 +1076,6 @@ avio_evbuffer_open(struct transcode_evbuf_io *evbuf_io, int is_output)
   return s;
 }
 
-static AVIOContext *
-avio_input_evbuffer_open(struct transcode_evbuf_io *evbuf_io)
-{
-  return avio_evbuffer_open(evbuf_io, 0);
-}
-
-static AVIOContext *
-avio_output_evbuffer_open(struct evbuffer *evbuf)
-{
-  struct transcode_evbuf_io evbuf_io = { 0 };
-
-  evbuf_io.evbuf = evbuf;
-
-  return avio_evbuffer_open(&evbuf_io, 1);
-}
-
 static void
 avio_evbuffer_close(AVIOContext *s)
 {
@@ -1004,6 +1095,220 @@ avio_evbuffer_close(AVIOContext *s)
 }
 
 
+/* ----------------------- CUSTOM HEADER GENERATION ------------------------ */
+
+// Builds the 44 byte WAV header in a new evbuffer, which the caller must free
+static int
+make_wav_header(struct evbuffer **wav_header, int sample_rate, int bytes_per_sample, int channels, off_t bytes_total)
+{
+  uint8_t header[WAV_HEADER_LEN];
+  uint32_t wav_size = bytes_total - WAV_HEADER_LEN; // Size of the audio data that follows the header
+
+  memcpy(header, "RIFF", 4);
+  add_le32(header + 4, 36 + wav_size);
+  memcpy(header + 8, "WAVEfmt ", 8);
+  add_le32(header + 16, 16);
+  add_le16(header + 20, 1);
+  add_le16(header + 22, channels);     /* channels */
+  add_le32(header + 24, sample_rate);  /* samplerate */
+  add_le32(header + 28, sample_rate * channels * bytes_per_sample); /* byte rate */
+  add_le16(header + 32, channels * bytes_per_sample);               /* block align */
+  add_le16(header + 34, 8 * bytes_per_sample);                      /* bits per sample */
+  memcpy(header + 36, "data", 4);
+  add_le32(header + 40, wav_size);
+
+  CHECK_NULL(L_XCODE, *wav_header = evbuffer_new()); // Same allocation check as the other evbuffer_new() callers in this file
+  evbuffer_add(*wav_header, header, sizeof(header));
+  return 0;
+}
+
+static int
+mp4_adjust_moov_stco_offset(uint8_t *moov, size_t moov_len)
+{
+  uint8_t stco_needle[8] = { 's', 't', 'c', 'o', 0, 0, 0, 0 }; // Atom name plus four zero bytes (presumably version and flags - confirm)
+  uint32_t be32;
+  uint32_t n_entries;
+  uint32_t entry;
+  uint8_t *ptr;
+  uint8_t *end;
+
+  end = moov + moov_len;
+  ptr = memmem(moov, moov_len, stco_needle, sizeof(stco_needle)); // Only the first match is adjusted
+  if (!ptr || ptr + sizeof(stco_needle) + sizeof(be32) > end)
+    return -1; // No stco found, or no room for the entry count after it
+
+  ptr += sizeof(stco_needle);
+  memcpy(&be32, ptr, sizeof(be32)); // Big endian entry count follows the needle
+  for (n_entries = be32toh(be32); n_entries > 0; n_entries--)
+    {
+      ptr += sizeof(be32); // Step to the next 32 bit big endian entry
+      if (ptr + sizeof(be32) > end)
+	return -1; // Entry count claims more entries than the buffer holds
+
+      memcpy(&be32, ptr, sizeof(be32));
+      entry = be32toh(be32);
+      be32 = htobe32(entry + moov_len); // Each offset is shifted by the length of the moov buffer
+      memcpy(ptr, &be32, sizeof(be32));
+    }
+
+  return 0;
+}
+
+static int
+mp4_header_trailer_from_evbuf(uint8_t **header, size_t *header_len, uint8_t **trailer, size_t *trailer_len, struct evbuffer *evbuf, int64_t start_pos)
+{
+  uint8_t *buf = evbuffer_pullup(evbuf, -1); // Linearize the whole evbuffer so memmem() can scan it
+  size_t buf_len = evbuffer_get_length(evbuf);
+  int64_t pos = start_pos; // Data before the first marker is taken to belong at start_pos
+  int bytes_added = 0;
+  uint8_t *marker;
+  size_t len;
+  int ret;
+
+  while (buf_len > 0)
+    {
+      marker = memmem(buf, buf_len, xcode_seek_marker, sizeof(xcode_seek_marker));
+      len = marker ? marker - buf : buf_len; // Data up to the next seek marker, or all that is left
+
+      if (pos <= *header_len) // Either first write of header or seek to pos inside header
+	ret = copy_buffer_to_position(header, header_len, buf, len, pos);
+      else if (pos >= start_pos) // Either first write of trailer or seek to pos inside trailer
+	ret = copy_buffer_to_position(trailer, trailer_len, buf, len, pos - start_pos);
+      else // Unexpected seek to body (pos is before trailer but not in header)
+	ret = -1;
+
+      if (ret < 0)
+	return -1; // NOTE(review): evbuf is not drained on this path
+
+      bytes_added += ret;
+      if (!marker)
+	break;
+
+      memcpy(&pos, marker + sizeof(xcode_seek_marker), sizeof(pos)); // Seek target stored by dummy_seek(); NOTE(review): not length-checked against buf_len
+      buf += len + sizeof(xcode_seek_marker) + sizeof(pos);
+      buf_len -= len + sizeof(xcode_seek_marker) + sizeof(pos);
+  }
+
+  evbuffer_drain(evbuf, -1);
+  return bytes_added; // Combined growth of the header and trailer buffers
+}
+
+// Transcodes the entire file so that we can grab the header, which will then
+// have a correct moov atom. The moov atom contains elements like stco and stsz
+// which can only be made when the encoding has been done, since they contain
+// information about where the frames are in the file. iTunes and SoundBridge
+// require these to be correct, otherwise they won't play our transcoded files.
+// They also require that the atom is in the beginning of the file. ffmpeg's
+// "faststart" option does this, but is difficult to use with non-file output,
+// instead we move the atom ourselves.
+static int
+make_mp4_header(struct evbuffer **mp4_header, const char *url)
+{
+  struct transcode_ctx ctx = { 0 };
+  struct transcode_evbuf_io evbuf_io = { 0 };
+  uint8_t free_tag[4] = { 'f', 'r', 'e', 'e' };
+  uint8_t *header = NULL;
+  uint8_t *trailer = NULL;
+  size_t header_len = 0;
+  size_t trailer_len = 0;
+  uint8_t *ptr;
+  int ret;
+
+  if (!url || *url != '/')
+    return -1; // The source is opened as DATA_KIND_FILE, so an absolute path is required
+
+  CHECK_NULL(L_XCODE, evbuf_io.evbuf = evbuffer_new());
+
+  evbuf_io.seekfn = dummy_seek;
+  evbuf_io.seekfn_arg = &ctx;
+
+  ctx.decode_ctx = transcode_decode_setup(XCODE_MP4_ALAC_HEADER, NULL, DATA_KIND_FILE, url, NULL, -1);
+  if (!ctx.decode_ctx)
+    goto error;
+
+  ctx.encode_ctx = transcode_encode_setup_with_io(XCODE_MP4_ALAC_HEADER, NULL, &evbuf_io, ctx.decode_ctx, 0, 0);
+  if (!ctx.encode_ctx)
+    goto error;
+
+  // Save the template header, which looks something like this (note that the
+  // mdat size is still unknown, so just zeroes, and there is no moov):
+  //
+  //  0000  00 00 00 1c 66 74 79 70 69 73 6f 6d 00 00 02 00  ....ftypisom....
+  //  0010  69 73 6f 6d 69 73 6f 32 6d 70 34 31 00 00 00 08  isomiso2mp41....
+  //  0020  66 72 65 65 00 00 00 00 6d 64 61 74              free....mdat
+  ret = avformat_write_header(ctx.encode_ctx->ofmt_ctx, NULL);
+  if (ret < 0)
+    goto error;
+
+  // Writes the obuf to the header buffer, bytes_processed is 0
+  ret = mp4_header_trailer_from_evbuf(&header, &header_len, &trailer, &trailer_len, ctx.encode_ctx->obuf, ctx.encode_ctx->bytes_processed);
+  if (ret < 0)
+    goto error;
+
+  ctx.encode_ctx->bytes_processed += ret;
+
+  // Encode but discard result, this is just so that ffmpeg can create the
+  // missing header data.
+  while (read_decode_filter_encode_write(&ctx) == 0)
+    {
+      ctx.encode_ctx->bytes_processed += evbuffer_get_length(ctx.encode_ctx->obuf);
+      evbuffer_drain(ctx.encode_ctx->obuf, -1);
+    }
+
+  // Here, ffmpeg will seek back and write the size to the mdat atom and then
+  // seek forward again to write the trailer. Since we can't actually seek, we
+  // instead look for the markers that dummy_seek() inserted.
+  av_write_trailer(ctx.encode_ctx->ofmt_ctx);
+  ret = mp4_header_trailer_from_evbuf(&header, &header_len, &trailer, &trailer_len, ctx.encode_ctx->obuf, ctx.encode_ctx->bytes_processed);
+  if (ret < 0 || !header || !trailer)
+    goto error;
+
+  // The trailer buffer should now contain the moov atom. We need to adjust the
+  // chunk offset (stco) in it because we will move it to the beginning of the
+  // file.
+  ret = mp4_adjust_moov_stco_offset(trailer, trailer_len);
+  if (ret < 0)
+    goto error;
+
+  // Now we want to move the trailer (which has the moov atom) into the header.
+  // We insert it before the free atom, because that's what ffmpeg does when
+  // the "faststart" option is set.
+  CHECK_NULL(L_XCODE, header = realloc(header, header_len + trailer_len));
+
+  ptr = memmem(header, header_len, free_tag, sizeof(free_tag));
+  if (!ptr || ptr - header < sizeof(uint32_t))
+    goto error;
+
+  ptr -= sizeof(uint32_t);
+  memmove(ptr + trailer_len, ptr, header + header_len - ptr);
+  memcpy(ptr, trailer, trailer_len);
+  header_len += trailer_len;
+
+  CHECK_NULL(L_XCODE, *mp4_header = evbuffer_new()); // Was unchecked; now checked like the evbuf_io allocation above
+  evbuffer_add(*mp4_header, header, header_len);
+
+  free(header);
+  free(trailer);
+  transcode_decode_cleanup(&ctx.decode_ctx);
+  transcode_encode_cleanup(&ctx.encode_ctx);
+  evbuffer_free(evbuf_io.evbuf);
+  return 0; // The caller owns *mp4_header and must free it
+
+ error:
+  if (header)
+    DHEXDUMP(E_DBG, L_XCODE, header, header_len, "MP4 header\n");
+  if (trailer)
+    DHEXDUMP(E_DBG, L_XCODE, trailer, trailer_len, "MP4 trailer\n");
+
+  free(header);
+  free(trailer);
+  transcode_decode_cleanup(&ctx.decode_ctx);
+  transcode_encode_cleanup(&ctx.encode_ctx);
+  evbuffer_free(evbuf_io.evbuf);
+  return -1;
+}
+
+
 /* --------------------------- INPUT/OUTPUT INIT --------------------------- */
 
 static int
@@ -1051,6 +1356,19 @@ open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode
   return 0;
 }
 
+// Safe on a partially opened or already closed ctx (all callees accept NULL).
+// No early return on NULL ifmt_ctx: avformat_open_input() frees and NULLs it
+// on failure, but our custom avio must still be released.
+static void
+close_input(struct decode_ctx *ctx)
+{
+  avio_evbuffer_close(ctx->avio);
+  ctx->avio = NULL; // Prevents a double close if we are called again
+  avcodec_free_context(&ctx->audio_stream.codec);
+  avcodec_free_context(&ctx->video_stream.codec);
+  avformat_close_input(&ctx->ifmt_ctx); // Also sets ifmt_ctx to NULL
+}
+
 static int
 open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io *evbuf_io, enum probe_type probe_type)
 {
@@ -1106,7 +1424,7 @@ open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io *
 	  goto out_fail;
 	}
 
-      CHECK_NULL(L_XCODE, ctx->avio = avio_input_evbuffer_open(evbuf_io));
+      CHECK_NULL(L_XCODE, ctx->avio = avio_evbuffer_open(evbuf_io, 0));
 
       ctx->ifmt_ctx->pb = ctx->avio;
       ret = avformat_open_input(&ctx->ifmt_ctx, NULL, ifmt, &options);
@@ -1167,25 +1485,25 @@ open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io *
   return 0;
 
  out_fail:
-  avio_evbuffer_close(ctx->avio);
-  avcodec_free_context(&ctx->audio_stream.codec);
-  avcodec_free_context(&ctx->video_stream.codec);
-  avformat_close_input(&ctx->ifmt_ctx);
-
+  close_input(ctx);
   return (ret < 0 ? ret : -1); // If we got an error code from ffmpeg then return that
 }
 
 static void
-close_input(struct decode_ctx *ctx)
+close_output(struct encode_ctx *ctx)
 {
-  avio_evbuffer_close(ctx->avio);
+  if (!ctx->ofmt_ctx)
+    return; // Nothing was opened, or it has already been closed
+
   avcodec_free_context(&ctx->audio_stream.codec);
   avcodec_free_context(&ctx->video_stream.codec);
-  avformat_close_input(&ctx->ifmt_ctx);
+  avio_evbuffer_close(ctx->ofmt_ctx->pb); // NOTE(review): obuf is not freed here, confirm the owner of the evbuffer releases it
+  avformat_free_context(ctx->ofmt_ctx);
+  ctx->ofmt_ctx = NULL; // Makes a repeated call a no-op via the guard above
 }
 
 static int
-open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
+open_output(struct encode_ctx *ctx, struct transcode_evbuf_io *evbuf_io, struct decode_ctx *src_ctx)
 {
 #if USE_CONST_AVFORMAT
   const AVOutputFormat *oformat;
@@ -1193,6 +1511,8 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
   // Not const before ffmpeg 5.0
   AVOutputFormat *oformat;
 #endif
+  AVDictionary *options = NULL;
+  struct evbuffer *header = NULL;
   int ret;
 
   oformat = av_guess_format(ctx->settings.format, NULL, NULL);
@@ -1214,74 +1534,78 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
   ctx->ofmt_ctx->oformat = oformat;
 #endif
 
-  ctx->obuf = evbuffer_new();
-  if (!ctx->obuf)
-    {
-      DPRINTF(E_LOG, L_XCODE, "Could not create output evbuffer\n");
-      goto out_free_output;
-    }
-
-  ctx->ofmt_ctx->pb = avio_output_evbuffer_open(ctx->obuf);
-  if (!ctx->ofmt_ctx->pb)
-    {
-      DPRINTF(E_LOG, L_XCODE, "Could not create output avio pb\n");
-      goto out_free_evbuf;
-    }
+  CHECK_NULL(L_XCODE, ctx->ofmt_ctx->pb = avio_evbuffer_open(evbuf_io, 1));
+  ctx->obuf = evbuf_io->evbuf;
 
   if (ctx->settings.encode_audio)
     {
       ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec);
       if (ret < 0)
-	goto out_free_streams;
+	goto error;
     }
 
   if (ctx->settings.encode_video)
     {
       ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec);
       if (ret < 0)
-	goto out_free_streams;
+	goto error;
     }
 
-  // Notice, this will not write WAV header (so we do that manually)
-  ret = avformat_write_header(ctx->ofmt_ctx, NULL);
+  ret = avformat_init_output(ctx->ofmt_ctx, &options);
   if (ret < 0)
     {
-      DPRINTF(E_LOG, L_XCODE, "Error writing header to output buffer: %s\n", err2str(ret));
-      goto out_free_streams;
+      DPRINTF(E_LOG, L_XCODE, "Error initializing output: %s\n", err2str(ret));
+      goto error;
+    }
+  else if (options)
+    {
+      DPRINTF(E_WARN, L_XCODE, "Didn't recognize all options given to avformat_init_output\n");
+      av_dict_free(&options);
+      goto error;
     }
 
+  // For WAV output, both avformat_write_header() and manual wav header is required
+  if (!ctx->settings.without_libav_header)
+    {
+      ret = avformat_write_header(ctx->ofmt_ctx, NULL);
+      if (ret < 0)
+	{
+	  DPRINTF(E_LOG, L_XCODE, "Error writing header to output buffer: %s\n", err2str(ret));
+	  goto error;
+	}
+    }
   if (ctx->settings.with_wav_header)
     {
-      evbuffer_add(ctx->obuf, ctx->wav_header, sizeof(ctx->wav_header));
+      ret = make_wav_header(&header, ctx->settings.sample_rate, av_get_bytes_per_sample(ctx->settings.sample_format), ctx->settings.nb_channels, ctx->bytes_total);
+      if (ret < 0)
+	{
+	  DPRINTF(E_LOG, L_XCODE, "Error creating WAV header\n");
+	  goto error;
+	}
+
+      evbuffer_add_buffer(ctx->obuf, header);
+      evbuffer_free(header);
+    }
+  if (ctx->settings.with_mp4_header)
+    {
+      ret = make_mp4_header(&header, src_ctx->ifmt_ctx->url);
+      if (ret < 0)
+	{
+	  DPRINTF(E_LOG, L_XCODE, "Error creating MP4 header\n");
+	  goto error;
+	}
+
+      evbuffer_add_buffer(ctx->obuf, header);
+      evbuffer_free(header);
     }
 
   return 0;
 
- out_free_streams:
-  avcodec_free_context(&ctx->audio_stream.codec);
-  avcodec_free_context(&ctx->video_stream.codec);
-
-  avio_evbuffer_close(ctx->ofmt_ctx->pb);
- out_free_evbuf:
-  evbuffer_free(ctx->obuf);
- out_free_output:
-  avformat_free_context(ctx->ofmt_ctx);
-
+ error:
+  close_output(ctx);
   return -1;
 }
 
-static void
-close_output(struct encode_ctx *ctx)
-{
-  avcodec_free_context(&ctx->audio_stream.codec);
-  avcodec_free_context(&ctx->video_stream.codec);
-
-  avio_evbuffer_close(ctx->ofmt_ctx->pb);
-  evbuffer_free(ctx->obuf);
-
-  avformat_free_context(ctx->ofmt_ctx);
-}
-
 static int
 filter_def_abuffer(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg)
 {
@@ -1540,6 +1864,13 @@ create_filtergraph(struct stream_ctx *out_stream, struct filters *filters, size_
   return -1;
 }
 
+static void
+close_filters(struct encode_ctx *ctx)
+{
+  avfilter_graph_free(&ctx->audio_stream.filter_graph);
+  avfilter_graph_free(&ctx->video_stream.filter_graph);
+}
+
 static int
 open_filters(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
 {
@@ -1576,18 +1907,10 @@ open_filters(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
   return 0;
 
  out_fail:
-  avfilter_graph_free(&ctx->audio_stream.filter_graph);
-  avfilter_graph_free(&ctx->video_stream.filter_graph);
+  close_filters(ctx);
   return -1;
 }
 
-static void
-close_filters(struct encode_ctx *ctx)
-{
-  avfilter_graph_free(&ctx->audio_stream.filter_graph);
-  avfilter_graph_free(&ctx->video_stream.filter_graph);
-}
-
 
 /* ----------------------------- TRANSCODE API ----------------------------- */
 
@@ -1634,92 +1957,55 @@ transcode_decode_setup(enum transcode_profile profile, struct media_quality *qua
 }
 
 struct encode_ctx *
-transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, int width, int height)
+transcode_encode_setup_with_io(enum transcode_profile profile, struct media_quality *quality, struct transcode_evbuf_io *evbuf_io, struct decode_ctx *src_ctx, int width, int height)
 {
   struct encode_ctx *ctx;
-  int src_bytes_per_sample;
   int dst_bytes_per_sample;
-  int channels;
 
   CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx)));
   CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc());
   CHECK_NULL(L_XCODE, ctx->encoded_pkt = av_packet_alloc());
+  CHECK_NULL(L_XCODE, ctx->evbuf_io.evbuf = evbuffer_new());
 
+  // Caller didn't specify one, so use our own
+  if (!evbuf_io)
+    evbuf_io = &ctx->evbuf_io;
+
+  // Initialize general settings
   if (init_settings(&ctx->settings, profile, quality) < 0)
-    goto fail_free;
+    goto error;
 
-  ctx->settings.width = width;
-  ctx->settings.height = height;
+  if (ctx->settings.encode_audio && init_settings_from_audio(&ctx->settings, profile, src_ctx, quality) < 0)
+    goto error;
 
-  // Caller did not specify a sample rate -> use same as source
-  if (!ctx->settings.sample_rate && ctx->settings.encode_audio)
-    {
-      ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate;
-    }
-
-  // Caller did not specify a sample format -> determine from source
-  if (!ctx->settings.sample_format && ctx->settings.encode_audio)
-    {
-      src_bytes_per_sample = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt);
-      if (src_bytes_per_sample == 4)
-	{
-	  ctx->settings.sample_format = AV_SAMPLE_FMT_S32;
-	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S32LE;
-	  ctx->settings.format = "s32le";
-	}
-      else
-	{
-	  ctx->settings.sample_format = AV_SAMPLE_FMT_S16;
-	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S16LE;
-	  ctx->settings.format = "s16le";
-	}
-    }
-
-#if USE_CH_LAYOUT
-  // Caller did not specify channels -> use same as source
-  if (!av_channel_layout_check(&ctx->settings.channel_layout) && ctx->settings.encode_audio)
-    {
-      av_channel_layout_copy(&ctx->settings.channel_layout, &src_ctx->audio_stream.codec->ch_layout);
-    }
-
-  channels = ctx->settings.channel_layout.nb_channels;
-#else
-  // Caller did not specify channels -> use same as source
-  if (ctx->settings.channels == 0 && ctx->settings.encode_audio)
-    {
-      ctx->settings.channels = src_ctx->audio_stream.codec->channels;
-      ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout;
-    }
-
-  channels = ctx->settings.channels;
-#endif
+  if (ctx->settings.encode_video && init_settings_from_video(&ctx->settings, profile, src_ctx, width, height) < 0)
+    goto error;
 
   dst_bytes_per_sample = av_get_bytes_per_sample(ctx->settings.sample_format);
+  ctx->bytes_total = size_estimate(profile, ctx->settings.bit_rate, ctx->settings.sample_rate, dst_bytes_per_sample, ctx->settings.nb_channels, src_ctx->len_ms);
 
-  ctx->bytes_total = size_estimate(profile, ctx->settings.bit_rate, ctx->settings.sample_rate, dst_bytes_per_sample, channels, src_ctx->len_ms);
-
-  if (ctx->settings.with_wav_header)
-    make_wav_header(ctx->wav_header, ctx->settings.sample_rate, dst_bytes_per_sample, channels, ctx->bytes_total);
   if (ctx->settings.with_icy && src_ctx->data_kind == DATA_KIND_HTTP)
-    ctx->icy_interval = METADATA_ICY_INTERVAL * channels * dst_bytes_per_sample * ctx->settings.sample_rate;
+    ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.nb_channels * dst_bytes_per_sample * ctx->settings.sample_rate;
 
-  if (open_output(ctx, src_ctx) < 0)
-    goto fail_free;
+  if (open_output(ctx, evbuf_io, src_ctx) < 0)
+    goto error;
 
   if (open_filters(ctx, src_ctx) < 0)
-    goto fail_close;
+    goto error;
 
   return ctx;
 
- fail_close:
-  close_output(ctx);
- fail_free:
-  av_packet_free(&ctx->encoded_pkt);
-  av_frame_free(&ctx->filt_frame);
-  free(ctx);
+ error:
+  transcode_encode_cleanup(&ctx);
   return NULL;
 }
 
+struct encode_ctx *
+transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, int width, int height)
+{
+  return transcode_encode_setup_with_io(profile, quality, NULL, src_ctx, width, height);
+}
+
 struct transcode_ctx *
 transcode_setup(enum transcode_profile profile, struct media_quality *quality, enum data_kind data_kind, const char *path, uint32_t len_ms)
 {
@@ -1814,6 +2100,7 @@ transcode_needed(const char *user_agent, const char *client_codecs, char *file_c
   const char *prefer_format;
   cfg_t *lib;
   bool force_xcode;
+  bool supports_alac;
   bool supports_mpeg;
   bool supports_wav;
   int count;
@@ -1872,6 +2159,7 @@ transcode_needed(const char *user_agent, const char *client_codecs, char *file_c
   if (!force_xcode && strstr(client_codecs, file_codectype))
     return XCODE_NONE;
 
+  supports_alac = strstr(client_codecs, "alac") || strstr(client_codecs, "mp4a");
   supports_mpeg = strstr(client_codecs, "mpeg") && avcodec_find_encoder(AV_CODEC_ID_MP3);
   supports_wav = strstr(client_codecs, "wav");
 
@@ -1880,18 +2168,22 @@ transcode_needed(const char *user_agent, const char *client_codecs, char *file_c
     {
       if (strcmp(prefer_format, "wav") == 0 && supports_wav)
 	return XCODE_WAV;
-      else if (strcmp(prefer_format, "mpeg") == 0 && supports_mpeg)
+      if (strcmp(prefer_format, "mpeg") == 0 && supports_mpeg)
 	return XCODE_MP3;
+      if (strcmp(prefer_format, "alac") == 0 && supports_alac)
+	return XCODE_MP4_ALAC;
     }
 
   // This order determines the default if user didn't configure a preference.
   // The lossless formats are given highest preference.
   if (supports_wav)
     return XCODE_WAV;
-  else if (supports_mpeg)
+  if (supports_mpeg)
     return XCODE_MP3;
-  else
-    return XCODE_UNKNOWN;
+  if (supports_alac)
+    return XCODE_MP4_ALAC;
+
+  return XCODE_UNKNOWN;
 }
 
 
@@ -1920,6 +2212,7 @@ transcode_encode_cleanup(struct encode_ctx **ctx)
   close_filters(*ctx);
   close_output(*ctx);
 
+  evbuffer_free((*ctx)->evbuf_io.evbuf);
   av_packet_free(&(*ctx)->encoded_pkt);
   av_frame_free(&(*ctx)->filt_frame);
   free(*ctx);
@@ -2312,6 +2605,17 @@ transcode_metadata_strings_set(struct transcode_metadata_string *s, enum transco
 	snprintf(s->file_size, sizeof(s->file_size), "%d", (int)bytes);
 	break;
 
+      case XCODE_MP4_ALAC:
+	s->type = "m4a";
+	s->codectype = "alac";
+	s->description = "Apple Lossless audio file";
+
+	snprintf(s->bitrate, sizeof(s->bitrate), "%d", 8 * STOB(q->sample_rate, q->bits_per_sample, q->channels) / 1000); // 44100/16/2 -> 1411
+
+	bytes = size_estimate(profile, q->bit_rate, q->sample_rate, q->bits_per_sample / 8, q->channels, len_ms);
+	snprintf(s->file_size, sizeof(s->file_size), "%d", (int)bytes);
+	break;
+
       default:
 	DPRINTF(E_WARN, L_XCODE, "transcode_metadata_strings_set() called with unknown profile %d\n", profile);
     }
diff --git a/src/transcode.h b/src/transcode.h
index d0550dc1..f72b7a4e 100644
--- a/src/transcode.h
+++ b/src/transcode.h
@@ -23,10 +23,14 @@ enum transcode_profile
   XCODE_PCM32,
   // Transcodes the best audio stream to MP3
   XCODE_MP3,
-  // Transcodes the best audio stream to OPUS
+  // Transcodes the best audio stream to raw OPUS (no container)
   XCODE_OPUS,
-  // Transcodes the best audio stream to ALAC
+  // Transcodes the best audio stream to raw ALAC (no container)
   XCODE_ALAC,
+  // Transcodes the best audio stream to ALAC in an MP4 container
+  XCODE_MP4_ALAC,
+  // Produces just the header for an MP4 container with ALAC
+  XCODE_MP4_ALAC_HEADER,
   // Transcodes the best audio stream from OGG
   XCODE_OGG,
   // Transcodes the best video stream to JPEG/PNG/VP8
@@ -79,6 +83,9 @@ transcode_decode_setup(enum transcode_profile profile, struct media_quality *qua
 struct encode_ctx *
 transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, int width, int height);
 
+struct encode_ctx *
+transcode_encode_setup_with_io(enum transcode_profile profile, struct media_quality *quality, struct transcode_evbuf_io *evbuf_io, struct decode_ctx *src_ctx, int width, int height);
+
 struct transcode_ctx *
 transcode_setup(enum transcode_profile profile, struct media_quality *quality, enum data_kind data_kind, const char *path, uint32_t len_ms);
 
@@ -182,9 +189,15 @@ transcode_encode_query(struct encode_ctx *ctx, const char *query);
 struct http_icy_metadata *
 transcode_metadata(struct transcode_ctx *ctx, int *changed);
 
-// When transcoding, we are in essence serving a different source file than the
-// original to the client. So we can't serve some of the file metadata from the
-// filescanner. This function creates strings to be used for override.
+/* When transcoding, we are in essence serving a different source file than the
+ * original to the client. So we can't serve some of the file metadata from the
+ * filescanner. This function creates strings to be used for override.
+ *
+ * @out s          Structure with (non-allocated) strings
+ * @in  profile    Transcoding profile
+ * @in  q          Transcoding quality
+ * @in  len_ms     Length of source track
+ */
 void
 transcode_metadata_strings_set(struct transcode_metadata_string *s, enum transcode_profile profile, struct media_quality *q, uint32_t len_ms);