[input/xcode] Write to input buffer with the sources native sample rate/format

Still WIP at this point since the player and output can't use the use improved quality yet, and because rtptimes etc. are likely incorrect
2025-11-25 20:16:14 -05:00 · 2019-01-11 19:34:36 +01:00
parent 84e813038b
commit 9182597605
8 changed files with 146 additions and 61 deletions
--- a/src/transcode.c
+++ b/src/transcode.c
@@ -76,12 +76,10 @@ struct settings_ctx

  // Audio settings
  enum AVCodecID audio_codec;
-  const char *audio_codec_name;
  int sample_rate;
  uint64_t channel_layout;
  int channels;
  enum AVSampleFormat sample_format;
-  int byte_depth;
  bool wavheader;
  bool icy;

@@ -179,20 +177,12 @@ struct encode_ctx
  uint8_t header[44];
 };

-struct transcode_ctx
-{
-  struct decode_ctx *decode_ctx;
-  struct encode_ctx *encode_ctx;
-};
-

 /* -------------------------- PROFILE CONFIGURATION ------------------------ */

 static int
 init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 {
-  const AVCodecDescriptor *codec_desc;
-
  memset(settings, 0, sizeof(struct settings_ctx));

  switch (profile)
@@ -207,7 +197,13 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16;
-	settings->byte_depth = 2; // Bytes per sample = 16/8
+	settings->icy = 1;
+	break;
+
+      case XCODE_PCM_NATIVE: // Sample rate and bit depth determined by source
+	settings->encode_audio = 1;
+	settings->channel_layout = AV_CH_LAYOUT_STEREO;
+	settings->channels = 2;
 	settings->icy = 1;
 	break;

@@ -219,7 +215,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16P;
-	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;

      case XCODE_OPUS:
@@ -230,7 +225,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16; // Only libopus support
-	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;

      case XCODE_JPEG:
@@ -253,18 +247,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	return -1;
    }

-  if (settings->audio_codec)
-    {
-      codec_desc = avcodec_descriptor_get(settings->audio_codec);
-      settings->audio_codec_name = codec_desc->name;
-    }
-
-  if (settings->video_codec)
-    {
-      codec_desc = avcodec_descriptor_get(settings->video_codec);
-      settings->video_codec_name = codec_desc->name;
-    }
-
  return 0;
 }

@@ -319,13 +301,15 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s
 {
  uint32_t wav_len;
  int duration;
+  int bps;

  if (src_ctx->duration)
    duration = src_ctx->duration;
  else
    duration = 3 * 60 * 1000; /* 3 minutes, in ms */

-  wav_len = ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate * (duration / 1000);
+  bps = av_get_bits_per_sample(ctx->settings.audio_codec);
+  wav_len = ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate * (duration / 1000);

  *est_size = wav_len + sizeof(ctx->header);

@@ -336,9 +320,9 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s
  add_le16(ctx->header + 20, 1);
  add_le16(ctx->header + 22, ctx->settings.channels);     /* channels */
  add_le32(ctx->header + 24, ctx->settings.sample_rate);  /* samplerate */
-  add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * ctx->settings.byte_depth); /* byte rate */
-  add_le16(ctx->header + 32, ctx->settings.channels * ctx->settings.byte_depth);                             /* block align */
-  add_le16(ctx->header + 34, ctx->settings.byte_depth * 8);                                                  /* bits per sample */
+  add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * (bps / 8)); /* byte rate */
+  add_le16(ctx->header + 32, ctx->settings.channels * (bps / 8));                             /* block align */
+  add_le16(ctx->header + 34, bps);                                                            /* bits per sample */
  memcpy(ctx->header + 36, "data", 4);
  add_le32(ctx->header + 40, wav_len);
 }
@@ -368,20 +352,27 @@ stream_find(struct decode_ctx *ctx, unsigned int stream_index)
 * @out ctx       A pre-allocated stream ctx where we save stream and codec info
 * @in output     Output to add the stream to
 * @in codec_id   What kind of codec should we use
- * @in codec_name Name of codec (only used for logging)
 * @return        Negative on failure, otherwise zero
 */
 static int
-stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id, const char *codec_name)
+stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id)
 {
+  const AVCodecDescriptor *codec_desc;
  AVCodec *encoder;
  AVDictionary *options = NULL;
  int ret;

+  codec_desc = avcodec_descriptor_get(codec_id);
+  if (!codec_desc)
+    {
+      DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", codec_id);
+      return -1;
+    }
+
  encoder = avcodec_find_encoder(codec_id);
  if (!encoder)
    {
-      DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_name);
+      DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_desc->name);
      return -1;
    }

@@ -393,7 +384,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  if (!s->codec->pix_fmt)
    {
      s->codec->pix_fmt = avcodec_default_get_format(s->codec, encoder->pix_fmts);
-      DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_name);
+      DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_desc->name);
    }

  if (ctx->ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
@@ -406,7 +397,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  ret = avcodec_open2(s->codec, NULL, &options);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_desc->name, err2str(ret));
      avcodec_free_context(&s->codec);
      return -1;
    }
@@ -415,7 +406,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  ret = avcodec_parameters_from_context(s->stream->codecpar, s->codec);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret));
      avcodec_free_context(&s->codec);
      return -1;
    }
@@ -888,14 +879,14 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)

  if (ctx->settings.encode_audio)
    {
-      ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec, ctx->settings.audio_codec_name);
+      ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec);
      if (ret < 0)
 	goto out_free_streams;
    }

  if (ctx->settings.encode_video)
    {
-      ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec, ctx->settings.video_codec_name);
+      ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec);
      if (ret < 0)
 	goto out_free_streams;
    }
@@ -1161,6 +1152,7 @@ struct encode_ctx *
 transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ctx, off_t *est_size, int width, int height)
 {
  struct encode_ctx *ctx;
+  int bps;

  CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx)));
  CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc());
@@ -1172,6 +1164,26 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct
  ctx->settings.width = width;
  ctx->settings.height = height;

+  if (!ctx->settings.sample_rate && ctx->settings.encode_audio)
+    ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate;
+
+  if (!ctx->settings.sample_format && ctx->settings.encode_audio)
+    {
+      bps = av_get_bits_per_sample(src_ctx->audio_stream.codec->codec_id);
+      if (bps >= 24)
+	{
+	  ctx->settings.sample_format = AV_SAMPLE_FMT_S32;
+	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S24LE;
+	  ctx->settings.format = "s24le";
+	}
+      else
+	{
+	  ctx->settings.sample_format = AV_SAMPLE_FMT_S16;
+	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S16LE;
+	  ctx->settings.format = "s16le";
+	}
+    }
+
  if (ctx->settings.wavheader)
    make_wav_header(ctx, src_ctx, est_size);

@@ -1182,7 +1194,10 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct
    goto fail_close;

  if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP)
-    ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate;
+    {
+      bps = av_get_bits_per_sample(ctx->settings.audio_codec);
+      ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate;
+    }

  return ctx;

@@ -1223,6 +1238,7 @@ transcode_setup(enum transcode_profile profile, enum data_kind data_kind, const
 struct decode_ctx *
 transcode_decode_setup_raw(void)
 {
+  const AVCodecDescriptor *codec_desc;
  struct decode_ctx *ctx;
  AVCodec *decoder;
  int ret;
@@ -1234,13 +1250,20 @@ transcode_decode_setup_raw(void)
      goto out_free_ctx;
    }

+  codec_desc = avcodec_descriptor_get(ctx->settings.audio_codec);
+  if (!codec_desc)
+    {
+      DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", ctx->settings.audio_codec);
+      goto out_free_ctx;
+    }
+
  // In raw mode we won't actually need to read or decode, but we still setup
  // the decode_ctx because transcode_encode_setup() gets info about the input
  // through this structure (TODO dont' do that)
  decoder = avcodec_find_decoder(ctx->settings.audio_codec);
  if (!decoder)
    {
-      DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", ctx->settings.audio_codec_name);
+      DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", codec_desc->name);
      goto out_free_ctx;
    }

@@ -1255,7 +1278,7 @@ transcode_decode_setup_raw(void)
  ret = avcodec_parameters_from_context(ctx->audio_stream.stream->codecpar, ctx->audio_stream.codec);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", ctx->settings.audio_codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret));
      goto out_free_codec;
    }

@@ -1659,6 +1682,24 @@ transcode_decode_query(struct decode_ctx *ctx, const char *query)
  return -1;
 }

+int
+transcode_encode_query(struct encode_ctx *ctx, const char *query)
+{
+  if (strcmp(query, "sample_rate") == 0)
+    {
+      if (ctx->audio_stream.stream)
+	return ctx->audio_stream.stream->codecpar->sample_rate;
+    }
+  else if (strcmp(query, "bits_per_sample") == 0)
+    {
+      if (ctx->audio_stream.stream)
+	return av_get_bits_per_sample(ctx->audio_stream.stream->codecpar->codec_id);
+    }
+
+  return -1;
+}
+
+
 /*                                  Metadata                                 */

 struct http_icy_metadata *