[input/xcode] Write to input buffer with the source's native sample rate/format

Still WIP at this point since the player and output can't use the improved quality yet, and because rtptimes etc. are likely incorrect
2025-07-28 01:40:12 -04:00 · 2019-01-11 19:34:36 +01:00 · 2019-01-11 19:34:36 +01:00 · 9182597605
commit 9182597605
parent 84e813038b
8 changed files with 146 additions and 61 deletions
--- a/src/conffile.c
+++ b/src/conffile.c
@ -100,6 +100,8 @@ static cfg_opt_t sec_library[] =
    CFG_STR_LIST("no_decode", NULL, CFGF_NONE),
    CFG_STR_LIST("force_decode", NULL, CFGF_NONE),
    CFG_BOOL("pipe_autostart", cfg_true, CFGF_NONE),
+    CFG_INT("pipe_sample_rate", 44100, CFGF_NONE),
+    CFG_INT("pipe_bits_per_sample", 16, CFGF_NONE),
    CFG_BOOL("rating_updates", cfg_false, CFGF_NONE),
    CFG_END()
  };
--- a/src/input.c
+++ b/src/input.c
@ -215,7 +215,7 @@ playback(void *arg)
  // Loops until input_loop_break is set or no more input, e.g. EOF
  ret = inputs[type]->start(ps);
  if (ret < 0)
-    input_write(NULL, INPUT_FLAG_ERROR);
+    input_write(NULL, 0, 0, INPUT_FLAG_ERROR);

 #ifdef DEBUG
  DPRINTF(E_DBG, L_PLAYER, "Playback loop stopped (break is %d, ret %d)\n", input_loop_break, ret);
@ -240,7 +240,7 @@ input_wait(void)

 // Called by input modules from within the playback loop
 int
-input_write(struct evbuffer *evbuf, short flags)
+input_write(struct evbuffer *evbuf, int sample_rate, int bits_per_sample, short flags)
 {
  struct timespec ts;
  int ret;
--- a/src/input.h
+++ b/src/input.h
@ -140,18 +140,20 @@ struct input_definition
 int input_loop_break;

 /*
- * Transfer stream data to the player's input buffer. The input evbuf will be
- * drained on succesful write. This is to avoid copying memory. If the player's
- * input buffer is full the function will block until the write can be made
- * (unless INPUT_FILE_NONBLOCK is set).
+ * Transfer stream data to the player's input buffer. Data must be PCM-LE
+ * samples. The input evbuf will be drained on successful write. This is to avoid
+ * copying memory. If the player's input buffer is full the function will block
+ * until the write can be made (unless INPUT_FILE_NONBLOCK is set).
 *
- * @in  evbuf    Raw audio data to write
+ * @in  evbuf    Raw PCM_LE audio data to write
+ * @in  sample_rate      Sample rate of the data
+ * @in  bits_per_sample  Bits per sample (typically 16 or 24)
 * @in  flags    One or more INPUT_FLAG_*
 * @return       0 on success, EAGAIN if buffer was full (and _NONBLOCK is set),
 *               -1 on error
 */
 int
-input_write(struct evbuffer *evbuf, short flags);
+input_write(struct evbuffer *evbuf, int sample_rate, int bits_per_sample, short flags);

 /*
 * Input modules can use this to wait in the playback loop (like input_write()
--- a/src/inputs/file_http.c
+++ b/src/inputs/file_http.c
@ -26,12 +26,13 @@
 #include "transcode.h"
 #include "http.h"
 #include "misc.h"
+#include "logger.h"
 #include "input.h"

 static int
 setup(struct player_source *ps)
 {
-  ps->input_ctx = transcode_setup(XCODE_PCM16_NOHEADER, ps->data_kind, ps->path, ps->len_ms, NULL);
+  ps->input_ctx = transcode_setup(XCODE_PCM_NATIVE, ps->data_kind, ps->path, ps->len_ms, NULL);
  if (!ps->input_ctx)
    return -1;

@ -57,27 +58,33 @@ setup_http(struct player_source *ps)
 static int
 start(struct player_source *ps)
 {
+  struct transcode_ctx *ctx = ps->input_ctx;
  struct evbuffer *evbuf;
  short flags;
+  int sample_rate;
+  int bps;
  int ret;
  int icy_timer;

  evbuf = evbuffer_new();

+  sample_rate = transcode_encode_query(ctx->encode_ctx, "sample_rate");
+  bps = transcode_encode_query(ctx->encode_ctx, "bits_per_sample");
+
  ret = -1;
  flags = 0;
  while (!input_loop_break && !(flags & INPUT_FLAG_EOF))
    {
      // We set "wanted" to 1 because the read size doesn't matter to us
      // TODO optimize?
-      ret = transcode(evbuf, &icy_timer, ps->input_ctx, 1);
+      ret = transcode(evbuf, &icy_timer, ctx, 1);
      if (ret < 0)
 	break;

      flags = ((ret == 0) ? INPUT_FLAG_EOF : 0) |
               (icy_timer ? INPUT_FLAG_METADATA : 0);

-      ret = input_write(evbuf, flags);
+      ret = input_write(evbuf, sample_rate, bps, flags);
      if (ret < 0)
 	break;
    }
--- a/src/inputs/pipe.c
+++ b/src/inputs/pipe.c
@ -103,6 +103,9 @@ static pthread_t tid_pipe;
 static struct event_base *evbase_pipe;
 static struct commands_base *cmdbase;

+// From config - the sample rate and bps of the pipe input
+static int pipe_sample_rate;
+static int pipe_bits_per_sample;
 // From config - should we watch library pipes for data or only start on request
 static int pipe_autostart;
 // The mfi id of the pipe autostarted by the pipe thread
@ -307,7 +310,7 @@ parse_progress(struct input_metadata *m, char *progress)

  m->rtptime = start; // Not actually used - we have our own rtptime
  m->offset = (pos > start) ? (pos - start) : 0;
-  m->song_length = (end - start) * 10 / 441; // Convert to ms based on 44100
+  m->song_length = (end - start) * 1000 / pipe_sample_rate;
 }

 static void
@ -845,7 +848,7 @@ start(struct player_source *ps)
      ret = evbuffer_read(evbuf, pipe->fd, PIPE_READ_MAX);
      if ((ret == 0) && (pipe->is_autostarted))
 	{
-	  input_write(evbuf, INPUT_FLAG_EOF); // Autostop
+	  input_write(evbuf, pipe_sample_rate, pipe_bits_per_sample, INPUT_FLAG_EOF); // Autostop
 	  break;
 	}
      else if ((ret == 0) || ((ret < 0) && (errno == EAGAIN)))
@ -862,7 +865,7 @@ start(struct player_source *ps)
      flags = (pipe_metadata_is_new ? INPUT_FLAG_METADATA : 0);
      pipe_metadata_is_new = 0;

-      ret = input_write(evbuf, flags);
+      ret = input_write(evbuf, pipe_sample_rate, pipe_bits_per_sample, flags);
      if (ret < 0)
 	break;
    }
@ -945,6 +948,20 @@ init(void)
      CHECK_ERR(L_PLAYER, listener_add(pipe_listener_cb, LISTENER_DATABASE));
    }

+  pipe_sample_rate = cfg_getint(cfg_getsec(cfg, "library"), "pipe_sample_rate");
+  if (pipe_sample_rate != 44100 && pipe_sample_rate != 48000 && pipe_sample_rate != 96000) // Invalid only if it matches none of the supported rates
+    {
+      DPRINTF(E_FATAL, L_PLAYER, "The configuration of pipe_sample_rate is invalid: %d\n", pipe_sample_rate);
+      return -1;
+    }
+
+  pipe_bits_per_sample = cfg_getint(cfg_getsec(cfg, "library"), "pipe_bits_per_sample");
+  if (pipe_bits_per_sample != 16 && pipe_bits_per_sample != 24) // Invalid only if it is neither 16 nor 24
+    {
+      DPRINTF(E_FATAL, L_PLAYER, "The configuration of pipe_bits_per_sample is invalid: %d\n", pipe_bits_per_sample);
+      return -1;
+    }
+
  return 0;
 }

--- a/src/spotify.c
+++ b/src/spotify.c
@ -719,7 +719,7 @@ playback_eot(void *arg, int *retval)
  g_state = SPOTIFY_STATE_STOPPING;

  // TODO 1) This will block for a while, but perhaps ok?
-  input_write(spotify_audio_buffer, INPUT_FLAG_EOF);
+  input_write(spotify_audio_buffer, 0, 0, INPUT_FLAG_EOF);

  *retval = 0;
  return COMMAND_END;
@ -1011,9 +1011,9 @@ static int music_delivery(sp_session *sess, const sp_audioformat *format,
  int ret;

  /* No support for resampling right now */
-  if ((format->sample_rate != 44100) || (format->channels != 2))
+  if ((format->sample_type != SP_SAMPLETYPE_INT16_NATIVE_ENDIAN) || (format->channels != 2))
    {
-      DPRINTF(E_LOG, L_SPOTIFY, "Got music with unsupported samplerate or channels, stopping playback\n");
+      DPRINTF(E_LOG, L_SPOTIFY, "Got music with unsupported sample format or number of channels, stopping playback\n");
      spotify_playback_stop_nonblock();
      return num_frames;
    }
@ -1037,7 +1037,7 @@ static int music_delivery(sp_session *sess, const sp_audioformat *format,
  // The input buffer only accepts writing when it is approaching depletion, and
  // because we use NONBLOCK it will just return if this is not the case. So in
  // most cases no actual write is made and spotify_audio_buffer will just grow.
-  input_write(spotify_audio_buffer, INPUT_FLAG_NONBLOCK);
+  input_write(spotify_audio_buffer, format->sample_rate, 16, INPUT_FLAG_NONBLOCK);

  return num_frames;
 }
--- a/src/transcode.c
+++ b/src/transcode.c
@ -76,12 +76,10 @@ struct settings_ctx

  // Audio settings
  enum AVCodecID audio_codec;
-  const char *audio_codec_name;
  int sample_rate;
  uint64_t channel_layout;
  int channels;
  enum AVSampleFormat sample_format;
-  int byte_depth;
  bool wavheader;
  bool icy;

@ -179,20 +177,12 @@ struct encode_ctx
  uint8_t header[44];
 };

-struct transcode_ctx
-{
-  struct decode_ctx *decode_ctx;
-  struct encode_ctx *encode_ctx;
-};
-

 /* -------------------------- PROFILE CONFIGURATION ------------------------ */

 static int
 init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 {
-  const AVCodecDescriptor *codec_desc;
-
  memset(settings, 0, sizeof(struct settings_ctx));

  switch (profile)
@ -207,7 +197,13 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16;
-	settings->byte_depth = 2; // Bytes per sample = 16/8
+	settings->icy = 1;
+	break;
+
+      case XCODE_PCM_NATIVE: // Sample rate and bit depth determined by source
+	settings->encode_audio = 1;
+	settings->channel_layout = AV_CH_LAYOUT_STEREO;
+	settings->channels = 2;
 	settings->icy = 1;
 	break;

@ -219,7 +215,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16P;
-	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;

      case XCODE_OPUS:
@ -230,7 +225,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16; // Only libopus support
-	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;

      case XCODE_JPEG:
@ -253,18 +247,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	return -1;
    }

-  if (settings->audio_codec)
-    {
-      codec_desc = avcodec_descriptor_get(settings->audio_codec);
-      settings->audio_codec_name = codec_desc->name;
-    }
-
-  if (settings->video_codec)
-    {
-      codec_desc = avcodec_descriptor_get(settings->video_codec);
-      settings->video_codec_name = codec_desc->name;
-    }
-
  return 0;
 }

@ -319,13 +301,15 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s
 {
  uint32_t wav_len;
  int duration;
+  int bps;

  if (src_ctx->duration)
    duration = src_ctx->duration;
  else
    duration = 3 * 60 * 1000; /* 3 minutes, in ms */

-  wav_len = ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate * (duration / 1000);
+  bps = av_get_bits_per_sample(ctx->settings.audio_codec);
+  wav_len = ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate * (duration / 1000);

  *est_size = wav_len + sizeof(ctx->header);

@ -336,9 +320,9 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s
  add_le16(ctx->header + 20, 1);
  add_le16(ctx->header + 22, ctx->settings.channels);     /* channels */
  add_le32(ctx->header + 24, ctx->settings.sample_rate);  /* samplerate */
-  add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * ctx->settings.byte_depth); /* byte rate */
-  add_le16(ctx->header + 32, ctx->settings.channels * ctx->settings.byte_depth);                             /* block align */
-  add_le16(ctx->header + 34, ctx->settings.byte_depth * 8);                                                  /* bits per sample */
+  add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * (bps / 8)); /* byte rate */
+  add_le16(ctx->header + 32, ctx->settings.channels * (bps / 8));                             /* block align */
+  add_le16(ctx->header + 34, bps);                                                            /* bits per sample */
  memcpy(ctx->header + 36, "data", 4);
  add_le32(ctx->header + 40, wav_len);
 }
@ -368,20 +352,27 @@ stream_find(struct decode_ctx *ctx, unsigned int stream_index)
 * @out ctx       A pre-allocated stream ctx where we save stream and codec info
 * @in output     Output to add the stream to
 * @in codec_id   What kind of codec should we use
- * @in codec_name Name of codec (only used for logging)
 * @return        Negative on failure, otherwise zero
 */
 static int
-stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id, const char *codec_name)
+stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id)
 {
+  const AVCodecDescriptor *codec_desc;
  AVCodec *encoder;
  AVDictionary *options = NULL;
  int ret;

+  codec_desc = avcodec_descriptor_get(codec_id);
+  if (!codec_desc)
+    {
+      DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", codec_id);
+      return -1;
+    }
+
  encoder = avcodec_find_encoder(codec_id);
  if (!encoder)
    {
-      DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_name);
+      DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_desc->name);
      return -1;
    }

@ -393,7 +384,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  if (!s->codec->pix_fmt)
    {
      s->codec->pix_fmt = avcodec_default_get_format(s->codec, encoder->pix_fmts);
-      DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_name);
+      DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_desc->name);
    }

  if (ctx->ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
@ -406,7 +397,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  ret = avcodec_open2(s->codec, NULL, &options);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_desc->name, err2str(ret));
      avcodec_free_context(&s->codec);
      return -1;
    }
@ -415,7 +406,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id
  ret = avcodec_parameters_from_context(s->stream->codecpar, s->codec);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret));
      avcodec_free_context(&s->codec);
      return -1;
    }
@ -888,14 +879,14 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)

  if (ctx->settings.encode_audio)
    {
-      ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec, ctx->settings.audio_codec_name);
+      ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec);
      if (ret < 0)
 	goto out_free_streams;
    }

  if (ctx->settings.encode_video)
    {
-      ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec, ctx->settings.video_codec_name);
+      ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec);
      if (ret < 0)
 	goto out_free_streams;
    }
@ -1161,6 +1152,7 @@ struct encode_ctx *
 transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ctx, off_t *est_size, int width, int height)
 {
  struct encode_ctx *ctx;
+  int bps;

  CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx)));
  CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc());
@ -1172,6 +1164,26 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct
  ctx->settings.width = width;
  ctx->settings.height = height;

+  if (!ctx->settings.sample_rate && ctx->settings.encode_audio)
+    ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate;
+
+  if (!ctx->settings.sample_format && ctx->settings.encode_audio)
+    {
+      bps = av_get_bits_per_sample(src_ctx->audio_stream.codec->codec_id);
+      if (bps >= 24)
+	{
+	  ctx->settings.sample_format = AV_SAMPLE_FMT_S32;
+	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S24LE;
+	  ctx->settings.format = "s24le";
+	}
+      else
+	{
+	  ctx->settings.sample_format = AV_SAMPLE_FMT_S16;
+	  ctx->settings.audio_codec = AV_CODEC_ID_PCM_S16LE;
+	  ctx->settings.format = "s16le";
+	}
+    }
+
  if (ctx->settings.wavheader)
    make_wav_header(ctx, src_ctx, est_size);

@ -1182,7 +1194,10 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct
    goto fail_close;

  if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP)
-    ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate;
+    {
+      bps = av_get_bits_per_sample(ctx->settings.audio_codec);
+      ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate;
+    }

  return ctx;

@ -1223,6 +1238,7 @@ transcode_setup(enum transcode_profile profile, enum data_kind data_kind, const
 struct decode_ctx *
 transcode_decode_setup_raw(void)
 {
+  const AVCodecDescriptor *codec_desc;
  struct decode_ctx *ctx;
  AVCodec *decoder;
  int ret;
@ -1234,13 +1250,20 @@ transcode_decode_setup_raw(void)
      goto out_free_ctx;
    }

+  codec_desc = avcodec_descriptor_get(ctx->settings.audio_codec);
+  if (!codec_desc)
+    {
+      DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", ctx->settings.audio_codec);
+      goto out_free_ctx;
+    }
+
  // In raw mode we won't actually need to read or decode, but we still setup
  // the decode_ctx because transcode_encode_setup() gets info about the input
  // through this structure (TODO dont' do that)
  decoder = avcodec_find_decoder(ctx->settings.audio_codec);
  if (!decoder)
    {
-      DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", ctx->settings.audio_codec_name);
+      DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", codec_desc->name);
      goto out_free_ctx;
    }

@ -1255,7 +1278,7 @@ transcode_decode_setup_raw(void)
  ret = avcodec_parameters_from_context(ctx->audio_stream.stream->codecpar, ctx->audio_stream.codec);
  if (ret < 0)
    {
-      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", ctx->settings.audio_codec_name, err2str(ret));
+      DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret));
      goto out_free_codec;
    }

@ -1659,6 +1682,24 @@ transcode_decode_query(struct decode_ctx *ctx, const char *query)
  return -1;
 }

+int
+transcode_encode_query(struct encode_ctx *ctx, const char *query) // Answers "sample_rate" / "bits_per_sample" for the encoder's audio stream
+{
+  if (strcmp(query, "sample_rate") == 0)
+    {
+      if (ctx->audio_stream.stream) // Without an audio stream we fall through to the -1 below
+	return ctx->audio_stream.stream->codecpar->sample_rate;
+    }
+  else if (strcmp(query, "bits_per_sample") == 0)
+    {
+      if (ctx->audio_stream.stream)
+	return av_get_bits_per_sample(ctx->audio_stream.stream->codecpar->codec_id); // Looked up from the codec ID of the output stream
+    }
+
+  return -1; // Unknown query, or no audio stream to report on
+}
+
+
 /*                                  Metadata                                 */

 struct http_icy_metadata *
--- a/src/transcode.h
+++ b/src/transcode.h
@ -8,10 +8,12 @@

 enum transcode_profile
 {
-  // Transcodes the best audio stream into PCM16 (does not add wav header)
+  // Decodes/resamples the best audio stream into 44100 PCM16 (does not add wav header)
  XCODE_PCM16_NOHEADER,
-  // Transcodes the best audio stream into PCM16 (with wav header)
+  // Decodes/resamples the best audio stream into 44100 PCM16 (with wav header)
  XCODE_PCM16_HEADER,
+  // Decodes the best audio stream into PCM16 or PCM24, no resampling (does not add wav header)
+  XCODE_PCM_NATIVE,
  // Transcodes the best audio stream into MP3
  XCODE_MP3,
  // Transcodes the best audio stream into OPUS
@ -23,7 +25,11 @@ enum transcode_profile

 struct decode_ctx;
 struct encode_ctx;
-struct transcode_ctx;
+struct transcode_ctx
+{
+  struct decode_ctx *decode_ctx;
+  struct encode_ctx *encode_ctx;
+};

 typedef void transcode_frame;

@ -122,6 +128,16 @@ transcode_seek(struct transcode_ctx *ctx, int ms);
 int
 transcode_decode_query(struct decode_ctx *ctx, const char *query);

+/* Query for information (e.g. sample rate) about the output being produced by
+ * the transcoding
+ *
+ * @in  ctx        Encode context
+ * @in  query      Query - see implementation for supported queries
+ * @return         Negative if error, otherwise query dependent
+ */
+int
+transcode_encode_query(struct encode_ctx *ctx, const char *query);
+
 // Metadata
 struct http_icy_metadata *
 transcode_metadata(struct transcode_ctx *ctx, int *changed);