[xcode] Add OPUS encoding profile for RTP Chromecasting

2025-04-15 00:35:55 -04:00 · 2019-01-10 10:52:56 +01:00 · 2019-01-10 10:52:56 +01:00 · d850c2e691
commit d850c2e691
parent 8e474dfd7f
3 changed files with 51 additions and 21 deletions
--- a/src/transcode.c
+++ b/src/transcode.c
@ -40,6 +40,7 @@
 #include "conffile.h"
 #include "db.h"
 #include "avio_evbuffer.h"
 #include "misc.h"
 #include "transcode.h"
 // Interval between ICY metadata checks for streams, in seconds
@ -221,6 +222,17 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile)
 	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;
      case XCODE_OPUS:
 	settings->encode_audio = 1;
 	settings->format = "data"; // Means we get the raw packet from the encoder, no muxing
 	settings->audio_codec = AV_CODEC_ID_OPUS;
 	settings->sample_rate = 48000;
 	settings->channel_layout = AV_CH_LAYOUT_STEREO;
 	settings->channels = 2;
 	settings->sample_format = AV_SAMPLE_FMT_S16; // Only libopus supports this
 	settings->byte_depth = 2; // Bytes per sample = 16/8
 	break;
      case XCODE_JPEG:
 	settings->encode_video = 1;
 	settings->silent = 1;
@ -1383,7 +1395,7 @@ transcode_cleanup(struct transcode_ctx **ctx)
 /*                       Encoding, decoding and transcoding                  */
 int
-transcode_decode(void **frame, struct decode_ctx *dec_ctx)
+transcode_decode(transcode_frame **frame, struct decode_ctx *dec_ctx)
 {
  struct transcode_ctx ctx;
  int ret;
@ -1414,7 +1426,7 @@ transcode_decode(void **frame, struct decode_ctx *dec_ctx)
 // Filters and encodes
 int
-transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, void *frame, int eof)
+transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame *frame, int eof)
 {
  AVFrame *f = frame;
  struct stream_ctx *s;
@ -1489,8 +1501,8 @@ transcode(struct evbuffer *evbuf, int *icy_timer, struct transcode_ctx *ctx, int
  return processed;
 }
-void *
+transcode_frame *
-transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
+transcode_frame_new(enum transcode_profile profile, void *data, size_t size)
 {
  AVFrame *f;
  int ret;
@ -1502,7 +1514,7 @@ transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
      return NULL;
    }
-  f->nb_samples     = size / 4;
+  f->nb_samples     = BTOS(size);
  f->format         = AV_SAMPLE_FMT_S16;
  f->channel_layout = AV_CH_LAYOUT_STEREO;
 #ifdef HAVE_FFMPEG
@ -1511,7 +1523,9 @@ transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
  f->pts            = AV_NOPTS_VALUE;
  f->sample_rate    = 44100;
-  ret = avcodec_fill_audio_frame(f, 2, f->format, data, size, 0);
+  // We don't align because the frame won't be given directly to the encoder
  // anyway; it will first go through the filter (which might align it...?)
  ret = avcodec_fill_audio_frame(f, 2, f->format, data, size, 1);
  if (ret < 0)
    {
      DPRINTF(E_LOG, L_XCODE, "Error filling frame with rawbuf: %s\n", err2str(ret));
@ -1523,7 +1537,7 @@ transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
 }
 void
-transcode_frame_free(void *frame)
+transcode_frame_free(transcode_frame *frame)
 {
  AVFrame *f = frame;
--- a/src/transcode.h
+++ b/src/transcode.h
@ -14,6 +14,8 @@ enum transcode_profile
  XCODE_PCM16_HEADER,
  // Transcodes the best audio stream into MP3
  XCODE_MP3,
  // Transcodes the best audio stream into OPUS
  XCODE_OPUS,
  // Transcodes the best video stream into JPEG/PNG
  XCODE_JPEG,
  XCODE_PNG,
@ -23,6 +25,8 @@ struct decode_ctx;
 struct encode_ctx;
 struct transcode_ctx;
 typedef void transcode_frame;
 // Setting up
 struct decode_ctx *
 transcode_decode_setup(enum transcode_profile profile, enum data_kind data_kind, const char *path, struct evbuffer *evbuf, uint32_t song_length);
@ -60,7 +64,7 @@ transcode_cleanup(struct transcode_ctx **ctx);
 * @return         Positive if OK, negative if error, 0 if EOF
 */
 int
-transcode_decode(void **frame, struct decode_ctx *ctx);
+transcode_decode(transcode_frame **frame, struct decode_ctx *ctx);
 /* Encodes and remuxes a frame. Also resamples if needed.
 *
@ -71,7 +75,7 @@ transcode_decode(void **frame, struct decode_ctx *ctx);
 * @return         Bytes added if OK, negative if error
 */
 int
-transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, void *frame, int eof);
+transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame *frame, int eof);
 /* Demuxes, decodes, encodes and remuxes from the input.
 *
@ -87,17 +91,18 @@ int
 transcode(struct evbuffer *evbuf, int *icy_timer, struct transcode_ctx *ctx, int want_bytes);
 /* Converts a buffer with raw data to a frame that can be passed directly to the
- * transcode_encode() function
+ * transcode_encode() function. It does not copy, so if you free the data the
 * frame will become invalid.
 *
 * @in  profile    Tells the function what kind of frame to create
 * @in  data       Buffer with raw data
 * @in  size       Size of buffer
 * @return         Opaque pointer to frame if OK, otherwise NULL
 */
-void *
+transcode_frame *
-transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size);
+transcode_frame_new(enum transcode_profile profile, void *data, size_t size);
 void
-transcode_frame_free(void *frame);
+transcode_frame_free(transcode_frame *frame);
 /* Seek to the specified position - next transcode() will return this packet
 *
--- a/src/transcode_legacy.c
+++ b/src/transcode_legacy.c
@ -39,6 +39,7 @@
 #include "conffile.h"
 #include "db.h"
 #include "avio_evbuffer.h"
 #include "misc.h"
 #include "transcode.h"
 // Interval between ICY metadata checks for streams, in seconds
@ -162,6 +163,16 @@ init_profile(struct encode_ctx *ctx, enum transcode_profile profile)
 	ctx->byte_depth = 2; // Bytes per sample = 16/8
 	return 0;
      case XCODE_OPUS:
 	ctx->format = "data"; // Means we get the raw packet from the encoder, no muxing
 	ctx->audio_codec = AV_CODEC_ID_OPUS;
 	ctx->sample_rate = 48000;
 	ctx->channel_layout = AV_CH_LAYOUT_STEREO;
 	ctx->channels = 2;
 	ctx->sample_format = AV_SAMPLE_FMT_S16; // Only libopus supports this
 	ctx->byte_depth = 2; // Bytes per sample = 16/8
 	return 0;
      case XCODE_MP3:
 	ctx->format = "mp3";
 	ctx->audio_codec = AV_CODEC_ID_MP3;
@ -1337,7 +1348,7 @@ transcode_cleanup(struct transcode_ctx **ctx)
 }
 void
-transcode_frame_free(void *frame)
+transcode_frame_free(transcode_frame *frame)
 {
  struct decoded_frame *decoded = frame;
@ -1350,7 +1361,7 @@ transcode_frame_free(void *frame)
 int
-transcode_decode(void **frame, struct decode_ctx *ctx)
+transcode_decode(transcode_frame **frame, struct decode_ctx *ctx)
 {
  struct decoded_frame *decoded;
  AVPacket packet;
@ -1450,7 +1461,7 @@ transcode_decode(void **frame, struct decode_ctx *ctx)
 // Filters and encodes
 int
-transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, void *frame, int eof)
+transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame *frame, int eof)
 {
  struct decoded_frame *decoded = frame;
  int stream_index;
@ -1486,7 +1497,7 @@ transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, void *frame, in
 int
 transcode(struct evbuffer *evbuf, int *icy_timer, struct transcode_ctx *ctx, int want_bytes)
 {
-  void *frame;
+  transcode_frame *frame;
  int processed;
  int ret;
@ -1515,8 +1526,8 @@ transcode(struct evbuffer *evbuf, int *icy_timer, struct transcode_ctx *ctx, int
  return processed;
 }
-void *
+transcode_frame *
-transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
+transcode_frame_new(enum transcode_profile profile, void *data, size_t size)
 {
  struct decoded_frame *decoded;
  AVFrame *f;
@ -1540,7 +1551,7 @@ transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
  decoded->stream_index = 0;
  decoded->frame = f;
-  f->nb_samples     = size / 4;
+  f->nb_samples     = BTOS(size);
  f->format         = AV_SAMPLE_FMT_S16;
  f->channel_layout = AV_CH_LAYOUT_STEREO;
 #ifdef HAVE_FFMPEG
@ -1549,7 +1560,7 @@ transcode_frame_new(enum transcode_profile profile, uint8_t *data, size_t size)
  f->pts            = AV_NOPTS_VALUE;
  f->sample_rate    = 44100;
-  ret = avcodec_fill_audio_frame(f, 2, f->format, data, size, 0);
+  ret = avcodec_fill_audio_frame(f, 2, f->format, data, size, 1);
  if (ret < 0)
    {
      DPRINTF(E_LOG, L_XCODE, "Error filling frame with rawbuf: %s\n", err2str(ret));