[transcode] Improve decoding to allow for decoder special cases, like decoders not processing

entire packets, rejecting packets and not returning frames. Should fix issue with network streams not opening.
2025-07-14 11:21:55 -04:00 · 2015-10-21 23:53:21 +02:00 · 2015-10-21 23:53:21 +02:00 · 058b2fb0d6
commit 058b2fb0d6
parent 9853b8f9bd
3 changed files with 166 additions and 80 deletions
--- a/configure.ac
+++ b/configure.ac
@ -96,8 +96,6 @@ AC_CHECK_LIB([avfilter], [av_buffersink_get_frame],
 	AC_DEFINE(HAVE_LIBAV_BUFFERSINK_GET_FRAME, 1, [Define to 1 if you have ffmpeg/libav with av_buffersink_get_frame]))
 AC_CHECK_LIB([avfilter], [avfilter_graph_parse_ptr],
 	AC_DEFINE(HAVE_LIBAV_GRAPH_PARSE_PTR, 1, [Define to 1 if you have ffmpeg/libav with avfilter_graph_parse_ptr]))
 AC_CHECK_LIB([avcodec], [av_copy_packet],
 	AC_DEFINE(HAVE_LIBAV_COPY_PACKET, 1, [Define to 1 if you have ffmpeg/libav with av_copy_packet]))
 AC_CHECK_LIB([avcodec], [av_packet_rescale_ts],
 	AC_DEFINE(HAVE_LIBAV_PACKET_RESCALE_TS, 1, [Define to 1 if you have ffmpeg/libav with av_packet_rescale_ts]))
 AC_CHECK_LIB([avformat], [avformat_alloc_output_context2],
--- a/src/ffmpeg-compat.c
+++ b/src/ffmpeg-compat.c
@ -10,10 +10,6 @@
 # define avcodec_find_best_pix_fmt_of_list(a, b, c, d) avcodec_find_best_pix_fmt2((enum AVPixelFormat *)(a), (b), (c), (d))
 #endif
 #ifndef HAVE_LIBAV_COPY_PACKET
 # define av_copy_packet(dst, src) memcpy(dst, src, sizeof(AVPacket))
 #endif
 #ifndef HAVE_LIBAV_FRAME_ALLOC
 # define av_frame_alloc() avcodec_alloc_frame()
 # define av_frame_free(x) avcodec_free_frame((x))
--- a/src/transcode.c
+++ b/src/transcode.c
@ -48,6 +48,8 @@
 #define METADATA_ICY_INTERVAL 5
 // Maximum number of streams in a file that we will accept
 #define MAX_STREAMS 64
 // Maximum number of times we retry when we encounter bad packets
 #define MAX_BAD_PACKETS 3
 static char *default_codecs = "mpeg,wav";
 static char *roku_codecs = "mpeg,mp4a,wma,wav";
@ -71,12 +73,12 @@ struct decode_ctx {
  // Duration (used to make wav header)
  uint32_t duration;
-  // Used for seeking
+  // Contains the most recent packet from av_read_frame
  // Used for resuming after seek and for freeing correctly
  // in transcode_decode()
  AVPacket packet;
  int resume;
-  AVPacket seek_packet;
+  int resume_offset;
  // Used to keep track of retries when decoding fails
  int retry;
 };
 struct encode_ctx {
@ -238,6 +240,61 @@ decode_stream(struct decode_ctx *ctx, AVStream *in_stream)
          (in_stream == ctx->subtitle_stream));
 }
 /* Will read the next packet from the source, unless we are in resume mode,
 * where the previous packet is returned (again), but with an adjusted data
 * pointer. *packet must already be an already allocated AVPacket, which will be
 * updated by this function. Must not be freed with av_free_packet().
 */
 static int
 read_packet(AVPacket *packet, AVStream **stream, unsigned int *stream_index, struct decode_ctx *ctx)
 {
  AVStream *in_stream;
  char *errmsg;
  int ret;
  do
    {
      if (ctx->resume)
 	{
 	  // Copies packet struct, but not actual packet payload, and adjusts
 	  // data pointer to somewhere inside the payload if resume_offset is set
 	  *packet = ctx->packet;
 	  packet->data += ctx->resume_offset;
 	  packet->size -= ctx->resume_offset;
 	  ctx->resume = 0;
        }
      else
 	{
 	  // We are going to read a new packet from source, so now it is safe to
 	  // discard the previous packet and reset resume_offset
 	  av_free_packet(&ctx->packet);
 	  ctx->resume_offset = 0;
 	  ret = av_read_frame(ctx->ifmt_ctx, &ctx->packet);
 	  if (ret < 0)
 	    {
 	      errmsg = malloc(128);
 	      av_strerror(ret, errmsg, 128);
 	      DPRINTF(E_WARN, L_XCODE, "Could not read frame: %s\n", errmsg);
 	      free(errmsg);
              return -1;
 	    }
 	  *packet = ctx->packet;
 	}
      in_stream = ctx->ifmt_ctx->streams[packet->stream_index];
    }
  while (!decode_stream(ctx, in_stream));
  av_packet_rescale_ts(packet, in_stream->time_base, in_stream->codec->time_base);
  *stream = in_stream;
  *stream_index = packet->stream_index;
  return 0;
 }
 static int
 encode_write_frame(struct encode_ctx *ctx, AVFrame *filt_frame, unsigned int stream_index, int *got_frame)
 {
@ -395,6 +452,44 @@ filter_encode_write_frame(struct encode_ctx *ctx, AVFrame *frame, unsigned int s
 }
 #endif
 /* Will step through each stream and feed the stream decoder with empty packets
 * to see if the decoder has more frames lined up. Will return non-zero if a
 * frame is found. Should be called until it stops returning anything.
 */
 static int
 flush_decoder(AVFrame *frame, AVStream **stream, unsigned int *stream_index, struct decode_ctx *ctx)
 {
  AVStream *in_stream;
  AVPacket dummypacket;
  int got_frame;
  int i;
  memset(&dummypacket, 0, sizeof(AVPacket));
  for (i = 0; i < ctx->ifmt_ctx->nb_streams; i++)
    {
      in_stream = ctx->ifmt_ctx->streams[i];
      if (!decode_stream(ctx, in_stream))
 	continue;
      if (in_stream->codec->codec_type == AVMEDIA_TYPE_AUDIO)
 	avcodec_decode_audio4(in_stream->codec, frame, &got_frame, &dummypacket);
      else
 	avcodec_decode_video2(in_stream->codec, frame, &got_frame, &dummypacket);
      if (!got_frame)
 	continue;
      DPRINTF(E_DBG, L_XCODE, "Flushing decoders produced a frame from stream %d\n", i);
      *stream = in_stream;
      *stream_index = i;
      return got_frame;
    }
  return 0;
 }
 static void
 flush_encoder(struct encode_ctx *ctx, unsigned int stream_index)
 {
@ -1079,6 +1174,8 @@ transcode_decode_setup(struct media_file_info *mfi, int decode_video)
  ctx->duration = mfi->song_length;
  av_init_packet(&ctx->packet);
  return ctx;
 }
@ -1273,7 +1370,7 @@ transcode_needed(const char *user_agent, const char *client_codecs, char *file_c
 void
 transcode_decode_cleanup(struct decode_ctx *ctx)
 {
-  av_free_packet(&ctx->seek_packet);
+  av_free_packet(&ctx->packet);
  close_input(ctx);
  free(ctx);
 }
@ -1324,83 +1421,71 @@ transcode_decode(struct decode_ctx *ctx)
  AVPacket packet;
  AVStream *in_stream;
  AVFrame *frame;
  char *errmsg;
  unsigned int stream_index;
  int got_frame;
  int retry;
  int ret;
  int used;
-  // Read a packet
+  // Alloc the frame we will return on success
  packet.data = NULL;
  packet.size = 0;
  av_init_packet(&packet);
  while (1)
    {
      if (ctx->resume)
 	{
 	  av_copy_packet(&packet, &ctx->seek_packet);
 	  av_init_packet(&ctx->seek_packet);
 	  ctx->resume = 0;
        }
      else if ((ret = av_read_frame(ctx->ifmt_ctx, &packet)) < 0)
 	{
 	  errmsg = malloc(128);
 	  av_strerror(ret, errmsg, 128);
 	  DPRINTF(E_WARN, L_XCODE, "Could not read frame: %s\n", errmsg);
 	  free(errmsg);
          return NULL;
 	}
      stream_index = packet.stream_index;
      in_stream = ctx->ifmt_ctx->streams[stream_index];
      if (decode_stream(ctx, in_stream))
 	break;
      av_free_packet(&packet);
    }
  // Decode into a frame
  frame = av_frame_alloc();
  if (!frame)
    {
      DPRINTF(E_LOG, L_XCODE, "Out of memory for decode frame\n");
      av_free_packet(&packet);
      return NULL;
    }
-  av_packet_rescale_ts(&packet, in_stream->time_base, in_stream->codec->time_base);
+  // Loop until we either fail or get a frame
-
+  retry = 0;
-  if (in_stream->codec->codec_type == AVMEDIA_TYPE_AUDIO)
+  do
    ret = avcodec_decode_audio4(in_stream->codec, frame, &got_frame, &packet);
  else
    ret = avcodec_decode_video2(in_stream->codec, frame, &got_frame, &packet);
  av_free_packet(&packet);
  // On failure, try reading another packet (just one)
  // TODO Check if treating !got_frame like this is ok
  if (!got_frame || (ret < 0))
    {
-      av_frame_free(&frame);
+      ret = read_packet(&packet, &in_stream, &stream_index, ctx);
-
+      if (ret < 0)
      if (ctx->retry)
 	{
-	  DPRINTF(E_LOG, L_XCODE, "Couldn't decode packet after two attempts, giving up\n");
+	  // Some decoders need to be flushed, meaning the decoder is to be called
-	  return NULL;
+	  // with empty input until no more frames are returned
 	  DPRINTF(E_DBG, L_XCODE, "Could not read packet (eof?), will flush decoders\n");
 	  got_frame = flush_decoder(frame, &in_stream, &stream_index, ctx);
 	  if (got_frame)
 	    break;
 	  else
 	    goto out_error;
 	}
      else
 	{
 	  DPRINTF(E_WARN, L_XCODE, "Couldn't decode packet, let's try the next one\n");
 	  ctx->retry = 1;
 	  decoded = transcode_decode(ctx);
 	  if (!decoded)
 	    return NULL;
-	  frame = decoded->frame;
+      // "used" will tell us how much of the packet was decoded. We may
-	  stream_index = decoded->stream_index;
+      // not get a frame because of insufficient input, in which case we loop to
-	  free(decoded);
+      // read another packet.
      if (in_stream->codec->codec_type == AVMEDIA_TYPE_AUDIO)
 	used = avcodec_decode_audio4(in_stream->codec, frame, &got_frame, &packet);
      else
 	used = avcodec_decode_video2(in_stream->codec, frame, &got_frame, &packet);
      // decoder returned an error, but maybe the packet was just a bad apple,
      // so let's try MAX_BAD_PACKETS times before giving up
      if (used < 0)
 	{
 	  DPRINTF(E_WARN, L_XCODE, "Couldn't decode packet\n");
 	  retry += 1;
 	  if (retry < MAX_BAD_PACKETS)
 	    continue;
 	  else
 	    goto out_error;
 	}
      // decoder didn't process the entire packet, so flag a resume, meaning
      // that the next read_packet() will return this same packet, but where the
      // data pointer is adjusted with an offset
      if (used < packet.size)
 	{
 	  DPRINTF(E_DBG, L_XCODE, "Decoder did not finish packet, packet will be resumed\n");
 	  ctx->resume_offset += used;
 	  ctx->resume = 1;
 	}
    }
  while (!got_frame);
  // Return the decoded frame and stream index
  frame->pts = av_frame_get_best_effort_timestamp(frame);
@ -1409,14 +1494,20 @@ transcode_decode(struct decode_ctx *ctx)
  if (!decoded)
    {
      DPRINTF(E_LOG, L_XCODE, "Out of memory for decoded result\n");
-      av_frame_free(&frame);
+      goto out_error;
      return NULL;
    }
  decoded->frame = frame;
  decoded->stream_index = stream_index;
  return decoded;
 out_error:
  DPRINTF(E_DBG, L_XCODE, "Decode failure - probably EOF\n");
  av_frame_free(&frame);
  return NULL;
 }
 // Filters and encodes
@ -1569,9 +1660,9 @@ transcode_seek(struct transcode_ctx *ctx, int ms)
  in_stream->codec->skip_frame = AVDISCARD_NONREF;
  while (1)
    {
-      av_free_packet(&decode_ctx->seek_packet);
+      av_free_packet(&decode_ctx->packet);
-      ret = av_read_frame(decode_ctx->ifmt_ctx, &decode_ctx->seek_packet);
+      ret = av_read_frame(decode_ctx->ifmt_ctx, &decode_ctx->packet);
      if (ret < 0)
 	{
 	  DPRINTF(E_WARN, L_XCODE, "Could not read more data while seeking\n");
@ -1579,22 +1670,23 @@ transcode_seek(struct transcode_ctx *ctx, int ms)
 	  return -1;
 	}
-      if (decode_ctx->seek_packet.stream_index != in_stream->index)
+      if (decode_ctx->packet.stream_index != in_stream->index)
 	continue;
      // Need a pts to return the real position
-      if (decode_ctx->seek_packet.pts == AV_NOPTS_VALUE)
+      if (decode_ctx->packet.pts == AV_NOPTS_VALUE)
 	continue;
      break;
    }
  in_stream->codec->skip_frame = AVDISCARD_DEFAULT;
-  // Tell transcode_decode() to resume with seek_packet
+  // Tell transcode_decode() to resume with ctx->packet
  decode_ctx->resume = 1;
  decode_ctx->resume_offset = 0;
  // Compute position in ms from pts
-  got_pts = decode_ctx->seek_packet.pts;
+  got_pts = decode_ctx->packet.pts;
  if ((start_time != AV_NOPTS_VALUE) && (start_time > 0))
    got_pts -= start_time;