Merge pull request #1547 from owntone/xcode_fixup1

Adjustments to ffmpeg/transcoding
This commit is contained in:
ejurgensen 2022-10-28 21:19:58 +02:00 committed by GitHub
commit 49f442bf9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 94 deletions

View File

@ -240,12 +240,6 @@ OWNTONE_MODULES_CHECK([OWNTONE], [LIBAV],
[libavformat/avformat.h])
])
AS_IF([test "$is_ffmpeg" = "yes"],
[PKG_CHECK_EXISTS([libavcodec >= 59.24.100],
[AC_DEFINE([HAVE_FFMPEG_CH_LAYOUT], 1,
[Define to 1 if ffmpeg-libavcodec defines new 5.1.x ch_layout])])]
)
AC_CHECK_SIZEOF([void *])
dnl --- Begin configuring the options ---

View File

@ -452,7 +452,8 @@ scan_metadata_ffmpeg(struct media_file_info *mfi, const char *file)
codec_id = ctx->streams[i]->codecpar->codec_id;
sample_rate = ctx->streams[i]->codecpar->sample_rate;
sample_fmt = ctx->streams[i]->codecpar->format;
#ifdef HAVE_FFMPEG_CH_LAYOUT
// Matches USE_CH_LAYOUT in transcode.c
#if (LIBAVCODEC_VERSION_MAJOR > 59) || ((LIBAVCODEC_VERSION_MAJOR == 59) && (LIBAVCODEC_VERSION_MINOR > 24))
channels = ctx->streams[i]->codecpar->ch_layout.nb_channels;
#else
channels = ctx->streams[i]->codecpar->channels;

View File

@ -42,6 +42,13 @@
#include "misc.h"
#include "transcode.h"
// Switches for compability with ffmpeg's ever changing API
#define USE_IMAGE2PIPE (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29))
#define USE_CONST_AVFORMAT (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_CONST_AVCODEC (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_NO_CLEAR_AVFMT_NOFILE (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_CH_LAYOUT (LIBAVCODEC_VERSION_MAJOR > 59) || ((LIBAVCODEC_VERSION_MAJOR == 59) && (LIBAVCODEC_VERSION_MINOR > 24))
// Interval between ICY metadata checks for streams, in seconds
#define METADATA_ICY_INTERVAL 5
// Maximum number of streams in a file that we will accept
@ -52,6 +59,8 @@
#define READ_TIMEOUT 30000000
// Buffer size for reading/writing input and output evbuffers
#define AVIO_BUFFER_SIZE 4096
// Size of the wav header that iTunes needs
#define WAV_HEADER_LEN 44
static const char *default_codecs = "mpeg,wav";
static const char *roku_codecs = "mpeg,mp4a,wma,alac,wav";
@ -78,15 +87,15 @@ struct settings_ctx
// Audio settings
enum AVCodecID audio_codec;
int sample_rate;
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
AVChannelLayout channel_layout;
#else
uint64_t channel_layout;
#endif
int channels;
#endif
int bit_rate;
enum AVSampleFormat sample_format;
bool wavheader;
bool wav_header;
bool icy;
// Video settings
@ -180,7 +189,7 @@ struct encode_ctx
uint32_t icy_hash;
// WAV header
uint8_t header[44];
uint8_t wav_header[WAV_HEADER_LEN];
};
enum probe_type
@ -212,7 +221,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
break;
case XCODE_PCM16_HEADER:
settings->wavheader = 1;
settings->wav_header = 1;
case XCODE_PCM16:
settings->encode_audio = 1;
settings->format = "s16le";
@ -266,7 +275,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
// With ffmpeg 4.3 (> libavformet 58.29) "image2" only works for actual file
// output. It's possible we should have used "image2pipe" all along, but since
// "image2" has been working we only replace it going forward.
#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29))
#if USE_IMAGE2PIPE
settings->format = "image2pipe";
#else
settings->format = "image2";
@ -281,7 +290,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
settings->encode_video = 1;
settings->silent = 1;
// See explanation above
#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29))
#if USE_IMAGE2PIPE
settings->format = "image2pipe";
#else
settings->format = "image2";
@ -294,7 +303,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
settings->encode_video = 1;
settings->silent = 1;
// See explanation above
#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29))
#if USE_IMAGE2PIPE
settings->format = "image2pipe";
#else
settings->format = "image2";
@ -315,11 +324,11 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
if (quality && quality->channels)
{
settings->channels = quality->channels;
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
av_channel_layout_default(&settings->channel_layout, quality->channels);
#else
settings->channel_layout = av_get_default_channel_layout(quality->channels);
settings->channels = quality->channels;
#endif
}
@ -343,7 +352,7 @@ stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AV
if (type == AVMEDIA_TYPE_AUDIO)
{
s->codec->sample_rate = settings->sample_rate;
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
av_channel_layout_copy(&s->codec->ch_layout, &(settings->channel_layout));
#else
s->codec->channel_layout = settings->channel_layout;
@ -401,36 +410,34 @@ add_le32(uint8_t *dst, uint32_t val)
dst[3] = (val >> 24) & 0xff;
}
/*
* header must have size WAV_HEADER_LEN (44 bytes)
*/
static void
make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_size)
make_wav_header(uint8_t *header, off_t *est_size, int sample_rate, int bps, int channels, int duration)
{
uint32_t wav_len;
int duration;
int bps;
if (src_ctx->duration)
duration = src_ctx->duration;
else
if (duration == 0)
duration = 3 * 60 * 1000; /* 3 minutes, in ms */
bps = av_get_bytes_per_sample(ctx->settings.sample_format);
wav_len = ctx->settings.channels * bps * ctx->settings.sample_rate * (duration / 1000);
wav_len = channels * bps * sample_rate * (duration / 1000);
if (est_size)
*est_size = wav_len + sizeof(ctx->header);
*est_size = wav_len + WAV_HEADER_LEN;
memcpy(ctx->header, "RIFF", 4);
add_le32(ctx->header + 4, 36 + wav_len);
memcpy(ctx->header + 8, "WAVEfmt ", 8);
add_le32(ctx->header + 16, 16);
add_le16(ctx->header + 20, 1);
add_le16(ctx->header + 22, ctx->settings.channels); /* channels */
add_le32(ctx->header + 24, ctx->settings.sample_rate); /* samplerate */
add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * bps); /* byte rate */
add_le16(ctx->header + 32, ctx->settings.channels * bps); /* block align */
add_le16(ctx->header + 34, 8 * bps); /* bits per sample */
memcpy(ctx->header + 36, "data", 4);
add_le32(ctx->header + 40, wav_len);
memcpy(header, "RIFF", 4);
add_le32(header + 4, 36 + wav_len);
memcpy(header + 8, "WAVEfmt ", 8);
add_le32(header + 16, 16);
add_le16(header + 20, 1);
add_le16(header + 22, channels); /* channels */
add_le32(header + 24, sample_rate); /* samplerate */
add_le32(header + 28, sample_rate * channels * bps); /* byte rate */
add_le16(header + 32, channels * bps); /* block align */
add_le16(header + 34, 8 * bps); /* bits per sample */
memcpy(header + 36, "data", 4);
add_le32(header + 40, wav_len);
}
/*
@ -464,7 +471,7 @@ static int
stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id)
{
const AVCodecDescriptor *codec_desc;
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_CONST_AVCODEC
const AVCodec *encoder;
#else
// Not const before ffmpeg 5.0
@ -933,15 +940,11 @@ avio_evbuffer_close(AVIOContext *s)
static int
open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode_ctx *ctx, enum AVMediaType type)
{
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_CONST_AVCODEC
const AVCodec *decoder;
#else
// Not const before ffmpeg 5.0
AVCodec *decoder;
#endif
AVDictionary *options = NULL;
#ifdef HAVE_FFMPEG_CH_LAYOUT
char downmix_layout[64];
#endif
int ret;
@ -968,19 +971,7 @@ open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode
return ret;
}
if (type == AVMEDIA_TYPE_AUDIO)
{
(*dec_ctx)->request_sample_fmt = ctx->settings.sample_format;
#ifdef HAVE_FFMPEG_CH_LAYOUT
// option types is a string - see AV_OPT_TYPE_CHLAYOUT handling in ffmpeg:libavutil/opt.c
av_channel_layout_describe(&ctx->settings.channel_layout, downmix_layout, sizeof(downmix_layout));
av_dict_set(&options, "downmix", downmix_layout, 0);
#else
(*dec_ctx)->request_channel_layout = ctx->settings.channel_layout;
#endif
}
ret = avcodec_open2(*dec_ctx, NULL, options == NULL ? NULL : &options);
ret = avcodec_open2(*dec_ctx, NULL, NULL);
if (ret < 0)
{
DPRINTF(E_LOG, L_XCODE, "Failed to open decoder for stream #%d: %s\n", *stream_index, err2str(ret));
@ -996,7 +987,7 @@ open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io *
{
AVDictionary *options = NULL;
AVCodecContext *dec_ctx;
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_CONST_AVFORMAT
const AVInputFormat *ifmt;
#else
// Not const before ffmpeg 5.0
@ -1121,7 +1112,7 @@ close_input(struct decode_ctx *ctx)
static int
open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
{
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_CONST_AVFORMAT
const AVOutputFormat *oformat;
#else
// Not const before ffmpeg 5.0
@ -1136,7 +1127,7 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
return -1;
}
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_NO_CLEAR_AVFMT_NOFILE
CHECK_ERRNO(L_XCODE, avformat_alloc_output_context2(&ctx->ofmt_ctx, oformat, NULL, NULL));
#else
// Clear AVFMT_NOFILE bit, it is not allowed as we will set our own AVIOContext.
@ -1184,9 +1175,9 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
goto out_free_streams;
}
if (ctx->settings.wavheader)
if (ctx->settings.wav_header)
{
evbuffer_add(ctx->obuf, ctx->header, sizeof(ctx->header));
evbuffer_add(ctx->obuf, ctx->wav_header, sizeof(ctx->wav_header));
}
return 0;
@ -1229,7 +1220,7 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream)
AVFilterContext *buffersink_ctx;
AVFilterGraph *filter_graph;
char args[512];
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
char buf[64];
#endif
int ret;
@ -1247,9 +1238,11 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream)
goto out_fail;
}
#ifdef HAVE_FFMPEG_CH_LAYOUT
if (!av_channel_layout_check(&in_stream->codec->ch_layout))
#if USE_CH_LAYOUT
// Some AIFF files only have a channel number, not a layout
if (in_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
av_channel_layout_default(&in_stream->codec->ch_layout, in_stream->codec->ch_layout.nb_channels);
av_channel_layout_describe(&in_stream->codec->ch_layout, buf, sizeof(buf));
snprintf(args, sizeof(args),
@ -1277,10 +1270,10 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream)
DPRINTF(E_DBG, L_XCODE, "Created 'in' filter: %s\n", args);
// For some AIFF files, ffmpeg (3.4.6) will not give us a channel_layout (bug in ffmpeg?)
#ifdef HAVE_FFMPEG_CH_LAYOUT
if (!av_channel_layout_check(&out_stream->codec->ch_layout))
#if USE_CH_LAYOUT
if (out_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
av_channel_layout_default(&out_stream->codec->ch_layout, out_stream->codec->ch_layout.nb_channels);
av_channel_layout_describe(&out_stream->codec->ch_layout, buf, sizeof(buf));
snprintf(args, sizeof(args),
@ -1288,6 +1281,7 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream)
av_get_sample_fmt_name(out_stream->codec->sample_fmt), out_stream->codec->sample_rate,
buf);
#else
// For some AIFF files, ffmpeg (3.4.6) will not give us a channel_layout (bug in ffmpeg?)
if (!out_stream->codec->channel_layout)
out_stream->codec->channel_layout = av_get_default_channel_layout(out_stream->codec->channels);
@ -1487,7 +1481,9 @@ struct encode_ctx *
transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, off_t *est_size, int width, int height)
{
struct encode_ctx *ctx;
int bps;
int src_bps;
int dst_bps;
int channels;
CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx)));
CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc());
@ -1505,11 +1501,11 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua
ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate;
}
// Caller did not specify a sample format -> use same as source
// Caller did not specify a sample format -> determine from source
if (!ctx->settings.sample_format && ctx->settings.encode_audio)
{
bps = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt);
if (bps == 4)
src_bps = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt);
if (src_bps == 4)
{
ctx->settings.sample_format = AV_SAMPLE_FMT_S32;
ctx->settings.audio_codec = AV_CODEC_ID_PCM_S32LE;
@ -1523,20 +1519,36 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua
}
}
#if USE_CH_LAYOUT
// Caller did not specify channels -> use same as source
if (!ctx->settings.channels && ctx->settings.encode_audio)
if (!av_channel_layout_check(&ctx->settings.channel_layout) && ctx->settings.encode_audio)
{
#ifdef HAVE_FFMPEG_CH_LAYOUT
ctx->settings.channels = src_ctx->audio_stream.codec->ch_layout.nb_channels;
av_channel_layout_copy(&ctx->settings.channel_layout, &src_ctx->audio_stream.codec->ch_layout);
#else
ctx->settings.channels = src_ctx->audio_stream.codec->channels;
ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout;
#endif
}
if (ctx->settings.wavheader)
make_wav_header(ctx, src_ctx, est_size);
channels = ctx->settings.channel_layout.nb_channels;
#else
// Caller did not specify channels -> use same as source
if (ctx->settings.channels == 0 && ctx->settings.encode_audio)
{
ctx->settings.channels = src_ctx->audio_stream.codec->channels;
ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout;
}
channels = ctx->settings.channels;
#endif
if (ctx->settings.wav_header)
{
dst_bps = av_get_bytes_per_sample(ctx->settings.sample_format);
make_wav_header(ctx->wav_header, est_size, ctx->settings.sample_rate, dst_bps, channels, src_ctx->duration);
}
if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP)
{
dst_bps = av_get_bytes_per_sample(ctx->settings.sample_format);
ctx->icy_interval = METADATA_ICY_INTERVAL * channels * dst_bps * ctx->settings.sample_rate;
}
if (open_output(ctx, src_ctx) < 0)
goto fail_free;
@ -1544,12 +1556,6 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua
if (open_filters(ctx, src_ctx) < 0)
goto fail_close;
if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP)
{
bps = av_get_bytes_per_sample(ctx->settings.sample_format);
ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * bps * ctx->settings.sample_rate;
}
return ctx;
fail_close:
@ -1591,7 +1597,7 @@ transcode_decode_setup_raw(enum transcode_profile profile, struct media_quality
{
const AVCodecDescriptor *codec_desc;
struct decode_ctx *ctx;
#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#if USE_CONST_AVCODEC
const AVCodec *decoder;
#else
// Not const before ffmpeg 5.0
@ -1820,7 +1826,7 @@ transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame
start_length = evbuffer_get_length(ctx->obuf);
// Really crappy way of detecting if frame is audio, video or something else
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
if (f->ch_layout.nb_channels && f->sample_rate)
#else
if (f->channel_layout && f->sample_rate)
@ -1912,13 +1918,13 @@ transcode_frame_new(void *data, size_t size, int nsamples, struct media_quality
f->sample_rate = quality->sample_rate;
f->nb_samples = nsamples;
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
av_channel_layout_default(&f->ch_layout, quality->channels);
#else
f->channel_layout = av_get_default_channel_layout(quality->channels);
#ifdef HAVE_FFMPEG
# ifdef HAVE_FFMPEG
f->channels = quality->channels;
#endif
# endif
#endif
f->pts = AV_NOPTS_VALUE;
@ -2076,7 +2082,7 @@ transcode_encode_query(struct encode_ctx *ctx, const char *query)
else if (strcmp(query, "channels") == 0)
{
if (ctx->audio_stream.stream)
#ifdef HAVE_FFMPEG_CH_LAYOUT
#if USE_CH_LAYOUT
return ctx->audio_stream.stream->codecpar->ch_layout.nb_channels;
#else
return ctx->audio_stream.stream->codecpar->channels;