diff --git a/configure.ac b/configure.ac index deffd218..2527bd95 100644 --- a/configure.ac +++ b/configure.ac @@ -240,12 +240,6 @@ OWNTONE_MODULES_CHECK([OWNTONE], [LIBAV], [libavformat/avformat.h]) ]) -AS_IF([test "$is_ffmpeg" = "yes"], - [PKG_CHECK_EXISTS([libavcodec >= 59.24.100], - [AC_DEFINE([HAVE_FFMPEG_CH_LAYOUT], 1, - [Define to 1 if ffmpeg-libavcodec defines new 5.1.x ch_layout])])] - ) - AC_CHECK_SIZEOF([void *]) dnl --- Begin configuring the options --- diff --git a/src/library/filescanner_ffmpeg.c b/src/library/filescanner_ffmpeg.c index 79de4f56..e489885e 100644 --- a/src/library/filescanner_ffmpeg.c +++ b/src/library/filescanner_ffmpeg.c @@ -452,7 +452,8 @@ scan_metadata_ffmpeg(struct media_file_info *mfi, const char *file) codec_id = ctx->streams[i]->codecpar->codec_id; sample_rate = ctx->streams[i]->codecpar->sample_rate; sample_fmt = ctx->streams[i]->codecpar->format; -#ifdef HAVE_FFMPEG_CH_LAYOUT +// Matches USE_CH_LAYOUT in transcode.c +#if (LIBAVCODEC_VERSION_MAJOR > 59) || ((LIBAVCODEC_VERSION_MAJOR == 59) && (LIBAVCODEC_VERSION_MINOR > 24)) channels = ctx->streams[i]->codecpar->ch_layout.nb_channels; #else channels = ctx->streams[i]->codecpar->channels; diff --git a/src/transcode.c b/src/transcode.c index 79ca828e..dc7d2d0b 100644 --- a/src/transcode.c +++ b/src/transcode.c @@ -42,6 +42,13 @@ #include "misc.h" #include "transcode.h" +// Switches for compability with ffmpeg's ever changing API +#define USE_IMAGE2PIPE (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29)) +#define USE_CONST_AVFORMAT (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#define USE_CONST_AVCODEC (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#define USE_NO_CLEAR_AVFMT_NOFILE (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#define USE_CH_LAYOUT (LIBAVCODEC_VERSION_MAJOR > 59) || ((LIBAVCODEC_VERSION_MAJOR == 59) && (LIBAVCODEC_VERSION_MINOR > 24)) + // Interval between ICY metadata checks for streams, in seconds #define METADATA_ICY_INTERVAL 5 // Maximum number of streams in a file that we will accept @@ -52,6 +59,8 @@ #define READ_TIMEOUT 30000000 // Buffer size for reading/writing input and output evbuffers #define AVIO_BUFFER_SIZE 4096 +// Size of the wav header that iTunes needs +#define WAV_HEADER_LEN 44 static const char *default_codecs = "mpeg,wav"; static const char *roku_codecs = "mpeg,mp4a,wma,alac,wav"; @@ -78,15 +87,15 @@ struct settings_ctx // Audio settings enum AVCodecID audio_codec; int sample_rate; -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT AVChannelLayout channel_layout; #else uint64_t channel_layout; -#endif int channels; +#endif int bit_rate; enum AVSampleFormat sample_format; - bool wavheader; + bool wav_header; bool icy; // Video settings @@ -180,7 +189,7 @@ struct encode_ctx uint32_t icy_hash; // WAV header - uint8_t header[44]; + uint8_t wav_header[WAV_HEADER_LEN]; }; enum probe_type @@ -212,7 +221,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str break; case XCODE_PCM16_HEADER: - settings->wavheader = 1; + settings->wav_header = 1; case XCODE_PCM16: settings->encode_audio = 1; settings->format = "s16le"; @@ -266,7 +275,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str // With ffmpeg 4.3 (> libavformet 58.29) "image2" only works for actual file // output. It's possible we should have used "image2pipe" all along, but since // "image2" has been working we only replace it going forward. -#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29)) +#if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; @@ -281,7 +290,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str settings->encode_video = 1; settings->silent = 1; // See explanation above -#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29)) +#if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; @@ -294,7 +303,7 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str settings->encode_video = 1; settings->silent = 1; // See explanation above -#if (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29)) +#if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; @@ -315,11 +324,11 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str if (quality && quality->channels) { - settings->channels = quality->channels; -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT av_channel_layout_default(&settings->channel_layout, quality->channels); #else settings->channel_layout = av_get_default_channel_layout(quality->channels); + settings->channels = quality->channels; #endif } @@ -343,7 +352,7 @@ stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AV if (type == AVMEDIA_TYPE_AUDIO) { s->codec->sample_rate = settings->sample_rate; -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT av_channel_layout_copy(&s->codec->ch_layout, &(settings->channel_layout)); #else s->codec->channel_layout = settings->channel_layout; @@ -401,36 +410,34 @@ add_le32(uint8_t *dst, uint32_t val) dst[3] = (val >> 24) & 0xff; } +/* + * header must have size WAV_HEADER_LEN (44 bytes) + */ static void -make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_size) +make_wav_header(uint8_t *header, off_t *est_size, int sample_rate, int bps, int channels, int duration) { uint32_t wav_len; - int duration; - int bps; - if (src_ctx->duration) - duration = src_ctx->duration; - else + if (duration == 0) duration = 3 * 60 * 1000; /* 3 minutes, in ms */ - bps = av_get_bytes_per_sample(ctx->settings.sample_format); - wav_len = ctx->settings.channels * bps * ctx->settings.sample_rate * (duration / 1000); + wav_len = channels * bps * sample_rate * (duration / 1000); if (est_size) - *est_size = wav_len + sizeof(ctx->header); + *est_size = wav_len + WAV_HEADER_LEN; - memcpy(ctx->header, "RIFF", 4); - add_le32(ctx->header + 4, 36 + wav_len); - memcpy(ctx->header + 8, "WAVEfmt ", 8); - add_le32(ctx->header + 16, 16); - add_le16(ctx->header + 20, 1); - add_le16(ctx->header + 22, ctx->settings.channels); /* channels */ - add_le32(ctx->header + 24, ctx->settings.sample_rate); /* samplerate */ - add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * bps); /* byte rate */ - add_le16(ctx->header + 32, ctx->settings.channels * bps); /* block align */ - add_le16(ctx->header + 34, 8 * bps); /* bits per sample */ - memcpy(ctx->header + 36, "data", 4); - add_le32(ctx->header + 40, wav_len); + memcpy(header, "RIFF", 4); + add_le32(header + 4, 36 + wav_len); + memcpy(header + 8, "WAVEfmt ", 8); + add_le32(header + 16, 16); + add_le16(header + 20, 1); + add_le16(header + 22, channels); /* channels */ + add_le32(header + 24, sample_rate); /* samplerate */ + add_le32(header + 28, sample_rate * channels * bps); /* byte rate */ + add_le16(header + 32, channels * bps); /* block align */ + add_le16(header + 34, 8 * bps); /* bits per sample */ + memcpy(header + 36, "data", 4); + add_le32(header + 40, wav_len); } /* @@ -464,7 +471,7 @@ static int stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id) { const AVCodecDescriptor *codec_desc; -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_CONST_AVCODEC const AVCodec *encoder; #else // Not const before ffmpeg 5.0 @@ -933,15 +940,11 @@ avio_evbuffer_close(AVIOContext *s) static int open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode_ctx *ctx, enum AVMediaType type) { -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_CONST_AVCODEC const AVCodec *decoder; #else // Not const before ffmpeg 5.0 AVCodec *decoder; -#endif - AVDictionary *options = NULL; -#ifdef HAVE_FFMPEG_CH_LAYOUT - char downmix_layout[64]; #endif int ret; @@ -968,19 +971,7 @@ open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode return ret; } - if (type == AVMEDIA_TYPE_AUDIO) - { - (*dec_ctx)->request_sample_fmt = ctx->settings.sample_format; -#ifdef HAVE_FFMPEG_CH_LAYOUT - // option types is a string - see AV_OPT_TYPE_CHLAYOUT handling in ffmpeg:libavutil/opt.c - av_channel_layout_describe(&ctx->settings.channel_layout, downmix_layout, sizeof(downmix_layout)); - av_dict_set(&options, "downmix", downmix_layout, 0); -#else - (*dec_ctx)->request_channel_layout = ctx->settings.channel_layout; -#endif - } - - ret = avcodec_open2(*dec_ctx, NULL, options == NULL ? NULL : &options); + ret = avcodec_open2(*dec_ctx, NULL, NULL); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Failed to open decoder for stream #%d: %s\n", *stream_index, err2str(ret)); @@ -996,7 +987,7 @@ open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io * { AVDictionary *options = NULL; AVCodecContext *dec_ctx; -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_CONST_AVFORMAT const AVInputFormat *ifmt; #else // Not const before ffmpeg 5.0 @@ -1121,7 +1112,7 @@ close_input(struct decode_ctx *ctx) static int open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx) { -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_CONST_AVFORMAT const AVOutputFormat *oformat; #else // Not const before ffmpeg 5.0 @@ -1136,7 +1127,7 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx) return -1; } -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_NO_CLEAR_AVFMT_NOFILE CHECK_ERRNO(L_XCODE, avformat_alloc_output_context2(&ctx->ofmt_ctx, oformat, NULL, NULL)); #else // Clear AVFMT_NOFILE bit, it is not allowed as we will set our own AVIOContext. @@ -1184,9 +1175,9 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx) goto out_free_streams; } - if (ctx->settings.wavheader) + if (ctx->settings.wav_header) { - evbuffer_add(ctx->obuf, ctx->header, sizeof(ctx->header)); + evbuffer_add(ctx->obuf, ctx->wav_header, sizeof(ctx->wav_header)); } return 0; @@ -1229,7 +1220,7 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream) AVFilterContext *buffersink_ctx; AVFilterGraph *filter_graph; char args[512]; -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT char buf[64]; #endif int ret; @@ -1247,9 +1238,11 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream) goto out_fail; } -#ifdef HAVE_FFMPEG_CH_LAYOUT - if (!av_channel_layout_check(&in_stream->codec->ch_layout)) +#if USE_CH_LAYOUT + // Some AIFF files only have a channel number, not a layout + if (in_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&in_stream->codec->ch_layout, in_stream->codec->ch_layout.nb_channels); + av_channel_layout_describe(&in_stream->codec->ch_layout, buf, sizeof(buf)); snprintf(args, sizeof(args), @@ -1277,10 +1270,10 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream) DPRINTF(E_DBG, L_XCODE, "Created 'in' filter: %s\n", args); - // For some AIFF files, ffmpeg (3.4.6) will not give us a channel_layout (bug in ffmpeg?) -#ifdef HAVE_FFMPEG_CH_LAYOUT - if (!av_channel_layout_check(&out_stream->codec->ch_layout)) +#if USE_CH_LAYOUT + if (out_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&out_stream->codec->ch_layout, out_stream->codec->ch_layout.nb_channels); + av_channel_layout_describe(&out_stream->codec->ch_layout, buf, sizeof(buf)); snprintf(args, sizeof(args), @@ -1288,6 +1281,7 @@ open_filter(struct stream_ctx *out_stream, struct stream_ctx *in_stream) av_get_sample_fmt_name(out_stream->codec->sample_fmt), out_stream->codec->sample_rate, buf); #else + // For some AIFF files, ffmpeg (3.4.6) will not give us a channel_layout (bug in ffmpeg?) if (!out_stream->codec->channel_layout) out_stream->codec->channel_layout = av_get_default_channel_layout(out_stream->codec->channels); @@ -1487,7 +1481,9 @@ struct encode_ctx * transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, off_t *est_size, int width, int height) { struct encode_ctx *ctx; - int bps; + int src_bps; + int dst_bps; + int channels; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx))); CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc()); @@ -1505,11 +1501,11 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate; } - // Caller did not specify a sample format -> use same as source + // Caller did not specify a sample format -> determine from source if (!ctx->settings.sample_format && ctx->settings.encode_audio) { - bps = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt); - if (bps == 4) + src_bps = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt); + if (src_bps == 4) { ctx->settings.sample_format = AV_SAMPLE_FMT_S32; ctx->settings.audio_codec = AV_CODEC_ID_PCM_S32LE; @@ -1523,20 +1519,36 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua } } +#if USE_CH_LAYOUT // Caller did not specify channels -> use same as source - if (!ctx->settings.channels && ctx->settings.encode_audio) + if (!av_channel_layout_check(&ctx->settings.channel_layout) && ctx->settings.encode_audio) { -#ifdef HAVE_FFMPEG_CH_LAYOUT - ctx->settings.channels = src_ctx->audio_stream.codec->ch_layout.nb_channels; av_channel_layout_copy(&ctx->settings.channel_layout, &src_ctx->audio_stream.codec->ch_layout); -#else - ctx->settings.channels = src_ctx->audio_stream.codec->channels; - ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout; -#endif } - if (ctx->settings.wavheader) - make_wav_header(ctx, src_ctx, est_size); + channels = ctx->settings.channel_layout.nb_channels; +#else + // Caller did not specify channels -> use same as source + if (ctx->settings.channels == 0 && ctx->settings.encode_audio) + { + ctx->settings.channels = src_ctx->audio_stream.codec->channels; + ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout; + } + + channels = ctx->settings.channels; +#endif + + if (ctx->settings.wav_header) + { + dst_bps = av_get_bytes_per_sample(ctx->settings.sample_format); + make_wav_header(ctx->wav_header, est_size, ctx->settings.sample_rate, dst_bps, channels, src_ctx->duration); + } + + if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP) + { + dst_bps = av_get_bytes_per_sample(ctx->settings.sample_format); + ctx->icy_interval = METADATA_ICY_INTERVAL * channels * dst_bps * ctx->settings.sample_rate; + } if (open_output(ctx, src_ctx) < 0) goto fail_free; @@ -1544,12 +1556,6 @@ transcode_encode_setup(enum transcode_profile profile, struct media_quality *qua if (open_filters(ctx, src_ctx) < 0) goto fail_close; - if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP) - { - bps = av_get_bytes_per_sample(ctx->settings.sample_format); - ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * bps * ctx->settings.sample_rate; - } - return ctx; fail_close: @@ -1591,7 +1597,7 @@ transcode_decode_setup_raw(enum transcode_profile profile, struct media_quality { const AVCodecDescriptor *codec_desc; struct decode_ctx *ctx; -#if (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15)) +#if USE_CONST_AVCODEC const AVCodec *decoder; #else // Not const before ffmpeg 5.0 @@ -1820,7 +1826,7 @@ transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame start_length = evbuffer_get_length(ctx->obuf); // Really crappy way of detecting if frame is audio, video or something else -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT if (f->ch_layout.nb_channels && f->sample_rate) #else if (f->channel_layout && f->sample_rate) @@ -1912,13 +1918,13 @@ transcode_frame_new(void *data, size_t size, int nsamples, struct media_quality f->sample_rate = quality->sample_rate; f->nb_samples = nsamples; -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT av_channel_layout_default(&f->ch_layout, quality->channels); #else f->channel_layout = av_get_default_channel_layout(quality->channels); -#ifdef HAVE_FFMPEG +# ifdef HAVE_FFMPEG f->channels = quality->channels; -#endif +# endif #endif f->pts = AV_NOPTS_VALUE; @@ -2076,7 +2082,7 @@ transcode_encode_query(struct encode_ctx *ctx, const char *query) else if (strcmp(query, "channels") == 0) { if (ctx->audio_stream.stream) -#ifdef HAVE_FFMPEG_CH_LAYOUT +#if USE_CH_LAYOUT return ctx->audio_stream.stream->codecpar->ch_layout.nb_channels; #else return ctx->audio_stream.stream->codecpar->channels;