/*
 * Copyright (C) 2015-17 Espen Jurgensen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <sys/types.h>
#include <endian.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/time.h>
#include <libavutil/pixdesc.h>

#include <event2/buffer.h>

#include "logger.h"
#include "conffile.h"
#include "misc.h"
#include "transcode.h"

// Switches for compatibility with ffmpeg's ever changing API
#define USE_IMAGE2PIPE (LIBAVFORMAT_VERSION_MAJOR > 58) || ((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR > 29))
#define USE_CONST_AVFORMAT (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_CONST_AVCODEC (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_NO_CLEAR_AVFMT_NOFILE (LIBAVFORMAT_VERSION_MAJOR > 59) || ((LIBAVFORMAT_VERSION_MAJOR == 59) && (LIBAVFORMAT_VERSION_MINOR > 15))
#define USE_CH_LAYOUT (LIBAVCODEC_VERSION_MAJOR > 59) || ((LIBAVCODEC_VERSION_MAJOR == 59) && (LIBAVCODEC_VERSION_MINOR > 24))
#define USE_CONST_AVIO_WRITE_PACKET (LIBAVFORMAT_VERSION_MAJOR > 61) || ((LIBAVFORMAT_VERSION_MAJOR == 61) && (LIBAVFORMAT_VERSION_MINOR > 0))

// Interval between ICY metadata checks for streams, in seconds
#define METADATA_ICY_INTERVAL 5

// Maximum number of streams in a file that we will accept
#define MAX_STREAMS 64

// Maximum number of times we retry when we encounter bad packets
#define MAX_BAD_PACKETS 5

// How long to wait (in microsec) before interrupting av_read_frame
#define READ_TIMEOUT 30000000

// Buffer size for reading/writing input and output evbuffers
#define AVIO_BUFFER_SIZE 4096

// Size of the wav header that iTunes needs
#define WAV_HEADER_LEN 44

// Max filters in a filtergraph
#define MAX_FILTERS 9

// Set to same size as in httpd.c (but can be set to something else)
#define STREAM_CHUNK_SIZE (64 * 1024)

static const char *default_codecs = "mpeg,alac,wav";
static const char *roku_codecs = "mpeg,mp4a,wma,alac,wav";
static const char *itunes_codecs = "mpeg,mp4a,mp4v,alac,wav";

// Used for passing errors to DPRINTF (can't count on av_err2str being present)
static char errbuf[64];

// Used by dummy_seek to mark a seek requested by ffmpeg
static const uint8_t xcode_seek_marker[8] = { 0x0D, 0x0E, 0x0A, 0x0D, 0x0B, 0x0E, 0x0E, 0x0F };

// The settings struct will be filled out based on the profile enum
struct settings_ctx
{
  bool encode_video;
  bool encode_audio;

  // Silence some log messages
  bool silent;

  // Output format (for the muxer)
  const char *format;

  // Input format (for the demuxer)
  const char *in_format;

  // Audio settings
  enum AVCodecID audio_codec;
  int sample_rate;
#if USE_CH_LAYOUT
  AVChannelLayout channel_layout;
#else
  uint64_t channel_layout;
#endif
  int nb_channels;
  int bit_rate;
  int frame_size;
  enum AVSampleFormat sample_format;
  bool with_mp4_header;
  bool with_wav_header;
  bool
without_libav_header; bool without_libav_trailer; bool with_icy; bool with_user_filters; // Video settings enum AVCodecID video_codec; const char *video_codec_name; enum AVPixelFormat pix_fmt; int height; int width; }; struct stream_ctx { AVStream *stream; AVCodecContext *codec; AVFilterContext *buffersink_ctx; AVFilterContext *buffersrc_ctx; AVFilterGraph *filter_graph; // Used for seeking int64_t prev_pts; int64_t offset_pts; }; struct decode_ctx { // Settings derived from the profile struct settings_ctx settings; // Input format context AVFormatContext *ifmt_ctx; // IO Context for non-file input AVIOContext *avio; // Stream and decoder data struct stream_ctx audio_stream; struct stream_ctx video_stream; // Source duration in ms as provided by caller uint32_t len_ms; // Used to determine if ICY metadata is relevant to look for bool is_http; // Set to true if we just seeked bool resume; // Set to true if we have reached eof bool eof; // Set to true if avcodec_receive_frame() gave us a frame bool got_frame; // Contains the most recent packet from av_read_frame() AVPacket *packet; // Contains the most recent frame from avcodec_receive_frame() AVFrame *decoded_frame; // Used to measure if av_read_frame is taking too long int64_t timestamp; }; struct encode_ctx { // Settings derived from the profile struct settings_ctx settings; // Output format context AVFormatContext *ofmt_ctx; // Stream, filter and decoder data struct stream_ctx audio_stream; struct stream_ctx video_stream; // The ffmpeg muxer writes to this buffer using the avio_evbuffer interface struct evbuffer *obuf; // IO Context for non-file output struct transcode_evbuf_io evbuf_io; // Contains the most recent packet from av_buffersink_get_frame() AVFrame *filt_frame; // Contains the most recent packet from avcodec_receive_packet() AVPacket *encoded_pkt; // How many output bytes we have processed in total off_t bytes_processed; // Estimated total size of output off_t bytes_total; // Used to check for ICY metadata changes at certain intervals uint32_t icy_interval; uint32_t icy_hash; }; enum probe_type { PROBE_TYPE_DEFAULT, PROBE_TYPE_QUICK, }; struct avio_evbuffer { struct evbuffer *evbuf; uint8_t *buffer; transcode_seekfn seekfn; void *seekfn_arg; }; struct filter_def { char name[64]; char args[512]; }; struct filters { AVFilterContext *av_ctx; // Function that will create the filter arguments for ffmpeg int (*deffn)(struct filter_def *, struct stream_ctx *, struct stream_ctx *, const char *); const char *deffn_arg; }; /* -------------------------- PROFILE CONFIGURATION ------------------------ */ static int init_settings(struct settings_ctx *settings, enum transcode_profile profile, struct media_quality *quality) { memset(settings, 0, sizeof(struct settings_ctx)); switch (profile) { case XCODE_PCM_NATIVE: // Sample rate and bit depth determined by source settings->encode_audio = true; settings->with_icy = true; settings->with_user_filters = true; break; case XCODE_WAV: settings->with_wav_header = true; settings->with_user_filters = true; case XCODE_PCM16: settings->encode_audio = true; settings->format = "s16le"; settings->audio_codec = AV_CODEC_ID_PCM_S16LE; settings->sample_format = AV_SAMPLE_FMT_S16; break; case XCODE_PCM24: settings->encode_audio = true; settings->format = "s24le"; settings->audio_codec = AV_CODEC_ID_PCM_S24LE; settings->sample_format = AV_SAMPLE_FMT_S32; break; case XCODE_PCM32: settings->encode_audio = true; settings->format = "s32le"; settings->audio_codec = AV_CODEC_ID_PCM_S32LE; settings->sample_format = 
AV_SAMPLE_FMT_S32; break; case XCODE_MP3: settings->encode_audio = true; settings->format = "mp3"; settings->audio_codec = AV_CODEC_ID_MP3; settings->sample_format = AV_SAMPLE_FMT_S16P; break; case XCODE_OPUS: settings->encode_audio = true; settings->format = "data"; // Means we get the raw packet from the encoder, no muxing settings->audio_codec = AV_CODEC_ID_OPUS; settings->sample_format = AV_SAMPLE_FMT_S16; // Only libopus support break; case XCODE_ALAC: settings->encode_audio = true; settings->format = "data"; // Means we get the raw packet from the encoder, no muxing settings->audio_codec = AV_CODEC_ID_ALAC; settings->sample_format = AV_SAMPLE_FMT_S16P; settings->frame_size = 352; break; case XCODE_MP4_ALAC: settings->with_mp4_header = true; settings->encode_audio = true; settings->format = "data"; settings->audio_codec = AV_CODEC_ID_ALAC; break; case XCODE_MP4_ALAC_HEADER: settings->without_libav_header = true; settings->without_libav_trailer = true; settings->encode_audio = true; settings->format = "ipod"; // ffmpeg default mp4 variant ("mp4" doesn't work with SoundBridge because of the btrt atom in the header) settings->audio_codec = AV_CODEC_ID_ALAC; break; case XCODE_OGG: settings->encode_audio = true; settings->in_format = "ogg"; break; case XCODE_JPEG: settings->encode_video = true; settings->silent = 1; // With ffmpeg 4.3 (> libavformet 58.29) "image2" only works for actual file // output. It's possible we should have used "image2pipe" all along, but since // "image2" has been working we only replace it going forward. #if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; #endif settings->in_format = "mjpeg"; settings->pix_fmt = AV_PIX_FMT_YUVJ420P; settings->video_codec = AV_CODEC_ID_MJPEG; break; case XCODE_PNG: settings->encode_video = true; settings->silent = true; // See explanation above #if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; #endif settings->pix_fmt = AV_PIX_FMT_RGB24; settings->video_codec = AV_CODEC_ID_PNG; break; case XCODE_VP8: settings->encode_video = true; settings->silent = true; // See explanation above #if USE_IMAGE2PIPE settings->format = "image2pipe"; #else settings->format = "image2"; #endif settings->pix_fmt = AV_PIX_FMT_YUVJ420P; settings->video_codec = AV_CODEC_ID_VP8; break; default: DPRINTF(E_LOG, L_XCODE, "Bug! Unknown transcoding profile\n"); return -1; } if (quality && quality->sample_rate) { settings->sample_rate = quality->sample_rate; } if (quality && quality->channels) { #if USE_CH_LAYOUT av_channel_layout_default(&settings->channel_layout, quality->channels); #else settings->channel_layout = av_get_default_channel_layout(quality->channels); settings->nb_channels = quality->channels; #endif } if (quality && quality->bit_rate) { settings->bit_rate = quality->bit_rate; } if (quality && quality->bits_per_sample && (quality->bits_per_sample != 8 * av_get_bytes_per_sample(settings->sample_format))) { DPRINTF(E_LOG, L_XCODE, "Bug! 
Mismatch between profile (%d bps) and media quality (%d bps)\n", 8 * av_get_bytes_per_sample(settings->sample_format), quality->bits_per_sample); return -1; } return 0; } static int init_settings_from_video(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, int width, int height) { settings->width = width; settings->height = height; return 0; } static int init_settings_from_audio(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, struct media_quality *quality) { int src_bytes_per_sample = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt); // Initialize unset settings that are source-dependent, not profile-dependent if (!settings->sample_rate) settings->sample_rate = src_ctx->audio_stream.codec->sample_rate; #if USE_CH_LAYOUT if (!av_channel_layout_check(&settings->channel_layout)) av_channel_layout_copy(&settings->channel_layout, &src_ctx->audio_stream.codec->ch_layout); settings->nb_channels = settings->channel_layout.nb_channels; #else if (settings->nb_channels == 0) { settings->nb_channels = src_ctx->audio_stream.codec->channels; settings->channel_layout = src_ctx->audio_stream.codec->channel_layout; } #endif // Initialize settings that are both source-dependent and profile-dependent switch (profile) { case XCODE_MP4_ALAC: case XCODE_MP4_ALAC_HEADER: if (!settings->sample_format) settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32P : AV_SAMPLE_FMT_S16P; break; case XCODE_PCM_NATIVE: if (!settings->sample_format) settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16; if (!settings->audio_codec) settings->audio_codec = (src_bytes_per_sample == 4) ? AV_CODEC_ID_PCM_S32LE : AV_CODEC_ID_PCM_S16LE; if (!settings->format) settings->format = (src_bytes_per_sample == 4) ? "s32le" : "s16le"; break; default: if (settings->sample_format && settings->audio_codec && settings->format) return 0; DPRINTF(E_LOG, L_XCODE, "Bug! 
Profile %d has unset encoding parameters\n", profile); return -1; } return 0; } static void stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AVMediaType type) { if (type == AVMEDIA_TYPE_AUDIO) { s->codec->sample_rate = settings->sample_rate; #if USE_CH_LAYOUT av_channel_layout_copy(&s->codec->ch_layout, &(settings->channel_layout)); #else s->codec->channel_layout = settings->channel_layout; s->codec->channels = settings->nb_channels; #endif s->codec->sample_fmt = settings->sample_format; s->codec->time_base = (AVRational){1, settings->sample_rate}; s->codec->bit_rate = settings->bit_rate; } else if (type == AVMEDIA_TYPE_VIDEO) { s->codec->height = settings->height; s->codec->width = settings->width; s->codec->pix_fmt = settings->pix_fmt; s->codec->time_base = (AVRational){1, 25}; } } /* -------------------------------- HELPERS -------------------------------- */ static enum AVSampleFormat bitdepth2format(int bits_per_sample) { if (bits_per_sample == 16) return AV_SAMPLE_FMT_S16; else if (bits_per_sample == 24) return AV_SAMPLE_FMT_S32; else if (bits_per_sample == 32) return AV_SAMPLE_FMT_S32; else return AV_SAMPLE_FMT_NONE; } static inline char * err2str(int errnum) { av_strerror(errnum, errbuf, sizeof(errbuf)); return errbuf; } static inline void add_le16(uint8_t *dst, uint16_t val) { dst[0] = val & 0xff; dst[1] = (val >> 8) & 0xff; } static inline void add_le32(uint8_t *dst, uint32_t val) { dst[0] = val & 0xff; dst[1] = (val >> 8) & 0xff; dst[2] = (val >> 16) & 0xff; dst[3] = (val >> 24) & 0xff; } // Copies the src buffer to position pos of the dst buffer, expanding dst if // needed to fit src. Can be called with *dst = NULL and *dst_len = 0. Returns // the number of bytes dst was expanded with. static int copy_buffer_to_position(uint8_t **dst, size_t *dst_len, uint8_t *src, size_t src_len, int64_t pos) { int bytes_added = 0; if (pos < 0 || pos > *dst_len) return -1; // Out of bounds if (src_len == 0) return 0; // Nothing to do if (pos + src_len > *dst_len) { bytes_added = pos + src_len - *dst_len; *dst_len += bytes_added; CHECK_NULL(L_XCODE, *dst = realloc(*dst, *dst_len)); } memcpy(*dst + pos, src, src_len); return bytes_added; } // Doesn't actually seek, just inserts a marker in the obuf static int64_t dummy_seek(void *arg, int64_t offset, enum transcode_seek_type type) { struct transcode_ctx *ctx = arg; struct encode_ctx *enc_ctx = ctx->encode_ctx; if (type == XCODE_SEEK_SET) { evbuffer_add(enc_ctx->obuf, xcode_seek_marker, sizeof(xcode_seek_marker)); evbuffer_add(enc_ctx->obuf, &offset, sizeof(offset)); return offset; } else if (type == XCODE_SEEK_SIZE) return enc_ctx->bytes_total; return -1; } static off_t size_estimate(enum transcode_profile profile, uint32_t bit_rate, uint32_t sample_rate, uint16_t bytes_per_sample, uint16_t channels, uint32_t len_ms) { off_t bytes = 0; uint64_t nsamples; if (len_ms == 0) len_ms = 3 * 60 * 1000; nsamples = (uint64_t)sample_rate * (uint64_t)len_ms / 1000 + 1; // The +1 is to round up if (profile == XCODE_WAV) bytes = nsamples * channels * bytes_per_sample + WAV_HEADER_LEN; else if (profile == XCODE_MP3) bytes = (uint64_t)len_ms * (uint64_t)bit_rate / 8000; else if (profile == XCODE_MP4_ALAC) bytes = nsamples * channels * bytes_per_sample / 2; // FIXME return bytes; } /* * Checks if this stream index is one that we are decoding * * @in ctx Decode context * @in stream_index Index of stream to check * @return Type of stream, unknown if we are not decoding the stream */ static enum AVMediaType stream_find(struct decode_ctx 
*ctx, unsigned int stream_index) { if (ctx->audio_stream.stream && (stream_index == ctx->audio_stream.stream->index)) return AVMEDIA_TYPE_AUDIO; if (ctx->video_stream.stream && (stream_index == ctx->video_stream.stream->index)) return AVMEDIA_TYPE_VIDEO; return AVMEDIA_TYPE_UNKNOWN; } /* * Adds a stream to an output * * @out ctx A pre-allocated stream ctx where we save stream and codec info * @in output Output to add the stream to * @in codec_id What kind of codec should we use * @return Negative on failure, otherwise zero */ static int stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id) { const AVCodecDescriptor *codec_desc; #if USE_CONST_AVCODEC const AVCodec *encoder; #else // Not const before ffmpeg 5.0 AVCodec *encoder; #endif AVDictionary *options = NULL; int ret; codec_desc = avcodec_descriptor_get(codec_id); if (!codec_desc) { DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", codec_id); return -1; } encoder = avcodec_find_encoder(codec_id); if (!encoder) { DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_desc->name); return -1; } DPRINTF(E_DBG, L_XCODE, "Selected encoder '%s'\n", encoder->long_name); CHECK_NULL(L_XCODE, s->stream = avformat_new_stream(ctx->ofmt_ctx, NULL)); CHECK_NULL(L_XCODE, s->codec = avcodec_alloc_context3(encoder)); stream_settings_set(s, &ctx->settings, encoder->type); if (!s->codec->pix_fmt) { s->codec->pix_fmt = avcodec_default_get_format(s->codec, encoder->pix_fmts); DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_desc->name); } if (ctx->ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) s->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; // With ffmpeg 3.4, jpeg encoding with optimal huffman tables will segfault, see issue #502 if (codec_id == AV_CODEC_ID_MJPEG) av_dict_set(&options, "huffman", "default", 0); // 20 ms frames is the current ffmpeg default, but we set it anyway, so that // we don't risk issues if future versions change the default (it would become // an issue because outputs/cast.c relies on 20 ms frames) if (codec_id == AV_CODEC_ID_OPUS) av_dict_set(&options, "frame_duration", "20", 0); ret = avcodec_open2(s->codec, NULL, &options); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_desc->name, err2str(ret)); goto error; } // airplay.c "misuses" the ffmpeg alac encoder in that it pushes frames with // 352 samples even though the encoder wants 4096 (and doesn't have variable // frame capability). This worked with no issues until ffmpeg 6, where it // seems a frame size check was added. The below circumvents the check, but is // dirty because we shouldn't be writing to this data element. if (ctx->settings.frame_size) s->codec->frame_size = ctx->settings.frame_size; // Copy the codec parameters we just set to the stream, so the muxer knows them ret = avcodec_parameters_from_context(s->stream->codecpar, s->codec); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret)); goto error; } if (options) { DPRINTF(E_WARN, L_XCODE, "Encoder %s didn't recognize all options given to avcodec_open2\n", codec_desc->name); av_dict_free(&options); } return 0; error: if (s->codec) avcodec_free_context(&s->codec); if (options) av_dict_free(&options); return -1; } /* * Called by libavformat while demuxing. Used to interrupt/unblock av_read_frame * in case a source (especially a network stream) becomes unavailable. 
* * @in arg Will point to the decode context * @return Non-zero if av_read_frame should be interrupted */ static int decode_interrupt_cb(void *arg) { struct decode_ctx *ctx; ctx = (struct decode_ctx *)arg; if (av_gettime() - ctx->timestamp > READ_TIMEOUT) { DPRINTF(E_LOG, L_XCODE, "Timeout while reading source (connection problem?)\n"); return 1; } return 0; } /* Will read the next packet from the source, unless we are resuming after a * seek in which case the most recent packet found by transcode_seek() will be * returned. The packet will be put in ctx->packet. * * @out type Media type of packet * @in ctx Decode context * @return 0 if OK, < 0 on error or end of file */ static int read_packet(enum AVMediaType *type, struct decode_ctx *dec_ctx) { int ret; // We just seeked, so transcode_seek() will have found a new ctx->packet and // we should start with that if (dec_ctx->resume) { dec_ctx->resume = 0; *type = stream_find(dec_ctx, dec_ctx->packet->stream_index); if (*type != AVMEDIA_TYPE_UNKNOWN) return 0; } do { dec_ctx->timestamp = av_gettime(); av_packet_unref(dec_ctx->packet); ret = av_read_frame(dec_ctx->ifmt_ctx, dec_ctx->packet); if (ret < 0) { DPRINTF(E_WARN, L_XCODE, "Could not read frame: %s\n", err2str(ret)); return ret; } *type = stream_find(dec_ctx, dec_ctx->packet->stream_index); } while (*type == AVMEDIA_TYPE_UNKNOWN); return 0; } // Prepares a packet from the encoder for muxing static void packet_prepare(AVPacket *pkt, struct stream_ctx *s) { pkt->stream_index = s->stream->index; // This "wonderful" peace of code makes sure that the timestamp always increases, // even if the user seeked backwards. The muxer will not accept non-increasing // timestamps. pkt->pts += s->offset_pts; if (pkt->pts < s->prev_pts) { s->offset_pts += s->prev_pts - pkt->pts; pkt->pts = s->prev_pts; } s->prev_pts = pkt->pts; pkt->dts = pkt->pts; //FIXME av_packet_rescale_ts(pkt, s->codec->time_base, s->stream->time_base); } /* * Part 4+5 of the conversion chain: read -> decode -> filter -> encode -> write * */ static int encode_write(struct encode_ctx *ctx, struct stream_ctx *s, AVFrame *filt_frame) { int ret; // If filt_frame is null then flushing will be initiated by the codec ret = avcodec_send_frame(s->codec, filt_frame); if (ret < 0) return ret; while (1) { ret = avcodec_receive_packet(s->codec, ctx->encoded_pkt); if (ret < 0) { if (ret == AVERROR(EAGAIN)) ret = 0; break; } packet_prepare(ctx->encoded_pkt, s); ret = av_interleaved_write_frame(ctx->ofmt_ctx, ctx->encoded_pkt); if (ret < 0) { DPRINTF(E_WARN, L_XCODE, "av_interleaved_write_frame() failed: %s\n", err2str(ret)); break; } } return ret; } /* * Part 3 of the conversion chain: read -> decode -> filter -> encode -> write * * transcode_encode() starts here since the caller already has a frame * */ static int filter_encode_write(struct encode_ctx *ctx, struct stream_ctx *s, AVFrame *frame) { int ret; // Push the decoded frame into the filtergraph if (frame) { ret = av_buffersrc_add_frame(s->buffersrc_ctx, frame); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error while feeding the filtergraph: %s\n", err2str(ret)); return -1; } } // Pull filtered frames from the filtergraph and pass to encoder while (1) { ret = av_buffersink_get_frame(s->buffersink_ctx, ctx->filt_frame); if (ret < 0) { if (!frame) // We are flushing ret = encode_write(ctx, s, NULL); else if (ret == AVERROR(EAGAIN)) ret = 0; break; } ret = encode_write(ctx, s, ctx->filt_frame); av_frame_unref(ctx->filt_frame); if (ret < 0) break; } return ret; } /* * Part 2 of the conversion chain: 
read -> decode -> filter -> encode -> write * * If there is no encode_ctx the chain will aborted here * */ static int decode_filter_encode_write(struct transcode_ctx *ctx, struct stream_ctx *s, AVPacket *pkt, enum AVMediaType type) { struct decode_ctx *dec_ctx = ctx->decode_ctx; struct stream_ctx *out_stream = NULL; int ret; ret = avcodec_send_packet(s->codec, pkt); if (ret < 0 && (ret != AVERROR_INVALIDDATA) && (ret != AVERROR(EAGAIN))) // We don't bail on invalid data, some streams work anyway { DPRINTF(E_LOG, L_XCODE, "Decoder error, avcodec_send_packet said '%s' (%d)\n", err2str(ret), ret); return ret; } if (ctx->encode_ctx) { if (type == AVMEDIA_TYPE_AUDIO) out_stream = &ctx->encode_ctx->audio_stream; else if (type == AVMEDIA_TYPE_VIDEO) out_stream = &ctx->encode_ctx->video_stream; else return -1; } while (1) { ret = avcodec_receive_frame(s->codec, dec_ctx->decoded_frame); if (ret < 0) { if (ret == AVERROR(EAGAIN)) ret = 0; else if (out_stream) ret = filter_encode_write(ctx->encode_ctx, out_stream, NULL); // Flush break; } dec_ctx->got_frame = 1; if (!out_stream) break; ret = filter_encode_write(ctx->encode_ctx, out_stream, dec_ctx->decoded_frame); if (ret < 0) break; } return ret; } /* * Part 1 of the conversion chain: read -> decode -> filter -> encode -> write * * Will read exactly one packet from the input and put it in the chain. You * cannot count on anything coming out of the other end from just one packet, * so you probably should loop when calling this and check the contents of * enc_ctx->obuf. * */ static int read_decode_filter_encode_write(struct transcode_ctx *ctx) { struct decode_ctx *dec_ctx = ctx->decode_ctx; struct encode_ctx *enc_ctx = ctx->encode_ctx; enum AVMediaType type; int ret; ret = read_packet(&type, dec_ctx); if (ret < 0) { if (ret == AVERROR_EOF) dec_ctx->eof = 1; if (dec_ctx->audio_stream.stream) decode_filter_encode_write(ctx, &dec_ctx->audio_stream, NULL, AVMEDIA_TYPE_AUDIO); if (dec_ctx->video_stream.stream) decode_filter_encode_write(ctx, &dec_ctx->video_stream, NULL, AVMEDIA_TYPE_VIDEO); if (enc_ctx) av_interleaved_write_frame(enc_ctx->ofmt_ctx, NULL); // Flush muxer if (enc_ctx && !enc_ctx->settings.without_libav_trailer) av_write_trailer(enc_ctx->ofmt_ctx); return ret; } if (type == AVMEDIA_TYPE_AUDIO) ret = decode_filter_encode_write(ctx, &dec_ctx->audio_stream, dec_ctx->packet, type); else if (type == AVMEDIA_TYPE_VIDEO) ret = decode_filter_encode_write(ctx, &dec_ctx->video_stream, dec_ctx->packet, type); return ret; } /* ------------------------------- CUSTOM I/O ------------------------------ */ /* For using ffmpeg with evbuffer input/output instead of files */ static int avio_evbuffer_read(void *opaque, uint8_t *buf, int size) { struct avio_evbuffer *ae = (struct avio_evbuffer *)opaque; int ret; ret = evbuffer_remove(ae->evbuf, buf, size); // Must return AVERROR, see avio.h: avio_alloc_context() return (ret > 0) ? ret : AVERROR_EOF; } #if USE_CONST_AVIO_WRITE_PACKET static int avio_evbuffer_write(void *opaque, const uint8_t *buf, int size) #else static int avio_evbuffer_write(void *opaque, uint8_t *buf, int size) #endif { struct avio_evbuffer *ae = (struct avio_evbuffer *)opaque; int ret; ret = evbuffer_add(ae->evbuf, buf, size); return (ret == 0) ? 
size : -1; } static int64_t avio_evbuffer_seek(void *opaque, int64_t offset, int whence) { struct avio_evbuffer *ae = (struct avio_evbuffer *)opaque; enum transcode_seek_type seek_type; // Caller shouldn't need to know about ffmpeg defines if (whence & AVSEEK_SIZE) seek_type = XCODE_SEEK_SIZE; else if (whence == SEEK_SET) seek_type = XCODE_SEEK_SET; else if (whence == SEEK_CUR) seek_type = XCODE_SEEK_CUR; else return -1; return ae->seekfn(ae->seekfn_arg, offset, seek_type); } static AVIOContext * avio_evbuffer_open(struct transcode_evbuf_io *evbuf_io, int is_output) { struct avio_evbuffer *ae; AVIOContext *s; ae = calloc(1, sizeof(struct avio_evbuffer)); if (!ae) { DPRINTF(E_LOG, L_FFMPEG, "Out of memory for avio_evbuffer\n"); return NULL; } ae->buffer = av_mallocz(AVIO_BUFFER_SIZE); if (!ae->buffer) { DPRINTF(E_LOG, L_FFMPEG, "Out of memory for avio buffer\n"); free(ae); return NULL; } ae->evbuf = evbuf_io->evbuf; ae->seekfn = evbuf_io->seekfn; ae->seekfn_arg = evbuf_io->seekfn_arg; if (is_output) s = avio_alloc_context(ae->buffer, AVIO_BUFFER_SIZE, 1, ae, NULL, avio_evbuffer_write, (evbuf_io->seekfn ? avio_evbuffer_seek : NULL)); else s = avio_alloc_context(ae->buffer, AVIO_BUFFER_SIZE, 0, ae, avio_evbuffer_read, NULL, (evbuf_io->seekfn ? avio_evbuffer_seek : NULL)); if (!s) { DPRINTF(E_LOG, L_FFMPEG, "Could not allocate AVIOContext\n"); av_free(ae->buffer); free(ae); return NULL; } s->seekable = (evbuf_io->seekfn ? AVIO_SEEKABLE_NORMAL : 0); return s; } static void avio_evbuffer_close(AVIOContext *s) { struct avio_evbuffer *ae; if (!s) return; ae = (struct avio_evbuffer *)s->opaque; avio_flush(s); av_free(s->buffer); free(ae); av_free(s); } /* ----------------------- CUSTOM HEADER GENERATION ------------------------ */ static int make_wav_header(struct evbuffer **wav_header, uint32_t sample_rate, uint16_t bytes_per_sample, uint16_t channels, uint32_t bytes_total) { uint8_t header[WAV_HEADER_LEN]; memcpy(header, "RIFF", 4); add_le32(header + 4, bytes_total - 8); // Total file size - 8 bytes as defined by the format memcpy(header + 8, "WAVEfmt ", 8); add_le32(header + 16, 16); add_le16(header + 20, 1); // AudioFormat (PCM) add_le16(header + 22, channels); /* channels */ add_le32(header + 24, sample_rate); /* samplerate */ add_le32(header + 28, sample_rate * channels * bytes_per_sample); /* byte rate */ add_le16(header + 32, channels * bytes_per_sample); /* block align */ add_le16(header + 34, 8 * bytes_per_sample); /* bits per sample */ memcpy(header + 36, "data", 4); add_le32(header + 40, bytes_total - WAV_HEADER_LEN); *wav_header = evbuffer_new(); evbuffer_add(*wav_header, header, sizeof(header)); return 0; } static int mp4_adjust_moov_stco_offset(uint8_t *moov, size_t moov_len) { uint8_t stco_needle[8] = { 's', 't', 'c', 'o', 0, 0, 0, 0 }; uint32_t be32; uint32_t n_entries; uint32_t entry; uint8_t *ptr; uint8_t *end; end = moov + moov_len; ptr = memmem(moov, moov_len, stco_needle, sizeof(stco_needle)); if (!ptr || ptr + sizeof(stco_needle) + sizeof(be32) > end) return -1; ptr += sizeof(stco_needle); memcpy(&be32, ptr, sizeof(be32)); for (n_entries = be32toh(be32); n_entries > 0; n_entries--) { ptr += sizeof(be32); if (ptr + sizeof(be32) > end) return -1; memcpy(&be32, ptr, sizeof(be32)); entry = be32toh(be32); be32 = htobe32(entry + moov_len); memcpy(ptr, &be32, sizeof(be32)); } return 0; } static int mp4_header_trailer_from_evbuf(uint8_t **header, size_t *header_len, uint8_t **trailer, size_t *trailer_len, struct evbuffer *evbuf, int64_t start_pos) { uint8_t *buf = 
evbuffer_pullup(evbuf, -1); size_t buf_len = evbuffer_get_length(evbuf); int64_t pos = start_pos; int bytes_added = 0; uint8_t *marker; size_t len; int ret; while (buf_len > 0) { marker = memmem(buf, buf_len, xcode_seek_marker, sizeof(xcode_seek_marker)); len = marker ? marker - buf : buf_len; if (pos <= *header_len) // Either first write of header or seek to pos inside header ret = copy_buffer_to_position(header, header_len, buf, len, pos); else if (pos >= start_pos) // Either first write of trailer or seek to pos inside trailer ret = copy_buffer_to_position(trailer, trailer_len, buf, len, pos - start_pos); else // Unexpected seek to body (pos is before trailer but not in header) ret = -1; if (ret < 0) return -1; bytes_added += ret; if (!marker) break; memcpy(&pos, marker + sizeof(xcode_seek_marker), sizeof(pos)); buf += len + sizeof(xcode_seek_marker) + sizeof(pos); buf_len -= len + sizeof(xcode_seek_marker) + sizeof(pos); } evbuffer_drain(evbuf, -1); return bytes_added; } // Transcodes the entire file so that we can grab the header, which will then // have a correct moov atom. The moov atom contains elements like stco and stsz // which can only be made when the encoding has been done, since they contain // information about where the frames are in the file. iTunes and Soundsbrdige // requires these to be correct, otherwise they won't play our transcoded files. // They also require that the atom is in the beginning of the file. ffmpeg's // "faststart" option does this, but is difficult to use with non-file output, // instead we move the atom ourselves. static int make_mp4_header(struct evbuffer **mp4_header, const char *url) { struct transcode_decode_setup_args decode_args = { .profile = XCODE_MP4_ALAC_HEADER }; struct transcode_encode_setup_args encode_args = { .profile = XCODE_MP4_ALAC_HEADER }; struct transcode_ctx ctx = { 0 }; struct transcode_evbuf_io evbuf_io = { 0 }; uint8_t free_tag[4] = { 'f', 'r', 'e', 'e' }; uint8_t *header = NULL; uint8_t *trailer = NULL; size_t header_len = 0; size_t trailer_len = 0; uint8_t *ptr; int ret; if (!url || *url != '/') return -1; CHECK_NULL(L_XCODE, evbuf_io.evbuf = evbuffer_new()); evbuf_io.seekfn = dummy_seek; evbuf_io.seekfn_arg = &ctx; decode_args.path = url; ctx.decode_ctx = transcode_decode_setup(decode_args); if (!ctx.decode_ctx) goto error; encode_args.evbuf_io = &evbuf_io; encode_args.src_ctx = ctx.decode_ctx; ctx.encode_ctx = transcode_encode_setup(encode_args); if (!ctx.encode_ctx) goto error; // Save the template header, which looks something like this (note that the // mdate size is still unknown, so just zeroes, and there is no moov): // // 0000 00 00 00 1c 66 74 79 70 69 73 6f 6d 00 00 02 00 ....ftypisom.... // 0010 69 73 6f 6d 69 73 6f 32 6d 70 34 31 00 00 00 08 isomiso2mp41.... // 0020 66 72 65 65 00 00 00 00 6d 64 61 74 free....mdat ret = avformat_write_header(ctx.encode_ctx->ofmt_ctx, NULL); if (ret < 0) goto error; // Writes the obuf to the header buffer, bytes_processed is 0 ret = mp4_header_trailer_from_evbuf(&header, &header_len, &trailer, &trailer_len, ctx.encode_ctx->obuf, ctx.encode_ctx->bytes_processed); if (ret < 0) goto error; ctx.encode_ctx->bytes_processed += ret; // Encode but discard result, this is just so that ffmpeg can create the // missing header data. 
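// Note that bytes_processed keeps advancing for the output we throw away below, so that the absolute positions recorded by dummy_seek() can later be classified correctly (offsets inside the header vs. offsets at or after the trailer start).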
while (read_decode_filter_encode_write(&ctx) == 0) { ctx.encode_ctx->bytes_processed += evbuffer_get_length(ctx.encode_ctx->obuf); evbuffer_drain(ctx.encode_ctx->obuf, -1); } // Here, ffmpeg will seek back and write the size to the mdat atom and then // seek forward again to write the trailer. Since we can't actually seek, we // instead look for the markers that dummy_seek() inserted. av_write_trailer(ctx.encode_ctx->ofmt_ctx); ret = mp4_header_trailer_from_evbuf(&header, &header_len, &trailer, &trailer_len, ctx.encode_ctx->obuf, ctx.encode_ctx->bytes_processed); if (ret < 0 || !header || !trailer) goto error; // The trailer buffer should now contain the moov atom. We need to adjust the // chunk offset (stco) in it because we will move it to the beginning of the // file. ret = mp4_adjust_moov_stco_offset(trailer, trailer_len); if (ret < 0) goto error; // Now we want to move the trailer (which has the moov atom) into the header. // We insert it before the free atom, because that's what ffmpeg does when // the "faststart" option is set. CHECK_NULL(L_XCODE, header = realloc(header, header_len + trailer_len)); ptr = memmem(header, header_len, free_tag, sizeof(free_tag)); if (!ptr || ptr - header < sizeof(uint32_t)) goto error; ptr -= sizeof(uint32_t); memmove(ptr + trailer_len, ptr, header + header_len - ptr); memcpy(ptr, trailer, trailer_len); header_len += trailer_len; *mp4_header = evbuffer_new(); evbuffer_add(*mp4_header, header, header_len); free(header); free(trailer); transcode_decode_cleanup(&ctx.decode_ctx); transcode_encode_cleanup(&ctx.encode_ctx); evbuffer_free(evbuf_io.evbuf); return 0; error: if (header) DHEXDUMP(E_DBG, L_XCODE, header, header_len, "MP4 header\n"); if (trailer) DHEXDUMP(E_DBG, L_XCODE, trailer, trailer_len, "MP4 trailer\n"); free(header); free(trailer); transcode_decode_cleanup(&ctx.decode_ctx); transcode_encode_cleanup(&ctx.encode_ctx); evbuffer_free(evbuf_io.evbuf); return -1; } /* --------------------------- INPUT/OUTPUT INIT --------------------------- */ static int open_decoder(AVCodecContext **dec_ctx, unsigned int *stream_index, struct decode_ctx *ctx, enum AVMediaType type) { #if USE_CONST_AVCODEC const AVCodec *decoder; #else // Not const before ffmpeg 5.0 AVCodec *decoder; #endif int ret; ret = av_find_best_stream(ctx->ifmt_ctx, type, -1, -1, &decoder, 0); if (ret < 0) { if (!ctx->settings.silent) DPRINTF(E_LOG, L_XCODE, "Error finding best stream: %s\n", err2str(ret)); return ret; } *stream_index = (unsigned int)ret; CHECK_NULL(L_XCODE, *dec_ctx = avcodec_alloc_context3(decoder)); // Filter creation will need the sample rate and format that the decoder is // giving us - however sample rate of dec_ctx will be 0 if we don't prime it // with the streams codecpar data. 
ret = avcodec_parameters_to_context(*dec_ctx, ctx->ifmt_ctx->streams[*stream_index]->codecpar); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Failed to copy codecpar for stream #%d: %s\n", *stream_index, err2str(ret)); avcodec_free_context(dec_ctx); return ret; } ret = avcodec_open2(*dec_ctx, NULL, NULL); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Failed to open decoder for stream #%d: %s\n", *stream_index, err2str(ret)); avcodec_free_context(dec_ctx); return ret; } return 0; } static void close_input(struct decode_ctx *ctx) { if (!ctx->ifmt_ctx) return; avio_evbuffer_close(ctx->avio); avcodec_free_context(&ctx->audio_stream.codec); avcodec_free_context(&ctx->video_stream.codec); avformat_close_input(&ctx->ifmt_ctx); ctx->ifmt_ctx = NULL; } static int open_input(struct decode_ctx *ctx, const char *path, struct transcode_evbuf_io *evbuf_io, enum probe_type probe_type) { AVDictionary *options = NULL; AVCodecContext *dec_ctx; #if USE_CONST_AVFORMAT const AVInputFormat *ifmt; #else // Not const before ffmpeg 5.0 AVInputFormat *ifmt; #endif unsigned int stream_index; const char *user_agent; int ret = 0; CHECK_NULL(L_XCODE, ctx->ifmt_ctx = avformat_alloc_context()); // Caller can ask for small probe to start quicker + search for embedded // artwork quicker. Especially useful for http sources. The standard probe // size takes around 5 sec for an mp3, while the below only takes around a // second. The improved performance comes at the cost of possible inaccuracy. if (probe_type == PROBE_TYPE_QUICK) { ctx->ifmt_ctx->probesize = 65536; ctx->ifmt_ctx->format_probesize = 65536; } if (ctx->is_http) { av_dict_set(&options, "icy", "1", 0); user_agent = cfg_getstr(cfg_getsec(cfg, "general"), "user_agent"); av_dict_set(&options, "user_agent", user_agent, 0); av_dict_set(&options, "reconnect", "1", 0); // The below option disabled because it does not work with m3u8 streams, // see https://lists.ffmpeg.org/pipermail/ffmpeg-user/2018-September/041109.html // av_dict_set(&options, "reconnect_at_eof", "1", 0); av_dict_set(&options, "reconnect_streamed", "1", 0); } // TODO Newest versions of ffmpeg have timeout and reconnect options we should use ctx->ifmt_ctx->interrupt_callback.callback = decode_interrupt_cb; ctx->ifmt_ctx->interrupt_callback.opaque = ctx; ctx->timestamp = av_gettime(); if (evbuf_io) { ifmt = av_find_input_format(ctx->settings.in_format); if (!ifmt) { DPRINTF(E_LOG, L_XCODE, "Could not find input format: '%s'\n", ctx->settings.in_format); goto out_fail; } CHECK_NULL(L_XCODE, ctx->avio = avio_evbuffer_open(evbuf_io, 0)); ctx->ifmt_ctx->pb = ctx->avio; ret = avformat_open_input(&ctx->ifmt_ctx, NULL, ifmt, &options); } else { ret = avformat_open_input(&ctx->ifmt_ctx, path, NULL, &options); } if (options) av_dict_free(&options); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Cannot open '%s': %s\n", path, err2str(ret)); goto out_fail; } // If the source has REPLAYGAIN_TRACK_GAIN metadata, this will inject the // values into the the next packet's side data (as AV_FRAME_DATA_REPLAYGAIN), // which has the effect that a volume replaygain filter works. Note that // ffmpeg itself uses another method in process_input() in ffmpeg.c. 
av_format_inject_global_side_data(ctx->ifmt_ctx); ret = avformat_find_stream_info(ctx->ifmt_ctx, NULL); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Cannot find stream information: %s\n", err2str(ret)); goto out_fail; } if (ctx->ifmt_ctx->nb_streams > MAX_STREAMS) { DPRINTF(E_LOG, L_XCODE, "File '%s' has too many streams (%u)\n", path, ctx->ifmt_ctx->nb_streams); goto out_fail; } if (ctx->settings.encode_audio) { ret = open_decoder(&dec_ctx, &stream_index, ctx, AVMEDIA_TYPE_AUDIO); if (ret < 0) goto out_fail; ctx->audio_stream.codec = dec_ctx; ctx->audio_stream.stream = ctx->ifmt_ctx->streams[stream_index]; } if (ctx->settings.encode_video) { ret = open_decoder(&dec_ctx, &stream_index, ctx, AVMEDIA_TYPE_VIDEO); if (ret < 0) goto out_fail; ctx->video_stream.codec = dec_ctx; ctx->video_stream.stream = ctx->ifmt_ctx->streams[stream_index]; } return 0; out_fail: close_input(ctx); return (ret < 0 ? ret : -1); // If we got an error code from ffmpeg then return that } static void close_output(struct encode_ctx *ctx) { if (!ctx->ofmt_ctx) return; avcodec_free_context(&ctx->audio_stream.codec); avcodec_free_context(&ctx->video_stream.codec); avio_evbuffer_close(ctx->ofmt_ctx->pb); avformat_free_context(ctx->ofmt_ctx); ctx->ofmt_ctx = NULL; } static int open_output(struct encode_ctx *ctx, struct transcode_evbuf_io *evbuf_io, struct evbuffer *prepared_header, struct decode_ctx *src_ctx) { #if USE_CONST_AVFORMAT const AVOutputFormat *oformat; #else // Not const before ffmpeg 5.0 AVOutputFormat *oformat; #endif AVDictionary *options = NULL; struct evbuffer *header = NULL; int ret; oformat = av_guess_format(ctx->settings.format, NULL, NULL); if (!oformat) { DPRINTF(E_LOG, L_XCODE, "ffmpeg/libav could not find the '%s' output format\n", ctx->settings.format); return -1; } #if USE_NO_CLEAR_AVFMT_NOFILE CHECK_ERRNO(L_XCODE, avformat_alloc_output_context2(&ctx->ofmt_ctx, oformat, NULL, NULL)); #else // Clear AVFMT_NOFILE bit, it is not allowed as we will set our own AVIOContext. // If this is not done with e.g. ffmpeg 3.4 then artwork rescaling will fail. 
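// This mutates the shared AVOutputFormat returned by av_guess_format(), which is only possible on older ffmpeg where that pointer is not const (see USE_NO_CLEAR_AVFMT_NOFILE above); newer versions take the avformat_alloc_output_context2() path instead.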
oformat->flags &= ~AVFMT_NOFILE; CHECK_NULL(L_XCODE, ctx->ofmt_ctx = avformat_alloc_context()); ctx->ofmt_ctx->oformat = oformat; #endif CHECK_NULL(L_XCODE, ctx->ofmt_ctx->pb = avio_evbuffer_open(evbuf_io, 1)); ctx->obuf = evbuf_io->evbuf; if (ctx->settings.encode_audio) { ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec); if (ret < 0) goto error; } if (ctx->settings.encode_video) { ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec); if (ret < 0) goto error; } ret = avformat_init_output(ctx->ofmt_ctx, &options); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error initializing output: %s\n", err2str(ret)); goto error; } else if (options) { DPRINTF(E_WARN, L_XCODE, "Didn't recognize all options given to avformat_init_output\n"); av_dict_free(&options); goto error; } // For WAV output, both avformat_write_header() and manual wav header is required if (!ctx->settings.without_libav_header) { ret = avformat_write_header(ctx->ofmt_ctx, NULL); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error writing header to output buffer: %s\n", err2str(ret)); goto error; } } if (ctx->settings.with_wav_header) { ret = make_wav_header(&header, ctx->settings.sample_rate, av_get_bytes_per_sample(ctx->settings.sample_format), ctx->settings.nb_channels, ctx->bytes_total); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error creating WAV header\n"); goto error; } evbuffer_add_buffer(ctx->obuf, header); evbuffer_free(header); } if (ctx->settings.with_mp4_header && prepared_header) { evbuffer_add_buffer(ctx->obuf, prepared_header); } else if (ctx->settings.with_mp4_header) { ret = make_mp4_header(&header, src_ctx->ifmt_ctx->url); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error creating MP4 header\n"); goto error; } evbuffer_add_buffer(ctx->obuf, header); evbuffer_free(header); } return 0; error: close_output(ctx); return -1; } static int filter_def_abuffer(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { #if USE_CH_LAYOUT char buf[64]; // Some AIFF files only have a channel number, not a layout if (in_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&in_stream->codec->ch_layout, in_stream->codec->ch_layout.nb_channels); av_channel_layout_describe(&in_stream->codec->ch_layout, buf, sizeof(buf)); snprintf(def->args, sizeof(def->args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s", in_stream->stream->time_base.num, in_stream->stream->time_base.den, in_stream->codec->sample_rate, av_get_sample_fmt_name(in_stream->codec->sample_fmt), buf); #else if (!in_stream->codec->channel_layout) in_stream->codec->channel_layout = av_get_default_channel_layout(in_stream->codec->channels); snprintf(def->args, sizeof(def->args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%"PRIx64, in_stream->stream->time_base.num, in_stream->stream->time_base.den, in_stream->codec->sample_rate, av_get_sample_fmt_name(in_stream->codec->sample_fmt), in_stream->codec->channel_layout); #endif snprintf(def->name, sizeof(def->name), "abuffer"); return 0; } static int filter_def_aformat(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { #if USE_CH_LAYOUT char buf[64]; if (out_stream->codec->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&out_stream->codec->ch_layout, out_stream->codec->ch_layout.nb_channels); av_channel_layout_describe(&out_stream->codec->ch_layout, buf, sizeof(buf)); snprintf(def->args, sizeof(def->args), 
"sample_fmts=%s:sample_rates=%d:channel_layouts=%s", av_get_sample_fmt_name(out_stream->codec->sample_fmt), out_stream->codec->sample_rate, buf); #else // For some AIFF files, ffmpeg (3.4.6) will not give us a channel_layout (bug in ffmpeg?) if (!out_stream->codec->channel_layout) out_stream->codec->channel_layout = av_get_default_channel_layout(out_stream->codec->channels); snprintf(def->args, sizeof(def->args), "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%"PRIx64, av_get_sample_fmt_name(out_stream->codec->sample_fmt), out_stream->codec->sample_rate, out_stream->codec->channel_layout); #endif snprintf(def->name, sizeof(def->name), "aformat"); return 0; } static int filter_def_abuffersink(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { snprintf(def->name, sizeof(def->name), "abuffersink"); *def->args = '\0'; return 0; } static int filter_def_buffer(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { snprintf(def->name, sizeof(def->name), "buffer"); snprintf(def->args, sizeof(def->args), "width=%d:height=%d:pix_fmt=%s:time_base=%d/%d:sar=%d/%d", in_stream->codec->width, in_stream->codec->height, av_get_pix_fmt_name(in_stream->codec->pix_fmt), in_stream->stream->time_base.num, in_stream->stream->time_base.den, in_stream->codec->sample_aspect_ratio.num, in_stream->codec->sample_aspect_ratio.den); return 0; } static int filter_def_format(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { snprintf(def->name, sizeof(def->name), "format"); snprintf(def->args, sizeof(def->args), "pix_fmts=%s", av_get_pix_fmt_name(out_stream->codec->pix_fmt)); return 0; } static int filter_def_scale(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { snprintf(def->name, sizeof(def->name), "scale"); snprintf(def->args, sizeof(def->args), "w=%d:h=%d", out_stream->codec->width, out_stream->codec->height); return 0; } static int filter_def_buffersink(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { snprintf(def->name, sizeof(def->name), "buffersink"); *def->args = '\0'; return 0; } static int filter_def_user(struct filter_def *def, struct stream_ctx *out_stream, struct stream_ctx *in_stream, const char *deffn_arg) { char *ptr; snprintf(def->name, sizeof(def->name), "%s", deffn_arg); ptr = strchr(def->name, '='); if (ptr) { *ptr = '\0'; snprintf(def->args, sizeof(def->args), "%s", ptr + 1); } else *def->args = '\0'; return 0; } static int define_audio_filters(struct filters *filters, size_t filters_len, bool with_user_filters) { int num_user_filters; int i; num_user_filters = cfg_size(cfg_getsec(cfg, "library"), "decode_audio_filters"); if (filters_len < num_user_filters + 3) { DPRINTF(E_LOG, L_XCODE, "Too many audio filters configured (%d, max is %zu)\n", num_user_filters, filters_len - 3); return -1; } filters[0].deffn = filter_def_abuffer; for (i = 0; with_user_filters && i < num_user_filters; i++) { filters[1 + i].deffn = filter_def_user; filters[1 + i].deffn_arg = cfg_getnstr(cfg_getsec(cfg, "library"), "decode_audio_filters", i); } filters[1 + i].deffn = filter_def_aformat; filters[2 + i].deffn = filter_def_abuffersink; return 0; } static int define_video_filters(struct filters *filters, size_t filters_len, bool with_user_filters) { int num_user_filters; int i; num_user_filters = cfg_size(cfg_getsec(cfg, "library"), 
"decode_video_filters"); if (filters_len < num_user_filters + 3) { DPRINTF(E_LOG, L_XCODE, "Too many video filters configured (%d, max is %zu)\n", num_user_filters, filters_len - 3); return -1; } filters[0].deffn = filter_def_buffer; for (i = 0; with_user_filters && i < num_user_filters; i++) { filters[1 + i].deffn = filter_def_user; filters[1 + i].deffn_arg = cfg_getnstr(cfg_getsec(cfg, "library"), "decode_video_filters", i); } filters[1 + i].deffn = filter_def_format; filters[2 + i].deffn = filter_def_scale; filters[3 + i].deffn = filter_def_buffersink; return 0; } static int add_filters(int *num_added, AVFilterGraph *filter_graph, struct filters *filters, size_t filters_len, struct stream_ctx *out_stream, struct stream_ctx *in_stream) { const AVFilter *av_filter; struct filter_def def; int i; int ret; for (i = 0; i < filters_len && filters[i].deffn; i++) { ret = filters[i].deffn(&def, out_stream, in_stream, filters[i].deffn_arg); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error creating filter definition\n"); return -1; } av_filter = avfilter_get_by_name(def.name); if (!av_filter) { DPRINTF(E_LOG, L_XCODE, "Could not find filter '%s'\n", def.name); return -1; } ret = avfilter_graph_create_filter(&filters[i].av_ctx, av_filter, def.name, def.args, NULL, filter_graph); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error creating filter '%s': %s\n", def.name, err2str(ret)); return -1; } DPRINTF(E_DBG, L_XCODE, "Created '%s' filter: '%s'\n", def.name, def.args); if (i == 0) continue; ret = avfilter_link(filters[i - 1].av_ctx, 0, filters[i].av_ctx, 0); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Error connecting filters: %s\n", err2str(ret)); return -1; } } *num_added = i; return 0; } static int create_filtergraph(struct stream_ctx *out_stream, struct filters *filters, size_t filters_len, struct stream_ctx *in_stream) { AVFilterGraph *filter_graph; int ret; int added; CHECK_NULL(L_XCODE, filter_graph = avfilter_graph_alloc()); ret = add_filters(&added, filter_graph, filters, filters_len, out_stream, in_stream); if (ret < 0) { goto out_fail; } ret = avfilter_graph_config(filter_graph, NULL); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Filter graph config failed: %s\n", err2str(ret)); goto out_fail; } out_stream->buffersrc_ctx = filters[0].av_ctx; out_stream->buffersink_ctx = filters[added - 1].av_ctx; out_stream->filter_graph = filter_graph; return 0; out_fail: avfilter_graph_free(&filter_graph); return -1; } static void close_filters(struct encode_ctx *ctx) { avfilter_graph_free(&ctx->audio_stream.filter_graph); avfilter_graph_free(&ctx->video_stream.filter_graph); } static int open_filters(struct encode_ctx *ctx, struct decode_ctx *src_ctx) { struct filters filters[MAX_FILTERS] = { 0 }; int ret; if (ctx->settings.encode_audio) { ret = define_audio_filters(filters, ARRAY_SIZE(filters), ctx->settings.with_user_filters); if (ret < 0) goto out_fail; ret = create_filtergraph(&ctx->audio_stream, filters, ARRAY_SIZE(filters), &src_ctx->audio_stream); if (ret < 0) goto out_fail; // Many audio encoders require a fixed frame size. This will ensure that // the filt_frame from av_buffersink_get_frame has that size (except EOF). if (! 
(ctx->audio_stream.codec->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) av_buffersink_set_frame_size(ctx->audio_stream.buffersink_ctx, ctx->audio_stream.codec->frame_size); } if (ctx->settings.encode_video) { ret = define_video_filters(filters, ARRAY_SIZE(filters), ctx->settings.with_user_filters); if (ret < 0) goto out_fail; ret = create_filtergraph(&ctx->video_stream, filters, ARRAY_SIZE(filters), &src_ctx->video_stream); if (ret < 0) goto out_fail; } return 0; out_fail: close_filters(ctx); return -1; } /* ----------------------------- TRANSCODE API ----------------------------- */ /* Setup */ struct decode_ctx * transcode_decode_setup(struct transcode_decode_setup_args args) { struct decode_ctx *ctx; int ret; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct decode_ctx))); CHECK_NULL(L_XCODE, ctx->decoded_frame = av_frame_alloc()); CHECK_NULL(L_XCODE, ctx->packet = av_packet_alloc()); ctx->len_ms = args.len_ms; ret = init_settings(&ctx->settings, args.profile, args.quality); if (ret < 0) goto fail_free; if (args.is_http) { ctx->is_http = true; ret = open_input(ctx, args.path, args.evbuf_io, PROBE_TYPE_QUICK); // Retry with a default, slower probe size if (ret == AVERROR_STREAM_NOT_FOUND) ret = open_input(ctx, args.path, args.evbuf_io, PROBE_TYPE_DEFAULT); } else ret = open_input(ctx, args.path, args.evbuf_io, PROBE_TYPE_DEFAULT); if (ret < 0) goto fail_free; return ctx; fail_free: av_packet_free(&ctx->packet); av_frame_free(&ctx->decoded_frame); free(ctx); return NULL; } struct encode_ctx * transcode_encode_setup(struct transcode_encode_setup_args args) { struct encode_ctx *ctx; int dst_bytes_per_sample; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx))); CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc()); CHECK_NULL(L_XCODE, ctx->encoded_pkt = av_packet_alloc()); CHECK_NULL(L_XCODE, ctx->evbuf_io.evbuf = evbuffer_new()); // Caller didn't specify one, so use our own if (!args.evbuf_io) args.evbuf_io = &ctx->evbuf_io; // Initialize general settings if (init_settings(&ctx->settings, args.profile, args.quality) < 0) goto error; if (ctx->settings.encode_audio && init_settings_from_audio(&ctx->settings, args.profile, args.src_ctx, args.quality) < 0) goto error; if (ctx->settings.encode_video && init_settings_from_video(&ctx->settings, args.profile, args.src_ctx, args.width, args.height) < 0) goto error; dst_bytes_per_sample = av_get_bytes_per_sample(ctx->settings.sample_format); ctx->bytes_total = size_estimate(args.profile, ctx->settings.bit_rate, ctx->settings.sample_rate, dst_bytes_per_sample, ctx->settings.nb_channels, args.src_ctx->len_ms); if (ctx->settings.with_icy && args.src_ctx->is_http) ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.nb_channels * dst_bytes_per_sample * ctx->settings.sample_rate; if (open_output(ctx, args.evbuf_io, args.prepared_header, args.src_ctx) < 0) goto error; if (open_filters(ctx, args.src_ctx) < 0) goto error; return ctx; error: transcode_encode_cleanup(&ctx); return NULL; } struct transcode_ctx * transcode_setup(struct transcode_decode_setup_args decode_args, struct transcode_encode_setup_args encode_args) { struct transcode_ctx *ctx; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct transcode_ctx))); ctx->decode_ctx = transcode_decode_setup(decode_args); if (!ctx->decode_ctx) { free(ctx); return NULL; } encode_args.src_ctx = ctx->decode_ctx; ctx->encode_ctx = transcode_encode_setup(encode_args); if (!ctx->encode_ctx) { transcode_decode_cleanup(&ctx->decode_ctx); free(ctx); return NULL; } return ctx; } struct 
decode_ctx * transcode_decode_setup_raw(enum transcode_profile profile, struct media_quality *quality) { const AVCodecDescriptor *codec_desc; struct decode_ctx *ctx; #if USE_CONST_AVCODEC const AVCodec *decoder; #else // Not const before ffmpeg 5.0 AVCodec *decoder; #endif int ret; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct decode_ctx))); if (init_settings(&ctx->settings, profile, quality) < 0) { goto out_free_ctx; } codec_desc = avcodec_descriptor_get(ctx->settings.audio_codec); if (!codec_desc) { DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", ctx->settings.audio_codec); goto out_free_ctx; } // In raw mode we won't actually need to read or decode, but we still setup // the decode_ctx because transcode_encode_setup() gets info about the input // through this structure (TODO dont' do that) decoder = avcodec_find_decoder(ctx->settings.audio_codec); if (!decoder) { DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", codec_desc->name); goto out_free_ctx; } CHECK_NULL(L_XCODE, ctx->ifmt_ctx = avformat_alloc_context()); CHECK_NULL(L_XCODE, ctx->audio_stream.codec = avcodec_alloc_context3(decoder)); CHECK_NULL(L_XCODE, ctx->audio_stream.stream = avformat_new_stream(ctx->ifmt_ctx, NULL)); stream_settings_set(&ctx->audio_stream, &ctx->settings, decoder->type); // Copy the data we just set to the structs we will be querying later, e.g. in open_filter ctx->audio_stream.stream->time_base = ctx->audio_stream.codec->time_base; ret = avcodec_parameters_from_context(ctx->audio_stream.stream->codecpar, ctx->audio_stream.codec); if (ret < 0) { DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret)); goto out_free_codec; } return ctx; out_free_codec: avcodec_free_context(&ctx->audio_stream.codec); avformat_free_context(ctx->ifmt_ctx); out_free_ctx: free(ctx); return NULL; } enum transcode_profile transcode_needed(const char *user_agent, const char *client_codecs, const char *file_codectype) { const char *codectype; const char *prefer_format; cfg_t *lib; bool force_xcode; bool supports_alac; bool supports_mpeg; bool supports_wav; int count; int i; if (!file_codectype) { return XCODE_UNKNOWN; } lib = cfg_getsec(cfg, "library"); count = cfg_size(lib, "no_decode"); for (i = 0; i < count; i++) { codectype = cfg_getnstr(lib, "no_decode", i); if (strcmp(file_codectype, codectype) == 0) return XCODE_NONE; // Codectype is in no_decode } count = cfg_size(lib, "force_decode"); for (i = 0, force_xcode = false; i < count && !force_xcode; i++) { codectype = cfg_getnstr(lib, "force_decode", i); if (strcmp(file_codectype, codectype) == 0) force_xcode = true; // Codectype is in force_decode } if (!client_codecs && user_agent) { if (strncmp(user_agent, "iTunes", strlen("iTunes")) == 0) client_codecs = itunes_codecs; else if (strncmp(user_agent, "Music/", strlen("Music/")) == 0) // Apple Music, include slash because the name is generic client_codecs = itunes_codecs; else if (strncmp(user_agent, "QuickTime", strlen("QuickTime")) == 0) client_codecs = itunes_codecs; // Use iTunes codecs else if (strncmp(user_agent, "Front%20Row", strlen("Front%20Row")) == 0) client_codecs = itunes_codecs; // Use iTunes codecs else if (strncmp(user_agent, "AppleCoreMedia", strlen("AppleCoreMedia")) == 0) client_codecs = itunes_codecs; // Use iTunes codecs else if (strncmp(user_agent, "Roku", strlen("Roku")) == 0) client_codecs = roku_codecs; else if (strncmp(user_agent, "Hifidelio", strlen("Hifidelio")) == 0) /* Allegedly can't transcode for Hifidelio because their * HTTP 
	 * implementation doesn't honour Connection: close. At least, that's
	 * why mt-daapd didn't do it. */
	return XCODE_NONE;
    }

  if (!client_codecs)
    client_codecs = default_codecs;
  else
    DPRINTF(E_SPAM, L_XCODE, "Client advertises codecs: %s\n", client_codecs);

  if (!force_xcode && strstr(client_codecs, file_codectype))
    return XCODE_NONE;

  supports_alac = strstr(client_codecs, "alac") || strstr(client_codecs, "mp4a");
  supports_mpeg = strstr(client_codecs, "mpeg") && avcodec_find_encoder(AV_CODEC_ID_MP3);
  supports_wav = strstr(client_codecs, "wav");

  prefer_format = cfg_getstr(lib, "prefer_format");
  if (prefer_format)
    {
      if (strcmp(prefer_format, "wav") == 0 && supports_wav)
	return XCODE_WAV;
      if (strcmp(prefer_format, "mpeg") == 0 && supports_mpeg)
	return XCODE_MP3;
      if (strcmp(prefer_format, "alac") == 0 && supports_alac)
	return XCODE_MP4_ALAC;
    }

  // This order determines the default if the user didn't configure a preference.
  // The lossless formats are given highest preference.
  if (supports_wav)
    return XCODE_WAV;
  if (supports_mpeg)
    return XCODE_MP3;
  if (supports_alac)
    return XCODE_MP4_ALAC;

  return XCODE_UNKNOWN;
}


/* Cleanup */

void
transcode_decode_cleanup(struct decode_ctx **ctx)
{
  if (!(*ctx))
    return;

  close_input(*ctx);

  av_packet_free(&(*ctx)->packet);
  av_frame_free(&(*ctx)->decoded_frame);
  free(*ctx);
  *ctx = NULL;
}

void
transcode_encode_cleanup(struct encode_ctx **ctx)
{
  if (!*ctx)
    return;

  close_filters(*ctx);
  close_output(*ctx);

  evbuffer_free((*ctx)->evbuf_io.evbuf);
  av_packet_free(&(*ctx)->encoded_pkt);
  av_frame_free(&(*ctx)->filt_frame);
  free(*ctx);
  *ctx = NULL;
}

void
transcode_cleanup(struct transcode_ctx **ctx)
{
  if (!*ctx)
    return;

  transcode_encode_cleanup(&(*ctx)->encode_ctx);
  transcode_decode_cleanup(&(*ctx)->decode_ctx);
  free(*ctx);
  *ctx = NULL;
}


/* Encoding, decoding and transcoding */

int
transcode_decode(transcode_frame **frame, struct decode_ctx *dec_ctx)
{
  struct transcode_ctx ctx;
  int ret;

  if (dec_ctx->got_frame)
    DPRINTF(E_LOG, L_XCODE, "Bug! Currently no support for multiple calls to transcode_decode()\n");

  ctx.decode_ctx = dec_ctx;
  ctx.encode_ctx = NULL;

  do
    {
      // This function stops after decoding because ctx->encode_ctx is NULL
      ret = read_decode_filter_encode_write(&ctx);
    }
  while ((ret == 0) && (!dec_ctx->got_frame));

  if (ret < 0)
    return -1;

  *frame = dec_ctx->decoded_frame;

  if (dec_ctx->eof)
    return 0;

  return 1;
}

// Filters and encodes
int
transcode_encode(struct evbuffer *evbuf, struct encode_ctx *ctx, transcode_frame *frame, int eof)
{
  AVFrame *f = frame;
  struct stream_ctx *s;
  size_t start_length;
  int ret;

  start_length = evbuffer_get_length(ctx->obuf);

  // Really crappy way of detecting if frame is audio, video or something else
#if USE_CH_LAYOUT
  if (f->ch_layout.nb_channels && f->sample_rate)
#else
  if (f->channel_layout && f->sample_rate)
#endif
    s = &ctx->audio_stream;
  else if (f->width && f->height)
    s = &ctx->video_stream;
  else
    {
      DPRINTF(E_LOG, L_XCODE, "Bug! Encoder could not detect frame type\n");
      return -1;
    }

  ret = filter_encode_write(ctx, s, f);
  if (ret < 0)
    {
      DPRINTF(E_LOG, L_XCODE, "Error occurred while encoding: %s\n", err2str(ret));
      return ret;
    }

  // Flush
  if (eof)
    {
      filter_encode_write(ctx, s, NULL);
      av_write_trailer(ctx->ofmt_ctx);
    }

  ret = evbuffer_get_length(ctx->obuf) - start_length;

  evbuffer_add_buffer(evbuf, ctx->obuf);

  return ret;
}

int
transcode(struct evbuffer *evbuf, int *icy_timer, struct transcode_ctx *ctx, int want_bytes)
{
  size_t start_length;
  int processed = 0;
  int ret;

  if (icy_timer)
    *icy_timer = 0;

  if (ctx->decode_ctx->eof)
    return 0;

  start_length = evbuffer_get_length(ctx->encode_ctx->obuf);

  do
    {
      ret = read_decode_filter_encode_write(ctx);
      processed = evbuffer_get_length(ctx->encode_ctx->obuf) - start_length;
    }
  while ((ret == 0) && (!want_bytes || (processed < want_bytes)));

  evbuffer_add_buffer(evbuf, ctx->encode_ctx->obuf);

  ctx->encode_ctx->bytes_processed += processed;
  if (icy_timer && ctx->encode_ctx->icy_interval)
    *icy_timer = (ctx->encode_ctx->bytes_processed % ctx->encode_ctx->icy_interval < processed);

  if ((ret < 0) && (ret != AVERROR_EOF))
    return ret;

  return processed;
}

transcode_frame *
transcode_frame_new(void *data, size_t size, int nsamples, struct media_quality *quality)
{
  AVFrame *f;
  int ret;

  f = av_frame_alloc();
  if (!f)
    {
      DPRINTF(E_LOG, L_XCODE, "Out of memory for frame\n");
      return NULL;
    }

  f->format = bitdepth2format(quality->bits_per_sample);
  if (f->format == AV_SAMPLE_FMT_NONE)
    {
      DPRINTF(E_LOG, L_XCODE, "transcode_frame_new() called with unsupported bps (%d)\n", quality->bits_per_sample);
      av_frame_free(&f);
      return NULL;
    }

  f->sample_rate = quality->sample_rate;
  f->nb_samples = nsamples;
#if USE_CH_LAYOUT
  av_channel_layout_default(&f->ch_layout, quality->channels);
#else
  f->channel_layout = av_get_default_channel_layout(quality->channels);
# ifdef HAVE_FFMPEG
  f->channels = quality->channels;
# endif
#endif
  f->pts = AV_NOPTS_VALUE;

  // We don't align because the frame won't be given directly to the encoder
  // anyway, it will first go through the filter (which might align it...?)
  ret = avcodec_fill_audio_frame(f, quality->channels, f->format, data, size, 1);
  if (ret < 0)
    {
      DPRINTF(E_LOG, L_XCODE, "Error filling frame with rawbuf, size %zu, samples %d (%d/%d/%d): %s\n",
	size, nsamples, quality->sample_rate, quality->bits_per_sample, quality->channels, err2str(ret));
      av_frame_free(&f);
      return NULL;
    }

  return f;
}

void
transcode_frame_free(transcode_frame *frame)
{
  AVFrame *f = frame;

  av_frame_free(&f);
}


/* Seeking */

int
transcode_seek(struct transcode_ctx *ctx, int ms)
{
  struct decode_ctx *dec_ctx = ctx->decode_ctx;
  struct stream_ctx *s;
  int64_t start_time;
  int64_t target_pts;
  int64_t got_pts;
  int got_ms;
  int ret;

  s = &dec_ctx->audio_stream;
  if (!s->stream)
    {
      DPRINTF(E_LOG, L_XCODE, "Could not seek in non-audio input\n");
      return -1;
    }

  start_time = s->stream->start_time;

  target_pts = ms;
  target_pts = target_pts * AV_TIME_BASE / 1000;
  target_pts = av_rescale_q(target_pts, AV_TIME_BASE_Q, s->stream->time_base);

  if ((start_time != AV_NOPTS_VALUE) && (start_time > 0))
    target_pts += start_time;

  ret = av_seek_frame(dec_ctx->ifmt_ctx, s->stream->index, target_pts, AVSEEK_FLAG_BACKWARD);
  if (ret < 0)
    {
      DPRINTF(E_WARN, L_XCODE, "Could not seek into stream: %s\n", err2str(ret));
      return -1;
    }

  avcodec_flush_buffers(s->codec);

  // Fast forward until first packet with a timestamp is found
  s->codec->skip_frame = AVDISCARD_NONREF;
  while (1)
    {
      dec_ctx->timestamp = av_gettime();

      av_packet_unref(dec_ctx->packet);
      ret = av_read_frame(dec_ctx->ifmt_ctx, dec_ctx->packet);
      if (ret < 0)
	{
	  DPRINTF(E_WARN, L_XCODE, "Could not read more data while seeking: %s\n", err2str(ret));
	  s->codec->skip_frame = AVDISCARD_DEFAULT;
	  return -1;
	}

      if (stream_find(dec_ctx, dec_ctx->packet->stream_index) == AVMEDIA_TYPE_UNKNOWN)
	continue;

      // Need a pts to return the real position
      if (dec_ctx->packet->pts == AV_NOPTS_VALUE)
	continue;

      break;
    }
  s->codec->skip_frame = AVDISCARD_DEFAULT;

  // Tell read_packet() to resume with dec_ctx->packet
  dec_ctx->resume = 1;

  // Compute position in ms from pts
  got_pts = dec_ctx->packet->pts;

  if ((start_time != AV_NOPTS_VALUE) && (start_time > 0))
    got_pts -= start_time;

  got_pts = av_rescale_q(got_pts, s->stream->time_base, AV_TIME_BASE_Q);
  got_ms = got_pts / (AV_TIME_BASE / 1000);

  // Since negative return would mean error, we disallow it here
  if (got_ms < 0)
    got_ms = 0;

  DPRINTF(E_DBG, L_XCODE, "Seek wanted %d ms, got %d ms\n", ms, got_ms);

  return got_ms;
}


/* Querying */

int
transcode_decode_query(struct decode_ctx *ctx, const char *query)
{
  if (strcmp(query, "width") == 0)
    {
      if (ctx->video_stream.stream)
	return ctx->video_stream.stream->codecpar->width;
    }
  else if (strcmp(query, "height") == 0)
    {
      if (ctx->video_stream.stream)
	return ctx->video_stream.stream->codecpar->height;
    }
  else if (strcmp(query, "is_png") == 0)
    {
      if (ctx->video_stream.stream)
	return (ctx->video_stream.stream->codecpar->codec_id == AV_CODEC_ID_PNG);
    }
  else if (strcmp(query, "is_jpeg") == 0)
    {
      if (ctx->video_stream.stream)
	return (ctx->video_stream.stream->codecpar->codec_id == AV_CODEC_ID_MJPEG);
    }

  return -1;
}

int
transcode_encode_query(struct encode_ctx *ctx, const char *query)
{
  if (strcmp(query, "sample_rate") == 0)
    {
      if (ctx->audio_stream.stream)
	return ctx->audio_stream.stream->codecpar->sample_rate;
    }
  else if (strcmp(query, "bits_per_sample") == 0)
    {
      if (ctx->audio_stream.stream)
	return av_get_bits_per_sample(ctx->audio_stream.stream->codecpar->codec_id);
    }
  else if (strcmp(query, "channels") == 0)
    {
      if (ctx->audio_stream.stream)
#if USE_CH_LAYOUT
	return ctx->audio_stream.stream->codecpar->ch_layout.nb_channels;
#else
	return ctx->audio_stream.stream->codecpar->channels;
#endif
    }
  else if (strcmp(query, "samples_per_frame") == 0)
    {
      if (ctx->audio_stream.stream)
	return ctx->audio_stream.stream->codecpar->frame_size;
    }
  else if (strcmp(query, "estimated_size") == 0)
    {
      if (ctx->audio_stream.stream)
	return ctx->bytes_total;
    }

  return -1;
}


/* Metadata */

struct http_icy_metadata *
transcode_metadata(struct transcode_ctx *ctx, int *changed)
{
  struct http_icy_metadata *m;

  if (!ctx->decode_ctx->ifmt_ctx)
    return NULL;

  m = http_icy_metadata_get(ctx->decode_ctx->ifmt_ctx, 1);
  if (!m)
    return NULL;

  *changed = (m->hash != ctx->encode_ctx->icy_hash);

  ctx->encode_ctx->icy_hash = m->hash;

  return m;
}

void
transcode_metadata_strings_set(struct transcode_metadata_string *s, enum transcode_profile profile, struct media_quality *q, uint32_t len_ms)
{
  off_t bytes;

  memset(s, 0, sizeof(struct transcode_metadata_string));

  switch (profile)
    {
      case XCODE_WAV:
	s->type = "wav";
	s->codectype = "wav";
	s->description = "WAV audio file";
	snprintf(s->bitrate, sizeof(s->bitrate), "%d", 8 * STOB(q->sample_rate, q->bits_per_sample, q->channels) / 1000); // 44100/16/2 -> 1411
	bytes = size_estimate(profile, q->bit_rate, q->sample_rate, q->bits_per_sample / 8, q->channels, len_ms);
	snprintf(s->file_size, sizeof(s->file_size), "%d", (int)bytes);
	break;

      case XCODE_MP3:
	s->type = "mp3";
	s->codectype = "mpeg";
	s->description = "MPEG audio file";
	snprintf(s->bitrate, sizeof(s->bitrate), "%d", q->bit_rate / 1000);
	bytes = size_estimate(profile, q->bit_rate, q->sample_rate, q->bits_per_sample / 8, q->channels, len_ms);
	snprintf(s->file_size, sizeof(s->file_size), "%d", (int)bytes);
	break;

      case XCODE_MP4_ALAC:
	s->type = "m4a";
	s->codectype = "alac";
	s->description = "Apple Lossless audio file";
	snprintf(s->bitrate, sizeof(s->bitrate), "%d", 8 * STOB(q->sample_rate, q->bits_per_sample, q->channels) / 1000); // 44100/16/2 -> 1411
	bytes = size_estimate(profile, q->bit_rate, q->sample_rate, q->bits_per_sample / 8, q->channels, len_ms);
	snprintf(s->file_size, sizeof(s->file_size), "%d", (int)bytes);
	break;

      default:
	DPRINTF(E_WARN, L_XCODE, "transcode_metadata_strings_set() called with unknown profile %d\n", profile);
    }
}

int
transcode_prepare_header(struct evbuffer **header, enum transcode_profile profile, const char *path)
{
  int ret;

  switch (profile)
    {
      case XCODE_MP4_ALAC:
	ret = make_mp4_header(header, path);
	break;
      default:
	ret = -1;
    }

  return ret;
}
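/*
 * Minimal usage sketch (assumed caller flow, not code from this file): pick a
 * profile with transcode_needed(), build the pipeline with transcode_setup()
 * and pull output in chunks with transcode() until it returns 0 (EOF) or a
 * negative error. The user agent, codectype, path and chunk size below are
 * made-up example values.
 *
 *   struct transcode_decode_setup_args decode_args = { 0 };
 *   struct transcode_encode_setup_args encode_args = { 0 };
 *   struct transcode_ctx *xcode;
 *   struct evbuffer *buf;
 *   enum transcode_profile profile;
 *   int icy_timer;
 *   int ret;
 *
 *   profile = transcode_needed("iTunes/12.9", NULL, "flac");
 *   if (profile == XCODE_NONE || profile == XCODE_UNKNOWN)
 *     return; // Nothing to do, or we don't know how to serve this client
 *
 *   decode_args.profile = profile;
 *   decode_args.path = "/srv/music/example.flac"; // hypothetical source file
 *   decode_args.len_ms = 180000;
 *   encode_args.profile = profile;
 *   encode_args.quality = &quality; // desired output quality, set by the caller
 *
 *   xcode = transcode_setup(decode_args, encode_args);
 *   if (!xcode)
 *     return;
 *
 *   buf = evbuffer_new();
 *   do
 *     {
 *       ret = transcode(buf, &icy_timer, xcode, 64 * 1024);
 *       // ... send the evbuffer contents to the client; if icy_timer is set,
 *       // transcode_metadata() can be called to check for updated metadata ...
 *     }
 *   while (ret > 0);
 *
 *   evbuffer_free(buf);
 *   transcode_cleanup(&xcode);
 */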