From 91825976053fdb49ce95b8f10038134bc62db62a Mon Sep 17 00:00:00 2001 From: ejurgensen Date: Fri, 11 Jan 2019 19:34:36 +0100 Subject: [PATCH] [input/xcode] Write to input buffer with the source's native sample rate/format Still WIP at this point since the player and output can't use the improved quality yet, and because rtptimes etc. are likely incorrect --- src/conffile.c | 2 + src/input.c | 4 +- src/input.h | 14 +++-- src/inputs/file_http.c | 13 ++++- src/inputs/pipe.c | 23 +++++++- src/spotify.c | 8 +-- src/transcode.c | 121 +++++++++++++++++++++++++++-------------- src/transcode.h | 22 +++++++- 8 files changed, 146 insertions(+), 61 deletions(-) diff --git a/src/conffile.c b/src/conffile.c index a0afe934..ca4bbbec 100644 --- a/src/conffile.c +++ b/src/conffile.c @@ -100,6 +100,8 @@ static cfg_opt_t sec_library[] = CFG_STR_LIST("no_decode", NULL, CFGF_NONE), CFG_STR_LIST("force_decode", NULL, CFGF_NONE), CFG_BOOL("pipe_autostart", cfg_true, CFGF_NONE), + CFG_INT("pipe_sample_rate", 44100, CFGF_NONE), + CFG_INT("pipe_bits_per_sample", 16, CFGF_NONE), CFG_BOOL("rating_updates", cfg_false, CFGF_NONE), CFG_END() }; diff --git a/src/input.c b/src/input.c index 8c8dd3d5..63c6191a 100644 --- a/src/input.c +++ b/src/input.c @@ -215,7 +215,7 @@ playback(void *arg) // Loops until input_loop_break is set or no more input, e.g. 
EOF ret = inputs[type]->start(ps); if (ret < 0) - input_write(NULL, INPUT_FLAG_ERROR); + input_write(NULL, 0, 0, INPUT_FLAG_ERROR); #ifdef DEBUG DPRINTF(E_DBG, L_PLAYER, "Playback loop stopped (break is %d, ret %d)\n", input_loop_break, ret); @@ -240,7 +240,7 @@ input_wait(void) // Called by input modules from within the playback loop int -input_write(struct evbuffer *evbuf, short flags) +input_write(struct evbuffer *evbuf, int sample_rate, int bits_per_sample, short flags) { struct timespec ts; int ret; diff --git a/src/input.h b/src/input.h index c988d858..c4eafe9d 100644 --- a/src/input.h +++ b/src/input.h @@ -140,18 +140,20 @@ struct input_definition int input_loop_break; /* - * Transfer stream data to the player's input buffer. The input evbuf will be - * drained on succesful write. This is to avoid copying memory. If the player's - * input buffer is full the function will block until the write can be made - * (unless INPUT_FILE_NONBLOCK is set). + * Transfer stream data to the player's input buffer. Data must be PCM-LE + * samples. The input evbuf will be drained on successful write. This is to avoid + * copying memory. If the player's input buffer is full the function will block + * until the write can be made (unless INPUT_FLAG_NONBLOCK is set). 
* - * @in evbuf Raw audio data to write + * @in evbuf Raw PCM-LE audio data to write + * @in sample_rate Sample rate of the data + * @in bits_per_sample Bits per sample (typically 16 or 24) * @in flags One or more INPUT_FLAG_* * @return 0 on success, EAGAIN if buffer was full (and _NONBLOCK is set), * -1 on error */ int -input_write(struct evbuffer *evbuf, short flags); +input_write(struct evbuffer *evbuf, int sample_rate, int bits_per_sample, short flags); /* * Input modules can use this to wait in the playback loop (like input_write() diff --git a/src/inputs/file_http.c b/src/inputs/file_http.c index 7422eacb..780ac50a 100644 --- a/src/inputs/file_http.c +++ b/src/inputs/file_http.c @@ -26,12 +26,13 @@ #include "transcode.h" #include "http.h" #include "misc.h" +#include "logger.h" #include "input.h" static int setup(struct player_source *ps) { - ps->input_ctx = transcode_setup(XCODE_PCM16_NOHEADER, ps->data_kind, ps->path, ps->len_ms, NULL); + ps->input_ctx = transcode_setup(XCODE_PCM_NATIVE, ps->data_kind, ps->path, ps->len_ms, NULL); if (!ps->input_ctx) return -1; @@ -57,27 +58,33 @@ setup_http(struct player_source *ps) static int start(struct player_source *ps) { + struct transcode_ctx *ctx = ps->input_ctx; struct evbuffer *evbuf; short flags; + int sample_rate; + int bps; int ret; int icy_timer; evbuf = evbuffer_new(); + sample_rate = transcode_encode_query(ctx->encode_ctx, "sample_rate"); + bps = transcode_encode_query(ctx->encode_ctx, "bits_per_sample"); + ret = -1; flags = 0; while (!input_loop_break && !(flags & INPUT_FLAG_EOF)) { // We set "wanted" to 1 because the read size doesn't matter to us // TODO optimize? - ret = transcode(evbuf, &icy_timer, ps->input_ctx, 1); + ret = transcode(evbuf, &icy_timer, ctx, 1); if (ret < 0) break; flags = ((ret == 0) ? INPUT_FLAG_EOF : 0) | (icy_timer ? 
INPUT_FLAG_METADATA : 0); - ret = input_write(evbuf, flags); + ret = input_write(evbuf, sample_rate, bps, flags); if (ret < 0) break; } diff --git a/src/inputs/pipe.c b/src/inputs/pipe.c index e1128912..95678ce5 100644 --- a/src/inputs/pipe.c +++ b/src/inputs/pipe.c @@ -103,6 +103,9 @@ static pthread_t tid_pipe; static struct event_base *evbase_pipe; static struct commands_base *cmdbase; +// From config - the sample rate and bps of the pipe input +static int pipe_sample_rate; +static int pipe_bits_per_sample; // From config - should we watch library pipes for data or only start on request static int pipe_autostart; // The mfi id of the pipe autostarted by the pipe thread @@ -307,7 +310,7 @@ parse_progress(struct input_metadata *m, char *progress) m->rtptime = start; // Not actually used - we have our own rtptime m->offset = (pos > start) ? (pos - start) : 0; - m->song_length = (end - start) * 10 / 441; // Convert to ms based on 44100 + m->song_length = (end - start) * 1000 / pipe_sample_rate; } static void @@ -845,7 +848,7 @@ start(struct player_source *ps) ret = evbuffer_read(evbuf, pipe->fd, PIPE_READ_MAX); if ((ret == 0) && (pipe->is_autostarted)) { - input_write(evbuf, INPUT_FLAG_EOF); // Autostop + input_write(evbuf, pipe_sample_rate, pipe_bits_per_sample, INPUT_FLAG_EOF); // Autostop break; } else if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) @@ -862,7 +865,7 @@ start(struct player_source *ps) flags = (pipe_metadata_is_new ? 
INPUT_FLAG_METADATA : 0); pipe_metadata_is_new = 0; - ret = input_write(evbuf, flags); + ret = input_write(evbuf, pipe_sample_rate, pipe_bits_per_sample, flags); if (ret < 0) break; } @@ -945,6 +948,20 @@ init(void) CHECK_ERR(L_PLAYER, listener_add(pipe_listener_cb, LISTENER_DATABASE)); } + pipe_sample_rate = cfg_getint(cfg_getsec(cfg, "library"), "pipe_sample_rate"); + if (pipe_sample_rate != 44100 && pipe_sample_rate != 48000 && pipe_sample_rate != 96000) // Reject anything but the accepted rates + { + DPRINTF(E_FATAL, L_PLAYER, "The configuration of pipe_sample_rate is invalid: %d\n", pipe_sample_rate); + return -1; + } + + pipe_bits_per_sample = cfg_getint(cfg_getsec(cfg, "library"), "pipe_bits_per_sample"); + if (pipe_bits_per_sample != 16 && pipe_bits_per_sample != 24) // Reject anything but 16 or 24 bit + { + DPRINTF(E_FATAL, L_PLAYER, "The configuration of pipe_bits_per_sample is invalid: %d\n", pipe_bits_per_sample); + return -1; + } + return 0; } diff --git a/src/spotify.c b/src/spotify.c index ed6d5745..b332f251 100644 --- a/src/spotify.c +++ b/src/spotify.c @@ -719,7 +719,7 @@ playback_eot(void *arg, int *retval) g_state = SPOTIFY_STATE_STOPPING; // TODO 1) This will block for a while, but perhaps ok? 
- input_write(spotify_audio_buffer, INPUT_FLAG_EOF); + input_write(spotify_audio_buffer, 0, 0, INPUT_FLAG_EOF); *retval = 0; return COMMAND_END; @@ -1011,9 +1011,9 @@ static int music_delivery(sp_session *sess, const sp_audioformat *format, int ret; /* No support for resampling right now */ - if ((format->sample_rate != 44100) || (format->channels != 2)) + if ((format->sample_type != SP_SAMPLETYPE_INT16_NATIVE_ENDIAN) || (format->channels != 2)) { - DPRINTF(E_LOG, L_SPOTIFY, "Got music with unsupported samplerate or channels, stopping playback\n"); + DPRINTF(E_LOG, L_SPOTIFY, "Got music with unsupported sample format or number of channels, stopping playback\n"); spotify_playback_stop_nonblock(); return num_frames; } @@ -1037,7 +1037,7 @@ static int music_delivery(sp_session *sess, const sp_audioformat *format, // The input buffer only accepts writing when it is approaching depletion, and // because we use NONBLOCK it will just return if this is not the case. So in // most cases no actual write is made and spotify_audio_buffer will just grow. 
- input_write(spotify_audio_buffer, INPUT_FLAG_NONBLOCK); + input_write(spotify_audio_buffer, format->sample_rate, 16, INPUT_FLAG_NONBLOCK); return num_frames; } diff --git a/src/transcode.c b/src/transcode.c index 252d41fa..4673b831 100644 --- a/src/transcode.c +++ b/src/transcode.c @@ -76,12 +76,10 @@ struct settings_ctx // Audio settings enum AVCodecID audio_codec; - const char *audio_codec_name; int sample_rate; uint64_t channel_layout; int channels; enum AVSampleFormat sample_format; - int byte_depth; bool wavheader; bool icy; @@ -179,20 +177,12 @@ struct encode_ctx uint8_t header[44]; }; -struct transcode_ctx -{ - struct decode_ctx *decode_ctx; - struct encode_ctx *encode_ctx; -}; - /* -------------------------- PROFILE CONFIGURATION ------------------------ */ static int init_settings(struct settings_ctx *settings, enum transcode_profile profile) { - const AVCodecDescriptor *codec_desc; - memset(settings, 0, sizeof(struct settings_ctx)); switch (profile) @@ -207,7 +197,13 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile) settings->channel_layout = AV_CH_LAYOUT_STEREO; settings->channels = 2; settings->sample_format = AV_SAMPLE_FMT_S16; - settings->byte_depth = 2; // Bytes per sample = 16/8 + settings->icy = 1; + break; + + case XCODE_PCM_NATIVE: // Sample rate and bit depth determined by source + settings->encode_audio = 1; + settings->channel_layout = AV_CH_LAYOUT_STEREO; + settings->channels = 2; settings->icy = 1; break; @@ -219,7 +215,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile) settings->channel_layout = AV_CH_LAYOUT_STEREO; settings->channels = 2; settings->sample_format = AV_SAMPLE_FMT_S16P; - settings->byte_depth = 2; // Bytes per sample = 16/8 break; case XCODE_OPUS: @@ -230,7 +225,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile) settings->channel_layout = AV_CH_LAYOUT_STEREO; settings->channels = 2; settings->sample_format = AV_SAMPLE_FMT_S16; // 
Only libopus support - settings->byte_depth = 2; // Bytes per sample = 16/8 break; case XCODE_JPEG: @@ -253,18 +247,6 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile) return -1; } - if (settings->audio_codec) - { - codec_desc = avcodec_descriptor_get(settings->audio_codec); - settings->audio_codec_name = codec_desc->name; - } - - if (settings->video_codec) - { - codec_desc = avcodec_descriptor_get(settings->video_codec); - settings->video_codec_name = codec_desc->name; - } - return 0; } @@ -319,13 +301,15 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s { uint32_t wav_len; int duration; + int bps; if (src_ctx->duration) duration = src_ctx->duration; else duration = 3 * 60 * 1000; /* 3 minutes, in ms */ - wav_len = ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate * (duration / 1000); + bps = av_get_bits_per_sample(ctx->settings.audio_codec); + wav_len = ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate * (duration / 1000); *est_size = wav_len + sizeof(ctx->header); @@ -336,9 +320,9 @@ make_wav_header(struct encode_ctx *ctx, struct decode_ctx *src_ctx, off_t *est_s add_le16(ctx->header + 20, 1); add_le16(ctx->header + 22, ctx->settings.channels); /* channels */ add_le32(ctx->header + 24, ctx->settings.sample_rate); /* samplerate */ - add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * ctx->settings.byte_depth); /* byte rate */ - add_le16(ctx->header + 32, ctx->settings.channels * ctx->settings.byte_depth); /* block align */ - add_le16(ctx->header + 34, ctx->settings.byte_depth * 8); /* bits per sample */ + add_le32(ctx->header + 28, ctx->settings.sample_rate * ctx->settings.channels * (bps / 8)); /* byte rate */ + add_le16(ctx->header + 32, ctx->settings.channels * (bps / 8)); /* block align */ + add_le16(ctx->header + 34, bps); /* bits per sample */ memcpy(ctx->header + 36, "data", 4); add_le32(ctx->header + 40, wav_len); } @@ 
-368,20 +352,27 @@ stream_find(struct decode_ctx *ctx, unsigned int stream_index) * @out ctx A pre-allocated stream ctx where we save stream and codec info * @in output Output to add the stream to * @in codec_id What kind of codec should we use - * @in codec_name Name of codec (only used for logging) * @return Negative on failure, otherwise zero */ static int -stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id, const char *codec_name) +stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id) { + const AVCodecDescriptor *codec_desc; AVCodec *encoder; AVDictionary *options = NULL; int ret; + codec_desc = avcodec_descriptor_get(codec_id); + if (!codec_desc) + { + DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", codec_id); + return -1; + } + encoder = avcodec_find_encoder(codec_id); if (!encoder) { - DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_name); + DPRINTF(E_LOG, L_XCODE, "Necessary encoder (%s) not found\n", codec_desc->name); return -1; } @@ -393,7 +384,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id if (!s->codec->pix_fmt) { s->codec->pix_fmt = avcodec_default_get_format(s->codec, encoder->pix_fmts); - DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_name); + DPRINTF(E_DBG, L_XCODE, "Pixel format set to %s (encoder is %s)\n", av_get_pix_fmt_name(s->codec->pix_fmt), codec_desc->name); } if (ctx->ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) @@ -406,7 +397,7 @@ stream_add(struct encode_ctx *ctx, struct stream_ctx *s, enum AVCodecID codec_id ret = avcodec_open2(s->codec, NULL, &options); if (ret < 0) { - DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_name, err2str(ret)); + DPRINTF(E_LOG, L_XCODE, "Cannot open encoder (%s): %s\n", codec_desc->name, err2str(ret)); avcodec_free_context(&s->codec); return -1; } @@ -415,7 +406,7 @@ stream_add(struct encode_ctx *ctx, 
struct stream_ctx *s, enum AVCodecID codec_id ret = avcodec_parameters_from_context(s->stream->codecpar, s->codec); if (ret < 0) { - DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_name, err2str(ret)); + DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", codec_desc->name, err2str(ret)); avcodec_free_context(&s->codec); return -1; } @@ -888,14 +879,14 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx) if (ctx->settings.encode_audio) { - ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec, ctx->settings.audio_codec_name); + ret = stream_add(ctx, &ctx->audio_stream, ctx->settings.audio_codec); if (ret < 0) goto out_free_streams; } if (ctx->settings.encode_video) { - ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec, ctx->settings.video_codec_name); + ret = stream_add(ctx, &ctx->video_stream, ctx->settings.video_codec); if (ret < 0) goto out_free_streams; } @@ -1161,6 +1152,7 @@ struct encode_ctx * transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ctx, off_t *est_size, int width, int height) { struct encode_ctx *ctx; + int bps; CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx))); CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc()); @@ -1172,6 +1164,26 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct ctx->settings.width = width; ctx->settings.height = height; + if (!ctx->settings.sample_rate && ctx->settings.encode_audio) + ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate; + + if (!ctx->settings.sample_format && ctx->settings.encode_audio) + { + bps = av_get_bits_per_sample(src_ctx->audio_stream.codec->codec_id); + if (bps >= 24) + { + ctx->settings.sample_format = AV_SAMPLE_FMT_S32; + ctx->settings.audio_codec = AV_CODEC_ID_PCM_S24LE; + ctx->settings.format = "s24le"; + } + else + { + ctx->settings.sample_format = AV_SAMPLE_FMT_S16; + ctx->settings.audio_codec = 
AV_CODEC_ID_PCM_S16LE; + ctx->settings.format = "s16le"; + } + } + if (ctx->settings.wavheader) make_wav_header(ctx, src_ctx, est_size); @@ -1182,7 +1194,10 @@ transcode_encode_setup(enum transcode_profile profile, struct decode_ctx *src_ct goto fail_close; if (ctx->settings.icy && src_ctx->data_kind == DATA_KIND_HTTP) - ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * ctx->settings.byte_depth * ctx->settings.sample_rate; + { + bps = av_get_bits_per_sample(ctx->settings.audio_codec); + ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.channels * (bps / 8) * ctx->settings.sample_rate; + } return ctx; @@ -1223,6 +1238,7 @@ transcode_setup(enum transcode_profile profile, enum data_kind data_kind, const struct decode_ctx * transcode_decode_setup_raw(void) { + const AVCodecDescriptor *codec_desc; struct decode_ctx *ctx; AVCodec *decoder; int ret; @@ -1234,13 +1250,20 @@ transcode_decode_setup_raw(void) goto out_free_ctx; } + codec_desc = avcodec_descriptor_get(ctx->settings.audio_codec); + if (!codec_desc) + { + DPRINTF(E_LOG, L_XCODE, "Invalid codec ID (%d)\n", ctx->settings.audio_codec); + goto out_free_ctx; + } + // In raw mode we won't actually need to read or decode, but we still setup // the decode_ctx because transcode_encode_setup() gets info about the input // through this structure (TODO dont' do that) decoder = avcodec_find_decoder(ctx->settings.audio_codec); if (!decoder) { - DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", ctx->settings.audio_codec_name); + DPRINTF(E_LOG, L_XCODE, "Could not find decoder for: %s\n", codec_desc->name); goto out_free_ctx; } @@ -1255,7 +1278,7 @@ transcode_decode_setup_raw(void) ret = avcodec_parameters_from_context(ctx->audio_stream.stream->codecpar, ctx->audio_stream.codec); if (ret < 0) { - DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", ctx->settings.audio_codec_name, err2str(ret)); + DPRINTF(E_LOG, L_XCODE, "Cannot copy stream parameters (%s): %s\n", 
codec_desc->name, err2str(ret)); goto out_free_codec; } @@ -1659,6 +1682,24 @@ transcode_decode_query(struct decode_ctx *ctx, const char *query) return -1; } +int +transcode_encode_query(struct encode_ctx *ctx, const char *query) +{ + if (strcmp(query, "sample_rate") == 0) + { + if (ctx->audio_stream.stream) + return ctx->audio_stream.stream->codecpar->sample_rate; + } + else if (strcmp(query, "bits_per_sample") == 0) + { + if (ctx->audio_stream.stream) + return av_get_bits_per_sample(ctx->audio_stream.stream->codecpar->codec_id); + } + + return -1; +} + + /* Metadata */ struct http_icy_metadata * diff --git a/src/transcode.h b/src/transcode.h index 79e89268..333d7df0 100644 --- a/src/transcode.h +++ b/src/transcode.h @@ -8,10 +8,12 @@ enum transcode_profile { - // Transcodes the best audio stream into PCM16 (does not add wav header) + // Decodes/resamples the best audio stream into 44100 PCM16 (does not add wav header) XCODE_PCM16_NOHEADER, - // Transcodes the best audio stream into PCM16 (with wav header) + // Decodes/resamples the best audio stream into 44100 PCM16 (with wav header) XCODE_PCM16_HEADER, + // Decodes the best audio stream into PCM16 or PCM24, no resampling (does not add wav header) + XCODE_PCM_NATIVE, // Transcodes the best audio stream into MP3 XCODE_MP3, // Transcodes the best audio stream into OPUS @@ -23,7 +25,11 @@ enum transcode_profile struct decode_ctx; struct encode_ctx; -struct transcode_ctx; +struct transcode_ctx +{ + struct decode_ctx *decode_ctx; + struct encode_ctx *encode_ctx; +}; typedef void transcode_frame; @@ -122,6 +128,16 @@ transcode_seek(struct transcode_ctx *ctx, int ms); int transcode_decode_query(struct decode_ctx *ctx, const char *query); +/* Query for information (e.g. 
sample rate) about the output being produced by + * the transcoding + * + * @in ctx Encode context + * @in query Query - see implementation for supported queries + * @return Negative if error, otherwise query dependent + */ +int +transcode_encode_query(struct encode_ctx *ctx, const char *query); + // Metadata struct http_icy_metadata * transcode_metadata(struct transcode_ctx *ctx, int *changed);