[airplay] Use compressed ALAC to save bandwidth

Uses ffmpeg to encode the audio as compressed ALAC. Previously, each audio packet was
always 1411 bytes; now packets are about half that size, or much smaller if the audio is just silence.
This commit is contained in:
ejurgensen 2021-01-17 22:45:47 +01:00
parent 1977db0319
commit 275d66b6ad

View File

@ -1,8 +1,4 @@
/* /*
* ALAC encoding adapted from airplay_play
* Copyright (C) 2005 Shiro Ninomiya <shiron@snino.com>
* GPLv2+
*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -55,6 +51,7 @@
#include "artwork.h" #include "artwork.h"
#include "dmap_common.h" #include "dmap_common.h"
#include "rtp_common.h" #include "rtp_common.h"
#include "transcode.h"
#include "outputs.h" #include "outputs.h"
#include "pair.h" #include "pair.h"
@ -64,7 +61,6 @@
* inplace encryption * inplace encryption
* latency needs different handling * latency needs different handling
* support ipv6, e.g. in SETPEERS * support ipv6, e.g. in SETPEERS
* ffmpeg alac encoding
* *
*/ */
@ -77,9 +73,6 @@
// Full traffic dumps in the log in debug mode // Full traffic dumps in the log in debug mode
#define AIRPLAY_DUMP_TRAFFIC 0 #define AIRPLAY_DUMP_TRAFFIC 0
#define ALAC_HEADER_LEN 3
#define AIRPLAY_QUALITY_SAMPLE_RATE_DEFAULT 44100 #define AIRPLAY_QUALITY_SAMPLE_RATE_DEFAULT 44100
#define AIRPLAY_QUALITY_BITS_PER_SAMPLE_DEFAULT 16 #define AIRPLAY_QUALITY_BITS_PER_SAMPLE_DEFAULT 16
#define AIRPLAY_QUALITY_CHANNELS_DEFAULT 2 #define AIRPLAY_QUALITY_CHANNELS_DEFAULT 2
@ -211,8 +204,12 @@ struct airplay_extra
struct airplay_master_session struct airplay_master_session
{ {
struct evbuffer *evbuf; struct evbuffer *input_buffer;
int evbuf_samples; int input_buffer_samples;
// ALAC encoder and buffer for encoded data
struct encode_ctx *encode_ctx;
struct evbuffer *encoded_buffer;
struct rtp_session *rtp_session; struct rtp_session *rtp_session;
@ -222,6 +219,8 @@ struct airplay_master_session
size_t rawbuf_size; size_t rawbuf_size;
int samples_per_packet; int samples_per_packet;
struct media_quality quality;
// Number of samples that we tell the output to buffer (this will mean that // Number of samples that we tell the output to buffer (this will mean that
// the position that we send in the sync packages are offset by this amount // the position that we send in the sync packages are offset by this amount
// compared to the rtptimes of the corresponding RTP packages we are sending) // compared to the rtptimes of the corresponding RTP packages we are sending)
@ -474,82 +473,28 @@ sequence_continue(struct airplay_seq_ctx *seq_ctx);
/* ------------------------------- MISC HELPERS ----------------------------- */ /* ------------------------------- MISC HELPERS ----------------------------- */
/* ALAC bits writer - big endian static inline int
* p outgoing buffer pointer alac_encode(struct evbuffer *evbuf, struct encode_ctx *encode_ctx, uint8_t *rawbuf, size_t rawbuf_size, int nsamples, struct media_quality *quality)
* val bitfield value
* blen bitfield length, max 8 bits
* bpos bit position in the current byte (pointed by *p)
*/
static inline void
alac_write_bits(uint8_t **p, uint8_t val, int blen, int *bpos)
{ {
int lb; transcode_frame *frame;
int rb; int len;
int bd;
/* Remaining bits in the current byte */ frame = transcode_frame_new(rawbuf, rawbuf_size, nsamples, quality);
lb = 7 - *bpos + 1; if (!frame)
/* Number of bits overflowing */
rb = lb - blen;
if (rb >= 0)
{ {
bd = val << rb; DPRINTF(E_LOG, L_AIRPLAY, "Could not convert raw PCM to frame (bufsize=%zu)\n", rawbuf_size);
if (*bpos == 0) return -1;
**p = bd;
else
**p |= bd;
/* No over- nor underflow, we're done with this byte */
if (rb == 0)
{
*p += 1;
*bpos = 0;
}
else
*bpos += blen;
} }
else
len = transcode_encode(evbuf, encode_ctx, frame, 0);
transcode_frame_free(frame);
if (len < 0)
{ {
/* Fill current byte */ DPRINTF(E_LOG, L_AIRPLAY, "Could not ALAC encode frame\n");
bd = val >> -rb; return -1;
**p |= bd;
/* Overflow goes to the next byte */
*p += 1;
**p = val << (8 + rb);
*bpos = -rb;
} }
}
/* Raw data must be little endian */ return len;
static void
alac_encode(uint8_t *dst, uint8_t *raw, int len)
{
uint8_t *maxraw;
int bpos;
bpos = 0;
maxraw = raw + len;
alac_write_bits(&dst, 1, 3, &bpos); /* channel=1, stereo */
alac_write_bits(&dst, 0, 4, &bpos); /* unknown */
alac_write_bits(&dst, 0, 8, &bpos); /* unknown */
alac_write_bits(&dst, 0, 4, &bpos); /* unknown */
alac_write_bits(&dst, 0, 1, &bpos); /* hassize */
alac_write_bits(&dst, 0, 2, &bpos); /* unused */
alac_write_bits(&dst, 1, 1, &bpos); /* is-not-compressed */
for (; raw < maxraw; raw += 4)
{
/* Byteswap to big endian */
alac_write_bits(&dst, *(raw + 1), 8, &bpos);
alac_write_bits(&dst, *raw, 8, &bpos);
alac_write_bits(&dst, *(raw + 3), 8, &bpos);
alac_write_bits(&dst, *(raw + 2), 8, &bpos);
}
} }
/* AirTunes v2 time synchronization helpers */ /* AirTunes v2 time synchronization helpers */
@ -1144,50 +1089,6 @@ session_status(struct airplay_session *rs)
rs->callback_id = -1; rs->callback_id = -1;
} }
/* Returns the master session that matches the given quality, creating one if
 * none exists yet.
 *
 * An existing session is reused when quality_is_equal() reports that its RTP
 * session quality matches the requested quality. Otherwise the requested
 * quality is subscribed to via outputs_quality_subscribe(), a new session is
 * allocated and set up, and it is linked in at the head of
 * airplay_master_sessions.
 *
 * @param quality  the sample rate/bits per sample/channels the session must have
 * @return         the existing or new master session, or NULL if the quality
 *                 subscription or the RTP session creation failed
 */
static struct airplay_master_session *
master_session_make(struct media_quality *quality)
{
struct airplay_master_session *rms;
int ret;
// First check if we already have a suitable session
for (rms = airplay_master_sessions; rms; rms = rms->next)
{
if (quality_is_equal(quality, &rms->rtp_session->quality))
return rms;
}
// Let's create a master session
ret = outputs_quality_subscribe(quality);
if (ret < 0)
{
DPRINTF(E_LOG, L_AIRPLAY, "Could not subscribe to required audio quality (%d/%d/%d)\n", quality->sample_rate, quality->bits_per_sample, quality->channels);
return NULL;
}
// NOTE(review): CHECK_NULL is defined elsewhere; presumably it does not
// return here on allocation failure, since rms is used unchecked below - confirm
CHECK_NULL(L_AIRPLAY, rms = calloc(1, sizeof(struct airplay_master_session)));
rms->rtp_session = rtp_session_new(quality, AIRPLAY_PACKET_BUFFER_SIZE, 0);
if (!rms->rtp_session)
{
// Nothing but the subscription and rms itself exists yet, so undo just those
outputs_quality_unsubscribe(quality);
free(rms);
return NULL;
}
rms->samples_per_packet = AIRPLAY_SAMPLES_PER_PACKET;
// rawbuf holds the raw audio for exactly one packet (STOB presumably converts
// a sample count to a byte count for the given quality - confirm)
rms->rawbuf_size = STOB(rms->samples_per_packet, quality->bits_per_sample, quality->channels);
rms->output_buffer_samples = OUTPUTS_BUFFER_DURATION * quality->sample_rate;
CHECK_NULL(L_AIRPLAY, rms->rawbuf = malloc(rms->rawbuf_size));
CHECK_NULL(L_AIRPLAY, rms->evbuf = evbuffer_new());
// Link the new session in at the head of the global list
rms->next = airplay_master_sessions;
airplay_master_sessions = rms;
return rms;
}
static void static void
master_session_free(struct airplay_master_session *rms) master_session_free(struct airplay_master_session *rms)
{ {
@ -1196,7 +1097,14 @@ master_session_free(struct airplay_master_session *rms)
outputs_quality_unsubscribe(&rms->rtp_session->quality); outputs_quality_unsubscribe(&rms->rtp_session->quality);
rtp_session_free(rms->rtp_session); rtp_session_free(rms->rtp_session);
evbuffer_free(rms->evbuf);
transcode_encode_cleanup(&rms->encode_ctx);
if (rms->input_buffer)
evbuffer_free(rms->input_buffer);
if (rms->encoded_buffer)
evbuffer_free(rms->encoded_buffer);
free(rms->rawbuf); free(rms->rawbuf);
free(rms); free(rms);
} }
@ -1230,6 +1138,70 @@ master_session_cleanup(struct airplay_master_session *rms)
master_session_free(rms); master_session_free(rms);
} }
/* Returns the master session that matches the given quality, creating one if
 * none exists yet.
 *
 * An existing session is reused when quality_is_equal() reports that its RTP
 * session quality matches the requested quality. Otherwise the requested
 * quality is subscribed to, a new session is allocated together with its RTP
 * session, ALAC encoder, raw buffer and input/encoded evbuffers, and the
 * session is linked in at the head of airplay_master_sessions.
 *
 * @param quality  the sample rate/bits per sample/channels the session must have
 * @return         the existing or new master session, or NULL if the quality
 *                 subscription, the RTP session or the ALAC encoder could not
 *                 be set up
 */
static struct airplay_master_session *
master_session_make(struct media_quality *quality)
{
  struct airplay_master_session *rms;
  struct decode_ctx *decode_ctx;
  int ret;

  // First check if we already have a suitable session
  for (rms = airplay_master_sessions; rms; rms = rms->next)
    {
      if (quality_is_equal(quality, &rms->rtp_session->quality))
	return rms;
    }

  // Let's create a master session
  ret = outputs_quality_subscribe(quality);
  if (ret < 0)
    {
      DPRINTF(E_LOG, L_AIRPLAY, "Could not subscribe to required audio quality (%d/%d/%d)\n", quality->sample_rate, quality->bits_per_sample, quality->channels);
      return NULL;
    }

  CHECK_NULL(L_AIRPLAY, rms = calloc(1, sizeof(struct airplay_master_session)));

  rms->rtp_session = rtp_session_new(quality, AIRPLAY_PACKET_BUFFER_SIZE, 0);
  if (!rms->rtp_session)
    {
      // Must not go through the common error path: master_session_free()
      // unsubscribes via &rms->rtp_session->quality, which would dereference
      // the NULL rtp_session. Only the subscription and rms exist at this
      // point, so undo those by hand.
      outputs_quality_unsubscribe(quality);
      free(rms);
      return NULL;
    }

  // The decode context is only needed as a description of the raw PCM input
  // when setting up the encoder, so it is released right after
  decode_ctx = transcode_decode_setup_raw(XCODE_PCM16, quality);
  if (!decode_ctx)
    {
      DPRINTF(E_LOG, L_AIRPLAY, "Could not create decoding context\n");
      goto error;
    }

  rms->encode_ctx = transcode_encode_setup(XCODE_ALAC, quality, decode_ctx, NULL, 0, 0);
  transcode_decode_cleanup(&decode_ctx);
  if (!rms->encode_ctx)
    {
      DPRINTF(E_LOG, L_AIRPLAY, "Will not be able to stream AirPlay 2, ffmpeg has no ALAC encoder\n");
      goto error;
    }

  rms->quality = *quality;
  rms->samples_per_packet = AIRPLAY_SAMPLES_PER_PACKET;
  // rawbuf holds the raw audio for exactly one packet
  rms->rawbuf_size = STOB(rms->samples_per_packet, quality->bits_per_sample, quality->channels);
  rms->output_buffer_samples = OUTPUTS_BUFFER_DURATION * quality->sample_rate;

  CHECK_NULL(L_AIRPLAY, rms->rawbuf = malloc(rms->rawbuf_size));
  CHECK_NULL(L_AIRPLAY, rms->input_buffer = evbuffer_new());
  CHECK_NULL(L_AIRPLAY, rms->encoded_buffer = evbuffer_new());

  // Link the new session in at the head of the global list
  rms->next = airplay_master_sessions;
  airplay_master_sessions = rms;

  return rms;

 error:
  // Only reached with a valid rms->rtp_session, which master_session_free()
  // requires for unsubscribing the quality
  master_session_free(rms);
  return NULL;
}
static void static void
session_free(struct airplay_session *rs) session_free(struct airplay_session *rs)
{ {
@ -2005,10 +1977,15 @@ packets_send(struct airplay_master_session *rms)
{ {
struct rtp_packet *pkt; struct rtp_packet *pkt;
struct airplay_session *rs; struct airplay_session *rs;
int len;
pkt = rtp_packet_next(rms->rtp_session, ALAC_HEADER_LEN + rms->rawbuf_size, rms->samples_per_packet, AIRPLAY_RTP_PAYLOADTYPE, 0); len = alac_encode(rms->encoded_buffer, rms->encode_ctx, rms->rawbuf, rms->rawbuf_size, rms->samples_per_packet, &rms->quality);
if (len < 0)
return -1;
alac_encode(pkt->payload, rms->rawbuf, rms->rawbuf_size); pkt = rtp_packet_next(rms->rtp_session, len, rms->samples_per_packet, AIRPLAY_RTP_PAYLOADTYPE, 0);
evbuffer_remove(rms->encoded_buffer, pkt->payload, pkt->payload_len);
for (rs = airplay_sessions; rs; rs = rs->next) for (rs = airplay_sessions; rs; rs = rs->next)
{ {
@ -2061,15 +2038,15 @@ timestamp_set(struct airplay_master_session *rms, struct timespec ts)
// -> we should be playing rtptime X + 600 // -> we should be playing rtptime X + 600
// //
// So how do we measure samples received from player? We know that from the // So how do we measure samples received from player? We know that from the
// pos, which says how much has been sent to the device, and from rms->evbuf, // pos, which says how much has been sent to the device, and from rms->input_buffer,
// which is the unsent stuff being buffered: // which is the unsent stuff being buffered:
// - received = (pos - X) + rms->evbuf_samples // - received = (pos - X) + rms->input_buffer_samples
// //
// This means the rtptime is computed as: // This means the rtptime is computed as:
// - rtptime = X + received - rms->output_buffer_samples // - rtptime = X + received - rms->output_buffer_samples
// -> rtptime = X + (pos - X) + rms->evbuf_samples - rms->out_buffer_samples // -> rtptime = X + (pos - X) + rms->input_buffer_samples - rms->out_buffer_samples
// -> rtptime = pos + rms->evbuf_samples - rms->output_buffer_samples // -> rtptime = pos + rms->input_buffer_samples - rms->output_buffer_samples
rms->cur_stamp.pos = rms->rtp_session->pos + rms->evbuf_samples - rms->output_buffer_samples; rms->cur_stamp.pos = rms->rtp_session->pos + rms->input_buffer_samples - rms->output_buffer_samples;
} }
static void static void
@ -4352,14 +4329,14 @@ airplay_write(struct output_buffer *obuf)
packets_sync_send(rms); packets_sync_send(rms);
// TODO avoid this copy // TODO avoid this copy
evbuffer_add(rms->evbuf, obuf->data[i].buffer, obuf->data[i].bufsize); evbuffer_add(rms->input_buffer, obuf->data[i].buffer, obuf->data[i].bufsize);
rms->evbuf_samples += obuf->data[i].samples; rms->input_buffer_samples += obuf->data[i].samples;
// Send as many packets as we have data for (one packet requires rawbuf_size bytes) // Send as many packets as we have data for (one packet requires rawbuf_size bytes)
while (evbuffer_get_length(rms->evbuf) >= rms->rawbuf_size) while (evbuffer_get_length(rms->input_buffer) >= rms->rawbuf_size)
{ {
evbuffer_remove(rms->evbuf, rms->rawbuf, rms->rawbuf_size); evbuffer_remove(rms->input_buffer, rms->rawbuf, rms->rawbuf_size);
rms->evbuf_samples -= rms->samples_per_packet; rms->input_buffer_samples -= rms->samples_per_packet;
packets_send(rms); packets_send(rms);
} }