21 #include <freerdp/config.h>
23 #include <freerdp/log.h>
25 #include <libavcodec/avcodec.h>
26 #include <libavutil/avutil.h>
27 #include <libavutil/opt.h>
28 #if defined(SWRESAMPLE_FOUND)
29 #include <libswresample/swresample.h>
30 #elif defined(AVRESAMPLE_FOUND)
31 #include <libavresample/avresample.h>
33 #error "libswresample or libavresample required"
37 #include "dsp_ffmpeg.h"
39 #define TAG FREERDP_TAG("dsp.ffmpeg")
/* State of one FFmpeg backed DSP instance.
 * NOTE(review): this listing shows only part of the member list; the functions
 * in this file additionally access common, isOpen, id, codec, frame, resampled,
 * buffered and packet on this structure. */
41 struct S_FREERDP_DSP_CONTEXT
/* Number of samples currently stored in the `buffered` frame; advanced and
 * reset by freerdp_dsp_ffmpeg_encode. */
47 UINT32 bufferedSamples;
/* Codec context, allocated with avcodec_alloc_context3 in ffmpeg_open_context
 * and released with avcodec_free_context in ffmpeg_close_context. */
51 AVCodecContext* context;
/* The resampler handle type depends on which resampling library was found. */
56 #if defined(SWRESAMPLE_FOUND)
/* libavresample flavour of the resampler handle (the libswresample
 * alternative is not visible in this excerpt). */
59 AVAudioResampleContext* rcontext;
/* Classifies a codec id for the filter check performed by ffmpeg_open_context
 * and freerdp_dsp_ffmpeg_supports_format (both treat a TRUE result as
 * "do not use this codec").
 * NOTE(review): the return statement of each case group is not visible in this
 * excerpt, so the exact verdict per group (and whether `encoder` influences
 * it) must be confirmed against the complete file. */
63 static BOOL ffmpeg_codec_is_filtered(
enum AVCodecID
id, BOOL encoder)
/* The ids listed next follow a guard that is only active when
 * WITH_DSP_EXPERIMENTAL is NOT defined; where the guard ends is not visible
 * here - TODO confirm. */
67 #if !defined(WITH_DSP_EXPERIMENTAL)
69 case AV_CODEC_ID_ADPCM_IMA_OKI:
71 case AV_CODEC_ID_ADPCM_MS:
72 case AV_CODEC_ID_G723_1:
73 case AV_CODEC_ID_GSM_MS:
74 case AV_CODEC_ID_PCM_ALAW:
75 case AV_CODEC_ID_PCM_MULAW:
/* AV_CODEC_ID_NONE is what ffmpeg_get_avcodec yields for unknown formats. */
79 case AV_CODEC_ID_NONE:
83 case AV_CODEC_ID_AAC_LATM:
91 static enum AVCodecID ffmpeg_get_avcodec(
const AUDIO_FORMAT* WINPR_RESTRICT format)
94 return AV_CODEC_ID_NONE;
96 switch (format->wFormatTag)
98 case WAVE_FORMAT_UNKNOWN:
99 return AV_CODEC_ID_NONE;
101 case WAVE_FORMAT_PCM:
102 switch (format->wBitsPerSample)
105 return AV_CODEC_ID_PCM_U16LE;
108 return AV_CODEC_ID_PCM_U8;
111 return AV_CODEC_ID_NONE;
114 case WAVE_FORMAT_DVI_ADPCM:
115 return AV_CODEC_ID_ADPCM_IMA_OKI;
117 case WAVE_FORMAT_ADPCM:
118 return AV_CODEC_ID_ADPCM_MS;
120 case WAVE_FORMAT_ALAW:
121 return AV_CODEC_ID_PCM_ALAW;
123 case WAVE_FORMAT_MULAW:
124 return AV_CODEC_ID_PCM_MULAW;
126 case WAVE_FORMAT_GSM610:
127 return AV_CODEC_ID_GSM_MS;
129 case WAVE_FORMAT_MSG723:
130 return AV_CODEC_ID_G723_1;
132 case WAVE_FORMAT_AAC_MS:
133 return AV_CODEC_ID_AAC;
135 case WAVE_FORMAT_OPUS:
136 return AV_CODEC_ID_OPUS;
139 return AV_CODEC_ID_NONE;
143 static int ffmpeg_sample_format(
const AUDIO_FORMAT* WINPR_RESTRICT format)
145 switch (format->wFormatTag)
147 case WAVE_FORMAT_PCM:
148 switch (format->wBitsPerSample)
151 return AV_SAMPLE_FMT_U8;
154 return AV_SAMPLE_FMT_S16;
160 case WAVE_FORMAT_DVI_ADPCM:
161 case WAVE_FORMAT_ADPCM:
162 return AV_SAMPLE_FMT_S16P;
164 case WAVE_FORMAT_MPEGLAYER3:
165 case WAVE_FORMAT_AAC_MS:
166 return AV_SAMPLE_FMT_FLTP;
168 case WAVE_FORMAT_OPUS:
169 return AV_SAMPLE_FMT_S16;
171 case WAVE_FORMAT_MSG723:
172 case WAVE_FORMAT_GSM610:
173 return AV_SAMPLE_FMT_S16P;
175 case WAVE_FORMAT_ALAW:
176 return AV_SAMPLE_FMT_S16;
183 static void ffmpeg_close_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
187 if (context->context)
188 avcodec_free_context(&context->context);
191 av_frame_free(&context->frame);
193 if (context->resampled)
194 av_frame_free(&context->resampled);
196 if (context->buffered)
197 av_frame_free(&context->buffered);
200 av_packet_free(&context->packet);
202 if (context->rcontext)
204 #if defined(SWRESAMPLE_FOUND)
205 swr_free(&context->rcontext);
207 avresample_free(&context->rcontext);
211 context->id = AV_CODEC_ID_NONE;
212 context->codec = NULL;
213 context->isOpen = FALSE;
214 context->context = NULL;
215 context->frame = NULL;
216 context->resampled = NULL;
217 context->packet = NULL;
218 context->rcontext = NULL;
/* Looks up, configures and opens the codec for the context and allocates the
 * packet, the three frames and the resampler used by encode/decode.
 * Sets context->isOpen on success; the visible failure path calls
 * ffmpeg_close_context.
 * NOTE(review): the declaration of `format` is not visible in this excerpt;
 * presumably it refers to context->common.format, which
 * freerdp_dsp_ffmpeg_context_reset assigns right before calling this
 * function - TODO confirm. */
222 static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
/* Refuse a missing context and one that is already open. */
226 if (!context || context->isOpen)
233 context->id = ffmpeg_get_avcodec(format);
/* Filtered codecs are not opened. */
235 if (ffmpeg_codec_is_filtered(context->id, context->common.encoder))
/* Pick the encoder or the decoder implementation depending on the direction
 * this context was created for. */
238 if (context->common.encoder)
239 context->codec = avcodec_find_encoder(context->id);
241 context->codec = avcodec_find_decoder(context->id);
246 context->context = avcodec_alloc_context3(context->codec);
248 if (!context->context)
/* Codec specific settings. */
254 case AV_CODEC_ID_GSM_MS:
255 context->context->strict_std_compliance = FF_COMPLIANCE_UNOFFICIAL;
258 case AV_CODEC_ID_AAC:
259 context->context->profile = FF_PROFILE_AAC_MAIN;
266 context->context->max_b_frames = 1;
267 context->context->delay = 0;
/* The channel layout API differs between libavutil versions. */
269 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
270 av_channel_layout_default(&context->context->ch_layout, format->nChannels);
272 context->context->channels = format->nChannels;
273 const int64_t layout = av_get_default_channel_layout(format->nChannels);
274 context->context->channel_layout = layout;
/* NOTE(review): this cast is performed before the INT_MAX assertion on
 * nSamplesPerSec that appears further down. */
276 context->context->sample_rate = (int)format->nSamplesPerSec;
277 context->context->block_align = format->nBlockAlign;
/* Bytes per second converted to bits per second. */
278 context->context->bit_rate = format->nAvgBytesPerSec * 8LL;
279 context->context->sample_fmt = ffmpeg_sample_format(format);
/* One time base tick per sample. */
280 context->context->time_base = av_make_q(1, context->context->sample_rate);
282 if ((ret = avcodec_open2(context->context, context->codec, NULL)) < 0)
284 const char* err = av_err2str(ret);
285 WLog_ERR(TAG,
"Error avcodec_open2 %s [%d]", err, ret);
/* Working objects: packet, input frame, resampled frame, accumulation frame. */
289 context->packet = av_packet_alloc();
291 if (!context->packet)
294 context->frame = av_frame_alloc();
299 context->resampled = av_frame_alloc();
301 if (!context->resampled)
304 context->buffered = av_frame_alloc();
306 if (!context->buffered)
/* Resampler from whichever library is available. */
309 #if defined(SWRESAMPLE_FOUND)
310 context->rcontext = swr_alloc();
312 context->rcontext = avresample_alloc_context();
315 if (!context->rcontext)
/* `frame` is described as 16 bit signed samples at the format's rate and
 * channel count. */
318 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
319 av_channel_layout_default(&context->frame->ch_layout, format->nChannels);
321 context->frame->channel_layout = layout;
322 context->frame->channels = format->nChannels;
324 WINPR_ASSERT(format->nSamplesPerSec <= INT_MAX);
325 context->frame->sample_rate = (int)format->nSamplesPerSec;
326 context->frame->format = AV_SAMPLE_FMT_S16;
/* Encoder: `resampled` takes the sample format and rate of the codec.
 * Otherwise: `resampled` is 16 bit signed at the format's rate. */
328 if (context->common.encoder)
330 context->resampled->format = context->context->sample_fmt;
331 context->resampled->sample_rate = context->context->sample_rate;
335 context->resampled->format = AV_SAMPLE_FMT_S16;
337 WINPR_ASSERT(format->nSamplesPerSec <= INT_MAX);
338 context->resampled->sample_rate = (int)format->nSamplesPerSec;
341 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
342 av_channel_layout_default(&context->resampled->ch_layout, format->nChannels);
344 context->resampled->channel_layout = layout;
345 context->resampled->channels = format->nChannels;
/* The codec reports a fixed frame size: give `buffered` the layout and format
 * of `resampled` and allocate room for exactly one codec frame. */
348 if (context->context->frame_size > 0)
350 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
351 ret = av_channel_layout_copy(&context->buffered->ch_layout, &context->resampled->ch_layout);
355 context->buffered->channel_layout = context->resampled->channel_layout;
356 context->buffered->channels = context->resampled->channels;
358 context->buffered->format = context->resampled->format;
359 context->buffered->nb_samples = context->context->frame_size;
361 ret = av_frame_get_buffer(context->buffered, 1);
366 context->isOpen = TRUE;
/* Failure path: release whatever was set up so far. */
369 ffmpeg_close_context(context);
373 #if defined(SWRESAMPLE_FOUND)
374 static BOOL ffmpeg_resample_frame(SwrContext* WINPR_RESTRICT context, AVFrame* WINPR_RESTRICT in,
375 AVFrame* WINPR_RESTRICT out)
379 if (!swr_is_initialized(context))
381 if ((ret = swr_config_frame(context, out, in)) < 0)
383 const char* err = av_err2str(ret);
384 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
388 if ((ret = (swr_init(context))) < 0)
390 const char* err = av_err2str(ret);
391 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
396 if ((ret = swr_convert_frame(context, out, in)) < 0)
398 const char* err = av_err2str(ret);
399 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
406 static BOOL ffmpeg_resample_frame(AVAudioResampleContext* WINPR_RESTRICT context,
407 AVFrame* WINPR_RESTRICT in, AVFrame* WINPR_RESTRICT out)
411 if (!avresample_is_open(context))
413 if ((ret = avresample_config(context, out, in)) < 0)
415 const char* err = av_err2str(ret);
416 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
420 if ((ret = (avresample_open(context))) < 0)
422 const char* err = av_err2str(ret);
423 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
428 if ((ret = avresample_convert_frame(context, out, in)) < 0)
430 const char* err = av_err2str(ret);
431 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
/* Sends one frame to the encoder and appends every packet the encoder hands
 * back to the stream `out`.
 * NOTE(review): several statements (among them the replacement values used
 * for NaN/infinite samples and the return statements) are not visible in this
 * excerpt. */
439 static BOOL ffmpeg_encode_frame(AVCodecContext* WINPR_RESTRICT context, AVFrame* WINPR_RESTRICT in,
440 AVPacket* WINPR_RESTRICT packet,
wStream* WINPR_RESTRICT out)
/* Planar float input: every sample of every channel plane is inspected
 * before encoding. */
442 if (in->format == AV_SAMPLE_FMT_FLTP)
444 uint8_t** pp = in->extended_data;
/* The channel count lives in different fields depending on the libavutil
 * version. */
445 #if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
446 const int nr_channels = in->channels;
448 const int nr_channels = in->ch_layout.nb_channels;
/* Outer loop: one plane per channel; inner loop: the samples of that plane. */
451 for (
int y = 0; y < nr_channels; y++)
453 float* data = (
float*)pp[y];
454 for (
int x = 0; x < in->nb_samples; x++)
456 const float val1 = data[x];
459 else if (isinf(val1))
/* Hand the frame to the encoder. */
470 int ret = avcodec_send_frame(context, in);
474 const char* err = av_err2str(ret);
475 WLog_ERR(TAG,
"Error submitting the packet to the encoder %s [%d]", err, ret);
/* Fetch encoded packets; EAGAIN and EOF are tested separately from other
 * error codes. */
482 ret = avcodec_receive_packet(context, packet);
484 if ((ret == AVERROR(EAGAIN)) || (ret == AVERROR_EOF))
489 const char* err = av_err2str(ret);
490 WLog_ERR(TAG,
"Error during encoding %s [%d]", err, ret);
/* Append the packet payload to the output stream, then drop the packet's
 * reference to its data. */
494 WINPR_ASSERT(packet->size >= 0);
495 if (!Stream_EnsureRemainingCapacity(out, (
size_t)packet->size))
498 Stream_Write(out, packet->data, (
size_t)packet->size);
499 av_packet_unref(packet);
/* Describes the raw sample buffer `data` of `size` bytes through `frame`:
 * channel layout, sample rate and sample format are taken from inputFormat,
 * the sample count is derived from the byte size, and the buffer is attached
 * with avcodec_fill_audio_frame.
 * NOTE(review): the inputFormat parameter line and the trailing arguments of
 * the avcodec_fill_audio_frame call are not visible in this excerpt. */
505 static BOOL ffmpeg_fill_frame(AVFrame* WINPR_RESTRICT frame,
507 const BYTE* WINPR_RESTRICT data,
size_t size)
/* The channel layout API differs between libavutil versions. */
510 #if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
511 frame->channels = inputFormat->nChannels;
512 frame->channel_layout = av_get_default_channel_layout(frame->channels);
514 av_channel_layout_default(&frame->ch_layout, inputFormat->nChannels);
516 WINPR_ASSERT(inputFormat->nSamplesPerSec <= INT_MAX);
517 frame->sample_rate = (int)inputFormat->nSamplesPerSec;
518 frame->format = ffmpeg_sample_format(inputFormat);
/* Samples per channel = bytes / channels / bytes per sample.
 * NOTE(review): the assertion below only rejects negative values; a channel
 * count of 0 or a bpp of 0 would divide by zero - verify that callers exclude
 * both. */
520 const int bpp = av_get_bytes_per_sample(frame->format);
521 WINPR_ASSERT(bpp >= 0);
522 WINPR_ASSERT(size <= INT_MAX);
523 const size_t nb_samples = size / inputFormat->nChannels / (size_t)bpp;
524 frame->nb_samples = (int)nb_samples;
526 if ((ret = avcodec_fill_audio_frame(frame, inputFormat->nChannels, frame->format, data,
529 const char* err = av_err2str(ret);
530 WLog_ERR(TAG,
"Error during audio frame fill %s [%d]", err, ret);
/* Sends one packet to the decoder, converts every decoded frame through the
 * resampler into `resampled` and appends the result to the stream `out`.
 * Two signatures exist because the resampler handle type depends on the
 * resampling library in use.
 * NOTE(review): loop construct, return statements and the #else/#endif lines
 * are not visible in this excerpt. */
536 #if defined(SWRESAMPLE_FOUND)
537 static BOOL ffmpeg_decode(AVCodecContext* WINPR_RESTRICT dec_ctx, AVPacket* WINPR_RESTRICT pkt,
538 AVFrame* WINPR_RESTRICT frame, SwrContext* WINPR_RESTRICT resampleContext,
539 AVFrame* WINPR_RESTRICT resampled,
wStream* WINPR_RESTRICT out)
541 static BOOL ffmpeg_decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame,
542 AVAudioResampleContext* resampleContext, AVFrame* resampled,
wStream* out)
/* Hand the compressed data to the decoder. */
547 ret = avcodec_send_packet(dec_ctx, pkt);
551 const char* err = av_err2str(ret);
552 WLog_ERR(TAG,
"Error submitting the packet to the decoder %s [%d]", err, ret);
/* Fetch decoded frames; EAGAIN and EOF are tested separately from other
 * error codes. */
559 ret = avcodec_receive_frame(dec_ctx, frame);
561 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
565 const char* err = av_err2str(ret);
566 WLog_ERR(TAG,
"Error during decoding %s [%d]", err, ret);
/* Configure the resampler from the frames the first time it is needed. */
570 #if defined(SWRESAMPLE_FOUND)
571 if (!swr_is_initialized(resampleContext))
573 if ((ret = swr_config_frame(resampleContext, resampled, frame)) < 0)
576 if (!avresample_is_open(resampleContext))
578 if ((ret = avresample_config(resampleContext, resampled, frame)) < 0)
581 const char* err = av_err2str(ret);
582 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
/* Initialize / open the freshly configured resampler. */
586 #if defined(SWRESAMPLE_FOUND)
587 if ((ret = (swr_init(resampleContext))) < 0)
589 if ((ret = (avresample_open(resampleContext))) < 0)
592 const char* err = av_err2str(ret);
593 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
/* Convert the decoded frame into `resampled`. */
598 #if defined(SWRESAMPLE_FOUND)
599 if ((ret = swr_convert_frame(resampleContext, resampled, frame)) < 0)
601 if ((ret = avresample_convert_frame(resampleContext, resampled, frame)) < 0)
604 const char* err = av_err2str(ret);
605 WLog_ERR(TAG,
"Error during resampling %s [%d]", err, ret);
/* The channel count lives in different fields depending on the libavutil
 * version. */
611 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
612 WINPR_ASSERT(resampled->ch_layout.nb_channels >= 0);
613 const size_t nrchannels = (size_t)resampled->ch_layout.nb_channels;
615 const size_t nrchannels = resampled->channels;
/* Output size = channels * samples * 2 bytes; ffmpeg_open_context sets the
 * resampled frame to AV_SAMPLE_FMT_S16 when the context is not an encoder.
 * Only data[0] is written to the stream. */
617 WINPR_ASSERT(resampled->nb_samples >= 0);
618 const size_t data_size = nrchannels * (size_t)resampled->nb_samples * 2ull;
619 if (!Stream_EnsureRemainingCapacity(out, data_size))
621 Stream_Write(out, resampled->data[0], data_size);
628 BOOL freerdp_dsp_ffmpeg_supports_format(
const AUDIO_FORMAT* WINPR_RESTRICT format, BOOL encode)
630 enum AVCodecID
id = ffmpeg_get_avcodec(format);
632 if (ffmpeg_codec_is_filtered(
id, encode))
636 return avcodec_find_encoder(
id) != NULL;
638 return avcodec_find_decoder(
id) != NULL;
641 FREERDP_DSP_CONTEXT* freerdp_dsp_ffmpeg_context_new(BOOL encode)
643 FREERDP_DSP_CONTEXT* context = NULL;
644 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
645 avcodec_register_all();
647 context = calloc(1,
sizeof(FREERDP_DSP_CONTEXT));
652 if (!freerdp_dsp_common_context_init(&context->common, encode))
658 WINPR_PRAGMA_DIAG_PUSH
659 WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC
660 freerdp_dsp_ffmpeg_context_free(context);
661 WINPR_PRAGMA_DIAG_POP
665 void freerdp_dsp_ffmpeg_context_free(FREERDP_DSP_CONTEXT* context)
669 ffmpeg_close_context(context);
670 freerdp_dsp_common_context_uninit(&context->common);
675 BOOL freerdp_dsp_ffmpeg_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
678 if (!context || !targetFormat)
681 ffmpeg_close_context(context);
682 context->common.format = *targetFormat;
683 return ffmpeg_open_context(context);
686 static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
687 const BYTE* WINPR_RESTRICT src,
size_t size,
689 const BYTE** WINPR_RESTRICT data,
size_t* WINPR_RESTRICT length,
695 if (!context || !data || !length || !dstFormat)
698 if (srcFormat->wFormatTag != WAVE_FORMAT_PCM)
701 bpp = srcFormat->wBitsPerSample > 8 ? 2 : 1;
702 samples = size / bpp / srcFormat->nChannels;
704 *dstFormat = *srcFormat;
705 if (context->common.format.nChannels == srcFormat->nChannels)
712 Stream_SetPosition(context->common.channelmix, 0);
715 if (context->common.format.nChannels > srcFormat->nChannels)
717 switch (srcFormat->nChannels)
720 if (!Stream_EnsureCapacity(context->common.channelmix, size * 2))
723 for (
size_t x = 0; x < samples; x++)
725 for (
size_t y = 0; y < bpp; y++)
726 Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
728 for (
size_t y = 0; y < bpp; y++)
729 Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
732 Stream_SealLength(context->common.channelmix);
733 *data = Stream_Buffer(context->common.channelmix);
734 *length = Stream_Length(context->common.channelmix);
735 dstFormat->nChannels = 2;
740 WLog_WARN(TAG,
"[%s] unsupported source channel count %" PRIu16, __func__,
741 srcFormat->nChannels);
747 switch (srcFormat->nChannels)
750 if (!Stream_EnsureCapacity(context->common.channelmix, size / 2))
755 for (
size_t x = 0; x < samples; x++)
757 for (
size_t y = 0; y < bpp; y++)
758 Stream_Write_UINT8(context->common.channelmix, src[2 * x * bpp + y]);
761 Stream_SealLength(context->common.channelmix);
762 *data = Stream_Buffer(context->common.channelmix);
763 *length = Stream_Length(context->common.channelmix);
764 dstFormat->nChannels = 1;
769 WLog_WARN(TAG,
"[%s] unsupported channel count %" PRIu16, __func__,
770 srcFormat->nChannels);
777 BOOL freerdp_dsp_ffmpeg_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
779 const BYTE* WINPR_RESTRICT sdata,
size_t length,
784 if (!context || !format || !sdata || !out || !context->common.encoder)
787 if (!context || !sdata || !out)
794 const BYTE* data = NULL;
795 if (!freerdp_dsp_channel_mix(context, sdata, length, format, &data, &length, &fmt))
799 if (!ffmpeg_fill_frame(context->frame, format, data, length))
803 if (!ffmpeg_resample_frame(context->rcontext, context->frame, context->resampled))
806 if (context->context->frame_size <= 0)
808 return ffmpeg_encode_frame(context->context, context->resampled, context->packet, out);
813 int rest = context->resampled->nb_samples;
817 int inSamples = rest;
819 if ((inSamples < 0) || (context->bufferedSamples > (UINT32)(INT_MAX - inSamples)))
822 if (inSamples + (
int)context->bufferedSamples > context->context->frame_size)
823 inSamples = context->context->frame_size - (int)context->bufferedSamples;
825 #
if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
826 const int nrchannels = context->context->ch_layout.nb_channels;
828 const int nrchannels = context->context->channels;
831 av_samples_copy(context->buffered->extended_data, context->resampled->extended_data,
832 (
int)context->bufferedSamples, copied, inSamples, nrchannels,
833 context->context->sample_fmt);
838 context->bufferedSamples += (UINT32)inSamples;
840 if (context->context->frame_size <= (
int)context->bufferedSamples)
843 if (!ffmpeg_encode_frame(context->context, context->buffered, context->packet, out))
846 context->bufferedSamples = 0;
854 BOOL freerdp_dsp_ffmpeg_decode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
856 const BYTE* WINPR_RESTRICT data,
size_t length,
859 if (!context || !srcFormat || !data || !out || context->common.encoder)
862 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 133, 100)
863 av_init_packet(context->packet);
865 context->packet->data = WINPR_CAST_CONST_PTR_AWAY(data, uint8_t*);
867 WINPR_ASSERT(length <= INT_MAX);
868 context->packet->size = (int)length;
869 return ffmpeg_decode(context->context, context->packet, context->frame, context->rcontext,
870 context->resampled, out);