diff options
Diffstat (limited to 'transcode.c')
| -rw-r--r-- | transcode.c | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/transcode.c b/transcode.c new file mode 100644 index 0000000..2d1c68e --- /dev/null +++ b/transcode.c @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * transcode.c - convert audio file to WAVE + * + * Copyright (C) 2019 Andrew Clayton <andrew@digital-domain.net> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> + +#include <libavutil/opt.h> +#include <libavcodec/avcodec.h> +#include <libavformat/avformat.h> +#include <libswresample/swresample.h> + +#include "short_types.h" + +#define WAVE_SAMPLE_RATE 16000 +#define AVIO_CTX_BUF_SZ 4096 + +/* + * WAVE file header based on definition from + * https://gist.github.com/Jon-Schneider/8b7c53d27a7a13346a643dac9c19d34f + * + * We must ensure this structure doesn't have any holes or + * padding so we can just map it straight to the WAVE data. + */ +struct wave_hdr { + /* RIFF Header: "RIFF" */ + char riff_header[4]; + /* size of audio data + sizeof(struct wave_hdr) - 8 */ + int wav_size; + /* "WAVE" */ + char wav_header[4]; + + /* Format Header */ + /* "fmt " (includes trailing space) */ + char fmt_header[4]; + /* Should be 16 for PCM */ + int fmt_chunk_size; + /* Should be 1 for PCM. 3 for IEEE Float */ + s16 audio_format; + s16 num_channels; + int sample_rate; + /* + * Number of bytes per second + * sample_rate * num_channels * bit_depth/8 + */ + int byte_rate; + /* num_channels * bytes per sample */ + s16 sample_alignment; + /* bits per sample */ + s16 bit_depth; + + /* Data Header */ + /* "data" */ + char data_header[4]; + /* + * size of audio + * number of samples * num_channels * bit_depth/8 + */ + int data_bytes; +} __attribute__((__packed__)); + +struct audio_buffer { + u8 *ptr; + int size; /* size left in the buffer */ +}; + +static void write_wave_hdr(int fd, size_t size) +{ + struct wave_hdr wh; + + memcpy(&wh.riff_header, "RIFF", 4); + wh.wav_size = size + sizeof(struct wave_hdr) - 8; + memcpy(&wh.wav_header, "WAVE", 4); + memcpy(&wh.fmt_header, "fmt ", 4); + wh.fmt_chunk_size = 16; + wh.audio_format = 1; + wh.num_channels = 1; + wh.sample_rate = WAVE_SAMPLE_RATE; + wh.sample_alignment = 2; + wh.bit_depth = 16; + wh.byte_rate = wh.sample_rate * wh.sample_alignment; + memcpy(&wh.data_header, "data", 4); + wh.data_bytes = size; + + write(fd, &wh, sizeof(struct wave_hdr)); +} + +static int map_file(int fd, u8 **ptr, size_t *size) +{ + struct stat sb; + + fstat(fd, &sb); + *size = sb.st_size; + + *ptr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + if (*ptr == MAP_FAILED) { + perror("mmap"); + return -1; + } + + return 0; +} + +static int read_packet(void *opaque, u8 *buf, int buf_size) +{ + struct audio_buffer *audio_buf = opaque; + + buf_size = FFMIN(buf_size, audio_buf->size); + + /* copy internal buffer data to buf */ + memcpy(buf, audio_buf->ptr, buf_size); + audio_buf->ptr += buf_size; + audio_buf->size -= buf_size; + + return buf_size; +} + +static void convert_frame(struct SwrContext *swr, AVCodecContext *codec, + AVFrame *frame, s16 **data, int *size, bool flush) +{ + int nr_samples; + s64 delay; + u8 *buffer; + + delay = swr_get_delay(swr, codec->sample_rate); + nr_samples = av_rescale_rnd(delay + frame->nb_samples, + WAVE_SAMPLE_RATE, codec->sample_rate, + AV_ROUND_UP); + av_samples_alloc(&buffer, NULL, 1, nr_samples, AV_SAMPLE_FMT_S16, 0); + + /* + * !flush is used to check if we are flushing any remaining + * conversion buffers... + */ + nr_samples = swr_convert(swr, &buffer, nr_samples, + !flush ? (const u8 **)frame->data : NULL, + !flush ? frame->nb_samples : 0); + + *data = realloc(*data, (*size + nr_samples) * sizeof(s16)); + memcpy(*data + *size, buffer, nr_samples * sizeof(s16)); + *size += nr_samples; + av_freep(&buffer); +} + +static bool is_audio_stream(const AVStream *stream) +{ + if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + return true; + + return false; +} + +static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) +{ + AVFormatContext *fmt_ctx; + AVIOContext *avio_ctx; + AVStream *stream; + AVCodecContext *codec; + AVPacket packet; + AVFrame *frame; + struct SwrContext *swr; + u8 *avio_ctx_buffer; + unsigned int i; + int stream_index = -1; + int err; + + fmt_ctx = avformat_alloc_context(); + avio_ctx_buffer = av_malloc(AVIO_CTX_BUF_SZ); + avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, + audio_buf, &read_packet, NULL, NULL); + fmt_ctx->pb = avio_ctx; + + err = avformat_open_input(&fmt_ctx, NULL, NULL, NULL); + if (err) { + fprintf(stderr, "Could not read audio buffer\n"); + return -1; + } + + err = avformat_find_stream_info(fmt_ctx, NULL); + if (err < 0) { + fprintf(stderr, + "Could not retrieve stream info from audio buffer\n"); + return -1; + } + + for (i = 0; i < fmt_ctx->nb_streams; i++) { + if (is_audio_stream(fmt_ctx->streams[i])) { + stream_index = i; + break; + } + } + + if (stream_index == -1) { + fprintf(stderr, + "Could not retrieve audio stream from buffer\n"); + return -1; + } + + stream = fmt_ctx->streams[stream_index]; + codec = avcodec_alloc_context3( + avcodec_find_decoder(stream->codecpar->codec_id)); + avcodec_parameters_to_context(codec, stream->codecpar); + err = avcodec_open2(codec, avcodec_find_decoder(codec->codec_id), + NULL); + if (err) { + fprintf(stderr, + "Failed to open decoder for stream #%d in audio buffer\n", + stream_index); + return -1; + } + + /* prepare resampler */ + swr = swr_alloc(); + + av_opt_set_int(swr, "in_channel_count", codec->channels, 0); + av_opt_set_int(swr, "out_channel_count", 1, 0); + av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0); + av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0); + av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0); + av_opt_set_int(swr, "out_sample_rate", WAVE_SAMPLE_RATE, 0); + av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0); + av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); + + swr_init(swr); + if (!swr_is_initialized(swr)) { + fprintf(stderr, + "Resampler has not been properly initialized\n"); + return -1; + } + + av_init_packet(&packet); + frame = av_frame_alloc(); + if (!frame) { + fprintf(stderr, "Error allocating the frame\n"); + return -1; + } + + /* iterate through frames */ + *data = NULL; + *size = 0; + while (av_read_frame(fmt_ctx, &packet) >= 0) { + avcodec_send_packet(codec, &packet); + + err = avcodec_receive_frame(codec, frame); + if (err == AVERROR(EAGAIN)) + continue; + + convert_frame(swr, codec, frame, data, size, false); + } + /* Flush any remaining conversion buffers... */ + convert_frame(swr, codec, frame, data, size, true); + + av_frame_free(&frame); + swr_free(&swr); + avcodec_close(codec); + avformat_close_input(&fmt_ctx); + avformat_free_context(fmt_ctx); + + if (avio_ctx) { + av_freep(&avio_ctx->buffer); + av_freep(&avio_ctx); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct audio_buffer audio_buf; + size_t buf_size; + s16 *data; + u8 *buf; + int size; + int ifd; + int ofd; + int err; + + if (argc < 3) { + fprintf(stderr, "Usage: transcode in_file out_wave\n"); + exit(EXIT_FAILURE); + } + + ifd = open(argv[1], O_RDONLY); + if (ifd == -1) { + fprintf(stderr, "Couldn't open input file\n"); + exit(EXIT_FAILURE); + } + + ofd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0666); + + err = map_file(ifd, &buf, &buf_size); + if (err) + exit(EXIT_FAILURE); + + audio_buf.ptr = buf; + audio_buf.size = buf_size; + + err = decode_audio(&audio_buf, &data, &size); + if (err) + exit(EXIT_FAILURE); + + write_wave_hdr(ofd, size * sizeof(s16)); + write(ofd, data, size * sizeof(s16)); + + free(data); + munmap(buf, buf_size); + + close(ifd); + close(ofd); + + exit(EXIT_SUCCESS); +} |
