summaryrefslogtreecommitdiffhomepage
path: root/transcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'transcode.c')
-rw-r--r--transcode.c323
1 files changed, 323 insertions, 0 deletions
diff --git a/transcode.c b/transcode.c
new file mode 100644
index 0000000..2d1c68e
--- /dev/null
+++ b/transcode.c
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * transcode.c - convert audio file to WAVE
+ *
+ * Copyright (C) 2019 Andrew Clayton <andrew@digital-domain.net>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+
+#include "short_types.h"
+
+#define WAVE_SAMPLE_RATE 16000
+#define AVIO_CTX_BUF_SZ 4096
+
+/*
+ * WAVE file header based on definition from
+ * https://gist.github.com/Jon-Schneider/8b7c53d27a7a13346a643dac9c19d34f
+ *
+ * We must ensure this structure doesn't have any holes or
+ * padding so we can just map it straight to the WAVE data.
+ */
+struct wave_hdr {
+ /* RIFF Header: "RIFF" */
+ char riff_header[4];
+ /* size of audio data + sizeof(struct wave_hdr) - 8 */
+ int wav_size;
+ /* "WAVE" */
+ char wav_header[4];
+
+ /* Format Header */
+ /* "fmt " (includes trailing space) */
+ char fmt_header[4];
+ /* Should be 16 for PCM */
+ int fmt_chunk_size;
+ /* Should be 1 for PCM. 3 for IEEE Float */
+ s16 audio_format;
+ s16 num_channels;
+ int sample_rate;
+ /*
+ * Number of bytes per second
+ * sample_rate * num_channels * bit_depth/8
+ */
+ int byte_rate;
+ /* num_channels * bytes per sample */
+ s16 sample_alignment;
+ /* bits per sample */
+ s16 bit_depth;
+
+ /* Data Header */
+ /* "data" */
+ char data_header[4];
+ /*
+ * size of audio
+ * number of samples * num_channels * bit_depth/8
+ */
+ int data_bytes;
+} __attribute__((__packed__));
+
+struct audio_buffer {
+ u8 *ptr;
+ int size; /* size left in the buffer */
+};
+
+static void write_wave_hdr(int fd, size_t size)
+{
+ struct wave_hdr wh;
+
+ memcpy(&wh.riff_header, "RIFF", 4);
+ wh.wav_size = size + sizeof(struct wave_hdr) - 8;
+ memcpy(&wh.wav_header, "WAVE", 4);
+ memcpy(&wh.fmt_header, "fmt ", 4);
+ wh.fmt_chunk_size = 16;
+ wh.audio_format = 1;
+ wh.num_channels = 1;
+ wh.sample_rate = WAVE_SAMPLE_RATE;
+ wh.sample_alignment = 2;
+ wh.bit_depth = 16;
+ wh.byte_rate = wh.sample_rate * wh.sample_alignment;
+ memcpy(&wh.data_header, "data", 4);
+ wh.data_bytes = size;
+
+ write(fd, &wh, sizeof(struct wave_hdr));
+}
+
+static int map_file(int fd, u8 **ptr, size_t *size)
+{
+ struct stat sb;
+
+ fstat(fd, &sb);
+ *size = sb.st_size;
+
+ *ptr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (*ptr == MAP_FAILED) {
+ perror("mmap");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_packet(void *opaque, u8 *buf, int buf_size)
+{
+ struct audio_buffer *audio_buf = opaque;
+
+ buf_size = FFMIN(buf_size, audio_buf->size);
+
+ /* copy internal buffer data to buf */
+ memcpy(buf, audio_buf->ptr, buf_size);
+ audio_buf->ptr += buf_size;
+ audio_buf->size -= buf_size;
+
+ return buf_size;
+}
+
+static void convert_frame(struct SwrContext *swr, AVCodecContext *codec,
+ AVFrame *frame, s16 **data, int *size, bool flush)
+{
+ int nr_samples;
+ s64 delay;
+ u8 *buffer;
+
+ delay = swr_get_delay(swr, codec->sample_rate);
+ nr_samples = av_rescale_rnd(delay + frame->nb_samples,
+ WAVE_SAMPLE_RATE, codec->sample_rate,
+ AV_ROUND_UP);
+ av_samples_alloc(&buffer, NULL, 1, nr_samples, AV_SAMPLE_FMT_S16, 0);
+
+ /*
+ * !flush is used to check if we are flushing any remaining
+ * conversion buffers...
+ */
+ nr_samples = swr_convert(swr, &buffer, nr_samples,
+ !flush ? (const u8 **)frame->data : NULL,
+ !flush ? frame->nb_samples : 0);
+
+ *data = realloc(*data, (*size + nr_samples) * sizeof(s16));
+ memcpy(*data + *size, buffer, nr_samples * sizeof(s16));
+ *size += nr_samples;
+ av_freep(&buffer);
+}
+
+static bool is_audio_stream(const AVStream *stream)
+{
+ if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
+ return true;
+
+ return false;
+}
+
+static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
+{
+ AVFormatContext *fmt_ctx;
+ AVIOContext *avio_ctx;
+ AVStream *stream;
+ AVCodecContext *codec;
+ AVPacket packet;
+ AVFrame *frame;
+ struct SwrContext *swr;
+ u8 *avio_ctx_buffer;
+ unsigned int i;
+ int stream_index = -1;
+ int err;
+
+ fmt_ctx = avformat_alloc_context();
+ avio_ctx_buffer = av_malloc(AVIO_CTX_BUF_SZ);
+ avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0,
+ audio_buf, &read_packet, NULL, NULL);
+ fmt_ctx->pb = avio_ctx;
+
+ err = avformat_open_input(&fmt_ctx, NULL, NULL, NULL);
+ if (err) {
+ fprintf(stderr, "Could not read audio buffer\n");
+ return -1;
+ }
+
+ err = avformat_find_stream_info(fmt_ctx, NULL);
+ if (err < 0) {
+ fprintf(stderr,
+ "Could not retrieve stream info from audio buffer\n");
+ return -1;
+ }
+
+ for (i = 0; i < fmt_ctx->nb_streams; i++) {
+ if (is_audio_stream(fmt_ctx->streams[i])) {
+ stream_index = i;
+ break;
+ }
+ }
+
+ if (stream_index == -1) {
+ fprintf(stderr,
+ "Could not retrieve audio stream from buffer\n");
+ return -1;
+ }
+
+ stream = fmt_ctx->streams[stream_index];
+ codec = avcodec_alloc_context3(
+ avcodec_find_decoder(stream->codecpar->codec_id));
+ avcodec_parameters_to_context(codec, stream->codecpar);
+ err = avcodec_open2(codec, avcodec_find_decoder(codec->codec_id),
+ NULL);
+ if (err) {
+ fprintf(stderr,
+ "Failed to open decoder for stream #%d in audio buffer\n",
+ stream_index);
+ return -1;
+ }
+
+ /* prepare resampler */
+ swr = swr_alloc();
+
+ av_opt_set_int(swr, "in_channel_count", codec->channels, 0);
+ av_opt_set_int(swr, "out_channel_count", 1, 0);
+ av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0);
+ av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0);
+ av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0);
+ av_opt_set_int(swr, "out_sample_rate", WAVE_SAMPLE_RATE, 0);
+ av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0);
+ av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+
+ swr_init(swr);
+ if (!swr_is_initialized(swr)) {
+ fprintf(stderr,
+ "Resampler has not been properly initialized\n");
+ return -1;
+ }
+
+ av_init_packet(&packet);
+ frame = av_frame_alloc();
+ if (!frame) {
+ fprintf(stderr, "Error allocating the frame\n");
+ return -1;
+ }
+
+ /* iterate through frames */
+ *data = NULL;
+ *size = 0;
+ while (av_read_frame(fmt_ctx, &packet) >= 0) {
+ avcodec_send_packet(codec, &packet);
+
+ err = avcodec_receive_frame(codec, frame);
+ if (err == AVERROR(EAGAIN))
+ continue;
+
+ convert_frame(swr, codec, frame, data, size, false);
+ }
+ /* Flush any remaining conversion buffers... */
+ convert_frame(swr, codec, frame, data, size, true);
+
+ av_frame_free(&frame);
+ swr_free(&swr);
+ avcodec_close(codec);
+ avformat_close_input(&fmt_ctx);
+ avformat_free_context(fmt_ctx);
+
+ if (avio_ctx) {
+ av_freep(&avio_ctx->buffer);
+ av_freep(&avio_ctx);
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct audio_buffer audio_buf;
+ size_t buf_size;
+ s16 *data;
+ u8 *buf;
+ int size;
+ int ifd;
+ int ofd;
+ int err;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: transcode in_file out_wave\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ifd = open(argv[1], O_RDONLY);
+ if (ifd == -1) {
+ fprintf(stderr, "Couldn't open input file\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ofd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0666);
+
+ err = map_file(ifd, &buf, &buf_size);
+ if (err)
+ exit(EXIT_FAILURE);
+
+ audio_buf.ptr = buf;
+ audio_buf.size = buf_size;
+
+ err = decode_audio(&audio_buf, &data, &size);
+ if (err)
+ exit(EXIT_FAILURE);
+
+ write_wave_hdr(ofd, size * sizeof(s16));
+ write(ofd, data, size * sizeof(s16));
+
+ free(data);
+ munmap(buf, buf_size);
+
+ close(ifd);
+ close(ofd);
+
+ exit(EXIT_SUCCESS);
+}