From e1983a1358ef60c4e5ad625811024905c971606f Mon Sep 17 00:00:00 2001
From: Peter Nelson <peter1138@openttd.org>
Date: Mon, 27 Nov 2023 19:25:32 +0000
Subject: [PATCH] Codechange: Cache loaded sounds in memory.

Sound files are currently loaded from file, including allocating memory
and minor format conversion, and then thrown away, every time a sound
is played.

Instead retain the loaded sound data in memory so that it can be reused.

This moves raw or RIFF/WAVE header parsing to a single place, and
validates more of the RIFF header.
---
 src/mixer.cpp        |  15 +--
 src/mixer.h          |   2 +-
 src/newgrf.cpp       |   1 +
 src/newgrf_sound.cpp |  74 ++---------
 src/newgrf_sound.h   |   2 +-
 src/sound.cpp        | 292 +++++++++++++++++++++++++++++--------------
 src/sound_func.h     |   2 +
 src/sound_type.h     |   8 ++
 8 files changed, 224 insertions(+), 172 deletions(-)
diff --git a/src/mixer.cpp b/src/mixer.cpp
index 3b30f3d25d..918cb9ce72 100644
--- a/src/mixer.cpp
+++ b/src/mixer.cpp
@@ -19,7 +19,7 @@
 
 struct MixerChannel {
 	/* pointer to allocated buffer memory */
-	int8_t *memory;
+	std::shared_ptr<std::vector<byte>> memory;
 
 	/* current position in memory */
 	uint32_t pos;
@@ -73,7 +73,7 @@ static void mix_int16(MixerChannel *sc, int16_t *buffer, uint samples, uint8_t e
 	sc->samples_left -= samples;
 	assert(samples > 0);
 
-	const T *b = (const T *)sc->memory + sc->pos;
+	const T *b = (const T *)sc->memory->data() + sc->pos;
 	uint32_t frac_pos = sc->frac_pos;
 	uint32_t frac_speed = sc->frac_speed;
 	int volume_left = sc->volume_left * effect_vol / 255;
@@ -100,7 +100,7 @@ static void mix_int16(MixerChannel *sc, int16_t *buffer, uint samples, uint8_t e
 	}
 
 	sc->frac_pos = frac_pos;
-	sc->pos = b - (const T *)sc->memory;
+	sc->pos = b - (const T *)sc->memory->data();
 }
 
 static void MxCloseChannel(uint8_t channel_index)
@@ -157,19 +157,19 @@ MixerChannel *MxAllocateChannel()
 	uint8_t channel_index = FindFirstBit(available);
 
 	MixerChannel *mc = &_channels[channel_index];
-	free(mc->memory);
 	mc->memory = nullptr;
 	return mc;
 }
 
-void MxSetChannelRawSrc(MixerChannel *mc, int8_t *mem, size_t size, uint rate, bool is16bit)
+void MxSetChannelRawSrc(MixerChannel *mc, const std::shared_ptr<std::vector<byte>> &mem, uint rate, bool is16bit)
 {
 	mc->memory = mem;
 	mc->frac_pos = 0;
 	mc->pos = 0;
 
-	mc->frac_speed = (rate << 16) / _play_rate;
+	mc->frac_speed = (rate << 16U) / _play_rate;
 
+	size_t size = mc->memory->size();
 	if (is16bit) size /= 2;
 
 	/* adjust the magnitude to prevent overflow */
@@ -178,7 +178,8 @@ void MxSetChannelRawSrc(MixerChannel *mc, int8_t *mem, size_t size, uint rate, b
 		rate = (rate >> 1) + 1;
 	}
 
-	mc->samples_left = (uint)size * _play_rate / rate;
+	/* Scale number of samples by play rate, less 1 to allow for the resampler's padding sample. */
+	mc->samples_left = (uint)size * _play_rate / rate - 1;
 	mc->is16bit = is16bit;
 }
 
diff --git a/src/mixer.h b/src/mixer.h
index ad94f5440c..8d294030f8 100644
--- a/src/mixer.h
+++ b/src/mixer.h
@@ -24,7 +24,7 @@ bool MxInitialize(uint rate);
 void MxMixSamples(void *buffer, uint samples);
 
 MixerChannel *MxAllocateChannel();
-void MxSetChannelRawSrc(MixerChannel *mc, int8_t *mem, size_t size, uint rate, bool is16bit);
+void MxSetChannelRawSrc(MixerChannel *mc, const std::shared_ptr<std::vector<byte>> &mem, uint rate, bool is16bit);
 void MxSetChannelVolume(MixerChannel *mc, uint volume, float pan);
 void MxActivateChannel(MixerChannel*);
 
diff --git a/src/newgrf.cpp b/src/newgrf.cpp
index 72687655ca..1273d3f98d 100644
--- a/src/newgrf.cpp
+++ b/src/newgrf.cpp
@@ -7838,6 +7838,7 @@ static void LoadGRFSound(size_t offs, SoundEntry *sound)
 		/* Sound is present in the NewGRF. */
 		sound->file = _cur.file;
 		sound->file_offset = offs;
+		sound->source = SoundSource::NewGRF;
 		sound->grf_container_ver = _cur.file->GetContainerVersion();
 	}
 }
diff --git a/src/newgrf_sound.cpp b/src/newgrf_sound.cpp
index 356b7dfaf8..d3e8b84de9 100644
--- a/src/newgrf_sound.cpp
+++ b/src/newgrf_sound.cpp
@@ -63,7 +63,7 @@ uint GetNumSounds()
  * @param sound Sound to load.
  * @return True if a valid sound was loaded.
  */
-bool LoadNewGRFSound(SoundEntry *sound)
+bool LoadNewGRFSound(SoundEntry *sound, SoundID sound_id)
 {
 	if (sound->file_offset == SIZE_MAX || sound->file == nullptr) return false;
 
@@ -76,7 +76,7 @@ bool LoadNewGRFSound(SoundEntry *sound)
 
 	/* Format: <num> <FF> <FF> <name_len> <name> '\0' <data> */
 
-	uint32_t num = sound->grf_container_ver >= 2 ? file.ReadDword() : file.ReadWord();
+	sound->file_size = sound->grf_container_ver >= 2 ? file.ReadDword() : file.ReadWord();
 	if (file.ReadByte() != 0xFF) return false;
 	if (file.ReadByte() != 0xFF) return false;
 
@@ -85,78 +85,20 @@ bool LoadNewGRFSound(SoundEntry *sound)
 	file.ReadBlock(name.data(), name_len + 1);
 
 	/* Test string termination */
-	if (name[name_len] != 0) {
+	if (name[name_len] != '\0') {
 		Debug(grf, 2, "LoadNewGRFSound [{}]: Name not properly terminated", file.GetSimplifiedFilename());
 		return false;
 	}
 
-	Debug(grf, 2, "LoadNewGRFSound [{}]: Sound name '{}'...", file.GetSimplifiedFilename(), name);
-
-	if (file.ReadDword() != BSWAP32('RIFF')) {
-		Debug(grf, 1, "LoadNewGRFSound [{}]: Missing RIFF header", file.GetSimplifiedFilename());
-		return false;
+	if (LoadSound(*sound, true, sound_id, name)) {
+		Debug(grf, 2, "LoadNewGRFSound [{}]: channels {}, sample rate {}, bits per sample {}, length {}", file.GetSimplifiedFilename(), sound->channels, sound->rate, sound->bits_per_sample, sound->file_size);
+		return true;
 	}
 
-	uint32_t total_size = file.ReadDword();
-	uint header_size = 11;
-	if (sound->grf_container_ver >= 2) header_size++; // The first FF in the sprite is only counted for container version >= 2.
-	if (total_size + name_len + header_size > num) {
-		Debug(grf, 1, "LoadNewGRFSound [{}]: RIFF was truncated", file.GetSimplifiedFilename());
-		return false;
-	}
-
-	if (file.ReadDword() != BSWAP32('WAVE')) {
-		Debug(grf, 1, "LoadNewGRFSound [{}]: Invalid RIFF type", file.GetSimplifiedFilename());
-		return false;
-	}
-
-	while (total_size >= 8) {
-		uint32_t tag  = file.ReadDword();
-		uint32_t size = file.ReadDword();
-		total_size -= 8;
-		if (total_size < size) {
-			Debug(grf, 1, "LoadNewGRFSound [{}]: Invalid RIFF", file.GetSimplifiedFilename());
-			return false;
-		}
-		total_size -= size;
-
-		switch (tag) {
-			case ' tmf': // 'fmt '
-				/* Audio format, must be 1 (PCM) */
-				if (size < 16 || file.ReadWord() != 1) {
-					Debug(grf, 1, "LoadGRFSound [{}]: Invalid audio format", file.GetSimplifiedFilename());
-					return false;
-				}
-				sound->channels = file.ReadWord();
-				sound->rate = file.ReadDword();
-				file.ReadDword();
-				file.ReadWord();
-				sound->bits_per_sample = file.ReadWord();
-
-				/* The rest will be skipped */
-				size -= 16;
-				break;
-
-			case 'atad': // 'data'
-				sound->file_size   = size;
-				sound->file_offset = file.GetPos();
-
-				Debug(grf, 2, "LoadNewGRFSound [{}]: channels {}, sample rate {}, bits per sample {}, length {}", file.GetSimplifiedFilename(), sound->channels, sound->rate, sound->bits_per_sample, size);
-				return true; // the fmt chunk has to appear before data, so we are finished
-
-			default:
-				/* Skip unknown chunks */
-				break;
-		}
-
-		/* Skip rest of chunk */
-		if (size > 0) file.SkipBytes(size);
-	}
-
-	Debug(grf, 1, "LoadNewGRFSound [{}]: RIFF does not contain any sound data", file.GetSimplifiedFilename());
+	Debug(grf, 1, "LoadNewGRFSound [{}]: does not contain any sound data", file.GetSimplifiedFilename());
 
 	/* Clear everything that was read */
 	*sound = {};
 	return false;
 }
 
diff --git a/src/newgrf_sound.h b/src/newgrf_sound.h
index 5b1d1cbc57..f6e9aae462 100644
--- a/src/newgrf_sound.h
+++ b/src/newgrf_sound.h
@@ -30,7 +30,7 @@ enum VehicleSoundEvent {
 
 SoundEntry *AllocateSound(uint num);
 void InitializeSoundPool();
-bool LoadNewGRFSound(SoundEntry *sound);
+bool LoadNewGRFSound(SoundEntry *sound, SoundID sound_id);
 SoundID GetNewGRFSoundID(const struct GRFFile *file, SoundID sound_id);
 SoundEntry *GetSound(SoundID sound_id);
 uint GetNumSounds();
diff --git a/src/sound.cpp b/src/sound.cpp
index b0c3ed856e..cf75914874 100644
--- a/src/sound.cpp
+++ b/src/sound.cpp
@@ -8,6 +8,7 @@
 /** @file sound.cpp Handling of playing sounds. */
 
 #include "stdafx.h"
+#include "core/endian_func.hpp"
 #include "landscape.h"
 #include "mixer.h"
 #include "newgrf_sound.h"
@@ -23,6 +24,180 @@
 
 static std::array<SoundEntry, ORIGINAL_SAMPLE_COUNT> _original_sounds;
 
+/**
+ * Convert u8 samples to i8. The final byte of the buffer is not converted.
+ * @param in buffer of samples to convert; LoadSoundRaw and LoadSoundWav append one zero padding byte.
+ */
+void NormaliseInt8(std::vector<byte> &in)
+{
+	if (in.empty()) return; // Nothing to convert, and the end pointer below would precede the buffer.
+	uint8_t *inb = reinterpret_cast<uint8_t *>(in.data());
+	uint8_t *ine = inb + in.size() - 1;
+	for (; inb != ine; ++inb) {
+		*inb = *inb - 128;
+	}
+}
+
+/**
+ * Convert i16 samples from little endian.
+ * @param in buffer of samples to convert; its size in bytes must be even.
+ */
+void NormaliseInt16(std::vector<byte> &in)
+{
+	/* Buffer size must be aligned to 2 bytes. */
+	assert((in.size() & 1) == 0);
+
+	/* Convert samples from little endian. On a LE system this will do nothing. */
+	int16_t *inb = reinterpret_cast<int16_t *>(in.data());
+	int16_t *ine = inb + in.size() / 2;
+	for (; inb != ine; ++inb) {
+		*inb = FROM_LE16(*inb);
+	}
+}
+
+/**
+ * Raw PCM sound loader, used as a fallback if the WAV sound loader fails.
+ * @param[in,out] sound Sound to load. Playback parameters will be filled in.
+ * @param new_format Whether this sound comes from a new format file.
+ * @param[in,out] data buffer to load sound data into.
+ */
+static bool LoadSoundRaw(SoundEntry &sound, bool new_format, std::vector<byte> &data)
+{
+	/* No raw sounds are permitted with a new format file. */
+	if (new_format) return false;
+	/* LoadSoundWav zeroes the size when it stops at an unknown chunk; there is nothing to read then. */
+	if (sound.file_size == 0) return false;
+
+	/*
+	 * Special case for the jackhammer sound
+	 * (name in Windows sample.cat is "Corrupt sound")
+	 * It's no RIFF file, but raw PCM data
+	 */
+	sound.channels = 1;
+	sound.rate = 11025;
+	sound.bits_per_sample = 8;
+
+	/* Allocate an extra sample to ensure the runtime resampler doesn't go out of bounds. */
+	data.resize(sound.file_size + 1);
+	sound.file->ReadBlock(data.data(), sound.file_size);
+	NormaliseInt8(data);
+	return true;
+}
+
+/**
+ * Wav file (RIFF/WAVE) sound loader.
+ * @param[in,out] sound Sound to load. Playback parameters will be filled in.
+ * @param new_format Whether this sound comes from a new format file.
+ * @param[in,out] data buffer to load sound data into.
+ * @return True if a 'data' chunk was accepted; \a data stays empty when that chunk has no bytes.
+ */
+static bool LoadSoundWav(SoundEntry &sound, bool new_format, std::vector<byte> &data)
+{
+	RandomAccessFile &file = *sound.file;
+
+	/* Check RIFF/WAVE header. */
+	if (file.ReadDword() != BSWAP32('RIFF')) return false;
+	file.ReadDword(); // Skip data size
+	if (file.ReadDword() != BSWAP32('WAVE')) return false;
+
+	/* Read riff tags */
+	for (;;) {
+		uint32_t tag = file.ReadDword();
+		uint32_t size = file.ReadDword();
+
+		if (tag == BSWAP32('fmt ')) {
+			if (size < 16 || file.ReadWord() != 1) return false; // Need the 16 byte PCM header; wFormatTag must be 1 (uncompressed PCM).
+			sound.channels = file.ReadWord();         // wChannels
+			sound.rate     = file.ReadDword();        // samples per second
+			if (!new_format) sound.rate = 11025;      // seems like all old samples should be played at this rate.
+			file.ReadDword();                         // avg bytes per second
+			file.ReadWord();                          // alignment
+			sound.bits_per_sample = file.ReadWord();  // bits per sample
+			if (sound.bits_per_sample != 8 && sound.bits_per_sample != 16) return false; // File must be 8 or 16 BPS.
+			file.SeekTo(size - 16, SEEK_CUR);         // Skip the rest of the chunk so the next tag is read from the right place.
+		} else if (tag == BSWAP32('data')) {
+			uint align = sound.channels * sound.bits_per_sample / 8;
+			if ((size & (align - 1)) != 0) return false; // Ensure length is aligned correctly for channels and BPS.
+
+			sound.file_size = size;
+			if (size == 0) return true; // No need to continue.
+
+			/* Allocate an extra sample to ensure the runtime resampler doesn't go out of bounds. */
+			data.resize(sound.file_size + sound.channels * sound.bits_per_sample / 8);
+			file.ReadBlock(data.data(), sound.file_size);
+
+			if (sound.bits_per_sample == 8) NormaliseInt8(data);
+			if (sound.bits_per_sample == 16) NormaliseInt16(data);
+			return true;
+		} else {
+			sound.file_size = 0;
+			break;
+		}
+	}
+
+	return false;
+}
+
+using SoundLoader = bool (*)(SoundEntry &sound, bool new_format, std::vector<byte> &data);
+/** Sound loaders in the order LoadSound tries them; it stops at the first one that returns true. */
+static std::initializer_list<SoundLoader> _sound_loaders = {
+	LoadSoundWav,
+	LoadSoundRaw,
+};
+
+bool LoadSound(SoundEntry &sound, bool new_format, SoundID sound_id, const std::string &name)
+{
+	/* Check for valid sound size: not empty, and small enough that adding two bytes cannot overflow. */
+	if (sound.file_size == 0) return false;
+	if (sound.file_size > ((size_t)-1) - 2) return false;
+
+	/* Every loader starts reading at the same file position and fills the same buffer. */
+	const size_t start = sound.file->GetPos();
+	sound.data = std::make_shared<std::vector<byte>>();
+	for (SoundLoader loader : _sound_loaders) {
+		sound.file->SeekTo(start, SEEK_SET);
+		if (loader(sound, new_format, *sound.data)) break;
+	}
+
+	if (!sound.data->empty()) {
+		assert(sound.bits_per_sample == 8 || sound.bits_per_sample == 16);
+		assert(sound.channels == 1);
+		assert(!sound.data->empty() && sound.rate != 0);
+		return true;
+	}
+
+	/* Some sounds are unused so it does not matter if they are empty. */
+	static const std::initializer_list<SoundID> UNUSED_SOUNDS = {
+		SND_0D_UNUSED + 2, SND_11_UNUSED + 2, SND_22_UNUSED + 2, SND_23_UNUSED + 2, SND_32_UNUSED,
+	};
+	const bool unused = std::find(std::begin(UNUSED_SOUNDS), std::end(UNUSED_SOUNDS), sound_id) != std::end(UNUSED_SOUNDS);
+	if (!unused) Debug(grf, 0, "LoadSound [{}]: Failed to load sound '{}' for slot {}", sound.file->GetSimplifiedFilename(), name, sound_id);
+
+	return false;
+}
+
+static bool LoadBasesetSound(SoundEntry &sound, bool new_format, SoundID sound_id)
+{
+	RandomAccessFile &file = *sound.file;
+	file.SeekTo(sound.file_offset, SEEK_SET);
+
+	/* A length byte and then the sound's name come first; the name is only used for diagnostics. */
+	const size_t length = file.ReadByte();
+	std::string label(length, '\0');
+	file.ReadBlock(label.data(), length);
+	return LoadSound(sound, new_format, sound_id, label);
+}
+
+static bool LoadSoundSource(SoundEntry &sound, SoundID sound_id)
+{
+	/* Baseset sounds share one loader and differ only in the format flag. */
+	if (sound.source == SoundSource::BasesetOldFormat) return LoadBasesetSound(sound, false, sound_id);
+	if (sound.source == SoundSource::BasesetNewFormat) return LoadBasesetSound(sound, true, sound_id);
+	if (sound.source == SoundSource::NewGRF) return LoadNewGRFSound(&sound, sound_id);
+
+	NOT_REACHED();
+}
+
 static void OpenBankFile(const std::string &filename)
 {
 	/**
@@ -41,7 +216,7 @@ static void OpenBankFile(const std::string &filename)
 	uint count = original_sound_file->ReadDword();
 
 	/* The new format has the highest bit always set */
-	bool new_format = HasBit(count, 31);
+	auto source = HasBit(count, 31) ? SoundSource::BasesetNewFormat : SoundSource::BasesetOldFormat;
 	ClrBit(count, 31);
 	count /= 8;
 
@@ -56,101 +231,32 @@ static void OpenBankFile(const std::string &filename)
 
 	original_sound_file->SeekTo(pos, SEEK_SET);
 
-	for (uint i = 0; i != ORIGINAL_SAMPLE_COUNT; i++) {
-		_original_sounds[i].file = original_sound_file.get();
-		_original_sounds[i].file_offset = GB(original_sound_file->ReadDword(), 0, 31) + pos;
-		_original_sounds[i].file_size = original_sound_file->ReadDword();
-	}
-
-	for (uint i = 0; i != ORIGINAL_SAMPLE_COUNT; i++) {
-		SoundEntry *sound = &_original_sounds[i];
-		char name[255];
-
-		original_sound_file->SeekTo(sound->file_offset, SEEK_SET);
-
-		/* Check for special case, see else case */
-		original_sound_file->ReadBlock(name, original_sound_file->ReadByte()); // Read the name of the sound
-		if (new_format || strcmp(name, "Corrupt sound") != 0) {
-			original_sound_file->SeekTo(12, SEEK_CUR); // Skip past RIFF header
-
-			/* Read riff tags */
-			for (;;) {
-				uint32_t tag = original_sound_file->ReadDword();
-				uint32_t size = original_sound_file->ReadDword();
-
-				if (tag == ' tmf') {
-					original_sound_file->ReadWord();                          // wFormatTag
-					sound->channels = original_sound_file->ReadWord();        // wChannels
-					sound->rate     = original_sound_file->ReadDword();       // samples per second
-					if (!new_format) sound->rate = 11025;                      // seems like all old samples should be played at this rate.
-					original_sound_file->ReadDword();                         // avg bytes per second
-					original_sound_file->ReadWord();                          // alignment
-					sound->bits_per_sample = original_sound_file->ReadByte(); // bits per sample
-					original_sound_file->SeekTo(size - (2 + 2 + 4 + 4 + 2 + 1), SEEK_CUR);
-				} else if (tag == 'atad') {
-					sound->file_size = size;
-					sound->file = original_sound_file.get();
-					sound->file_offset = original_sound_file->GetPos();
-					break;
-				} else {
-					sound->file_size = 0;
-					break;
-				}
-			}
-		} else {
-			/*
-			 * Special case for the jackhammer sound
-			 * (name in sample.cat is "Corrupt sound")
-			 * It's no RIFF file, but raw PCM data
-			 */
-			sound->channels = 1;
-			sound->rate = 11025;
-			sound->bits_per_sample = 8;
-			sound->file = original_sound_file.get();
-			sound->file_offset = original_sound_file->GetPos();
-		}
+	/* Read sound file positions. */
+	for (auto &sound : _original_sounds) {
+		sound.file = original_sound_file.get();
+		sound.file_offset = GB(original_sound_file->ReadDword(), 0, 31) + pos;
+		sound.file_size = original_sound_file->ReadDword();
+		sound.source = source;
 	}
 }
 
-static bool SetBankSource(MixerChannel *mc, const SoundEntry *sound)
+static bool SetBankSource(MixerChannel *mc, SoundEntry *sound, SoundID sound_id)
 {
 	assert(sound != nullptr);
 
-	/* Check for valid sound size. */
-	if (sound->file_size == 0 || sound->file_size > ((size_t)-1) - 2) return false;
-
-	int8_t *mem = MallocT<int8_t>(sound->file_size + 2);
-	/* Add two extra bytes so rate conversion can read these
-	 * without reading out of its input buffer. */
-	mem[sound->file_size    ] = 0;
-	mem[sound->file_size + 1] = 0;
-
-	RandomAccessFile *file = sound->file;
-	file->SeekTo(sound->file_offset, SEEK_SET);
-	file->ReadBlock(mem, sound->file_size);
-
-	/* 16-bit PCM WAV files should be signed by default */
-	if (sound->bits_per_sample == 8) {
-		for (uint i = 0; i != sound->file_size; i++) {
-			mem[i] += -128; // Convert unsigned sound data to signed
+	if (sound->file != nullptr) {
+		if (!LoadSoundSource(*sound, sound_id)) {
+			/* Mark as invalid. */
+			sound->file = nullptr;
+			return false;
 		}
+		sound->file = nullptr;
 	}
 
-#if TTD_ENDIAN == TTD_BIG_ENDIAN
-	if (sound->bits_per_sample == 16) {
-		uint num_samples = sound->file_size / 2;
-		int16_t *samples = (int16_t *)mem;
-		for (uint i = 0; i < num_samples; i++) {
-			samples[i] = BSWAP16(samples[i]);
-		}
-	}
-#endif
+	/* Check for valid sound. */
+	if (sound->data->empty()) return false;
 
-	assert(sound->bits_per_sample == 8 || sound->bits_per_sample == 16);
-	assert(sound->channels == 1);
-	assert(sound->file_size != 0 && sound->rate != 0);
-
-	MxSetChannelRawSrc(mc, mem, sound->file_size, sound->rate, sound->bits_per_sample == 16);
+	MxSetChannelRawSrc(mc, sound->data, sound->rate, sound->bits_per_sample == 16);
 
 	return true;
 }
@@ -161,6 +267,7 @@ void InitializeSound()
 	OpenBankFile(BaseSounds::GetUsedSet()->files->filename);
 }
 
+
 /* Low level sound player */
 static void StartSound(SoundID sound_id, float pan, uint volume)
 {
@@ -169,22 +276,13 @@ static void StartSound(SoundID sound_id, float pan, uint volume)
 	SoundEntry *sound = GetSound(sound_id);
 	if (sound == nullptr) return;
 
-	/* NewGRF sound that wasn't loaded yet? */
-	if (sound->rate == 0 && sound->file != nullptr) {
-		if (!LoadNewGRFSound(sound)) {
-			/* Mark as invalid. */
-			sound->file = nullptr;
-			return;
-		}
-	}
-
 	/* Empty sound? */
-	if (sound->rate == 0) return;
+	if (sound->rate == 0 && sound->file == nullptr) return;
 
 	MixerChannel *mc = MxAllocateChannel();
 	if (mc == nullptr) return;
 
-	if (!SetBankSource(mc, sound)) return;
+	if (!SetBankSource(mc, sound, sound_id)) return;
 
 	/* Apply the sound effect's own volume. */
 	volume = sound->volume * volume;
diff --git a/src/sound_func.h b/src/sound_func.h
index b378dbdfe2..eac3370567 100644
--- a/src/sound_func.h
+++ b/src/sound_func.h
@@ -14,6 +14,8 @@
 #include "vehicle_type.h"
 #include "tile_type.h"
 
+bool LoadSound(SoundEntry &sound, bool new_format, SoundID sound_id, const std::string &name);
+
 void SndPlayTileFx(SoundID sound, TileIndex tile);
 void SndPlayVehicleFx(SoundID sound, const Vehicle *v);
 void SndPlayFx(SoundID sound);
diff --git a/src/sound_type.h b/src/sound_type.h
index adc1438e6b..0a02bba289 100644
--- a/src/sound_type.h
+++ b/src/sound_type.h
@@ -10,7 +10,14 @@
 #ifndef SOUND_TYPE_H
 #define SOUND_TYPE_H
 
+enum class SoundSource : uint8_t {
+	BasesetOldFormat, ///< Sound from a baseset sound file whose header does not have the new-format bit set.
+	BasesetNewFormat, ///< Sound from a baseset sound file whose header has the new-format bit set.
+	NewGRF, ///< Sound present in a NewGRF file.
+};
+
 struct SoundEntry {
+	std::shared_ptr<std::vector<byte>> data;
 	class RandomAccessFile *file;
 	size_t file_offset;
 	size_t file_size;
@@ -19,6 +26,7 @@ struct SoundEntry {
 	uint8_t channels;
 	uint8_t volume;
 	uint8_t priority;
+	SoundSource source;
 	byte grf_container_ver; ///< NewGRF container version if the sound is from a NewGRF.
 };