Commit fb1462c0 authored by m4xw's avatar m4xw
Browse files

git subrepo clone --force...

git subrepo clone --force https://github.com/libretro-fork/libretro-common-mupen64plus-nx.git libretro-common/

subrepo:
  subdir:   "libretro-common"
  merged:   "cab3b7d16"
upstream:
  origin:   "https://github.com/libretro-fork/libretro-common-mupen64plus-nx.git"
  branch:   "master"
  commit:   "cab3b7d16"
git-subrepo:
  version:  "0.4.0"
  origin:   "???"
  commit:   "???"
parent 350f90a7
Pipeline #85497 passed with stages
in 4 minutes and 47 seconds
......@@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/libretro-fork/libretro-common-mupen64plus-nx.git
branch = master
commit = 59e3f6afd6d31286c3bb3f67a61ae1d8b6a4af06
parent = 5cbca0e53e256648bec89c830482c9b837e392bc
commit = cab3b7d16c6ee6599fb1d69f1c952acdd91b193e
parent = 350f90a73cf0f5d65357ce982ccbaa3b22fc3569
method = rebase
cmdver = 0.4.0
......@@ -114,7 +114,8 @@ void audio_mix_free_chunk(audio_chunk_t *chunk)
free(chunk);
}
audio_chunk_t* audio_mix_load_wav_file(const char *path, int sample_rate)
audio_chunk_t* audio_mix_load_wav_file(const char *path, int sample_rate,
const char *resampler_ident, enum resampler_quality quality)
{
#ifdef HAVE_RWAV
int sample_size;
......@@ -233,8 +234,8 @@ audio_chunk_t* audio_mix_load_wav_file(const char *path, int sample_rate)
retro_resampler_realloc(&chunk->resampler_data,
&chunk->resampler,
NULL,
RESAMPLER_QUALITY_DONTCARE,
resampler_ident,
quality,
chunk->ratio);
if (chunk->resampler && chunk->resampler_data)
......
......@@ -267,28 +267,27 @@ static bool wav_to_float(const rwav_t* wav, float** pcm, size_t samples_out)
}
static bool one_shot_resample(const float* in, size_t samples_in,
unsigned rate, float** out, size_t* samples_out)
unsigned rate, const char *resampler_ident, enum resampler_quality quality,
float** out, size_t* samples_out)
{
struct resampler_data info;
void* data = NULL;
const retro_resampler_t* resampler = NULL;
float ratio = (double)s_rate / (double)rate;
if (!retro_resampler_realloc(&data, &resampler, NULL,
RESAMPLER_QUALITY_DONTCARE, ratio))
if (!retro_resampler_realloc(&data, &resampler,
resampler_ident, quality, ratio))
return false;
/*
* Allocate on a 16-byte boundary, and pad to a multiple of 16 bytes. We
/* Allocate on a 16-byte boundary, and pad to a multiple of 16 bytes. We
* add four more samples in the formula below just as safeguard, because
* resampler->process sometimes reports more output samples than the
* formula below calculates. Ideally, audio resamplers should have a
* function to return the number of samples they will output given a
* count of input samples.
*/
*samples_out = samples_in * ratio + 4;
* count of input samples. */
*samples_out = (size_t)(samples_in * ratio);
*out = (float*)memalign_alloc(16,
((*samples_out + 15) & ~15) * sizeof(float));
(((*samples_out + 4) + 15) & ~15) * sizeof(float));
if (*out == NULL)
return false;
......@@ -323,12 +322,12 @@ void audio_mixer_done(void)
s_voices[i].type = AUDIO_MIXER_TYPE_NONE;
}
audio_mixer_sound_t* audio_mixer_load_wav(void *buffer, int32_t size)
audio_mixer_sound_t* audio_mixer_load_wav(void *buffer, int32_t size,
const char *resampler_ident, enum resampler_quality quality)
{
#ifdef HAVE_RWAV
/* WAV data */
rwav_t wav;
enum rwav_state rwav_ret;
/* WAV samples converted to float */
float* pcm = NULL;
size_t samples = 0;
......@@ -342,7 +341,7 @@ audio_mixer_sound_t* audio_mixer_load_wav(void *buffer, int32_t size)
wav.subchunk2size = 0;
wav.samples = NULL;
if ((rwav_ret = rwav_load(&wav, buffer, size)) != RWAV_ITERATE_DONE)
if ((rwav_load(&wav, buffer, size)) != RWAV_ITERATE_DONE)
return NULL;
samples = wav.numsamples * 2;
......@@ -354,8 +353,9 @@ audio_mixer_sound_t* audio_mixer_load_wav(void *buffer, int32_t size)
{
float* resampled = NULL;
if (!one_shot_resample(pcm, samples,
wav.samplerate, &resampled, &samples))
if (!one_shot_resample(pcm, samples, wav.samplerate,
resampler_ident, quality,
&resampled, &samples))
return NULL;
memalign_free((void*)pcm);
......@@ -515,6 +515,8 @@ static bool audio_mixer_play_ogg(
audio_mixer_sound_t* sound,
audio_mixer_voice_t* voice,
bool repeat, float volume,
const char *resampler_ident,
enum resampler_quality quality,
audio_mixer_stop_cb_t stop_cb)
{
stb_vorbis_info info;
......@@ -538,14 +540,20 @@ static bool audio_mixer_play_ogg(
ratio = (double)s_rate / (double)info.sample_rate;
if (!retro_resampler_realloc(&resampler_data,
&resamp, NULL, RESAMPLER_QUALITY_DONTCARE,
&resamp, resampler_ident, quality,
ratio))
goto error;
}
/* Allocate on a 16-byte boundary, and pad to a multiple of 16 bytes. We
* add four more samples in the formula below just as safeguard, because
* resampler->process sometimes reports more output samples than the
* formula below calculates. Ideally, audio resamplers should have a
* function to return the number of samples they will output given a
* count of input samples. */
samples = (unsigned)(AUDIO_MIXER_TEMP_BUFFER * ratio);
ogg_buffer = (float*)memalign_alloc(16,
((samples + 15) & ~15) * sizeof(float));
(((samples + 4) + 15) & ~15) * sizeof(float));
if (!ogg_buffer)
{
......@@ -663,6 +671,8 @@ static bool audio_mixer_play_flac(
audio_mixer_sound_t* sound,
audio_mixer_voice_t* voice,
bool repeat, float volume,
const char *resampler_ident,
enum resampler_quality quality,
audio_mixer_stop_cb_t stop_cb)
{
float ratio = 1.0f;
......@@ -679,14 +689,20 @@ static bool audio_mixer_play_flac(
ratio = (double)s_rate / (double)(dr_flac->sampleRate);
if (!retro_resampler_realloc(&resampler_data,
&resamp, NULL, RESAMPLER_QUALITY_DONTCARE,
&resamp, resampler_ident, quality,
ratio))
goto error;
}
/* Allocate on a 16-byte boundary, and pad to a multiple of 16 bytes. We
* add four more samples in the formula below just as safeguard, because
* resampler->process sometimes reports more output samples than the
* formula below calculates. Ideally, audio resamplers should have a
* function to return the number of samples they will output given a
* count of input samples. */
samples = (unsigned)(AUDIO_MIXER_TEMP_BUFFER * ratio);
flac_buffer = (float*)memalign_alloc(16,
((samples + 15) & ~15) * sizeof(float));
flac_buffer = (float*)memalign_alloc(16,
(((samples + 4) + 15) & ~15) * sizeof(float));
if (!flac_buffer)
{
......@@ -724,6 +740,8 @@ static bool audio_mixer_play_mp3(
audio_mixer_sound_t* sound,
audio_mixer_voice_t* voice,
bool repeat, float volume,
const char *resampler_ident,
enum resampler_quality quality,
audio_mixer_stop_cb_t stop_cb)
{
float ratio = 1.0f;
......@@ -749,14 +767,20 @@ static bool audio_mixer_play_mp3(
ratio = (double)s_rate / (double)(voice->types.mp3.stream.sampleRate);
if (!retro_resampler_realloc(&resampler_data,
&resamp, NULL, RESAMPLER_QUALITY_DONTCARE,
&resamp, resampler_ident, quality,
ratio))
goto error;
}
/* Allocate on a 16-byte boundary, and pad to a multiple of 16 bytes. We
* add four more samples in the formula below just as safeguard, because
* resampler->process sometimes reports more output samples than the
* formula below calculates. Ideally, audio resamplers should have a
* function to return the number of samples they will output given a
* count of input samples. */
samples = (unsigned)(AUDIO_MIXER_TEMP_BUFFER * ratio);
mp3_buffer = (float*)memalign_alloc(16,
((samples + 15) & ~15) * sizeof(float));
(((samples + 4) + 15) & ~15) * sizeof(float));
if (!mp3_buffer)
{
......@@ -787,8 +811,11 @@ error:
}
#endif
audio_mixer_voice_t* audio_mixer_play(audio_mixer_sound_t* sound, bool repeat,
float volume, audio_mixer_stop_cb_t stop_cb)
audio_mixer_voice_t* audio_mixer_play(audio_mixer_sound_t* sound,
bool repeat, float volume,
const char *resampler_ident,
enum resampler_quality quality,
audio_mixer_stop_cb_t stop_cb)
{
unsigned i;
bool res = false;
......@@ -809,7 +836,8 @@ audio_mixer_voice_t* audio_mixer_play(audio_mixer_sound_t* sound, bool repeat,
break;
case AUDIO_MIXER_TYPE_OGG:
#ifdef HAVE_STB_VORBIS
res = audio_mixer_play_ogg(sound, voice, repeat, volume, stop_cb);
res = audio_mixer_play_ogg(sound, voice, repeat, volume,
resampler_ident, quality, stop_cb);
#endif
break;
case AUDIO_MIXER_TYPE_MOD:
......@@ -819,12 +847,14 @@ audio_mixer_voice_t* audio_mixer_play(audio_mixer_sound_t* sound, bool repeat,
break;
case AUDIO_MIXER_TYPE_FLAC:
#ifdef HAVE_DR_FLAC
res = audio_mixer_play_flac(sound, voice, repeat, volume, stop_cb);
res = audio_mixer_play_flac(sound, voice, repeat, volume,
resampler_ident, quality, stop_cb);
#endif
break;
case AUDIO_MIXER_TYPE_MP3:
#ifdef HAVE_DR_MP3
res = audio_mixer_play_mp3(sound, voice, repeat, volume, stop_cb);
res = audio_mixer_play_mp3(sound, voice, repeat, volume,
resampler_ident, quality, stop_cb);
#endif
break;
case AUDIO_MIXER_TYPE_NONE:
......
/* Copyright (C) 2010-2020 The RetroArch team
/* Copyright (C) 2010-2021 The RetroArch team
*
* ---------------------------------------------------------------------------------------
* The following license statement only applies to this file (float_to_s16.c).
......@@ -28,37 +28,75 @@
#include <altivec.h>
#endif
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
#ifndef HAVE_ARM_NEON_OPTIMIZATIONS
#define HAVE_ARM_NEON_OPTIMIZATIONS
#endif
#endif
#include <features/features_cpu.h>
#include <audio/conversion/float_to_s16.h>
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
static bool float_to_s16_neon_enabled = false;
void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
void convert_float_s16_asm(int16_t *out,
const float *in, size_t samples);
#else
#include <arm_neon.h>
#endif
/**
 * convert_float_to_s16:
 * @out               : output buffer
 * @in                : input buffer
 * @samples           : number of samples to convert
 *
 * Converts floating point samples to signed 16-bit integers.
 *
 * When float_to_s16_neon_enabled is set, whole groups of eight
 * samples are converted with NEON: either through the external
 * convert_float_s16_asm() routine (HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
 * or through intrinsics. Whatever is left over is handled by the
 * scalar loop, which scales by 0x8000 and clamps the result to
 * [-0x8000, 0x7FFF].
 **/
void convert_float_to_s16(int16_t *out,
      const float *in, size_t samples)
{
   size_t i = 0;

   if (float_to_s16_neon_enabled)
   {
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
      /* <arm_neon.h> is only included when the assembly routine is
       * NOT used, so no NEON vector types may appear in this branch.
       * The assembly routine consumes every complete group of eight
       * samples in a single call; no loop is required. */
      size_t aligned_samples = samples & ~7;

      if (aligned_samples)
         convert_float_s16_asm(out, in, aligned_samples);

      out     += aligned_samples;
      in      += aligned_samples;
      samples -= aligned_samples;
#else
      float       gf  = (1<<15);
      float32x4_t vgf = {gf, gf, gf, gf};

      /* Eight samples per iteration: de-interleaving load, scale,
       * convert to 32-bit integers, narrow to 16 bits with
       * vqmovn_s32, then re-interleaving store. */
      while (samples >= 8)
      {
         int16x4x2_t oreg;
         int32x4x2_t creg;
         float32x4x2_t inreg = vld2q_f32(in);

         creg.val[0] = vcvtq_s32_f32(vmulq_f32(inreg.val[0], vgf));
         creg.val[1] = vcvtq_s32_f32(vmulq_f32(inreg.val[1], vgf));
         oreg.val[0] = vqmovn_s32(creg.val[0]);
         oreg.val[1] = vqmovn_s32(creg.val[1]);
         vst2_s16(out, oreg);

         in      += 8;
         out     += 8;
         samples -= 8;
      }
#endif
   }

   /* Scalar path: the remaining (fewer than eight) samples after the
    * NEON pass, or every sample when NEON is not enabled. The
    * pointers were advanced above, so indexing restarts at zero. */
   for (; i < samples; i++)
   {
      int32_t val = (int32_t)(in[i] * 0x8000);
      out[i]      = (val > 0x7FFF) ? 0x7FFF :
         (val < -0x8000 ? -0x8000 : (int16_t)val);
   }
}
/**
 * convert_float_to_s16_init_simd:
 *
 * Reads the CPU feature mask from cpu_features_get() and switches
 * convert_float_to_s16() over to its NEON path when the
 * RETRO_SIMD_NEON bit is set. The flag is only ever raised here;
 * it is never cleared.
 **/
void convert_float_to_s16_init_simd(void)
{
   const uint64_t simd_mask = cpu_features_get();
   const bool     has_neon  = (simd_mask & RETRO_SIMD_NEON) != 0;

   if (!has_neon)
      return;

   float_to_s16_neon_enabled = true;
}
#else
void convert_float_to_s16(int16_t *out,
const float *in, size_t samples)
{
size_t i = 0;
#if defined(__SSE2__)
__m128 factor = _mm_set1_ps((float)0x8000);
__m128 factor = _mm_set1_ps((float)0x8000);
for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
{
......@@ -73,10 +111,10 @@ void convert_float_to_s16(int16_t *out,
_mm_storeu_si128((__m128i *)out, packed);
}
samples = samples - i;
i = 0;
samples = samples - i;
i = 0;
#elif defined(__ALTIVEC__)
int samples_in = samples;
int samples_in = samples;
/* Unaligned loads/store is a bit expensive,
* so we optimize for the good path (very likely). */
......@@ -92,25 +130,12 @@ void convert_float_to_s16(int16_t *out,
vec_st(vec_packs(result0, result1), 0, out);
}
samples_in -= i;
samples_in -= i;
}
samples = samples_in;
i = 0;
#elif defined(HAVE_ARM_NEON_OPTIMIZATIONS)
if (float_to_s16_neon_enabled)
{
size_t aligned_samples = samples & ~7;
if (aligned_samples)
convert_float_s16_asm(out, in, aligned_samples);
out = out + aligned_samples;
in = in + aligned_samples;
samples = samples - aligned_samples;
i = 0;
}
samples = samples_in;
i = 0;
#elif defined(_MIPS_ARCH_ALLEGREX)
#ifdef DEBUG
/* Make sure the buffers are 16 byte aligned, this should be
* the default behaviour of malloc in the PSPSDK.
......@@ -138,29 +163,16 @@ void convert_float_to_s16(int16_t *out,
".set pop \n"
:: "r"(in + i), "r"(out + i));
}
#endif
for (; i < samples; i++)
{
int32_t val = (int32_t)(in[i] * 0x8000);
out[i] = (val > 0x7FFF) ? 0x7FFF :
(val < -0x8000 ? -0x8000 : (int16_t)val);
int32_t val = (int32_t)(in[i] * 0x8000);
out[i] = (val > 0x7FFF)
? 0x7FFF
: (val < -0x8000 ? -0x8000 : (int16_t)val);
}
}
/**
* convert_float_to_s16_init_simd:
*
* Sets up function pointers for conversion
* functions based on CPU features.
**/
void convert_float_to_s16_init_simd(void)
{
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
unsigned cpu = cpu_features_get();
if (cpu & RETRO_SIMD_NEON)
float_to_s16_neon_enabled = true;
void convert_float_to_s16_init_simd(void) { }
#endif
}
......@@ -19,7 +19,7 @@
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
#ifndef __MACH__
.arm
......
......@@ -19,7 +19,7 @@
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
#if defined(__thumb__)
#define DECL_ARMMODE(x) " .align 2\n" " .global " x "\n" " .thumb\n" " .thumb_func\n" " .type " x ", %function\n" x ":\n"
......
/* Copyright (C) 2010-2020 The RetroArch team
/* Copyright (C) 2010-2021 The RetroArch team
*
* ---------------------------------------------------------------------------------------
* The following license statement only applies to this file (s16_to_float.c).
......@@ -29,30 +29,67 @@
#include <features/features_cpu.h>
#include <audio/conversion/s16_to_float.h>
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
#ifndef HAVE_ARM_NEON_OPTIMIZATIONS
#define HAVE_ARM_NEON_OPTIMIZATIONS
#endif
#endif
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
static bool s16_to_float_neon_enabled = false;
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
/* Avoid potential hard-float/soft-float ABI issues. */
void convert_s16_float_asm(float *out, const int16_t *in,
size_t samples, const float *gain);
#else
#include <arm_neon.h>
#endif
/**
 * convert_s16_to_float:
 * @out               : output buffer
 * @in                : input buffer
 * @samples           : number of samples to convert
 * @gain              : gain applied (e.g. audio volume)
 *
 * Converts signed 16-bit integer samples to floating point.
 * Every output sample equals the input sample multiplied by
 * (gain / 0x8000).
 *
 * When s16_to_float_neon_enabled is set, whole groups of eight
 * samples are converted with NEON: either through the external
 * convert_s16_float_asm() routine (HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
 * or through intrinsics. Whatever is left over is handled by the
 * scalar loop.
 **/
void convert_s16_to_float(float *out,
      const int16_t *in, size_t samples, float gain)
{
   /* size_t, matching the type of @samples, so the index can never
    * wrap before reaching the sample count. */
   size_t i = 0;

   if (s16_to_float_neon_enabled)
   {
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
      size_t aligned_samples = samples & ~7;

      /* The gain is passed by address (see the note on the
       * prototype about hard-float/soft-float ABI issues). */
      if (aligned_samples)
         convert_s16_float_asm(out, in, aligned_samples, &gain);

      /* Could do all conversion in ASM, but keep it simple for now. */
      out     += aligned_samples;
      in      += aligned_samples;
      samples -= aligned_samples;
#else
      float       gf  = gain / (1 << 15);
      float32x4_t vgf = {gf, gf, gf, gf};

      /* Eight samples per iteration: de-interleaving load, widen to
       * 32 bits, convert to float, scale, re-interleaving store. */
      while (samples >= 8)
      {
         float32x4x2_t oreg;
         int16x4x2_t inreg = vld2_s16(in);
         int32x4_t      p1 = vmovl_s16(inreg.val[0]);
         int32x4_t      p2 = vmovl_s16(inreg.val[1]);

         oreg.val[0] = vmulq_f32(vcvtq_f32_s32(p1), vgf);
         oreg.val[1] = vmulq_f32(vcvtq_f32_s32(p2), vgf);
         vst2q_f32(out, oreg);

         in      += 8;
         out     += 8;
         samples -= 8;
      }
#endif
   }

   /* Fold the 1/0x8000 normalisation into the gain for the scalar
    * path. This must stay after the NEON block, which derives its
    * own scale factor from the unmodified gain. */
   gain /= 0x8000;

   /* Scalar path: the remaining (fewer than eight) samples after the
    * NEON pass, or every sample when NEON is not enabled. */
   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
}
/**
 * convert_s16_to_float_init_simd:
 *
 * Reads the CPU feature mask from cpu_features_get() and switches
 * convert_s16_to_float() over to its NEON path when the
 * RETRO_SIMD_NEON bit is set. The flag is only ever raised here;
 * it is never cleared.
 **/
void convert_s16_to_float_init_simd(void)
{
   const uint64_t simd_mask = cpu_features_get();
   const bool     has_neon  = (simd_mask & RETRO_SIMD_NEON) != 0;

   if (!has_neon)
      return;

   s16_to_float_neon_enabled = true;
}
#else
void convert_s16_to_float(float *out,
const int16_t *in, size_t samples, float gain)
{
......@@ -103,24 +140,9 @@ void convert_s16_to_float(float *out,
samples = samples_in;
i = 0;
#elif defined(HAVE_ARM_NEON_OPTIMIZATIONS)
if (s16_to_float_neon_enabled)
{
size_t aligned_samples = samples & ~7;
if (aligned_samples)
convert_s16_float_asm(out, in, aligned_samples, &gain);
/* Could do all conversion in ASM, but keep it simple for now. */
out = out + aligned_samples;
in = in + aligned_samples;
samples = samples - aligned_samples;
i = 0;
}
#endif
gain = gain / 0x8000;
gain /= 0x8000;
#if defined(_MIPS_ARCH_ALLEGREX)
#ifdef DEBUG
......@@ -172,25 +194,12 @@ void convert_s16_to_float(float *out,
".set pop \n"
:: "r"(in + i), "r"(out + i));
}
#endif
for (; i < samples; i++)
out[i] = (float)in[i] * gain;
}
/**
* convert_s16_to_float_init_simd:
*
* Sets up function pointers for conversion
* functions based on CPU features.
**/
void convert_s16_to_float_init_simd(void)
{
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
unsigned cpu = cpu_features_get();
if (cpu & RETRO_SIMD_NEON)
s16_to_float_neon_enabled = true;
void convert_s16_to_float_init_simd(void) { }
#endif
}
......@@ -19,7 +19,7 @@
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
#ifndef __MACH__
.arm
......
......@@ -19,7 +19,7 @@
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
#if defined(__thumb__)
#define DECL_ARMMODE(x) " .align 2\n" " .global " x "\n" " .thumb\n" " .thumb_func\n" " .type " x ", %function\n" x ":\n"
......
......@@ -54,7 +54,6 @@ else ifeq ($(platform), osx)
MINVERFLAGS=
ifeq ($(shell uname -p),arm)
MINVERFLAGS = -mmacosx-version-min=10.15 -stdlib=libc++ # macOS (Metal, ARM 64bit)
MINVERFLAGS += -DDONT_WANT_ARM_OPTIMIZATIONS