#include <arm_neon.h>
/**
 * Convert eight signed 16-bit integers to floats scaled by 1/32768.
 *
 * Each input value v is written out as (float)v * (1.0f / 32768.0f), so the
 * full int16_t range maps to [-1.0, 32767/32768].
 *
 * Exactly 8 elements are read from src and exactly 8 floats are written to
 * dst; no bounds or NULL checks are performed. All 8 inputs are loaded before
 * the first store.
 *
 * @param src  Pointer to at least 8 readable int16_t values.
 * @param dst  Pointer to at least 8 writable float values.
 */
void s16_to_float_intrinsics(const int16_t* src, float* dst) {
    const int16x8_t in = vld1q_s16(src);
    const float32x4_t scale = vdupq_n_f32(1.0f / 32768.0f);

    /* Lower 4 lanes: sign-extend to 32 bits, convert to float, scale. */
    const int32x4_t low = vmovl_s16(vget_low_s16(in));
    const float32x4_t low_f = vcvtq_f32_s32(low);
    vst1q_f32(dst, vmulq_f32(low_f, scale));

    /*
     * Upper 4 lanes. vget_high_s16 + vmovl_s16 is used instead of
     * vmovl_high_s16 because the latter exists only on AArch64; this form
     * also builds for 32-bit ARM NEON and yields the same values.
     */
    const int32x4_t high = vmovl_s16(vget_high_s16(in));
    const float32x4_t high_f = vcvtq_f32_s32(high);
    vst1q_f32(dst + 4, vmulq_f32(high_f, scale));
}