Загрузка данных


#define EC_DUMP
#define USE_CONVERT_NEON

#ifdef USE_CONVERT_NEON
static void s16_to_float_neon(const int16_t* src, float* dst) {
    asm volatile (
        "ld1    {v0.8h}, [%[in]]        \n\t"
        "sxtl   v1.4s, v0.4h            \n\t"
        "sxtl2  v2.4s, v0.8h            \n\t"
        "scvtf  v1.4s, v1.4s            \n\t"
        "scvtf  v2.4s, v2.4s            \n\t"
        "st1    {v1.4s, v2.4s}, [%[out]] \n\t"
        : 
        : [in] "r" (src), [out] "r" (dst)
        : "v0", "v1", "v2", "memory"
    );
}

void float_to_s16_neon(const float* src, int16_t* dst) {
    asm volatile (
        "ld1    {v1.4s, v2.4s}, [%[in]] \n\t"
        "fcvtns v1.4s, v1.4s            \n\t"
        "fcvtns v2.4s, v2.4s            \n\t"
        "sqxtn  v0.4h, v1.4s            \n\t"
        "sqxtn2 v0.8h, v2.4s            \n\t"
        "st1    {v0.8h}, [%[out]]       \n\t"
        : 
        : [in] "r" (src), [out] "r" (dst)
        : "v0", "v1", "v2", "memory"
    );
}
#endif

static void imx_ai_aec_process(MSFilter *f) {
	ImxAiAecState *s = (ImxAiAecState *)f->data;
	int nsamples = s->framesize;
	int nbytes = s->framesize * sizeof(int16_t);
	mblk_t *refm;
	int16_t *ref_tmp = (int16_t *)alloca(nbytes);
	int16_t *echo_tmp = (int16_t *)alloca(nbytes);
	float *ref_float = (float *)alloca(nsamples * sizeof(float));
	float *echo_float = (float *)alloca(nsamples * sizeof(float));
	float *out_float = (float *)alloca(nsamples * sizeof(float));

	if (s->bypass_mode) {
		while ((refm = ms_queue_get(f->inputs[0])) != NULL) ms_queue_put(f->outputs[0], refm);
		while ((refm = ms_queue_get(f->inputs[1])) != NULL) ms_queue_put(f->outputs[1], refm);
		return;
	}


	// 1. Get ref signal (from remote)
	if (f->inputs[0] != NULL) {
		if (s->echostarted) {
			while ((refm = ms_queue_get(f->inputs[0])) != NULL) {
				ms_bufferizer_put(&s->delayed_ref, dupmsg(refm));
				ms_flow_controlled_bufferizer_put(&s->ref, refm);
			}
		} else {
			ms_queue_flush(f->inputs[0]);
		}
	}

	// 2. Get mic signal
	ms_bufferizer_put_from_queue(&s->echo, f->inputs[1]);

	// 3. Main cycle
	while (ms_bufferizer_get_avail(&s->echo) >= (size_t)nbytes) {
		mblk_t *out_clean = allocb(nbytes, 0);
		int avail;

		if (!s->echostarted) s->echostarted = TRUE;

		// Checking if there is data in the delay buffer
		avail = (int)ms_bufferizer_get_avail(&s->delayed_ref);
		if (avail < ((s->nominal_ref_samples * 2) + nbytes)) {
			// Silence injection if the reference is lagging behind
			avail = nbytes;
			refm = allocb(nbytes, 0);
			memset(refm->b_wptr, 0, nbytes);
			refm->b_wptr += nbytes;
			ms_bufferizer_put(&s->delayed_ref, refm);
			ms_queue_put(f->outputs[0], dupmsg(refm));
			if (!s->using_zeroes) {
				ms_warning("IMX AEC: Not enough ref samples, using zeroes");
				s->using_zeroes = TRUE;
			}
		} else {
			s->using_zeroes = FALSE;
			refm = allocb(nbytes, 0);
			ms_flow_controlled_bufferizer_read(&s->ref, refm->b_wptr, nbytes);
			refm->b_wptr += nbytes;
			ms_queue_put(f->outputs[0], refm);
		}

		// Reading aligned data
		ms_bufferizer_read(&s->echo, (uint8_t *)echo_tmp, nbytes);
		ms_bufferizer_read(&s->delayed_ref, (uint8_t *)ref_tmp, nbytes);
		avail -= nbytes;
#ifdef EC_DUMP
		if (s->reffile) fwrite(ref_tmp, nbytes, 1, s->reffile);
		if (s->echofile) fwrite(echo_tmp, nbytes, 1, s->echofile);
#endif
#ifdef USE_CONVERT_NEON
		for(int i = 0; i < nsamples; i+=8) {
			s16_to_float_neon(&ref_tmp[i], &ref_float[i]);
			s16_to_float_neon(&echo_tmp[i], &echo_float[i]);
		}
#else
		//Converts int16_t to float data
		for(int i = 0; i < nsamples; i++) {
			ref_float[i] = (float)ref_tmp[i] / 32768.0f;
			echo_float[i] = (float)echo_tmp[i] / 32768.0f;
		}
#endif
#ifdef EC_DUMP
		if (s->reffile_float) fwrite(ref_float, sizeof(float) * nsamples, 1, s->reffile_float);
		if (s->echofile_float) fwrite(echo_float, sizeof(float) * nsamples, 1, s->echofile_float);
#endif
		// AI Processing (AEC + NR)
		if (imx_ai_aecnr_core_process(s->aec_core, ref_float, echo_float, out_float) != 0)
		{
			ms_error("imx_ai_aecnr_core_process failed");
			//memcpy(out_clean->b_wptr, echo_tmp, nbytes); // Fallback
			for (int i = 0; i < nsamples; i++) out_float[i] = echo_float[i];
		}
#ifdef EC_DUMP
		if (s->cleanfile_float) fwrite(out_float, sizeof(float) * nsamples, 1, s->cleanfile_float);
#endif
		//Converts float data to int16_t
		int16_t *out_ptr = (int16_t *)out_clean->b_wptr;
#ifdef USE_CONVERT_NEON
		for(int i = 0; i < nsamples; i+=8) {
			float_to_s16_neon(&out_float[i], &out_ptr[i]);
		}
#else
		for(int i = 0; i < nsamples; i++) {
			float val = out_float[i] * 32768.0f;
			if(val > 32767.0f) val = 32767.0f;
			if(val < -32768.0f) val = -32768.0f;
			out_ptr[i] = (int16_t)val;
		}
#endif
#ifdef EC_DUMP
		if (s->cleanfile) fwrite(out_ptr, nbytes, 1, s->cleanfile);
#endif
		out_clean->b_wptr += nbytes;
		ms_queue_put(f->outputs[1], out_clean);
	}
}

Если включен USE_CONVERT_NEON - преобразование типов работает неправильно