This commit is contained in:
Steven Dan
2025-12-11 09:43:42 +08:00
commit d8b2974133
1822 changed files with 280037 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
// Copyright 2024-2025 XMOS LIMITED.
// This Software is subject to the terms of the XMOS Public Licence: Version 1.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <xcore/hwtimer.h>
#include "../autogen/dut.h"
#include "../autogen/dut_debug.h"
#include "ref_fir.h"
/*
This tests for equlivance between the FD implementation and the TD reference.
It has an allowed error of 32 for mean abs error and abs mean error.
*/
int run_test(void){
int32_t __attribute__((aligned (8))) data[dut_DATA_BUFFER_ELEMENTS];
int32_t __attribute__((aligned (8))) new_data[dut_TD_BLOCK_LENGTH];
int32_t __attribute__((aligned (8))) data_td[debug_dut_DATA_BUFFER_ELEMENTS];
memset(new_data, 0, sizeof(new_data));
memset(data_td, 0, sizeof(data_td));
memset(data, 0, sizeof(data));
fd_fir_data_t fd_fir_data_dut;
fd_block_fir_data_init(&fd_fir_data_dut, data,
dut_FRAME_ADVANCE,
dut_TD_BLOCK_LENGTH,
dut_BLOCK_COUNT);
int error_sum = 0;
int abs_error_sum = 0;
int count = 0;
int32_t frame_overlap[dut_FRAME_OVERLAP];
memset(frame_overlap, 0, sizeof(frame_overlap));
for(int j=0;j<dut_BLOCK_COUNT + 2;j++)
{
for(int i=0;i<dut_FRAME_ADVANCE;i++)
new_data[i] = rand()-rand();
int32_t td_processed[dut_FRAME_ADVANCE + dut_FRAME_OVERLAP];
memcpy(td_processed, frame_overlap, sizeof(frame_overlap));
for(int i=0;i<dut_FRAME_ADVANCE;i++)
td_processed[i+dut_FRAME_OVERLAP] = td_reference_fir(new_data[i], &td_block_debug_fir_filter_dut, data_td);
memcpy(frame_overlap, td_processed + dut_FRAME_ADVANCE, sizeof(frame_overlap));
int32_t __attribute__((aligned (8))) fd_processed[dut_TD_BLOCK_LENGTH] = {0};
fd_block_fir_add_data(new_data, &fd_fir_data_dut);
fd_block_fir_compute(
fd_processed,
&fd_fir_data_dut,
&fd_fir_filter_dut);
for(int i=0;i<dut_FRAME_ADVANCE + dut_FRAME_OVERLAP;i++){
int error = td_processed[i] - fd_processed[i];
// printf("%2d td:%12ld fd:%12ld error:%d\n", i, td_processed[i], fd_processed[i], error);
error_sum += error;
if(error < 0) error = -error;
abs_error_sum += error;
count++;
}
}
float error_ave_abs = (float)error_sum / count;
if(error_ave_abs<0)error_ave_abs=-error_ave_abs;
if (error_ave_abs > 32.0){
printf("avg error:%f avg abs error:%f dut_TD_BLOCK_LENGTH:%d dut_BLOCK_COUNT:%d DATA_BUFFER_ELEMENTS:%d\n", (float)error_sum / count, (float)abs_error_sum / count, dut_TD_BLOCK_LENGTH, dut_BLOCK_COUNT, debug_dut_DATA_BUFFER_ELEMENTS);
return 1;
}
if(((float)abs_error_sum / count) > 32.0){
printf("avg error:%f avg abs error:%f dut_TD_BLOCK_LENGTH:%d dut_BLOCK_COUNT:%d DATA_BUFFER_ELEMENTS:%d\n", (float)error_sum / count, (float)abs_error_sum / count, dut_TD_BLOCK_LENGTH, dut_BLOCK_COUNT, debug_dut_DATA_BUFFER_ELEMENTS);
return 1;
}
return 0;
}
int main() {
return run_test();
}

View File

@@ -0,0 +1,120 @@
// Copyright 2024-2025 XMOS LIMITED.
// This Software is subject to the terms of the XMOS Public Licence: Version 1.
// Code for reference: accurate but slow
// prod_shr prevents accumulator overflow
// accu_shr returns the accumulator to the correct output q value
#include "ref_fir.h"
#include <string.h>
#include "xmath/xs3/vpu_scalar_ops.h"
int32_t td_reference_fir(
int32_t new_sample,
td_reference_fir_filter_t *filter,
int32_t *data)
{
for (uint32_t i = filter->length - 1; i > 0; i--)
data[i] = data[i - 1];
data[0] = new_sample;
int64_t accu = 0;
for (uint32_t i = 0; i < filter->length; i++)
{
int64_t p = (int64_t)data[i] * (int64_t)filter->coefs[i];
accu += ((p + (1 << (filter->prod_shr - 1))) >> filter->prod_shr);
}
int64_t res = (accu + (1 << (filter->accu_shr - 1))) >> filter->accu_shr;
if (res > INT32_MAX)
res = INT32_MAX;
if (res < INT32_MIN)
res = INT32_MIN;
return res;
}
void td_block_fir_add_data_ref(
int32_t samples_in[TD_BLOCK_FIR_LENGTH],
td_block_fir_data_t *fir_data)
{
int head;
// if this is the end of the buffer then paste it onto the front too
memcpy((void *)fir_data->data + fir_data->index, samples_in, sizeof(int32_t) * TD_BLOCK_FIR_LENGTH);
if (fir_data->index == fir_data->data_stride)
{
memcpy(fir_data->data + 0, samples_in, sizeof(int32_t) * TD_BLOCK_FIR_LENGTH);
head = 32;
}
else
{
head = fir_data->index + 32;
}
fir_data->index = head;
}
void td_block_fir_compute_ref(
int32_t output_block[TD_BLOCK_FIR_LENGTH],
td_block_fir_data_t *fir_data,
td_block_fir_filter_t *fir_filter)
{
int64_t accu[TD_BLOCK_FIR_LENGTH];
memset(accu, 0, sizeof(accu));
void *data_p = (void *)fir_data->data + fir_data->index + fir_data->data_stride - fir_filter->block_count * 32;
int second_loop_coutner = (fir_data->index - 32) / 32;
int first_loop_coutner = fir_filter->block_count - second_loop_coutner;
if (first_loop_coutner <= 0)
{
second_loop_coutner += first_loop_coutner;
first_loop_coutner = 0;
}
void *filter_p = fir_filter->coefs;
while (first_loop_coutner != 0)
{
for (int b = 0; b < TD_BLOCK_FIR_LENGTH; b++)
{
accu[TD_BLOCK_FIR_LENGTH - 1 - b] = vlmaccr32(accu[TD_BLOCK_FIR_LENGTH - 1 - b], data_p, filter_p);
data_p -= 4;
}
data_p += 64;
filter_p += 32;
first_loop_coutner--;
}
data_p -= fir_data->data_stride;
while (second_loop_coutner != 0)
{
for (int b = 0; b < TD_BLOCK_FIR_LENGTH; b++)
{
accu[TD_BLOCK_FIR_LENGTH - 1 - b] = vlmaccr32(accu[TD_BLOCK_FIR_LENGTH - 1 - b], data_p, filter_p);
data_p -= 4;
}
data_p += 64;
filter_p += 32;
second_loop_coutner--;
}
uint32_t accu_shr = fir_filter->accu_shr;
uint32_t accu_shl = fir_filter->accu_shl;
for (int i = 0; i < TD_BLOCK_FIR_LENGTH; i++)
{
int64_t t = (accu[i] + (1 << (accu_shr - 1))) >> accu_shr;
int64_t res = t << accu_shl;
if (res > INT32_MAX)
res = INT32_MAX;
if (res < INT32_MIN)
res = INT32_MIN;
output_block[i] = res;
}
}

View File

@@ -0,0 +1,60 @@
// Copyright 2024-2025 XMOS LIMITED.
// This Software is subject to the terms of the XMOS Public Licence: Version 1.
#pragma once
#include "xmath/filter.h"
#include "dsp/td_block_fir.h"
/**
* @brief Time domain filter struct for reference.
*/
typedef struct td_reference_fir_filter_t{
/** Pointer to the actual coefficients. */
int32_t * coefs;
/** The count of coefficients. */
uint32_t length;
/** The output exponent(for printing). */
uint32_t exponent;
/** The amount to shr the accumulator after all accumulation is complete. */
uint32_t accu_shr;
/** The amount to shr the product of data and coef before accumulating. */
uint32_t prod_shr;
} td_reference_fir_filter_t;
/**
* @brief This implements a FIR at the highest possile precision in a human readable way. Its use
* is for debug and regression.
*
* @param new_sample A single sample to add to the time series data.
* @param filter Pointer to the td_reference_fir_filter_t struct.
* @param data Pointer to the actual time series data.
* @return int32_t The output of the filtered data.
*/
int32_t td_reference_fir(
int32_t new_sample,
td_reference_fir_filter_t * filter,
int32_t * data);
/**
* @brief Function to add samples to the FIR data structure. This is for debug and test only.
*
* @param input_block Array of int32_t samples of length TD_BLOCK_FIR_LENGTH.
* @param fir_data Pointer to struct of type td_block_fir_data_t to which the samples will be added.
*/
void td_block_fir_add_data_ref(
int32_t input_block[TD_BLOCK_FIR_LENGTH],
td_block_fir_data_t * fir_data);
/**
* @brief Function to compute the convolution between fir_data and fir_filter. This is for debug and test only.
*
* @param samples_out Array of length TD_BLOCK_FIR_LENGTH(8), which will be used to return the
processed samples.
* @param fir_data Pointer to struct of type td_block_fir_data_t from which the data samples will be obtained.
* @param fir_filter Pointer to struct of type td_block_fir_filter_t from which the coefficients will be obtained.
*/
void td_block_fir_compute_ref(
int32_t samples_out[TD_BLOCK_FIR_LENGTH],
td_block_fir_data_t * fir_data,
td_block_fir_filter_t * fir_filter
);