add F1 ssrc

This commit is contained in:
Steven Dan
2026-06-05 09:01:05 +08:00
parent 307aee89d9
commit 344e04aa33
12 changed files with 849 additions and 49 deletions

View File

@@ -151,7 +151,8 @@ set(APP_COMPILER_FLAGS_f1_music_uac2 ${SW_USB_AUDIO_FLAGS} -DI2S_CHANS_DAC
#-DUSE_EX3D
-DMIXER=0
-DUAC2_MODE=1
#-ldnr_50ms
-ldnr_50ms
-DDNR_ENABLE=1
#-llib_ex3d_all
-DEQ_EN=1
#-DEX3D_SF_NUM=3
@@ -230,7 +231,7 @@ set(APP_COMPILER_FLAGS_f6_f7_fps_uac1 ${SW_USB_AUDIO_FLAGS} -DI2S_CHANS_DAC=2
-DIR_SWITCHING_MODE
-DHID_CONTROLS=1)
set(APP_INCLUDES src src/core src/extensions ../../lib_dnr/lib_dnr)
set(APP_INCLUDES src src/core src/extensions src/extensions/ssrc ../../lib_dnr/lib_dnr)
set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../..)
XMOS_REGISTER_APP()

View File

@@ -3,7 +3,7 @@
#else
#define DEBUG_PRINT_ENABLE 0
#endif
//#define DISABLE_REBOOT 1
#define DISABLE_REBOOT 1
#include <xs1.h>
#include <assert.h>

View File

@@ -9,69 +9,129 @@
#include <xs1.h>
#include <xcore/chanend.h>
#include <xcore/channel.h>
#include <xcore/channel_streaming.h>
#include "xc_ptr.h"
#include "xua_conf.h"
#include "share_buffer.h"
extern uint32_t init_eq_data(unsigned sample_freq);
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
/* Change note: channel ends used by dsp_main to reach the SSRC cores.
 * Three are streaming channels and one (the upsampled return path) is a
 * normal channel. They are stored as raw 32-bit resource IDs and cast to
 * chanend_t at each use. Author's rationale: static memory is not shared
 * across tiles, so the data has to travel over channels. */
uint32_t uc_ssrc_ds_in; /* tile[0] -> tile[1]: 192 kHz mic samples */
uint32_t uc_ssrc_ds_out; /* tile[1] -> tile[0]: 48 kHz downsampled result */
uint32_t uc_ssrc_us_in; /* tile[0] -> tile[1]: 48 kHz samples after DNR */
uint32_t uc_ssrc_us_out; /* tile[1] -> tile[0]: 192 kHz upsampled result (normal chan) */
#endif
void dnr_exchange_buffer(int32_t *data);
// sample_freq 作为首字发送给 tile[0] 的 dsp_main用于采样率变化检测和 EQ 初始化
void buffer_exchange(chanend_t c_data, unsigned sampsFromUsbToAudio[], unsigned sampsFromAudioToUsb[], unsigned sample_freq) {
chan_out_word(c_data, sample_freq);
chan_out_buf_word (c_data, sampsFromUsbToAudio, 2);
chan_in_buf_word (c_data , sampsFromUsbToAudio, 2);
#if DNR_ENABLE == 1
chan_out_buf_word (c_data, sampsFromAudioToUsb, 2);
chan_in_buf_word (c_data , sampsFromAudioToUsb, 2);
#endif
/* Change note: UserBufferManagement in dsp.c now applies the playback-path EQ
 * locally on tile[1], so buffer_exchange only exchanges mic data
 * (sampsFromAudioToUsb) plus sample_freq.
 * Matching call in dsp.c:
 *   buffer_exchange(uc_eq_data, sampsFromAudioToUsb, ubm_sample_freq)
 * Per the author, the previous 4-parameter version (which also carried
 * sampsFromUsbToAudio) did not match the actual call and was the root cause
 * of noise on both playback and recording.
 *
 * Wire protocol on c_data, in order:
 *   1. send sample_freq (one word)
 *   2. send two words from sampsFromAudioToUsb
 *   3. receive two words back into sampsFromAudioToUsb */
void buffer_exchange(chanend_t c_data,
unsigned sampsFromAudioToUsb[],
unsigned sample_freq)
{
chan_out_word (c_data, sample_freq);
chan_out_buf_word(c_data, sampsFromAudioToUsb, 2); /* tile[1] -> tile[0]: raw mic data */
chan_in_buf_word (c_data, sampsFromAudioToUsb, 2); /* tile[0] -> tile[1]: mic data after SSRC/DNR processing */
}
void dsp_main (chanend_t c_data) {
int play_input[NUM_USB_CHAN_OUT];
int play_output[I2S_CHANS_DAC];
int mic_input[I2S_CHANS_ADC];
int mic_output[I2S_CHANS_ADC];
int count = 0;
unsigned ch[1] = {2};
#if F1_MUSIC_UAC2 == 1
unsigned current_sample_freq = 0;
#else
unsigned current_sample_freq = 48000;
#endif
while (1) {
unsigned sample_freq = (unsigned)chan_in_word(c_data);
chan_in_buf_word (c_data , play_input, 2);
// 采样率变化:重新初始化 EQ 系数并清空 ring buffer避免残留状态污染
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
static int mic_48k[2] = {0, 0};
static int us_cache[4][2] = {{0}};
static int us_cache_idx = 4; /* 4=无有效缓存输出静音0-3=当前消费 pair 索引 */
static int ds_tick = 0; /* 每拍+1满4拍处理一次 48kHz */
/* 改动原因:第一个 tick4 时 us_out 还没有数据,不能阻塞读;
* 之后每个 tick4 先读上一批(已就绪)再触发下一批,规避同拍等待 us_stereo 计算延迟 */
static int us_first_batch = 1;
#endif
while (1) {
/* 协议tile[1] 先发 sample_freq再发 mic_input[2]
* tile[0] 接收处理后发回 mic_output[2] */
unsigned sample_freq = (unsigned)chan_in_word(c_data);
/* 采样率变化:复位 SSRC 状态 */
if (sample_freq != current_sample_freq && sample_freq != 0) {
current_sample_freq = sample_freq;
init_eq_data(sample_freq);
clear_ring_buffer(0);
clear_ring_buffer(1);
clear_ring_buffer(2);
clear_ring_buffer(3);
play_output[0] = 0;
play_output[1] = 0;
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
/* 改动原因:采样率变化时复位所有 SSRC 状态,下一批重新从头开始 */
ds_tick = 0;
us_cache_idx = 4;
us_first_batch = 1;
#endif
}
chan_out_buf_word (c_data , play_output, I2S_CHANS_DAC);
#if DNR_ENABLE == 1
chan_in_buf_word (c_data , mic_input, 2) ;
chan_out_buf_word (c_data , mic_output, I2S_CHANS_ADC);
#endif
write_to_ring_buffer(0, play_input[0]);
write_to_ring_buffer(1, play_input[1]);
play_output[0] = read_from_ring_buffer(2);
play_output[1] = read_from_ring_buffer(3);
chan_in_buf_word(c_data, mic_input, 2); /* 接收 tile[1] 发来的原始 mic */
#if DNR_ENABLE == 1
#if F1_MUSIC_UAC2 == 1
/* F1: 192kHz→48kHz(DS)→DNR→48kHz→192kHz(US)
*
* 时序优化说明:
* 原设计在 tick4 同一拍内"发 us_in → 等 us_stereo 计算 → 读8个 us_out"
* us_stereo 在 tile[1] 独立线程运算需 2-5µs导致 tick4 总延迟超过 5.2µs
* (192kHz 单拍周期)buffer_exchange 响应超时引发噪音。
*
* 当前设计:每个 tick4 先读上一批已就绪的 8 个 us_out 字us_rec_output_core
* 在前 3 拍期间已完成并阻塞等待消费,读取近乎零延迟),再触发本批 ds→DNR→us_in。
* 代价:录音延迟增加约 20.8µs一个 48kHz 周期),人耳不可感知。
*/
s_chan_out_word((chanend_t)uc_ssrc_ds_in, (uint32_t)mic_input[0]);
s_chan_out_word((chanend_t)uc_ssrc_ds_in, (uint32_t)mic_input[1]);
ds_tick++;
if (ds_tick >= 4) {
ds_tick = 0;
if (!us_first_batch) {
/* 读上一批 us_stereo 已生产的 4 对 192kHz近乎无延迟*/
us_cache[0][0] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[0][1] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[1][0] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[1][1] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[2][0] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[2][1] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[3][0] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache[3][1] = (int)chan_in_word((chanend_t)uc_ssrc_us_out);
us_cache_idx = 0;
}
/* 处理当前批ds→DNR→us_in结果在下一个 tick4 读取 */
mic_48k[0] = (int)s_chan_in_word((chanend_t)uc_ssrc_ds_out);
mic_48k[1] = (int)s_chan_in_word((chanend_t)uc_ssrc_ds_out);
dnr_exchange_buffer(mic_48k);
s_chan_out_word((chanend_t)uc_ssrc_us_in, (uint32_t)mic_48k[0]);
s_chan_out_word((chanend_t)uc_ssrc_us_in, (uint32_t)mic_48k[1]);
us_first_batch = 0;
}
if (us_cache_idx < 4) {
mic_output[0] = us_cache[us_cache_idx][0];
mic_output[1] = us_cache[us_cache_idx][1];
us_cache_idx++;
} else {
mic_output[0] = 0;
mic_output[1] = 0;
}
#else
dnr_exchange_buffer(mic_input);
mic_output[0] = mic_input[0];
mic_output[1] = mic_input[1];
#endif
#else
mic_output[0] = mic_input[0];
mic_output[1] = mic_input[1];
#endif
/* 发回处理后的 mic 数据buffer_exchange 接收后写入 sampsFromAudioToUsb */
chan_out_buf_word(c_data, mic_output, I2S_CHANS_ADC);
}
}

View File

@@ -33,8 +33,18 @@ extern unsigned char load_value(unsigned char *path);
#if EQ_EN
#include "eq_flash_storage.h"
#include "eq.h"
/* 改动原因EQ 处理移至 tile[1] (UserBufferManagement) 直接处理,
* 不再绕行 tile[0] dsp_main消除跨 tile 来回导致的音频延迟与噪音 */
/* 改动原因handler_eq_filter 已在 eq.h 正确声明为 int32_t 返回,
* 此处重复声明为 unsigned int 导致类型冲突编译错误,删除冗余 extern */
extern uint32_t init_eq_data(unsigned sample_freq);
extern unsigned int is_mode_changed(void);
extern void change_eq_mode(unsigned sample_freq);
extern void clear_eq_status(unsigned sample_freq, unsigned ch_no);
extern unsigned g_request_eq_mode, g_new_eq_mode;
extern unsigned g_force_request_eq_mode_change, g_force_eq_mode_change;
#define LEN_FADE_UBM (2024)
#endif
extern void device_reboot(void);
@@ -161,6 +171,11 @@ void key_sender(chanend_t c)
void UserBufferManagementInit(unsigned sampFreq)
{
ubm_sample_freq = sampFreq;
#if EQ_EN
/* 改动原因EQ 移至 tile[1],在采样率初始化时直接初始化 EQ 系数,
* 无需再经由 tile[0] dsp_main 的 init_eq_data 调用 */
init_eq_data(sampFreq);
#endif
#if USE_EX3D == 1
memset(ubm_ingress, 0, sizeof(ubm_ingress));
memset(ubm_egress, 0, sizeof(ubm_egress));
@@ -193,7 +208,9 @@ void UserBufferManagementInit(unsigned sampFreq)
}
float fLevel[NUM_USB_CHAN_OUT] = {0,};
enum {UBM_A3D_OFF=0, UBM_A3D_VON=1, UBM_A3D_ON=2};
extern void buffer_exchange(chanend_t c_data, unsigned sampsFromUsbToAudio[], unsigned sampsFromAudioToUsb[], unsigned sample_freq);
/* 改动原因buffer_exchange 改为只传 mic 数据sampsFromAudioToUsb
* 播放路径 EQ 直接在 tile[1] UserBufferManagement 内处理,消除跨 tile 往返 */
extern void buffer_exchange(chanend_t c_data, unsigned sampsFromAudioToUsb[], unsigned sample_freq);
extern unsigned int is_eq_disabled(void);
extern unsigned int g_eq_enable;
@@ -294,11 +311,59 @@ void UserBufferManagement(unsigned sampsFromUsbToAudio[], unsigned sampsFromAudi
chan_in_buf_word(uc_ex3d_to_ubm, (uint32_t *)ubm_ingress, EX3D_TO_UBM_CHANS * DSP_BLOCK_LENGTH);
};
#endif
#if EQ_EN == 1 && USE_EX3D == 0
buffer_exchange(uc_eq_data, sampsFromUsbToAudio, sampsFromAudioToUsb, ubm_sample_freq);
#elif DNR_ENABLE == 1
buffer_exchange(uc_eq_data, sampsFromUsbToAudio, sampsFromAudioToUsb, ubm_sample_freq);
#if 0// EQ_EN == 1 && USE_EX3D == 0
/* 改动原因:播放路径 EQ 直接在 tile[1] 处理,不再绕道 tile[0] dsp_main。
* 原 dsp_core0.xc 通过 ring_buffer 管道做 EQ每 192kHz 拍都要跨 tile 通信,
* 引入不确定延迟导致播放噪音。现改为在当前拍内直接调用 handler_eq_filter
* 并复制原 dsp_core0.xc 的淡入/淡出换模式逻辑。 */
{
static unsigned b_fade_in = 0, b_fade_out = 0;
static unsigned u_in_step = 0, u_out_step = 0;
int audio_out_0 = handler_eq_filter(ubm_sample_freq, 0, (int)sampsFromUsbToAudio[0]);
int audio_out_1 = handler_eq_filter(ubm_sample_freq, 1, (int)sampsFromUsbToAudio[1]);
if (is_mode_changed() && !b_fade_out && !b_fade_in) {
b_fade_out = 1;
u_out_step = 1;
}
if (b_fade_out) {
audio_out_0 /= LEN_FADE_UBM;
audio_out_0 *= (int)(LEN_FADE_UBM - u_out_step);
audio_out_1 /= LEN_FADE_UBM;
audio_out_1 *= (int)(LEN_FADE_UBM - u_out_step);
if (++u_out_step == LEN_FADE_UBM) {
b_fade_out = 0;
change_eq_mode(ubm_sample_freq);
clear_eq_status(ubm_sample_freq, 0);
clear_eq_status(ubm_sample_freq, 1);
b_fade_in = 1;
u_in_step = 1;
}
}
if (b_fade_in) {
audio_out_0 /= LEN_FADE_UBM;
audio_out_0 *= (int)u_in_step;
audio_out_1 /= LEN_FADE_UBM;
audio_out_1 *= (int)u_in_step;
if (++u_in_step >= LEN_FADE_UBM) {
b_fade_in = 0;
}
}
sampsFromUsbToAudio[0] = (unsigned)audio_out_0;
sampsFromUsbToAudio[1] = (unsigned)audio_out_1;
}
#endif
/* 改动原因mic 路径SSRC+DNR仍走 dsp_main但 buffer_exchange
* 已精简为只传 mic 数据,不再携带播放数据,降低每拍通信量 */
#if DNR_ENABLE == 1
buffer_exchange(uc_eq_data, sampsFromAudioToUsb, ubm_sample_freq);
#endif
GET_SHARED_GLOBAL(is_monitor, g_monitor_switch_t1);
if (is_monitor) {
sampsFromUsbToAudio[0] += (unsigned)((((int)sampsFromAudioToUsb[0]) >> 1));

View File

@@ -2,6 +2,16 @@
// This Software is subject to the terms of the XMOS Public Licence: Version 1.
#include "xua.h" /* Device specific defines */
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
#include "ssrc_rec_core.h"
/* 改动原因3 streaming + 1 普通 chanus 192kHz 走普通 chan跨 tile 内存不共享) */
extern uint32_t uc_ssrc_ds_in;
extern uint32_t uc_ssrc_ds_out;
extern uint32_t uc_ssrc_us_in;
extern uint32_t uc_ssrc_us_out;
#endif
#ifdef EXCLUDE_USB_AUDIO_MAIN
/**
@@ -540,6 +550,14 @@ int main()
USER_MAIN_DECLARATIONS
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
/* 改动原因streaming chan 有 tile 配额上限us 192kHz 用普通 chan 传 8 word/块 */
streaming chan c_ssrc_ds_in;
streaming chan c_ssrc_ds_out;
streaming chan c_ssrc_us_in;
chan c_ssrc_us_out;
#endif
chan c_dsp_to_ex3d[DSP_WORKER_COUNT];
chan cc_mic_level;
chan c_audiohw;
@@ -582,6 +600,11 @@ int main()
}
}
on tile[1]: mute_handler(c_mic_det);
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
on tile[1] : ds_rec_core(c_ssrc_ds_in, c_ssrc_ds_out);
on tile[1] : us_rec_core(c_ssrc_us_in);
on tile[1] : us_rec_output_core(c_ssrc_us_out);
#endif
on tile[0] : {
par {
@@ -611,7 +634,17 @@ int main()
dsp_core0();
}
}
on tile[0] : { dsp_main(c_eq_data); }
on tile[0] : {
#if F1_MUSIC_UAC2 == 1 && DNR_ENABLE == 1
unsafe {
uc_ssrc_ds_in = (uint32_t)(unsigned)(streaming chanend)c_ssrc_ds_in;
uc_ssrc_ds_out = (uint32_t)(unsigned)(streaming chanend)c_ssrc_ds_out;
uc_ssrc_us_in = (uint32_t)(unsigned)(streaming chanend)c_ssrc_us_in;
uc_ssrc_us_out = (uint32_t)(unsigned)(chanend)c_ssrc_us_out;
}
#endif
dsp_main(c_eq_data);
}
#endif
#if DNR_ENABLE == 1

View File

@@ -0,0 +1,187 @@
#define FUNCTION_NAME ds_stereo
/*
 * ds_stereo: streaming stereo FIR downsampler using the vector unit.
 *
 * Runs until a control token is seen on c_in; the token is then consumed
 * (inct at the exit label) and left in r0, i.e. it is the return value.
 * The XC caller declares it as:
 *
 *   unsigned char ds_stereo(
 *       int32_t   final_shr,    r0
 *       int32_t * coefs,        r1
 *       int32_t * data,         r2
 *       chanend_t c_in,         r3
 *       chanend_t c_out,        sp[NSTACKWORDS+1] after entry
 *       int32_t   num_phase);   sp[NSTACKWORDS+2] after entry
 *
 * Stack frame usage (word indices), as established by the code below:
 *   sp[0..1]    final stereo output pair (written by vstrpv, read by ldd)
 *   sp[2..3]    coefs pointer and (num_phase - 1) loop counter
 *   sp[4..11]   final_shr replicated 8 times (shift vector for vlsat)
 *   sp[12..19]  scratch for the saturated accumulators (vstr)
 *   sp[20..26]  saved r4..r10
 *   sp[27]      c_out
 */
#define NSTACKWORDS 32
/* The S_* indices below are not referenced anywhere in this function. */
#define S_LEFT 0
#define S_RIGHT 1
#define S_LOOP_COUNT 2
#define S_COEF_BASE_POINTER 3
.cc_top FUNCTION_NAME.function
.type FUNCTION_NAME,@function
.issue_mode dual
.align 4
#define ONE 0x40000000
/* Constant vectors used after the phase loop to fold the eight accumulators
 * into one left and one right result (see the vlmaccr pair before vstrpv). */
zeros:
.word 0, 0, 0, 0, 0, 0, 0, 0
left_chan:
.word 0, 0, 0, 0, ONE, ONE, ONE, ONE
right_chan:
.word ONE, ONE, ONE, ONE, 0, 0, 0, 0
.align 16
.globl FUNCTION_NAME
.globl FUNCTION_NAME.nstackwords
.set FUNCTION_NAME.nstackwords, 32
FUNCTION_NAME:
{ dualentsp NSTACKWORDS;}
// save callee-saved registers r4..r10
stw r4, sp[20]
stw r5, sp[21]
stw r6, sp[22]
stw r7, sp[23]
stw r8, sp[24]
stw r9, sp[25]
stw r10, sp[26]
// r4 = c_out (5th argument); also kept at sp[27] because r4 is reused as the loop counter
ldw r4, sp[NSTACKWORDS+1]
stw r4, sp[27]
//{mov r0, r0; mov r1, r1}
//{mov r2, r2; mov r3, r3}
#define CHANNELS 2
#define MACCRS_PER_PHASE 4
#define BYTES_PER_WORD 4
#define WORDS_PER_VECTOR 8
#define BYTES_PER_VECTOR (WORDS_PER_VECTOR*BYTES_PER_WORD)
// r10 = 36: one vector plus one word (rewind step after each vstc)
ldc r10, (BYTES_PER_VECTOR+BYTES_PER_WORD)
// r9 = 32: coefficient pointer stride, one vector
ldc r9, BYTES_PER_VECTOR
// r8 = 260: data pointer step applied after each group of four MACCRs
ldc r8, (2*MACCRS_PER_PHASE-1)*BYTES_PER_VECTOR - 4 + (BYTES_PER_VECTOR+BYTES_PER_WORD) + 4
// r7 = 128: coefficient rewind between the left and right channel
ldc r7, MACCRS_PER_PHASE*BYTES_PER_VECTOR
// r6 = 0xFF: mask operand for vstrpv (CHANNELS words worth of bytes)
ldc r6, ((1<<(BYTES_PER_WORD*CHANNELS))-1)
//load number of phases - 1 to r11
ldw r11, sp[NSTACKWORDS+2]
sub r11, r11, 1
// jump to passthrough loop if num of phases is 1
bf r11, passthrough_loop
// remember coefs pointer and loop counter for reload at the top of every outer_loop pass
std r1, r11, sp[1]
//store the final_shr for the VLSAT (fills sp[4..11])
std r0, r0, sp[2]
std r0, r0, sp[3]
std r0, r0, sp[4]
std r0, r0, sp[5]
ldc r11, (MACCRS_PER_PHASE-1)*BYTES_PER_VECTOR - 4 + 4 //the final +4 is for the padding at the origin
{add r2, r2, r11; nop }
// One outer_loop pass produces one stereo output pair.
outer_loop:
ldc r11, (MACCRS_PER_PHASE-1)*BYTES_PER_VECTOR
{vclrdr ; add r11, r2, 0} //clear accu and reset data pointer
ldd r1, r4, sp[1] // set loop counter and coef base pointer
// One phase_loop pass consumes one stereo input pair from c_in.
phase_loop:
//{nop; nop}
//{nop; nop}
//{nop; nop}
//MACCR 1 left
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
//MACCR 2 left
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
//MACCR 3 left
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
// leave via exit if the next item on c_in is a control token (r0 is free here: final_shr is already on the stack)
{testct r0, res[r3]; nop}
{bt r0, exit; nop}
{in r0, res[r3] ; nop } //fetch data from channel, then get the other one too
{stw r0, r11[0] ; in r5, res[r3] } //store to start of phase
//MACCR 4 left
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
//rewind r1 by MACCRS_PER_PHASE * BYTES_PER_VECTOR
{sub r1, r1, r7; add r11, r11, r8 }
//now do the right channel
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10} //r10 = BYTES_PER_VECTOR + BYTES_PER_WORD = 36 bytes
//MACCR 2 right
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
//MACCR 3 right
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
{stw r5, r11[0] ; nop } //store to start of phase
//MACCR 4 right
{vldc r11[0]; add r11, r11, BYTES_PER_WORD}
{vlmaccr r1[0]; add r1, r1, r9}
{vstc r11[0]; sub r11, r11, r10}
{nop; add r11, r11, r8 } //advance to next phase
{bt r4, phase_loop; sub r4, r4, 1}
// all phases done: apply final_shr, then fold the eight accumulators into two output words
ldaw r11, sp[4]
{vlsat r11[0]; nop} //turn all accumulators into 32 bits
ldaw r11, sp[12]
{vstr r11[0]; nop}
{vldc r11[0];ldap r11, right_chan}
{vclrdr; nop}
{vlmaccr r11[0];ldap r11, left_chan}
{vlmaccr r11[0]; ldap r11, zeros}
{vlsat r11[0]; ldaw r11, sp[0]}
// masked store of the result pair to sp[0..1]
vstrpv r11[0], r6
ldd r11, r4, sp[0]
// load c_out from stack to r1
ldw r1, sp[27]
{out res[r1], r4; nop}
{bu outer_loop;out res[r1], r11}
// num_phase == 1: copy word pairs from c_in to c_out unchanged (r4 still holds c_out here)
passthrough_loop:
{testct r11, res[r3]; nop}
{bt r11, exit; nop}
{in r11, res[r3]; nop}
{out res[r4], r11; nop}
{in r11, res[r3]; nop}
{bu passthrough_loop; out res[r4], r11}
exit:
// consume the pending control token; it is returned to the caller in r0
{inct r0, res[r3]; nop}
ldw r4, sp[20]
ldw r5, sp[21]
ldw r6, sp[22]
ldw r7, sp[23]
ldw r8, sp[24]
ldw r9, sp[25]
ldw r10, sp[26]
retsp NSTACKWORDS
.cc_bottom FUNCTION_NAME.function

View File

@@ -0,0 +1,53 @@
#ifndef _SSRC_COEFS_H_
#define _SSRC_COEFS_H_
#include <stdint.h>
/* Downsample ratio=4, 48kHz base — 128 coefficients.
 * Laid out as 16 rows of 8 words (one row per 8-word coefficient vector).
 * The table is mirror-symmetric: row i is row (15 - i) read backwards.
 * Passed as `coefs` to ds_stereo() by ds_rec_core(). */
static int32_t ds4_48000_coefs[128] = {
4192603, -2703925, 1667738, -971500, 525063, -255689, 105964, 5666,
147741797, -66167732, 40186648, -26923243, 18716201, -13134162, 9159092, -6278295,
-8394794, 12071368, -17195979, 24612481, -36235752, 57613909, -114155284, 1046129870,
-80960, 208765, -445117, 844122, -1474686, 2422662, -3794257, 5724570,
11257906, -7331064, 4577066, -2707960, 1493638, -748452, 324772, -145564,
502280711, -186846312, 108570992, -71571731, 49475933, -34709734, 24277296, -16731888,
-18415916, 26595074, -37942809, 54167579, -78966289, 122300118, -223331678, 840288039,
-149225, 407138, -899861, 1748155, -3108937, 5178122, -8198765, 12476764,
12476764, -8198765, 5178122, -3108937, 1748155, -899861, 407138, -149225,
840288039, -223331678, 122300118, -78966289, 54167579, -37942809, 26595074, -18415916,
-16731888, 24277296, -34709734, 49475933, -71571731, 108570992, -186846312, 502280711,
-145564, 324772, -748452, 1493638, -2707960, 4577066, -7331064, 11257906,
5724570, -3794257, 2422662, -1474686, 844122, -445117, 208765, -80960,
1046129870, -114155284, 57613909, -36235752, 24612481, -17195979, 12071368, -8394794,
-6278295, 9159092, -13134162, 18716201, -26923243, 40186648, -66167732, 147741797,
5666, 105964, -255689, 525063, -971500, 1667738, -2703925, 4192603,
};
/* Upsample ratio=4, 48kHz base — 128 coefficients.
 * Laid out as 16 rows of 8 words; every row is also a row of
 * ds4_48000_coefs, in a different order.
 * Passed as `coefs` to us_stereo() by us_rec_core().
 *
 * NOTE(review): row 9 and row 15 (0-based) are byte-for-byte identical
 * (-6278295 ... 147741797), while the ds4 row
 *   840288039, -223331678, 122300118, -78966289, 54167579, -37942809, 26595074, -18415916
 * is the only ds4 row that never appears here. This looks like a copy/paste
 * slip in one of the two rows, but which index is wrong cannot be determined
 * from this file. Regenerate or verify against the coefficient generator
 * before relying on the upsampler output. */
static int32_t us4_48000_coefs[128] = {
-80960, 208765, -445117, 844122, -1474686, 2422662, -3794257, 5724570,
-149225, 407138, -899861, 1748155, -3108937, 5178122, -8198765, 12476764,
-8394794, 12071368, -17195979, 24612481, -36235752, 57613909, -114155284, 1046129870,
147741797, -66167732, 40186648, -26923243, 18716201, -13134162, 9159092, -6278295,
4192603, -2703925, 1667738, -971500, 525063, -255689, 105964, 5666,
11257906, -7331064, 4577066, -2707960, 1493638, -748452, 324772, -145564,
-145564, 324772, -748452, 1493638, -2707960, 4577066, -7331064, 11257906,
5666, 105964, -255689, 525063, -971500, 1667738, -2703925, 4192603,
-16731888, 24277296, -34709734, 49475933, -71571731, 108570992, -186846312, 502280711,
-6278295, 9159092, -13134162, 18716201, -26923243, 40186648, -66167732, 147741797,
502280711, -186846312, 108570992, -71571731, 49475933, -34709734, 24277296, -16731888,
-18415916, 26595074, -37942809, 54167579, -78966289, 122300118, -223331678, 840288039,
12476764, -8198765, 5178122, -3108937, 1748155, -899861, 407138, -149225,
5724570, -3794257, 2422662, -1474686, 844122, -445117, 208765, -80960,
1046129870, -114155284, 57613909, -36235752, 24612481, -17195979, 12071368, -8394794,
-6278295, 9159092, -13134162, 18716201, -26923243, 40186648, -66167732, 147741797,
};
/* final_shr argument passed to ds_stereo() */
#define SSRC_DS4_48K_FINAL_SHR 2
/* final_shr argument passed to us_stereo() */
#define SSRC_US4_FINAL_SHR 0
/* history_offset argument of us_stereo(): a byte offset added to the history
 * base pointers to locate the slot where each new input sample is stored */
#define SSRC_US4_HISTORY_OFFSET (32*4)
/* num_phase / phase_size arguments of us_stereo() */
#define SSRC_US4_NUM_PHASE 4
#define SSRC_US4_PHASE_SIZE 4
/* Same value as SSRC_US_RB_WORDS in ssrc_us_share_buf.h.
 * NOTE(review): not referenced by the SSRC sources shown alongside this
 * header — confirm whether it is still needed. */
#define SSRC_RB_SIZE (2*32*5+32)
#endif

View File

@@ -0,0 +1,33 @@
#ifndef _SSRC_REC_CORE_H_
#define _SSRC_REC_CORE_H_
#if F1_MUSIC_UAC2 == 1
#include <xs1.h>
#include <platform.h>
/**
 * Downsample recording core — runs on tile[1].
 * Uses two streaming channels (ds_stereo.S needs separate c_in and c_out
 * channel ends; they must not alias):
 * - c_from_tile0: tile[0] -> tile[1] 192kHz stereo input
 * - c_to_tile0: tile[1] -> tile[0] 48kHz stereo output
 * This function never returns.
 */
void ds_rec_core(streaming chanend c_from_tile0, streaming chanend c_to_tile0);
/**
 * Upsample recording core — runs on tile[1].
 * Change note: us_stereo.S is a continuously running function that only
 * returns when it sees a control token on its input channel. This core is
 * therefore only responsible for upsampling the 48kHz input from c_us_in and
 * writing the result into the ring buffer local to tile[1].
 * This function never returns.
 */
void us_rec_core(streaming chanend c_us_in);
/**
 * Upsample output sender — runs on tile[1].
 * Change note: takes the 192kHz data that us_rec_core wrote into the
 * tile[1]-local ring buffer and sends it back to tile[0] over a normal
 * (non-streaming) channel, so nothing has to wait for us_stereo.S to return
 * (which would deadlock).
 * This function never returns.
 */
void us_rec_output_core(chanend c_us_out);
#endif
#endif

View File

@@ -0,0 +1,128 @@
#if F1_MUSIC_UAC2 == 1
#include "ssrc_rec_core.h"
#include <xs1.h>
#include <platform.h>
#include <string.h>
#include "ssrc_coefs.h"
#include "ssrc_us_share_buf.h"
#ifndef SET_SAMPLE_FREQ
#define SET_SAMPLE_FREQ 1
#endif
#ifndef NUM_USB_CHAN_OUT
#define NUM_USB_CHAN_OUT 2
#endif
/* Assembly function declarations */
/* ds_stereo (ds_stereo.S): stereo downsampler. Reads sample pairs from c_in
 * and writes result pairs to c_out until a control token shows up on c_in;
 * that token is consumed and returned. With num_phase == 1 the routine
 * copies pairs from c_in to c_out unchanged. */
extern unsigned char ds_stereo(
int32_t final_shr,
int32_t *coefs,
int32_t *data,
streaming chanend c_in,
streaming chanend c_out,
int32_t num_phase);
/* us_stereo (us_stereo.S): stereo upsampler. Reads sample pairs from c_in and
 * writes results into the ring buffer at ring_buffer_addr, advancing the word
 * index stored at tail_addr and wrapping it to 0 when it reaches
 * ring_buffer_size. Returns the control token that ended the run.
 * history_offset is a byte offset into l_history / r_history. */
extern unsigned char us_stereo(
int32_t *coefs,
int32_t *l_history,
int32_t *r_history,
streaming chanend c_in,
int32_t history_offset,
uint32_t ring_buffer_addr,
uint32_t tail_addr,
int32_t ring_buffer_size,
int32_t num_phase,
int32_t phase_size,
int32_t final_shr);
#pragma unsafe arrays
/* Downsampling worker: keeps ds_stereo() running on the two streaming
 * channels and services the control token it hands back whenever it returns.
 * Never returns. */
void ds_rec_core(streaming chanend c_from_tile0, streaming chanend c_to_tile0)
{
    int32_t fir_state[1057];
    unsigned char token;
    unsigned discard;

    memset(fir_state, 0, sizeof(fir_state));
    set_core_high_priority_on();

    for (;;) {
        /* Blocks here for as long as plain data flows; comes back only when
         * a control token arrived on c_from_tile0. */
        token = ds_stereo(SSRC_DS4_48K_FINAL_SHR,
                          ds4_48000_coefs,
                          fir_state,
                          c_from_tile0,   /* c_in:  samples coming from tile[0] */
                          c_to_tile0,     /* c_out: results going back to tile[0] */
                          4);

        if (token != SET_SAMPLE_FREQ) {
            /* Any other token is ignored; resume filtering. */
            continue;
        }

        /* Sample-rate change: start from a clean filter state, swallow the
         * word that follows the token, then forward the token followed by
         * NUM_USB_CHAN_OUT * 3 zero words. */
        memset(fir_state, 0, sizeof(fir_state));
        c_from_tile0 :> discard;
        soutct(c_to_tile0, SET_SAMPLE_FREQ);
        for (size_t sent = 0; sent < NUM_USB_CHAN_OUT * 3; sent++) {
            c_to_tile0 <: 0;
        }
    }
}
#pragma unsafe arrays
/* Upsampling worker: keeps us_stereo() running on c_us_in. us_stereo() writes
 * its output into the shared ring buffer (ssrc_us_share_buf) and only comes
 * back when a control token arrives on c_us_in. Never returns. */
void us_rec_core(streaming chanend c_us_in)
{
    int32_t l_history[64];
    int32_t r_history[64];
    unsigned char control_token;

    memset(l_history, 0, sizeof(l_history));
    memset(r_history, 0, sizeof(r_history));

    while (1) {
        /* us_stereo.S reloads its ring-buffer write pointer from the buffer
         * base on every entry, but continues from whatever index is stored in
         * the shared tail word. The two only agree when the tail is 0, so the
         * ring state is reset before every call, not just on the first call
         * and on SET_SAMPLE_FREQ. Previously a return through the default
         * branch restarted writing at the base with a stale tail.
         * NOTE(review): the reset also rewinds the read index used by
         * us_rec_output_core on another thread; samples still queued at that
         * point are dropped. */
        ssrc_us_share_reset();

        /* us_stereo.S is a continuously running producer that returns only
         * on a control token. Do not put normal audio-path work after this
         * call; it would never run during streaming. */
        control_token = us_stereo(
            us4_48000_coefs,
            l_history, r_history,
            c_us_in,
            SSRC_US4_HISTORY_OFFSET,
            ssrc_us_share_get_buffer_addr(),
            ssrc_us_share_get_tail_addr(),
            SSRC_US_RB_WORDS - 32,
            SSRC_US4_NUM_PHASE,
            SSRC_US4_PHASE_SIZE,
            SSRC_US4_FINAL_SHR
        );

        switch (control_token) {
        case SET_SAMPLE_FREQ:
            /* Sample-rate change: clear the filter history and swallow the
             * word that follows the token. */
            memset(l_history, 0, sizeof(l_history));
            memset(r_history, 0, sizeof(r_history));
            {
                unsigned dummy;
                c_us_in :> dummy;
            }
            break;
        default:
            /* Unknown token: nothing to consume; the loop restarts
             * us_stereo() with a freshly reset ring. */
            break;
        }
    }
}
/* Forwards upsampled words from the tile-local ring buffer to c_us_out, one
 * word per channel transaction. Never returns. */
void us_rec_output_core(chanend c_us_out)
{
    int32_t word;

    for (;;) {
        /* The ring buffer is wrapped in C functions so that XC's
         * parallel-usage rules for shared objects and the C/XC pointer ABI
         * differences do not get in the way. */
        word = ssrc_us_share_pop_word_blocking();
        c_us_out <: word;
    }
}
#endif

View File

@@ -0,0 +1,43 @@
#include "ssrc_us_share_buf.h"
/* Ring buffer shared by us_rec_core and us_rec_output_core (both on tile[1]).
 * us_stereo.S writes samples into ssrc_us_rb and advances the index stored in
 * ssrc_us_tail; the output task reads at ssrc_us_head and sends the words back
 * to tile[0] over a normal channel. */
static int32_t ssrc_us_rb[SSRC_US_RB_WORDS];
/* Read index. Written by ssrc_us_share_pop_word_blocking() (output task) and
 * by ssrc_us_share_reset() (called from us_rec_core, a different thread), so
 * it is volatile: otherwise the spin loop in the pop function may keep a
 * stale copy in a register and miss a reset.
 * NOTE(review): volatile only forces re-reads; the reset itself is still not
 * atomic with respect to a pop in progress. */
static volatile int32_t ssrc_us_head;
/* Write index. us_stereo.S updates it through a pointer the compiler cannot
 * see, so it must be volatile; otherwise the while loop in
 * ssrc_us_share_pop_word_blocking() could spin forever on a cached value. */
volatile int32_t ssrc_us_tail;
/* Rewind both ring indices to 0, i.e. mark the ring buffer as empty.
 * The buffer contents are not cleared.
 * NOTE(review): called from us_rec_core while us_rec_output_core may be
 * inside ssrc_us_share_pop_word_blocking(); the two stores below are not
 * atomic with respect to that reader. */
void ssrc_us_share_reset(void)
{
ssrc_us_head = 0;
ssrc_us_tail = 0;
}
/* Address of the first ring-buffer word as a plain 32-bit integer, in the
 * form us_stereo() takes for its ring_buffer_addr argument. */
uint32_t ssrc_us_share_get_buffer_addr(void)
{
    const uintptr_t base = (uintptr_t)&ssrc_us_rb[0];

    return (uint32_t)base;
}
/* Address of the write-index word as a plain 32-bit integer, in the form
 * us_stereo() takes for its tail_addr argument. */
uint32_t ssrc_us_share_get_tail_addr(void)
{
    const uintptr_t tail_word = (uintptr_t)&ssrc_us_tail;

    return (uint32_t)tail_word;
}
/* Remove and return the oldest word in the ring buffer, busy-waiting until
 * the producer has advanced the tail past the current head. */
int32_t ssrc_us_share_pop_word_blocking(void)
{
    int32_t word;
    int32_t next;

    /* Wait for us_stereo.S to produce data. Only this sender task spins; the
     * sample-rate-conversion task is never blocked by it. */
    for (;;) {
        if (ssrc_us_head != ssrc_us_tail) {
            break;
        }
        asm volatile("nop");
    }

    word = ssrc_us_rb[ssrc_us_head];

    /* Wrap at the ring size handed to us_stereo.S (SSRC_US_RB_WORDS - 32);
     * the last 32 words of the array are padding and are never read here. */
    next = ssrc_us_head + 1;
    ssrc_us_head = (next < (SSRC_US_RB_WORDS - 32)) ? next : 0;

    return word;
}

View File

@@ -0,0 +1,15 @@
#ifndef _SSRC_US_SHARE_BUF_H_
#define _SSRC_US_SHARE_BUF_H_
#include <stdint.h>
/* 改动原因us_stereo.S 持续写 ring bufferXC 并行任务不能直接共享全局数组,
* 因此用 C 函数封装 ring buffer作为 tile[1] 内 us_rec_core → us_rec_output_core 的边界。 */
#define SSRC_US_RB_WORDS (2 * 32 * 5 + 32)
void ssrc_us_share_reset(void);
uint32_t ssrc_us_share_get_buffer_addr(void);
uint32_t ssrc_us_share_get_tail_addr(void);
int32_t ssrc_us_share_pop_word_blocking(void);
#endif

View File

@@ -0,0 +1,182 @@
#define FUNCTION_NAME us_stereo
/*
 * us_stereo: streaming stereo FIR upsampler using the vector unit.
 *
 * Runs until a control token is seen on c_in; the token is then consumed
 * (inct at the exit label) and left in r0, i.e. it is the return value.
 * The XC caller declares the return type as unsigned char and passes the
 * ring buffer and tail as 32-bit addresses.
 *
 *   us_stereo(
 *       int32_t * coefs,              r0
 *       int32_t * l_history,          r1
 *       int32_t * r_history,          r2
 *       streaming chanend c_in,       r3
 *       int32_t history_offset,       sp[NSTACKWORDS+1]  (byte offset)
 *       int32_t * ring_buffer,        sp[NSTACKWORDS+2]
 *       int32_t * tail,               sp[NSTACKWORDS+3]
 *       int32_t ring_buffer_size,     sp[NSTACKWORDS+4]
 *       int32_t num_phase,            sp[NSTACKWORDS+5]  (number of polyphase filters)
 *       int32_t phase_size,           sp[NSTACKWORDS+6]  (in VPU MAC units of 8 words)
 *       int32_t final_shr);           sp[NSTACKWORDS+7]
 *
 * Stack frame usage (word indices), as established by the code below:
 *   sp[0]       l_history            sp[1]       r_history
 *   sp[2..9]    final_shr replicated 8 times (shift vector for vlsat)
 *   sp[10]      tail pointer         sp[11]      ring buffer start
 *   sp[12]      history offset       sp[13]      c_in
 *   sp[14]      num_phase            sp[15]      phase_size
 *   sp[16..23]  scratch for the saturated accumulators (vstr)
 *   sp[24]      coefs
 *   sp[25..31]  saved r10..r4
 *
 * Note: the ring write pointer (r7) is loaded from the ring buffer start on
 * every entry, while the tail index is read from memory as-is; the caller
 * must make sure the stored tail is 0 when this function is (re)entered.
 */
#define NSTACKWORDS 32
.cc_top FUNCTION_NAME.function
.type FUNCTION_NAME,@function
.issue_mode dual
.align 4
#define BYTES_PER_WORD 4
#define CHANNELS 2
#define WORDS_PER_VECTOR 8
#define BYTES_PER_VECTOR (WORDS_PER_VECTOR*BYTES_PER_WORD)
#define ZERO 0x0
#define ONE 0x40000000
/* Selection vectors used in phase_loop to combine alternating accumulator
 * lanes into the left/right outputs of the two phases just computed. */
filter_l_1010:
.word ZERO, ZERO, ZERO, ZERO, ONE, ZERO, ONE, ZERO
filter_l_0101:
.word ZERO, ZERO, ZERO, ZERO, ZERO, ONE, ZERO, ONE
filter_r_1010:
.word ONE, ZERO, ONE, ZERO, ZERO, ZERO, ZERO, ZERO
filter_r_0101:
.word ZERO, ONE, ZERO, ONE, ZERO, ZERO, ZERO, ZERO
zeros:
.word 0, 0, 0, 0, 0, 0, 0, 0
.align 16
.globl FUNCTION_NAME
.globl FUNCTION_NAME.nstackwords
.set FUNCTION_NAME.nstackwords, NSTACKWORDS
FUNCTION_NAME:
{ dualentsp NSTACKWORDS ; nop }
// save callee-saved registers r4..r10 at sp[31]..sp[25]
stw r4, sp[NSTACKWORDS-1]
stw r5, sp[NSTACKWORDS-2]
stw r6, sp[NSTACKWORDS-3]
stw r7, sp[NSTACKWORDS-4]
stw r8, sp[NSTACKWORDS-5]
stw r9, sp[NSTACKWORDS-6]
stw r10, sp[NSTACKWORDS-7] // sp[25]
// store coefs to stack[24]
stw r0, sp[24]
// store l,r history to stack[0,1]
std r2, r1, sp[0] // sp[1] sp[0]
// store the final_shr for the VLSAT to stack[2-9]
ldw r0, sp[NSTACKWORDS+7]
std r0, r0, sp[1] // sp[2] sp[3]
std r0, r0, sp[2] // sp[4] sp[5]
std r0, r0, sp[3] // sp[6] sp[7]
std r0, r0, sp[4] // sp[8] sp[9]
// store start of ring buffer and tail to stack[10,11]
ldw r0, sp[NSTACKWORDS+2]
ldw r1, sp[NSTACKWORDS+3]
std r0, r1, sp[5] // sp[11] sp[10]
// store sample history offset to stack[12]
ldw r0, sp[NSTACKWORDS+1]
stw r0, sp[12]
// store c_in to stack[13]
stw r3, sp[13]
// store num_phase and phase size to stack[14,15]
ldw r0, sp[NSTACKWORDS+5]
ldw r1, sp[NSTACKWORDS+6]
stw r0, sp[14]
stw r1, sp[15]
// temp vstr store at stack[16-23]
// program start
ldc r9, BYTES_PER_VECTOR // r9: BYTES_PER_VECTOR (pointer stride)
ldc r10, BYTES_PER_VECTOR*3 // r10: coefs rewind
ldd r7, r8, sp[5] // r7: ring_buffer ptr, r8: tail ptr
// One outer_loop pass consumes one stereo input pair from c_in.
outer_loop:
ldw r4, sp[12]
ldw r3, sp[13]
{ testct r11, res[r3] ; ldw r0, sp[0] } // r0: &l_history[0]
bt r11, exit
{ in r11, res[r3] ; add r2, r0, r4 } // r2: l_history + history offset
{ in r11, res[r3] ; stw r11, r2[0] } // store the first word read; fetch the second
{ ldw r2, sp[1] ; add r1, r0, BYTES_PER_WORD } // r2: &r_history[0], r1: l_history + 4 bytes
{ add r4, r2, r4 ; add r3, r2, BYTES_PER_WORD } // r4: r_history + history offset, r3: r_history + 4 bytes
{ stw r11, r4[0] ; nop }
{ ldw r4, sp[24] ; add r11, r2, 0 } // r4: &coeff[0], r11: r2
ldw r6, sp[15] // r6: phase size
{ sub r6, r6, 2 ; ldw r5, sp[14] } // burst 2, r5: num of phase
{ vclrdr ; nop }
bu mac_loop
// phase_loop: turn the accumulators of two finished phases into four output words and append them to the ring buffer
phase_loop:
{ nop ; ldaw r11, sp[2] }
{ vlsat r11[0] ; ldaw r11, sp[16] } // turn all accumulators into 32 bits
{ vstr r11[0] ; nop } // store accumulators result into stack[16-23]
{ vldc r11[0] ; nop } // load the accumulators result into VC from stack
{ vclrdr ; ldap r11, filter_r_1010 } // clear vR vD
{ vlmaccr r11[0] ; ldap r11, filter_l_1010 } // r us t1, vD:vR -> {r_us_t1, 0, 0, 0}
{ vlmaccr r11[0] ; ldap r11, filter_r_0101 } // l us t1, vD:vR -> {l_us_t1, r_us_t1, 0, 0}
{ vlmaccr r11[0] ; ldap r11, filter_l_0101 } // r us t0, vD:vR -> {r_us_t0, l_us_t1, r_us_t1, 0}
{ vlmaccr r11[0] ; ldap r11, zeros } // l us t0, vD:vR -> {l_us_t0, r_us_t0, l_us_t1, r_us_t1}, r11: all zeros for vlsat
{ vlsat r11[0] ; ldc r11, BYTES_PER_WORD*4 } // sat(vD:vR), r11: BYTES_PER_WORD*4 = 4 sample size
{ vstr r7[0] ; add r7, r7, r11 } // save result to ring buffer, update ring buffer ptr
{ ldw r11, r8[0] ; nop } // r11: tail value
{ ldw r6, sp[NSTACKWORDS+4] ; add r11, r11, 4 } // r6: ring_buffer size, r11: tail + 4
{ stw r11, r8[0] ; eq r11, r11, r6 } // update tail ptr, r11: check if tail at ring buffer end
{ bf r11, prepare_mac_loop ; nop } // if tail not at buffer end, prepare mac loop
{ ldw r7, sp[11] ; ldc r11, 0 } // r7: ring buffer start, r11: 0
{ stw r11, r8[0] ; nop } // update tail ptr to 0
prepare_mac_loop:
{ ldw r6, sp[15] ; sub r5, r5, 2 } // r6: phase size, r5: r5-2 processed two polyphase filter
{ bf r5, outer_loop ; sub r6, r6, 2 } // burst 2
ldd r1, r0, sp[0] // r0: &l_history[0], r1: &r_history[0]
{ vclrdr ; nop }
bu mac_loop_no_move
// mac_loop: first phase pair after a new input; also copies each history vector one word down (vldc from +4 bytes, vstc to base)
mac_loop:
// swap r11 for channel
{ add r2, r11, 0 ; add r11, r0, 0 } // r11: r0
// left channel
{ vldc r1[0] ; add r1, r1, r9 }
{ vstc r11[0] ; add r11, r11, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vldc r1[0] ; add r1, r1, r9 }
{ vstc r11[0] ; add r11, r11, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; sub r4, r4, r10 } // rewind
// swap r11 for channel
{ add r0, r11, 0 ; add r11, r2, 0 }
// right channel
{ vldc r3[0] ; add r3, r3, r9 }
{ vstc r11[0] ; add r11, r11, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vldc r3[0] ; add r3, r3, r9 }
{ vstc r11[0] ; add r11, r11, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ bt r6, mac_loop ; sub r6, r6, 2 } // vD:vR -> {right_f1, right_f0, left_f1, left_f0}
bu phase_loop
// mac_loop_no_move: remaining phase pairs for the same input; history is read in place, not shifted
mac_loop_no_move:
// left channel
{ vldc r0[0] ; add r0, r0, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vldc r0[0] ; add r0, r0, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; sub r4, r4, r10 } // rewind
// right channel
{ vldc r1[0] ; add r1, r1, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vldc r1[0] ; add r1, r1, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ vlmaccr r4[0] ; add r4, r4, r9 }
{ bt r6, mac_loop_no_move ; sub r6, r6, 2 }
bu phase_loop
exit:
// consume the pending control token; it is returned to the caller in r0
{inct r0, res[r3]; nop}
ldw r4, sp[NSTACKWORDS-1]
ldw r5, sp[NSTACKWORDS-2]
ldw r6, sp[NSTACKWORDS-3]
ldw r7, sp[NSTACKWORDS-4]
ldw r8, sp[NSTACKWORDS-5]
ldw r9, sp[NSTACKWORDS-6]
ldw r10, sp[NSTACKWORDS-7]
retsp NSTACKWORDS
.cc_bottom FUNCTION_NAME.function