# File: FunASR/funasr/models/rwkv_bat/template.yaml
# Viewer metadata: 64 lines, 1.1 KiB, YAML

# network architecture
# Selects the model; model_conf holds the options for that model and its
# entries must be indented beneath it to be part of the mapping.
model: Transducer
model_conf:
  # NOTE(review): presumably the weight of an auxiliary CTC loss term, with
  # 0.0 switching it off — confirm against the model implementation.
  auxiliary_ctc_weight: 0.0
# encoder
# Selects the encoder; all options below are nested under encoder_conf so
# that generic names such as dropout_rate stay scoped to the encoder instead
# of becoming top-level keys.
encoder: RWKVEncoder
encoder_conf:
  kernel: 3
  subsampling_factor: 4
  output_size: 512
  num_blocks: 18
  time_reduction_factor: 2
  # Three separate dropout settings (attention, feed-forward, general),
  # all set to the same value here.
  att_dropout_rate: 0.1
  ffn_dropout_rate: 0.1
  dropout_rate: 0.1
# decoder (prediction network)
# Selects the decoder; all options below are nested under decoder_conf so
# that generic names such as dropout_rate stay scoped to the decoder instead
# of becoming top-level keys.
decoder: rnnt_decoder
decoder_conf:
  embed_size: 512
  hidden_size: 512
  embed_dropout_rate: 0.1
  dropout_rate: 0.1
  use_embed_mask: false
# joint network
# Selects the joint network; its single option is nested under
# joint_network_conf.
joint_network: joint_network
joint_network_conf:
  joint_space_size: 512
# frontend (feature extraction)
# Selects the frontend; all options below are nested under frontend_conf.
frontend: WavFrontend
frontend_conf:
  # NOTE(review): presumably the sampling rate in Hz — confirm.
  fs: 16000
  window: hamming
  n_mels: 80
  # NOTE(review): frame_length / frame_shift are presumably in milliseconds
  # — confirm against the frontend implementation.
  frame_length: 25
  frame_shift: 10
  lfr_m: 1
  lfr_n: 1
  # NOTE(review): the key looks like a misspelling of "upscale_samples", but
  # it has to match the option name the frontend reads. Do not rename it
  # here without checking the consumer.
  upsacle_samples: true
# spectrogram augmentation
# Selects the augmentation module; all options below are nested under
# specaug_conf.
specaug: SpecAugLFR
specaug_conf:
  # Time warping is switched off; the window and mode values are kept so the
  # feature can be enabled by flipping the flag alone.
  apply_time_warp: false
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  # Two-element range written as a sequence.
  # NOTE(review): presumably [min, max] mask width — confirm.
  freq_mask_width_range:
    - 0
    - 30
  lfr_rate: 6
  num_freq_mask: 1
  apply_time_mask: true
  # Two-element range written as a sequence.
  # NOTE(review): presumably [min, max] mask width — confirm.
  time_mask_width_range:
    - 0
    - 12
  num_time_mask: 1
# tokenizer
# Selects the tokenizer; all options below are nested under tokenizer_conf.
tokenizer: CharTokenizer
tokenizer_conf:
  # Symbol used for unknown tokens.
  unk_symbol: <unk>
  split_with_space: true