FunASR/fun_text_processing/text_normalization/ru/verbalizers/electronic.py

23 lines
955 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pynini
from fun_text_processing.text_normalization.en.graph_utils import GraphFst
from fun_text_processing.text_normalization.ru.alphabet import RU_ALPHA
from pynini.lib import pynutil
class ElectronicFst(GraphFst):
    """
    Finite state transducer that verbalizes a tagged ``electronic`` token.

    The graph removes the ``username: "`` prefix and the closing quote and
    passes the quoted text through unchanged, e.g.
        electronic { username: "эй би собака эн ди точка ру" } -> "эй би собака эн ди точка ру"

    Args:
        deterministic: if True a single transduction option is provided,
            if False multiple transductions are generated
            (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="electronic", kind="verbalize", deterministic=deterministic)

        # Field wrapper pieces are consumed from the input and emit nothing.
        field_prefix = pynutil.delete('username: "')
        # Payload: zero or more symbols, each either from RU_ALPHA or a space.
        spoken_text = pynini.closure(RU_ALPHA | " ")
        field_suffix = pynutil.delete('"')

        username_field = field_prefix + spoken_text + field_suffix

        # delete_tokens comes from GraphFst; presumably it strips the surrounding
        # `electronic { ... }` wrapper -- confirm against graph_utils.
        stripped = self.delete_tokens(username_field)
        self.fst = stripped.optimize()