71 lines
3.3 KiB
Python
71 lines
3.3 KiB
Python
from fun_text_processing.text_normalization.en.graph_utils import GraphFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.abbreviation import AbbreviationFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.date import DateFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.decimal import DecimalFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.fraction import FractionFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.measure import MeasureFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.money import MoneyFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.roman import RomanFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.telephone import TelephoneFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.time import TimeFst
|
|
from fun_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
|
|
|
|
|
|
class VerbalizeFst(GraphFst):
    """
    Top-level verbalizer built as the union of the individual verbalizer grammars.

    For deployment, this grammar is compiled and exported to an OpenFst
    Finite State Archive (FAR) file.
    More details on deployment at NeMo/tools/text_processing_deployment.

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic)

        # Verbalizer objects that other verbalizers take as constructor
        # arguments are kept around; the rest are only needed for their .fst.
        cardinal = CardinalFst(deterministic=deterministic)
        cardinal_fst = cardinal.fst

        decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic)
        decimal_fst = decimal.fst

        ordinal = OrdinalFst(deterministic=deterministic)
        ordinal_fst = ordinal.fst

        fraction = FractionFst(deterministic=deterministic)
        fraction_fst = fraction.fst

        telephone_fst = TelephoneFst(deterministic=deterministic).fst
        electronic_fst = ElectronicFst(deterministic=deterministic).fst

        measure_fst = MeasureFst(
            decimal=decimal,
            cardinal=cardinal,
            fraction=fraction,
            deterministic=deterministic,
        ).fst

        time_fst = TimeFst(deterministic=deterministic).fst
        date_fst = DateFst(ordinal=ordinal, deterministic=deterministic).fst
        money_fst = MoneyFst(decimal=decimal, deterministic=deterministic).fst
        whitelist_fst = WhiteListFst(deterministic=deterministic).fst

        # Fold the alternatives together left to right, starting from the
        # time grammar, so the union is assembled in a fixed operand order.
        remaining_branches = (
            date_fst,
            money_fst,
            measure_fst,
            ordinal_fst,
            decimal_fst,
            cardinal_fst,
            telephone_fst,
            electronic_fst,
            fraction_fst,
            whitelist_fst,
        )
        graph = time_fst
        for branch in remaining_branches:
            graph = graph | branch

        # Roman numerals are always part of the union.
        graph |= RomanFst(deterministic=deterministic).fst

        # Abbreviations are only added in non-deterministic mode.
        if not deterministic:
            graph |= AbbreviationFst(deterministic=deterministic).fst

        self.fst = graph
|