95 lines
3.2 KiB
Python
95 lines
3.2 KiB
Python
|
import pynini
|
||
|
from fun_text_processing.text_normalization.en.graph_utils import (
|
||
|
DAMO_NOT_QUOTE,
|
||
|
DAMO_SIGMA,
|
||
|
GraphFst,
|
||
|
insert_space,
|
||
|
)
|
||
|
from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
|
||
|
from pynini.examples import plurals
|
||
|
from pynini.lib import pynutil
|
||
|
|
||
|
|
||
|
class FractionFst(GraphFst):
    """
    Finite state transducer for verbalizing fractions, e.g.
        tokens { fraction { integer_part: "twenty three" numerator: "four" denominator: "five" } } ->
        twenty three and four fifths
    (the example assumes ``OrdinalFst().suffix`` rewrites "five" as "fifth").

    The grammar consumes an optional ``integer_part`` field followed by the
    ``numerator`` and ``denominator`` fields. Denominators "one", "two" and
    "four" are worded "over one", "half" and "quarter"; every other denominator
    is passed through the ordinal suffix transducer. When the numerator is not
    "one" the phrase is pluralised at the end of the string.

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
        lm: only consulted when ``deterministic`` is False; if ``lm`` is also
            False, the "and " inserted after the integer part becomes optional
    """

    def __init__(self, deterministic: bool = True, lm: bool = False):
        super().__init__(name="fraction", kind="verbalize", deterministic=deterministic)
        ordinal_suffix = OrdinalFst().suffix

        # Whole-number part: drop the field wrapper, keep the spoken words.
        whole_part = (
            pynutil.delete('integer_part: "') + pynini.closure(DAMO_NOT_QUOTE) + pynutil.delete('" ')
        )

        # Fallback wording: whatever the denominator says, run through the ordinal suffix.
        generic_denominator = (
            pynutil.delete('denominator: "')
            + pynini.closure(DAMO_NOT_QUOTE) @ ordinal_suffix
            + pynutil.delete('"')
        )

        # Special wordings, listed from lowest to highest priority: each pass of
        # the loop below wraps the previous result, so the last entry wins first.
        special_denominators = (
            pynini.cross('denominator: "four"', "quarter"),
            pynini.cross('denominator: "two"', "half"),
            pynini.cross('denominator: "one"', "over one"),
        )
        denominators = generic_denominator
        for special in special_denominators:
            denominators = plurals._priority_union(special, denominators, DAMO_SIGMA)
        denominators = denominators.optimize()

        if not deterministic:
            # Additionally allow "four" to go through the ordinal suffix
            # alongside the "quarter" wording.
            denominators |= (
                pynutil.delete('denominator: "')
                + (pynini.accep("four") @ ordinal_suffix)
                + pynutil.delete('"')
            )

        # Numerator "one": the denominator stays as produced above.
        singular_fraction = (
            pynutil.delete('numerator: "')
            + pynini.accep("one")
            + pynutil.delete('" ')
            + insert_space
            + denominators
        )

        # Any other numerator: same shape, then pluralise at the end of the string.
        plural_fraction = (
            pynutil.delete('numerator: "')
            + (pynini.closure(DAMO_NOT_QUOTE) - pynini.accep("one"))
            + pynutil.delete('" ')
            + insert_space
            + denominators
        )
        pluralize_at_end = pynini.cdrewrite(
            plurals._priority_union(
                pynini.cross("half", "halves"), pynutil.insert("s"), DAMO_SIGMA
            ),
            "",
            "[EOS]",
            DAMO_SIGMA,
        )
        plural_fraction @= pluralize_at_end

        # "and " joins the integer part to the fraction.
        and_word = pynutil.insert("and ")
        if not (deterministic or lm):
            and_word = pynini.closure(and_word, 0, 1)
        optional_whole = pynini.closure(whole_part + insert_space + and_word, 0, 1)

        # Final string-end touch-ups: "and one half" -> "and a half", and undo
        # the plural "s" that the pluralisation pass appended to "over one".
        end_fixups = pynini.cdrewrite(
            pynini.cross("and one half", "and a half") | pynini.cross("over ones", "over one"),
            "",
            "[EOS]",
            DAMO_SIGMA,
        )

        self.graph = (optional_whole + (singular_fraction | plural_fraction)) @ end_fixups
        self.fst = self.delete_tokens(self.graph).optimize()
|