FunASR/fun_text_processing/text_normalization/en/verbalizers/time.py

89 lines
2.9 KiB
Python
Raw Normal View History

2024-05-18 15:50:56 +08:00
import pynini
from fun_text_processing.text_normalization.en.graph_utils import (
DAMO_NOT_QUOTE,
DAMO_SIGMA,
GraphFst,
delete_space,
insert_space,
)
from pynini.lib import pynutil
class TimeFst(GraphFst):
    """
    Finite state transducer for verbalizing time, e.g.
        time { hours: "twelve" minutes: "thirty" suffix: "a m" zone: "e s t" } -> twelve thirty a m e s t
        time { hours: "twelve" } -> twelve o'clock

    Four input shapes are accepted (zone is optional in all of them):
        * hours + minutes, optionally followed by a suffix
        * hours only, verbalized with a trailing "o'clock"
        * hours + suffix
        * hours + minutes + seconds, verbalized as
          "<h> hours <m> minutes and <s> seconds", optionally followed by a suffix

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="time", kind="verbalize", deterministic=deterministic)

        def quoted_field(key: str):
            # Consume `<key>` followed by a double-quoted, non-empty value and
            # emit only the value: the key and both quotes are deleted.
            return (
                pynutil.delete(key)
                + delete_space
                + pynutil.delete('"')
                + pynini.closure(DAMO_NOT_QUOTE, 1)
                + pynutil.delete('"')
            )

        def optional_after_space(field):
            # Zero or one occurrence of `field`, set off from the preceding
            # output by `delete_space + insert_space`.
            return pynini.closure(delete_space + insert_space + field, 0, 1)

        hours_fst = quoted_field("hours:")
        minutes_fst = quoted_field("minutes:")
        seconds_fst = quoted_field("seconds:")
        suffix_fst = quoted_field("suffix:")
        zone_fst = quoted_field("zone:")

        maybe_suffix = optional_after_space(suffix_fst)
        maybe_zone = optional_after_space(zone_fst)

        # hours / minutes / seconds form, with the unit words spelled out.
        hms_raw = (
            hours_fst
            + pynutil.insert(" hours ")
            + delete_space
            + minutes_fst
            + pynutil.insert(" minutes and ")
            + delete_space
            + seconds_fst
            + pynutil.insert(" seconds")
            + maybe_suffix
            + maybe_zone
        )

        # Clean-up pass over the h/m/s output, applied where the match is
        # preceded by a space or the beginning of the string: delete "o " and
        # turn "one hours" / "one minutes" / "one seconds" into the singular.
        # NOTE(review): the left context accepts any space, so a compound such
        # as "twenty one minutes" looks like it would be singularized as well --
        # confirm whether that is intended.
        singularize_or_drop_o = pynini.union(
            pynutil.delete("o "),
            pynini.cross("one minutes", "one minute"),
            pynini.cross("one seconds", "one second"),
            pynini.cross("one hours", "one hour"),
        )
        hms_cleanup = pynini.cdrewrite(
            singularize_or_drop_o,
            pynini.union(" ", "[BOS]"),
            "",
            DAMO_SIGMA,
        )
        graph_hms = hms_raw @ hms_cleanup

        # Union of every accepted time layout.
        hours_minutes = (
            hours_fst + delete_space + insert_space + minutes_fst + maybe_suffix + maybe_zone
        )
        hours_oclock = hours_fst + insert_space + pynutil.insert("o'clock") + maybe_zone
        hours_suffix = hours_fst + delete_space + insert_space + suffix_fst + maybe_zone
        graph = pynini.union(hours_minutes, hours_oclock, hours_suffix, graph_hms)

        # Wrap with the base-class token-deletion step, then optimize the result.
        self.fst = self.delete_tokens(graph).optimize()