89 lines
2.9 KiB
Python
89 lines
2.9 KiB
Python
import pynini
|
|
from fun_text_processing.text_normalization.en.graph_utils import (
|
|
DAMO_NOT_QUOTE,
|
|
DAMO_SIGMA,
|
|
GraphFst,
|
|
delete_space,
|
|
insert_space,
|
|
)
|
|
from pynini.lib import pynutil
|
|
|
|
|
|
class TimeFst(GraphFst):
    """
    Verbalizer FST that turns a tagged ``time { ... }`` token into spoken text, e.g.
        time { hours: "twelve" minutes: "thirty" suffix: "a m" zone: "e s t" } -> twelve thirty a m e s t
        time { hours: "twelve" } -> twelve o'clock

    Four surface forms are accepted (see ``__init__``):
        * hours + minutes, with optional suffix and zone
        * hours only, verbalized with a trailing "o'clock" and optional zone
        * hours + suffix, with optional zone
        * hours + minutes + seconds, with optional suffix and zone

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transduction are generated (used for audio-based normalization).
            In this class the flag is only forwarded to the ``GraphFst`` base constructor.
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="time", kind="verbalize", deterministic=deterministic)

        def quoted_value(label: str):
            # Consume `<label> "<value>"`: the label and both quotes are deleted,
            # the non-empty value between the quotes is passed through unchanged.
            return (
                pynutil.delete(label)
                + delete_space
                + pynutil.delete('"')
                + pynini.closure(DAMO_NOT_QUOTE, 1)
                + pynutil.delete('"')
            )

        # One acceptor per serialized field of the time token.
        hour = quoted_value("hours:")
        minute = quoted_value("minutes:")
        second = quoted_value("seconds:")
        suffix = quoted_value("suffix:")
        zone = quoted_value("zone:")

        # Suffix and zone may each appear at most once; when present they are
        # separated from the preceding text by exactly one inserted space.
        optional_suffix = pynini.closure(delete_space + insert_space + suffix, 0, 1)
        optional_zone = pynini.closure(delete_space + insert_space + zone, 0, 1)

        # "<h> hours <m> minutes and <s> seconds" with optional suffix / zone.
        graph_hms = (
            hour
            + pynutil.insert(" hours ")
            + delete_space
            + minute
            + pynutil.insert(" minutes and ")
            + delete_space
            + second
            + pynutil.insert(" seconds")
            + optional_suffix
            + optional_zone
        )

        # Post-processing of the h/m/s reading: drop "o " and singularize the unit
        # after "one", but only directly after a space or at the start of the string.
        # Presumably "o " is the leading "oh" of zero-padded values such as "o five"
        # -- confirm against the tagger output.
        # NOTE(review): the " " left context also matches inside compound numbers
        # (e.g. "twenty one hours") -- confirm that singularizing there is intended.
        hms_cleanup = (
            pynutil.delete("o ")
            | pynini.cross("one minutes", "one minute")
            | pynini.cross("one seconds", "one second")
            | pynini.cross("one hours", "one hour")
        )
        left_context = pynini.union(" ", "[BOS]")
        graph_hms @= pynini.cdrewrite(hms_cleanup, left_context, "", DAMO_SIGMA)

        # Union of all accepted readings, in the same order as they are listed in
        # the class docstring.
        graph = hour + delete_space + insert_space + minute + optional_suffix + optional_zone
        graph |= hour + insert_space + pynutil.insert("o'clock") + optional_zone
        graph |= hour + delete_space + insert_space + suffix + optional_zone
        graph |= graph_hms

        # `delete_tokens` is not defined in this class; presumably it comes from
        # GraphFst and strips the surrounding `time { ... }` wrapper -- verify there.
        delete_tokens = self.delete_tokens(graph)
        self.fst = delete_tokens.optimize()
|