from pathlib import Path
from typing import Union
import warnings


class DatadirWriter:
    """Writer class to create kaldi like data directory.

    Each writer is bound to one path and is used in exactly one of two roles:

    * directory: ``writer[name]`` returns the child writer bound to
      ``path / name`` (created on first access, then reused);
    * file: ``writer[key] = value`` appends the line ``"<key> <value>"`` to
      the file at ``path``.

    Mixing the two roles on the same writer raises ``RuntimeError``.

    Nothing is touched on disk until the first assignment: only then are the
    parent directories created and the file opened (mode ``"w"``, UTF-8).

    Examples:
        >>> with DatadirWriter("output") as writer:
        ...     # Get the writer bound to output/sub.txt (no file yet)
        ...     subwriter = writer["sub.txt"]
        ...     # output/sub.txt is created by the first assignment
        ...     # Write "uttidA some/where/a.wav"
        ...     subwriter["uttidA"] = "some/where/a.wav"
        ...     subwriter["uttidB"] = "some/where/b.wav"

    """

    def __init__(self, p: Union[Path, str]):
        """Bind the writer to path ``p`` without touching the filesystem."""
        self.path = Path(p)
        # Child writers keyed by name. The misspelled attribute name is kept
        # as-is because it is publicly reachable on instances.
        self.chilidren = {}
        # Text file handle; stays None until the first line is written.
        self.fd = None
        # True once at least one child exists, i.e. this is a directory.
        self.has_children = False
        # Every key written through this writer, used for duplicate and
        # cross-file mismatch warnings.
        self.keys = set()

    def __enter__(self) -> "DatadirWriter":
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __getitem__(self, key: str) -> "DatadirWriter":
        """Return the child writer bound to ``self.path / key``.

        The child is created on first access and cached, so repeated lookups
        with the same key return the same object.

        Raises:
            RuntimeError: If this writer has already been written to as a
                file.
        """
        if self.fd is not None:
            raise RuntimeError("This writer points out a file")

        if key not in self.chilidren:
            self.chilidren[key] = DatadirWriter((self.path / key))
            self.has_children = True

        return self.chilidren[key]

    def __setitem__(self, key: str, value: str):
        """Append the line ``"<key> <value>"`` to the file at ``self.path``.

        The file (and its parent directories) is created on the first call.
        A repeated key only triggers a warning; the line is still written.

        Raises:
            RuntimeError: If this writer already has children, i.e. it is
                being used as a directory.
        """
        if self.has_children:
            raise RuntimeError("This writer points out a directory")
        if key in self.keys:
            warnings.warn(f"Duplicated: {key}")

        if self.fd is None:
            # Open lazily so that a writer which never receives a line
            # leaves nothing behind on disk.
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.fd = self.path.open("w", encoding="utf-8")

        self.keys.add(key)
        self.fd.write(f"{key} {value}\n")
        # Flush after every line so the file content is up to date on disk.
        self.fd.flush()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the writer on leaving the ``with`` block.

        Returns ``None``, so an exception raised in the block is not
        suppressed.
        """
        self.close()

    def close(self):
        """Close this writer and, for a directory, every child writer.

        Children are closed in the order they were created. After each child
        is closed, its key set is compared with the previous child's and a
        warning is issued when they differ.

        Every child is closed even if closing one of them fails, so no file
        handle is left open; the first error is re-raised once all children
        have been processed.
        """
        if self.has_children:
            first_error = None
            prev_child = None
            for child in self.chilidren.values():
                try:
                    child.close()
                except Exception as err:
                    # Remember the failure but keep going: stopping here
                    # would leak the file handles of the remaining children.
                    if first_error is None:
                        first_error = err
                if prev_child is not None and prev_child.keys != child.keys:
                    warnings.warn(
                        f"Ids are mismatching between "
                        f"{prev_child.path} and {child.path}"
                    )
                prev_child = child

            if first_error is not None:
                raise first_error

        elif self.fd is not None:
            self.fd.close()