// FunASR/runtime/onnxruntime/third_party/kaldi/lat/word-align-lattice.h
//
// 212 lines
// 9.2 KiB
// C++

// lat/word-align-lattice.h
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_LAT_WORD_ALIGN_LATTICE_H_
#define KALDI_LAT_WORD_ALIGN_LATTICE_H_
#include <fst/fstlib.h>
#include <fst/fst-decl.h>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fstext/fstext-lib.h"
#include "itf/transition-information.h"
#include "lat/kaldi-lattice.h"
namespace kaldi {
/// Command-line options describing a word-position-dependent phone set.
///
/// Note: use of this structure is deprecated; see WordBoundaryInfoNewOpts.
///
/// Note: this structure (and the code in word-align-lattice.{h,cc}) makes
/// stronger assumptions than the rest of the Kaldi toolkit: it assumes you
/// have word-position-dependent phones, with disjoint subsets of phones for
/// (word-begin, word-end, word-internal, word-begin-and-end), and of course
/// silence, which is assumed not to be inside a word [it will just print a
/// warning if it is, though, and should give the right output as long as it's
/// not at the beginning or end of a word].
struct WordBoundaryInfoOpts {
  // Colon-separated lists of numeric phone ids, one list per phone category.
  std::string wbegin_phones;
  std::string wend_phones;
  std::string wbegin_and_end_phones;
  std::string winternal_phones;
  std::string silence_phones;

  // Word labels used for silence arcs and partial-word arcs (zero is OK).
  int32 silence_label = 0;
  int32 partial_word_label = 0;

  // Flags; the defaults below are the values used when nothing is specified.
  bool reorder = true;
  bool silence_may_be_word_internal = false;
  bool silence_has_olabels = false;

  WordBoundaryInfoOpts() { }

  /// Registers every option of this struct with `opts`, binding each
  /// command-line name to the corresponding member.
  void Register(OptionsItf *opts) {
    opts->Register("wbegin-phones", &wbegin_phones,
                   "Colon-separated list of numeric ids of phones that "
                   "begin a word");
    opts->Register("wend-phones", &wend_phones,
                   "Colon-separated list of numeric ids of phones that "
                   "end a word");
    opts->Register("winternal-phones", &winternal_phones,
                   "Colon-separated list of numeric ids of phones that "
                   "are internal to a word");
    opts->Register("wbegin-and-end-phones", &wbegin_and_end_phones,
                   "Colon-separated list of numeric ids of phones that "
                   "are used for single-phone words.");
    opts->Register("silence-phones", &silence_phones,
                   "Colon-separated list of numeric ids of phones that "
                   "are used for silence (and other non-word events such "
                   "as noise - anything that doesn't have a corresponding "
                   "symbol in the lexicon.");
    opts->Register("silence-label", &silence_label,
                   "Numeric id of word symbol that is to be used for "
                   "silence arcs in the word-aligned lattice (zero is OK)");
    opts->Register("partial-word-label", &partial_word_label,
                   "Numeric id of word symbol that is to be used for arcs "
                   "in the word-aligned lattice corresponding to partial "
                   "words at the end of \"forced-out\" utterances (zero is OK)");
    opts->Register("reorder", &reorder,
                   "True if the lattices were generated from graphs that "
                   "had the --reorder option true, relating to reordering "
                   "self-loops (typically true)");
    opts->Register("silence-may-be-word-internal",
                   &silence_may_be_word_internal,
                   "If true, silence may appear inside words' prons "
                   "(but not at begin/end!)\n");
    opts->Register("silence-has-olabels", &silence_has_olabels,
                   "If true, silence phones have output labels in the "
                   "lattice, just\nlike regular words. [This means you "
                   "can't have un-labeled silences]");
  }
};
/// Options for word alignment; this structure is to be used for newer code,
/// from s5 scripts on.
struct WordBoundaryInfoNewOpts {
  // Word labels used for silence arcs and partial-word arcs (zero is OK).
  int32 silence_label = 0;
  int32 partial_word_label = 0;

  // Whether the graphs were built with the --reorder option true.
  bool reorder = true;

  WordBoundaryInfoNewOpts() { }

  /// Registers every option of this struct with `opts`, binding each
  /// command-line name to the corresponding member.
  void Register(OptionsItf *opts) {
    opts->Register("silence-label", &silence_label,
                   "Numeric id of word symbol that is to be used for "
                   "silence arcs in the word-aligned lattice (zero is OK)");
    opts->Register("partial-word-label", &partial_word_label,
                   "Numeric id of word symbol that is to be used for arcs "
                   "in the word-aligned lattice corresponding to partial "
                   "words at the end of \"forced-out\" utterances (zero is OK)");
    opts->Register("reorder", &reorder,
                   "True if the lattices were generated from graphs that "
                   "had the --reorder option true, relating to reordering "
                   "self-loops (typically true)");
  }
};
/// Word-boundary information used by the word-alignment code: a table giving
/// the word-position type of each phone, plus the labels and the reorder flag
/// copied from the options.
struct WordBoundaryInfo {
  // This initializer will be deleted eventually.
  // Initialize from the (deprecated) options class.  Note: this throws.
  // Don't try to catch this error and continue; catching errors thrown from
  // initializers is dangerous.
  // Note: the phone-type data below is initialized from the corresponding
  // options strings in the options class, but if
  // silence_may_be_word_internal=true or silence_has_olabels=true, we modify
  // it as needed to make silence phones behave in this way.
  WordBoundaryInfo(const WordBoundaryInfoOpts &opts);

  // These initializers are to be used in future.
  WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts);
  WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts,
                   std::string word_boundary_file);

  // NOTE(review): presumably reads the word-boundary information from
  // `stream` into phone_to_type -- confirm against the .cc file.
  void Init(std::istream &stream);

  enum PhoneType {
    kNoPhone = 0,
    kWordBeginPhone,
    kWordEndPhone,
    kWordBeginAndEndPhone,
    kWordInternalPhone,
    kNonWordPhone  // non-word phones are typically silence phones; but the
                   // point is that there is no word label associated with
                   // them in the lattice.  If a silence phone had a word
                   // label with it, we'd have to call it
                   // kWordBeginAndEndPhone.
  };

  /// Returns the type stored in phone_to_type for phone `p`.
  /// Reports an error through KALDI_ERR if `p` is negative or is not a valid
  /// index into phone_to_type.
  PhoneType TypeOfPhone(int32 p) const {
    // Valid indices are 0 .. size()-1, so p == size() must be rejected too
    // (hence ">=", not ">").  The cast is safe because p < 0 is tested first,
    // and it avoids a signed/unsigned comparison.
    if (p < 0 || static_cast<size_t>(p) >= phone_to_type.size())
      KALDI_ERR << "Phone " << p << " was not specified in "
          "word-boundary file (or options)";
    return phone_to_type[p];
  }

  std::vector<PhoneType> phone_to_type;  // Indexed by phone id.

  int32 silence_label;  // The integer label we give to silence words.
                        // (May be zero).
  int32 partial_word_label;  // The label we give to partially
  // formed words that we might get at the end of the utterance
  // if the lattice was "forced out" (no end state was reached).

  bool reorder;  // True if the "reordering" of self-loops versus
  // forward-transition was done during graph creation (will
  // normally be true).

 private:
  // This is to be removed eventually, when we all move to s5 scripts.
  void SetOptions(const std::string int_list, PhoneType phone_type);
};
/// Align lattice so that each arc has the transition-ids on it
/// that correspond to the word that is on that arc. [May also have
/// epsilon arcs for optional silences.]
///
/// Assumptions about the input: we don't expect silence inside words,
/// or empty words (words with no phones), and we expect
/// the word to start with a wbegin_phone, to end with
/// a wend_phone, and to possibly have winternal_phones
/// inside (or to consist of just one wbegin_and_end_phone).
///
/// @param [in] lat  The lattice to be word-aligned.
/// @param [in] tmodel  Transition information for the model.
///     NOTE(review): presumably used to map transition-ids to phones;
///     confirm against the .cc file.
/// @param [in] info  Word-boundary information (phone types, silence and
///     partial-word labels, reorder flag).
/// @param [in] max_states  If max_states > 0, and this code detects that
///     the #states of the output will be greater than max_states, it will
///     abort the computation, return false and produce an empty
///     lattice out.
/// @param [out] lat_out  The word-aligned lattice is written here.
/// @return true if everything was OK, false if some kind of
///     error was detected (e.g. the words didn't have the kinds of
///     sequences we would expect if the WordBoundaryInfo was
///     correct). Note: a return value of false doesn't necessarily mean
///     that the output lattice is bad: it might just be that
///     the lattice was "forced out" as the end-state was not
///     reached during decoding, and in this case the output might
///     be usable.
bool WordAlignLattice(const CompactLattice &lat,
const TransitionInformation &tmodel,
const WordBoundaryInfo &info,
int32 max_states,
CompactLattice *lat_out);
/// This function is designed to crash if something went wrong with the
/// word-alignment of the lattice. It verifies
/// that every arc is of one of the following types:
///   - properly-aligned word arcs, with a word label;
///   - partial-word arcs, with the partial-word label;
///   - silence arcs, with the silence label.
/// NOTE(review): this comment previously said "4 types" but listed only the
/// three above; check the implementation for a possible fourth case.
///
/// @param [in] lat  NOTE(review): presumably the original lattice that was
///     given to WordAlignLattice -- confirm against the .cc file.
/// @param [in] tmodel  Transition information for the model.
/// @param [in] info  Word-boundary information; supplies the silence and
///     partial-word labels referred to above.
/// @param [in] aligned_lat  The word-aligned lattice to be checked.
void TestWordAlignedLattice(const CompactLattice &lat,
const TransitionInformation &tmodel,
const WordBoundaryInfo &info,
const CompactLattice &aligned_lat);
} // end namespace kaldi
#endif