FunASR/runtime/onnxruntime/third_party/kaldi/lat/word-align-lattice-lexicon.h

161 lines
7.1 KiB
C
Raw Normal View History

2024-05-18 15:50:56 +08:00
// lat/word-align-lattice-lexicon.h
// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
#define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
#include <fst/fstlib.h>
#include <fst/fst-decl.h>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fstext/fstext-lib.h"
#include "hmm/transition-model.h"
#include "lat/kaldi-lattice.h"
namespace kaldi {
/**
   Reads a lexicon in the special format required for word alignment.

   Every line of the input carries a series of integers (at least two per
   line), with the layout:
      <old-word-id> <new-word-id> [<phone-id-1> [<phone-id-2> ... ] ]
   <old-word-id> is the word-id that appears in the lattice before alignment,
   and <new-word-id> is the word-id that should appear in the lattice after
   alignment.  The distinction is mainly useful when the lattice may have no
   symbol on the optional-silence arcs (so <old-word-id> would be zero) while
   the output should carry a symbol on those arcs (so <new-word-id> would be
   nonzero).  If the silence should not be added to the lattice, both
   <old-word-id> and <new-word-id> may be zero.

   The function itself is very simple: it just reads a series of lines from a
   text stream, each holding at least two integers.

   @param [in]  is       Stream the lexicon text is read from.
   @param [out] lexicon  Destination for the integers that were read
                         (presumably one inner vector per input line --
                         confirm against the definition).
   @return  NOTE(review): the meaning of the return value is not stated in
            this header; presumably true on success -- confirm against the
            definition.
*/
bool ReadLexiconForWordAlign(std::istream &is,
                             std::vector<std::vector<int32>> *lexicon);
/// Holds information extracted from the word-alignment lexicon, stored in a
/// form that is convenient for the word-alignment code.
class WordAlignLatticeLexiconInfo {
 public:
  /// Constructs the object from a lexicon given as a vector of entries, each
  /// entry being a vector of integers.
  // NOTE(review): this single-argument constructor is not 'explicit', so a
  // lexicon vector converts implicitly to this class.  Left unchanged because
  // callers outside this header may rely on it.
  WordAlignLatticeLexiconInfo(const std::vector<std::vector<int32>> &lexicon);

  /// Returns true if this lexicon entry can appear, interpreted as
  /// (output-word phone1 phone2 ...).  Only used in testing code.
  bool IsValidEntry(const std::vector<int32> &entry) const;

  /// Only used in testing code.  Words are grouped into equivalence classes
  /// derived from the mappings given by the first two fields of each lexicon
  /// line; this function maps a word-id to the lowest member of its
  /// equivalence class.
  int32 EquivalenceClassOf(int32 word) const;

 protected:
  friend class LatticeLexiconWordAligner;

  // Helpers that populate the maps declared further down.
  void UpdateViabilityMap(const std::vector<int32> &lexicon_entry);
  void UpdateLexiconMap(const std::vector<int32> &lexicon_entry);
  void UpdateNumPhonesMap(const std::vector<int32> &lexicon_entry);
  void UpdateEquivalenceMap(const std::vector<std::vector<int32>> &lexicon);
  void FinalizeViabilityMap();  // sorts the vectors.

  /// ViabilityMap: maps a sequence of phones s (never the empty sequence) to
  /// the set of all word-labels [on the input lattice] that could correspond
  /// to phone sequences that start with s [but are longer than s].  Each set
  /// of word-labels is represented as a sorted vector of int32.  Note that
  /// the zero word-label is included.  The map serves as a kind of
  /// co-accessibility test, telling us whether it is worth extending a state
  /// by traversing arcs in the input lattice.
  using ViabilityMap = unordered_map<std::vector<int32>,
                                     std::vector<int32>,
                                     VectorHasher<int32>>;

  /// LexiconMap: maps a vector (orig-word-symbol phone1 phone2 ... ) to the
  /// new word-symbol.
  /// [todo: make sure the new word-symbol is always nonzero.]
  using LexiconMap = unordered_map<std::vector<int32>,
                                   int32,
                                   VectorHasher<int32>>;

  /// NumPhonesMap: maps a word-id (as present in the original lattice) to
  /// the minimum and maximum #phones of the lexicon entries for that word.
  /// It exists to improve efficiency.
  using NumPhonesMap = unordered_map<int32, std::pair<int32, int32>>;

  /// EquivalenceMap: used only in testing code; maps a word to the primary
  /// member of that word's equivalence-class.
  using EquivalenceMap = unordered_map<int32, int32>;

  // The next three members hold the various kinds of information gathered
  // from the lexicon.
  LexiconMap lexicon_map_;
  NumPhonesMap num_phones_map_;
  ViabilityMap viability_map_;

  // Like lexicon_map_, but in the reverse sense w.r.t. words [only built for
  // asymmetric entries].  Used only in testing code.
  LexiconMap reverse_lexicon_map_;

  // Used only in testing code: maps a word to the primary member of that
  // word's equivalence-class.  An index that is absent from the map is
  // assumed to map to itself.
  EquivalenceMap equivalence_map_;
};
/// Options accepted by WordAlignLatticeLexicon().
struct WordAlignLatticeLexiconOpts {
  /// Word symbol id for arcs that correspond to partial words at the end of
  /// "forced-out" utterances (zero is OK).  Default: 0.
  int32 partial_word_label = 0;
  /// True if the lattices came from graphs built with the --reorder option
  /// true.  Default: true.
  bool reorder = true;
  /// If > 0.0, the maximum ratio by which the #states of the lattice may grow
  /// during alignment before alignment is refused.  Default: -1.0.
  BaseFloat max_expand = -1.0;

  /// All members take the defaults given by their in-class initializers.
  WordAlignLatticeLexiconOpts() { }

  /// Registers the three options above, under the names "partial-word-label",
  /// "reorder" and "max-expand", with the supplied options object.
  void Register(OptionsItf *opts) {
    opts->Register(
        "partial-word-label", &partial_word_label,
        "Numeric id of "
        "word symbol that is to be used for arcs in the word-aligned "
        "lattice corresponding to partial words at the end of "
        "\"forced-out\" utterances (zero is OK)");
    opts->Register(
        "reorder", &reorder,
        "True if the lattices were generated "
        "from graphs that had the --reorder option true, relating to "
        "reordering self-loops (typically true)");
    opts->Register(
        "max-expand", &max_expand,
        "If >0.0, the maximum ratio "
        "by which we allow the lattice-alignment code to increase the #states "
        "in a lattice (vs. the phone-aligned lattice) before we fail and "
        "refuse to align the lattice. This is helpful in order to "
        "prevent 'pathological' lattices from causing the program to "
        "exhaust memory. Actual max-states is 1000 + max-expand * "
        "orig-num-states.");
  }
};
/// Word-aligns a lattice, so that each arc carries the transition-ids that
/// correspond to the word on that arc.  [The result may also contain epsilon
/// arcs for optional silences.]
///
/// @param [in]  lat           The lattice to be aligned.
/// @param [in]  tmodel        Transition information passed to the aligner.
/// @param [in]  lexicon_info  Lexicon-derived information
///                            (see WordAlignLatticeLexiconInfo).
/// @param [in]  opts          Alignment options
///                            (see WordAlignLatticeLexiconOpts).
/// @param [out] lat_out       Receives the word-aligned lattice.
/// @return  true if everything was OK; false if there was any kind of error,
///          including the case where the lattice seems to have been
///          "forced out" (did not reach the end state, resulting in a partial
///          word at the end).
bool WordAlignLatticeLexicon(const CompactLattice &lat,
                             const TransitionInformation &tmodel,
                             const WordAlignLatticeLexiconInfo &lexicon_info,
                             const WordAlignLatticeLexiconOpts &opts,
                             CompactLattice *lat_out);
} // namespace kaldi
#endif