// decoder/lattice-incremental-decoder.h

// Copyright 2019  Zhehuai Chen, Daniel Povey
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_
#define KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_

#include "util/stl-utils.h"
#include "util/hash-list.h"
#include "fst/fstlib.h"
#include "itf/decodable-itf.h"
#include "fstext/fstext-lib.h"
#include "lat/determinize-lattice-pruned.h"
#include "lat/kaldi-lattice.h"
#include "decoder/grammar-fst.h"
#include "decoder/lattice-faster-decoder.h"

namespace kaldi {

/**
   The normal decoder, lattice-faster-decoder.h, sometimes has an issue when
   doing real-time applications with long utterances: each time you get the
   lattice, the lattice determinization can take a considerable amount of
   time, and this introduces latency.  This version of the decoder spreads the
   work of lattice determinization out throughout the decoding process.

   NOTE: Please see https://www.danielpovey.com/files/ *TBD* .pdf for a
   technical explanation of what is going on here.

   GLOSSARY OF TERMS:

     chunk: We do the determinization on chunks of frames; these may coincide
        with the chunks on which the user calls AdvanceDecoding().  The basic
        idea is to extract chunks of the raw lattice and determinize them
        individually, but it gets much more complicated than that.  The chunks
        should normally be at least as long as a word (let's say, at least 20
        frames), or the overhead of this algorithm might become excessive and
        affect RTF.

     raw lattice chunk: A chunk of raw (i.e. undeterminized) lattice that we
        will determinize.  In the paper this corresponds to the FST B that is
        described in Section 5.2.

     token_label, state_label / token-label, state-label: In the paper these
        are both referred to as `state labels` (these are special, large
        integer id's that refer to states in the undeterminized lattice and in
        the determinized lattice); but we use two separate terms here, for
        more clarity, when referring to the undeterminized vs. determinized
        lattice.

        token_label conceptually refers to states in the raw lattice, but we
        don't materialize the entire raw lattice as a physical FST, and these
        tokens are actually tokens (template type Token) held by the decoder.

        state_label, when used in this code, refers specifically to labels
        that identify states in the determinized lattice (i.e. state indexes
        in lat_).

     token-final state: A state in a raw lattice or in a determinized chunk
        that has an arc entering it that has a `token-label` on it (as defined
        above).  These states will have nonzero final-probs.

     redeterminized-non-splice-state, aka ns_redet: A redeterminized state
        which is not also a splice state; refer to the paper for explanation.
        In the already-determinized part this means a redeterminized state
        which is not final.

     canonical appended lattice: This is the appended compact lattice that we
        conceptually have (i.e. what we described in the paper).  The
        difference from the "actual appended lattice" stored in
        LatticeIncrementalDeterminizer::clat_ is that the actual appended
        lattice has all its final-arcs replaced with final-probs, and we keep
        the real final-arcs "on the side" in a separate data structure.  The
        final-probs in clat_ aren't necessarily related to the costs on the
        final-arcs; instead they can have arbitrary values passed in by the
        user (e.g. if we want to include final-probs).  This means that clat_
        can be returned without modification to the user who wants a partially
        determinized result.

     final-arc: An arc in the canonical appended CompactLattice which goes to
        a final-state.  These arcs will have `state-labels` as their labels.
*/
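/*
   A minimal usage sketch (the feature pipeline is omitted; `fst`,
   `trans_model`, `config` and `decodable` are assumed to be set up by the
   caller, and `more_frames_available` is a hypothetical condition), using the
   LatticeIncrementalDecoder typedef declared at the bottom of this file:

     LatticeIncrementalDecoder decoder(fst, trans_model, config);
     decoder.InitDecoding();
     while (more_frames_available) {
       decoder.AdvanceDecoding(&decodable);
       // Optionally call GetLattice() here with fewer frames than
       // NumFramesDecoded() to obtain a partial result; see GetLattice()
       // and UpdateLatticeDeterminization() below.
     }
     decoder.FinalizeDecoding();
     const CompactLattice &clat =
         decoder.GetLattice(decoder.NumFramesDecoded(), true);
*/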
struct LatticeIncrementalDecoderConfig {
  // All the configuration values until det_opts are the same as in
  // LatticeFasterDecoder.  For clarity we repeat them rather than inheriting.
  BaseFloat beam;
  int32 max_active;
  int32 min_active;
  BaseFloat lattice_beam;
  int32 prune_interval;
  BaseFloat beam_delta;  // has nothing to do with beam_ratio
  BaseFloat hash_ratio;
  BaseFloat prune_scale;  // Note: we don't make this configurable on the
                          // command line, it's not a very important parameter.
                          // It affects the algorithm that prunes the tokens
                          // as we go.

  // Most of the options inside det_opts are not actually queried by the
  // LatticeIncrementalDecoder class itself, but by the code that calls it,
  // for example in the function DecodeUtteranceLatticeIncremental.
  fst::DeterminizeLatticePhonePrunedOptions det_opts;

  // The configuration values from this point on are specific to the
  // incremental determinization.  See where they are registered for
  // explanation.
  // Caution: these are only inspected in UpdateLatticeDeterminization().
  int32 determinize_max_delay;
  int32 determinize_min_chunk_size;
  int32 determinize_max_active;

  LatticeIncrementalDecoderConfig()
      : beam(16.0),
        max_active(std::numeric_limits<int32>::max()),
        min_active(200),
        lattice_beam(10.0),
        prune_interval(25),
        beam_delta(0.5),
        hash_ratio(2.0),
        prune_scale(0.01),
        determinize_max_delay(60),
        determinize_min_chunk_size(20),
        determinize_max_active(200) {
    det_opts.minimize = false;
  }
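  /*
     Typical command-line registration (a sketch of the usual Kaldi
     ParseOptions pattern; `usage`, `argc` and `argv` come from the calling
     program):

       LatticeIncrementalDecoderConfig config;
       ParseOptions po(usage);
       config.Register(&po);
       po.Read(argc, argv);
       config.Check();
  */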
  void Register(OptionsItf *opts) {
    det_opts.Register(opts);
    opts->Register("beam", &beam,
                   "Decoding beam.  Larger->slower, more accurate.");
    opts->Register("max-active", &max_active,
                   "Decoder max active states.  Larger->slower; more accurate");
    opts->Register("min-active", &min_active, "Decoder minimum #active states.");
    opts->Register("lattice-beam", &lattice_beam,
                   "Lattice generation beam.  Larger->slower, and deeper lattices");
    opts->Register("prune-interval", &prune_interval,
                   "Interval (in frames) at which to prune tokens");
    opts->Register("beam-delta", &beam_delta,
                   "Increment used in decoding-- this parameter is obscure and "
                   "relates to a speedup in the way the max-active constraint "
                   "is applied.  Larger is more accurate.");
    opts->Register("hash-ratio", &hash_ratio,
                   "Setting used in decoder to control hash behavior");
    opts->Register("determinize-max-delay", &determinize_max_delay,
                   "Maximum frames of delay between decoding a frame and "
                   "determinizing it");
    opts->Register("determinize-min-chunk-size", &determinize_min_chunk_size,
                   "Minimum chunk size used in determinization");
    opts->Register("determinize-max-active", &determinize_max_active,
                   "Maximum number of active tokens to update determinization");
  }
  void Check() const {
    if (!(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 &&
          min_active <= max_active && prune_interval > 0 &&
          beam_delta > 0.0 && hash_ratio >= 1.0 &&
          prune_scale > 0.0 && prune_scale < 1.0 &&
          determinize_max_delay > determinize_min_chunk_size &&
          determinize_min_chunk_size > 0 && determinize_max_active >= 0))
      KALDI_ERR << "Invalid options given to decoder";
    /* Minimization of the chunks is not compatible with our algorithm (or at
       least, it would require additional complexity to implement). */
    if (det_opts.minimize || !det_opts.word_determinize)
      KALDI_ERR << "Invalid determinization options given to decoder.";
  }
};

/**
   This class is used inside LatticeIncrementalDecoderTpl; it handles some of
   the details of incremental determinization.  See
   https://www.danielpovey.com/files/ *TBD*.pdf for the paper.
*/
class LatticeIncrementalDeterminizer {
 public:
  using Label = typename LatticeArc::Label;  /* Actually the same labels
                                                appear in both lattice and
                                                compact lattice, so we don't
                                                use the specific type all the
                                                time but just say 'Label'. */

  LatticeIncrementalDeterminizer(
      const TransitionInformation &trans_model,
      const LatticeIncrementalDecoderConfig &config):
      trans_model_(trans_model), config_(config) { }

  // Resets the lattice determinization data for a new utterance.
  void Init();

  // Returns the current determinized lattice.
  const CompactLattice &GetDeterminizedLattice() const { return clat_; }

  /**
     Starts the process of creating a raw lattice chunk.  (Search the glossary
     for "raw lattice chunk").  This just sets up the initial states and
     redeterminized-states in the chunk.  Relates to sec. 5.2 in the paper,
     specifically the initial-state i and the redeterminized-states.

     After calling this, the caller would add the remaining arcs and states to
     `olat` and then call AcceptRawLatticeChunk() with the result.

     @param [out] olat    The lattice to be (partially) created.
     @param [out] token_label2state   This function outputs to here a map from
               `token-label` to the state we created for it in *olat.  See the
               glossary for `token-label`.  The keys actually correspond to
               the .nextstate fields in the arcs in final_arcs_; values are
               states in `olat`.  See the last bullet point before Sec. 5.3 in
               the paper.
  */
  void InitializeRawLatticeChunk(
      Lattice *olat,
      unordered_map<Label, LatticeArc::StateId> *token_label2state);
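  /*
     A rough sketch of how LatticeIncrementalDecoderTpl drives this object for
     each chunk (users normally never call these functions directly; this only
     illustrates the sequence of calls, using AcceptRawLatticeChunk(),
     SetFinalCosts() and GetLattice(), which are declared just below; the code
     that fills in the chunk is elided):

       LatticeIncrementalDeterminizer det(trans_model, config);
       det.Init();
       // ... then, for each chunk of newly decoded frames:
       Lattice raw_chunk;
       unordered_map<Label, LatticeArc::StateId> token_label2state;
       det.InitializeRawLatticeChunk(&raw_chunk, &token_label2state);
       // ... the decoder adds this chunk's states and arcs to `raw_chunk`,
       //     using `token_label2state` to connect to the part already
       //     determinized ...
       det.AcceptRawLatticeChunk(&raw_chunk);  // determinize and append to clat_
       det.SetFinalCosts(NULL);                // or pass a map of final-costs
       const CompactLattice &clat = det.GetLattice();
  */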
  /**
     This function accepts the raw FST (state-level lattice) corresponding to
     a single chunk of the lattice, determinizes it and appends it to
     this->clat_.

     Note: final-probs in `raw_fst` are treated specially: they are used to
     guide the pruned determinization, but when you call GetLattice() it will
     be -- except for pruning effects -- as if all nonzero final-probs in
     `raw_fst` were One() if final_costs == NULL, else the value present in
     `final_costs` (see SetFinalCosts()).

     @param [in] raw_fst   (Consumed destructively).  The input raw
               (state-level) lattice.  Would correspond to the FST A in the
               paper if first_frame == 0, and B otherwise.
     @return   Returns false if determinization finished earlier than the beam
               or the determinized lattice was empty; true otherwise.

     NOTE: if this is not the final chunk, you will probably want to call
     SetFinalCosts() directly after calling this.
  */
  bool AcceptRawLatticeChunk(Lattice *raw_fst);

  /*
     Sets final-probs in `clat_`.  Must only be called if the final chunk has
     not been processed.  (The final chunk is whenever GetLattice() is called
     with finalize == true).

     The reason this is a separate function from AcceptRawLatticeChunk() is
     that there may be situations where a user wants to get the lattice with
     final-probs in it, after previously getting it without final-probs; or
     vice versa.  By final-probs, we mean the Final() probabilities in the
     HCLG (decoding graph; this->fst_).

     @param [in] token_label2final_cost   A map from the token-label
            corresponding to Tokens active on the final frame of the lattice
            in the object, to the final-cost we want to use for those tokens.
            If NULL, it means all Tokens should be treated as final with
            probability One().  If non-NULL, and a particular token-label is
            not a key of this map, it means that Token corresponded to a state
            that was not final in HCLG; such tokens will be treated as
            non-final.  However, if this would result in no states in the
            lattice being final, we will treat all Tokens as final with
            probability One() and print a warning (this should not happen).
  */
  void SetFinalCosts(
      const unordered_map<Label, BaseFloat> *token_label2final_cost = NULL);

  const CompactLattice &GetLattice() { return clat_; }

  // kStateLabelOffset is what we add to state-ids in clat_ to produce labels
  // to identify them in the raw lattice chunk.
  // kTokenLabelOffset is where we start allocating labels corresponding to
  // Tokens (these correspond with raw lattice states).
  enum { kStateLabelOffset = (int)1e8, kTokenLabelOffset = (int)2e8,
         kMaxTokenLabel = (int)3e8 };
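  /*
     Illustration of the label ranges above (a restatement of the comments as
     code, not additional API):

       Label state_label = clat_state + kStateLabelOffset;  // identifies a state of clat_
       bool is_state_label = (label >= kStateLabelOffset && label < kTokenLabelOffset);
       bool is_token_label = (label >= kTokenLabelOffset && label < kMaxTokenLabel);
  */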
 private:
  // [called from AcceptRawLatticeChunk()]
  // Gets the final costs from token-final states in the raw lattice (see
  // glossary for definition).  These final costs will be subtracted after
  // determinization; in the normal case they are `temporaries` used to guide
  // pruning.  NOTE: the index of the array is not the FST state that is
  // final, but the label on arcs entering it (these will be `token-labels`).
  // Each token-final state will have the same label on all arcs entering it.
  //
  // `old_final_costs` is assumed to be empty at entry.
  void GetRawLatticeFinalCosts(
      const Lattice &raw_fst,
      std::unordered_map<Label, BaseFloat> *old_final_costs);

  // Sets up non_final_redet_states_.  See documentation for that variable.
  void GetNonFinalRedetStates();

  /** [called from AcceptRawLatticeChunk()]  Processes arcs that leave the
      start-state of `chunk_clat` (if this is not the first chunk); does
      nothing if this is the first chunk.

      This includes using the `state-labels` to work out which states in clat_
      these states correspond to, and writing that mapping to `state_map`.

      Also modifies forward_costs_, because it has to do a kind of reweighting
      of the clat states that are the values it puts in `state_map`, to take
      account of the probabilities on the arcs from the start state of
      chunk_clat to the states corresponding to those redeterminized-states
      (i.e. the states in clat corresponding to the values it puts in
      `*state_map`).  It also modifies arcs_in_, mostly because there are rare
      cases when we end up `merging` sets of those redeterminized-states,
      because the determinization process mapped them to a single state, and
      that means we need to reroute the arcs into members of that set into one
      single member (which will appear as a value in `*state_map`).

      @param [in] chunk_clat   The determinized chunk of lattice we are
                               processing.
      @param [out] state_map   Mapping from states in chunk_clat to the state
                               in clat_ they correspond to.
      @return   Returns true if this is the first chunk.
  */
  bool ProcessArcsFromChunkStartState(
      const CompactLattice &chunk_clat,
      std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> *state_map);

  /**
     This function, called from AcceptRawLatticeChunk(), transfers arcs from
     `chunk_clat` to clat_.  Those arcs that have `token-labels` on them don't
     get written to clat_ but instead are stored in the final_arcs_ array.

     @param [in] chunk_clat   The determinized lattice for the chunk we are
                  processing; this is the source of the arcs we are moving.
     @param [in] is_first_chunk   True if this is the first chunk in the
                  utterance; it's needed because if it is, we will also
                  transfer arcs from the start state of chunk_clat.
     @param [in] state_map   Map from state-ids in chunk_clat to state-ids in
                  clat_.
     @param [in] chunk_state_to_token   Map from `token-final states` (see
                  glossary) in chunk_clat, to the token-label on arcs entering
                  those states.
     @param [in] old_final_costs   Map from token-label to the final-costs
                  that were on the corresponding token-final states in the
                  undeterminized lattice; these final-costs need to be removed
                  when we record the weights in final_arcs_, because they were
                  just temporary.
  */
  void TransferArcsToClat(
      const CompactLattice &chunk_clat,
      bool is_first_chunk,
      const std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> &state_map,
      const std::unordered_map<CompactLattice::StateId, Label> &chunk_state_to_token,
      const std::unordered_map<Label, BaseFloat> &old_final_costs);

  /** Adds one arc to `clat_`.  It's like clat_.AddArc(state, arc), except it
      also modifies arcs_in_ and forward_costs_. */
  void AddArcToClat(CompactLattice::StateId state,
                    const CompactLatticeArc &arc);

  CompactLattice::StateId AddStateToClat();

  // Identifies token-final states in `chunk_clat`; see the glossary above for
  // the definition of `token-final`.  This function outputs a map from such
  // states in chunk_clat, to the `token-label` on arcs entering them.  (It is
  // not possible that the same state would have multiple arcs entering it
  // with different token-labels, or some arcs entering with one token-label
  // and some with another, or be both initial and have such arcs; this is
  // true due to how we construct the raw lattice.)
  void IdentifyTokenFinalStates(
      const CompactLattice &chunk_clat,
      std::unordered_map<CompactLattice::StateId, Label> *token_map) const;

  // trans_model_ is needed by DeterminizeLatticePhonePrunedWrapper() which
  // this class calls.
  const TransitionInformation &trans_model_;
  // config_ is needed by DeterminizeLatticePhonePrunedWrapper() which this
  // class calls.
  const LatticeIncrementalDecoderConfig &config_;

  // Contains the set of redeterminized-states which are not final in the
  // canonical appended lattice.  Since the final ones don't physically appear
  // in clat_, this means the set of redeterminized-states which are
  // physically in clat_.  In code terms, this means the set of .first
  // elements in final_arcs_, plus whatever other states in clat_ are
  // reachable from such states.
  std::unordered_set<int32> non_final_redet_states_;
  // clat_ is the appended lattice (containing all chunks processed so far),
  // except its `final-arcs` (i.e. arcs which in the canonical lattice would
  // go to final-states) are not present (they are stored separately in
  // final_arcs_), and states which in the canonical lattice should have
  // final-arcs leaving them will instead have a final-prob.
  CompactLattice clat_;

  // arcs_in_ is indexed by (state-id in clat_), and is a list of arcs that
  // come into this state, in the form (prev-state, arc-index).  CAUTION: not
  // all these input-arc records will always be valid (some may be
  // out-of-date, and may refer to an out-of-range arc or an arc that does not
  // point to this state).  But all input arcs will always be listed.
  std::vector<std::vector<std::pair<int32, int32> > > arcs_in_;

  // final_arcs_ contains arcs which would appear in the canonical appended
  // lattice but for implementation reasons are not physically present in
  // clat_.  These are arcs to final states in the canonical appended lattice.
  // The .first elements are the source states in clat_ (these will all be
  // elements of non_final_redet_states_); the .nextstate elements of the arcs
  // do not contain a physical state, but contain state-labels allocated by
  // AllocateNewStateLabel().
  std::vector<CompactLatticeArc> final_arcs_;

  // forward_costs_, indexed by the state-id in clat_, stores the alpha
  // (forward) costs, i.e. the minimum cost from the start state to each state
  // in clat_.  This is relevant for pruned determinization.  The BaseFloat
  // can be thought of as the sum of a Value1() + Value2() in a LatticeWeight.
  std::vector<BaseFloat> forward_costs_;

  // temporary used in a function, kept here to avoid excessive reallocation.
  std::unordered_set<int32> temp_;

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDeterminizer);
};

/** This is an extension of the "normal" lattice-generating decoder.
    See \ref lattices_generation \ref decoders_faster and \ref decoders_simple
    for more information.

    The main difference is the incremental determinization, which is discussed
    in the documentation of the function GetLattice().  This means that the
    work of determinization isn't done all at once at the end of the file, but
    incrementally while decoding.  See the comment at the top of this file for
    more explanation.

    The decoder is templated on the FST type and the token type.  The token
    type will normally be StdToken, but also may be BackpointerToken, which is
    to support quick lookup of the current best path (see
    lattice-faster-online-decoder.h).

    The FST you invoke this decoder with is expected to be of type
    fst::Fst<fst::StdArc>, a.k.a. StdFst, or GrammarFst.  If you invoke it
    with FST == StdFst and it notices that the actual FST type is
    fst::VectorFst<fst::StdArc> or fst::ConstFst<fst::StdArc>, the decoder
    object will internally cast itself to one that is templated on those more
    specific types; this is an optimization for speed.
*/
template <typename FST, typename Token = decoder::StdToken>
class LatticeIncrementalDecoderTpl {
 public:
  using Arc = typename FST::Arc;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using ForwardLinkT = decoder::ForwardLink<Token>;

  // Instantiate this class once for each thing you have to decode.
  // This version of the constructor does not take ownership of 'fst'.
  LatticeIncrementalDecoderTpl(const FST &fst,
                               const TransitionInformation &trans_model,
                               const LatticeIncrementalDecoderConfig &config);
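  /*
     Construction sketch (`hclg_fst`, `grammar_fst`, `trans_model` and
     `config` are assumed to be owned by the caller):

       // Common case, via the typedef at the bottom of this file
       // (templated on fst::StdFst):
       LatticeIncrementalDecoder decoder(hclg_fst, trans_model, config);

       // Explicit template arguments, e.g. for the GrammarFst case mentioned
       // in the comment above this class:
       LatticeIncrementalDecoderTpl<fst::GrammarFst> gdecoder(
           grammar_fst, trans_model, config);
  */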
  // This version of the constructor takes ownership of the fst, and will
  // delete it when this object is destroyed.
  LatticeIncrementalDecoderTpl(const LatticeIncrementalDecoderConfig &config,
                               FST *fst,
                               const TransitionInformation &trans_model);

  void SetOptions(const LatticeIncrementalDecoderConfig &config) {
    config_ = config;
  }

  const LatticeIncrementalDecoderConfig &GetOptions() const { return config_; }

  ~LatticeIncrementalDecoderTpl();

  /**
     CAUTION: it's unlikely that you will ever want to call this function.  In
     a scenario where you have the entire file and just want to decode it,
     there is no point using this decoder.

     This is an example of how to do decoding together with incremental
     determinization.  It decodes until there are no more frames left in the
     "decodable" object.

     In this example, config_.determinize_max_delay,
     config_.determinize_min_chunk_size and config_.determinize_max_active are
     used to determine the time to call GetLattice().

     Users will probably want to use appropriate combinations of
     AdvanceDecoding() and GetLattice() to build their application; this just
     gives you some idea how.

     The function returns true if any kind of traceback is available (not
     necessarily from a final state).
  */
  bool Decode(DecodableInterface *decodable);

  /// Says whether a final-state was active on the last frame.  If it was not,
  /// the lattice (or traceback) will end with states that are not
  /// final-states.
  bool ReachedFinal() const {
    return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
  }

  /** This decoder has no GetBestPath() function.  If you need that
      functionality you should probably use
      lattice-incremental-online-decoder.h, which makes it very efficient to
      obtain the best path. */

  /**
     This GetLattice() function returns the lattice containing
     `num_frames_to_include` frames; this will be all frames decoded so far if
     you let num_frames_to_include == NumFramesDecoded(), but it will
     generally be better to make it a few frames less than that, to avoid the
     lattice having too many active states at the end.

     @param [in] num_frames_to_include  The number of frames that you want to
                 be included in the lattice.  Must be >= NumFramesInLattice()
                 and <= NumFramesDecoded().

     @param [in] use_final_probs  True if you want the final-probs of HCLG to
                 be included in the output lattice.  Must not be set to true
                 if num_frames_to_include != NumFramesDecoded().  Must be set
                 to true if you have previously called FinalizeDecoding().
                 (If no state was final on frame `num_frames_to_include`, the
                 final-probs won't be included regardless of
                 `use_final_probs`; you can test whether this was the case by
                 calling ReachedFinal().)

     @return clat   The CompactLattice representing what has been decoded up
                 until `num_frames_to_include` (e.g., LatticeStateTimes() on
                 this lattice would return `num_frames_to_include`).

     See also UpdateLatticeDeterminization().  Caution: this const reference
     is only valid until the next time you call AdvanceDecoding() or
     GetLattice().

     CAUTION: the lattice may contain disconnected states; you should call
     Connect() on the output before writing it out.
  */
  const CompactLattice &GetLattice(int32 num_frames_to_include,
                                   bool use_final_probs = false);
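  /*
     A sketch of periodically getting a partial lattice while decoding (the
     margin of 10 frames is an arbitrary illustrative choice, following the
     advice above to stay a few frames behind NumFramesDecoded()):

       decoder.AdvanceDecoding(&decodable);
       int32 margin = 10;
       int32 num_frames = std::max<int32>(decoder.NumFramesInLattice(),
                                          decoder.NumFramesDecoded() - margin);
       const CompactLattice &clat = decoder.GetLattice(num_frames, false);
  */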
  /* Returns the number of frames in the currently-determinized part of the
     lattice, which will be a number in [0, NumFramesDecoded()].  It will be
     the largest number that GetLattice() was called with, but note that
     GetLattice() may be called from UpdateLatticeDeterminization().

     Made available in case the user wants to give that same number to
     GetLattice(). */
  int NumFramesInLattice() const { return num_frames_in_lattice_; }

  /** InitDecoding initializes the decoding, and should only be used if you
      intend to call AdvanceDecoding().  If you call Decode(), you don't need
      to call this.  You can also call InitDecoding if you have already
      decoded an utterance and want to start with a new utterance. */
  void InitDecoding();

  /** This will decode until there are no more frames ready in the decodable
      object.  You can keep calling it each time more frames become available
      (this is the normal pattern in a real-time/online decoding scenario).
      If max_num_frames is specified, it specifies the maximum number of
      frames the function will decode before returning. */
  void AdvanceDecoding(DecodableInterface *decodable,
                       int32 max_num_frames = -1);

  /** FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives
      more information.  It returns the difference between the best
      (final-cost plus cost) of any token on the final frame, and the best
      cost of any token on the final frame.  If it is infinity it means no
      final-states were present on the final frame.  It will usually be
      nonnegative.  If it is not too positive (e.g. < 5 is my first guess, but
      this is not tested) you can take it as a good indication that we reached
      the final-state with reasonable likelihood. */
  BaseFloat FinalRelativeCost() const;

  /** Returns the number of frames decoded so far. */
  inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }

  /** Finalizes the decoding, doing an extra pruning step on the last frame
      that uses the final-probs.  May be called only once. */
  void FinalizeDecoding();
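  /*
     End-of-utterance sketch (not additional API; `decoder` is an instance of
     this class): after the last AdvanceDecoding() call, finalize and then
     request the lattice with final-probs included:

       decoder.FinalizeDecoding();
       if (!decoder.ReachedFinal())
         KALDI_WARN << "No final state reached; traceback will be partial.";
       const CompactLattice &clat =
           decoder.GetLattice(decoder.NumFramesDecoded(), true);
  */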
 protected:
  /* Some protected things are needed in LatticeIncrementalOnlineDecoderTpl. */

  /** NOTE: for parts of the internal implementation that are shared with
      LatticeFasterDecoder, we have removed the comments. */
  inline static void DeleteForwardLinks(Token *tok);

  struct TokenList {
    Token *toks;
    bool must_prune_forward_links;
    bool must_prune_tokens;
    int32 num_toks;  /* Note: you can only trust `num_toks` if
                        must_prune_tokens == false, because it is only set in
                        PruneTokensForFrame(). */
    TokenList()
        : toks(NULL), must_prune_forward_links(true),
          must_prune_tokens(true), num_toks(-1) {}
  };

  using Elem = typename HashList<StateId, Token *>::Elem;

  void PossiblyResizeHash(size_t num_toks);
  inline Token *FindOrAddToken(StateId state, int32 frame_plus_one,
                               BaseFloat tot_cost, Token *backpointer,
                               bool *changed);
  void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed,
                         bool *links_pruned, BaseFloat delta);
  void ComputeFinalCosts(unordered_map<Token *, BaseFloat> *final_costs,
                         BaseFloat *final_relative_cost,
                         BaseFloat *final_best_cost) const;
  void PruneForwardLinksFinal();
  void PruneTokensForFrame(int32 frame_plus_one);
  void PruneActiveTokens(BaseFloat delta);
  BaseFloat GetCutoff(Elem *list_head, size_t *tok_count,
                      BaseFloat *adaptive_beam, Elem **best_elem);
  BaseFloat ProcessEmitting(DecodableInterface *decodable);
  void ProcessNonemitting(BaseFloat cost_cutoff);

  HashList<StateId, Token *> toks_;
  std::vector<TokenList> active_toks_;  // indexed by frame.
  std::vector<const Elem *> queue_;  // temp variable used in ProcessNonemitting,
  std::vector<BaseFloat> tmp_array_;  // used in GetCutoff.
  const FST *fst_;
  bool delete_fst_;
  std::vector<BaseFloat> cost_offsets_;
  int32 num_toks_;
  bool warned_;
  bool decoding_finalized_;
  unordered_map<Token *, BaseFloat> final_costs_;
  BaseFloat final_relative_cost_;
  BaseFloat final_best_cost_;

  /***********************
     Variables below this point relate to the incremental determinization.
   ***********************/
  LatticeIncrementalDecoderConfig config_;

  /** Much of the incremental determinization algorithm is encapsulated in
      the determinizer_ object. */
  LatticeIncrementalDeterminizer determinizer_;

  /* Just a temporary used in a function; stored here to avoid reallocation. */
  unordered_map<Token *, StateId> temp_token_map_;

  /** num_frames_in_lattice_ is the highest `num_frames_to_include` argument
      for any prior call to GetLattice(). */
  int32 num_frames_in_lattice_;

  // A map from Token to its token_label.  Will contain an entry for each
  // Token in active_toks_[num_frames_in_lattice_].
  unordered_map<Token *, Label> token2label_map_;

  // A temporary used in a function, kept here to avoid reallocation.
  unordered_map<Token *, Label> token2label_map_temp_;

  // We allocate a unique id for each Token.
  Label next_token_label_;

  inline Label AllocateNewTokenLabel() { return next_token_label_++; }

  // There are various cleanup tasks... the toks_ structure contains singly
  // linked lists of Token pointers, where Elem is the list type.  It also
  // indexes them in a hash, indexed by state (this hash is only maintained
  // for the most recent frame).  toks_.Clear() deletes them from the hash and
  // returns the list of Elems.  The function DeleteElems calls
  // toks_.Delete(elem) for each elem in the list, which returns ownership of
  // the Elem to the toks_ structure for reuse, but does not delete the Token
  // pointer.  The Token pointers are reference-counted and are ultimately
  // deleted in PruneTokensForFrame, but are also linked together on each
  // frame by their own linked-list, using the "next" pointer.  We delete them
  // manually.
  void DeleteElems(Elem *list);

  void ClearActiveTokens();

  // Returns the number of active tokens on frame `frame`.  Can be used as
  // part of a heuristic to decide which frame to determinize until, if you
  // are not at the end of an utterance.
  int32 GetNumToksForFrame(int32 frame);

  /**
     UpdateLatticeDeterminization() ensures the work of determinization is
     kept up to date so that when you do need the lattice you can get it fast.
     It uses the configuration values `determinize_max_delay`,
     `determinize_min_chunk_size` and `determinize_max_active` to decide
     whether and when to call GetLattice().  You can safely call this as often
     as you want (e.g. after each time you call AdvanceDecoding()); it won't
     do substantially more work if it is called frequently.
  */
  void UpdateLatticeDeterminization();

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDecoderTpl);
};

typedef LatticeIncrementalDecoderTpl<fst::StdFst, decoder::StdToken>
    LatticeIncrementalDecoder;

}  // end namespace kaldi.

#endif