FunASR/runtime/onnxruntime/third_party/kaldi/util/edit-distance-test.cc

268 lines
7.0 KiB
C++
Raw Permalink Normal View History

2024-05-18 15:50:56 +08:00
// util/edit-distance-test.cc
// Copyright 2009-2011 Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/edit-distance.h"
namespace kaldi {
void TestEditDistance() {
std::vector<int32> a;
std::vector<int32> b;
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
a.push_back(1);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
b.push_back(1);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
b.push_back(2);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
a.push_back(2);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
a.push_back(3);
a.push_back(4);
b.push_back(4);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
a.push_back(5);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
b.push_back(6);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
a.push_back(1);
b.push_back(1);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
b.push_back(10);
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 3);
}
void TestEditDistanceString() {
std::vector<std::string> a;
std::vector<std::string> b;
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
a.push_back("1");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
b.push_back("1");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
b.push_back("2");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
a.push_back("2");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 0);
a.push_back("3");
a.push_back("4");
b.push_back("4");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 1);
a.push_back("5");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
b.push_back("6");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
a.push_back("1");
b.push_back("1");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 2);
b.push_back("10");
KALDI_ASSERT(LevenshteinEditDistance(a, b) == 3);
}
// edit distance calculate
void TestEditDistance2() {
std::vector<int32> hyp;
std::vector<int32> ref;
int32 ins, del, sub, total_cost;
// initialize hypothesis
hyp.push_back(1);
hyp.push_back(3);
hyp.push_back(4);
hyp.push_back(5);
// initialize reference
ref.push_back(2);
ref.push_back(3);
ref.push_back(4);
ref.push_back(5);
ref.push_back(6);
ref.push_back(7);
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 3 && ins == 0 && del == 2 && sub == 1);
std::swap(hyp, ref);
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 3 && ins == 2 && del == 0 && sub == 1);
hyp.clear();
ref.clear();
hyp.push_back(1);
ref.push_back(1);
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 0 && ins+del+sub == 0);
hyp.push_back(2);
ref.push_back(3);
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 1 && ins == 0 && del == 0 && sub == 1);
// randomized test
size_t num = 0;
for (; num < 1000; num ++) {
int32 hyp_len = Rand()%11;
int32 ref_len = Rand()%3;
hyp.resize(hyp_len);
ref.resize(ref_len);
int32 index = 0;
for (; index < hyp_len; index ++)
hyp[index] = Rand()%4;
for (index = 0; index < ref_len; index ++)
ref[index] = Rand()%4;
// current version
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
// previous version
int32 total_cost2 = LevenshteinEditDistance(hyp, ref);
// verify both are the same
KALDI_ASSERT(total_cost == total_cost2);
KALDI_ASSERT(ins+del+sub == total_cost);
KALDI_ASSERT(del-ins == static_cast<int32>(ref.size() -hyp.size()));
}
return;
}
// edit distance calculate
void TestEditDistance2String() {
std::vector<std::string> hyp;
std::vector<std::string> ref;
int32 ins, del, sub, total_cost;
// initialize hypothesis
hyp.push_back("1");
hyp.push_back("3");
hyp.push_back("4");
hyp.push_back("5");
// initialize reference
ref.push_back("2");
ref.push_back("3");
ref.push_back("4");
ref.push_back("5");
ref.push_back("6");
ref.push_back("7");
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 3 && ins == 0 && del == 2 && sub == 1);
std::swap(hyp, ref);
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 3 && ins == 2 && del == 0 && sub == 1);
hyp.clear();
ref.clear();
hyp.push_back("1");
ref.push_back("1");
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 0 && ins+del+sub == 0);
hyp.push_back("2");
ref.push_back("3");
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
KALDI_ASSERT(total_cost == 1 && ins == 0 && del == 0 && sub == 1);
// randomized test
size_t num = 0;
for (; num < 1000; num ++) {
int32 hyp_len = Rand()%11;
int32 ref_len = Rand()%3;
hyp.resize(hyp_len);
ref.resize(ref_len);
int32 index = 0;
for (; index < hyp_len; index ++)
hyp[index] = Rand()%4;
for (index = 0; index < ref_len; index ++)
ref[index] = Rand()%4;
// current version
total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
// previous version
int32 total_cost2 = LevenshteinEditDistance(hyp, ref);
// verify both are the same
KALDI_ASSERT(total_cost == total_cost2);
KALDI_ASSERT(ins+del+sub == total_cost);
KALDI_ASSERT(del-ins == static_cast<int32>(ref.size() -hyp.size()));
}
return;
}
void TestLevenshteinAlignment() {
for (size_t i = 0; i < 100; i++) {
size_t a_sz = Rand() % 5, b_sz = Rand() % 5;
std::vector<int32> a, b;
for (size_t j = 0; j < a_sz; j++) a.push_back(Rand() % 10);
for (size_t j = 0; j < b_sz; j++) b.push_back(Rand() % 10);
int32 eps_sym = -1;
std::vector<std::pair<int32, int32> > ans;
int32 e1 = LevenshteinEditDistance(a, b),
e2 = LevenshteinAlignment(a, b, eps_sym, &ans);
KALDI_ASSERT(e1 == e2);
std::vector<int32> a2, b2;
for (size_t i = 0; i < ans.size(); i++) {
if (ans[i].first != eps_sym) a2.push_back(ans[i].first);
if (ans[i].second != eps_sym) b2.push_back(ans[i].second);
}
KALDI_ASSERT(a == a2);
KALDI_ASSERT(b == b2);
}
}
} // end namespace kaldi
int main() {
using namespace kaldi;
TestEditDistance();
TestEditDistanceString();
TestEditDistance2();
TestEditDistance2String();
TestLevenshteinAlignment();
std::cout << "Test OK\n";
}