FunASR/runtime/onnxruntime/third_party/kaldi/util/kaldi-io-test.cc

371 lines
13 KiB
C++
Raw Normal View History

2024-05-18 15:50:56 +08:00
// util/kaldi-io-test.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef _MSC_VER
#include <unistd.h>
#endif
#include "base/io-funcs.h"
#include "util/kaldi-io.h"
#include "base/kaldi-math.h"
#include "base/kaldi-utils.h"
namespace kaldi {
// Checks the category that ClassifyRxfilename() reports for a series of
// read-side filename strings.  Each assertion pins one input string to the
// enumerator the classifier is expected to return for it.
void UnitTestClassifyRxfilename() {
  // The empty string is expected to classify as standard input.
  KALDI_ASSERT(ClassifyRxfilename("") == kStandardInput);

  // Whitespace-only names, and plain names with leading or trailing
  // whitespace, are expected to be rejected.
  KALDI_ASSERT(ClassifyRxfilename(" ") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename(" a ") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("a ") == kNoInput);

  // A plain name is a file; a lone dash is standard input.
  KALDI_ASSERT(ClassifyRxfilename("a") == kFileInput);
  KALDI_ASSERT(ClassifyRxfilename("-") == kStandardInput);

  // A trailing '|' marks an input pipe (even with embedded or leading
  // spaces); a leading '|' is not accepted for input.
  KALDI_ASSERT(ClassifyRxfilename("b|") == kPipeInput);
  KALDI_ASSERT(ClassifyRxfilename("|b") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("b c|") == kPipeInput);
  KALDI_ASSERT(ClassifyRxfilename(" b c|") == kPipeInput);

  // "name:digits" is a file with an offset; a bare trailing ':' or a
  // "/digits" suffix is still expected to be an ordinary file.
  KALDI_ASSERT(ClassifyRxfilename("a b c:123") == kOffsetFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c:3") == kOffsetFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c:") == kFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c/3") == kFileInput);

  // Strings carrying an "ark,...:" or "scp:" prefix are expected to be
  // rejected as rxfilenames.
  KALDI_ASSERT(ClassifyRxfilename("ark,s,cs:a b c") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("scp:a b c") == kNoInput);
}
// Checks the category that ClassifyWxfilename() reports for a series of
// write-side filename strings.  Each assertion pins one input string to the
// enumerator the classifier is expected to return for it.
void UnitTestClassifyWxfilename() {
  // The empty string is expected to classify as standard output.
  KALDI_ASSERT(ClassifyWxfilename("") == kStandardOutput);

  // Whitespace-only names, and plain names with leading or trailing
  // whitespace, are expected to be rejected.
  KALDI_ASSERT(ClassifyWxfilename(" ") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename(" a ") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("a ") == kNoOutput);

  // A plain name is a file; a lone dash is standard output.
  KALDI_ASSERT(ClassifyWxfilename("a") == kFileOutput);
  KALDI_ASSERT(ClassifyWxfilename("-") == kStandardOutput);

  // For output the pipe symbol must lead: a trailing '|' is rejected, a
  // leading '|' is an output pipe (trailing space after the command is OK).
  KALDI_ASSERT(ClassifyWxfilename("b|") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("|b") == kPipeOutput);
  KALDI_ASSERT(ClassifyWxfilename("| b ") == kPipeOutput);
  KALDI_ASSERT(ClassifyWxfilename("b c|") == kNoOutput);

  // Offset suffixes and "ark,...:" / "scp:" prefixes are expected to be
  // rejected as wxfilenames.
  KALDI_ASSERT(ClassifyWxfilename("a b c:123") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("ark,s,cs:a b c") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("scp:a b c") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c:3") == kNoOutput);

  // A bare trailing ':' or a "/digits" suffix is still an ordinary file.
  KALDI_ASSERT(ClassifyWxfilename("a b c:") == kFileOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c/3") == kFileOutput);
}
// Round-trip test of the basic I/O helpers through a regular file.
//
// Writes a fixed sequence of items (two integers, a char, three integer
// vectors, three tokens, two floats and two doubles) to the file "tmpf"
// through an Output object, reads them back through an Input object, and
// asserts that every item comes back with the value that was written.  The
// file is removed at the end.
//
// binary: forwarded to Output and to every Write* call.  When false, the
//         test also inserts tabs, spaces and newlines between items (some
//         unconditionally, some at random).
void UnitTestIoNew(bool binary) {
  const char *filename = "tmpf";

  // ---------------------------------------------------------------- write
  Output ko(filename, binary);
  std::ostream &outfile = ko.Stream();
  if (!binary) outfile << "\t";
  int64 i1 = Rand() % 10000;
  WriteBasicType(outfile, binary, i1);
  uint16 i2 = Rand() % 10000;
  WriteBasicType(outfile, binary, i2);
  if (!binary) outfile << "\t";
  char c = Rand();
  WriteBasicType(outfile, binary, c);
  if (!binary && Rand()%2 == 0) outfile << " \n";
  std::vector<int32> vec1;  // intentionally left empty.
  WriteIntegerVector(outfile, binary, vec1);
  if (!binary && Rand()%2 == 0) outfile << " \n";
  std::vector<uint16> vec2;
  for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
  WriteIntegerVector(outfile, binary, vec2);
  if (!binary) outfile << " \n";
  std::vector<char> vec3;
  for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
  WriteIntegerVector(outfile, binary, vec3);
  if (!binary && Rand()%2 == 0) outfile << " \n";
  const char *token1 = "Hi";
  WriteToken(outfile, binary, token1);
  if (!binary) outfile << " \n";
  std::string token2 = "There.";
  WriteToken(outfile, binary, token2);
  if (!binary && Rand()%2 == 0) outfile << " \n";
  std::string token3 = "You.";
  WriteToken(outfile, binary, token3);
  if (!binary && Rand()%2 == 0) outfile << " ";
  float f1 = RandUniform();
  WriteBasicType(outfile, binary, f1);
  if (!binary && Rand()%2 == 0) outfile << "\t";
  float f2 = RandUniform();
  WriteBasicType(outfile, binary, f2);
  double d1 = RandUniform();
  WriteBasicType(outfile, binary, d1);
  if (!binary && Rand()%2 == 0) outfile << "\t";
  double d2 = RandUniform();
  WriteBasicType(outfile, binary, d2);
  if (!binary && Rand()%2 == 0) outfile << "\t";

  // Do not ignore the result of Close(): a failed close means the data read
  // back below cannot be trusted.  The result is stored in a local before
  // being asserted, the same way UnitTestIoPipe() does it.
  bool ans = ko.Close();
  KALDI_ASSERT(ans);

  // ----------------------------------------------------------------- read
  {  // Scope so that the Input object is gone before the file is removed.
    bool binary_in;  // filled in by the Input constructor.
    Input ki(filename, &binary_in);
    std::istream &infile = ki.Stream();
    int64 i1_in;
    ReadBasicType(infile, binary_in, &i1_in);
    KALDI_ASSERT(i1_in == i1);
    uint16 i2_in;
    ReadBasicType(infile, binary_in, &i2_in);
    KALDI_ASSERT(i2_in == i2);
    char c_in;
    ReadBasicType(infile, binary_in, &c_in);
    KALDI_ASSERT(c_in == c);
    std::vector<int32> vec1_in;
    ReadIntegerVector(infile, binary_in, &vec1_in);
    KALDI_ASSERT(vec1_in == vec1);
    std::vector<uint16> vec2_in;
    ReadIntegerVector(infile, binary_in, &vec2_in);
    KALDI_ASSERT(vec2_in == vec2);
    std::vector<char> vec3_in;
    ReadIntegerVector(infile, binary_in, &vec3_in);
    KALDI_ASSERT(vec3_in == vec3);
    std::string token1_in, token2_in;
    // Peek() is expected to report the first character of the next token.
    KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
    ReadToken(infile, binary_in, &token1_in);
    KALDI_ASSERT(token1_in == std::string(token1));
    ReadToken(infile, binary_in, &token2_in);
    KALDI_ASSERT(token2_in == token2);
    // Exercise both the C-string and the std::string form of ExpectToken().
    if (Rand() % 2 == 0)
      ExpectToken(infile, binary_in, token3.c_str());
    else
      ExpectToken(infile, binary_in, token3);
    float f1_in;  // same type as written.
    ReadBasicType(infile, binary_in, &f1_in);
    AssertEqual(f1_in, f1);
    double f2_in;  // deliberately a different type: written as float.
    ReadBasicType(infile, binary_in, &f2_in);
    AssertEqual(f2_in, f2);
    double d1_in;  // same type as written.
    ReadBasicType(infile, binary_in, &d1_in);
    AssertEqual(d1_in, d1);
    float d2_in;  // deliberately a different type: written as double.
    ReadBasicType(infile, binary_in, &d2_in);
    AssertEqual(d2_in, d2);
    // Everything has been consumed, so Peek() is expected to return -1.
    KALDI_ASSERT(Peek(infile, binary_in) == -1);
  }
  unlink(filename);
}
// Round-trip test of the basic I/O helpers through pipes.
//
// Same sequence of items as UnitTestIoNew(), plus a "<foo>" token, but the
// data is written through an output pipe into "tmpf.gz" and read back
// through an input pipe.  On non-Windows builds the pipes are gzip/gunzip;
// on MSVC builds without KALDI_CYGWIN_COMPAT the test binary invokes itself
// with a "cat" argument instead.  "tmpf.gz" is removed at the end.
//
// binary: forwarded to Output and to every Write* call.  When false, the
//         test also inserts tabs, spaces and newlines between items (some
//         unconditionally, some at random).
void UnitTestIoPipe(bool binary) {
  {  // Scope so that both stream objects are gone before the file is removed.
#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
    // self-invocation on Windows that emulates cat(1)
    const char *filename_out = "|kaldi-io-test cat > tmpf.gz",
        *filename_in = "kaldi-io-test cat tmpf.gz|";
#else
    const char *filename_out = "|gzip -c > tmpf.gz",
        *filename_in = "gunzip -c tmpf.gz |";
#endif

    // -------------------------------------------------------------- write
    Output ko(filename_out, binary);
    std::ostream &outfile = ko.Stream();
    if (!binary) outfile << "\t";
    int64 i1 = Rand() % 10000;
    WriteBasicType(outfile, binary, i1);
    uint16 i2 = Rand() % 10000;
    WriteBasicType(outfile, binary, i2);
    if (!binary) outfile << "\t";
    char c = Rand();
    WriteBasicType(outfile, binary, c);
    if (!binary && Rand()%2 == 0) outfile << " \n";
    std::vector<int32> vec1;  // intentionally left empty.
    WriteIntegerVector(outfile, binary, vec1);
    if (!binary && Rand()%2 == 0) outfile << " \n";
    std::vector<uint16> vec2;
    for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
    WriteIntegerVector(outfile, binary, vec2);
    if (!binary) outfile << " \n";
    WriteToken(outfile, binary, "<foo>");
    std::vector<char> vec3;
    for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
    WriteIntegerVector(outfile, binary, vec3);
    if (!binary && Rand()%2 == 0) outfile << " \n";
    const char *token1 = "Hi";
    WriteToken(outfile, binary, token1);
    if (!binary) outfile << " \n";
    std::string token2 = "There.";
    WriteToken(outfile, binary, token2);
    if (!binary && Rand()%2 == 0) outfile << " \n";
    std::string token3 = "You.";
    WriteToken(outfile, binary, token3);
    if (!binary && Rand()%2 == 0) outfile << " ";
    float f1 = RandUniform();
    WriteBasicType(outfile, binary, f1);
    if (!binary && Rand()%2 == 0) outfile << "\t";
    float f2 = RandUniform();
    WriteBasicType(outfile, binary, f2);
    double d1 = RandUniform();
    WriteBasicType(outfile, binary, d1);
    if (!binary && Rand()%2 == 0) outfile << "\t";
    double d2 = RandUniform();
    WriteBasicType(outfile, binary, d2);
    if (!binary && Rand()%2 == 0) outfile << "\t";
    bool ans = ko.Close();
    KALDI_ASSERT(ans);
#ifndef _MSC_VER
    Sleep(1);  // This test does not work without this sleep:
               // seems to be some kind of file-system latency.
#endif

    // --------------------------------------------------------------- read
    {
      bool binary_in;  // filled in by the Input constructor.
      Input ki(filename_in, &binary_in);
      std::istream &infile = ki.Stream();
      int64 i1_in;
      ReadBasicType(infile, binary_in, &i1_in);
      KALDI_ASSERT(i1_in == i1);
      uint16 i2_in;
      ReadBasicType(infile, binary_in, &i2_in);
      KALDI_ASSERT(i2_in == i2);
      char c_in;
      ReadBasicType(infile, binary_in, &c_in);
      KALDI_ASSERT(c_in == c);
      std::vector<int32> vec1_in;
      ReadIntegerVector(infile, binary_in, &vec1_in);
      KALDI_ASSERT(vec1_in == vec1);
      std::vector<uint16> vec2_in;
      ReadIntegerVector(infile, binary_in, &vec2_in);
      KALDI_ASSERT(vec2_in == vec2);
      std::vector<char> vec3_in;
      // For the token "<foo>", PeekToken() is expected to report 'f', i.e.
      // the character after the opening '<'.
      KALDI_ASSERT(PeekToken(infile, binary_in) == static_cast<int>('f'));
      ExpectToken(infile, binary_in, "<foo>");
      ReadIntegerVector(infile, binary_in, &vec3_in);
      KALDI_ASSERT(vec3_in == vec3);
      std::string token1_in, token2_in;
      // Peek() is expected to report the first character of the next token.
      KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
      ReadToken(infile, binary_in, &token1_in);
      KALDI_ASSERT(token1_in == std::string(token1));
      ReadToken(infile, binary_in, &token2_in);
      KALDI_ASSERT(token2_in == token2);
      // Exercise both the C-string and the std::string form of ExpectToken().
      if (Rand() % 2 == 0)
        ExpectToken(infile, binary_in, token3.c_str());
      else
        ExpectToken(infile, binary_in, token3);
      float f1_in;  // same type as written.
      ReadBasicType(infile, binary_in, &f1_in);
      AssertEqual(f1_in, f1);
      double f2_in;  // deliberately a different type: written as float.
      ReadBasicType(infile, binary_in, &f2_in);
      AssertEqual(f2_in, f2);
      double d1_in;  // same type as written.
      ReadBasicType(infile, binary_in, &d1_in);
      AssertEqual(d1_in, d1);
      float d2_in;  // deliberately a different type: written as double.
      ReadBasicType(infile, binary_in, &d2_in);
      AssertEqual(d2_in, d2);
      // Everything has been consumed, so Peek() is expected to return -1.
      KALDI_ASSERT(Peek(infile, binary_in) == -1);
    }
  }
  // Only "tmpf.gz" is created by this test, so it is the only file removed;
  // removing any other name could delete a file the test does not own.
  unlink("tmpf.gz");
}
void UnitTestIoStandard() {
/*
Don't do the the following part because it requires
to pipe from an empty file, for it to not hang.
{
Input inp("", NULL); // standard input.
KALDI_ASSERT(inp.Stream().get() == -1);
}
{
Input inp("-", NULL); // standard input.
KALDI_ASSERT(inp.Stream().get() == -1);
}*/
{
std::cout << "Should see: foo\n";
Output out("", false);
out.Stream() << "foo\n";
}
{
std::cout << "Should see: bar\n";
Output out("-", false);
out.Stream() << "bar\n";
}
}
// Checks MapCygwinPath() on a handful of paths.  The body is compiled only
// when KALDI_CYGWIN_COMPAT is defined; otherwise this function does nothing.
void UnitTestNativeFilename() {
#ifdef KALDI_CYGWIN_COMPAT
  extern std::string MapCygwinPath(const std::string &filename);

  // The empty path and the relative "." / ".." must come back unchanged.
  KALDI_ASSERT(MapCygwinPath("") == "");
  KALDI_ASSERT(MapCygwinPath(".") == ".");
  KALDI_ASSERT(MapCygwinPath("..") == "..");

  // "/dev/null" must not map to something that still starts with '/'.
  const std::string mapped_dev_null = MapCygwinPath("/dev/null");
  KALDI_ASSERT(mapped_dev_null[0] != '/');

  // Paths under "/tmp" must map to something whose second character is ':'.
  const std::string mapped_tmp = MapCygwinPath("/tmp");
  KALDI_ASSERT(mapped_tmp[1] == ':');
  const std::string mapped_tmp_slash = MapCygwinPath("/tmp/");
  KALDI_ASSERT(mapped_tmp_slash[1] == ':');
  const std::string mapped_tmp_foo = MapCygwinPath("/tmp/foo");
  KALDI_ASSERT(mapped_tmp_foo[1] == ':');

  // "/cygdrive/<letter>" prefixes must map to "<letter>:/".
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c") == "c:/");
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/") == "c:/");
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/foo") == "c:/foo");
#endif
}
} // end namespace kaldi.
#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
// Windows has no cat! There is probably no suitable tool to test popen I/O on
// Windows, so we emulate a lame version of cat(1).
//
// Copies bytes from the input to stdout, with both descriptors switched to
// binary mode.
//
// argc, argv: the arguments that follow "cat" on the command line.  If there
//             is at least one and it is not "-", argv[0] names the file to
//             read; otherwise stdin is read.
// Returns 0 on success and 1 if the input cannot be opened, a read fails, or
// the data cannot be written out completely.
static int TinyCat(int argc, const char** argv) {
  const char* name_in = argc > 0 && strcmp(argv[0], "-") ? argv[0] : NULL;
  int fd_in = name_in ? _open(name_in, _O_RDONLY) : _fileno(stdin);
  if (fd_in < 0)
    return 1;
  int fd_out = _fileno(stdout);
  _setmode(fd_in, _O_BINARY);
  _setmode(fd_out, _O_BINARY);

  char buffer[100];
  int status = 0;
  int last_read;
  while ((last_read = _read(fd_in, buffer, sizeof(buffer))) > 0) {
    // Keep writing until the whole chunk is out; stop on the first failure
    // so a broken output is reported instead of being silently truncated.
    int written = 0;
    while (written < last_read) {
      int n = _write(fd_out, buffer + written, last_read - written);
      if (n <= 0) {
        status = 1;
        break;
      }
      written += n;
    }
    if (status != 0)
      break;
  }
  // A negative count from _read means the loop ended on an error, not EOF.
  if (last_read < 0)
    status = 1;

  if (name_in) _close(fd_in);  // stdin is not ours to close.
  return status;
}
#endif
// Test driver: runs every unit test defined in this file, in a fixed order.
// On MSVC builds without KALDI_CYGWIN_COMPAT, a first argument of "cat"
// makes the program act as TinyCat on the remaining arguments instead.
int main(int argc, const char** argv) {
  using namespace kaldi;

#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
  // Self-invocation used by the pipe test: hand everything after "cat" to
  // TinyCat and exit with its status.
  if (argc > 1 && strcmp(argv[1], "cat") == 0)
    return TinyCat(argc - 2, argv + 2);
#endif

  UnitTestNativeFilename();

  // File round trip: text mode first, then binary.
  UnitTestIoNew(false);
  UnitTestIoNew(true);

  // Pipe round trip: binary mode first, then text.
  UnitTestIoPipe(true);
  UnitTestIoPipe(false);

  UnitTestIoStandard();
  UnitTestClassifyRxfilename();
  UnitTestClassifyWxfilename();

  // Sanity check that KALDI_ASSERT does not fail for a true condition.
  KALDI_ASSERT(1);
  return 0;
}