// util/kaldi-io-test.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
|
|
#ifndef _MSC_VER
|
|
#include <unistd.h>
|
|
#endif
|
|
#include "base/io-funcs.h"
|
|
#include "util/kaldi-io.h"
|
|
#include "base/kaldi-math.h"
|
|
#include "base/kaldi-utils.h"
|
|
|
|
namespace kaldi {
|
|
|
|
|
|
|
|
// Checks how ClassifyRxfilename() categorizes a set of hand-picked read
// specifiers.  The cases are grouped by the classification they are expected
// to produce; each one is a separate assertion so that a failure identifies
// the exact input that was misclassified.
void UnitTestClassifyRxfilename() {
  // The empty string and "-" both denote standard input.
  KALDI_ASSERT(ClassifyRxfilename("") == kStandardInput);
  KALDI_ASSERT(ClassifyRxfilename("-") == kStandardInput);

  // Ordinary file names; a trailing ':' with no digits after it, or a '/'
  // before the digits, does not turn the name into an offset specifier.
  KALDI_ASSERT(ClassifyRxfilename("a") == kFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c:") == kFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c/3") == kFileInput);

  // File name followed by ":<digits>".
  KALDI_ASSERT(ClassifyRxfilename("a b c:123") == kOffsetFileInput);
  KALDI_ASSERT(ClassifyRxfilename("a b c:3") == kOffsetFileInput);

  // Commands terminated by '|' are read through a pipe.
  KALDI_ASSERT(ClassifyRxfilename("b|") == kPipeInput);
  KALDI_ASSERT(ClassifyRxfilename("b c|") == kPipeInput);
  KALDI_ASSERT(ClassifyRxfilename(" b c|") == kPipeInput);

  // Inputs that must be rejected: these whitespace-padded names, an
  // output-style pipe ("|b"), and table specifiers ("ark,...:" / "scp:").
  KALDI_ASSERT(ClassifyRxfilename(" ") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename(" a ") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("a ") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("|b") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("ark,s,cs:a b c") == kNoInput);
  KALDI_ASSERT(ClassifyRxfilename("scp:a b c") == kNoInput);
}
|
|
|
|
|
|
// Checks how ClassifyWxfilename() categorizes a set of hand-picked write
// specifiers.  The cases are grouped by the classification they are expected
// to produce; each one is a separate assertion so that a failure identifies
// the exact input that was misclassified.
void UnitTestClassifyWxfilename() {
  // The empty string and "-" both denote standard output.
  KALDI_ASSERT(ClassifyWxfilename("") == kStandardOutput);
  KALDI_ASSERT(ClassifyWxfilename("-") == kStandardOutput);

  // Ordinary file names, including ones ending in ':' or containing '/'.
  KALDI_ASSERT(ClassifyWxfilename("a") == kFileOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c:") == kFileOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c/3") == kFileOutput);

  // Commands introduced by '|' are written through a pipe.
  KALDI_ASSERT(ClassifyWxfilename("|b") == kPipeOutput);
  KALDI_ASSERT(ClassifyWxfilename("| b ") == kPipeOutput);

  // Outputs that must be rejected: these whitespace-padded names,
  // input-style pipes ("b|"), file offsets ("name:<digits>"), and table
  // specifiers ("ark,...:" / "scp:").
  KALDI_ASSERT(ClassifyWxfilename(" ") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename(" a ") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("a ") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("b|") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("b c|") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c:123") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("a b c:3") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("ark,s,cs:a b c") == kNoOutput);
  KALDI_ASSERT(ClassifyWxfilename("scp:a b c") == kNoOutput);
}
|
|
|
|
void UnitTestIoNew(bool binary) {
|
|
{
|
|
const char *filename = "tmpf";
|
|
|
|
Output ko(filename, binary);
|
|
std::ostream &outfile = ko.Stream();
|
|
if (!binary) outfile << "\t";
|
|
int64 i1 = Rand() % 10000;
|
|
WriteBasicType(outfile, binary, i1);
|
|
uint16 i2 = Rand() % 10000;
|
|
WriteBasicType(outfile, binary, i2);
|
|
if (!binary) outfile << "\t";
|
|
char c = Rand();
|
|
WriteBasicType(outfile, binary, c);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::vector<int32> vec1;
|
|
WriteIntegerVector(outfile, binary, vec1);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::vector<uint16> vec2;
|
|
for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
|
|
WriteIntegerVector(outfile, binary, vec2);
|
|
if (!binary) outfile << " \n";
|
|
std::vector<char> vec3;
|
|
for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
|
|
WriteIntegerVector(outfile, binary, vec3);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
const char *token1 = "Hi";
|
|
WriteToken(outfile, binary, token1);
|
|
if (!binary) outfile << " \n";
|
|
std::string token2 = "There.";
|
|
WriteToken(outfile, binary, token2);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::string token3 = "You.";
|
|
WriteToken(outfile, binary, token3);
|
|
if (!binary && Rand()%2 == 0) outfile << " ";
|
|
float f1 = RandUniform();
|
|
WriteBasicType(outfile, binary, f1);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
float f2 = RandUniform();
|
|
WriteBasicType(outfile, binary, f2);
|
|
double d1 = RandUniform();
|
|
WriteBasicType(outfile, binary, d1);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
double d2 = RandUniform();
|
|
WriteBasicType(outfile, binary, d2);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
ko.Close();
|
|
|
|
{
|
|
bool binary_in;
|
|
Input ki(filename, &binary_in);
|
|
std::istream &infile = ki.Stream();
|
|
int64 i1_in;
|
|
ReadBasicType(infile, binary_in, &i1_in);
|
|
KALDI_ASSERT(i1_in == i1);
|
|
uint16 i2_in;
|
|
ReadBasicType(infile, binary_in, &i2_in);
|
|
KALDI_ASSERT(i2_in == i2);
|
|
char c_in;
|
|
ReadBasicType(infile, binary_in, &c_in);
|
|
KALDI_ASSERT(c_in == c);
|
|
std::vector<int32> vec1_in;
|
|
ReadIntegerVector(infile, binary_in, &vec1_in);
|
|
KALDI_ASSERT(vec1_in == vec1);
|
|
std::vector<uint16> vec2_in;
|
|
ReadIntegerVector(infile, binary_in, &vec2_in);
|
|
KALDI_ASSERT(vec2_in == vec2);
|
|
std::vector<char> vec3_in;
|
|
ReadIntegerVector(infile, binary_in, &vec3_in);
|
|
KALDI_ASSERT(vec3_in == vec3);
|
|
std::string token1_in, token2_in;
|
|
KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
|
|
ReadToken(infile, binary_in, &token1_in);
|
|
KALDI_ASSERT(token1_in == (std::string)token1);
|
|
ReadToken(infile, binary_in, &token2_in);
|
|
KALDI_ASSERT(token2_in == token2);
|
|
if (Rand() % 2 == 0)
|
|
ExpectToken(infile, binary_in, token3.c_str());
|
|
else
|
|
ExpectToken(infile, binary_in, token3);
|
|
float f1_in; // same type.
|
|
ReadBasicType(infile, binary_in, &f1_in);
|
|
AssertEqual(f1_in, f1);
|
|
double f2_in; // wrong type.
|
|
ReadBasicType(infile, binary_in, &f2_in);
|
|
AssertEqual(f2_in, f2);
|
|
double d1_in; // same type.
|
|
ReadBasicType(infile, binary_in, &d1_in);
|
|
AssertEqual(d1_in, d1);
|
|
float d2_in; // wrong type.
|
|
ReadBasicType(infile, binary_in, &d2_in);
|
|
AssertEqual(d2_in, d2);
|
|
KALDI_ASSERT(Peek(infile, binary_in) == -1);
|
|
}
|
|
unlink(filename);
|
|
}
|
|
}
|
|
|
|
void UnitTestIoPipe(bool binary) {
|
|
// This is as UnitTestIoNew except with different filenames.
|
|
{
|
|
#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
|
|
// self-invocation on Windows that emulates cat(1)
|
|
const char *filename_out = "|kaldi-io-test cat > tmpf.gz",
|
|
*filename_in = "kaldi-io-test cat tmpf.gz|";
|
|
#else
|
|
const char *filename_out = "|gzip -c > tmpf.gz",
|
|
*filename_in = "gunzip -c tmpf.gz |";
|
|
#endif
|
|
|
|
Output ko(filename_out, binary);
|
|
std::ostream &outfile = ko.Stream();
|
|
if (!binary) outfile << "\t";
|
|
int64 i1 = Rand() % 10000;
|
|
WriteBasicType(outfile, binary, i1);
|
|
uint16 i2 = Rand() % 10000;
|
|
WriteBasicType(outfile, binary, i2);
|
|
if (!binary) outfile << "\t";
|
|
char c = Rand();
|
|
WriteBasicType(outfile, binary, c);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::vector<int32> vec1;
|
|
WriteIntegerVector(outfile, binary, vec1);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::vector<uint16> vec2;
|
|
for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
|
|
WriteIntegerVector(outfile, binary, vec2);
|
|
if (!binary) outfile << " \n";
|
|
WriteToken(outfile, binary, "<foo>");
|
|
std::vector<char> vec3;
|
|
for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
|
|
WriteIntegerVector(outfile, binary, vec3);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
const char *token1 = "Hi";
|
|
WriteToken(outfile, binary, token1);
|
|
if (!binary) outfile << " \n";
|
|
std::string token2 = "There.";
|
|
WriteToken(outfile, binary, token2);
|
|
if (!binary && Rand()%2 == 0) outfile << " \n";
|
|
std::string token3 = "You.";
|
|
WriteToken(outfile, binary, token3);
|
|
if (!binary && Rand()%2 == 0) outfile << " ";
|
|
float f1 = RandUniform();
|
|
WriteBasicType(outfile, binary, f1);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
float f2 = RandUniform();
|
|
WriteBasicType(outfile, binary, f2);
|
|
double d1 = RandUniform();
|
|
WriteBasicType(outfile, binary, d1);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
double d2 = RandUniform();
|
|
WriteBasicType(outfile, binary, d2);
|
|
if (!binary && Rand()%2 == 0) outfile << "\t";
|
|
bool ans = ko.Close();
|
|
KALDI_ASSERT(ans);
|
|
#ifndef _MSC_VER
|
|
Sleep(1); // This test does not work without this sleep:
|
|
// seems to be some kind of file-system latency.
|
|
#endif
|
|
{
|
|
bool binary_in;
|
|
Input ki(filename_in, &binary_in);
|
|
std::istream &infile = ki.Stream();
|
|
int64 i1_in;
|
|
ReadBasicType(infile, binary_in, &i1_in);
|
|
KALDI_ASSERT(i1_in == i1);
|
|
uint16 i2_in;
|
|
ReadBasicType(infile, binary_in, &i2_in);
|
|
KALDI_ASSERT(i2_in == i2);
|
|
char c_in;
|
|
ReadBasicType(infile, binary_in, &c_in);
|
|
KALDI_ASSERT(c_in == c);
|
|
std::vector<int32> vec1_in;
|
|
ReadIntegerVector(infile, binary_in, &vec1_in);
|
|
KALDI_ASSERT(vec1_in == vec1);
|
|
std::vector<uint16> vec2_in;
|
|
ReadIntegerVector(infile, binary_in, &vec2_in);
|
|
KALDI_ASSERT(vec2_in == vec2);
|
|
std::vector<char> vec3_in;
|
|
KALDI_ASSERT(PeekToken(infile, binary_in) == static_cast<int>('f'));
|
|
ExpectToken(infile, binary_in, "<foo>");
|
|
ReadIntegerVector(infile, binary_in, &vec3_in);
|
|
KALDI_ASSERT(vec3_in == vec3);
|
|
std::string token1_in, token2_in;
|
|
KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
|
|
ReadToken(infile, binary_in, &token1_in);
|
|
KALDI_ASSERT(token1_in == (std::string)token1);
|
|
ReadToken(infile, binary_in, &token2_in);
|
|
KALDI_ASSERT(token2_in == token2);
|
|
if (Rand() % 2 == 0)
|
|
ExpectToken(infile, binary_in, token3.c_str());
|
|
else
|
|
ExpectToken(infile, binary_in, token3);
|
|
float f1_in; // same type.
|
|
ReadBasicType(infile, binary_in, &f1_in);
|
|
AssertEqual(f1_in, f1);
|
|
double f2_in; // wrong type.
|
|
ReadBasicType(infile, binary_in, &f2_in);
|
|
AssertEqual(f2_in, f2);
|
|
double d1_in; // same type.
|
|
ReadBasicType(infile, binary_in, &d1_in);
|
|
AssertEqual(d1_in, d1);
|
|
float d2_in; // wrong type.
|
|
ReadBasicType(infile, binary_in, &d2_in);
|
|
AssertEqual(d2_in, d2);
|
|
KALDI_ASSERT(Peek(infile, binary_in) == -1);
|
|
}
|
|
}
|
|
unlink("tmpf.txt");
|
|
unlink("tmpf.gz");
|
|
}
|
|
|
|
void UnitTestIoStandard() {
|
|
/*
|
|
Don't do the the following part because it requires
|
|
to pipe from an empty file, for it to not hang.
|
|
{
|
|
Input inp("", NULL); // standard input.
|
|
KALDI_ASSERT(inp.Stream().get() == -1);
|
|
}
|
|
{
|
|
Input inp("-", NULL); // standard input.
|
|
KALDI_ASSERT(inp.Stream().get() == -1);
|
|
}*/
|
|
|
|
{
|
|
std::cout << "Should see: foo\n";
|
|
Output out("", false);
|
|
out.Stream() << "foo\n";
|
|
}
|
|
{
|
|
std::cout << "Should see: bar\n";
|
|
Output out("-", false);
|
|
out.Stream() << "bar\n";
|
|
}
|
|
}
|
|
|
|
// Windows-specific: checks MapCygwinPath() on a handful of paths.  The body
// is compiled only when KALDI_CYGWIN_COMPAT is defined; otherwise this
// function does nothing.
void UnitTestNativeFilename() {
#ifdef KALDI_CYGWIN_COMPAT
  // Declared here rather than taken from a header.
  extern std::string MapCygwinPath(const std::string &filename);

  // The empty string and the relative names "." and ".." come back unchanged.
  KALDI_ASSERT(MapCygwinPath("") == "");
  KALDI_ASSERT(MapCygwinPath(".") == ".");
  KALDI_ASSERT(MapCygwinPath("..") == "..");

  // /cygdrive/<letter> paths map onto the corresponding drive.
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c") == "c:/");
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/") == "c:/");
  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/foo") == "c:/foo");

  // Paths under /tmp must map to something whose second character is ':'.
  KALDI_ASSERT(MapCygwinPath("/tmp")[1] == ':');
  KALDI_ASSERT(MapCygwinPath("/tmp/")[1] == ':');
  KALDI_ASSERT(MapCygwinPath("/tmp/foo")[1] == ':');

  // /dev/null must not come back as a path starting with '/'.
  KALDI_ASSERT(MapCygwinPath("/dev/null")[0] != '/');
#endif
}
|
|
|
|
} // end namespace kaldi.
|
|
|
|
#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
// Windows has no cat! There is probably no suitable tool to test popen I/O on
// Windows, so we emulate a lame version of cat(1).
//
// Copies one input to standard output, byte for byte (both descriptors are
// switched to binary mode).  The input is the file named by argv[0], or
// standard input when argc is 0 or argv[0] is "-"; any further arguments are
// ignored.
//
// Returns 0 on success, and 1 if the input cannot be opened or if a read or
// write fails.
static int TinyCat(int argc, const char** argv) {
  const char* name_in =
      (argc > 0 && strcmp(argv[0], "-") != 0) ? argv[0] : NULL;
  int fd_in = name_in ? _open(name_in, _O_RDONLY) : _fileno(stdin);
  if (fd_in < 0)
    return 1;

  int fd_out = _fileno(stdout);
  _setmode(fd_in, _O_BINARY);
  _setmode(fd_out, _O_BINARY);

  int status = 0;
  char buffer[100];
  int num_read;
  while (status == 0 &&
         (num_read = _read(fd_in, buffer, sizeof(buffer))) > 0) {
    // _write may accept fewer bytes than requested, so keep going until the
    // whole chunk has been written or an error is reported.
    int num_done = 0;
    while (num_done < num_read) {
      int num_written = _write(fd_out, buffer + num_done, num_read - num_done);
      if (num_written <= 0) {  // error, or no progress: give up.
        status = 1;
        break;
      }
      num_done += num_written;
    }
  }
  // A negative result from _read means the input could not be read.
  if (status == 0 && num_read < 0)
    status = 1;

  // Only close what was opened here; stdin is left alone.
  if (name_in) _close(fd_in);
  return status;
}
#endif
|
|
|
|
int main(int argc, const char** argv) {
|
|
using namespace kaldi;
|
|
#if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
|
|
if (argc > 1 && strcmp(argv[1], "cat") == 0)
|
|
return TinyCat(argc - 2, argv + 2);
|
|
#endif
|
|
|
|
UnitTestNativeFilename();
|
|
UnitTestIoNew(false);
|
|
UnitTestIoNew(true);
|
|
UnitTestIoPipe(true);
|
|
UnitTestIoPipe(false);
|
|
UnitTestIoStandard();
|
|
UnitTestClassifyRxfilename();
|
|
UnitTestClassifyWxfilename();
|
|
|
|
KALDI_ASSERT(1); // just wanted to check that KALDI_ASSERT does not fail
|
|
// for 1.
|
|
return 0;
|
|
}
|