diff --git a/.github/workflows/pre-commit-checks.yml b/.github/workflows/pre-commit-checks.yml index fe71286ca..ae35cea1c 100644 --- a/.github/workflows/pre-commit-checks.yml +++ b/.github/workflows/pre-commit-checks.yml @@ -60,6 +60,18 @@ jobs: - name: Generate Compile Database run: | make compile_db_all + - name: Ensure generated headers exist + run: | + # _compile_db (invoked by compile_db_all) is supposed to build these, + # but make it explicit so clang-tidy in the next step never races + # against a missing dwrf_schema.pb.h / parquet thrift header. If the + # underlying chain already produced them, this is a no-op rebuild. + cmake --build --preset conan-release \ + --target generate_parquet_thrift \ + --target bolt_dwio_dwrf_proto + test -f _build/Release/bolt/dwio/dwrf/proto/dwrf_schema.pb.h \ + || { echo "::error::dwrf_schema.pb.h missing after proto build" >&2; \ + ls -la _build/Release/bolt/dwio/dwrf/proto/ || true; exit 1; } - name: Run pre-commit hooks run: | pre-commit run --all-files diff --git a/bolt/benchmarks/QueryBenchmarkBase.cpp b/bolt/benchmarks/QueryBenchmarkBase.cpp index 8e756800f..2e339c6b8 100644 --- a/bolt/benchmarks/QueryBenchmarkBase.cpp +++ b/bolt/benchmarks/QueryBenchmarkBase.cpp @@ -30,6 +30,8 @@ #include "bolt/benchmarks/QueryBenchmarkBase.h" +#include "bolt/common/caching/SsdCache.h" + DEFINE_string(data_format, "parquet", "Data format"); DEFINE_validator( diff --git a/bolt/common/base/tests/FsTest.cpp b/bolt/common/base/tests/FsTest.cpp index 5634b646c..3b12d53b5 100644 --- a/bolt/common/base/tests/FsTest.cpp +++ b/bolt/common/base/tests/FsTest.cpp @@ -30,14 +30,15 @@ #include "bolt/common/base/Fs.h" #include -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "boost/filesystem.hpp" namespace bytedance::bolt::common { class FsTest : public testing::Test {}; TEST_F(FsTest, createDirectory) { - auto rootPath = exec::test::TempDirectoryPath::createTempDirectory(); + auto rootPath = + bytedance::bolt::test::TempDirectoryPath::createTempDirectory(); auto tmpDirectoryPath = rootPath + "/first/second/third"; // First time should generate directory successfully. EXPECT_FALSE(fs::exists(tmpDirectoryPath.c_str())); diff --git a/bolt/common/caching/tests/AsyncDataCacheTest.cpp b/bolt/common/caching/tests/AsyncDataCacheTest.cpp index 4246454c6..d056200b8 100644 --- a/bolt/common/caching/tests/AsyncDataCacheTest.cpp +++ b/bolt/common/caching/tests/AsyncDataCacheTest.cpp @@ -39,8 +39,8 @@ #ifndef NDEBUG #include "bolt/common/testutil/ScopedTestTime.h" #endif +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "folly/experimental/EventCount.h" #include @@ -118,7 +118,7 @@ class AsyncDataCacheTest : public testing::Test { // second creation of cache must find the checkpoint of the // previous one. if (tempDirectory_ == nullptr) { - tempDirectory_ = exec::test::TempDirectoryPath::create(); + tempDirectory_ = bytedance::bolt::test::TempDirectoryPath::create(); } ssdCache = std::make_unique( fmt::format("{}/cache", tempDirectory_->path), @@ -258,7 +258,7 @@ class AsyncDataCacheTest : public testing::Test { } } - std::shared_ptr tempDirectory_; + std::shared_ptr tempDirectory_; std::unique_ptr manager_; memory::MemoryAllocator* allocator_; std::shared_ptr cache_; diff --git a/bolt/common/caching/tests/SsdFileTest.cpp b/bolt/common/caching/tests/SsdFileTest.cpp index 9a3dcf577..8bb9792d2 100644 --- a/bolt/common/caching/tests/SsdFileTest.cpp +++ b/bolt/common/caching/tests/SsdFileTest.cpp @@ -31,7 +31,7 @@ #include "bolt/common/caching/FileIds.h" #include "bolt/common/caching/SsdCache.h" #include "bolt/common/memory/Memory.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include #include @@ -79,7 +79,7 @@ class SsdFileTest : public testing::Test { fileName_ = StringIdLease(fileIds(), "fileInStorage"); - tempDirectory_ = exec::test::TempDirectoryPath::create(); + tempDirectory_ = bytedance::bolt::test::TempDirectoryPath::create(); ssdFile_ = std::make_unique( fmt::format("{}/ssdtest", tempDirectory_->path), 0, // shardId @@ -243,7 +243,7 @@ class SsdFileTest : public testing::Test { readAndCheckPins(pins); } - std::shared_ptr tempDirectory_; + std::shared_ptr tempDirectory_; std::shared_ptr cache_; StringIdLease fileName_; diff --git a/bolt/common/file/tests/AsyncFileTest.cpp b/bolt/common/file/tests/AsyncFileTest.cpp index cb3cacebb..b85180315 100644 --- a/bolt/common/file/tests/AsyncFileTest.cpp +++ b/bolt/common/file/tests/AsyncFileTest.cpp @@ -18,8 +18,8 @@ #include "bolt/common/file/File.h" #include "bolt/common/file/FileSystems.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" -#include "bolt/exec/tests/utils/TempFilePath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "gtest/gtest.h" using namespace bytedance::bolt; @@ -96,7 +96,7 @@ void readDataAsyncForWirteBuffers( } TEST(AsyncLocalFile, writeAndRead) { - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); { @@ -109,7 +109,7 @@ TEST(AsyncLocalFile, writeAndRead) { TEST(AsyncLocalFile, viaRegistry) { filesystems::registerLocalFileSystem(); - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); auto lfs = filesystems::getFileSystem(filename, nullptr); @@ -124,7 +124,7 @@ TEST(AsyncLocalFile, viaRegistry) { TEST(AsyncLocalFileWrite, viaWriteBuffer) { filesystems::registerLocalFileSystem(); - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); auto lfs = filesystems::getFileSystem(filename, nullptr); diff --git a/bolt/common/file/tests/FileInputStreamTest.cpp b/bolt/common/file/tests/FileInputStreamTest.cpp index 04cdec833..5eacafddc 100644 --- a/bolt/common/file/tests/FileInputStreamTest.cpp +++ b/bolt/common/file/tests/FileInputStreamTest.cpp @@ -34,7 +34,7 @@ #include "bolt/common/file/FileInputStream.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MmapAllocator.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include @@ -59,7 +59,7 @@ class FileInputStreamTest : public testing::Test { mmapAllocator_ = static_cast(memoryManager_->allocator()); pool_ = memoryManager_->addLeafPool("ByteStreamTest"); rng_.seed(124); - tempDirPath_ = exec::test::TempDirectoryPath::create(); + tempDirPath_ = bytedance::bolt::test::TempDirectoryPath::create(); fs_ = filesystems::getFileSystem(tempDirPath_->getPath(), nullptr); } @@ -87,7 +87,7 @@ class FileInputStreamTest : public testing::Test { MmapAllocator* mmapAllocator_; std::shared_ptr pool_; std::atomic_uint64_t fileId_{0}; - std::shared_ptr tempDirPath_; + std::shared_ptr tempDirPath_; std::shared_ptr fs_; }; diff --git a/bolt/common/file/tests/FileTest.cpp b/bolt/common/file/tests/FileTest.cpp index edb186571..8078cad76 100644 --- a/bolt/common/file/tests/FileTest.cpp +++ b/bolt/common/file/tests/FileTest.cpp @@ -36,8 +36,8 @@ #include "bolt/common/config/Config.h" #include "bolt/common/file/File.h" #include "bolt/common/file/FileSystems.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" -#include "bolt/exec/tests/utils/TempFilePath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "gtest/gtest.h" using namespace bytedance::bolt; @@ -190,7 +190,7 @@ TEST(InMemoryFile, preadv) { TEST(LocalFile, writeAndRead) { for (bool useIOBuf : {true, false}) { - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); { @@ -220,7 +220,7 @@ TEST(LocalFile, writeAndRead) { TEST(LocalFile, viaRegistry) { filesystems::registerLocalFileSystem(); - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); auto lfs = filesystems::getFileSystem(filename, nullptr); @@ -237,7 +237,7 @@ TEST(LocalFile, viaRegistry) { TEST(LocalFile, rename) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); auto a = fmt::format("{}/a", tempFolder->path); auto b = fmt::format("{}/b", tempFolder->path); auto newA = fmt::format("{}/newA", tempFolder->path); @@ -265,7 +265,7 @@ TEST(LocalFile, rename) { TEST(LocalFile, exists) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); auto a = fmt::format("{}/a", tempFolder->path); auto b = fmt::format("{}/b", tempFolder->path); auto localFs = filesystems::getFileSystem(a, nullptr); @@ -285,7 +285,7 @@ TEST(LocalFile, exists) { TEST(LocalFile, list) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); auto a = fmt::format("{}/1", tempFolder->path); auto b = fmt::format("{}/2", tempFolder->path); auto localFs = filesystems::getFileSystem(a, nullptr); @@ -305,7 +305,7 @@ TEST(LocalFile, list) { } TEST(LocalFile, readFileDestructor) { - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path.c_str(); remove(filename); { @@ -338,7 +338,7 @@ TEST(LocalFile, readFileDestructor) { TEST(LocalFile, mkdir) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); std::string path = tempFolder->path; auto localFs = filesystems::getFileSystem(path, nullptr); @@ -364,7 +364,7 @@ TEST(LocalFile, mkdir) { TEST(LocalFile, rmdir) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); std::string path = tempFolder->path; auto localFs = filesystems::getFileSystem(path, nullptr); @@ -398,7 +398,7 @@ TEST(LocalFile, rmdir) { TEST(LocalFile, fileNotFound) { filesystems::registerLocalFileSystem(); - auto tempFolder = ::exec::test::TempDirectoryPath::create(); + auto tempFolder = ::test::TempDirectoryPath::create(); auto path = fmt::format("{}/file", tempFolder->path); auto localFs = filesystems::getFileSystem(path, nullptr); BOLT_ASSERT_RUNTIME_THROW_CODE( diff --git a/bolt/common/memory/tests/ArbitrationParticipantTest.cpp b/bolt/common/memory/tests/ArbitrationParticipantTest.cpp index 0cc8fee8c..872e2fa71 100644 --- a/bolt/common/memory/tests/ArbitrationParticipantTest.cpp +++ b/bolt/common/memory/tests/ArbitrationParticipantTest.cpp @@ -46,10 +46,10 @@ #include "bolt/common/memory/Memory.h" #include "bolt/common/memory/MemoryArbitrator.h" #include "bolt/common/memory/SharedArbitrator.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/exec/OperatorUtils.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "gmock/gmock-matchers.h" DECLARE_bool(bolt_memory_leak_check_enabled); @@ -59,6 +59,7 @@ using namespace ::testing; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::memory { static const std::string arbitratorKind("TEST"); diff --git a/bolt/common/memory/tests/ByteStreamTest.cpp b/bolt/common/memory/tests/ByteStreamTest.cpp index 2056b1cd7..40f6c411e 100644 --- a/bolt/common/memory/tests/ByteStreamTest.cpp +++ b/bolt/common/memory/tests/ByteStreamTest.cpp @@ -34,7 +34,7 @@ #include "bolt/common/file/FileInputStream.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MmapAllocator.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include #include @@ -342,7 +342,7 @@ class InputByteStreamTest : public ByteStreamTest, void SetUp() override { ByteStreamTest::SetUp(); - tempDirPath_ = exec::test::TempDirectoryPath::create(); + tempDirPath_ = bytedance::bolt::test::TempDirectoryPath::create(); fs_ = filesystems::getFileSystem(tempDirPath_->getPath(), nullptr); } @@ -366,7 +366,7 @@ class InputByteStreamTest : public ByteStreamTest, } std::atomic_uint64_t fileId_{0}; - std::shared_ptr tempDirPath_; + std::shared_ptr tempDirPath_; std::shared_ptr fs_; }; diff --git a/bolt/common/memory/tests/MockSharedArbitratorTest.cpp b/bolt/common/memory/tests/MockSharedArbitratorTest.cpp index 4c0e52700..49e1f88bb 100644 --- a/bolt/common/memory/tests/MockSharedArbitratorTest.cpp +++ b/bolt/common/memory/tests/MockSharedArbitratorTest.cpp @@ -40,10 +40,10 @@ #include "bolt/common/memory/MemoryArbitrator.h" #include "bolt/common/memory/SharedArbitrator.h" #include "bolt/common/memory/tests/SharedArbitratorTestUtil.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/exec/OperatorUtils.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "folly/experimental/EventCount.h" DECLARE_bool(bolt_memory_leak_check_enabled); @@ -53,6 +53,7 @@ using namespace ::testing; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::memory { // Class to write runtime stats in the tests to the stats container. class TestRuntimeStatWriter : public BaseRuntimeStatWriter { diff --git a/bolt/common/memory/tests/SharedArbitratorTest.cpp b/bolt/common/memory/tests/SharedArbitratorTest.cpp index d93f3c641..a04b15aba 100644 --- a/bolt/common/memory/tests/SharedArbitratorTest.cpp +++ b/bolt/common/memory/tests/SharedArbitratorTest.cpp @@ -60,6 +60,7 @@ using namespace ::testing; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::memory { // Custom node for the custom factory. class FakeMemoryNode : public core::PlanNode { @@ -372,7 +373,8 @@ DEBUG_ONLY_TEST_P( queryCtxStateChecked = true; }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); TestScopedSpillInjection scopedSpillInjection(100); core::PlanNodeId aggregationNodeId; newQueryBuilder() @@ -420,7 +422,8 @@ DEBUG_ONLY_TEST_P( }))); std::thread queryThread([&] { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggregationNodeId; auto plan = PlanBuilder() .values(vectors) @@ -499,7 +502,8 @@ DEBUG_ONLY_TEST_P( .singleAggregation({"c0", "c1"}, {"array_agg(c2)"}) .planNode(); std::thread spillableThread([&]() { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); newQueryBuilder(spillPlan) .queryCtx(queryCtx) .spillDirectory(spillDirectory->getPath()) @@ -954,7 +958,8 @@ DEBUG_ONLY_TEST_P( }))); const int numDrivers = 1; - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::thread queryThread([&]() { BOLT_ASSERT_THROW( newQueryBuilder() @@ -1027,7 +1032,8 @@ DEBUG_ONLY_TEST_P( [&]() { return aggregationAllocationUnblocked.load(); }); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::shared_ptr task; std::thread queryThread([&]() { task = newQueryBuilder() @@ -1097,7 +1103,8 @@ DEBUG_ONLY_TEST_P(SharedArbitrationTestWithThreadingModes, runtimeStats) { values->pool()->free(buffer, fakeAllocationSize); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); const auto outputDirectory = TempDirectoryPath::create(); const auto queryCtx = newQueryCtx(memoryManager_.get(), executor_.get(), memoryCapacity); diff --git a/bolt/common/testutil/CMakeLists.txt b/bolt/common/testutil/CMakeLists.txt index f3a76197d..b1649c8b9 100644 --- a/bolt/common/testutil/CMakeLists.txt +++ b/bolt/common/testutil/CMakeLists.txt @@ -33,6 +33,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug") endif() endif() +bolt_add_library(bolt_temp_path TempDirectoryPath.cpp TempFilePath.cpp) + +target_link_libraries(bolt_temp_path bolt_exception) + if(${BOLT_ENABLE_PERF}) bolt_add_library(bolt_test_util_gperf OBJECT GPerf.cpp) diff --git a/bolt/exec/tests/utils/TempDirectoryPath.cpp b/bolt/common/testutil/TempDirectoryPath.cpp similarity index 92% rename from bolt/exec/tests/utils/TempDirectoryPath.cpp rename to bolt/common/testutil/TempDirectoryPath.cpp index 428334e73..df2b8b9bf 100644 --- a/bolt/exec/tests/utils/TempDirectoryPath.cpp +++ b/bolt/common/testutil/TempDirectoryPath.cpp @@ -28,10 +28,10 @@ * -------------------------------------------------------------------------- */ -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "boost/filesystem.hpp" -namespace bytedance::bolt::exec::test { +namespace bytedance::bolt::test { std::shared_ptr TempDirectoryPath::create() { struct SharedTempDirectoryPath : public TempDirectoryPath { @@ -50,4 +50,4 @@ TempDirectoryPath::~TempDirectoryPath() { } } -} // namespace bytedance::bolt::exec::test +} // namespace bytedance::bolt::test diff --git a/bolt/exec/tests/utils/TempDirectoryPath.h b/bolt/common/testutil/TempDirectoryPath.h similarity index 96% rename from bolt/exec/tests/utils/TempDirectoryPath.h rename to bolt/common/testutil/TempDirectoryPath.h index 236170c08..66097ac50 100644 --- a/bolt/exec/tests/utils/TempDirectoryPath.h +++ b/bolt/common/testutil/TempDirectoryPath.h @@ -36,7 +36,7 @@ #include #include "bolt/common/base/Exceptions.h" -namespace bytedance::bolt::exec::test { +namespace bytedance::bolt::test { // It manages the lifetime of a temporary directory. class TempDirectoryPath { @@ -65,4 +65,4 @@ class TempDirectoryPath { return tempDirectoryPath; } }; -} // namespace bytedance::bolt::exec::test +} // namespace bytedance::bolt::test diff --git a/bolt/exec/tests/utils/TempFilePath.cpp b/bolt/common/testutil/TempFilePath.cpp similarity index 91% rename from bolt/exec/tests/utils/TempFilePath.cpp rename to bolt/common/testutil/TempFilePath.cpp index 60b430613..e6fc11bf1 100644 --- a/bolt/exec/tests/utils/TempFilePath.cpp +++ b/bolt/common/testutil/TempFilePath.cpp @@ -28,8 +28,8 @@ * -------------------------------------------------------------------------- */ -#include "bolt/exec/tests/utils/TempFilePath.h" -namespace bytedance::bolt::exec::test { +#include "bolt/common/testutil/TempFilePath.h" +namespace bytedance::bolt::test { std::shared_ptr TempFilePath::create() { struct SharedTempFilePath : public TempFilePath { @@ -38,4 +38,4 @@ std::shared_ptr TempFilePath::create() { return std::make_shared(); } -} // namespace bytedance::bolt::exec::test +} // namespace bytedance::bolt::test diff --git a/bolt/exec/tests/utils/TempFilePath.h b/bolt/common/testutil/TempFilePath.h similarity index 97% rename from bolt/exec/tests/utils/TempFilePath.h rename to bolt/common/testutil/TempFilePath.h index 278d09969..f2ca702a8 100644 --- a/bolt/exec/tests/utils/TempFilePath.h +++ b/bolt/common/testutil/TempFilePath.h @@ -38,7 +38,7 @@ #include #include "bolt/common/base/Exceptions.h" -namespace bytedance::bolt::exec::test { +namespace bytedance::bolt::test { // It manages the lifetime of a temporary file. class TempFilePath { @@ -99,4 +99,4 @@ class TempFilePath { } }; -} // namespace bytedance::bolt::exec::test +} // namespace bytedance::bolt::test diff --git a/bolt/connectors/hive/CMakeLists.txt b/bolt/connectors/hive/CMakeLists.txt index 6d3a815ab..86e67f3c5 100644 --- a/bolt/connectors/hive/CMakeLists.txt +++ b/bolt/connectors/hive/CMakeLists.txt @@ -52,6 +52,7 @@ bolt_add_library( PaimonRowIterator.cpp PaimonSplitReader.cpp PartitionIdGenerator.cpp + RegisterHiveConnector.cpp SplitReader.cpp TableHandle.cpp ) diff --git a/bolt/connectors/hive/HiveConnector.cpp b/bolt/connectors/hive/HiveConnector.cpp index 1b92b70f1..dadd9f15b 100644 --- a/bolt/connectors/hive/HiveConnector.cpp +++ b/bolt/connectors/hive/HiveConnector.cpp @@ -160,6 +160,8 @@ void HiveConnectorFactory::initialize() { static bool once = []() { HiveTableHandle::registerSerDe(); HiveColumnHandle::registerSerDe(); + LocationHandle::registerSerDe(); + HiveInsertTableHandle::registerSerDe(); dwio::common::registerFileSinks(); dwrf::registerDwrfReaderFactory(); dwrf::registerDwrfWriterFactory(); diff --git a/bolt/connectors/hive/HiveConnector.h b/bolt/connectors/hive/HiveConnector.h index dadb2ffbf..e9ac274f5 100644 --- a/bolt/connectors/hive/HiveConnector.h +++ b/bolt/connectors/hive/HiveConnector.h @@ -195,6 +195,9 @@ class HivePartitionFunctionSpec : public core::PartitionFunctionSpec { void registerHivePartitionFunctionSerDe(); +// Registers all built-in Hive connector factories for the current process. +void registerHiveConnectorFactories(); + template bool CheckHiveConnectorFactoryInit() { static bool init = bytedance::bolt::connector::registerConnectorFactory( diff --git a/bolt/connectors/hive/HiveObjectFactory.cpp b/bolt/connectors/hive/HiveObjectFactory.cpp index 7df8574f0..fa428982b 100644 --- a/bolt/connectors/hive/HiveObjectFactory.cpp +++ b/bolt/connectors/hive/HiveObjectFactory.cpp @@ -20,6 +20,7 @@ #include +#include "bolt/common/memory/Memory.h" #include "bolt/connectors/ConnectorNames.h" #include "bolt/connectors/ConnectorOptions.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" @@ -148,6 +149,9 @@ std::shared_ptr HiveObjectFactory::makeColumnHandle( case static_cast(HiveColumnType::kSynthesized): hiveColumnType = HiveColumnType::kSynthesized; break; + case static_cast(HiveColumnType::kRowIndex): + hiveColumnType = HiveColumnType::kRowIndex; + break; default: BOLT_UNSUPPORTED("Unsupported ColumnType ", columnType); @@ -201,8 +205,8 @@ std::shared_ptr HiveObjectFactory::makeTableHandle( core::TypedExprPtr remainingFilter = nullptr; if (auto rf = options.get_ptr("remainingFilter")) { - // assuming rf["expr"] holds the serialized expression - remainingFilter = ISerializable::deserialize(*rf); + remainingFilter = ISerializable::deserialize( + *rf, memory::MemoryManager::getInstance()->tracePool()); } std::unordered_map tableParameters; @@ -212,17 +216,22 @@ std::shared_ptr HiveObjectFactory::makeTableHandle( } } - // build RowTypePtr from columnHandles - std::vector names; - std::vector types; - names.reserve(columnHandles.size()); - types.reserve(columnHandles.size()); - for (auto& col : columnHandles) { - auto hiveCol = std::static_pointer_cast(col); - names.push_back(hiveCol->name()); - types.push_back(hiveCol->dataType()); + RowTypePtr dataColumns; + if (const auto* dc = options.get_ptr("dataColumns")) { + dataColumns = std::dynamic_pointer_cast( + ISerializable::deserialize(*dc, nullptr)); + } else { + std::vector names; + std::vector types; + names.reserve(columnHandles.size()); + types.reserve(columnHandles.size()); + for (const auto& col : columnHandles) { + auto hiveCol = std::static_pointer_cast(col); + names.push_back(hiveCol->name()); + types.push_back(hiveCol->dataType()); + } + dataColumns = ROW(std::move(names), std::move(types)); } - auto dataColumns = ROW(std::move(names), std::move(types)); return std::make_shared( connectorId(), diff --git a/bolt/connectors/hive/RegisterHiveConnector.cpp b/bolt/connectors/hive/RegisterHiveConnector.cpp new file mode 100644 index 000000000..38f117a77 --- /dev/null +++ b/bolt/connectors/hive/RegisterHiveConnector.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * -------------------------------------------------------------------------- + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * SPDX-License-Identifier: Apache-2.0 + * + * This file has been modified by ByteDance Ltd. and/or its affiliates on + * 2025-11-11. + * + * Original file was released under the Apache License 2.0, + * with the full license text available at: + * http://www.apache.org/licenses/LICENSE-2.0 + * + * This modified file is released under the same license. + * -------------------------------------------------------------------------- + */ + +#include "bolt/connectors/hive/HiveConnector.h" + +namespace bytedance::bolt::connector::hive { + +void registerHiveConnectorFactories() { + if (!connector::hasConnectorFactory(kHiveConnectorName)) { + connector::registerConnectorFactory( + std::make_shared()); + } + if (!connector::hasConnectorFactory(kHiveHadoop2ConnectorName)) { + connector::registerConnectorFactory( + std::make_shared()); + } + if (!connector::hasConnectorFactory(kTosConnectorName)) { + connector::registerConnectorFactory( + std::make_shared()); + } +} + +} // namespace bytedance::bolt::connector::hive diff --git a/bolt/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp b/bolt/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp index 22d76c05f..732952c77 100644 --- a/bolt/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp +++ b/bolt/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp @@ -41,6 +41,7 @@ #include "bolt/connectors/hive/storage_adapters/abfs/AbfsFileSystem.h" #include "bolt/connectors/hive/storage_adapters/abfs/AbfsPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/storage_adapters/abfs/AbfsReadFile.h" #include "bolt/connectors/hive/storage_adapters/abfs/AbfsWriteFile.h" #include "bolt/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" @@ -48,7 +49,6 @@ #include "bolt/connectors/hive/storage_adapters/abfs/tests/MockDataLakeFileClient.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/exec/tests/utils/PortUtil.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "connectors/hive/storage_adapters/abfs/AzureClientProviderFactories.h" #include "connectors/hive/storage_adapters/abfs/AzureClientProviderImpl.h" #include "connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" @@ -126,8 +126,8 @@ class AbfsFileSystemTest : public testing::Test { } private: - static std::shared_ptr createFile() { - auto tempFile = TempFilePath::create(); + static std::shared_ptr<::bytedance::bolt::test::TempFilePath> createFile() { + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); tempFile->append("aaaaa"); tempFile->append("bbbbb"); tempFile->append(std::string(kOneMB, 'c')); diff --git a/bolt/connectors/hive/storage_adapters/abfs/tests/AzuriteServer.h b/bolt/connectors/hive/storage_adapters/abfs/tests/AzuriteServer.h index eb0e7cb40..dd351c831 100644 --- a/bolt/connectors/hive/storage_adapters/abfs/tests/AzuriteServer.h +++ b/bolt/connectors/hive/storage_adapters/abfs/tests/AzuriteServer.h @@ -29,7 +29,7 @@ */ #include "bolt/common/config/Config.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include #include @@ -42,7 +42,7 @@ namespace bytedance::bolt::filesystems { using namespace Azure::Storage::Blobs; -using TempDirectoryPath = exec::test::TempDirectoryPath; +using TempDirectoryPath = bytedance::bolt::test::TempDirectoryPath; static std::string_view kAzuriteServerExecutableName{"azurite-blob"}; static std::string_view kAzuriteSearchPath{":/usr/bin/azurite"}; diff --git a/bolt/connectors/hive/storage_adapters/abfs/tests/MockDataLakeFileClient.h b/bolt/connectors/hive/storage_adapters/abfs/tests/MockDataLakeFileClient.h index 23d40a951..04bc286c4 100644 --- a/bolt/connectors/hive/storage_adapters/abfs/tests/MockDataLakeFileClient.h +++ b/bolt/connectors/hive/storage_adapters/abfs/tests/MockDataLakeFileClient.h @@ -28,12 +28,12 @@ * -------------------------------------------------------------------------- */ -#include "bolt/exec/tests/utils/TempFilePath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/storage_adapters/abfs/AzureDataLakeFileClient.h" using namespace Azure::Storage::Files::DataLake::Models; -using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::filesystems { @@ -41,7 +41,7 @@ namespace bytedance::bolt::filesystems { class MockDataLakeFileClient : public AzureDataLakeFileClient { public: MockDataLakeFileClient() { - auto tempFile = TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); filePath_ = tempFile->getPath(); } diff --git a/bolt/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp b/bolt/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp index a16b68201..9d812e0e8 100644 --- a/bolt/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp +++ b/bolt/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp @@ -31,10 +31,10 @@ #include "bolt/connectors/hive/storage_adapters/gcs/GcsFileSystem.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/File.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/storage_adapters/gcs/GcsUtil.h" #include "bolt/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" #include "bolt/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "gtest/gtest.h" @@ -244,7 +244,7 @@ TEST_F(GcsFileSystemTest, credentialsConfig) { " \"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n" + " \"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.g" + std::string("serviceaccount") + ".com\"\n" + "}\n"; - auto jsonFile = ::bytedance::bolt::exec::test::TempFilePath::create(); + auto jsonFile = ::bytedance::bolt::test::TempFilePath::create(); std::ofstream credsOut(jsonFile->getPath()); credsOut << kCreds; credsOut.close(); diff --git a/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp b/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp index 6b0b9e9fb..a86a8f30f 100644 --- a/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp +++ b/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp @@ -38,10 +38,10 @@ #include #include "HdfsMiniCluster.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/storage_adapters/hdfs/HdfsReadFile.h" #include "bolt/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" #include "bolt/core/QueryConfig.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "gtest/gtest.h" using namespace bytedance::bolt; @@ -89,8 +89,8 @@ class HdfsFileSystemTest : public testing::Test { static std::shared_ptr miniCluster; private: - static std::shared_ptr<::exec::test::TempFilePath> createFile() { - auto tempFile = ::exec::test::TempFilePath::create(); + static std::shared_ptr<::bytedance::bolt::test::TempFilePath> createFile() { + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); tempFile->append("aaaaa"); tempFile->append("bbbbb"); tempFile->append(std::string(kOneMB, 'c')); diff --git a/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.h b/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.h index ef5beb585..5796580a8 100644 --- a/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.h +++ b/bolt/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.h @@ -28,7 +28,7 @@ * -------------------------------------------------------------------------- */ -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include #include diff --git a/bolt/connectors/hive/storage_adapters/s3fs/tests/MinioServer.h b/bolt/connectors/hive/storage_adapters/s3fs/tests/MinioServer.h index 22bcec0f9..b48702603 100644 --- a/bolt/connectors/hive/storage_adapters/s3fs/tests/MinioServer.h +++ b/bolt/connectors/hive/storage_adapters/s3fs/tests/MinioServer.h @@ -31,8 +31,8 @@ #pragma once #include "bolt/common/config/Config.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/tests/utils/PortUtil.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "boost/process.hpp" @@ -48,7 +48,7 @@ constexpr char const* kMinioSecretKey{"miniopass"}; // Adapted from the Apache Arrow library. class MinioServer { public: - MinioServer() : tempPath_(::exec::test::TempDirectoryPath::create()) { + MinioServer() : tempPath_(::test::TempDirectoryPath::create()) { constexpr auto kHostAddressTemplate = "127.0.0.1:{}"; auto ports = bytedance::bolt::exec::test::getFreePorts(2); connectionString_ = fmt::format(kHostAddressTemplate, ports[0]); @@ -88,7 +88,7 @@ class MinioServer { } private: - const std::shared_ptr tempPath_; + const std::shared_ptr tempPath_; std::string connectionString_; std::string consoleAddress_; const std::string accessKey_ = kMinioAccessKey; diff --git a/bolt/connectors/hive/storage_adapters/s3fs/tests/S3Test.h b/bolt/connectors/hive/storage_adapters/s3fs/tests/S3Test.h index 9c6565dba..9af4f3b6d 100644 --- a/bolt/connectors/hive/storage_adapters/s3fs/tests/S3Test.h +++ b/bolt/connectors/hive/storage_adapters/s3fs/tests/S3Test.h @@ -29,12 +29,12 @@ */ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/File.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/FileHandle.h" #include "bolt/connectors/hive/storage_adapters/s3fs/S3FileSystem.h" #include "bolt/connectors/hive/storage_adapters/s3fs/S3Util.h" #include "bolt/connectors/hive/storage_adapters/s3fs/tests/MinioServer.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "gtest/gtest.h" diff --git a/bolt/connectors/hive/tests/CMakeLists.txt b/bolt/connectors/hive/tests/CMakeLists.txt index f88875ad6..9a13458d0 100644 --- a/bolt/connectors/hive/tests/CMakeLists.txt +++ b/bolt/connectors/hive/tests/CMakeLists.txt @@ -33,6 +33,8 @@ add_executable( HiveConnectorTest.cpp HiveRowIndexTest.cpp HiveDataSinkTest.cpp + HiveAssertQueryBuilderTest.cpp + HivePartitionFunctionPlanNodeToStringTest.cpp HivePartitionFunctionTest.cpp HivePartitionUtilTest.cpp PartitionIdGeneratorTest.cpp @@ -43,6 +45,9 @@ add_test(bolt_hive_connector_test bolt_hive_connector_test) target_link_libraries( bolt_hive_connector_test bolt_testutils + bolt_exec_test_lib + bolt_hive_connector + bolt_vector_test_lib GTest::gtest GTest::gtest_main ) diff --git a/bolt/connectors/hive/tests/FileHandleTest.cpp b/bolt/connectors/hive/tests/FileHandleTest.cpp index e7acc3c67..0daa65929 100644 --- a/bolt/connectors/hive/tests/FileHandleTest.cpp +++ b/bolt/connectors/hive/tests/FileHandleTest.cpp @@ -33,14 +33,14 @@ #include "bolt/common/caching/SimpleLRUCache.h" #include "bolt/common/file/File.h" #include "bolt/common/file/FileSystems.h" -#include "bolt/exec/tests/utils/TempFilePath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "gtest/gtest.h" using namespace bytedance::bolt; TEST(FileHandleTest, localFile) { filesystems::registerLocalFileSystem(); - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path; remove(filename.c_str()); diff --git a/bolt/connectors/hive/tests/HiveAssertQueryBuilderTest.cpp b/bolt/connectors/hive/tests/HiveAssertQueryBuilderTest.cpp new file mode 100644 index 000000000..b44a43e47 --- /dev/null +++ b/bolt/connectors/hive/tests/HiveAssertQueryBuilderTest.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/hive/HiveConnectorSplit.h" +#include "bolt/exec/tests/utils/AssertQueryBuilder.h" +#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/PlanBuilder.h" + +namespace bytedance::bolt::exec::test { + +using connector::hive::HiveConnectorSplitBuilder; + +/// Exercises Hive-specific AssertQueryBuilder paths (partition-keyed splits, +/// HiveConnectorSplitBuilder). The connector-agnostic AssertQueryBuilder +/// coverage lives in bolt/exec/tests/AssertQueryBuilderTest.cpp. +class HiveAssertQueryBuilderTest : public HiveConnectorTestBase {}; + +TEST_F(HiveAssertQueryBuilderTest, hiveSplits) { + auto data = makeRowVector({makeFlatVector({1, 2, 3})}); + + auto file = ::bytedance::bolt::test::TempFilePath::create(); + writeToFile(file->path, {data}); + + // Single leaf node. + AssertQueryBuilder( + PlanBuilder().tableScan(asRowType(data->type())).planNode(), + duckDbQueryRunner_) + .split(makeHiveConnectorSplit(file->path)) + .assertResults("VALUES (1), (2), (3)"); + + // Split with partition key. + ColumnHandleMap assignments = { + {"ds", partitionKey("ds", VARCHAR())}, + {"c0", regularColumn("c0", BIGINT())}}; + + AssertQueryBuilder( + PlanBuilder() + .startTableScan() + .outputType(ROW({"c0", "ds"}, {INTEGER(), VARCHAR()})) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() + .planNode(), + duckDbQueryRunner_) + .split(HiveConnectorSplitBuilder(file->path) + .connectorId(kHiveConnectorId) + .fileFormat(dwio::common::FileFormat::DWRF) + .partitionKey("ds", "2022-05-10") + .build()) + .assertResults( + "VALUES (1, '2022-05-10'), (2, '2022-05-10'), (3, '2022-05-10')"); + + // Two leaf nodes. + auto buildData = makeRowVector({makeFlatVector({2, 3})}); + auto buildFile = ::bytedance::bolt::test::TempFilePath::create(); + writeToFile(buildFile->path, {buildData}); + + auto planNodeIdGenerator = std::make_shared(); + core::PlanNodeId probeScanId; + core::PlanNodeId buildScanId; + auto joinPlan = PlanBuilder(planNodeIdGenerator) + .tableScan(asRowType(data->type())) + .capturePlanNodeId(probeScanId) + .hashJoin( + {"c0"}, + {"b_c0"}, + PlanBuilder(planNodeIdGenerator) + .tableScan(asRowType(data->type())) + .capturePlanNodeId(buildScanId) + .project({"c0 as b_c0"}) + .planNode(), + "", + {"c0", "b_c0"}) + .singleAggregation({}, {"count(1)"}) + .planNode(); + + AssertQueryBuilder(joinPlan, duckDbQueryRunner_) + .split(probeScanId, makeHiveConnectorSplit(file->path)) + .split(buildScanId, makeHiveConnectorSplit(buildFile->path)) + .assertResults("SELECT 2"); +} + +} // namespace bytedance::bolt::exec::test diff --git a/bolt/connectors/hive/tests/HiveDataSinkTest.cpp b/bolt/connectors/hive/tests/HiveDataSinkTest.cpp index 7222caa20..199bbc33b 100644 --- a/bolt/connectors/hive/tests/HiveDataSinkTest.cpp +++ b/bolt/connectors/hive/tests/HiveDataSinkTest.cpp @@ -36,15 +36,16 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/config/Config.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/common/Options.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" namespace bytedance::bolt::connector::hive { namespace { using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; constexpr const char* kHiveConnectorId = "test-hive"; @@ -705,7 +706,7 @@ TEST_F(HiveDataSinkTest, memoryReclaim) { std::shared_ptr spillDirectory; std::unique_ptr spillConfig; if (testData.writerSpillEnabled) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); spillConfig = getSpillConfig(spillDirectory->path, testData.writerFlushThreshold); auto connectorQueryCtx = std::make_unique( @@ -843,7 +844,7 @@ TEST_F(HiveDataSinkTest, memoryReclaimAfterClose) { std::shared_ptr spillDirectory; std::unique_ptr spillConfig; if (testData.writerSpillEnabled) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); spillConfig = getSpillConfig(spillDirectory->path, 0); auto connectorQueryCtx = std::make_unique( opPool_.get(), @@ -934,7 +935,7 @@ DEBUG_ONLY_TEST_F(HiveDataSinkTest, sortWriterFailureTest) { std::make_shared( "c1", core::SortOrder{false, false})}); const std::shared_ptr spillDirectory = - exec::test::TempDirectoryPath::create(); + bytedance::bolt::test::TempDirectoryPath::create(); std::unique_ptr spillConfig = getSpillConfig(spillDirectory->path, 0); // Triggers the memory reservation in sort buffer. diff --git a/bolt/connectors/hive/tests/HivePartitionFunctionPlanNodeToStringTest.cpp b/bolt/connectors/hive/tests/HivePartitionFunctionPlanNodeToStringTest.cpp new file mode 100644 index 000000000..37045a618 --- /dev/null +++ b/bolt/connectors/hive/tests/HivePartitionFunctionPlanNodeToStringTest.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "bolt/common/memory/Memory.h" +#include "bolt/connectors/hive/HiveConnector.h" +#include "bolt/exec/tests/utils/PlanBuilder.h" +#include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" +#include "bolt/functions/prestosql/registration/RegistrationFunctions.h" +#include "bolt/parse/TypeResolver.h" +#include "bolt/vector/tests/utils/VectorTestBase.h" + +namespace bytedance::bolt::connector::hive::test { +namespace { + +using bytedance::bolt::exec::test::PlanBuilder; + +/// Covers the Hive-specific HivePartitionFunctionSpec branch of +/// PartitionedOutputNode::toString. The connector-agnostic parts of +/// PartitionedOutputNode rendering live in +/// bolt/exec/tests/PlanNodeToStringTest.cpp. +class HivePartitionFunctionPlanNodeToStringTest + : public ::testing::Test, + public bolt::test::VectorTestBase { + public: + HivePartitionFunctionPlanNodeToStringTest() { + functions::prestosql::registerAllScalarFunctions(); + aggregate::prestosql::registerAllAggregateFunctions(); + parse::registerTypeResolver(); + data_ = makeRowVector( + {makeFlatVector({0, 1, 2, 3, 4}), + makeFlatVector({0, 1, 2, 3, 4}), + makeFlatVector({0, 1, 2, 3, 4})}); + } + + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance(memory::MemoryManager::Options{}); + } + + RowVectorPtr data_; +}; + +TEST_F(HivePartitionFunctionPlanNodeToStringTest, partitionedOutput) { + auto hiveSpec = std::make_shared( + 4, + std::vector{0, 1, 0, 1}, + std::vector{1, 2}, + std::vector{}); + + auto plan = PlanBuilder() + .values({data_}) + .partitionedOutput({"c1", "c2"}, 2, false, hiveSpec) + .planNode(); + ASSERT_EQ("-- PartitionedOutput[1]\n", plan->toString(false, false, true)); + ASSERT_EQ( + "-- PartitionedOutput[1][partitionFunction: HIVE((1, 2) buckets: 4) with 2 partitions] -> c0:SMALLINT, c1:INTEGER, c2:BIGINT\n", + plan->toString(true, false, true)); +} + +} // namespace +} // namespace bytedance::bolt::connector::hive::test diff --git a/bolt/connectors/paimon/tests/PaimonConnectorTest.cpp b/bolt/connectors/paimon/tests/PaimonConnectorTest.cpp index 5dcd2fbd0..eac5e67fe 100644 --- a/bolt/connectors/paimon/tests/PaimonConnectorTest.cpp +++ b/bolt/connectors/paimon/tests/PaimonConnectorTest.cpp @@ -25,13 +25,13 @@ #include #include #include "bolt/common/memory/Memory.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/paimon/BoltMemoryPool.h" #include "bolt/connectors/paimon/PaimonConfig.h" #include "bolt/connectors/paimon/PaimonConnectorSplit.h" #include "bolt/connectors/paimon/PaimonTableHandle.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/TimestampConversion.h" #include "bolt/type/Type.h" #include "bolt/vector/tests/utils/VectorMaker.h" @@ -43,7 +43,7 @@ class PaimonConnectorTest protected: static void SetUpTestCase() { // Create a temporary directory for the test - tempDir_ = exec::test::TempDirectoryPath::create(); + tempDir_ = bytedance::bolt::test::TempDirectoryPath::create(); LOG(INFO) << "Test using temporary directory: " << tempDir_->path; // Run create_test_tables.py with the temporary directory. @@ -90,11 +90,11 @@ class PaimonConnectorTest exec::test::OperatorTestBase::TearDown(); } - static std::shared_ptr tempDir_; + static std::shared_ptr tempDir_; }; -std::shared_ptr PaimonConnectorTest::tempDir_ = - nullptr; +std::shared_ptr + PaimonConnectorTest::tempDir_ = nullptr; TEST_F(PaimonConnectorTest, TestTableScanBasic) { // Create Parquet data with unique id diff --git a/bolt/connectors/tests/CMakeLists.txt b/bolt/connectors/tests/CMakeLists.txt index 52c65b982..10c4f7a38 100644 --- a/bolt/connectors/tests/CMakeLists.txt +++ b/bolt/connectors/tests/CMakeLists.txt @@ -25,7 +25,10 @@ # This modified file is released under the same license. # -------------------------------------------------------------------------- +add_subdirectory(utils) + add_executable(bolt_connector_test ConnectorTest.cpp) + add_test(bolt_connector_test bolt_connector_test) target_link_libraries( diff --git a/bolt/connectors/tests/utils/CMakeLists.txt b/bolt/connectors/tests/utils/CMakeLists.txt new file mode 100644 index 000000000..5ed32d7a5 --- /dev/null +++ b/bolt/connectors/tests/utils/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) ByteDance Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bolt_add_library( + bolt_connector_test_lib + ConnectorTestBase.cpp +) + +target_link_libraries( + bolt_connector_test_lib + bolt_connector + bolt_core + bolt_dwio_common + bolt_file + bolt_serialization + bolt_temp_path + bolt_type + bolt_filter + bolt_vector_test_lib + GTest::gtest + Folly::folly +) diff --git a/bolt/connectors/tests/utils/ConnectorTestBase.cpp b/bolt/connectors/tests/utils/ConnectorTestBase.cpp new file mode 100644 index 000000000..27b33a1db --- /dev/null +++ b/bolt/connectors/tests/utils/ConnectorTestBase.cpp @@ -0,0 +1,364 @@ +/* + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" + +#include + +#include "bolt/common/file/FileSystems.h" +#include "bolt/common/serialization/Serializable.h" +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/type/filter/FilterBase.h" + +namespace bytedance::bolt::connector::test { + +namespace { + +std::string toEffectivePath(const std::string& filePath) { + return filePath.find('/') == 0 ? "file:" + filePath : filePath; +} + +uint64_t fileSize(const std::string& filePath) { + auto file = + filesystems::getFileSystem(filePath, nullptr)->openFileForRead(filePath); + return static_cast(file->size()); +} + +} // namespace + +void registerTestConnector( + const std::string& connectorName, + const std::string& connectorId, + folly::Executor* ioExecutor, + const std::shared_ptr& config, + const FactoryRegistrar& factoryRegistrar) { + if (factoryRegistrar && !connector::hasConnectorFactory(connectorName)) { + factoryRegistrar(); + } + auto factory = connector::getConnectorFactory(connectorName); + factory->registerObjectFactory(connectorId); + connector::registerConnector( + factory->newConnector(connectorId, config, ioExecutor)); +} + +void unregisterTestConnector( + const std::string& connectorName, + const std::string& connectorId) { + connector::unregisterConnector(connectorId); + connector::unregisterConnectorObjectFactory(connectorName); +} + +ConnectorTestParam paramFor(const std::string& connectorName) { + return { + connectorName, + "test-" + connectorName, + /*factoryRegistrar=*/nullptr}; +} + +std::vector paramsFor( + std::vector connectorNames) { + std::vector params; + params.reserve(connectorNames.size()); + for (auto& name : connectorNames) { + params.push_back(paramFor(name)); + } + return params; +} + +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::string& directoryPath, + dwio::common::FileFormat format) { + std::vector> splits; + for (const auto& path : + std::filesystem::recursive_directory_iterator(directoryPath)) { + if (path.is_regular_file()) { + splits.emplace_back(makeConnectorSplits( + connectorName, path.path().string(), 1, format)[0]); + } + } + return splits; +} + +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::vector& filePaths, + dwio::common::FileFormat format) { + std::vector> splits; + splits.reserve(filePaths.size()); + for (const auto& filePath : filePaths) { + splits.emplace_back( + makeConnectorSplits(connectorName, filePath.string(), 1, format)[0]); + } + return splits; +} + +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::vector>& + filePaths) { + std::vector> splits; + splits.reserve(filePaths.size()); + for (const auto& filePath : filePaths) { + splits.emplace_back(makeConnectorSplit( + connectorName, + filePath->path, + filePath->fileSize(), + filePath->fileModifiedTime(), + 0, + std::numeric_limits::max())); + } + return splits; +} + +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format) { + auto file = + filesystems::getFileSystem(filePath, nullptr)->openFileForRead(filePath); + const int64_t fileSize = file->size(); + const auto splitSize = + static_cast((fileSize + splitCount - 1) / splitCount); + std::vector> splits; + splits.reserve(splitCount); + auto factory = connector::getConnectorObjectFactory(connectorName); + const auto effectivePath = toEffectivePath(filePath); + for (uint32_t i = 0; i < splitCount; ++i) { + splits.emplace_back(factory->makeConnectorSplit( + effectivePath, + i * splitSize, + splitSize, + connector::makeOptions({{"fileFormat", static_cast(format)}}))); + } + return splits; +} + +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + uint64_t start, + uint64_t length) { + return connector::getConnectorObjectFactory(connectorName) + ->makeConnectorSplit( + toEffectivePath(filePath), + start, + length, + connector::makeOptions( + {{"fileFormat", + static_cast(dwio::common::FileFormat::DWRF)}})); +} + +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + uint64_t start, + uint64_t length, + connector::DynamicConnectorOptions options) { + if (!options.options.isObject()) { + options.options = folly::dynamic::object; + } + if (!options.options.count("fileFormat")) { + options.options["fileFormat"] = + static_cast(dwio::common::FileFormat::DWRF); + } + return connector::getConnectorObjectFactory(connectorName) + ->makeConnectorSplit(toEffectivePath(filePath), start, length, options); +} + +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + int64_t fileSize, + int64_t fileModifiedTime, + uint64_t start, + uint64_t length) { + connector::DynamicConnectorOptions options; + options.options = folly::dynamic::object; + options.options["fileFormat"] = + static_cast(dwio::common::FileFormat::DWRF); + folly::dynamic infoColumns = folly::dynamic::object; + infoColumns["$file_size"] = fmt::format("{}", fileSize); + infoColumns["$file_modified_time"] = fmt::format("{}", fileModifiedTime); + options.options["infoColumns"] = infoColumns; + return connector::getConnectorObjectFactory(connectorName) + ->makeConnectorSplit(toEffectivePath(filePath), start, length, options); +} + +std::shared_ptr makeColumnHandle( + const std::string& connectorName, + const std::string& name, + const TypePtr& type) { + return connector::getConnectorObjectFactory(connectorName) + ->makeColumnHandle(name, type, connector::makeOptions({})); +} + +std::shared_ptr makeColumnHandle( + const std::string& connectorName, + const std::string& name, + const TypePtr& type, + connector::ConnectorOptions options) { + return connector::getConnectorObjectFactory(connectorName) + ->makeColumnHandle(name, type, options); +} + +std::shared_ptr makeTableHandle( + const std::string& connectorName, + const std::string& tableName, + const core::TypedExprPtr& remainingFilter) { + auto tableOptions = connector::makeOptions({}); + if (remainingFilter) { + tableOptions.options["remainingFilter"] = + ISerializable::serialize(remainingFilter); + } + return connector::getConnectorObjectFactory(connectorName) + ->makeTableHandle(tableName, {}, tableOptions); +} + +ConnectorTestBase::ConnectorTestBase( + std::string connectorName, + std::string connectorId, + FactoryRegistrar factoryRegistrar) + : connectorName_(std::move(connectorName)), + connectorId_(std::move(connectorId)), + factoryRegistrar_(std::move(factoryRegistrar)) {} + +ConnectorTestBase::~ConnectorTestBase() = default; + +void ConnectorTestBase::SetUp() { + Type::registerSerDe(); + common::Filter::registerSerDe(); + core::ITypedExpr::registerSerDe(); + ioExecutor_ = std::make_unique(3); + auto emptyConfig = std::make_shared( + std::unordered_map()); + registerTestConnector( + connectorName_, + connectorId_, + ioExecutor_.get(), + emptyConfig, + factoryRegistrar_); + filesystems::registerLocalFileSystem(); +} + +void ConnectorTestBase::TearDown() { + // Make sure all pending loads finish or are cancelled before unregistering + // the connector. + ioExecutor_.reset(); + unregisterTestConnector(connectorName_, connectorId_); +} + +void ConnectorTestBase::resetConnector( + const std::shared_ptr& config) { + connector::unregisterConnector(connectorId_); + if (factoryRegistrar_ && !connector::hasConnectorFactory(connectorName_)) { + factoryRegistrar_(); + } + connector::registerConnector( + connector::getConnectorFactory(connectorName_) + ->newConnector(connectorId_, config, ioExecutor_.get())); +} + +std::shared_ptr +ConnectorTestBase::connectorObjectFactory() const { + return connector::getConnectorObjectFactory(connectorName_); +} + +std::vector> +ConnectorTestBase::makeConnectorSplits( + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format) const { + const auto size = fileSize(filePath); + const auto splitSize = + static_cast((size + splitCount - 1) / splitCount); + std::vector> splits; + splits.reserve(splitCount); + auto factory = connectorObjectFactory(); + const auto effectivePath = toEffectivePath(filePath); + for (uint32_t i = 0; i < splitCount; ++i) { + splits.emplace_back(factory->makeConnectorSplit( + effectivePath, + i * splitSize, + splitSize, + connector::makeOptions({{"fileFormat", static_cast(format)}}))); + } + return splits; +} + +std::shared_ptr +ConnectorTestBase::makeConnectorSplit(const std::string& filePath) const { + return makeConnectorSplit(filePath, 0, fileSize(filePath)); +} + +std::shared_ptr +ConnectorTestBase::makeConnectorSplit( + const std::string& filePath, + uint64_t start, + uint64_t length) const { + return connectorObjectFactory()->makeConnectorSplit( + toEffectivePath(filePath), + start, + length, + connector::makeOptions( + {{"fileFormat", static_cast(dwio::common::FileFormat::DWRF)}})); +} + +std::shared_ptr +ConnectorTestBase::makeConnectorSplit( + const std::string& filePath, + uint64_t start, + uint64_t length, + connector::DynamicConnectorOptions options) const { + if (!options.options.isObject()) { + options.options = folly::dynamic::object; + } + if (!options.options.count("fileFormat")) { + options.options["fileFormat"] = + static_cast(dwio::common::FileFormat::DWRF); + } + return connectorObjectFactory()->makeConnectorSplit( + toEffectivePath(filePath), start, length, options); +} + +std::shared_ptr ConnectorTestBase::makeColumnHandle( + const std::string& name, + const TypePtr& type) const { + return connectorObjectFactory()->makeColumnHandle( + name, type, connector::makeOptions({})); +} + +std::shared_ptr ConnectorTestBase::makeColumnHandle( + const std::string& name, + const TypePtr& type, + connector::ConnectorOptions options) const { + return connectorObjectFactory()->makeColumnHandle(name, type, options); +} + +std::shared_ptr +ConnectorTestBase::makeTableHandle( + const std::string& tableName, + const core::TypedExprPtr& remainingFilter) const { + auto tableOptions = connector::makeOptions({}); + if (remainingFilter) { + tableOptions.options["remainingFilter"] = + ISerializable::serialize(remainingFilter); + } + return connectorObjectFactory()->makeTableHandle(tableName, {}, tableOptions); +} + +} // namespace bytedance::bolt::connector::test diff --git a/bolt/connectors/tests/utils/ConnectorTestBase.h b/bolt/connectors/tests/utils/ConnectorTestBase.h new file mode 100644 index 000000000..cc379ced1 --- /dev/null +++ b/bolt/connectors/tests/utils/ConnectorTestBase.h @@ -0,0 +1,263 @@ +/* + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/Connector.h" +#include "bolt/connectors/ConnectorObjectFactory.h" +#include "bolt/connectors/ConnectorOptions.h" +#include "bolt/core/ITypedExpr.h" +#include "bolt/dwio/common/Options.h" +#include "bolt/type/Type.h" +#include "bolt/vector/tests/utils/VectorTestBase.h" + +namespace bytedance::bolt::connector::test { + +/// Default connector instance ID used when callers don't supply one. +constexpr std::string_view kDefaultConnectorId = "test-connector"; + +using ColumnHandleMap = + std::unordered_map>; + +/// Callback invoked from ConnectorTestBase::SetUp() to ensure the named +/// connector factory is registered with the runtime. Each connector library +/// exposes one (e.g. connector::hive::registerHiveConnectorFactories, +/// connector::tpch::registerTpchConnectorFactories). +using FactoryRegistrar = std::function; + +/// Parameter type for connector-parameterized GTest fixtures. Tests that need +/// to run against multiple connectors instantiate one ConnectorTestParam per +/// connector and use ::testing::WithParamInterface. +struct ConnectorTestParam { + std::string connectorName; + std::string connectorId; + FactoryRegistrar factoryRegistrar; +}; + +/// Returns a ConnectorTestParam for the named connector. factoryRegistrar is +/// nullptr; the connector's factory is expected to already be registered with +/// the runtime before tests run -- either via the connector library's +/// static-init self-registration (i.e. linking bolt__connector), or via +/// an explicit registerConnectorFactories() call somewhere ahead of +/// INSTANTIATE_TEST_SUITE_P (e.g. in main() or a TestEnvironment). +/// +/// Use this helper so test source files can stay connector-agnostic: they +/// reference connector names as strings (kHiveConnectorName etc.) instead of +/// including connector-specific headers like HiveConnector.h. +ConnectorTestParam paramFor(const std::string& connectorName); + +/// Convenience: returns params for a list of connector names. +std::vector paramsFor( + std::vector connectorNames); + +/// Builds connector splits for every regular file under @c directoryPath via +/// the connector object factory registered for @c connectorName. +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::string& directoryPath, + dwio::common::FileFormat format = dwio::common::FileFormat::DWRF); + +/// Builds connector splits, one per file in @c filePaths. +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::vector& filePaths, + dwio::common::FileFormat format = dwio::common::FileFormat::DWRF); + +/// Builds connector splits from a list of TempFilePath, preserving +/// $file_size and $file_modified_time info columns. +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::vector>& + filePaths); + +/// Splits @c filePath into @c splitCount contiguous chunks. +std::vector> makeConnectorSplits( + const std::string& connectorName, + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format); + +/// Single DWRF split covering [start, start + length). +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + uint64_t start = 0, + uint64_t length = std::numeric_limits::max()); + +/// Single split with explicit options merged in. Adds fileFormat=DWRF if not +/// already present in @c options. +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + uint64_t start, + uint64_t length, + connector::DynamicConnectorOptions options); + +/// Single DWRF split carrying $file_size / $file_modified_time info columns. +std::shared_ptr makeConnectorSplit( + const std::string& connectorName, + const std::string& filePath, + int64_t fileSize, + int64_t fileModifiedTime, + uint64_t start, + uint64_t length); + +/// Regular column handle (no required subfields) via the factory registered +/// for @c connectorName. +std::shared_ptr makeColumnHandle( + const std::string& connectorName, + const std::string& name, + const TypePtr& type); + +std::shared_ptr makeColumnHandle( + const std::string& connectorName, + const std::string& name, + const TypePtr& type, + connector::ConnectorOptions options); + +/// Default table handle with optional remaining filter. +std::shared_ptr makeTableHandle( + const std::string& connectorName, + const std::string& tableName = "test_table", + const core::TypedExprPtr& remainingFilter = nullptr); + +/// Registers the named connector instance with the runtime: invokes +/// @c factoryRegistrar (if non-null and the factory isn't already registered), +/// then registers an object factory under @c connectorId and a Connector +/// constructed via the factory using @c config and @c ioExecutor. +/// +/// Reusable outside ConnectorTestBase by callers (e.g. the legacy +/// exec::test::HiveConnectorTestBase) that need the same registration +/// semantics without inheriting this fixture. +void registerTestConnector( + const std::string& connectorName, + const std::string& connectorId, + folly::Executor* ioExecutor, + const std::shared_ptr& config, + const FactoryRegistrar& factoryRegistrar); + +/// Reverses registerTestConnector: unregisters the Connector instance and +/// the object factory. +void unregisterTestConnector( + const std::string& connectorName, + const std::string& connectorId); + +/// Connector-agnostic GTest fixture for connector-driven tests. +/// +/// Lifecycle: +/// SetUp() registers +/// (i) standard serdes (Type, Filter, ITypedExpr), +/// (ii) the connector factory via @c factoryRegistrar (if not already +/// registered), +/// (iii) the connector instance under @c connectorId, and +/// (iv) the local file system. +/// TearDown() reverses (iii) and unregisters the connector object factory. +class ConnectorTestBase : public ::testing::Test, + public bolt::test::VectorTestBase { + public: + /// @param connectorName Connector type name (e.g. "hive", "tpch"). + /// @param connectorId Connector instance ID registered with the + /// runtime. + /// @param factoryRegistrar Invoked from SetUp() if @c connectorName is not + /// yet registered. May be null when the test has + /// another way of ensuring registration. + ConnectorTestBase( + std::string connectorName, + std::string connectorId, + FactoryRegistrar factoryRegistrar); + + ~ConnectorTestBase() override; + + void SetUp() override; + void TearDown() override; + + /// Re-creates the connector with a custom config (e.g. to test config + /// knobs). Reuses the existing ioExecutor_. + void resetConnector(const std::shared_ptr& config); + + /// Returns the ConnectorObjectFactory registered for this fixture's + /// connector. + std::shared_ptr connectorObjectFactory() + const; + + const std::string& connectorId() const { + return connectorId_; + } + + const std::string& connectorName() const { + return connectorName_; + } + + /// Splits @c filePath into @c splitCount contiguous chunks. All splits use + /// DWRF unless @c format says otherwise. + std::vector> makeConnectorSplits( + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format = dwio::common::FileFormat::DWRF) const; + + /// Single DWRF split for @c filePath covering the full file. + std::shared_ptr makeConnectorSplit( + const std::string& filePath) const; + + /// Single DWRF split for @c filePath covering [start, start + length). + std::shared_ptr makeConnectorSplit( + const std::string& filePath, + uint64_t start = 0, + uint64_t length = std::numeric_limits::max()) const; + + /// Single split for @c filePath with explicit options merged in. Defaults + /// fileFormat to DWRF if not present in @c options. + std::shared_ptr makeConnectorSplit( + const std::string& filePath, + uint64_t start, + uint64_t length, + connector::DynamicConnectorOptions options) const; + + /// Regular column handle (no required subfields) via the factory. + std::shared_ptr makeColumnHandle( + const std::string& name, + const TypePtr& type) const; + + std::shared_ptr makeColumnHandle( + const std::string& name, + const TypePtr& type, + connector::ConnectorOptions options) const; + + /// Default table handle (no subfield filters), optional remaining filter. + std::shared_ptr makeTableHandle( + const std::string& tableName = "test_table", + const core::TypedExprPtr& remainingFilter = nullptr) const; + + protected: + std::string connectorName_; + std::string connectorId_; + FactoryRegistrar factoryRegistrar_; + std::unique_ptr ioExecutor_; +}; + +} // namespace bytedance::bolt::connector::test diff --git a/bolt/connectors/tpch/CMakeLists.txt b/bolt/connectors/tpch/CMakeLists.txt index 3ff9e2bf1..d63dc1370 100644 --- a/bolt/connectors/tpch/CMakeLists.txt +++ b/bolt/connectors/tpch/CMakeLists.txt @@ -25,7 +25,7 @@ # This modified file is released under the same license. # -------------------------------------------------------------------------- -bolt_add_library(bolt_tpch_connector TpchConnector.cpp) +bolt_add_library(bolt_tpch_connector TpchConnector.cpp RegisterTpchConnector.cpp) target_link_libraries(bolt_tpch_connector bolt_connector bolt_tpch_gen fmt::fmt) diff --git a/bolt/connectors/tpch/RegisterTpchConnector.cpp b/bolt/connectors/tpch/RegisterTpchConnector.cpp new file mode 100644 index 000000000..a5c3aa1db --- /dev/null +++ b/bolt/connectors/tpch/RegisterTpchConnector.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * -------------------------------------------------------------------------- + * Copyright (c) ByteDance Ltd. and/or its affiliates. + * SPDX-License-Identifier: Apache-2.0 + * + * This file has been modified by ByteDance Ltd. and/or its affiliates on + * 2025-11-11. + * + * Original file was released under the Apache License 2.0, + * with the full license text available at: + * http://www.apache.org/licenses/LICENSE-2.0 + * + * This modified file is released under the same license. + * -------------------------------------------------------------------------- + */ + +#include "bolt/connectors/tpch/TpchConnector.h" + +namespace bytedance::bolt::connector::tpch { + +void registerTpchConnectorFactories() { + if (!connector::hasConnectorFactory(kTpchConnectorName)) { + connector::registerConnectorFactory( + std::make_shared()); + } +} + +} // namespace bytedance::bolt::connector::tpch diff --git a/bolt/connectors/tpch/TpchConnector.h b/bolt/connectors/tpch/TpchConnector.h index 405c5dfb5..ee24a7bcb 100644 --- a/bolt/connectors/tpch/TpchConnector.h +++ b/bolt/connectors/tpch/TpchConnector.h @@ -233,6 +233,8 @@ class TpchConnectorFactory : public ConnectorFactory { } }; +void registerTpchConnectorFactories(); + template bool CheckTpchConnectorFactoryInit() { static bool init = bytedance::bolt::connector::registerConnectorFactory( diff --git a/bolt/core/Expressions.cpp b/bolt/core/Expressions.cpp index 45ce7d0f9..b4bc95e30 100644 --- a/bolt/core/Expressions.cpp +++ b/bolt/core/Expressions.cpp @@ -30,6 +30,7 @@ #include "bolt/core/Expressions.h" #include "bolt/common/encode/Base64.h" +#include "bolt/common/memory/Memory.h" #include "bolt/vector/VectorSaver.h" namespace bytedance::bolt::core { @@ -123,6 +124,9 @@ TypedExprPtr ConstantTypedExpr::create( std::istringstream dataStream(serializedData); auto* pool = static_cast(context); + if (pool == nullptr) { + pool = memory::MemoryManager::getInstance()->tracePool(); + } return std::make_shared(restoreVector(dataStream, pool)); } diff --git a/bolt/dwio/common/fuzzer/DwioFuzzer.cpp b/bolt/dwio/common/fuzzer/DwioFuzzer.cpp index 0a2a4800a..8c64602f8 100644 --- a/bolt/dwio/common/fuzzer/DwioFuzzer.cpp +++ b/bolt/dwio/common/fuzzer/DwioFuzzer.cpp @@ -129,7 +129,7 @@ DwioFuzzer::DwioFuzzer( size_t initialSeed, const Options& options, common::FileFormat fileFormat) - : tempPath_(exec::test::TempDirectoryPath::create()), + : tempPath_(bytedance::bolt::test::TempDirectoryPath::create()), options_(options), fileFormat_{fileFormat}, aggregatePool_( diff --git a/bolt/dwio/common/fuzzer/DwioFuzzer.h b/bolt/dwio/common/fuzzer/DwioFuzzer.h index 07c3c20bb..e11380791 100644 --- a/bolt/dwio/common/fuzzer/DwioFuzzer.h +++ b/bolt/dwio/common/fuzzer/DwioFuzzer.h @@ -18,8 +18,8 @@ #include #include +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/Options.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "gtest/gtest.h" namespace bytedance::bolt::dwio { @@ -118,7 +118,7 @@ class DwioFuzzer { const std::unique_ptr reader, const RowTypePtr& rowType); - std::shared_ptr tempPath_; + std::shared_ptr tempPath_; Options options_; diff --git a/bolt/dwio/common/tests/LocalFileSinkTest.cpp b/bolt/dwio/common/tests/LocalFileSinkTest.cpp index d0b09e2ef..bff99e18f 100644 --- a/bolt/dwio/common/tests/LocalFileSinkTest.cpp +++ b/bolt/dwio/common/tests/LocalFileSinkTest.cpp @@ -30,13 +30,13 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/FileSink.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include using namespace ::testing; -using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::dwio::common { class LocalFileSinkTest : public testing::Test { diff --git a/bolt/dwio/common/tests/ReadFileInputStreamTests.cpp b/bolt/dwio/common/tests/ReadFileInputStreamTests.cpp index c394157bb..be232b36c 100644 --- a/bolt/dwio/common/tests/ReadFileInputStreamTests.cpp +++ b/bolt/dwio/common/tests/ReadFileInputStreamTests.cpp @@ -29,8 +29,8 @@ */ #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/dwio/common/InputStream.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include #include "folly/io/Cursor.h" @@ -48,7 +48,7 @@ class ReadFileInputStreamTest : public testing::Test { }; TEST_F(ReadFileInputStreamTest, LocalReadFile) { - auto tempFile = ::exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); const auto& filename = tempFile->path; remove(filename.c_str()); { diff --git a/bolt/dwio/dwrf/test/CacheInputTest.cpp b/bolt/dwio/dwrf/test/CacheInputTest.cpp index 0e3295602..1af7b878f 100644 --- a/bolt/dwio/dwrf/test/CacheInputTest.cpp +++ b/bolt/dwio/dwrf/test/CacheInputTest.cpp @@ -36,10 +36,10 @@ #include "bolt/common/io/IoStatistics.h" #include "bolt/common/io/Options.h" #include "bolt/common/memory/MmapAllocator.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/CachedBufferedInput.h" #include "bolt/dwio/dwrf/common/Common.h" #include "bolt/dwio/dwrf/test/TestReadFile.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include using namespace bytedance::bolt; @@ -89,7 +89,7 @@ class CacheTest : public testing::Test { std::unique_ptr ssd; if (ssdBytes) { FLAGS_ssd_odirect = false; - tempDirectory_ = exec::test::TempDirectoryPath::create(); + tempDirectory_ = bytedance::bolt::test::TempDirectoryPath::create(); ssd = std::make_unique( fmt::format("{}/cache", tempDirectory_->path), ssdBytes, @@ -387,7 +387,7 @@ class CacheTest : public testing::Test { std::mutex mutex_; std::vector fileIds_; folly::F14FastMap> pathToInput_; - std::shared_ptr tempDirectory_; + std::shared_ptr tempDirectory_; cache::FileGroupStats* FOLLY_NULLABLE groupStats_ = nullptr; std::shared_ptr allocator_; std::shared_ptr cache_; @@ -691,7 +691,8 @@ TEST_F(CacheTest, readAhead) { const void* buffer; int32_t size; if (!files[i]->next(buffer, size)) { - // End of file. Check that a multiple of file size has been read. + // End of file. Check that a multiple of file size has been + // read. EXPECT_EQ(0, totalRead[i] % FileWithReadAhead::kFileSize); if (totalRead[i] >= 3 * FileWithReadAhead::kFileSize) { files[i] = nullptr; diff --git a/bolt/dwio/orc/test/OrcTpchTest.cpp b/bolt/dwio/orc/test/OrcTpchTest.cpp index d4cb0a28b..ad4322bb9 100644 --- a/bolt/dwio/orc/test/OrcTpchTest.cpp +++ b/bolt/dwio/orc/test/OrcTpchTest.cpp @@ -18,13 +18,13 @@ #include #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/tpch/TpchConnector.h" #include "bolt/dwio/orc/reader/RegisterOrcReader.h" #include "bolt/dwio/orc/writer/RegisterOrcWriter.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/tests/utils/TpchQueryBuilder.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "bolt/functions/prestosql/registration/RegistrationFunctions.h" @@ -34,6 +34,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using bytedance::bolt::connector::tpch::kBoltTpchConnectorId; class OrcTpchTest : public testing::Test { diff --git a/bolt/dwio/orc/test/OrcWriterTest.cpp b/bolt/dwio/orc/test/OrcWriterTest.cpp index 25bb1e214..849ab09c9 100644 --- a/bolt/dwio/orc/test/OrcWriterTest.cpp +++ b/bolt/dwio/orc/test/OrcWriterTest.cpp @@ -17,10 +17,10 @@ #include #include +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" #include "bolt/dwio/orc/writer/OrcWriter.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/fbhive/HiveTypeParser.h" using namespace bytedance::bolt; using namespace bytedance::bolt::common; @@ -38,7 +38,7 @@ class OrcWriterTest : public testing::Test, public test::VectorTestBase { dwio::common::LocalFileSink::registerFactory(); rootPool_ = memory::memoryManager()->addRootPool("ParquetTests"); leafPool_ = rootPool_->addLeafChild("ParquetTests"); - tempPath_ = exec::test::TempDirectoryPath::create(); + tempPath_ = bytedance::bolt::test::TempDirectoryPath::create(); } protected: @@ -107,7 +107,7 @@ class OrcWriterTest : public testing::Test, public test::VectorTestBase { std::shared_ptr rootPool_; std::shared_ptr leafPool_; - std::shared_ptr tempPath_; + std::shared_ptr tempPath_; }; std::map compressionKindMap = { diff --git a/bolt/dwio/paimon/reader/tests/PaimonReaderAggregateTest.cpp b/bolt/dwio/paimon/reader/tests/PaimonReaderAggregateTest.cpp index aeb35f253..124a56385 100644 --- a/bolt/dwio/paimon/reader/tests/PaimonReaderAggregateTest.cpp +++ b/bolt/dwio/paimon/reader/tests/PaimonReaderAggregateTest.cpp @@ -38,12 +38,12 @@ #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/PlanBuilder.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/Connector.h" #include "bolt/connectors/hive/PaimonConnectorSplit.h" #include "bolt/connectors/hive/PaimonConstants.h" #include "bolt/connectors/hive/paimon_merge_engines/PaimonRowKind.h" #include "bolt/dwio/paimon/reader/tests/PaimonTestUtils.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include #include @@ -134,7 +134,7 @@ TEST_F(PaimonReaderAggregateTest, sumNoPrimaryKey) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW( {{"a", INTEGER()}, @@ -269,7 +269,7 @@ TEST_F(PaimonReaderAggregateTest, sumWithPrimaryKey) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW( {{"a", INTEGER()}, @@ -408,7 +408,7 @@ TEST_F(PaimonReaderAggregateTest, listAgg) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", VARCHAR()}}); auto fileRowType = createPaimonFile( @@ -509,7 +509,7 @@ TEST_F(PaimonReaderAggregateTest, listAggDelimiter) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", VARCHAR()}}); auto fileRowType = createPaimonFile( @@ -610,7 +610,7 @@ TEST_F(PaimonReaderAggregateTest, collectWithPrimaryKey) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW( {{"a", INTEGER()}, @@ -751,7 +751,7 @@ TEST_F(PaimonReaderAggregateTest, collectDistinctWithPrimaryKey) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW( {{"a", INTEGER()}, diff --git a/bolt/dwio/paimon/reader/tests/PaimonReaderDeduplicateTest.cpp b/bolt/dwio/paimon/reader/tests/PaimonReaderDeduplicateTest.cpp index 506d80ecf..097bc952a 100644 --- a/bolt/dwio/paimon/reader/tests/PaimonReaderDeduplicateTest.cpp +++ b/bolt/dwio/paimon/reader/tests/PaimonReaderDeduplicateTest.cpp @@ -38,12 +38,12 @@ #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/PlanBuilder.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/Connector.h" #include "bolt/connectors/hive/PaimonConnectorSplit.h" #include "bolt/connectors/hive/PaimonConstants.h" #include "bolt/connectors/hive/paimon_merge_engines/PaimonRowKind.h" #include "bolt/dwio/paimon/reader/tests/PaimonTestUtils.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include #include @@ -132,7 +132,7 @@ TEST_F(PaimonReaderDeduplicateTest, insertAllUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -239,7 +239,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicNoUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -346,7 +346,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicFirstUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -453,7 +453,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicLastUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -559,7 +559,7 @@ TEST_F(PaimonReaderDeduplicateTest, differentPrimaryKeys) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following PK a b _SEQUENCE_NUMBER _VALUE_KIND @@ -667,7 +667,7 @@ TEST_F(PaimonReaderDeduplicateTest, multiplePrimaryKeys) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_a _KEY_b a b c _SEQUENCE_NUMBER _VALUE_KIND @@ -775,7 +775,7 @@ TEST_F(PaimonReaderDeduplicateTest, multiplePrimaryKeysReverse) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_b _KEY_a a b c _SEQUENCE_NUMBER _VALUE_KIND @@ -883,7 +883,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicIgnoreDeleteAll) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following PK_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -980,7 +980,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicIgnoreLastDelete) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following: PK_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -1091,7 +1091,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicIgnoreNonLastDelete) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); @@ -1198,7 +1198,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicNoIgnoreLastDelete) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following: PK_a a b _SEQUENCE_NUMBER _VALUE_KIND @@ -1306,7 +1306,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicMultipleBatchesAllUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); int len = 20000; @@ -1439,7 +1439,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicMultipleBatchesNoUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); int len = 20000; @@ -1572,7 +1572,7 @@ TEST_F(PaimonReaderDeduplicateTest, basicMultipleBatches) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following: _KEY_a _SEQUENCE_NUMBER _VALUE_KIND a b diff --git a/bolt/dwio/paimon/reader/tests/PaimonReaderMetadataFieldTest.cpp b/bolt/dwio/paimon/reader/tests/PaimonReaderMetadataFieldTest.cpp index 9d75577a0..259e0c1f3 100644 --- a/bolt/dwio/paimon/reader/tests/PaimonReaderMetadataFieldTest.cpp +++ b/bolt/dwio/paimon/reader/tests/PaimonReaderMetadataFieldTest.cpp @@ -27,6 +27,7 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/file/Utils.h" #include "bolt/common/memory/Memory.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/Connector.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/PaimonConnectorSplit.h" @@ -35,7 +36,6 @@ #include "bolt/dwio/paimon/reader/tests/PaimonTestUtils.h" #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/registration/RegistrationFunctions.h" #include "bolt/parse/Expressions.h" #include "bolt/parse/TypeResolver.h" @@ -101,7 +101,7 @@ class PaimonReaderMetadataFieldTest void assertOutput( const RowTypePtr& readType, RowTypePtr fileRowType, - const std::shared_ptr& tempDir, + const std::shared_ptr& tempDir, std::unordered_map tableParameters, std::unordered_map> assignments, @@ -204,7 +204,7 @@ class PaimonReaderMetadataFieldTest const std::string PaimonReaderMetadataFieldTest::kHiveConnectorId = "test-hive"; TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIndex) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); // Create vectors with 20000 elements, a starts at 101, b starts at 103 @@ -280,7 +280,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIndex) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIndexWithFilters) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto numRows = 3 * 1024 * 1024; // roughly 3 row groups with default settings auto aVec = std::vector(numRows); @@ -406,7 +406,7 @@ TEST_F( // DISABLED until bug in parquet filter pushdown is resolved // reads high end of file that has multiple row groups with filters - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto numRows = 3 * 1024 * 1024; // roughly 3 row groups with default settings auto aVec = std::vector(numRows); @@ -488,7 +488,7 @@ TEST_F( } TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIndexWithSelectiveFilter) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto numRows = 3 * 1024 * 1024; auto aVec = std::vector(numRows); @@ -548,7 +548,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIndexWithSelectiveFilter) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowId) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -612,7 +612,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowId) { // Disabled until paimon support querying extra columns TEST_F(PaimonReaderMetadataFieldTest, DISABLED_testPaimonExtraColumns) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -686,7 +686,7 @@ TEST_F(PaimonReaderMetadataFieldTest, DISABLED_testPaimonExtraColumns) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonFilePathColumn) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -761,7 +761,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonFilePathColumn) { vectorMaker_.flatVector({}), vectorMaker_.flatVector({}), }); - auto emptyTableDir = exec::test::TempDirectoryPath::create(); + auto emptyTableDir = bytedance::bolt::test::TempDirectoryPath::create(); assertOutput( readType, fileRowType, @@ -772,7 +772,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonFilePathColumn) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonFilePathColumnMultiFile) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -886,7 +886,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonFilePathColumnMultiFile) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonBucketColumn) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -949,7 +949,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonBucketColumn) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonPartitionColumn) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -1042,7 +1042,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonPartitionColumn) { TEST_F( PaimonReaderMetadataFieldTest, testPaimonPartitionColumnPreservesDeclaredSchemaOrder) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); auto fileRowType = createPaimonFile( vectorMaker_, @@ -1087,7 +1087,7 @@ TEST_F( } TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIdColumnWithoutRowIdInFile) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); // write a paimon file, without the rowId column auto fileRowType = createPaimonFile( @@ -1164,7 +1164,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIdColumnWithoutRowIdInFile) { } TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIdColumnWithRowIdInFile) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); // write a paimon file, with the rowId column auto fileRowType = createPaimonFile( @@ -1233,7 +1233,7 @@ TEST_F(PaimonReaderMetadataFieldTest, testPaimonRowIdColumnWithRowIdInFile) { TEST_F( PaimonReaderMetadataFieldTest, testPaimonSequenceNumberColumnWithSequenceNumberInFile) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); // write a paimon file, with the sequence number column auto fileRowType = createPaimonFile( @@ -1312,7 +1312,7 @@ TEST_F( TEST_F( PaimonReaderMetadataFieldTest, testPaimonSequenceNumberColumnWithoutSequenceNumberInFile) { - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto rowType = ROW({{"a", INTEGER()}, {"b", INTEGER()}}); // write a paimon file, with the sequence number column auto fileRowType = createPaimonFile( diff --git a/bolt/dwio/paimon/reader/tests/PaimonReaderPartialUpdateTest.cpp b/bolt/dwio/paimon/reader/tests/PaimonReaderPartialUpdateTest.cpp index ee6daca8f..888fe1c2f 100644 --- a/bolt/dwio/paimon/reader/tests/PaimonReaderPartialUpdateTest.cpp +++ b/bolt/dwio/paimon/reader/tests/PaimonReaderPartialUpdateTest.cpp @@ -31,11 +31,11 @@ #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/PlanBuilder.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/Connector.h" #include "bolt/connectors/hive/PaimonConnectorSplit.h" #include "bolt/connectors/hive/PaimonConstants.h" #include "bolt/connectors/hive/paimon_merge_engines/PaimonRowKind.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include using namespace bytedance::bolt; @@ -146,7 +146,7 @@ TEST_F(PaimonReaderPartialUpdateTest, nonNullUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b c _SEQUENCE_NUMBER _VALUE_KIND @@ -286,7 +286,7 @@ TEST_F(PaimonReaderPartialUpdateTest, sequenceGroupUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b g_1 c d g_2 _SEQUENCE_NUMBER _VALUE_KIND @@ -464,7 +464,7 @@ TEST_F(PaimonReaderPartialUpdateTest, multipleSequenceGroupUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b g_1 c d g_2 g_3 _SEQUENCE_NUMBER _VALUE_KIND @@ -641,7 +641,7 @@ TEST_F(PaimonReaderPartialUpdateTest, sequenceGroupAggregateUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b c d _SEQUENCE_NUMBER _VALUE_KIND @@ -830,7 +830,7 @@ TEST_F(PaimonReaderPartialUpdateTest, multipleSequenceGroupAggregateUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b g_1 c g_2 g_3 _SEQUENCE_NUMBER _VALUE_KIND @@ -1013,7 +1013,7 @@ TEST_F(PaimonReaderPartialUpdateTest, sequenceGroupDefaultAggregateUpdate) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* Merging following _KEY_k k a b c d _SEQUENCE_NUMBER _VALUE_KIND @@ -1200,7 +1200,7 @@ TEST_F(PaimonReaderPartialUpdateTest, uninitializedMemoryTest) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* * We want to test that if a column is NEVER updated for a PK, it should be @@ -1334,7 +1334,7 @@ TEST_F(PaimonReaderPartialUpdateTest, samePrimaryKeyAcrossOutputBatches) { std::make_shared( std::thread::hardware_concurrency())); - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); /* * Reproducer: diff --git a/bolt/dwio/parquet/arrow/tests/MetadataTest.cpp b/bolt/dwio/parquet/arrow/tests/MetadataTest.cpp index e805c9693..21e51b916 100644 --- a/bolt/dwio/parquet/arrow/tests/MetadataTest.cpp +++ b/bolt/dwio/parquet/arrow/tests/MetadataTest.cpp @@ -35,16 +35,16 @@ #include #include "arrow/util/key_value_metadata.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/dwio/parquet/arrow/FileWriter.h" #include "bolt/dwio/parquet/arrow/tests/TestUtil.h" #include "bolt/dwio/parquet/reader/ParquetReader.h" -#include "bolt/exec/tests/utils/TempFilePath.h" namespace bytedance::bolt::parquet::arrow { namespace metadata { namespace { void writeToFile( - std::shared_ptr filePath, + std::shared_ptr<::bytedance::bolt::test::TempFilePath> filePath, std::shared_ptr buffer) { auto localWriteFile = std::make_unique(filePath->getPath(), false, false); @@ -416,7 +416,7 @@ TEST(Metadata, TestAddKeyValueMetadata) { PARQUET_ASSIGN_OR_THROW(auto buffer, sink->Finish()); // Write the buffer to a temp file path - auto filePath = exec::test::TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath, buffer); memory::MemoryManager::testingSetInstance({}); std::shared_ptr rootPool = @@ -530,7 +530,7 @@ TEST(Metadata, TestSortingColumns) { PARQUET_ASSIGN_OR_THROW(auto buffer, sink->Finish()); // Write the buffer to a temp file path - auto filePath = exec::test::TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath, buffer); memory::MemoryManager::testingSetInstance({}); std::shared_ptr rootPool = diff --git a/bolt/dwio/parquet/tests/ParquetTestBase.h b/bolt/dwio/parquet/tests/ParquetTestBase.h index 0d8fca84b..dd2f03b67 100644 --- a/bolt/dwio/parquet/tests/ParquetTestBase.h +++ b/bolt/dwio/parquet/tests/ParquetTestBase.h @@ -33,13 +33,13 @@ #include #include #include "bolt/common/base/Fs.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/dwio/common/Reader.h" #include "bolt/dwio/common/tests/utils/DataFiles.h" #include "bolt/dwio/parquet/reader/PageReader.h" #include "bolt/dwio/parquet/reader/ParquetReader.h" #include "bolt/dwio/parquet/writer/Writer.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" namespace bytedance::bolt::parquet { @@ -54,7 +54,7 @@ class ParquetTestBase : public testing::Test, public test::VectorTestBase { dwio::common::LocalFileSink::registerFactory(); rootPool_ = memory::memoryManager()->addRootPool("ParquetTests"); leafPool_ = rootPool_->addLeafChild("ParquetTests"); - tempPath_ = exec::test::TempDirectoryPath::create(); + tempPath_ = bytedance::bolt::test::TempDirectoryPath::create(); } static RowTypePtr sampleSchema() { @@ -210,7 +210,7 @@ class ParquetTestBase : public testing::Test, public test::VectorTestBase { static constexpr uint64_t kBytesInRowGroup = 128 * 1'024 * 1'024; std::shared_ptr rootPool_; std::shared_ptr leafPool_; - std::shared_ptr tempPath_; + std::shared_ptr tempPath_; TimestampPrecision timestampPrecision_{TimestampPrecision::kMilliseconds}; }; } // namespace bytedance::bolt::parquet diff --git a/bolt/dwio/parquet/tests/ParquetTpchTest.cpp b/bolt/dwio/parquet/tests/ParquetTpchTest.cpp index 41d068c70..398205aed 100644 --- a/bolt/dwio/parquet/tests/ParquetTpchTest.cpp +++ b/bolt/dwio/parquet/tests/ParquetTpchTest.cpp @@ -32,13 +32,13 @@ #include #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/tpch/TpchConnector.h" #include "bolt/dwio/parquet/RegisterParquetReader.h" #include "bolt/dwio/parquet/RegisterParquetWriter.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/tests/utils/TpchQueryBuilder.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "bolt/functions/prestosql/registration/RegistrationFunctions.h" @@ -47,6 +47,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using bytedance::bolt::connector::tpch::kBoltTpchConnectorId; class ParquetTpchTest : public testing::Test { diff --git a/bolt/dwio/parquet/tests/reader/ParquetDictionaryFilterTest.cpp b/bolt/dwio/parquet/tests/reader/ParquetDictionaryFilterTest.cpp index 2c2e51714..dbc70b1fe 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetDictionaryFilterTest.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetDictionaryFilterTest.cpp @@ -16,11 +16,11 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/dwio/common/ScanSpec.h" #include "bolt/dwio/parquet/reader/DictionaryFilter.h" #include "bolt/dwio/parquet/reader/ParquetData.h" #include "bolt/dwio/parquet/tests/ParquetTestBase.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "bolt/type/Type.h" #include "bolt/type/filter/FilterUtil.h" namespace bytedance::bolt::parquet { @@ -30,7 +30,7 @@ class DictionaryFilterTest : public ParquetTestBase { protected: void SetUp() override { ParquetTestBase::SetUp(); - filePath_ = exec::test::TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; } void createTestFile() { diff --git a/bolt/dwio/parquet/tests/reader/ParquetReaderBenchmark.cpp b/bolt/dwio/parquet/tests/reader/ParquetReaderBenchmark.cpp index 4977a02ce..000532423 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetReaderBenchmark.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetReaderBenchmark.cpp @@ -28,6 +28,7 @@ * -------------------------------------------------------------------------- */ +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/dwio/common/Options.h" #include "bolt/dwio/common/Statistics.h" @@ -35,7 +36,6 @@ #include "bolt/dwio/parquet/RegisterParquetReader.h" #include "bolt/dwio/parquet/reader/ParquetReader.h" #include "bolt/dwio/parquet/writer/Writer.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include #include @@ -267,8 +267,8 @@ class ParquetReaderBenchmark { private: const std::string fileName_ = "test.parquet"; - const std::shared_ptr - fileFolder_ = bytedance::bolt::exec::test::TempDirectoryPath::create(); + const std::shared_ptr fileFolder_ = + bytedance::bolt::test::TempDirectoryPath::create(); const bool disableDictionary_; std::unique_ptr dataSetBuilder_; diff --git a/bolt/dwio/parquet/tests/reader/ParquetReaderCliTest.cpp b/bolt/dwio/parquet/tests/reader/ParquetReaderCliTest.cpp index c3cb19ba6..ebb0eea8a 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetReaderCliTest.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetReaderCliTest.cpp @@ -43,7 +43,7 @@ class ParquetReaderCliTest : public ParquetTestBase { uint32_t batchCount = kBatchCount, uint32_t numRows = kNumRows, uint32_t numRowsPerGroup = kNumRowsPerGroup) { - fileDirectory_ = exec::test::TempDirectoryPath::create(); + fileDirectory_ = bytedance::bolt::test::TempDirectoryPath::create(); auto dataSetBuilder_ = std::make_unique(*leafPool_, 0); for (uint32_t i = 0; i < numFiles; i++) { @@ -76,8 +76,7 @@ class ParquetReaderCliTest : public ParquetTestBase { std::shared_ptr rootPool_; std::shared_ptr leafPool_; - std::shared_ptr - fileDirectory_; + std::shared_ptr fileDirectory_; std::vector> files_; }; diff --git a/bolt/dwio/parquet/tests/reader/ParquetReaderTest.cpp b/bolt/dwio/parquet/tests/reader/ParquetReaderTest.cpp index d70671380..45caf7dc2 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetReaderTest.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetReaderTest.cpp @@ -33,11 +33,11 @@ #include #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/core/QueryCtx.h" #include "bolt/dwio/parquet/reader/RepeatedColumnReader.h" #include "bolt/dwio/parquet/tests/ParquetTestBase.h" #include "bolt/dwio/parquet/writer/Writer.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "bolt/expression/Expr.h" #include "bolt/expression/ExprToSubfieldFilter.h" #include "bolt/expression/StringWriter.h" @@ -1770,7 +1770,7 @@ TEST_F(ParquetReaderTest, integerToVarcharSchemaMismatchCast) { auto data = makeRowVector({"col"}, {makeFlatVector({1, 2, 3, 42, -100})}); - auto tempFile = exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); { auto writeFile = std::make_unique(tempFile->getPath(), true, false); @@ -1832,7 +1832,7 @@ TEST_F(ParquetReaderTest, varcharToBigintSchemaMismatchCast) { auto data = makeRowVector( {"col"}, {makeFlatVector({"100", "200", "300", "-42", "0"})}); - auto tempFile = exec::test::TempFilePath::create(); + auto tempFile = ::bytedance::bolt::test::TempFilePath::create(); { auto writeFile = std::make_unique(tempFile->getPath(), true, false); diff --git a/bolt/dwio/parquet/tests/reader/ParquetRowGroupFilterTest.cpp b/bolt/dwio/parquet/tests/reader/ParquetRowGroupFilterTest.cpp index 5befb56ae..1e930a99a 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetRowGroupFilterTest.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetRowGroupFilterTest.cpp @@ -18,18 +18,18 @@ #include #include + #include "bolt/common/base/Fs.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/connectors/hive/HiveConfig.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/dwio/common/FileSink.h" -#include "bolt/dwio/common/tests/utils/DataSetBuilder.h" -#include "bolt/dwio/parquet/writer/Writer.h" -#include "folly/experimental/EventCount.h" - #include "bolt/dwio/common/tests/utils/DataFiles.h" +#include "bolt/dwio/common/tests/utils/DataSetBuilder.h" #include "bolt/dwio/paimon/deletionvectors/DeletionFileReader.h" +#include "bolt/dwio/parquet/writer/Writer.h" #include "bolt/exec/OutputBufferManager.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/TableScan.h" @@ -42,6 +42,7 @@ #include "bolt/type/Timestamp.h" #include "bolt/type/Type.h" #include "bolt/type/tests/SubfieldFiltersBuilder.h" +#include "folly/experimental/EventCount.h" using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::core; @@ -110,7 +111,7 @@ void ParquetRowGroupFilterTest::testSubfieldPruning( } auto mapType = batch[0]->childAt(1)->type(); auto rowType = asRowType(batch[0]->type()); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); auto path = filePath->path; auto localWriteFile = std::make_unique(path, true, false); diff --git a/bolt/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/bolt/dwio/parquet/tests/reader/ParquetTableScanTest.cpp index 29b6b8aae..613de5c07 100644 --- a/bolt/dwio/parquet/tests/reader/ParquetTableScanTest.cpp +++ b/bolt/dwio/parquet/tests/reader/ParquetTableScanTest.cpp @@ -32,6 +32,7 @@ #include #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/HiveConfig.h" #include "bolt/dwio/common/tests/utils/DataFiles.h" #include "bolt/dwio/parquet/RegisterParquetReader.h" @@ -136,7 +137,7 @@ class ParquetTableScanTest : public HiveConnectorTestBase { auto plan = PlanBuilder().tableScan(rowType).planNode(); - assertQuery(plan, splits_, sql); + OperatorTestBase::assertQuery(plan, splits_, sql); } void assertSelectWithDataColumns( @@ -341,7 +342,7 @@ class ParquetTableScanTest : public HiveConnectorTestBase { makeFlatVector(values), }); auto schema = asRowType(vector->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {vector}, options); loadData(file->getPath(), schema, vector); @@ -489,7 +490,7 @@ TEST_F(ParquetTableScanTest, aggregatePushdownToSmallPages) { makeFlatVector({std::to_string(row)}), })); } - const auto filePath = TempFilePath::create(); + const auto filePath = ::bytedance::bolt::test::TempFilePath::create(); WriterOptions options; options.dataPageSize = 1; writeToParquetFile(filePath->getPath(), data, options); @@ -581,7 +582,7 @@ TEST_F(ParquetTableScanTest, map) { } TEST_F(ParquetTableScanTest, variantE2EProjectAndAggregation) { - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); WriterOptions writerOptions; auto data = makeVariantParquetBatch( pool(), @@ -606,7 +607,8 @@ TEST_F(ParquetTableScanTest, variantE2EProjectAndAggregation) { auto expected = makeRowVector( {"g", "sum_a"}, {makeFlatVector({1, 2}), makeFlatVector({3, 3})}); - ASSERT_TRUE(assertEqualResults({expected}, {results})); + ASSERT_TRUE(assertEqualResults( + std::vector{expected}, std::vector{results})); } TEST_F(ParquetTableScanTest, nullMap) { @@ -788,7 +790,9 @@ TEST_F(ParquetTableScanTest, readAsLowerCase) { auto result = readCursor(params, addSplits); ASSERT_TRUE(waitForTaskCompletion(result.first->task().get())); assertEqualResults( - result.second, {makeRowVector({"a"}, {makeFlatVector({0, 1})})}); + result.second, + std::vector{ + makeRowVector({"a"}, {makeFlatVector({0, 1})})}); } TEST_F(ParquetTableScanTest, rowIndex) { @@ -1034,7 +1038,7 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) { kSize, [](auto i) { return Timestamp(i, i * 1'001'001); }), }); auto schema = asRowType(vector->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); WriterOptions options; options.writeInt96AsTimestamp = true; writeToParquetFile(file->getPath(), {vector}, options); @@ -1073,7 +1077,7 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) { makeFlatVector( kSize, [](auto i) { return Timestamp(i, i * 1'001'000); }), }); - assertEqualResults({expected}, result.second); + assertEqualResults(std::vector{expected}, result.second); } TEST_F(ParquetTableScanTest, structMatchByName) { @@ -1112,7 +1116,7 @@ TEST_F(ParquetTableScanTest, structMatchByName) { const auto address = makeFlatVector({"567 Maple Drive"}); auto vector = makeRowVector({"id", "name", "address"}, {id, name, address}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {vector}, {}); loadData(file->getPath(), asRowType(vector->type()), vector); @@ -1178,7 +1182,7 @@ TEST_F(ParquetTableScanTest, structMatchByName) { makeFlatVector({"Jones"}), }), address}); - file = TempFilePath::create(); + file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {vector}, {}); rowType = @@ -1456,7 +1460,7 @@ TEST_F(ParquetTableScanTest, convertTypePolicyMatrix) { SCOPED_TRACE(c.name); auto data = makeSampleData(c.fileType); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); WriterOptions writerOptions; writeToParquetFile(file->getPath(), {data}, writerOptions); @@ -1492,7 +1496,7 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { // 1. INT widening: file INT32 [1,2,3] read as BIGINT -> [1,2,3]. { auto data = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {data}, WriterOptions{}); auto declared = ROW({"c0"}, {BIGINT()}); @@ -1502,14 +1506,16 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .split(makeSplit(file->getPath())) .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } // 2. Float widening: REAL [1.5, 2.5, 3.5] read as DOUBLE. { auto data = makeRowVector({"c0"}, {makeFlatVector({1.5f, 2.5f, 3.5f})}); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {data}, WriterOptions{}); auto declared = ROW({"c0"}, {DOUBLE()}); @@ -1520,7 +1526,9 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({1.5, 2.5, 3.5})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } #ifdef SPARK_COMPATIBLE @@ -1531,7 +1539,7 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { { auto data = makeRowVector( {"c0"}, {makeFlatVector({"100", "200", "300"})}); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {data}, WriterOptions{}); auto declared = ROW({"c0"}, {BIGINT()}); @@ -1542,7 +1550,9 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({100, 200, 300})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } #endif @@ -1554,7 +1564,7 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { // does the silent truncation, matching Spark / Trino / parquet-mr. { auto data = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {data}, WriterOptions{}); { @@ -1566,7 +1576,9 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } { auto declared = ROW({"c0"}, {SMALLINT()}); @@ -1577,7 +1589,9 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } } @@ -1586,7 +1600,7 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { // through, and IntegerColumnReader::makeCastExpr handles the cast. { auto data = makeRowVector({"c0"}, {makeFlatVector({1, 2, 3})}); - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToParquetFile(file->getPath(), {data}, WriterOptions{}); auto declared = ROW({"c0"}, {VARCHAR()}); @@ -1597,7 +1611,9 @@ TEST_F(ParquetTableScanTest, convertTypePolicyValueChecks) { .copyResults(pool()); auto expected = makeRowVector({"c0"}, {makeFlatVector({"1", "2", "3"})}); - EXPECT_TRUE(assertEqualResults({expected}, {result})); + EXPECT_TRUE(assertEqualResults( + std::vector{expected}, + std::vector{result})); } } diff --git a/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp b/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp index 2d7fb63e0..f119342b5 100644 --- a/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp +++ b/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/dwio/common/Options.h" #include "bolt/dwio/common/Statistics.h" @@ -22,7 +23,6 @@ #include "bolt/dwio/parquet/arrow/Properties.h" #include "bolt/dwio/parquet/reader/ParquetReader.h" #include "bolt/dwio/parquet/writer/Writer.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include #include @@ -94,8 +94,8 @@ class ParquetWriterBenchmark { private: const std::string fileName_ = "test.parquet"; - const std::shared_ptr - fileFolder_ = bytedance::bolt::exec::test::TempDirectoryPath::create(); + const std::shared_ptr fileFolder_ = + bytedance::bolt::test::TempDirectoryPath::create(); const bool disableDictionary_; std::unique_ptr dataSetBuilder_; diff --git a/bolt/dwio/txt/tests/TxtTestBase.h b/bolt/dwio/txt/tests/TxtTestBase.h index 1ffd63a7f..70e0b72e2 100644 --- a/bolt/dwio/txt/tests/TxtTestBase.h +++ b/bolt/dwio/txt/tests/TxtTestBase.h @@ -19,11 +19,11 @@ #include #include #include "bolt/common/base/Fs.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/dwio/common/Reader.h" #include "bolt/dwio/common/tests/utils/DataFiles.h" #include "bolt/dwio/txt/reader/TxtReader.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" @@ -39,7 +39,7 @@ class TxtTestBase : public testing::Test, public test::VectorTestBase { dwio::common::LocalFileSink::registerFactory(); rootPool_ = memory::memoryManager()->addRootPool("TxtTests"); leafPool_ = rootPool_->addLeafChild("TxtTests"); - tempPath_ = exec::test::TempDirectoryPath::create(); + tempPath_ = bytedance::bolt::test::TempDirectoryPath::create(); } static RowTypePtr sampleSchema() { @@ -178,7 +178,7 @@ class TxtTestBase : public testing::Test, public test::VectorTestBase { std::shared_ptr rootPool_; std::shared_ptr leafPool_; - std::shared_ptr tempPath_; + std::shared_ptr tempPath_; TimestampPrecision timestampPrecision_{TimestampPrecision::kMilliseconds}; }; } // namespace bytedance::bolt::txt diff --git a/bolt/dwio/txt/tests/writer/TxtWriterTest.cpp b/bolt/dwio/txt/tests/writer/TxtWriterTest.cpp index d47faee39..0f7b845b3 100644 --- a/bolt/dwio/txt/tests/writer/TxtWriterTest.cpp +++ b/bolt/dwio/txt/tests/writer/TxtWriterTest.cpp @@ -16,10 +16,10 @@ #include "bolt/dwio/txt/writer/TxtWriter.h" #include +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/dwio/txt/reader/TxtReader.h" #include "bolt/dwio/txt/tests/TxtTestBase.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/fbhive/HiveTypeParser.h" #include "bolt/vector/tests/utils/VectorTestBase.h" @@ -40,7 +40,7 @@ class TxtWriterTest : public bytedance::bolt::txt::TxtTestBase { dwio::common::LocalFileSink::registerFactory(); rootPool_ = memory::memoryManager()->addRootPool("txtWriterTests"); leafPool_ = rootPool_->addLeafChild("txtWriterTests"); - tempPath_ = exec::test::TempDirectoryPath::create(); + tempPath_ = bytedance::bolt::test::TempDirectoryPath::create(); } protected: @@ -113,7 +113,7 @@ class TxtWriterTest : public bytedance::bolt::txt::TxtTestBase { std::shared_ptr rootPool_; std::shared_ptr leafPool_; - std::shared_ptr tempPath_; + std::shared_ptr tempPath_; }; TEST_F(TxtWriterTest, comparison) { diff --git a/bolt/examples/ScanAndSort.cpp b/bolt/examples/ScanAndSort.cpp index 526308217..f46566ae8 100644 --- a/bolt/examples/ScanAndSort.cpp +++ b/bolt/examples/ScanAndSort.cpp @@ -31,13 +31,13 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/Memory.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/Type.h" #include "bolt/vector/BaseVector.h" @@ -117,7 +117,7 @@ int main(int argc, char** argv) { // Create a temporary dir to store the local file created. Note that this // directory is automatically removed when the `tempDir` object runs out of // scope. - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); // Once we finalize setting up the Hive connector, let's define our query // plan. We use the helper `PlanBuilder` class to generate the query plan diff --git a/bolt/examples/ScanOrc.cpp b/bolt/examples/ScanOrc.cpp index 9d5586460..ce8d250ef 100644 --- a/bolt/examples/ScanOrc.cpp +++ b/bolt/examples/ScanOrc.cpp @@ -33,8 +33,8 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/Memory.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/BaseVector.h" using namespace bytedance::bolt; using namespace bytedance::bolt::dwio::common; diff --git a/bolt/exec/benchmarks/SortRandomDataBenchmark.cpp b/bolt/exec/benchmarks/SortRandomDataBenchmark.cpp index a9dc11848..c6dcba078 100644 --- a/bolt/exec/benchmarks/SortRandomDataBenchmark.cpp +++ b/bolt/exec/benchmarks/SortRandomDataBenchmark.cpp @@ -19,10 +19,11 @@ #include #include +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/Operator.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" DEFINE_string(temp_file_path, "", "file path of input file"); @@ -32,6 +33,7 @@ DEFINE_int64(fuzzer_seed, 99887766, "Seed for random input dataset generator"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 1'000; static constexpr int32_t kRowsPerVector = 10'000; @@ -170,7 +172,7 @@ class SortRandomDataBenchmark : public HiveConnectorTestBase { vectors.emplace_back(makeRowVector(inputType_->names(), children)); } - filePath_ = TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; writeToFile(filePath_, vectors); std::cout << filePath_ << std::endl; } else { diff --git a/bolt/exec/benchmarks/SortWindowBenchmark.cpp b/bolt/exec/benchmarks/SortWindowBenchmark.cpp index 277e40127..8c36b7d9d 100644 --- a/bolt/exec/benchmarks/SortWindowBenchmark.cpp +++ b/bolt/exec/benchmarks/SortWindowBenchmark.cpp @@ -19,6 +19,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/Operator.h" #include "bolt/exec/Window.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" @@ -39,6 +40,7 @@ DEFINE_int32(warmup_rounds, 0, "nums of warmup rounds"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; int32_t kNumVectors = 2000; int32_t kRowsPerVector = 1000; @@ -203,8 +205,9 @@ class SortWindowBenchmark : public HiveConnectorTestBase { vectors.emplace_back(input); } - filePath_ = FLAGS_temp_file_path.empty() ? TempFilePath::create()->path - : FLAGS_temp_file_path; + filePath_ = FLAGS_temp_file_path.empty() + ? ::bytedance::bolt::test::TempFilePath::create()->path + : FLAGS_temp_file_path; writeToFile(filePath_, vectors); diff --git a/bolt/exec/fuzzer/AggregationFuzzer.cpp b/bolt/exec/fuzzer/AggregationFuzzer.cpp index 72e91cd76..e9a25b0cd 100644 --- a/bolt/exec/fuzzer/AggregationFuzzer.cpp +++ b/bolt/exec/fuzzer/AggregationFuzzer.cpp @@ -35,8 +35,8 @@ #include "bolt/connectors/hive/TableHandle.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/PartitionFunction.h" #include "bolt/exec/fuzzer/AggregationFuzzerBase.h" @@ -750,7 +750,7 @@ bool AggregationFuzzer::verifyAggregation( std::vector plans; plans.push_back({firstPlan, {}}); - auto directory = exec::test::TempDirectoryPath::create(); + auto directory = bytedance::bolt::test::TempDirectoryPath::create(); // Alternate between using Values and TableScan node. @@ -861,10 +861,10 @@ bool AggregationFuzzer::verifySortedAggregation( {}}); } - std::shared_ptr directory; + std::shared_ptr directory; const auto inputRowType = asRowType(input[0]->type()); if (isTableScanSupported(inputRowType)) { - directory = exec::test::TempDirectoryPath::create(); + directory = bytedance::bolt::test::TempDirectoryPath::create(); auto splits = makeSplits(input, directory->path); plans.push_back( @@ -1168,10 +1168,10 @@ bool AggregationFuzzer::verifyDistinctAggregation( // Alternate between using Values and TableScan node. - std::shared_ptr directory; + std::shared_ptr directory; const auto inputRowType = asRowType(input[0]->type()); if (isTableScanSupported(inputRowType) && vectorFuzzer_.coinToss(0.5)) { - directory = exec::test::TempDirectoryPath::create(); + directory = bytedance::bolt::test::TempDirectoryPath::create(); auto splits = makeSplits(input, directory->path); plans.push_back( diff --git a/bolt/exec/fuzzer/AggregationFuzzerBase.cpp b/bolt/exec/fuzzer/AggregationFuzzerBase.cpp index cceabaf8f..7f5535608 100644 --- a/bolt/exec/fuzzer/AggregationFuzzerBase.cpp +++ b/bolt/exec/fuzzer/AggregationFuzzerBase.cpp @@ -33,12 +33,12 @@ #include #include "bolt/common/base/BoltException.h" #include "bolt/common/base/Fs.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/fuzzer/DuckQueryRunner.h" #include "bolt/exec/fuzzer/PrestoQueryRunner.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/expression/SignatureBinder.h" #include "bolt/expression/fuzzer/ArgumentTypeFuzzer.h" #include "bolt/vector/VectorSaver.h" @@ -431,14 +431,14 @@ bolt::fuzzer::ResultOrError AggregationFuzzerBase::execute( bolt::fuzzer::ResultOrError resultOrError; try { - std::shared_ptr spillDirectory; + std::shared_ptr spillDirectory; AssertQueryBuilder builder(plan); builder.configs(queryConfigs_); int32_t spillPct{0}; if (injectSpill) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); builder.spillDirectory(spillDirectory->path) .config(core::QueryConfig::kSpillEnabled, "true") .config(core::QueryConfig::kAggregationSpillEnabled, "true") diff --git a/bolt/exec/fuzzer/WindowFuzzer.cpp b/bolt/exec/fuzzer/WindowFuzzer.cpp index b2a4bed99..60f61452e 100644 --- a/bolt/exec/fuzzer/WindowFuzzer.cpp +++ b/bolt/exec/fuzzer/WindowFuzzer.cpp @@ -30,8 +30,8 @@ #include "bolt/exec/fuzzer/WindowFuzzer.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" DEFINE_bool( enable_window_reference_verification, @@ -257,7 +257,7 @@ void WindowFuzzer::testAlternativePlans( } // With TableScan. - auto directory = exec::test::TempDirectoryPath::create(); + auto directory = bytedance::bolt::test::TempDirectoryPath::create(); const auto inputRowType = asRowType(input[0]->type()); if (isTableScanSupported(inputRowType)) { auto splits = makeSplits(input, directory->path); diff --git a/bolt/exec/tests/AggregationTest.cpp b/bolt/exec/tests/AggregationTest.cpp index 04d819a0c..5390c6867 100644 --- a/bolt/exec/tests/AggregationTest.cpp +++ b/bolt/exec/tests/AggregationTest.cpp @@ -34,6 +34,7 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/cudf/tests/CudfResource.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" @@ -46,7 +47,6 @@ #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/SumNonPODAggregate.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/tests/utils/WithGPUParamInterface.h" #include "bolt/serializers/ArrowSerializer.h" #include "folly/experimental/EventCount.h" @@ -1562,7 +1562,7 @@ TEST_P(AggregationTest, spillWithMemoryLimit) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) .config(QueryConfig::kSpillEnabled, true) @@ -1601,7 +1601,7 @@ TEST_P(AggregationTest, rowBasedspillWithMemoryLimit) { SCOPED_TRACE("rowbased aggregationMemLimit: 1, expectSpill: true"); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) .config(QueryConfig::kSpillEnabled, true) @@ -1691,7 +1691,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, DISABLED_spillWithEmptyPartition) { .planNode()) .copyResults(pool_.get()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -1815,7 +1815,7 @@ TEST_P(AggregationTest, DISABLED_spillWithNonSpillingPartition) { .planNode()) .copyResults(pool_.get()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -1830,8 +1830,8 @@ TEST_P(AggregationTest, DISABLED_spillWithNonSpillingPartition) { .config(QueryConfig::kSpillEnabled, "true") .config(QueryConfig::kAggregationSpillEnabled, "true") // .config(QueryConfig::kAggregationSpillAll, "false") - // Set to increase the hash table a little bit to only trigger spill - // on the partition with most spillable data. + // Set to increase the hash table a little bit to only trigger + // spill on the partition with most spillable data. .config(QueryConfig::kSpillableReservationGrowthPct, "25") .config(QueryConfig::kPreferredOutputBatchBytes, "1024") .assertResults(results); @@ -1866,7 +1866,7 @@ TEST_P(AggregationTest, spillAll) { auto results = AssertQueryBuilder(plan).copyResults(pool_.get()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); auto task = AssertQueryBuilder(plan) .spillDirectory(tempDirectory->path) @@ -1903,7 +1903,7 @@ TEST_P(AggregationTest, spillWithArrowSerde) { .planNode(); auto results = AssertQueryBuilder(plan).copyResults(pool_.get()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); bool sawArrowSerde = false; SCOPED_TESTVALUE_SET( "bytedance::bolt::exec::SpillState::appendToPartition", @@ -2482,7 +2482,7 @@ TEST_P(AggregationTest, outputBatchSizeCheckWithSpill) { inputs = largeVectors; } createDuckDbTable(inputs); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -2530,7 +2530,7 @@ TEST_P(AggregationTest, spillDuringOutputProcessing) { createDuckDbTable({input}); const int numOutputRows = 5; - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -2681,7 +2681,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, minSpillableMemoryReservation) { currentUsedBytes * minSpillableReservationPct / 100); }))); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(duckDbQueryRunner_) .spillDirectory(spillDirectory->path) @@ -2710,7 +2710,7 @@ TEST_P(AggregationTest, distinctWithSpilling) { auto vectors = makeVectors(rowType_, 10, 100); createDuckDbTable(vectors); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) .spillDirectory(spillDirectory->path) @@ -2733,7 +2733,7 @@ TEST_P(AggregationTest, distinctWithSpilling) { TEST_P(AggregationTest, spillingForAggrsWithDistinct) { auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -2790,7 +2790,7 @@ TEST_P(AggregationTest, distinctSpillWithMemoryLimit) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -2858,7 +2858,7 @@ TEST_P(AggregationTest, spillingForAggrsWithSorting) { auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; @@ -2911,7 +2911,7 @@ TEST_P(AggregationTest, preGroupedAggregationWithSpilling) { makeFlatVector(10, [](auto row) { return row; })})); } createDuckDbTable(vectors); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -3019,7 +3019,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimDuringInputProcessing) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -3165,7 +3165,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimDuringReserve) { batches.push_back(fuzzer.fuzzRow(rowType)); } - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -3278,7 +3278,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimDuringAllocation) { for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -3408,7 +3408,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimDuringOutputProcessing) { for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -3555,7 +3555,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimDuringNonReclaimableSection) { for (const auto& testData : testSettings) { SCOPED_TRACE(fmt::format("testData {}", testData.debugString())); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -3712,7 +3712,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimWithEmptyAggregationTable) { for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -4132,7 +4132,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimEmptyInput) { } })); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -4205,7 +4205,7 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimEmptyOutput) { } }))); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -4220,8 +4220,8 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimEmptyOutput) { .queryCtx(queryCtx) .config(QueryConfig::kSpillEnabled, true) .config(QueryConfig::kAggregationSpillEnabled, true) - // Set the output query configs to ensure fetch the result in one - // output batch. + // Set the output query configs to ensure fetch the result in + // one output batch. .config(QueryConfig::kPreferredOutputBatchBytes, 1UL << 30) .config(QueryConfig::kMaxOutputBatchRows, 1024) .assertResults(expectedResult); @@ -4248,7 +4248,7 @@ TEST_P(AggregationTest, maxSpillBytes) { .singleAggregation({"c0", "c1"}, {"array_agg(c2)"}) .capturePlanNodeId(aggregationNodeId) .planNode(); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); struct { @@ -4310,7 +4310,8 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimFromAggregation) { testingRunArbitration(op->pool()); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -4362,7 +4363,8 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimFromDistinctAggregation) { testingRunArbitration(op->pool()); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId aggrNodeId; auto task = AssertQueryBuilder(duckDbQueryRunner_) .spillDirectory(spillDirectory->path) @@ -4395,7 +4397,8 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimFromAggregationOnNoMoreInput) { std::vector sameQueries = {false, true}; for (bool sameQuery : sameQueries) { SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::shared_ptr fakeQueryCtx = core::QueryCtx::create(executor_.get()); std::shared_ptr aggregationQueryCtx; @@ -4487,7 +4490,8 @@ DEBUG_ONLY_TEST_P(AggregationTest, reclaimFromAggregationDuringOutput) { std::vector sameQueries = {false, true}; for (bool sameQuery : sameQueries) { SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::shared_ptr fakeQueryCtx = core::QueryCtx::create(executor_.get()); std::shared_ptr aggregationQueryCtx; @@ -4568,7 +4572,8 @@ TEST_P(AggregationTest, reclaimFromCompletedAggregation) { std::vector sameQueries = {false, true}; for (bool sameQuery : sameQueries) { SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::shared_ptr fakeQueryCtx = core::QueryCtx::create(executor_.get()); std::shared_ptr aggregationQueryCtx; @@ -4692,7 +4697,7 @@ TEST_P(AggregationTest, mutliKeysWithStringSpill) { createDuckDbTable({data}); auto plan = makePlan(true); std::string duckDbSql = "SELECT c0, c1, sum(c2) FROM tmp GROUP BY c0, c1"; - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideConfigUnsafe({ {core::QueryConfig::kTestingSpillPct, "100"}, @@ -4747,7 +4752,7 @@ TEST_P(AggregationTest, partialSpillWithMemoryLimit) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) @@ -4811,7 +4816,7 @@ TEST_P(AggregationTest, partialDistinctSpillWithMemoryLimit) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) @@ -4877,7 +4882,7 @@ TEST_P(AggregationTest, rowBasedSpillNull) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) diff --git a/bolt/exec/tests/AssertQueryBuilderTest.cpp b/bolt/exec/tests/AssertQueryBuilderTest.cpp index f73d6ed0b..cf7223d4c 100644 --- a/bolt/exec/tests/AssertQueryBuilderTest.cpp +++ b/bolt/exec/tests/AssertQueryBuilderTest.cpp @@ -29,16 +29,37 @@ */ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/connectors/hive/HiveConnectorSplit.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" namespace bytedance::bolt::exec::test { -using connector::hive::HiveConnectorSplitBuilder; -class AssertQueryBuilderTest : public HiveConnectorTestBase {}; - -TEST_F(AssertQueryBuilderTest, basic) { +class AssertQueryBuilderTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { + protected: + void SetUp() override { + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } +}; + +TEST_P(AssertQueryBuilderTest, basic) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); AssertQueryBuilder( @@ -49,7 +70,7 @@ TEST_F(AssertQueryBuilderTest, basic) { .assertResults(data); } -TEST_F(AssertQueryBuilderTest, serialExecution) { +TEST_P(AssertQueryBuilderTest, serialExecution) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); PlanBuilder builder; @@ -62,7 +83,7 @@ TEST_F(AssertQueryBuilderTest, serialExecution) { AssertQueryBuilder(plan).serialExecution(true).assertResults(data); } -TEST_F(AssertQueryBuilderTest, orderedResults) { +TEST_P(AssertQueryBuilderTest, orderedResults) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); AssertQueryBuilder( @@ -71,7 +92,7 @@ TEST_F(AssertQueryBuilderTest, orderedResults) { .assertResults("VALUES (3), (2), (1)", {{0}}); } -TEST_F(AssertQueryBuilderTest, concurrency) { +TEST_P(AssertQueryBuilderTest, concurrency) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); AssertQueryBuilder( @@ -84,7 +105,7 @@ TEST_F(AssertQueryBuilderTest, concurrency) { .assertResults({data, data, data}); } -TEST_F(AssertQueryBuilderTest, config) { +TEST_P(AssertQueryBuilderTest, config) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); AssertQueryBuilder( @@ -94,72 +115,11 @@ TEST_F(AssertQueryBuilderTest, config) { .assertResults("VALUES (2), (4), (6)"); } -TEST_F(AssertQueryBuilderTest, hiveSplits) { - auto data = makeRowVector({makeFlatVector({1, 2, 3})}); - - auto file = TempFilePath::create(); - writeToFile(file->path, {data}); - - // Single leaf node. - AssertQueryBuilder( - PlanBuilder().tableScan(asRowType(data->type())).planNode(), - duckDbQueryRunner_) - .split(makeHiveConnectorSplit(file->path)) - .assertResults("VALUES (1), (2), (3)"); +// Connector-specific split coverage (partition-keyed splits, etc.) lives in +// each connector's test directory (e.g. +// bolt/connectors/hive/tests/HiveAssertQueryBuilderTest.cpp for the Hive case). - // Split with partition key. - ColumnHandleMap assignments = { - {"ds", partitionKey("ds", VARCHAR())}, - {"c0", regularColumn("c0", BIGINT())}}; - - AssertQueryBuilder( - PlanBuilder() - .startTableScan() - .outputType(ROW({"c0", "ds"}, {INTEGER(), VARCHAR()})) - .tableHandle(makeTableHandle()) - .assignments(assignments) - .endTableScan() - .planNode(), - duckDbQueryRunner_) - .split(HiveConnectorSplitBuilder(file->path) - .connectorId(kHiveConnectorId) - .fileFormat(dwio::common::FileFormat::DWRF) - .partitionKey("ds", "2022-05-10") - .build()) - .assertResults( - "VALUES (1, '2022-05-10'), (2, '2022-05-10'), (3, '2022-05-10')"); - - // Two leaf nodes. - auto buildData = makeRowVector({makeFlatVector({2, 3})}); - auto buildFile = TempFilePath::create(); - writeToFile(buildFile->path, {buildData}); - - auto planNodeIdGenerator = std::make_shared(); - core::PlanNodeId probeScanId; - core::PlanNodeId buildScanId; - auto joinPlan = PlanBuilder(planNodeIdGenerator) - .tableScan(asRowType(data->type())) - .capturePlanNodeId(probeScanId) - .hashJoin( - {"c0"}, - {"b_c0"}, - PlanBuilder(planNodeIdGenerator) - .tableScan(asRowType(data->type())) - .capturePlanNodeId(buildScanId) - .project({"c0 as b_c0"}) - .planNode(), - "", - {"c0", "b_c0"}) - .singleAggregation({}, {"count(1)"}) - .planNode(); - - AssertQueryBuilder(joinPlan, duckDbQueryRunner_) - .split(probeScanId, makeHiveConnectorSplit(file->path)) - .split(buildScanId, makeHiveConnectorSplit(buildFile->path)) - .assertResults("SELECT 2"); -} - -TEST_F(AssertQueryBuilderTest, encodedResults) { +TEST_P(AssertQueryBuilderTest, encodedResults) { VectorFuzzer::Options opts; opts.vectorSize = 1000; opts.nullRatio = 0.1; @@ -203,7 +163,7 @@ TEST_F(AssertQueryBuilderTest, encodedResults) { assertEqualResults({flatInput}, {input}); } -TEST_F(AssertQueryBuilderTest, nestedArrayMapResults) { +TEST_P(AssertQueryBuilderTest, nestedArrayMapResults) { VectorFuzzer::Options opts; opts.vectorSize = 1000; opts.nullRatio = 0.1; @@ -230,4 +190,10 @@ TEST_F(AssertQueryBuilderTest, nestedArrayMapResults) { assertEqualResults({input}, {input}); } +INSTANTIATE_TEST_SUITE_P( + Connectors, + AssertQueryBuilderTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); + } // namespace bytedance::bolt::exec::test diff --git a/bolt/exec/tests/AsyncSpillerTest.cpp b/bolt/exec/tests/AsyncSpillerTest.cpp index 976705df6..baeeb5874 100644 --- a/bolt/exec/tests/AsyncSpillerTest.cpp +++ b/bolt/exec/tests/AsyncSpillerTest.cpp @@ -28,6 +28,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; using bytedance::bolt::filesystems::FileSystem; @@ -138,7 +139,7 @@ class SpillerTest : public exec::test::RowContainerTestBase { void SetUp() override { RowContainerTestBase::SetUp(); rng_.seed(1); - tempDirPath_ = exec::test::TempDirectoryPath::create(); + tempDirPath_ = bytedance::bolt::test::TempDirectoryPath::create(); fs_ = filesystems::getFileSystem(tempDirPath_->path, nullptr); rowType_ = ROW({ {"bool_val", BOOLEAN()}, diff --git a/bolt/exec/tests/ConcatFilesSpillMergeStreamTest.cpp b/bolt/exec/tests/ConcatFilesSpillMergeStreamTest.cpp index fd96784c2..9864da7e3 100644 --- a/bolt/exec/tests/ConcatFilesSpillMergeStreamTest.cpp +++ b/bolt/exec/tests/ConcatFilesSpillMergeStreamTest.cpp @@ -29,10 +29,10 @@ */ #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/SortBuffer.h" #include "bolt/exec/Spill.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/Type.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" @@ -41,6 +41,7 @@ using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt; using namespace bytedance::bolt::memory; @@ -214,7 +215,7 @@ class ConcatFilesSpillMergeStreamTest : public OperatorTestBase { const std::vector sortingKeys_ = SpillState::makeSortingKeys(sortColumnIndices_, sortCompareFlags_); const std::shared_ptr spillDirectory_ = - exec::test::TempDirectoryPath::create(); + bytedance::bolt::test::TempDirectoryPath::create(); const common::SpillConfig spillConfig_{ [&]() -> const std::string& { return spillDirectory_->getPath(); }, [&](uint64_t) {}, diff --git a/bolt/exec/tests/GroupedExecutionTest.cpp b/bolt/exec/tests/GroupedExecutionTest.cpp index b86aecc8d..c25fcbb24 100644 --- a/bolt/exec/tests/GroupedExecutionTest.cpp +++ b/bolt/exec/tests/GroupedExecutionTest.cpp @@ -30,25 +30,45 @@ #include -#include #include "bolt/common/base/tests/GTestUtils.h" -#include "bolt/connectors/hive/HiveConnectorSplit.h" +#include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/OutputBufferManager.h" #include "bolt/exec/TableScan.h" #include "bolt/exec/tests/utils/Cursor.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/type/Type.h" + namespace bytedance::bolt::exec::test { -class GroupedExecutionTest : public virtual HiveConnectorTestBase { +class GroupedExecutionTest : public virtual OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: void SetUp() override { - HiveConnectorTestBase::SetUp(); + OperatorTestBase::SetUp(); + bytedance::bolt::filesystems::registerLocalFileSystem(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); } static void SetUpTestCase() { - HiveConnectorTestBase::SetUpTestCase(); + OperatorTestBase::SetUpTestCase(); } std::vector makeVectors( @@ -56,15 +76,19 @@ class GroupedExecutionTest : public virtual HiveConnectorTestBase { int32_t rowsPerVector, const RowTypePtr& rowType = nullptr) { auto inputs = rowType ? rowType : rowType_; - return HiveConnectorTestBase::makeVectors(inputs, count, rowsPerVector); + return OperatorTestBase::makeVectors(inputs, count, rowsPerVector); } - exec::Split makeHiveSplitWithGroup(std::string path, int32_t group) { - return exec::Split(makeHiveConnectorSplit(std::move(path)), group); + exec::Split makeSplitWithGroup(std::string path, int32_t group) { + return exec::Split( + connector::test::makeConnectorSplit( + GetParam().connectorName, std::move(path)), + group); } exec::Split makeHiveSplit(std::string path) { - return exec::Split(makeHiveConnectorSplit(std::move(path))); + return exec::Split(connector::test::makeConnectorSplit( + GetParam().connectorName, std::move(path))); } static core::PlanNodePtr tableScanNode(const RowTypePtr& outputType) { @@ -101,7 +125,7 @@ class GroupedExecutionTest : public virtual HiveConnectorTestBase { }; // Here we test the grouped execution sanity checks. -TEST_F(GroupedExecutionTest, groupedExecutionErrors) { +TEST_P(GroupedExecutionTest, groupedExecutionErrors) { auto planNodeIdGenerator = std::make_shared(); core::PlanNodeId tableScanNodeId; core::PlanNodeId projectNodeId; @@ -215,10 +239,10 @@ TEST_F(GroupedExecutionTest, groupedExecutionErrors) { // Here we test various aspects of grouped/bucketed execution involving // output buffer and 3 pipelines. -TEST_F(GroupedExecutionTest, groupedExecutionWithOutputBuffer) { +TEST_P(GroupedExecutionTest, groupedExecutionWithOutputBuffer) { // Create source file - we will read from it in 6 splits. auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); // A chain of three pipelines separated by local exchange with the leaf one @@ -254,7 +278,7 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithOutputBuffer) { EXPECT_EQ(0, task->numRunningDrivers()); // Add one split for group (8). - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); // Only one split group should be in the processing mode, so 9 drivers (3 per // pipeline). @@ -262,11 +286,11 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithOutputBuffer) { EXPECT_EQ(std::unordered_set{}, getCompletedSplitGroups(task)); // Add the rest of splits - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 1)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 1)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 5)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 5)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); // One split group should be in the processing mode, so 9 drivers. EXPECT_EQ(9, task->numRunningDrivers()); @@ -328,10 +352,10 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithOutputBuffer) { // Here we test various aspects of grouped/bucketed execution involving // output buffer and 3 pipelines. -TEST_F(GroupedExecutionTest, groupedExecutionWithHashAndNestedLoopJoin) { +TEST_P(GroupedExecutionTest, groupedExecutionWithHashAndNestedLoopJoin) { // Create source file - we will read from it in 6 splits. auto vectors = makeVectors(4, 20); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); // Run the test twice - for Hash and Cross Join. @@ -398,10 +422,13 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithHashAndNestedLoopJoin) { EXPECT_EQ(3, task->numRunningDrivers()); // Add single split to the build scan. - task->addSplit(buildScanNodeId, makeHiveSplit(filePath->path)); + task->addSplit( + buildScanNodeId, + Split(connector::test::makeConnectorSplit( + GetParam().connectorName, filePath->path))); // Add one split for group (8). - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 8)); // Only one split group should be in the processing mode, so 9 drivers (3 // per pipeline) grouped + 3 ungrouped. @@ -409,11 +436,11 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithHashAndNestedLoopJoin) { EXPECT_EQ(std::unordered_set{}, getCompletedSplitGroups(task)); // Add the rest of splits - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 1)); - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 8)); - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit(probeScanNodeId, makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 1)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 5)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 8)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 5)); + task->addSplit(probeScanNodeId, makeSplitWithGroup(filePath->path, 8)); // One split group should be in the processing mode, so 9 drivers (3 per // pipeline) grouped + 3 ungrouped. @@ -494,11 +521,11 @@ TEST_F(GroupedExecutionTest, groupedExecutionWithHashAndNestedLoopJoin) { } // Here we test various aspects of grouped/bucketed execution. -TEST_F(GroupedExecutionTest, groupedExecution) { +TEST_P(GroupedExecutionTest, groupedExecution) { // Create source file - we will read from it in 6 splits. const size_t numSplits{6}; auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); CursorParameters params; @@ -519,7 +546,7 @@ TEST_F(GroupedExecutionTest, groupedExecution) { auto task = cursor->task(); // Add one splits before start to ensure we can handle such cases. - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); // Start task now. cursor->start(); @@ -529,11 +556,11 @@ TEST_F(GroupedExecutionTest, groupedExecution) { EXPECT_EQ(std::unordered_set{}, getCompletedSplitGroups(task)); // Add the rest of splits - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 1)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 5)); - task->addSplit("0", makeHiveSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 1)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 5)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 5)); + task->addSplit("0", makeSplitWithGroup(filePath->path, 8)); // Only two split groups should be in the processing mode, so 4 drivers. EXPECT_EQ(4, task->numRunningDrivers()); @@ -587,4 +614,10 @@ TEST_F(GroupedExecutionTest, groupedExecution) { EXPECT_EQ(numRead, numSplits * 10'000); } +INSTANTIATE_TEST_SUITE_P( + Connectors, + GroupedExecutionTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); + } // namespace bytedance::bolt::exec::test diff --git a/bolt/exec/tests/HashJoinBridgeTest.cpp b/bolt/exec/tests/HashJoinBridgeTest.cpp index 93a3a1405..a5aad2880 100644 --- a/bolt/exec/tests/HashJoinBridgeTest.cpp +++ b/bolt/exec/tests/HashJoinBridgeTest.cpp @@ -31,12 +31,12 @@ #include "bolt/exec/HashJoinBridge.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/HashTable.h" #include "bolt/exec/Spill.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec; -using bytedance::bolt::exec::test::TempDirectoryPath; +using bytedance::bolt::test::TempDirectoryPath; struct TestParam { int32_t numProbers{1}; @@ -73,7 +73,7 @@ class HashJoinBridgeTest : public testing::Test, void SetUp() override { rng_.seed(1245); - tempDir_ = exec::test::TempDirectoryPath::create(); + tempDir_ = bytedance::bolt::test::TempDirectoryPath::create(); } void TearDown() override {} diff --git a/bolt/exec/tests/HashJoinTest.cpp b/bolt/exec/tests/HashJoinTest.cpp index 50b48cfa1..6c54d648a 100644 --- a/bolt/exec/tests/HashJoinTest.cpp +++ b/bolt/exec/tests/HashJoinTest.cpp @@ -28,29 +28,30 @@ * -------------------------------------------------------------------------- */ -#include +#include #include -#include #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" +#include "bolt/core/PlanNode.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/exec/HashBuild.h" #include "bolt/exec/HashJoinBridge.h" #include "bolt/exec/PlanNodeStats.h" -#include "bolt/exec/TableScan.h" #include "bolt/exec/tests/utils/ArbitratorTestUtil.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/tests/utils/VectorTestUtil.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "folly/experimental/EventCount.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; using bytedance::bolt::connector::hive::HiveConnectorSplitBuilder; @@ -620,7 +621,7 @@ class HashJoinBuilder { } int32_t spillPct{0}; if (injectSpill) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); builder.spillDirectory(spillDirectory->path); config(core::QueryConfig::kJitLevel, "-1"); config(core::QueryConfig::kSpillEnabled, "true"); @@ -634,7 +635,7 @@ class HashJoinBuilder { enableSkewedPartitionTest ? "true" : "false"); spillPct = 100; } else if (spillMemoryThreshold_ != 0) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); builder.spillDirectory(spillDirectory->path); config(core::QueryConfig::kSpillEnabled, "true"); config(core::QueryConfig::kMaxSpillLevel, std::to_string(maxSpillLevel)); @@ -805,7 +806,9 @@ class HashJoinTest : public HiveConnectorTestBase { // Make splits with each plan node having a number of source files. SplitInput makeSpiltInput( const std::vector& nodeIds, - const std::vector>>& files) { + const std::vector< + std::vector>>& + files) { BOLT_CHECK_EQ(nodeIds.size(), files.size()); SplitInput splitInput; for (int i = 0; i < nodeIds.size(); ++i) { @@ -860,10 +863,12 @@ class HashJoinTest : public HiveConnectorTestBase { vectorSize, [](auto row) { return row * 3; })}); }); - std::shared_ptr probeFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> probeFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(probeFile->getPath(), probeVectors); - std::shared_ptr buildFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> buildFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(buildFile->getPath(), buildVectors); createDuckDbTable("t", probeVectors); @@ -1920,10 +1925,12 @@ TEST_P(MultiThreadedHashJoinTest, semiFilterOverLazyVectors) { }); }); - std::shared_ptr probeFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> probeFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(probeFile->path, probeVectors); - std::shared_ptr buildFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> buildFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(buildFile->path, buildVectors); createDuckDbTable("t", probeVectors); @@ -3582,10 +3589,12 @@ TEST_F(HashJoinTest, nullAwareRightSemiProjectOverScan) { makeNullableFlatVector({1, 2, 3, std::nullopt}), }); - std::shared_ptr probeFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> probeFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(probeFile->path, {probe}); - std::shared_ptr buildFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> buildFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(buildFile->path, {build}); createDuckDbTable("t", {probe}); @@ -4182,10 +4191,12 @@ TEST_F(HashJoinTest, semiProjectOverLazyVectors) { }); }); - std::shared_ptr probeFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> probeFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(probeFile->path, probeVectors); - std::shared_ptr buildFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> buildFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(buildFile->path, buildVectors); createDuckDbTable("t", probeVectors); @@ -4329,16 +4340,16 @@ TEST_F(HashJoinTest, lazyVectors) { 10'000, [](auto row) { return row % 31; })}); }); - std::vector> tempFiles; + std::vector> tempFiles; for (const auto& probeVector : probeVectors) { - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->path, probeVector); } createDuckDbTable("t", probeVectors); for (const auto& buildVector : buildVectors) { - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->path, buildVector); } createDuckDbTable("u", buildVectors); @@ -4577,7 +4588,7 @@ TEST_F(HashJoinTest, dynamicFilters) { std::vector probeVectors; probeVectors.reserve(numSplits); - std::vector> tempFiles; + std::vector> tempFiles; for (int32_t i = 0; i < numSplits; ++i) { auto rowVector = makeRowVector({ makeFlatVector( @@ -4585,7 +4596,7 @@ TEST_F(HashJoinTest, dynamicFilters) { makeFlatVector(numRowsProbe, [](auto row) { return row; }), }); probeVectors.push_back(rowVector); - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->path, rowVector); } auto makeInputSplits = [&](const core::PlanNodeId& nodeId) { @@ -5239,7 +5250,7 @@ TEST_F(HashJoinTest, dynamicFiltersStatsWithChainedJoins) { std::vector probeVectors; probeVectors.reserve(numSplits); - std::vector> tempFiles; + std::vector> tempFiles; for (int32_t i = 0; i < numSplits; ++i) { auto rowVector = makeRowVector({ makeFlatVector( @@ -5247,7 +5258,7 @@ TEST_F(HashJoinTest, dynamicFiltersStatsWithChainedJoins) { makeFlatVector(numProbeRows, [](auto row) { return row; }), }); probeVectors.push_back(rowVector); - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->getPath(), rowVector); } auto makeInputSplits = [&](const core::PlanNodeId& nodeId) { @@ -5338,7 +5349,7 @@ TEST_F(HashJoinTest, dynamicFiltersWithSkippedSplits) { std::vector probeVectors; probeVectors.reserve(numSplits); - std::vector> tempFiles; + std::vector> tempFiles; // Each split has a column containing // the split number. This is used to filter out whole splits based // on metadata. We test how using metadata for dropping splits @@ -5354,7 +5365,7 @@ TEST_F(HashJoinTest, dynamicFiltersWithSkippedSplits) { numRowsProbe, [&](auto /*row*/) { return i % 2 == 0 ? 0 : i; }), }); probeVectors.push_back(rowVector); - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->path, rowVector); } @@ -5559,7 +5570,7 @@ TEST_F(HashJoinTest, dynamicFiltersAppliedToPreloadedSplits) { probeVectors.reserve(numSplits); // Prepare probe side table. - std::vector> tempFiles; + std::vector> tempFiles; std::vector probeSplits; for (int32_t i = 0; i < numSplits; ++i) { auto rowVector = makeRowVector( @@ -5570,7 +5581,7 @@ TEST_F(HashJoinTest, dynamicFiltersAppliedToPreloadedSplits) { makeFlatVector(size, [&](auto /*row*/) { return i; }), }); probeVectors.push_back(rowVector); - tempFiles.push_back(TempFilePath::create()); + tempFiles.push_back(::bytedance::bolt::test::TempFilePath::create()); writeToFile(tempFiles.back()->path, rowVector); auto split = HiveConnectorSplitBuilder(tempFiles.back()->path) .connectorId(kHiveConnectorId) @@ -5646,7 +5657,8 @@ TEST_F(HashJoinTest, dynamicFiltersPushDownThroughAgg) { makeFlatVector(numRowsProbe, [&](auto row) { return row - 10; }), makeFlatVector(numRowsProbe, folly::identity), })}; - std::shared_ptr probeFile = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> probeFile = + ::bytedance::bolt::test::TempFilePath::create(); writeToFile(probeFile->getPath(), probeVectors); // Create build data @@ -5723,7 +5735,7 @@ TEST_F(HashJoinTest, noDynamicFiltersPushDownThroughRightJoin) { makeFlatVector(10, folly::identity), makeFlatVector(10, folly::identity), })}; - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->getPath(), rightProbe); auto planNodeIdGenerator = std::make_shared(); core::PlanNodeId scanNodeId; @@ -6149,7 +6161,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, buildReservationReleaseCheck) { // NOTE: the spilling setup is to trigger memory reservation code path which // only gets executed when spilling is enabled. We don't care about if // spilling is really triggered in test or not. - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); params.spillDirectory = spillDirectory->path; params.queryCtx->testingOverrideConfigUnsafe( {{core::QueryConfig::kSpillEnabled, "true"}, @@ -6258,7 +6270,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringInputProcessing) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool( "", kMaxBytes, memory::MemoryReclaimer::create()); @@ -6407,7 +6419,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringReserve) { createDuckDbTable("t", probeVectors); createDuckDbTable("u", buildVectors); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool( "", kMaxBytes, memory::MemoryReclaimer::create()); @@ -6539,7 +6551,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringAllocation) { for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool("", kMaxBytes); core::PlanNodeId probeScanId; @@ -6671,7 +6683,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringOutputProcessing) { const std::vector enableSpillings = {false, true}; for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool( "", kMaxBytes, memory::MemoryReclaimer::create()); @@ -6798,7 +6810,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimDuringWaitForProbe) { createDuckDbTable("t", probeVectors); createDuckDbTable("u", buildVectors); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool( "", kMaxBytes, memory::MemoryReclaimer::create()); @@ -7388,7 +7400,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, minSpillableMemoryReservation) { currentUsedBytes * minSpillableReservationPct / 100); }))); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get()) .numDrivers(numDrivers_) .hybridJoin(hybridJoin_) @@ -7432,7 +7444,7 @@ TEST_F(HashJoinTest, exceededMaxSpillLevel) { concat(probeType_->names(), buildType_->names())) .planNode(); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); const int exceededMaxSpillLevelCount = common::globalSpillStats().spillMaxLevelExceededCount; HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get()) @@ -7483,7 +7495,7 @@ TEST_F(HashJoinTest, maxSpillBytes) { core::JoinType::kInner) .planNode(); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); struct { @@ -7541,7 +7553,7 @@ TEST_F(HashJoinTest, onlyHashBuildMaxSpillBytes) { core::JoinType::kInner) .planNode(); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); struct { @@ -7586,7 +7598,8 @@ DEBUG_ONLY_TEST_F(HashJoinTest, reclaimFromJoinBuild) { std::vector sameQueries = {false, true}; for (bool sameQuery : sameQueries) { SCOPED_TRACE(fmt::format("sameQuery {}", sameQuery)); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); std::shared_ptr fakeQueryCtx = newQueryCtx(memoryManager.get(), executor_.get(), kMemoryCapacity * 2); std::shared_ptr joinQueryCtx; @@ -7944,7 +7957,8 @@ DEBUG_ONLY_TEST_F(HashJoinTest, arbitrationTriggeredByEnsureJoinTableFit) { fuzzerOpts_.vectorSize = 128; auto probeVectors = createVectors(10, probeType_, fuzzerOpts_); auto buildVectors = createVectors(20, buildType_, fuzzerOpts_); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get()) .numDrivers(1) .spillDirectory(spillDirectory->path) @@ -7997,7 +8011,8 @@ DEBUG_ONLY_TEST_F(HashJoinTest, joinBuildSpillError) { })); auto planNodeIdGenerator = std::make_shared(); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); auto plan = PlanBuilder(planNodeIdGenerator) .values(vectors) .project({"c0 AS t0", "c1 AS t1", "c2 AS t2"}) @@ -8157,7 +8172,7 @@ DEBUG_ONLY_TEST_F(HashJoinTest, skewPartitionSpill) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryPool = memory::memoryManager()->addRootPool( "", kMaxBytes, memory::MemoryReclaimer::create()); diff --git a/bolt/exec/tests/JoinFuzzer.cpp b/bolt/exec/tests/JoinFuzzer.cpp index c70cca8f7..eda202f90 100644 --- a/bolt/exec/tests/JoinFuzzer.cpp +++ b/bolt/exec/tests/JoinFuzzer.cpp @@ -32,13 +32,13 @@ #include #include #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/dwio/dwrf/reader/DwrfReader.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" DEFINE_int32(steps, 10, "Number of plans to generate and test."); @@ -362,10 +362,10 @@ RowVectorPtr JoinFuzzer::execute(const PlanWithSplits& plan, bool injectSpill) { builder.splits(nodeId, nodeSplits); } - std::shared_ptr spillDirectory; + std::shared_ptr spillDirectory; int32_t spillPct{0}; if (injectSpill) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); builder.config(core::QueryConfig::kSpillEnabled, "true") .config(core::QueryConfig::kAggregationSpillEnabled, "true") .spillDirectory(spillDirectory->path); @@ -873,7 +873,7 @@ void JoinFuzzer::verify(core::JoinType joinType) { makeAlternativePlans(plan.plan, probeInput, buildInput, altPlans); makeAlternativePlans(plan.plan, flatProbeInput, flatBuildInput, altPlans); - auto directory = exec::test::TempDirectoryPath::create(); + auto directory = bytedance::bolt::test::TempDirectoryPath::create(); if (isTableScanSupported(probeInput[0]->type()) && isTableScanSupported(buildInput[0]->type())) { diff --git a/bolt/exec/tests/LimitTest.cpp b/bolt/exec/tests/LimitTest.cpp index 5afb8608f..f73cbc2b6 100644 --- a/bolt/exec/tests/LimitTest.cpp +++ b/bolt/exec/tests/LimitTest.cpp @@ -28,15 +28,40 @@ * -------------------------------------------------------------------------- */ +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/OutputBufferManager.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; + +class LimitTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { + protected: + void SetUp() override { + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } -class LimitTest : public HiveConnectorTestBase {}; + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } +}; -TEST_F(LimitTest, basic) { +TEST_P(LimitTest, basic) { vector_size_t batchSize = 1'000; std::vector vectors; for (int32_t i = 0; i < 3; ++i) { @@ -78,11 +103,11 @@ TEST_F(LimitTest, basic) { assertQueryReturnsEmptyResult(makePlan(12'345, 10)); } -TEST_F(LimitTest, limitOverLocalExchange) { +TEST_P(LimitTest, limitOverLocalExchange) { auto data = makeRowVector( {makeFlatVector(1'000, [](auto row) { return row; })}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); core::PlanNodeId scanNodeId; @@ -97,7 +122,9 @@ TEST_F(LimitTest, limitOverLocalExchange) { auto cursor = TaskCursor::create(params); cursor->task()->addSplit( - scanNodeId, exec::Split(makeHiveConnectorSplit(file->path))); + scanNodeId, + exec::Split(connector::test::makeConnectorSplit( + GetParam().connectorName, file->path))); int32_t numRead = 0; while (cursor->moveNext()) { @@ -112,7 +139,7 @@ TEST_F(LimitTest, limitOverLocalExchange) { ASSERT_TRUE(waitForTaskCompletion(cursor->task().get())); } -TEST_F(LimitTest, partialLimitEagerFlush) { +TEST_P(LimitTest, partialLimitEagerFlush) { std::vector batches( 10, makeRowVector({makeFlatVector(std::vector(1, 0))})); auto test = [&](bool projectInBetween) { @@ -144,3 +171,9 @@ TEST_F(LimitTest, partialLimitEagerFlush) { test(true); test(false); } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + LimitTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); diff --git a/bolt/exec/tests/LocalPartitionTest.cpp b/bolt/exec/tests/LocalPartitionTest.cpp index 9e8885597..11ba0bc99 100644 --- a/bolt/exec/tests/LocalPartitionTest.cpp +++ b/bolt/exec/tests/LocalPartitionTest.cpp @@ -28,18 +28,38 @@ * -------------------------------------------------------------------------- */ +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; -class LocalPartitionTest : public HiveConnectorTestBase { +class LocalPartitionTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: void SetUp() override { - HiveConnectorTestBase::SetUp(); + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); } template @@ -53,8 +73,8 @@ class LocalPartitionTest : public HiveConnectorTestBase { size, [start, max](auto row) { return (start + row) % max; }); } - std::vector> writeToFiles( - const std::vector& vectors) { + std::vector> + writeToFiles(const std::vector& vectors) { auto filePaths = makeFilePaths(vectors.size()); for (auto i = 0; i < vectors.size(); i++) { writeToFile(filePaths[i]->path, vectors[i]); @@ -101,7 +121,7 @@ class LocalPartitionTest : public HiveConnectorTestBase { } }; -TEST_F(LocalPartitionTest, gather) { +TEST_P(LocalPartitionTest, gather) { std::vector vectors = { makeRowVector({makeFlatSequence(0, 100)}), makeRowVector({makeFlatSequence(53, 100)}), @@ -154,14 +174,16 @@ TEST_F(LocalPartitionTest, gather) { AssertQueryBuilder queryBuilder(op, duckDbQueryRunner_); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } task = queryBuilder.assertResults("SELECT 300, -71, 152"); verifyExchangeSourceOperatorStats(task, 300, 3); } -TEST_F(LocalPartitionTest, partition) { +TEST_P(LocalPartitionTest, partition) { std::vector vectors = { makeRowVector({makeFlatSequence(0, 100)}), makeRowVector({makeFlatSequence(53, 100)}), @@ -200,7 +222,9 @@ TEST_F(LocalPartitionTest, partition) { queryBuilder.maxDrivers(2); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } auto task = @@ -208,7 +232,7 @@ TEST_F(LocalPartitionTest, partition) { verifyExchangeSourceOperatorStats(task, 300, 6); } -TEST_F(LocalPartitionTest, maxBufferSizeGather) { +TEST_P(LocalPartitionTest, maxBufferSizeGather) { std::vector vectors; for (auto i = 0; i < 21; i++) { vectors.emplace_back(makeRowVector({makeFlatVector( @@ -242,7 +266,7 @@ TEST_F(LocalPartitionTest, maxBufferSizeGather) { verifyExchangeSourceOperatorStats(task, 2100, 21); } -TEST_F(LocalPartitionTest, maxBufferSizePartition) { +TEST_P(LocalPartitionTest, maxBufferSizePartition) { std::vector vectors; for (auto i = 0; i < 21; i++) { vectors.emplace_back(makeRowVector({makeFlatVector( @@ -281,7 +305,9 @@ TEST_F(LocalPartitionTest, maxBufferSizePartition) { queryBuilder.maxDrivers(2); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i % 3], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i % 3], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } queryBuilder.config( core::QueryConfig::kMaxLocalExchangeBufferSize, bufferSize); @@ -299,7 +325,7 @@ TEST_F(LocalPartitionTest, maxBufferSizePartition) { verifyExchangeSourceOperatorStats(task, 2100, 42); } -TEST_F(LocalPartitionTest, indicesBufferCapacity) { +TEST_P(LocalPartitionTest, indicesBufferCapacity) { std::vector vectors; for (auto i = 0; i < 21; i++) { vectors.emplace_back(makeRowVector({makeFlatVector( @@ -330,7 +356,9 @@ TEST_F(LocalPartitionTest, indicesBufferCapacity) { for (auto i = 0; i < filePaths.size(); ++i) { auto id = scanNodeIds[i % 3]; cursor->task()->addSplit( - id, Split(makeHiveConnectorSplit(filePaths[i]->path))); + id, + Split(connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path))); cursor->task()->noMoreSplits(id); } int numRows = 0; @@ -348,7 +376,7 @@ TEST_F(LocalPartitionTest, indicesBufferCapacity) { ASSERT_LE(capacity, 1.5 * numRows * sizeof(vector_size_t)); } -TEST_F(LocalPartitionTest, blockingOnLocalExchangeQueue) { +TEST_P(LocalPartitionTest, blockingOnLocalExchangeQueue) { auto localExchangeBufferSize = "1024"; auto baseVector = vectorMaker_.flatVector( 10240, [](auto row) { return row / 10; }); @@ -420,7 +448,7 @@ TEST_F(LocalPartitionTest, blockingOnLocalExchangeQueue) { } } -TEST_F(LocalPartitionTest, multipleExchanges) { +TEST_P(LocalPartitionTest, multipleExchanges) { std::vector vectors = { makeRowVector({ makeFlatSequence(0, 100), @@ -474,7 +502,9 @@ TEST_F(LocalPartitionTest, multipleExchanges) { AssertQueryBuilder queryBuilder(op, duckDbQueryRunner_); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } queryBuilder.maxDrivers(2).assertResults( @@ -483,7 +513,7 @@ TEST_F(LocalPartitionTest, multipleExchanges) { ") t GROUP BY 1"); } -TEST_F(LocalPartitionTest, earlyCompletion) { +TEST_P(LocalPartitionTest, earlyCompletion) { std::vector data = { makeRowVector({makeFlatSequence(3, 100)}), makeRowVector({makeFlatSequence(7, 100)}), @@ -508,7 +538,7 @@ TEST_F(LocalPartitionTest, earlyCompletion) { assertTaskReferenceCount(task, 1); } -TEST_F(LocalPartitionTest, earlyCancelation) { +TEST_P(LocalPartitionTest, earlyCancelation) { std::vector data = { makeRowVector({makeFlatSequence(3, 100)}), makeRowVector({makeFlatSequence(7, 100)}), @@ -563,7 +593,7 @@ TEST_F(LocalPartitionTest, earlyCancelation) { assertTaskReferenceCount(task, 1); } -TEST_F(LocalPartitionTest, producerError) { +TEST_P(LocalPartitionTest, producerError) { std::vector data = { makeRowVector({makeFlatSequence(3, 100)}), makeRowVector({makeFlatSequence(7, 100)}), @@ -600,7 +630,7 @@ TEST_F(LocalPartitionTest, producerError) { assertTaskReferenceCount(task, 1); } -TEST_F(LocalPartitionTest, unionAll) { +TEST_P(LocalPartitionTest, unionAll) { auto data1 = makeRowVector( {"d0", "d1"}, {makeFlatVector({10, 11}), @@ -631,7 +661,7 @@ TEST_F(LocalPartitionTest, unionAll) { "SELECT * FROM t1 UNION ALL SELECT * FROM t2"); } -TEST_F(LocalPartitionTest, unionAllLocalExchange) { +TEST_P(LocalPartitionTest, unionAllLocalExchange) { auto data1 = makeRowVector({"d0"}, {makeFlatVector({"x"})}); auto data2 = makeRowVector({"e0"}, {makeFlatVector({"y"})}); @@ -656,3 +686,9 @@ TEST_F(LocalPartitionTest, unionAllLocalExchange) { " SELECT * FROM (VALUES ('y')) as t2(c0)" ")"); } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + LocalPartitionTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); diff --git a/bolt/exec/tests/LocalShuffleTest.cpp b/bolt/exec/tests/LocalShuffleTest.cpp index 50a0fd215..1ec345b0d 100644 --- a/bolt/exec/tests/LocalShuffleTest.cpp +++ b/bolt/exec/tests/LocalShuffleTest.cpp @@ -21,6 +21,7 @@ #include #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/core/QueryConfig.h" #include "bolt/exec/PlanNodeStats.h" @@ -29,7 +30,6 @@ #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/QueryAssertions.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/VectorPrinter.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "folly/experimental/EventCount.h" @@ -45,6 +45,7 @@ using namespace bytedance::bolt::exec; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::core; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec::test { class LocalShuffleTest : public OperatorTestBase { protected: diff --git a/bolt/exec/tests/MergeJoinTest.cpp b/bolt/exec/tests/MergeJoinTest.cpp index 1f370962c..bccc217a2 100644 --- a/bolt/exec/tests/MergeJoinTest.cpp +++ b/bolt/exec/tests/MergeJoinTest.cpp @@ -30,9 +30,12 @@ #include #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/vector/BaseVector.h" @@ -41,6 +44,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace { @@ -79,10 +83,31 @@ makeDictionaryJoinRegressionData() { } // namespace -class MergeJoinTest : public HiveConnectorTestBase { +class MergeJoinTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: using OperatorTestBase::assertQuery; + void SetUp() override { + OperatorTestBase::SetUp(); + filesystems::registerLocalFileSystem(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } + CursorParameters makeCursorParameters( const std::shared_ptr& planNode, uint32_t preferredOutputBatchSize) { @@ -531,27 +556,27 @@ class MergeJoinTest : public HiveConnectorTestBase { } }; -TEST_F(MergeJoinTest, oneToOneAllMatch) { +TEST_P(MergeJoinTest, oneToOneAllMatch) { testJoin([](auto row) { return row; }, [](auto row) { return row; }); } -TEST_F(MergeJoinTest, someDontMatch) { +TEST_P(MergeJoinTest, someDontMatch) { testJoin( [](auto row) { return row % 5 == 0 ? row - 1 : row; }, [](auto row) { return row % 7 == 0 ? row - 1 : row; }); } -TEST_F(MergeJoinTest, fewMatch) { +TEST_P(MergeJoinTest, fewMatch) { testJoin( [](auto row) { return row * 5; }, [](auto row) { return row * 7; }); } -TEST_F(MergeJoinTest, duplicateMatch) { +TEST_P(MergeJoinTest, duplicateMatch) { testJoin( [](auto row) { return row / 2; }, [](auto row) { return row / 3; }); } -TEST_F(MergeJoinTest, allRowsMatch) { +TEST_P(MergeJoinTest, allRowsMatch) { std::vector leftKeys = { makeFlatVector(2, [](auto /* row */) { return 5; }), makeFlatVector(3, [](auto /* row */) { return 5; }), @@ -564,13 +589,13 @@ TEST_F(MergeJoinTest, allRowsMatch) { testJoin(rightKeys, leftKeys); } -TEST_F(MergeJoinTest, keySkew) { +TEST_P(MergeJoinTest, keySkew) { testJoin( [](auto row) { return row; }, [](auto row) { return row < 10 ? row : row + 10240; }); } -TEST_F(MergeJoinTest, dictionaryEncodedRightProjectionRegression) { +TEST_P(MergeJoinTest, dictionaryEncodedRightProjectionRegression) { const auto [leftRows, rightRows] = makeDictionaryJoinRegressionData(); auto leftBatch = makeDictionaryJoinLeftBatch(leftRows); @@ -607,7 +632,7 @@ TEST_F(MergeJoinTest, dictionaryEncodedRightProjectionRegression) { "u._hoodie_record_key"); } -TEST_F(MergeJoinTest, aggregationOverJoin) { +TEST_P(MergeJoinTest, aggregationOverJoin) { auto left = makeRowVector({"t_c0"}, {makeFlatVector({1, 2, 3, 4, 5})}); auto right = makeRowVector({"u_c0"}, {makeFlatVector({2, 4, 6})}); @@ -631,7 +656,7 @@ TEST_F(MergeJoinTest, aggregationOverJoin) { ASSERT_EQ(2, result.value()); } -TEST_F(MergeJoinTest, nonFirstJoinKeys) { +TEST_P(MergeJoinTest, nonFirstJoinKeys) { auto left = makeRowVector( {"t_data", "t_key"}, { @@ -661,7 +686,7 @@ TEST_F(MergeJoinTest, nonFirstJoinKeys) { assertQuery(plan, "VALUES (2, 40, 23), (4, 20, 22)"); } -TEST_F(MergeJoinTest, innerJoinFilter) { +TEST_P(MergeJoinTest, innerJoinFilter) { vector_size_t size = 1'000; // Join keys on the left side: 0, 10, 20,.. // Payload on the left side: 0, 1, 2, 3,.. @@ -733,7 +758,7 @@ TEST_F(MergeJoinTest, innerJoinFilter) { "SELECT t_c0, u_c0, u_c1 FROM t, u WHERE t_c0 = u_c0 AND (t_c1 + u_c1) % 2 = 0"); } -TEST_F(MergeJoinTest, leftAndRightJoinFilter) { +TEST_P(MergeJoinTest, leftAndRightJoinFilter) { // Each row on the left side has at most one match on the right side. auto left = makeRowVector( {"t_c0", "t_c1"}, @@ -829,7 +854,7 @@ TEST_F(MergeJoinTest, leftAndRightJoinFilter) { } } -TEST_F(MergeJoinTest, rightJoinWithDuplicateMatch) { +TEST_P(MergeJoinTest, rightJoinWithDuplicateMatch) { // Each row on the left side has at most one match on the right side. auto left = makeRowVector( {"a", "b"}, @@ -868,7 +893,7 @@ TEST_F(MergeJoinTest, rightJoinWithDuplicateMatch) { .assertResults("SELECT * from t RIGHT JOIN u ON a = c AND b < d"); } -TEST_F(MergeJoinTest, fullJoinNullLess) { +TEST_P(MergeJoinTest, fullJoinNullLess) { // Each row on the left side has at most one match on the right side. auto right = makeRowVector( {"c", "d"}, @@ -912,7 +937,7 @@ TEST_F(MergeJoinTest, fullJoinNullLess) { .assertResults("SELECT * from t full JOIN u ON t.a=u.c and t.b=u.d"); } -TEST_F(MergeJoinTest, rightJoinFilterWithNull) { +TEST_P(MergeJoinTest, rightJoinFilterWithNull) { auto left = makeRowVector( {"a", "b"}, { @@ -951,7 +976,7 @@ TEST_F(MergeJoinTest, rightJoinFilterWithNull) { // Verify that both left-side and right-side pipelines feeding the merge join // always run single-threaded. -TEST_F(MergeJoinTest, numDrivers) { +TEST_P(MergeJoinTest, numDrivers) { auto left = makeRowVector({"t_c0"}, {makeFlatVector({1, 2, 3})}); auto right = makeRowVector({"u_c0"}, {makeFlatVector({0, 2, 5})}); @@ -977,7 +1002,7 @@ TEST_F(MergeJoinTest, numDrivers) { EXPECT_EQ(2, task->numFinishedDrivers()); } -TEST_F(MergeJoinTest, lazyVectors) { +TEST_P(MergeJoinTest, lazyVectors) { // A dataset of multiple row groups with multiple columns. We create // different dictionary wrappings for different columns and load the // rows in scope at different times. We make 11000 repeats of 300 @@ -1003,11 +1028,11 @@ TEST_F(MergeJoinTest, lazyVectors) { makeFlatVector(10'000, [](auto row) { return row % 31; }), }); - auto leftFile = TempFilePath::create(); + auto leftFile = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(leftFile->path, leftVectors); createDuckDbTable("t", {leftVectors}); - auto rightFile = TempFilePath::create(); + auto rightFile = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(rightFile->path, rightVectors); createDuckDbTable("u", {rightVectors}); @@ -1039,8 +1064,14 @@ TEST_F(MergeJoinTest, lazyVectors) { .planNode(); AssertQueryBuilder(op, duckDbQueryRunner_) - .split(rightScanId, makeHiveConnectorSplit(rightFile->path)) - .split(leftScanId, makeHiveConnectorSplit(leftFile->path)) + .split( + rightScanId, + connector::test::makeConnectorSplit( + GetParam().connectorName, rightFile->path)) + .split( + leftScanId, + connector::test::makeConnectorSplit( + GetParam().connectorName, leftFile->path)) .assertResults(fmt::format( "SELECT c0, rc0, c1, rc1, c2, c3 FROM t {} JOIN u " "ON t.c0 = u.rc0 AND c1 + rc1 < 30", @@ -1049,7 +1080,7 @@ TEST_F(MergeJoinTest, lazyVectors) { } // Ensures the output of merge joins are dictionaries. -TEST_F(MergeJoinTest, dictionaryOutput) { +TEST_P(MergeJoinTest, dictionaryOutput) { auto left = makeRowVector({"t_c0"}, {makeFlatVector({1, 2, 3, 4, 5})}); auto right = makeRowVector({"u_c0"}, {makeFlatVector({2, 4, 6})}); @@ -1096,7 +1127,7 @@ TEST_F(MergeJoinTest, dictionaryOutput) { output.reset(); } -TEST_F(MergeJoinTest, semiJoin) { +TEST_P(MergeJoinTest, semiJoin) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector({1, 2, 2, 6, std::nullopt})}); @@ -1139,7 +1170,7 @@ TEST_F(MergeJoinTest, semiJoin) { core::JoinType::kRightSemiFilter); } -TEST_F(MergeJoinTest, semiJoinWithMultiMatchedRowsInDifferentBatches) { +TEST_P(MergeJoinTest, semiJoinWithMultiMatchedRowsInDifferentBatches) { auto left = makeRowVector({"t0"}, {makeNullableFlatVector({2, 2, 2, 2, 2})}); @@ -1197,7 +1228,7 @@ TEST_F(MergeJoinTest, semiJoinWithMultiMatchedRowsInDifferentBatches) { core::JoinType::kRightSemiFilter); } -TEST_F(MergeJoinTest, leftJoinWithFilter) { +TEST_P(MergeJoinTest, leftJoinWithFilter) { auto left = makeRowVector({"t0"}, {makeNullableFlatVector({1, 1})}); auto right = makeRowVector({"u0"}, {makeNullableFlatVector({1, 1})}); @@ -1227,7 +1258,7 @@ TEST_F(MergeJoinTest, leftJoinWithFilter) { "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0 AND t.t0 > 2 ) "); } -TEST_F( +TEST_P( MergeJoinTest, antiJoinWithFilterWithMultiMatchedRowsInDifferentBatches) { auto left = @@ -1261,7 +1292,7 @@ TEST_F( "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0 AND t.t0 > 2 ) "); } -TEST_F(MergeJoinTest, antiJoinWithFilterWithMultiMatchedRows) { +TEST_P(MergeJoinTest, antiJoinWithFilterWithMultiMatchedRows) { auto left = makeRowVector({"t0"}, {makeNullableFlatVector({1, 2})}); auto right = @@ -1289,7 +1320,7 @@ TEST_F(MergeJoinTest, antiJoinWithFilterWithMultiMatchedRows) { "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0 AND t.t0 > 2 ) "); } -TEST_F(MergeJoinTest, antiJoinWithTwoJoinKeysInDifferentBatch) { +TEST_P(MergeJoinTest, antiJoinWithTwoJoinKeysInDifferentBatch) { auto left = makeRowVector( {"a", "b"}, {makeNullableFlatVector({1, 1, 1, 1}), @@ -1323,7 +1354,7 @@ TEST_F(MergeJoinTest, antiJoinWithTwoJoinKeysInDifferentBatch) { "SELECT * FROM t WHERE NOT exists (select * from u where t.a = u.c and t.b < u.d)"); } -TEST_F(MergeJoinTest, rightJoin) { +TEST_P(MergeJoinTest, rightJoin) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector( @@ -1369,7 +1400,7 @@ TEST_F(MergeJoinTest, rightJoin) { AssertQueryBuilder(rightPlan).assertResults(expectedResult); } -TEST_F(MergeJoinTest, nullKeys) { +TEST_P(MergeJoinTest, nullKeys) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector({1, 2, 5, std::nullopt})}); @@ -1425,7 +1456,7 @@ TEST_F(MergeJoinTest, nullKeys) { .assertResults("SELECT * FROM t FULL JOIN u ON t.t0 = u.u0"); } -TEST_F(MergeJoinTest, antiJoinWithFilter) { +TEST_P(MergeJoinTest, antiJoinWithFilter) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector( @@ -1458,7 +1489,7 @@ TEST_F(MergeJoinTest, antiJoinWithFilter) { "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0 AND t.t0 > 2 ) "); } -TEST_F(MergeJoinTest, antiJoinFailed) { +TEST_P(MergeJoinTest, antiJoinFailed) { auto size = 1'00; auto left = makeRowVector( {"t0"}, {makeFlatVector(size, [](auto row) { return row; })}); @@ -1490,7 +1521,7 @@ TEST_F(MergeJoinTest, antiJoinFailed) { "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0) "); } -TEST_F(MergeJoinTest, antiJoinWithTwoJoinKeys) { +TEST_P(MergeJoinTest, antiJoinWithTwoJoinKeys) { auto left = makeRowVector( {"a", "b"}, {makeNullableFlatVector( @@ -1527,7 +1558,7 @@ TEST_F(MergeJoinTest, antiJoinWithTwoJoinKeys) { "SELECT * FROM t WHERE NOT exists (select * from u where t.a = u.c and t.b < u.d)"); } -TEST_F(MergeJoinTest, antiJoinWithUniqueJoinKeys) { +TEST_P(MergeJoinTest, antiJoinWithUniqueJoinKeys) { auto left = makeRowVector( {"a", "b"}, {makeNullableFlatVector( @@ -1562,7 +1593,7 @@ TEST_F(MergeJoinTest, antiJoinWithUniqueJoinKeys) { "SELECT * FROM t WHERE NOT exists (select * from u where t.a = u.c and t.b < u.d)"); } -TEST_F(MergeJoinTest, antiJoinNoFilter) { +TEST_P(MergeJoinTest, antiJoinNoFilter) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector( @@ -1595,7 +1626,7 @@ TEST_F(MergeJoinTest, antiJoinNoFilter) { "SELECT t0 FROM t WHERE NOT exists (select 1 from u where t0 = u0)"); } -TEST_F(MergeJoinTest, fullOuterJoin) { +TEST_P(MergeJoinTest, fullOuterJoin) { auto left = makeRowVector( {"t0"}, {makeNullableFlatVector( @@ -1627,7 +1658,7 @@ TEST_F(MergeJoinTest, fullOuterJoin) { "SELECT * FROM t FULL OUTER JOIN u ON t.t0 = u.u0 AND t.t0 > 2"); } -TEST_F(MergeJoinTest, fullOuterJoinWithDuplicateMatch) { +TEST_P(MergeJoinTest, fullOuterJoinWithDuplicateMatch) { // Each row on the left side has at most one match on the right side. auto left = makeRowVector( {"a", "b"}, @@ -1666,7 +1697,7 @@ TEST_F(MergeJoinTest, fullOuterJoinWithDuplicateMatch) { .assertResults("SELECT * from t FULL OUTER JOIN u ON a = c AND b < d"); } -TEST_F(MergeJoinTest, fullOuterJoinNoFilter) { +TEST_P(MergeJoinTest, fullOuterJoinNoFilter) { auto left = makeRowVector( {"t0", "t1", "t2", "t3"}, {makeNullableFlatVector( @@ -1726,7 +1757,7 @@ TEST_F(MergeJoinTest, fullOuterJoinNoFilter) { "SELECT t0, t1 FROM t FULL OUTER JOIN u ON t3 = u3 and t2 = u2 and t1 = u1 and t.t0 = u.u0"); } -TEST_F(MergeJoinTest, fullOuterJoinWithNullCompare) { +TEST_P(MergeJoinTest, fullOuterJoinWithNullCompare) { auto right = makeRowVector( {"u0", "u1"}, {makeNullableFlatVector({false, true}), @@ -1759,7 +1790,7 @@ TEST_F(MergeJoinTest, fullOuterJoinWithNullCompare) { "SELECT t0, t1, u0, u1 FROM t FULL OUTER JOIN u ON t.t0 = u.u0 and t1 = u1"); } -TEST_F(MergeJoinTest, complexTypedFilter) { +TEST_P(MergeJoinTest, complexTypedFilter) { constexpr vector_size_t size{1000}; auto right = makeRowVector( @@ -1866,7 +1897,7 @@ TEST_F(MergeJoinTest, complexTypedFilter) { } } -TEST_F(MergeJoinTest, aggregationOverFullJoin) { +TEST_P(MergeJoinTest, aggregationOverFullJoin) { auto left = makeRowVector({"t_c0"}, {makeFlatVector({1, 2, 3, 4, 5})}); auto right = makeRowVector({"u_c0"}, {makeFlatVector({2, 4, 6})}); @@ -1888,7 +1919,7 @@ TEST_F(MergeJoinTest, aggregationOverFullJoin) { ASSERT_EQ(6, result.value()); } -TEST_F(MergeJoinTest, nonFirstJoinKeysFullJoin) { +TEST_P(MergeJoinTest, nonFirstJoinKeysFullJoin) { auto left = makeRowVector( {"t_data", "t_key"}, { @@ -1918,7 +1949,7 @@ TEST_F(MergeJoinTest, nonFirstJoinKeysFullJoin) { "VALUES (1, 50, null), (2, 40, 23), (3, 30, null), (4, 20, 22), (5, 10, null), (null, null, 21)"); } -TEST_F(MergeJoinTest, outputRightRestRows) { +TEST_P(MergeJoinTest, outputRightRestRows) { // S5 match finished, output right rest std::vector leftKeys = { makeNullableFlatVector({0, 0, 0, 1, 3}), @@ -1937,7 +1968,7 @@ TEST_F(MergeJoinTest, outputRightRestRows) { testJoin1(leftKeys, rightKeys); testJoin1(rightKeys, leftKeys); } -TEST_F(MergeJoinTest, outputAfterRightMatch) { +TEST_P(MergeJoinTest, outputAfterRightMatch) { // S4 rightMatch_ get all inputs (across different batchs), go to next // nonNullRow std::vector leftKeys = { @@ -1954,7 +1985,7 @@ TEST_F(MergeJoinTest, outputAfterRightMatch) { testJoin1(leftKeys, rightKeys); testJoin1(rightKeys, leftKeys); } -TEST_F(MergeJoinTest, outputAfterRightMatchInBatch) { +TEST_P(MergeJoinTest, outputAfterRightMatchInBatch) { // S3 rightMatch_ and leftMatch_ complete(in one batch), right go to next // nonNullRow std::vector leftKeys = { @@ -1972,7 +2003,7 @@ TEST_F(MergeJoinTest, outputAfterRightMatchInBatch) { testJoin1(rightKeys, leftKeys); } -TEST_F(MergeJoinTest, outputLeftRightComplete) { +TEST_P(MergeJoinTest, outputLeftRightComplete) { // S3 rightMatch_ and leftMatch_ complete(in one batch), right go to next // nonNullRow std::vector leftKeys = { @@ -1994,7 +2025,7 @@ TEST_F(MergeJoinTest, outputLeftRightComplete) { testJoin1(rightKeys, leftKeys); } -TEST_F(MergeJoinTest, outputAfterRightMoveNext) { +TEST_P(MergeJoinTest, outputAfterRightMoveNext) { // S6 Match miss, right go down to 1st nonNullRow with S2 std::vector leftKeys = { makeNullableFlatVector({1, 2, 3, 4, 5}), @@ -2023,7 +2054,7 @@ TEST_F(MergeJoinTest, outputAfterRightMoveNext) { testJoin1(rightKeys, leftKeys); } -DEBUG_ONLY_TEST_F(MergeJoinTest, failureOnRightSide) { +DEBUG_ONLY_TEST_P(MergeJoinTest, failureOnRightSide) { // Test that the Task terminates cleanly when the right side of the join // throws an exception. @@ -2099,3 +2130,9 @@ DEBUG_ONLY_TEST_F(MergeJoinTest, failureOnRightSide) { waitForAllTasksToBeDeleted(); } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + MergeJoinTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); diff --git a/bolt/exec/tests/MergeTest.cpp b/bolt/exec/tests/MergeTest.cpp index a65b520f0..6a75277db 100644 --- a/bolt/exec/tests/MergeTest.cpp +++ b/bolt/exec/tests/MergeTest.cpp @@ -29,16 +29,17 @@ */ #include "bolt/exec/Merge.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "folly/experimental/EventCount.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; class MergeTest : public OperatorTestBase { diff --git a/bolt/exec/tests/MergerTest.cpp b/bolt/exec/tests/MergerTest.cpp index f30cacf22..7c9a42fdf 100644 --- a/bolt/exec/tests/MergerTest.cpp +++ b/bolt/exec/tests/MergerTest.cpp @@ -30,12 +30,12 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/Merge.h" #include "bolt/exec/MergeSource.h" #include "bolt/exec/SortBuffer.h" #include "bolt/exec/Spill.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/Type.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" @@ -44,6 +44,7 @@ using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt; using namespace bytedance::bolt::memory; @@ -306,7 +307,7 @@ class MergerTest : public OperatorTestBase { const std::vector sortingKeys_ = SpillState::makeSortingKeys(sortColumnIndices_, sortCompareFlags_); const std::shared_ptr spillDirectory_ = - exec::test::TempDirectoryPath::create(); + bytedance::bolt::test::TempDirectoryPath::create(); const common::SpillConfig spillConfig_{ [&]() -> const std::string& { return spillDirectory_->getPath(); }, [&](uint64_t) {}, diff --git a/bolt/exec/tests/MorselDrivenTest.cpp b/bolt/exec/tests/MorselDrivenTest.cpp index a24b49185..210129940 100644 --- a/bolt/exec/tests/MorselDrivenTest.cpp +++ b/bolt/exec/tests/MorselDrivenTest.cpp @@ -14,26 +14,46 @@ * limitations under the License. */ +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/Exchange.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/LocalExchangeSource.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; -class MorselDrivenTest : public HiveConnectorTestBase { +class MorselDrivenTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: void SetUp() override { - HiveConnectorTestBase::SetUp(); + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); window::prestosql::registerAllWindowFunctions(); exec::ExchangeSource::factories().clear(); exec::ExchangeSource::registerFactory(createLocalExchangeSource); } + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } + template FlatVectorPtr makeFlatSequence(T start, vector_size_t size) { return makeFlatVector(size, [start](auto row) { return start + row; }); @@ -49,8 +69,8 @@ class MorselDrivenTest : public HiveConnectorTestBase { return fmt::format("local://{}-{}", prefix, num); } - std::vector> writeToFiles( - const std::vector& vectors) { + std::vector> + writeToFiles(const std::vector& vectors) { auto filePaths = makeFilePaths(vectors.size()); for (auto i = 0; i < vectors.size(); i++) { writeToFile(filePaths[i]->path, vectors[i]); @@ -126,7 +146,7 @@ class MorselDrivenTest : public HiveConnectorTestBase { }; // Test 1: Verify "LocalExchange + PartialAgg" is morsel-driven -TEST_F(MorselDrivenTest, morselDrivenEnabledForPartialAgg) { +TEST_P(MorselDrivenTest, morselDrivenEnabledForPartialAgg) { std::vector vectors = { makeRowVector({makeFlatSequence(0, 100)}), makeRowVector({makeFlatSequence(53, 100)}), @@ -164,7 +184,9 @@ TEST_F(MorselDrivenTest, morselDrivenEnabledForPartialAgg) { AssertQueryBuilder queryBuilder(op, duckDbQueryRunner_); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } auto task = queryBuilder.maxDrivers(4) @@ -178,7 +200,7 @@ TEST_F(MorselDrivenTest, morselDrivenEnabledForPartialAgg) { } // Test 2: Verify morsel-driven is disabled for "LocalExchange + SingleAgg" -TEST_F(MorselDrivenTest, morselDrivenDisabledForSingleAgg) { +TEST_P(MorselDrivenTest, morselDrivenDisabledForSingleAgg) { std::vector vectors; for (auto i = 0; i < 21; i++) { vectors.emplace_back(makeRowVector({makeFlatVector( @@ -214,7 +236,7 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForSingleAgg) { } // Test 3: Verify morsel-driven is disabled for "LocalExchange + FinalAgg" -TEST_F(MorselDrivenTest, morselDrivenDisabledForFinalAgg) { +TEST_P(MorselDrivenTest, morselDrivenDisabledForFinalAgg) { std::vector vectors = { makeRowVector({makeFlatSequence(0, 100)}), makeRowVector({makeFlatSequence(53, 100)}), @@ -252,7 +274,9 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForFinalAgg) { AssertQueryBuilder queryBuilder(op, duckDbQueryRunner_); for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } auto task = queryBuilder.maxDrivers(4) @@ -266,7 +290,7 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForFinalAgg) { } // Test 4: Verify morsel-driven is disabled for "LocalExchange + Window" -TEST_F(MorselDrivenTest, morselDrivenDisabledForWindow) { +TEST_P(MorselDrivenTest, morselDrivenDisabledForWindow) { const vector_size_t size = 1'000; std::vector vectors = { @@ -312,7 +336,9 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForWindow) { for (auto i = 0; i < filePaths.size(); ++i) { queryBuilder.split( - scanNodeIds[i], makeHiveConnectorSplit(filePaths[i]->path)); + scanNodeIds[i], + connector::test::makeConnectorSplit( + GetParam().connectorName, filePaths[i]->path)); } auto task = queryBuilder.maxDrivers(4) @@ -325,7 +351,7 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForWindow) { // Test 5: Verify morsel-driven is disabled for "HashBuild" for efficiency // reason -TEST_F(MorselDrivenTest, morselDrivenDisabledForHashBuild) { +TEST_P(MorselDrivenTest, morselDrivenDisabledForHashBuild) { std::vector probeVectors = { makeRowVector( {"t_k1", "t_p", "t_s"}, @@ -384,7 +410,7 @@ TEST_F(MorselDrivenTest, morselDrivenDisabledForHashBuild) { // Test 6: Verify morsel-driven can rollback plan rewrite (inserting a // localPartition in between Exchange->HashJoin) when the resulting pipeline // (hashProbe+singleAgg) cannot be morsel-driven. -TEST_F(MorselDrivenTest, morselDrivenPlanRewriteRollback) { +TEST_P(MorselDrivenTest, morselDrivenPlanRewriteRollback) { std::vector> tasks; auto planNodeIdGenerator = std::make_shared(); configSettings_[core::QueryConfig::kEnableMorselDriven] = "true"; @@ -462,7 +488,7 @@ TEST_F(MorselDrivenTest, morselDrivenPlanRewriteRollback) { configSettings_[core::QueryConfig::kEnableMorselDriven] = "false"; } -TEST_F(MorselDrivenTest, improvedEarlyCompletion) { +TEST_P(MorselDrivenTest, improvedEarlyCompletion) { std::vector> tasks; auto planNodeIdGenerator = std::make_shared(); configSettings_[core::QueryConfig::kEnableMorselDriven] = "true"; @@ -531,3 +557,9 @@ TEST_F(MorselDrivenTest, improvedEarlyCompletion) { configSettings_[core::QueryConfig::kEnableMorselDriven] = "false"; } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + MorselDrivenTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); diff --git a/bolt/exec/tests/MultiFragmentTest.cpp b/bolt/exec/tests/MultiFragmentTest.cpp index 37a482a15..2e933ea50 100644 --- a/bolt/exec/tests/MultiFragmentTest.cpp +++ b/bolt/exec/tests/MultiFragmentTest.cpp @@ -30,8 +30,11 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" -#include "bolt/connectors/hive/HiveConnectorSplit.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/dwio/common/FileSink.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/exec/Exchange.h" @@ -39,23 +42,33 @@ #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/RoundRobinPartitionFunction.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/LocalExchangeSource.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "folly/experimental/EventCount.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using bytedance::bolt::common::testutil::TestValue; using bytedance::bolt::test::BatchMaker; namespace bytedance::bolt::exec { namespace { -class MultiFragmentTest : public HiveConnectorTestBase { +class MultiFragmentTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: void SetUp() override { - HiveConnectorTestBase::SetUp(); + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); exec::ExchangeSource::factories().clear(); exec::ExchangeSource::registerFactory(createLocalExchangeSource); BOLT_TEST_VALUE_ENABLE(); @@ -63,7 +76,9 @@ class MultiFragmentTest : public HiveConnectorTestBase { void TearDown() override { vectors_.clear(); - HiveConnectorTestBase::TearDown(); + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); } static std::string makeTaskId(const std::string& prefix, int num) { @@ -143,13 +158,12 @@ class MultiFragmentTest : public HiveConnectorTestBase { static void addHiveSplits( const std::shared_ptr& task, - const std::vector>& filePaths) { + const std::vector>& + filePaths) { for (auto& filePath : filePaths) { auto split = exec::Split( - std::make_shared( - kHiveConnectorId, - "file:" + filePath->path, - bytedance::bolt::dwio::common::FileFormat::DWRF), + connector::test::makeConnectorSplit( + GetParam().connectorName, std::move(filePath->path)), -1); task->addSplit("0", std::move(split)); VLOG(1) << filePath->path << "\n"; @@ -160,14 +174,13 @@ class MultiFragmentTest : public HiveConnectorTestBase { static void addHiveSplits( const std::shared_ptr& task, const std::vector& scanNodeIds, - const std::vector>& filePaths) { + const std::vector>& + filePaths) { for (auto i = 0; i < filePaths.size(); ++i) { const auto& filePath = filePaths[i]; auto split = exec::Split( - std::make_shared( - kHiveConnectorId, - "file:" + filePath->getPath(), - bytedance::bolt::dwio::common::FileFormat::DWRF), + connector::test::makeConnectorSplit( + GetParam().connectorName, std::move(filePath->path)), -1); task->addSplit(scanNodeIds[i % scanNodeIds.size()], std::move(split)); VLOG(1) << filePath->getPath() << "\n"; @@ -270,13 +283,14 @@ class MultiFragmentTest : public HiveConnectorTestBase { ROW({"c0", "c1", "c2", "c3", "c4", "c5"}, {BIGINT(), INTEGER(), SMALLINT(), REAL(), DOUBLE(), VARCHAR()})}; std::unordered_map configSettings_; - std::vector> filePaths_; + std::vector> + filePaths_; std::vector vectors_; std::shared_ptr bufferManager_{ OutputBufferManager::getInstance().lock()}; }; -TEST_F(MultiFragmentTest, aggregationSingleKey) { +TEST_P(MultiFragmentTest, aggregationSingleKey) { setupSources(10, 1000); std::vector> tasks; auto leafTaskId = makeTaskId("leaf", 0); @@ -363,7 +377,7 @@ TEST_F(MultiFragmentTest, aggregationSingleKey) { } } -TEST_F(MultiFragmentTest, aggregationMultiKey) { +TEST_P(MultiFragmentTest, aggregationMultiKey) { setupSources(10, 1'000); std::vector> tasks; auto leafTaskId = makeTaskId("leaf", 0); @@ -411,7 +425,7 @@ TEST_F(MultiFragmentTest, aggregationMultiKey) { } } -TEST_F(MultiFragmentTest, distributedTableScan) { +TEST_P(MultiFragmentTest, distributedTableScan) { setupSources(10, 1000); // Run the table scan several times to test the caching. for (int i = 0; i < 3; ++i) { @@ -437,19 +451,20 @@ TEST_F(MultiFragmentTest, distributedTableScan) { } } -TEST_F(MultiFragmentTest, mergeExchange) { +TEST_P(MultiFragmentTest, mergeExchange) { setupSources(20, 1000); static const core::SortOrder kAscNullsLast(true, false); std::vector> tasks; - std::vector> filePaths0( - filePaths_.begin(), filePaths_.begin() + 10); - std::vector> filePaths1( - filePaths_.begin() + 10, filePaths_.end()); + std::vector> + filePaths0(filePaths_.begin(), filePaths_.begin() + 10); + std::vector> + filePaths1(filePaths_.begin() + 10, filePaths_.end()); - std::vector>> filePathsList = { - filePaths0, filePaths1}; + std::vector< + std::vector>> + filePathsList = {filePaths0, filePaths1}; std::vector partialSortTaskIds; RowTypePtr outputType; @@ -502,7 +517,7 @@ TEST_F(MultiFragmentTest, mergeExchange) { } // Test reordering and dropping columns in PartitionedOutput operator. -TEST_F(MultiFragmentTest, partitionedOutput) { +TEST_P(MultiFragmentTest, partitionedOutput) { setupSources(10, 1000); // Test dropping columns only @@ -638,16 +653,17 @@ TEST_F(MultiFragmentTest, partitionedOutput) { } } -TEST_F(MultiFragmentTest, mergeExchangeMultiMerge) { +TEST_P(MultiFragmentTest, mergeExchangeMultiMerge) { setupSources(20, 1000); static const core::SortOrder kAscNullsLast(true, false); std::vector> tasks; - std::vector> filePaths0( - filePaths_.begin(), filePaths_.begin() + 10); - std::vector> filePaths1( - filePaths_.begin() + 10, filePaths_.end()); - std::vector>> filePathsList = { - filePaths0, filePaths1}; + std::vector> + filePaths0(filePaths_.begin(), filePaths_.begin() + 10); + std::vector> + filePaths1(filePaths_.begin() + 10, filePaths_.end()); + std::vector< + std::vector>> + filePathsList = {filePaths0, filePaths1}; std::vector partialSortTaskIds; RowTypePtr outputType; std::vector> spillDirectories; @@ -732,7 +748,7 @@ TEST_F(MultiFragmentTest, mergeExchangeMultiMerge) { EXPECT_LT(0, mergeExchangeStats.rawInputBytes); } -TEST_F(MultiFragmentTest, partitionedOutputWithLargeInput) { +TEST_P(MultiFragmentTest, partitionedOutputWithLargeInput) { // Verify that partitionedOutput operator is able to split a single input // vector if it hits memory or row limits. // We create a large vector that hits the row limit (70% - 120% of 10,000). @@ -807,7 +823,7 @@ TEST_F(MultiFragmentTest, partitionedOutputWithLargeInput) { } } -TEST_F(MultiFragmentTest, broadcast) { +TEST_P(MultiFragmentTest, broadcast) { auto data = makeRowVector( {makeFlatVector(1'000, [](auto row) { return row; })}); @@ -853,7 +869,7 @@ TEST_F(MultiFragmentTest, broadcast) { leafTask->updateOutputBuffers(finalAggTaskIds.size(), true); } -TEST_F(MultiFragmentTest, roundRobinPartition) { +TEST_P(MultiFragmentTest, roundRobinPartition) { auto data = { makeRowVector({ makeFlatVector({1, 2, 3, 4, 5}), @@ -918,7 +934,7 @@ TEST_F(MultiFragmentTest, roundRobinPartition) { } // Test PartitionedOutput operator with constant partitioning keys. -TEST_F(MultiFragmentTest, constantKeys) { +TEST_P(MultiFragmentTest, constantKeys) { auto data = makeRowVector({ makeFlatVector( 1'000, [](auto row) { return row; }, nullEvery(7)), @@ -978,7 +994,7 @@ TEST_F(MultiFragmentTest, constantKeys) { } } -TEST_F(MultiFragmentTest, replicateNullsAndAny) { +TEST_P(MultiFragmentTest, replicateNullsAndAny) { auto data = makeRowVector({makeFlatVector( 1'000, [](auto row) { return row; }, nullEvery(7))}); @@ -1037,11 +1053,11 @@ TEST_F(MultiFragmentTest, replicateNullsAndAny) { } // Test query finishing before all splits have been scheduled. -TEST_F(MultiFragmentTest, limit) { +TEST_P(MultiFragmentTest, limit) { auto data = makeRowVector({makeFlatVector( 1'000, [](auto row) { return row; }, nullEvery(7))}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); // Make leaf task: Values -> PartialLimit(10) -> Repartitioning(0). @@ -1056,7 +1072,9 @@ TEST_F(MultiFragmentTest, limit) { leafTask->start(1); leafTask.get()->addSplit( - "0", exec::Split(makeHiveConnectorSplit(file->path))); + "0", + exec::Split(connector::test::makeConnectorSplit( + GetParam().connectorName, file->path))); // Make final task: Exchange -> FinalLimit(10). auto plan = PlanBuilder() @@ -1083,7 +1101,7 @@ TEST_F(MultiFragmentTest, limit) { ASSERT_TRUE(waitForTaskCompletion(leafTask.get())) << leafTask->taskId(); } -TEST_F(MultiFragmentTest, mergeExchangeOverEmptySources) { +TEST_P(MultiFragmentTest, mergeExchangeOverEmptySources) { std::vector> tasks; std::vector leafTaskIds; @@ -1113,7 +1131,7 @@ TEST_F(MultiFragmentTest, mergeExchangeOverEmptySources) { } } -DEBUG_ONLY_TEST_F(MultiFragmentTest, mergeExchangeFailureOnStart) { +DEBUG_ONLY_TEST_P(MultiFragmentTest, mergeExchangeFailureOnStart) { std::vector> tasks; std::vector leafTaskIds; @@ -1198,7 +1216,7 @@ core::PlanNodePtr makeSequentialJoinsOverExchangePlan( } } // namespace -TEST_F(MultiFragmentTest, earlyCompletion) { +TEST_P(MultiFragmentTest, earlyCompletion) { // Setup a distributed query with 4 tasks: // - 1 leaf task with results partitioned 2 ways; // - 2 intermediate tasks reading from 2 partitions produced by the leaf task. @@ -1271,7 +1289,7 @@ TEST_F(MultiFragmentTest, earlyCompletion) { } } -TEST_F(MultiFragmentTest, morselDrivenEarlyCompletion) { +TEST_P(MultiFragmentTest, morselDrivenEarlyCompletion) { // We leverage the same test plan of earlyCompletion to test MorselDriven // execution model under early termination and verify: // 1. "Exchange->HashJoin(probe)" is split to two pipelines, @@ -1362,7 +1380,7 @@ TEST_F(MultiFragmentTest, morselDrivenEarlyCompletion) { configSettings_[core::QueryConfig::kEnableMorselDriven] = "false"; } -TEST_F(MultiFragmentTest, morselDrivenEarlyCompletion2) { +TEST_P(MultiFragmentTest, morselDrivenEarlyCompletion2) { // We test early termination of MorselDriven execution model in the // "Exchange->HashJoin(probe)->HashJoin(probe)" and verify: // 1. "Exchange->HashJoin(probe)->HashJoin(probe)" is split to two pipelines, @@ -1458,7 +1476,7 @@ TEST_F(MultiFragmentTest, morselDrivenEarlyCompletion2) { configSettings_[core::QueryConfig::kEnableMorselDriven] = "false"; } -TEST_F(MultiFragmentTest, earlyCompletionBroadcast) { +TEST_P(MultiFragmentTest, earlyCompletionBroadcast) { // Same as 'earlyCompletion' test, but broadcasts leaf task results to all // intermediate tasks. @@ -1523,7 +1541,7 @@ TEST_F(MultiFragmentTest, earlyCompletionBroadcast) { } } -TEST_F(MultiFragmentTest, earlyCompletionMerge) { +TEST_P(MultiFragmentTest, earlyCompletionMerge) { // Same as 'earlyCompletion' test, but uses MergeExchange instead of Exchange. std::vector> tasks; @@ -1673,7 +1691,7 @@ class SlowOperatorTranslator : public Operator::PlanNodeTranslator { } }; -TEST_F(MultiFragmentTest, exchangeDestruction) { +TEST_P(MultiFragmentTest, exchangeDestruction) { // This unit test tests the proper destruction of ExchangeClient upon // task destruction. Operator::registerOperator(std::make_unique()); @@ -1722,7 +1740,7 @@ TEST_F(MultiFragmentTest, exchangeDestruction) { rootTask = nullptr; } -TEST_F(MultiFragmentTest, cancelledExchange) { +TEST_P(MultiFragmentTest, cancelledExchange) { // Create a source fragment borrow the output type from it. auto planFragment = exec::test::PlanBuilder() .tableScan(rowType_) @@ -1820,7 +1838,7 @@ class TestCustomExchangeTranslator : public exec::Operator::PlanNodeTranslator { } }; -TEST_F(MultiFragmentTest, customPlanNodeWithExchangeClient) { +TEST_P(MultiFragmentTest, customPlanNodeWithExchangeClient) { setupSources(5, 100); Operator::registerOperator(std::make_unique()); auto leafTaskId = makeTaskId("leaf", 0); @@ -1867,7 +1885,7 @@ TEST_F(MultiFragmentTest, customPlanNodeWithExchangeClient) { // task is not running. // T5: task terminate processes the pending remote splits by accessing the // associated exchange client and run into segment fault. -DEBUG_ONLY_TEST_F( +DEBUG_ONLY_TEST_P( MultiFragmentTest, raceBetweenTaskTerminateAndTaskNoMoreSplits) { setupSources(10, 1000); @@ -1933,7 +1951,7 @@ DEBUG_ONLY_TEST_F( ASSERT_TRUE(waitForTaskFailure(rootTask.get(), 1'000'000'000)); } -TEST_F(MultiFragmentTest, taskTerminateWithPendingOutputBuffers) { +TEST_P(MultiFragmentTest, taskTerminateWithPendingOutputBuffers) { setupSources(8, 1000); auto taskId = makeTaskId("task", 0); core::PlanNodePtr leafPlan; @@ -1998,7 +2016,7 @@ TEST_F(MultiFragmentTest, taskTerminateWithPendingOutputBuffers) { task.reset(); } -TEST_F(MultiFragmentTest, taskTerminateWithProblematicRemainingRemoteSplits) { +TEST_P(MultiFragmentTest, taskTerminateWithProblematicRemainingRemoteSplits) { // Start the task with 2 drivers. auto probeData = makeRowVector({"p_c0"}, {makeFlatVector({1, 2, 3})}); @@ -2047,7 +2065,7 @@ TEST_F(MultiFragmentTest, taskTerminateWithProblematicRemainingRemoteSplits) { ASSERT_TRUE(waitForTaskFailure(task.get(), 30'000'000)) << task->taskId(); } -TEST_F( +TEST_P( MultiFragmentTest, morselDrivenTestSplitHashJoinWithExchangeOnBuildSide) { // We leverage the same test plan of @@ -2124,10 +2142,10 @@ TEST_F( configSettings_[core::QueryConfig::kEnableMorselDriven] = "false"; } -DEBUG_ONLY_TEST_F(MultiFragmentTest, mergeWithEarlyTermination) { +DEBUG_ONLY_TEST_P(MultiFragmentTest, mergeWithEarlyTermination) { setupSources(10, 1000); - std::vector> filePaths( + std::vector> filePaths( filePaths_.begin(), filePaths_.begin()); std::vector partialSortTaskIds; @@ -2275,7 +2293,7 @@ class DataFetcher { /// granularity. It can do so only if PartitionedOutput operator limits the size /// of individual pages. PartitionedOutput operator is expected to limit page /// sizes to no more than 1MB give and take 30%. -TEST_F(MultiFragmentTest, maxBytes) { +TEST_P(MultiFragmentTest, maxBytes) { std::string s(25, 'x'); // Keep the row count under 7000 to avoid hitting the row limit in the // operator instead. @@ -2339,7 +2357,7 @@ TEST_F(MultiFragmentTest, maxBytes) { } /// Verify that ExchangeClient stats are populated even if task fails. -DEBUG_ONLY_TEST_F(MultiFragmentTest, exchangeStatsOnFailure) { +DEBUG_ONLY_TEST_P(MultiFragmentTest, exchangeStatsOnFailure) { // Trigger a failure after fetching first 10 pages. BOLT_TEST_VALUE_ENABLE(); SCOPED_TESTVALUE_SET( @@ -2385,7 +2403,7 @@ DEBUG_ONLY_TEST_F(MultiFragmentTest, exchangeStatsOnFailure) { ASSERT_TRUE(waitForTaskCompletion(producerTask.get(), 3'000'000)); } -TEST_F(MultiFragmentTest, earlyTaskFailure) { +TEST_P(MultiFragmentTest, earlyTaskFailure) { setupSources(1, 10); const auto partialSortTaskId = makeTaskId("partialSortBy", 0); @@ -2440,7 +2458,7 @@ TEST_F(MultiFragmentTest, earlyTaskFailure) { } } -TEST_F(MultiFragmentTest, mergeSmallBatchesInExchange) { +TEST_P(MultiFragmentTest, mergeSmallBatchesInExchange) { auto data = makeRowVector({makeFlatVector({1, 2, 3})}); const int32_t numPartitions = 100; @@ -2499,5 +2517,11 @@ TEST_F(MultiFragmentTest, mergeSmallBatchesInExchange) { test(100'000, 1); } +INSTANTIATE_TEST_SUITE_P( + Connectors, + MultiFragmentTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); + } // namespace } // namespace bytedance::bolt::exec diff --git a/bolt/exec/tests/NestedLoopJoinTest.cpp b/bolt/exec/tests/NestedLoopJoinTest.cpp index 5a6b72b2f..1426aeed6 100644 --- a/bolt/exec/tests/NestedLoopJoinTest.cpp +++ b/bolt/exec/tests/NestedLoopJoinTest.cpp @@ -31,7 +31,7 @@ #include "bolt/core/PlanNode.h" #include "bolt/exec/NestedLoopJoinBuild.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/VectorTestUtil.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" @@ -41,7 +41,7 @@ namespace { using bytedance::bolt::test::assertEqualVectors; -class NestedLoopJoinTest : public HiveConnectorTestBase { +class NestedLoopJoinTest : public OperatorTestBase { protected: void setProbeType(const RowTypePtr& probeType) { probeType_ = probeType; diff --git a/bolt/exec/tests/OperatorTraceTest.cpp b/bolt/exec/tests/OperatorTraceTest.cpp index ebca8aa37..7872a59b6 100644 --- a/bolt/exec/tests/OperatorTraceTest.cpp +++ b/bolt/exec/tests/OperatorTraceTest.cpp @@ -35,6 +35,8 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/exec/OperatorTraceReader.h" #include "bolt/exec/OperatorTraceWriter.h" @@ -48,9 +50,9 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec::trace::test { class OperatorTraceTest : public HiveConnectorTestBase { protected: @@ -752,9 +754,10 @@ TEST_F(OperatorTraceTest, traceSplitRoundTrip) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < numSplits; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -819,9 +822,10 @@ TEST_F(OperatorTraceTest, traceSplitPartial) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < numSplits; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -908,9 +912,10 @@ TEST_F(OperatorTraceTest, traceSplitCorrupted) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < numSplits; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -1164,9 +1169,10 @@ TEST_F(OperatorTraceTest, hiveConnectorId) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < numSplits; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } diff --git a/bolt/exec/tests/OrderByTest.cpp b/bolt/exec/tests/OrderByTest.cpp index 5facb7acf..74fbc300f 100644 --- a/bolt/exec/tests/OrderByTest.cpp +++ b/bolt/exec/tests/OrderByTest.cpp @@ -40,6 +40,7 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/core/QueryConfig.h" #include "bolt/cudf/tests/CudfResource.h" @@ -51,7 +52,6 @@ #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/QueryAssertions.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/exec/tests/utils/WithGPUParamInterface.h" #include "bolt/serializers/ArrowSerializer.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" @@ -61,6 +61,7 @@ using namespace bytedance::bolt::exec; using namespace bytedance::bolt::common::testutil; using namespace bytedance::bolt::core; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec::test { namespace { // Returns aggregated spilled stats by 'task'. @@ -293,7 +294,7 @@ class OrderByTest : public OperatorTestBase, public WithGPUParamInterface<> { } { SCOPED_TRACE("run with spilling"); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); TestScopedSpillInjection scopedSpillInjection(100); queryCtx->testingOverrideConfigUnsafe({ @@ -1052,7 +1053,7 @@ TEST_P(OrderByTest, spill) { const auto expectedResult = AssertQueryBuilder(plan).copyResults(pool_.get()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) .config(core::QueryConfig::kSpillEnabled, true) @@ -1110,7 +1111,7 @@ TEST_P(OrderByTest, spillWithArrowSerde) { .capturePlanNodeId(orderById) .planNode(); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); TestScopedSpillInjection scopedSpillInjection(100); bool sawArrowSerde = false; @@ -1178,7 +1179,7 @@ TEST_P(OrderByTest, spillWithMemoryLimit) { {1'000'000'000, false}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -1246,7 +1247,7 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimDuringInputProcessing) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -1389,7 +1390,7 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimDuringReserve) { batches.push_back(fuzzer.fuzzRow(rowType)); } - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -1506,7 +1507,7 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimDuringAllocation) { const std::vector enableSpillings = {false, true}; for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool( memory::memoryManager()->addRootPool(queryCtx->queryId(), kMaxBytes)); @@ -1640,7 +1641,7 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimDuringOutputProcessing) { const std::vector enableSpillings = {false, true}; for (bool enableSpilling : enableSpillings) { SCOPED_TRACE(fmt::format("enableSpilling {}", enableSpilling)); - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool( queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create())); @@ -1980,7 +1981,7 @@ DEBUG_ONLY_TEST_P(OrderByTest, spillWithNoMoreOutput) { ASSERT_EQ(reclaimerStats_.reclaimedBytes, 0); }))); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(plan) .spillDirectory(spillDirectory->path) @@ -2017,7 +2018,7 @@ TEST_P(OrderByTest, maxSpillBytes) { .orderBy({fmt::format("{} ASC NULLS LAST", "c0")}, false) .capturePlanNodeId(orderNodeId) .planNode(); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); struct { @@ -2076,7 +2077,8 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimFromOrderBy) { memory::testingRunArbitration(); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); core::PlanNodeId orderById; auto task = AssertQueryBuilder(duckDbQueryRunner_) @@ -2119,7 +2121,8 @@ DEBUG_ONLY_TEST_P(OrderByTest, reclaimFromEmptyOrderBy) { testingRunArbitration(op->pool()); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); auto task = AssertQueryBuilder(duckDbQueryRunner_) .spillDirectory(spillDirectory->path) diff --git a/bolt/exec/tests/PlanNodeToStringTest.cpp b/bolt/exec/tests/PlanNodeToStringTest.cpp index c1afe79ab..28fc49f3a 100644 --- a/bolt/exec/tests/PlanNodeToStringTest.cpp +++ b/bolt/exec/tests/PlanNodeToStringTest.cpp @@ -28,18 +28,15 @@ * -------------------------------------------------------------------------- */ -#include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/exec/WindowFunction.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "bolt/functions/prestosql/registration/RegistrationFunctions.h" #include "bolt/parse/TypeResolver.h" +#include "bolt/vector/tests/utils/VectorTestBase.h" #include using namespace bytedance::bolt; -using namespace bytedance::bolt::common::test; - using bytedance::bolt::exec::test::PlanBuilder; class PlanNodeToStringTest : public testing::Test, public test::VectorTestBase { @@ -646,20 +643,10 @@ TEST_F(PlanNodeToStringTest, partitionedOutput) { "-- PartitionedOutput[1][partitionFunction: HASH(c1, c2) with 5 partitions replicate nulls and any] -> c0:SMALLINT, c1:INTEGER, c2:BIGINT\n", plan->toString(true, false, true)); - auto hiveSpec = std::make_shared( - 4, - std::vector{0, 1, 0, 1}, - std::vector{1, 2}, - std::vector{}); - - plan = PlanBuilder() - .values({data_}) - .partitionedOutput({"c1", "c2"}, 2, false, hiveSpec) - .planNode(); - ASSERT_EQ("-- PartitionedOutput[1]\n", plan->toString(false, false, true)); - ASSERT_EQ( - "-- PartitionedOutput[1][partitionFunction: HIVE((1, 2) buckets: 4) with 2 partitions] -> c0:SMALLINT, c1:INTEGER, c2:BIGINT\n", - plan->toString(true, false, true)); + // Connector-specific partition function spec coverage lives in each + // connector's test directory (e.g. + // bolt/connectors/hive/tests/HivePartitionFunctionPlanNodeToStringTest.cpp + // for HivePartitionFunctionSpec). } TEST_F(PlanNodeToStringTest, localMerge) { diff --git a/bolt/exec/tests/PrintPlanWithStatsTest.cpp b/bolt/exec/tests/PrintPlanWithStatsTest.cpp index ab233115b..621913102 100644 --- a/bolt/exec/tests/PrintPlanWithStatsTest.cpp +++ b/bolt/exec/tests/PrintPlanWithStatsTest.cpp @@ -28,19 +28,44 @@ * -------------------------------------------------------------------------- */ +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/ConnectorNames.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include #include using namespace bytedance::bolt; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using bytedance::bolt::exec::test::PlanBuilder; -class PrintPlanWithStatsTest : public HiveConnectorTestBase {}; +class PrintPlanWithStatsTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { + protected: + void SetUp() override { + OperatorTestBase::SetUp(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } +}; struct ExpectedLine { std::string line; @@ -87,7 +112,7 @@ void ensureTaskCompletion(exec::Task* task) { // printPlanWithStats. A failure likely means that the documentation needs an // update as well. -TEST_F(PrintPlanWithStatsTest, DISABLED_innerJoinWithTableScan) { +TEST_P(PrintPlanWithStatsTest, DISABLED_innerJoinWithTableScan) { const int32_t numSplits = 20; const int32_t numRowsProbe = 1024; const int32_t numRowsBuild = 100; @@ -137,7 +162,10 @@ TEST_F(PrintPlanWithStatsTest, DISABLED_innerJoinWithTableScan) { auto task = AssertQueryBuilder(op, duckDbQueryRunner_) - .splits(leftScanId, makeHiveConnectorSplits(leftFiles)) + .splits( + leftScanId, + connector::test::makeConnectorSplits( + GetParam().connectorName, leftFiles)) .assertResults( "SELECT t.c0, t.c1 + 1, t.c1 + u.c1 FROM t, u WHERE t.c0 = u.c0"); @@ -238,7 +266,7 @@ TEST_F(PrintPlanWithStatsTest, DISABLED_innerJoinWithTableScan) { {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}}); } -TEST_F(PrintPlanWithStatsTest, DISABLED_partialAggregateWithTableScan) { +TEST_P(PrintPlanWithStatsTest, DISABLED_partialAggregateWithTableScan) { RowTypePtr rowType{ ROW({"c0", "c1", "c2", "c3", "c4", "c5"}, {BIGINT(), INTEGER(), SMALLINT(), REAL(), DOUBLE(), VARCHAR()})}; @@ -249,7 +277,7 @@ TEST_F(PrintPlanWithStatsTest, DISABLED_partialAggregateWithTableScan) { for (const auto& numPrefetchSplit : numPrefetchSplits) { SCOPED_TRACE(fmt::format("numPrefetchSplit {}", numPrefetchSplit)); asyncDataCache_->clear(); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); auto op = @@ -264,7 +292,8 @@ TEST_F(PrintPlanWithStatsTest, DISABLED_partialAggregateWithTableScan) { .config( core::QueryConfig::kMaxSplitPreloadPerDriver, std::to_string(numPrefetchSplit)) - .splits(makeHiveConnectorSplits({filePath})) + .splits(connector::test::makeConnectorSplits( + GetParam().connectorName, {filePath})) .assertResults( "SELECT c5, max(c0), sum(c1), sum(c2), sum(c3), sum(c4) FROM tmp group by c5"); ensureTaskCompletion(task.get()); @@ -324,3 +353,9 @@ TEST_F(PrintPlanWithStatsTest, DISABLED_partialAggregateWithTableScan) { {" totalScanTime [ ]* sum: .+, count: .+, min: .+, max: .+"}}); } } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + PrintPlanWithStatsTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); diff --git a/bolt/exec/tests/RowBasedCompareTest.cpp b/bolt/exec/tests/RowBasedCompareTest.cpp index 72a307100..ea8274801 100644 --- a/bolt/exec/tests/RowBasedCompareTest.cpp +++ b/bolt/exec/tests/RowBasedCompareTest.cpp @@ -25,11 +25,11 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MemoryPool.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/RowBasedCompare.h" #include "bolt/exec/RowContainer.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/RowBasedSerde.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/HugeInt.h" #include "bolt/type/StringView.h" #include "bolt/type/Timestamp.h" @@ -40,6 +40,7 @@ #include "bolt/vector/tests/utils/VectorTestBase.h" using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt; using namespace bytedance::bolt::memory; namespace bytedance::bolt::exec::test { diff --git a/bolt/exec/tests/RowNumberTest.cpp b/bolt/exec/tests/RowNumberTest.cpp index 90e6feb48..58294a04f 100644 --- a/bolt/exec/tests/RowNumberTest.cpp +++ b/bolt/exec/tests/RowNumberTest.cpp @@ -29,11 +29,11 @@ */ #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" namespace bytedance::bolt::exec::test { class RowNumberTest : public OperatorTestBase { @@ -45,7 +45,7 @@ class RowNumberTest : public OperatorTestBase { #ifndef SPARK_COMPATIBLE TEST_F(RowNumberTest, spill) { - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto test = [&](int32_t vectorSize) { SCOPED_TRACE(vectorSize); @@ -252,7 +252,7 @@ TEST_F(RowNumberTest, maxSpillBytes) { } } testSettings[] = {{1 << 30, false}, {16 << 20, true}, {0, false}}; - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); for (const auto& testData : testSettings) { @@ -296,7 +296,7 @@ TEST_F(RowNumberTest, memoryUsage) { for (const auto& spillEnable : {false, true}) { auto queryCtx = core::QueryCtx::create(executor_.get()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); const std::string spillEnableConfig = std::to_string(spillEnable); std::shared_ptr task; diff --git a/bolt/exec/tests/RowStreamingWindowTest.cpp b/bolt/exec/tests/RowStreamingWindowTest.cpp index 6755fe8b8..582cdc115 100644 --- a/bolt/exec/tests/RowStreamingWindowTest.cpp +++ b/bolt/exec/tests/RowStreamingWindowTest.cpp @@ -16,14 +16,15 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/Window.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { diff --git a/bolt/exec/tests/RowToColumnVectorTest.cpp b/bolt/exec/tests/RowToColumnVectorTest.cpp index 3c9e8c51e..2420147b3 100644 --- a/bolt/exec/tests/RowToColumnVectorTest.cpp +++ b/bolt/exec/tests/RowToColumnVectorTest.cpp @@ -25,11 +25,11 @@ #include "bolt/common/base/CompareFlags.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/RowContainer.h" #include "bolt/exec/RowToColumnVector.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/RowBasedSerde.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/HugeInt.h" #include "bolt/type/StringView.h" #include "bolt/type/Timestamp.h" @@ -39,6 +39,7 @@ #include "bolt/vector/tests/utils/VectorTestBase.h" using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt; using namespace bytedance::bolt::memory; namespace bytedance::bolt::exec::test { diff --git a/bolt/exec/tests/SortAndWindowTest.cpp b/bolt/exec/tests/SortAndWindowTest.cpp index 7df6f8026..761d1d49b 100644 --- a/bolt/exec/tests/SortAndWindowTest.cpp +++ b/bolt/exec/tests/SortAndWindowTest.cpp @@ -16,14 +16,15 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { diff --git a/bolt/exec/tests/SortBufferTest.cpp b/bolt/exec/tests/SortBufferTest.cpp index 447408215..83a83c257 100644 --- a/bolt/exec/tests/SortBufferTest.cpp +++ b/bolt/exec/tests/SortBufferTest.cpp @@ -33,13 +33,14 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/Type.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt; using namespace bytedance::bolt::memory; namespace bytedance::bolt::functions::test { @@ -386,7 +387,7 @@ TEST_F(SortBufferTest, batchOutput) { TestScopedSpillInjection scopedSpillInjection(100); for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto spillConfig = common::SpillConfig( [&]() -> const std::string& { return spillDirectory->path; }, [&](uint64_t) {}, @@ -500,7 +501,7 @@ TEST_F(SortBufferTest, spill) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); // memory pool limit is 20M // Set 'kSpillableReservationGrowthPct' to an extreme large value to trigger // memory reservation failure and thus trigger disk spilling. @@ -577,7 +578,7 @@ TEST_F(SortBufferTest, spill) { } DEBUG_ONLY_TEST_F(SortBufferTest, reserveMemoryGetOutput) { - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto spillConfig = common::SpillConfig( [&]() -> const std::string& { return spillDirectory->getPath(); }, [&](uint64_t) {}, @@ -657,7 +658,7 @@ TEST_F(SortBufferTest, emptySpill) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto spillConfig = getSpillConfig(spillDirectory->path); auto sortBuffer = std::make_unique( inputType_, @@ -681,7 +682,7 @@ TEST_F(SortBufferTest, emptySpill) { } TEST_F(SortBufferTest, rowBasedSpillMemory) { - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); // memory pool limit is 20M // Set 'kSpillableReservationGrowthPct' to an extreme large value to trigger // memory reservation failure and thus trigger disk spilling. @@ -766,7 +767,7 @@ TEST_F(SortBufferTest, spillWithHybridModeValidateOutput) { for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto spillConfig = common::SpillConfig( [&]() -> const std::string& { return spillDirectory->path; }, [&](uint64_t) {}, diff --git a/bolt/exec/tests/SortWindowTest.cpp b/bolt/exec/tests/SortWindowTest.cpp index 8c7a361df..92881aade 100644 --- a/bolt/exec/tests/SortWindowTest.cpp +++ b/bolt/exec/tests/SortWindowTest.cpp @@ -16,15 +16,16 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/Window.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { diff --git a/bolt/exec/tests/SpillTest.cpp b/bolt/exec/tests/SpillTest.cpp index 9a0ae55ca..03590d6b4 100644 --- a/bolt/exec/tests/SpillTest.cpp +++ b/bolt/exec/tests/SpillTest.cpp @@ -36,16 +36,16 @@ #include "bolt/common/base/RuntimeMetrics.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/OperatorUtils.h" #include "bolt/exec/Spill.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/type/Timestamp.h" #include "bolt/vector/tests/utils/VectorTestBase.h" using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::filesystems; -using bytedance::bolt::exec::test::TempDirectoryPath; +using bytedance::bolt::test::TempDirectoryPath; namespace { static const int64_t kGB = 1'000'000'000; @@ -87,7 +87,7 @@ class SpillTest : public ::testing::TestWithParam, void SetUp() override { allocator_ = memory::memoryManager()->allocator(); - tempDir_ = exec::test::TempDirectoryPath::create(); + tempDir_ = bytedance::bolt::test::TempDirectoryPath::create(); if (!isRegisteredVectorSerde()) { bytedance::bolt::serializer::presto::PrestoVectorSerde:: registerVectorSerde(); @@ -471,7 +471,7 @@ TEST_P(SpillTest, DISABLED_spillState) { TEST_P(SpillTest, spillTimestamp) { // Verify that timestamp type retains it nanosecond precision when spilled and // read back. - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); std::vector emptyCompareFlags; const std::string spillPath = tempDirectory->path + "/test"; std::vector timeValues = { diff --git a/bolt/exec/tests/SpillableWindowTest.cpp b/bolt/exec/tests/SpillableWindowTest.cpp index c52ffd7fc..7d1769d6f 100644 --- a/bolt/exec/tests/SpillableWindowTest.cpp +++ b/bolt/exec/tests/SpillableWindowTest.cpp @@ -16,14 +16,15 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/Window.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { diff --git a/bolt/exec/tests/SpillerAggregateBenchmarkTest.cpp b/bolt/exec/tests/SpillerAggregateBenchmarkTest.cpp index 7699ed670..c77db4ba7 100644 --- a/bolt/exec/tests/SpillerAggregateBenchmarkTest.cpp +++ b/bolt/exec/tests/SpillerAggregateBenchmarkTest.cpp @@ -58,7 +58,8 @@ int main(int argc, char* argv[]) { "The spiller type {} is not one of [AGGREGATE_INPUT, AGGREGATE_OUTPUT], the aggregate spiller dose not support it.", spillerTypeName); } - auto test = std::make_unique(spillerType); + auto test = + std::make_unique(spillerType); test->setUp(); test->run(); test->printStats(); diff --git a/bolt/exec/tests/SpillerBenchmarkBase.cpp b/bolt/exec/tests/SpillerBenchmarkBase.cpp index c4836f355..983a40d3b 100644 --- a/bolt/exec/tests/SpillerBenchmarkBase.cpp +++ b/bolt/exec/tests/SpillerBenchmarkBase.cpp @@ -37,9 +37,9 @@ #include "bolt/common/compression/Compression.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MmapAllocator.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/Spiller.h" #include "bolt/exec/tests/SpillerBenchmarkBase.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" DEFINE_string( @@ -120,7 +120,7 @@ void SpillerBenchmarkBase::setUp() { } if (FLAGS_spiller_benchmark_path.empty()) { - tempDir_ = exec::test::TempDirectoryPath::create(); + tempDir_ = bytedance::bolt::test::TempDirectoryPath::create(); spillDir_ = tempDir_->path; } else { spillDir_ = FLAGS_spiller_benchmark_path; diff --git a/bolt/exec/tests/SpillerBenchmarkBase.h b/bolt/exec/tests/SpillerBenchmarkBase.h index fc41b8f3d..40fbdb82a 100644 --- a/bolt/exec/tests/SpillerBenchmarkBase.h +++ b/bolt/exec/tests/SpillerBenchmarkBase.h @@ -33,8 +33,8 @@ #include #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MmapAllocator.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/Spiller.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/type/Type.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" @@ -78,7 +78,7 @@ class SpillerBenchmarkBase { std::unique_ptr vectorFuzzer_; std::vector rowVectors_; std::unique_ptr executor_; - std::shared_ptr tempDir_; + std::shared_ptr tempDir_; std::string spillDir_; std::shared_ptr fs_; common::SpillConfig spillConfig_; diff --git a/bolt/exec/tests/SpillerTest.cpp b/bolt/exec/tests/SpillerTest.cpp index 3febc35c5..b7586566c 100644 --- a/bolt/exec/tests/SpillerTest.cpp +++ b/bolt/exec/tests/SpillerTest.cpp @@ -42,6 +42,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; using bytedance::bolt::filesystems::FileSystem; @@ -152,7 +153,7 @@ class SpillerTest : public exec::test::RowContainerTestBase { void SetUp() override { RowContainerTestBase::SetUp(); rng_.seed(1); - tempDirPath_ = exec::test::TempDirectoryPath::create(); + tempDirPath_ = bytedance::bolt::test::TempDirectoryPath::create(); fs_ = filesystems::getFileSystem(tempDirPath_->path, nullptr); rowType_ = ROW({ {"bool_val", BOOLEAN()}, diff --git a/bolt/exec/tests/TableScanTest.cpp b/bolt/exec/tests/TableScanTest.cpp index eeb974ba9..71994f787 100644 --- a/bolt/exec/tests/TableScanTest.cpp +++ b/bolt/exec/tests/TableScanTest.cpp @@ -34,6 +34,8 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/caching/AsyncDataCache.h" #include "bolt/common/memory/MemoryArbitrator.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/connectors/hive/HiveConfig.h" #include "bolt/connectors/hive/HiveConnector.h" @@ -47,7 +49,6 @@ #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/expression/ExprToSubfieldFilter.h" #include "bolt/functions/sparksql/registration/Register.h" #include "bolt/type/Timestamp.h" @@ -66,6 +67,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::common::test; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; DECLARE_int32(cache_prefetch_min_pct); @@ -120,14 +122,16 @@ class TableScanTest : public virtual HiveConnectorTestBase { std::shared_ptr assertQuery( const PlanNodePtr& plan, - const std::vector>& filePaths, + const std::vector>& + filePaths, const std::string& duckDbSql) { return HiveConnectorTestBase::assertQuery(plan, filePaths, duckDbSql); } std::shared_ptr assertQuery( const PlanNodePtr& plan, - const std::vector>& filePaths, + const std::vector>& + filePaths, const std::string& duckDbSql, const int32_t numPrefetchSplit) { return AssertQueryBuilder(plan, duckDbQueryRunner_) @@ -141,7 +145,8 @@ class TableScanTest : public virtual HiveConnectorTestBase { // Run query with spill enabled. std::shared_ptr assertQuery( const PlanNodePtr& plan, - const std::vector>& filePaths, + const std::vector>& + filePaths, const std::string& spillDirectory, const std::string& duckDbSql) { return AssertQueryBuilder(plan, duckDbQueryRunner_) @@ -276,7 +281,7 @@ class TableScanTest : public virtual HiveConnectorTestBase { TEST_F(TableScanTest, allColumns) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -302,7 +307,7 @@ TEST_F(TableScanTest, directBufferInputRawInputBytes) { makeFlatVector(kSize, folly::identity), makeFlatVector(kSize, folly::identity), }); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); createDuckDbTable({vector}); writeToFile(filePath->getPath(), {vector}); @@ -358,7 +363,7 @@ DEBUG_ONLY_TEST_F(TableScanTest, pendingCoalescedIoWhenTaskFailed) { for (int i = 0; i < numBatches; ++i) { tableInputs.push_back(fuzzer.fuzzInputRow(tableType)); } - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), tableInputs); auto plan = PlanBuilder(pool_.get()) @@ -413,7 +418,7 @@ TEST_F(TableScanTest, connectorStats) { for (size_t i = 0; i < 99; i++) { auto vectors = makeVectors(10, 10); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); auto plan = tableScanNode(); @@ -426,7 +431,7 @@ TEST_F(TableScanTest, connectorStats) { TEST_F(TableScanTest, columnAliases) { auto vectors = makeVectors(1, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -467,7 +472,7 @@ TEST_F(TableScanTest, columnAliases) { TEST_F(TableScanTest, partitionKeyAlias) { auto vectors = makeVectors(1, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -494,7 +499,7 @@ TEST_F(TableScanTest, partitionKeyAlias) { TEST_F(TableScanTest, columnPruning) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -531,7 +536,7 @@ TEST_F(TableScanTest, timestamp) { return row % 5 == 0; /* null every 5 rows */ })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); createDuckDbTable({rowVector}); @@ -608,12 +613,12 @@ DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) { // Prepare the data files and tables with 2/3 of them having no null row // vector. const size_t numFiles{20}; - std::vector> filePaths; + std::vector> filePaths; std::vector vectorsForDuckDb; filePaths.reserve(numFiles); vectorsForDuckDb.reserve(numFiles); for (auto i = 0; i < numFiles; ++i) { - filePaths.emplace_back(TempFilePath::create()); + filePaths.emplace_back(::bytedance::bolt::test::TempFilePath::create()); const auto& vec = (i % 3 == 0) ? rowVector : rowVectorNoNulls; writeToFile(filePaths.back()->path, vec); vectorsForDuckDb.emplace_back(vec); @@ -669,7 +674,7 @@ TEST_F(TableScanTest, subfieldPruningRowType) { auto columnType = ROW({"c", "d"}, {innerType, BIGINT()}); auto rowType = ROW({"e"}, {columnType}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::vector requiredSubfields; requiredSubfields.emplace_back("e.c"); @@ -724,7 +729,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterSubfieldsMissing) { auto columnType = ROW({"a", "b", "c"}, {BIGINT(), BIGINT(), BIGINT()}); auto rowType = ROW({"e"}, {columnType}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::vector requiredSubfields; requiredSubfields.emplace_back("e.c"); @@ -763,7 +768,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterRootFieldMissing) { auto columnType = ROW({"a", "b", "c"}, {BIGINT(), BIGINT(), BIGINT()}); auto rowType = ROW({"d", "e"}, {BIGINT(), columnType}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::unordered_map> assignments; @@ -800,7 +805,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterStruct) { auto structType = ROW({"a", "b"}, {BIGINT(), BIGINT()}); auto rowType = ROW({"c", "d"}, {structType, BIGINT()}); auto vectors = makeVectors(3, 10, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); enum { kNoOutput = 0, kWholeColumn = 1, kSubfieldOnly = 2 }; for (int outputColumn = kNoOutput; outputColumn <= kSubfieldOnly; @@ -886,7 +891,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterMap) { auto vector = makeRowVector( {"a", "b"}, {makeFlatVector(10, folly::identity), mapVector}); auto rowType = asRowType(vector->type()); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {vector}); enum { kNoOutput = 0, kWholeColumn = 1, kSubfieldOnly = 2 }; for (int outputColumn = kNoOutput; outputColumn <= kSubfieldOnly; @@ -972,7 +977,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterMapNullChecks) { auto vector = makeRowVector( {"a", "b"}, {makeFlatVector(10, folly::identity), mapVector}); auto rowType = asRowType(vector->type()); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {vector}); // Test different null check filters @@ -1148,7 +1153,7 @@ TEST_F(TableScanTest, subfieldPruningMapType) { vectors.push_back(makeRowVector({"c"}, {maps})); } auto rowType = asRowType(vectors[0]->type()); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::vector requiredSubfields; requiredSubfields.emplace_back("c[0]"); @@ -1223,7 +1228,7 @@ TEST_F(TableScanTest, subfieldPruningArrayType) { vectors.push_back(makeRowVector({"c"}, {arrays})); } auto rowType = asRowType(vectors[0]->type()); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::vector requiredSubfields; requiredSubfields.emplace_back("c[3]"); @@ -1418,7 +1423,7 @@ TEST_F(TableScanTest, constDictLazy) { [](auto row) { return row; }, [](auto row) { return row * 0.1; })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); createDuckDbTable({rowVector}); @@ -1461,7 +1466,7 @@ TEST_F(TableScanTest, constDictLazy) { TEST_F(TableScanTest, count) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); CursorParameters params; @@ -1500,7 +1505,7 @@ TEST_F(TableScanTest, batchSize) { ROW(std::move(names), std::vector(numColumns, BIGINT())); auto vector = makeVectors(1, numRows, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vector); createDuckDbTable(vector); @@ -1577,7 +1582,7 @@ TEST_F(TableScanTest, batchSize) { // double read and the 2nd split is ignored. TEST_F(TableScanTest, sequentialSplitNoDoubleRead) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); CursorParameters params; @@ -1607,7 +1612,7 @@ TEST_F(TableScanTest, sequentialSplitNoDoubleRead) { // ignored. TEST_F(TableScanTest, outOfOrderSplits) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); CursorParameters params; @@ -1637,7 +1642,7 @@ TEST_F(TableScanTest, outOfOrderSplits) { // double read, as expected. TEST_F(TableScanTest, splitDoubleRead) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); CursorParameters params; @@ -1749,7 +1754,7 @@ TEST_F(TableScanTest, waitForSplit) { TEST_F(TableScanTest, splitOffsetAndLength) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -1803,7 +1808,7 @@ TEST_F(TableScanTest, validFileNoData) { // An invalid (size = 0) file. TEST_F(TableScanTest, emptyFile) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); try { assertQuery( @@ -1819,7 +1824,7 @@ TEST_F(TableScanTest, emptyFile) { TEST_F(TableScanTest, partitionedTableVarcharKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -1829,7 +1834,7 @@ TEST_F(TableScanTest, partitionedTableVarcharKey) { TEST_F(TableScanTest, partitionedTableBigIntKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, BIGINT(), "123456789123456789"); @@ -1838,7 +1843,7 @@ TEST_F(TableScanTest, partitionedTableBigIntKey) { TEST_F(TableScanTest, partitionedTableIntegerKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, INTEGER(), "123456789"); @@ -1847,7 +1852,7 @@ TEST_F(TableScanTest, partitionedTableIntegerKey) { TEST_F(TableScanTest, partitionedTableSmallIntKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, SMALLINT(), "1"); @@ -1856,7 +1861,7 @@ TEST_F(TableScanTest, partitionedTableSmallIntKey) { TEST_F(TableScanTest, partitionedTableTinyIntKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, TINYINT(), "1"); @@ -1865,7 +1870,7 @@ TEST_F(TableScanTest, partitionedTableTinyIntKey) { TEST_F(TableScanTest, partitionedTableBooleanKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, BOOLEAN(), "0"); @@ -1874,7 +1879,7 @@ TEST_F(TableScanTest, partitionedTableBooleanKey) { TEST_F(TableScanTest, partitionedTableRealKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, REAL(), "3.5"); @@ -1883,7 +1888,7 @@ TEST_F(TableScanTest, partitionedTableRealKey) { TEST_F(TableScanTest, partitionedTableDoubleKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); testPartitionedTable(filePath->path, DOUBLE(), "3.5"); @@ -1892,7 +1897,7 @@ TEST_F(TableScanTest, partitionedTableDoubleKey) { TEST_F(TableScanTest, partitionedTableDateKey) { auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); createDuckDbTable(vectors); const std::string partitionValue = "2023-10-27"; @@ -2890,7 +2895,7 @@ TEST_F(TableScanTest, integerNotEqualFilter) { makeFlatVector( size, [](auto row) { return row % 210; }, nullEvery(11))}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, rowVector); createDuckDbTable({rowVector}); @@ -2925,7 +2930,7 @@ TEST_F(TableScanTest, integerNotEqualFilter) { TEST_F(TableScanTest, floatingPointNotEqualFilter) { auto vectors = makeVectors(1, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -2963,7 +2968,7 @@ TEST_F(TableScanTest, stringNotEqualFilter) { return colourViews[row % colourViews.size()]; })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, rowVector); createDuckDbTable({rowVector}); @@ -3169,7 +3174,7 @@ TEST_F(TableScanTest, skipStridesForParentNulls) { auto b = makeFlatVector(10'000, folly::identity); auto a = makeRowVector({"b"}, {b}, [](auto i) { return i % 2 == 0; }); auto vector = makeRowVector({"a"}, {a}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {vector}); auto plan = PlanBuilder() .tableScan(asRowType(vector->type()), {"a.b IS NULL"}) @@ -3213,7 +3218,7 @@ TEST_F(TableScanTest, remainingFilterConstantResult) { }), }; - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, data); createDuckDbTable(data); @@ -3232,7 +3237,7 @@ TEST_F(TableScanTest, remainingFilterConstantResult) { TEST_F(TableScanTest, aggregationPushdown) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -3357,7 +3362,7 @@ TEST_F(TableScanTest, decimalDisableAggregationPushdown) { size, [](auto row) { return row; }, nullptr, DECIMAL(18, 2)), }); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), {rowVector}); createDuckDbTable({rowVector}); @@ -3378,7 +3383,7 @@ TEST_F(TableScanTest, decimalDisableAggregationPushdown) { TEST_F(TableScanTest, bitwiseAggregationPushdown) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -3424,7 +3429,7 @@ TEST_F(TableScanTest, structLazy) { [](auto row) { return row; }, [](auto row) { return row * 0.1; })})}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); // Exclude struct columns as DuckDB doesn't support complex types yet. @@ -3452,7 +3457,7 @@ TEST_F(TableScanTest, interleaveLazyEager) { makeRowVector({makeFlatVector(kSize, folly::identity)})}); auto rows = makeRowVector({column}); auto rowType = asRowType(rows->type()); - auto lazyFile = TempFilePath::create(); + auto lazyFile = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(lazyFile->path, {rows}); auto rowsWithNulls = makeVectors(1, kSize, rowType); int numNonNull = 0; @@ -3464,7 +3469,7 @@ TEST_F(TableScanTest, interleaveLazyEager) { auto& c0c0 = c0->asUnchecked()->childAt(0); numNonNull += !c0c0->isNullAt(i); } - auto eagerFile = TempFilePath::create(); + auto eagerFile = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(eagerFile->path, rowsWithNulls); ColumnHandleMap assignments = {{"c0", regularColumn("c0", column->type())}}; @@ -3495,7 +3500,7 @@ TEST_F(TableScanTest, lazyVectorAccessTwiceWithDifferentRows) { makeNullableFlatVector({0, 1, 2, 3}), }); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {data}); createDuckDbTable({data}); @@ -3541,7 +3546,7 @@ TEST_F(TableScanTest, structInArrayOrMap) { sizes, innerRow)}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); // Exclude struct columns as DuckDB doesn't support complex types yet. @@ -3561,7 +3566,7 @@ TEST_F(TableScanTest, addSplitsToFailedTask) { auto data = makeRowVector( {makeFlatVector(12'000, [](auto row) { return row % 5; })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {data}); core::PlanNodeId scanNodeId; @@ -3589,7 +3594,7 @@ TEST_F(TableScanTest, errorInLoadLazy) { auto cache = cache::AsyncDataCache::getInstance(); BOLT_CHECK_NOT_NULL(cache); auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); std::atomic counter = 0; @@ -3629,7 +3634,7 @@ TEST_F(TableScanTest, parallelPrepare) { auto data = makeRowVector( {makeFlatVector(10, [](auto row) { return row % 5; })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {data}); auto plan = exec::test::PlanBuilder(pool_.get()) @@ -3666,7 +3671,7 @@ TEST_F(TableScanTest, dictionaryMemo) { makeFlatVector({baseStrings[0], baseStrings[1]})); auto rows = makeRowVector({"a", "b"}, {dict, makeRowVector({"c"}, {dict})}); auto rowType = asRowType(rows->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {rows}); auto plan = PlanBuilder() .tableScan(rowType, {}, "a like '%m'") @@ -3697,7 +3702,7 @@ TEST_F(TableScanTest, reuseRowVector) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -3717,7 +3722,7 @@ TEST_F(TableScanTest, readMissingFields) { vector_size_t size = 10; auto iota = makeFlatVector(size, folly::identity); auto rowVector = makeRowVector({makeRowVector({iota, iota}), iota}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); // Create a row type with additional fields not present in the file. auto rowType = makeRowType( @@ -3734,7 +3739,7 @@ TEST_F(TableScanTest, readExtraFields) { vector_size_t size = 10; auto iota = makeFlatVector(size, folly::identity); auto rowVector = makeRowVector({makeRowVector({iota, iota}), iota}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); auto rowType = makeRowType({makeRowType({BIGINT()}), BIGINT()}); auto op = PlanBuilder().tableScan(rowType).planNode(); @@ -3753,7 +3758,7 @@ TEST_F(TableScanTest, readMissingFieldsFilesVary) { makeFlatVector(size, [](auto row) { return row; }), })}); - auto missingFieldsFilePath = TempFilePath::create(); + auto missingFieldsFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(missingFieldsFilePath->path, {rowVectorMissingFields}); auto rowVectorWithAllFields = makeRowVector({makeRowVector({ @@ -3763,7 +3768,7 @@ TEST_F(TableScanTest, readMissingFieldsFilesVary) { makeFlatVector(size, [](auto row) { return row + 1; }), })}); - auto allFieldsFilePath = TempFilePath::create(); + auto allFieldsFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(allFieldsFilePath->path, {rowVectorWithAllFields}); auto op = PlanBuilder() @@ -3834,7 +3839,7 @@ TEST_F(TableScanTest, readMissingFieldsInArray) { } auto arrayVector = makeArrayVector(offsets, rowVector); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {makeRowVector({arrayVector})}); // Create a row type with additional fields not present in the file. auto rowType = makeRowType( @@ -3891,7 +3896,7 @@ TEST_F(TableScanTest, readMissingFieldsInMap) { auto mapVector = makeMapVector(offsets, keysVector, valuesVector); auto arrayVector = makeArrayVector(offsets, valuesVector); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {makeRowVector({mapVector, arrayVector})}); // Create a row type with additional fields in the structure not present in @@ -4036,7 +4041,7 @@ TEST_F(TableScanTest, tableScanProjections) { makeFlatVector(size, [](auto row) { return row + 3; }), }); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); auto testQueryRow = [&](const std::vector& projections) { @@ -4108,7 +4113,7 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) { return fruitViews[row % fruitViews.size()]; })}); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {rowVector}); // Create a row type with additional fields in the structure not present in @@ -4243,7 +4248,7 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) { TEST_F(TableScanTest, varbinaryPartitionKey) { auto vectors = makeVectors(1, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); createDuckDbTable(vectors); @@ -4284,7 +4289,7 @@ TEST_F(TableScanTest, timestampPartitionKey) { }), }); auto vectors = makeVectors(1, 1); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, vectors); ColumnHandleMap assignments = {{"t", partitionKey("t", TIMESTAMP())}}; std::vector> splits; @@ -4308,7 +4313,7 @@ TEST_F(TableScanTest, paimonDeletionVector) { auto iota = makeFlatVector(15, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 7") @@ -4438,7 +4443,7 @@ TEST_F(TableScanTest, structMatchByName) { const auto address = makeFlatVector({"567 Maple Drive"}); auto vector = makeRowVector({"id", "name", "address"}, {id, name, address}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->getPath(), {vector}); // Add one non-existing subfield 'middle' to the 'name' field and rename filed @@ -4499,7 +4504,7 @@ DEBUG_ONLY_TEST_F( TableScanTest, DISABLED_memoryArbitrationByTableScanAllocation) { auto vectors = makeVectors(10, 1'000); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); createDuckDbTable(vectors); @@ -4593,7 +4598,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenPrepareDisable) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4625,7 +4630,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenPrepareAttempt3) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4663,7 +4668,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenPrepareCanIgnore) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4697,7 +4702,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenNextDisable) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4731,7 +4736,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenNextAttempt3) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4768,7 +4773,7 @@ TEST_F(TableScanTest, ignoreCorruptFileWhenNextCanIgnore) { auto iota = makeFlatVector(10, folly::identity); auto data = makeRowVector({iota, makeRowVector({iota})}); auto rowType = asRowType(data->type()); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, {data}); auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 < 5") @@ -4794,7 +4799,7 @@ TEST_F(TableScanTest, filterMissingFields) { constexpr int kSize = 10; auto iota = makeFlatVector(kSize, folly::identity); auto data = makeRowVector({makeRowVector({iota})}); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->getPath(), {data}); auto schema = makeRowType({ makeRowType({BIGINT(), BIGINT()}), @@ -4851,10 +4856,10 @@ TEST_F(TableScanTest, dynamicFilterWithRowIndexColumn) { connector::hive::HiveColumnHandle::ColumnType::kRowIndex, BIGINT(), BIGINT()); - std::shared_ptr files[2]; - files[0] = TempFilePath::create(); + std::shared_ptr<::bytedance::bolt::test::TempFilePath> files[2]; + files[0] = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(files[0]->getPath(), {aVector}); - files[1] = TempFilePath::create(); + files[1] = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(files[1]->getPath(), {bVector}); auto planNodeIdGenerator = std::make_shared(); core::PlanNodeId aScanId; @@ -4886,7 +4891,7 @@ TEST_F(TableScanTest, rowNumberInRemainingFilter) { auto vector = makeRowVector({ makeFlatVector(kSize, folly::identity), }); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->getPath(), {vector}); auto outputType = ROW({"c0"}, {BIGINT()}); auto remainingFilter = parseExpr("r1 % 2 == 0", ROW({"r1"}, {BIGINT()})); @@ -4923,7 +4928,7 @@ TEST_F(TableScanTest, duplicateFieldProject) { makeFlatVector({"Alice", "John"}), }); - auto file = TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->getPath(), vector); createDuckDbTable({vector}); diff --git a/bolt/exec/tests/TableWriteTest.cpp b/bolt/exec/tests/TableWriteTest.cpp index de4a540d7..b41b6603d 100644 --- a/bolt/exec/tests/TableWriteTest.cpp +++ b/bolt/exec/tests/TableWriteTest.cpp @@ -31,6 +31,8 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/connectors/hive/HiveConfig.h" #include "bolt/connectors/hive/HivePartitionFunction.h" @@ -40,7 +42,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "bolt/vector/fuzzer/VectorFuzzer.h" #include "folly/dynamic.h" @@ -57,6 +58,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; @@ -289,7 +291,8 @@ class TableWriteTest : public HiveConnectorTestBase { std::shared_ptr assertQueryWithWriterConfigs( const core::PlanNodePtr& plan, - std::vector> filePaths, + std::vector> + filePaths, const std::string& duckDbSql, bool spillEnabled = false) { std::vector splits; @@ -310,7 +313,8 @@ class TableWriteTest : public HiveConnectorTestBase { .splits(splits) .assertResults(duckDbSql); } - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); TestScopedSpillInjection scopedSpillInjection(100); return AssertQueryBuilder(plan, duckDbQueryRunner_) .spillDirectory(spillDirectory->path) @@ -348,7 +352,8 @@ class TableWriteTest : public HiveConnectorTestBase { .assertResults(duckDbSql); } - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); TestScopedSpillInjection scopedSpillInjection(100); return AssertQueryBuilder(plan, duckDbQueryRunner_) .spillDirectory(spillDirectory->path) @@ -381,7 +386,8 @@ class TableWriteTest : public HiveConnectorTestBase { .copyResults(pool()); } - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); return AssertQueryBuilder(plan, duckDbQueryRunner_) .spillDirectory(spillDirectory->path) .maxDrivers( @@ -1107,7 +1113,7 @@ TEST_F(BasicTableWriteTest, roundTrip) { size, [](auto row) { return row * 2; }, nullEvery(7)), }); - auto sourceFilePath = TempFilePath::create(); + auto sourceFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(sourceFilePath->path, data); auto targetDirectoryPath = TempDirectoryPath::create(); @@ -3647,7 +3653,7 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, reclaimFromTableWriter) { } }))); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() @@ -3734,7 +3740,7 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, reclaimFromSortTableWriter) { } }))); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() @@ -3825,7 +3831,7 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, writerFlushThreshold) { } }))); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() @@ -3911,7 +3917,8 @@ DEBUG_ONLY_TEST_F( {fmt::format("sum({})", TableWriteTraits::rowCountColumnName())}) .planNode(); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); AssertQueryBuilder(duckDbQueryRunner_) .queryCtx(queryCtx) .maxDrivers(1) @@ -4003,7 +4010,8 @@ DEBUG_ONLY_TEST_F( {fmt::format("sum({})", TableWriteTraits::rowCountColumnName())}) .planNode(); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); AssertQueryBuilder(duckDbQueryRunner_) .queryCtx(queryCtx) .maxDrivers(1) @@ -4092,7 +4100,8 @@ DEBUG_ONLY_TEST_F( .planNode(); const auto spillStats = common::globalSpillStats(); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); AssertQueryBuilder(duckDbQueryRunner_) .queryCtx(queryCtx) .maxDrivers(1) @@ -4163,7 +4172,8 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, tableFileWriteError) { BOLT_FAIL("inject writer error"); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); const auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() .values(vectors) @@ -4252,7 +4262,8 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, tableWriteSpillUseMoreMemory) { injectedWriterAllocation.free(); }))); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); const auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() .values(vectors) @@ -4343,7 +4354,8 @@ DEBUG_ONLY_TEST_F(TableWriterArbitrationTest, tableWriteReclaimOnClose) { std::function( [&](dwrf::Writer* writer) { fakeAllocation.free(); })); - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); const auto outputDirectory = TempDirectoryPath::create(); auto writerPlan = PlanBuilder() diff --git a/bolt/exec/tests/TaskTest.cpp b/bolt/exec/tests/TaskTest.cpp index 05d7f0742..7ba58cd41 100644 --- a/bolt/exec/tests/TaskTest.cpp +++ b/bolt/exec/tests/TaskTest.cpp @@ -30,22 +30,29 @@ #include "bolt/exec/Task.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/file/FileSystems.h" #include "bolt/common/future/BoltPromise.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" +#include "bolt/connectors/ConnectorNames.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" #include "bolt/exec/OutputBufferManager.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/Values.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/Cursor.h" -#include "bolt/exec/tests/utils/HiveConnectorTestBase.h" +#include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/QueryAssertions.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "folly/experimental/EventCount.h" + using namespace bytedance::bolt; using namespace bytedance::bolt::common::testutil; +using namespace bytedance::bolt::test; + namespace bytedance::bolt::exec::test { namespace { // A test join node whose build is skewed in terms of process time. The driver @@ -468,7 +475,9 @@ class TestBadMemoryTranslator : public exec::Operator::PlanNodeTranslator { } }; } // namespace -class TaskTest : public HiveConnectorTestBase { +class TaskTest : public OperatorTestBase, + public ::testing::WithParamInterface< + connector::test::ConnectorTestParam> { protected: static void SetUpTestCase() { FLAGS_bolt_testing_enable_arbitration = true; @@ -480,7 +489,26 @@ class TaskTest : public HiveConnectorTestBase { OperatorTestBase::TearDownTestCase(); } - static std::pair, std::vector> + void SetUp() override { + OperatorTestBase::SetUp(); + filesystems::registerLocalFileSystem(); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + GetParam().connectorName, + GetParam().connectorId, + ioExecutor_.get(), + emptyConfig, + GetParam().factoryRegistrar); + } + + void TearDown() override { + connector::test::unregisterTestConnector( + GetParam().connectorName, GetParam().connectorId); + OperatorTestBase::TearDown(); + } + + std::pair, std::vector> executeSerial( core::PlanFragment plan, const std::unordered_map>& @@ -494,7 +522,10 @@ class TaskTest : public HiveConnectorTestBase { for (const auto& [nodeId, paths] : filePaths) { for (const auto& path : paths) { - task->addSplit(nodeId, exec::Split(makeHiveConnectorSplit(path))); + task->addSplit( + nodeId, + exec::Split(connector::test::makeConnectorSplit( + GetParam().connectorName, path))); } task->noMoreSplits(nodeId); } @@ -528,7 +559,7 @@ class TaskTest : public HiveConnectorTestBase { } }; -TEST_F(TaskTest, wrongPlanNodeForSplit) { +TEST_P(TaskTest, wrongPlanNodeForSplit) { auto connectorSplit = std::make_shared( "test", "file:/tmp/abc", @@ -609,7 +640,7 @@ TEST_F(TaskTest, wrongPlanNodeForSplit) { errorMessage) } -TEST_F(TaskTest, duplicatePlanNodeIds) { +TEST_P(TaskTest, duplicatePlanNodeIds) { auto plan = PlanBuilder() .tableScan(ROW({"a", "b"}, {INTEGER(), DOUBLE()})) .hashJoin( @@ -646,7 +677,7 @@ TEST_F(TaskTest, duplicatePlanNodeIds) { // task. Setting error requires to acquire the same task lock again. // 6. Since we use immediate executor to execute these futures, a deadlock // happens. -TEST_F(TaskTest, testTerminateDeadlock) { +TEST_P(TaskTest, testTerminateDeadlock) { const int64_t kSlowJoinBuildDelaySeconds = 2; const int64_t kTaskAbortDelaySeconds = 1; const int64_t kMaxErrorTimeSeconds = 3; @@ -706,7 +737,7 @@ TEST_F(TaskTest, testTerminateDeadlock) { cursor->task()->toString().find("zombie drivers:"), std::string::npos); } -TEST_F(TaskTest, singleThreadedExecution) { +TEST_P(TaskTest, singleThreadedExecution) { auto data = makeRowVector({ makeFlatVector(1'000, [](auto row) { return row; }), }); @@ -765,7 +796,7 @@ TEST_F(TaskTest, singleThreadedExecution) { ASSERT_EQ(numDeletedTasks + 1, Task::numDeletedTasks()); // Project + Aggregation over TableScan. - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->path, {data, data}); core::PlanNodeId scanId; @@ -786,14 +817,14 @@ TEST_F(TaskTest, singleThreadedExecution) { BOLT_ASSERT_THROW(executeSerial(plan), "division by zero"); } -TEST_F(TaskTest, singleThreadedHashJoin) { +TEST_P(TaskTest, singleThreadedHashJoin) { auto left = makeRowVector( {"t_c0", "t_c1"}, { makeFlatVector({1, 2, 3, 4}), makeFlatVector({10, 20, 30, 40}), }); - auto leftPath = TempFilePath::create(); + auto leftPath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(leftPath->path, {left}); auto right = makeRowVector( @@ -801,7 +832,7 @@ TEST_F(TaskTest, singleThreadedHashJoin) { { makeFlatVector({0, 1, 3, 5}), }); - auto rightPath = TempFilePath::create(); + auto rightPath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(rightPath->path, {right}); auto planNodeIdGenerator = std::make_shared(); @@ -835,13 +866,13 @@ TEST_F(TaskTest, singleThreadedHashJoin) { } } -TEST_F(TaskTest, singleThreadedCrossJoin) { +TEST_P(TaskTest, singleThreadedCrossJoin) { auto left = makeRowVector({"t_c0"}, {makeFlatVector({1, 2, 3})}); - auto leftPath = TempFilePath::create(); + auto leftPath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(leftPath->path, {left}); auto right = makeRowVector({"u_c0"}, {makeFlatVector({10, 20})}); - auto rightPath = TempFilePath::create(); + auto rightPath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(rightPath->path, {right}); auto planNodeIdGenerator = std::make_shared(); @@ -872,7 +903,7 @@ TEST_F(TaskTest, singleThreadedCrossJoin) { } } -TEST_F(TaskTest, singleThreadedExecutionExternalBlockable) { +TEST_P(TaskTest, singleThreadedExecutionExternalBlockable) { exec::Operator::registerOperator( std::make_unique()); auto data = makeRowVector({ @@ -943,7 +974,7 @@ TEST_F(TaskTest, singleThreadedExecutionExternalBlockable) { EXPECT_EQ(3, results.size()); } -TEST_F(TaskTest, supportSerialExecutionMode) { +TEST_P(TaskTest, supportSerialExecutionMode) { auto plan = PlanBuilder() .tableScan(ROW({"c0"}, {BIGINT()})) .project({"c0 % 10"}) @@ -959,7 +990,7 @@ TEST_F(TaskTest, supportSerialExecutionMode) { ""); } -TEST_F(TaskTest, updateBroadCastOutputBuffers) { +TEST_P(TaskTest, updateBroadCastOutputBuffers) { auto plan = PlanBuilder() .tableScan(ROW({"c0"}, {BIGINT()})) .project({"c0 % 10"}) @@ -1006,7 +1037,7 @@ TEST_F(TaskTest, updateBroadCastOutputBuffers) { } } -DEBUG_ONLY_TEST_F(TaskTest, outputDriverFinishEarly) { +DEBUG_ONLY_TEST_P(TaskTest, outputDriverFinishEarly) { const int32_t numBatches = 10; std::vector dataBatches; dataBatches.reserve(numBatches); @@ -1090,7 +1121,7 @@ DEBUG_ONLY_TEST_F(TaskTest, outputDriverFinishEarly) { } /// Test that we export operator stats for unfinished (running) operators. -DEBUG_ONLY_TEST_F(TaskTest, liveStats) { +DEBUG_ONLY_TEST_P(TaskTest, liveStats) { constexpr int32_t numBatches = 10; std::vector dataBatches; dataBatches.reserve(numBatches); @@ -1178,7 +1209,7 @@ DEBUG_ONLY_TEST_F(TaskTest, liveStats) { EXPECT_EQ(terminationTimeMs, task->taskStats().terminationTimeMs); } -TEST_F(TaskTest, outputBufferSize) { +TEST_P(TaskTest, outputBufferSize) { constexpr int32_t numBatches = 10; std::vector dataBatches; dataBatches.reserve(numBatches); @@ -1223,7 +1254,7 @@ TEST_F(TaskTest, outputBufferSize) { task->requestCancel(); } -DEBUG_ONLY_TEST_F(TaskTest, inconsistentExecutionMode) { +DEBUG_ONLY_TEST_P(TaskTest, inconsistentExecutionMode) { { // Scenario 1: Parallel execution starts first then kicks in Serial // execution. @@ -1285,7 +1316,7 @@ DEBUG_ONLY_TEST_F(TaskTest, inconsistentExecutionMode) { } } -DEBUG_ONLY_TEST_F(TaskTest, findPeerOperators) { +DEBUG_ONLY_TEST_P(TaskTest, findPeerOperators) { const std::vector probeVectors = {makeRowVector( {"t_c0", "t_c1"}, { @@ -1354,7 +1385,7 @@ DEBUG_ONLY_TEST_F(TaskTest, findPeerOperators) { } } -DEBUG_ONLY_TEST_F(TaskTest, raceBetweenTaskPauseAndTerminate) { +DEBUG_ONLY_TEST_P(TaskTest, raceBetweenTaskPauseAndTerminate) { const std::vector values = {makeRowVector( {"t_c0", "t_c1"}, { @@ -1427,7 +1458,7 @@ DEBUG_ONLY_TEST_F(TaskTest, raceBetweenTaskPauseAndTerminate) { taskThread.join(); } -TEST_F(TaskTest, driverCreationMemoryAllocationCheck) { +TEST_P(TaskTest, driverCreationMemoryAllocationCheck) { exec::Operator::registerOperator(std::make_unique()); auto data = makeRowVector({ makeFlatVector(1'000, [](auto row) { return row; }), @@ -1465,7 +1496,7 @@ TEST_F(TaskTest, driverCreationMemoryAllocationCheck) { } } -TEST_F(TaskTest, spillDirectoryLifecycleManagement) { +TEST_P(TaskTest, spillDirectoryLifecycleManagement) { // Marks the spill directory as not already created and ensures that the Task // handles creating it on first use and eventually deleting it on destruction. auto data = makeRowVector({ @@ -1489,7 +1520,7 @@ TEST_F(TaskTest, spillDirectoryLifecycleManagement) { auto cursor = TaskCursor::create(params); std::shared_ptr task = cursor->task(); - auto rootTempDir = exec::test::TempDirectoryPath::create(); + auto rootTempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto tmpDirectoryPath = rootTempDir->path + "/spillDirectoryLifecycleManagement"; task->setSpillDirectory(tmpDirectoryPath, false); @@ -1505,7 +1536,7 @@ TEST_F(TaskTest, spillDirectoryLifecycleManagement) { OperatorTestBase::deleteTaskAndCheckSpillDirectory(task); } -TEST_F(TaskTest, spillDirNotCreated) { +TEST_P(TaskTest, spillDirNotCreated) { // Verify that no spill directory is created if spilling is not engaged. const std::vector probeVectors = {makeRowVector( {"t_c0", "t_c1"}, @@ -1546,7 +1577,7 @@ TEST_F(TaskTest, spillDirNotCreated) { auto cursor = TaskCursor::create(params); auto* task = cursor->task().get(); - auto rootTempDir = exec::test::TempDirectoryPath::create(); + auto rootTempDir = bytedance::bolt::test::TempDirectoryPath::create(); auto tmpDirectoryPath = rootTempDir->path + "/spillDirNotCreated"; task->setSpillDirectory(tmpDirectoryPath, false); @@ -1563,7 +1594,7 @@ TEST_F(TaskTest, spillDirNotCreated) { EXPECT_FALSE(fs->exists(tmpDirectoryPath)); } -DEBUG_ONLY_TEST_F(TaskTest, resumeAfterTaskFinish) { +DEBUG_ONLY_TEST_P(TaskTest, resumeAfterTaskFinish) { auto probeVector = makeRowVector( {"t_c0"}, {makeFlatVector(10, [](auto row) { return row; })}); auto buildVector = makeRowVector( @@ -1614,7 +1645,7 @@ DEBUG_ONLY_TEST_F(TaskTest, resumeAfterTaskFinish) { waitForAllTasksToBeDeleted(); } -DEBUG_ONLY_TEST_F( +DEBUG_ONLY_TEST_P( TaskTest, singleThreadedLongRunningOperatorInTaskReclaimerAbort) { auto data = makeRowVector({ @@ -1689,7 +1720,7 @@ DEBUG_ONLY_TEST_F( }); } -DEBUG_ONLY_TEST_F(TaskTest, longRunningOperatorInTaskReclaimerAbort) { +DEBUG_ONLY_TEST_P(TaskTest, longRunningOperatorInTaskReclaimerAbort) { auto data = makeRowVector({ makeFlatVector(1'000, [](auto row) { return row; }), }); @@ -1747,7 +1778,7 @@ DEBUG_ONLY_TEST_F(TaskTest, longRunningOperatorInTaskReclaimerAbort) { }); } -DEBUG_ONLY_TEST_F(TaskTest, taskReclaimStats) { +DEBUG_ONLY_TEST_P(TaskTest, taskReclaimStats) { const auto data = makeRowVector({ makeFlatVector(50, folly::identity), makeFlatVector(50, folly::identity), @@ -1799,7 +1830,7 @@ DEBUG_ONLY_TEST_F(TaskTest, taskReclaimStats) { waitForAllTasksToBeDeleted(); } -DEBUG_ONLY_TEST_F(TaskTest, driverEnqueAfterFailedAndPausedTask) { +DEBUG_ONLY_TEST_P(TaskTest, driverEnqueAfterFailedAndPausedTask) { const auto data = makeRowVector({ makeFlatVector(50, [](auto row) { return row; }), makeFlatVector(50, [](auto row) { return row; }), @@ -1847,4 +1878,11 @@ DEBUG_ONLY_TEST_F(TaskTest, driverEnqueAfterFailedAndPausedTask) { task.reset(); waitForAllTasksToBeDeleted(); } + +INSTANTIATE_TEST_SUITE_P( + Connectors, + TaskTest, + ::testing::ValuesIn(connector::test::paramsFor( + {std::string(connector::kHiveConnectorName)}))); + } // namespace bytedance::bolt::exec::test diff --git a/bolt/exec/tests/TopNRowNumberTest.cpp b/bolt/exec/tests/TopNRowNumberTest.cpp index 54e7fb5ea..c5aac6704 100644 --- a/bolt/exec/tests/TopNRowNumberTest.cpp +++ b/bolt/exec/tests/TopNRowNumberTest.cpp @@ -30,12 +30,13 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { @@ -125,7 +126,7 @@ TEST_F(TopNRowNumberTest, largeOutput) { createDuckDbTable(data); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto testLimit = [&](auto limit) { SCOPED_TRACE(fmt::format("Limit: {}", limit)); @@ -234,7 +235,7 @@ TEST_F(TopNRowNumberTest, manyPartitions) { createDuckDbTable(data); - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto testLimit = [&](auto limit, size_t outputBatchBytes = 1024) { SCOPED_TRACE(fmt::format("Limit: {}", limit)); @@ -390,7 +391,7 @@ TEST_F(TopNRowNumberTest, maxSpillBytes) { } } testSettings[] = {{1 << 30, false}, {13 << 20, true}, {0, false}}; - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto queryCtx = core::QueryCtx::create(executor_.get()); for (const auto& testData : testSettings) { diff --git a/bolt/exec/tests/TraceUtilTest.cpp b/bolt/exec/tests/TraceUtilTest.cpp index c1969d9b8..55094699c 100644 --- a/bolt/exec/tests/TraceUtilTest.cpp +++ b/bolt/exec/tests/TraceUtilTest.cpp @@ -34,10 +34,10 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/Trace.h" #include "bolt/exec/TraceUtil.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" -using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec::trace::test { class TraceUtilTest : public testing::Test { protected: diff --git a/bolt/exec/tests/WindowTest.cpp b/bolt/exec/tests/WindowTest.cpp index b81e7ee70..ee6a537b0 100644 --- a/bolt/exec/tests/WindowTest.cpp +++ b/bolt/exec/tests/WindowTest.cpp @@ -30,13 +30,14 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/window/WindowFunctionsRegistration.h" using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::exec { namespace { diff --git a/bolt/exec/tests/utils/ArbitratorTestUtil.cpp b/bolt/exec/tests/utils/ArbitratorTestUtil.cpp index 378ce68dd..bf40178b5 100644 --- a/bolt/exec/tests/utils/ArbitratorTestUtil.cpp +++ b/bolt/exec/tests/utils/ArbitratorTestUtil.cpp @@ -35,6 +35,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::memory; namespace bytedance::bolt::exec::test { @@ -116,7 +117,8 @@ QueryTestResult runHashJoinTask( QueryTestResult result; const auto plan = hashJoinPlan(vectors, result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) .spillDirectory(spillDirectory->getPath()) @@ -161,7 +163,8 @@ QueryTestResult runAggregateTask( QueryTestResult result; const auto plan = aggregationPlan(vectors, result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) @@ -207,7 +210,8 @@ QueryTestResult runOrderByTask( QueryTestResult result; const auto plan = orderByPlan(vectors, result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) .spillDirectory(spillDirectory->getPath()) @@ -252,7 +256,8 @@ QueryTestResult runRowNumberTask( QueryTestResult result; const auto plan = rowNumberPlan(vectors, result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) .spillDirectory(spillDirectory->getPath()) @@ -297,7 +302,8 @@ QueryTestResult runTopNTask( QueryTestResult result; const auto plan = topNPlan(vectors, result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) @@ -347,7 +353,8 @@ QueryTestResult runWriteTask( const auto outputDirectory = TempDirectoryPath::create(); auto plan = writePlan(vectors, outputDirectory->getPath(), result.planNodeId); if (enableSpilling) { - const auto spillDirectory = exec::test::TempDirectoryPath::create(); + const auto spillDirectory = + bytedance::bolt::test::TempDirectoryPath::create(); result.data = AssertQueryBuilder(plan) .serialExecution(serialExecution) diff --git a/bolt/exec/tests/utils/ArbitratorTestUtil.h b/bolt/exec/tests/utils/ArbitratorTestUtil.h index f1b88534f..607ef9cc8 100644 --- a/bolt/exec/tests/utils/ArbitratorTestUtil.h +++ b/bolt/exec/tests/utils/ArbitratorTestUtil.h @@ -33,12 +33,12 @@ #include #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/memory/MemoryPool.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/Driver.h" #include "bolt/exec/MemoryReclaimer.h" #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" namespace bytedance::bolt::exec::test { constexpr int64_t KB = 1024L; diff --git a/bolt/exec/tests/utils/CMakeLists.txt b/bolt/exec/tests/utils/CMakeLists.txt index 2e6b6d23f..a2c689314 100644 --- a/bolt/exec/tests/utils/CMakeLists.txt +++ b/bolt/exec/tests/utils/CMakeLists.txt @@ -25,10 +25,6 @@ # This modified file is released under the same license. # -------------------------------------------------------------------------- -bolt_add_library(bolt_temp_path TempDirectoryPath.cpp TempFilePath.cpp) - -target_link_libraries(bolt_temp_path bolt_exception) - bolt_add_library( bolt_exec_test_lib ArbitratorTestUtil.cpp @@ -61,6 +57,7 @@ target_link_libraries( bolt_dwio_common_test_utils bolt_file_test_utils bolt_type_fbhive + bolt_connector_test_lib bolt_hive_connector bolt_tpch_connector bolt_presto_serializer diff --git a/bolt/exec/tests/utils/HiveConnectorTestBase.cpp b/bolt/exec/tests/utils/HiveConnectorTestBase.cpp index 8c1b3ba18..a007984f3 100644 --- a/bolt/exec/tests/utils/HiveConnectorTestBase.cpp +++ b/bolt/exec/tests/utils/HiveConnectorTestBase.cpp @@ -31,10 +31,10 @@ #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempFilePath.h" +#include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveDataSink.h" -#include "bolt/dwio/common/tests/utils/BatchMaker.h" -#include "bolt/dwio/dwrf/reader/DwrfReader.h" -#include "bolt/dwio/dwrf/writer/Writer.h" +#include "bolt/connectors/tests/utils/ConnectorTestBase.h" namespace bytedance::bolt::exec::test { using connector::hive::HiveConnectorSplitBuilder; @@ -45,23 +45,22 @@ HiveConnectorTestBase::HiveConnectorTestBase() { void HiveConnectorTestBase::SetUp() { OperatorTestBase::SetUp(); - bytedance::bolt::connector::hive::CheckHiveConnectorFactoryInit< - bytedance::bolt::connector::hive::HiveConnectorFactory>(); - auto hiveConnector = - connector::getConnectorFactory(connector::kHiveConnectorName) - ->newConnector( - kHiveConnectorId, - std::make_shared( - std::unordered_map()), - ioExecutor_.get()); - connector::registerConnector(hiveConnector); + auto emptyConfig = std::make_shared( + std::unordered_map()); + connector::test::registerTestConnector( + connector::kHiveConnectorName, + kHiveConnectorId, + ioExecutor_.get(), + emptyConfig, + &connector::hive::registerHiveConnectorFactories); } void HiveConnectorTestBase::TearDown() { // Make sure all pending loads are finished or cancelled before unregister // connector. ioExecutor_.reset(); - connector::unregisterConnector(kHiveConnectorId); + connector::test::unregisterTestConnector( + connector::kHiveConnectorName, kHiveConnectorId); OperatorTestBase::TearDown(); } @@ -74,63 +73,15 @@ void HiveConnectorTestBase::resetHiveConnector( connector::registerConnector(hiveConnector); } -void HiveConnectorTestBase::writeToFile( - const std::string& filePath, - RowVectorPtr vector) { - writeToFile(filePath, std::vector{vector}); -} - -void HiveConnectorTestBase::writeToFile( - const std::string& filePath, - const std::vector& vectors, - std::shared_ptr config) { - bolt::dwrf::WriterOptions options; - options.config = config; - options.schema = vectors[0]->type(); - auto localWriteFile = std::make_unique(filePath, true, false); - auto sink = std::make_unique( - std::move(localWriteFile), filePath); - auto childPool = rootPool_->addAggregateChild("HiveConnectorTestBase.Writer"); - options.memoryPool = childPool.get(); - bytedance::bolt::dwrf::Writer writer{std::move(sink), options}; - for (size_t i = 0; i < vectors.size(); ++i) { - writer.write(vectors[i]); - } - writer.close(); -} - -std::vector HiveConnectorTestBase::makeVectors( - const RowTypePtr& rowType, - int32_t numVectors, - int32_t rowsPerVector) { - std::vector vectors; - for (int32_t i = 0; i < numVectors; ++i) { - auto vector = std::dynamic_pointer_cast( - bolt::test::BatchMaker::createBatch(rowType, rowsPerVector, *pool_)); - vectors.push_back(vector); - } - return vectors; -} - std::shared_ptr HiveConnectorTestBase::assertQuery( const core::PlanNodePtr& plan, - const std::vector>& filePaths, + const std::vector>& + filePaths, const std::string& duckDbSql) { return OperatorTestBase::assertQuery( plan, makeHiveConnectorSplits(filePaths), duckDbSql); } -std::vector> HiveConnectorTestBase::makeFilePaths( - int count) { - std::vector> filePaths; - - filePaths.reserve(count); - for (auto i = 0; i < count; ++i) { - filePaths.emplace_back(TempFilePath::create()); - } - return filePaths; -} - std::vector> HiveConnectorTestBase::makeHiveConnectorSplits( const std::string& filePath, @@ -200,7 +151,8 @@ HiveConnectorTestBase::makeColumnHandle( std::vector> HiveConnectorTestBase::makeHiveConnectorSplits( - const std::vector>& filePaths) { + const std::vector>& + filePaths) { std::vector> splits; for (auto filePath : filePaths) { splits.push_back(makeHiveConnectorSplit( diff --git a/bolt/exec/tests/utils/HiveConnectorTestBase.h b/bolt/exec/tests/utils/HiveConnectorTestBase.h index 07f7a7f2b..ffb128b4f 100644 --- a/bolt/exec/tests/utils/HiveConnectorTestBase.h +++ b/bolt/exec/tests/utils/HiveConnectorTestBase.h @@ -31,6 +31,7 @@ #pragma once #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" #include "bolt/connectors/hive/HiveDataSink.h" @@ -38,7 +39,6 @@ #include "bolt/dwio/dwrf/common/Config.h" #include "bolt/exec/Operator.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "bolt/type/tests/SubfieldFiltersBuilder.h" namespace bytedance::bolt::exec::test { @@ -57,32 +57,19 @@ class HiveConnectorTestBase : public OperatorTestBase { void resetHiveConnector( const std::shared_ptr& config); - void writeToFile(const std::string& filePath, RowVectorPtr vector); - - void writeToFile( - const std::string& filePath, - const std::vector& vectors, - std::shared_ptr config = - std::make_shared()); - - std::vector makeVectors( - const RowTypePtr& rowType, - int32_t numVectors, - int32_t rowsPerVector); - using OperatorTestBase::assertQuery; /// Assumes plan has a single TableScan node. std::shared_ptr assertQuery( const core::PlanNodePtr& plan, - const std::vector>& filePaths, + const std::vector>& + filePaths, const std::string& duckDbSql); - static std::vector> makeFilePaths(int count); - static std::vector> makeHiveConnectorSplits( - const std::vector>& filePaths); + const std::vector>& + filePaths); static std::shared_ptr makeHiveConnectorSplit( const std::string& filePath, diff --git a/bolt/exec/tests/utils/OperatorTestBase.cpp b/bolt/exec/tests/utils/OperatorTestBase.cpp index 45990021b..8f19d158b 100644 --- a/bolt/exec/tests/utils/OperatorTestBase.cpp +++ b/bolt/exec/tests/utils/OperatorTestBase.cpp @@ -34,10 +34,12 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/memory/MallocAllocator.h" #include "bolt/common/memory/SharedArbitrator.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/common/FileSink.h" -#include "bolt/exec/Exchange.h" -#include "bolt/exec/OutputBufferManager.h" +#include "bolt/dwio/common/tests/utils/BatchMaker.h" +#include "bolt/dwio/dwrf/common/Config.h" +#include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/LocalExchangeSource.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" @@ -176,6 +178,73 @@ void OperatorTestBase::TearDown() { resetMemory(); } +void OperatorTestBase::writeToFile( + const std::string& filePath, + RowVectorPtr vector) { + writeToFile(filePath, std::vector{std::move(vector)}); +} + +void OperatorTestBase::writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config) { + bolt::dwrf::WriterOptions options; + if (config == nullptr) { + config = std::make_shared(); + } + options.config = config; + options.schema = vectors[0]->type(); + auto localWriteFile = std::make_unique(filePath, true, false); + auto sink = std::make_unique( + std::move(localWriteFile), filePath); + auto childPool = rootPool_->addAggregateChild("OperatorTestBase.Writer"); + options.memoryPool = childPool.get(); + bytedance::bolt::dwrf::Writer writer{std::move(sink), options}; + for (const auto& vector : vectors) { + writer.write(vector); + } + writer.close(); +} + +void OperatorTestBase::writeToFile( + const std::string& path, + const VectorPtr& vector, + memory::MemoryPool* pool) { + bolt::dwrf::WriterOptions options; + options.schema = vector->type(); + options.memoryPool = pool; + auto writeFile = std::make_unique(path, true, false); + auto sink = + std::make_unique(std::move(writeFile), path); + bytedance::bolt::dwrf::Writer writer{std::move(sink), options}; + writer.write(vector); + writer.close(); +} + +std::vector OperatorTestBase::makeVectors( + const RowTypePtr& rowType, + int32_t numVectors, + int32_t rowsPerVector) { + std::vector vectors; + for (int32_t i = 0; i < numVectors; ++i) { + auto vector = std::dynamic_pointer_cast( + bolt::test::BatchMaker::createBatch(rowType, rowsPerVector, *pool_)); + vectors.push_back(vector); + } + return vectors; +} + +/*static*/ +std::vector> +OperatorTestBase::makeFilePaths(int count) { + std::vector> filePaths; + filePaths.reserve(count); + for (auto i = 0; i < count; ++i) { + filePaths.emplace_back(::bytedance::bolt::test::TempFilePath::create()); + } + return filePaths; +} + std::shared_ptr OperatorTestBase::assertQuery( const core::PlanNodePtr& plan, const std::vector>& diff --git a/bolt/exec/tests/utils/OperatorTestBase.h b/bolt/exec/tests/utils/OperatorTestBase.h index b876698b7..943ac36d0 100644 --- a/bolt/exec/tests/utils/OperatorTestBase.h +++ b/bolt/exec/tests/utils/OperatorTestBase.h @@ -33,18 +33,27 @@ #include #include -#include "bolt/common/caching/SsdCache.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/core/Expressions.h" #include "bolt/core/PlanNode.h" #include "bolt/exec/tests/utils/QueryAssertions.h" #include "bolt/parse/ExpressionsParser.h" -#include "bolt/type/Variant.h" #include "bolt/vector/FlatVector.h" #include "bolt/vector/tests/utils/VectorMaker.h" #include "bolt/vector/tests/utils/VectorTestBase.h" DECLARE_bool(bolt_testing_enable_arbitration); + +namespace bytedance::bolt::dwrf { +class Config; +} + +namespace bytedance::bolt::test { +class TempFilePath; +} + namespace bytedance::bolt::exec::test { + class OperatorTestBase : public testing::Test, public bolt::test::VectorTestBase { public: @@ -82,6 +91,32 @@ class OperatorTestBase : public testing::Test, /// By default, registers Presto-compatible serde. virtual void registerVectorSerde(); + /// Writes 'vector' to 'filePath' as a single-batch DWRF file. + void writeToFile(const std::string& filePath, RowVectorPtr vector); + + /// Writes 'vectors' to 'filePath' as a DWRF file. If 'config' is null, uses + /// a default-constructed dwrf::Config. + void writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config = nullptr); + + void writeToFile( + const std::string& path, + const VectorPtr& vector, + memory::MemoryPool* pool); + + /// Generates 'numVectors' RowVectors of 'rowType', each containing + /// 'rowsPerVector' rows of random data drawn from BatchMaker. + std::vector makeVectors( + const RowTypePtr& rowType, + int32_t numVectors, + int32_t rowsPerVector); + + /// Returns 'count' unique ::bytedance::bolt::test::TempFilePath handles. + static std::vector> + makeFilePaths(int count); + void createDuckDbTable(const std::vector& data) { duckDbQueryRunner_.createTable("tmp", data); } diff --git a/bolt/exec/tests/utils/PlanBuilder.cpp b/bolt/exec/tests/utils/PlanBuilder.cpp index fbabe7970..597814cf4 100644 --- a/bolt/exec/tests/utils/PlanBuilder.cpp +++ b/bolt/exec/tests/utils/PlanBuilder.cpp @@ -29,6 +29,7 @@ */ #include "bolt/exec/tests/utils/PlanBuilder.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/connectors/arrow/ArrowMemoryConnector.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/TableHandle.h" @@ -39,7 +40,6 @@ #include "bolt/exec/RoundRobinPartitionFunction.h" #include "bolt/exec/TableWriter.h" #include "bolt/exec/WindowFunction.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/expression/Expr.h" #include "bolt/expression/ExprToSubfieldFilter.h" #include "bolt/expression/FunctionCallToSpecialForm.h" diff --git a/bolt/exec/tests/utils/RowContainerTestBase.h b/bolt/exec/tests/utils/RowContainerTestBase.h index 99b192b82..c1516cf0f 100644 --- a/bolt/exec/tests/utils/RowContainerTestBase.h +++ b/bolt/exec/tests/utils/RowContainerTestBase.h @@ -33,10 +33,10 @@ #include #include #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/exec/RowContainer.h" #include "bolt/exec/VectorHasher.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/vector/tests/utils/VectorTestBase.h" namespace bytedance::bolt::exec::test { diff --git a/bolt/expression/tests/ExprTest.cpp b/bolt/expression/tests/ExprTest.cpp index 7350eeff1..e56f25566 100644 --- a/bolt/expression/tests/ExprTest.cpp +++ b/bolt/expression/tests/ExprTest.cpp @@ -39,7 +39,7 @@ #include "bolt/expression/Expr.h" #include "bolt/common/base/tests/GTestUtils.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/expression/CoalesceExpr.h" #include "bolt/expression/ConjunctExpr.h" #include "bolt/expression/ConstantExpr.h" @@ -2507,7 +2507,7 @@ TEST_P(ParameterizedExprTest, exceptionContext) { } // Enable saving vector and expression SQL for system errors only. - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); FLAGS_bolt_save_input_on_expression_system_failure_path = tempDirectory->path; try { diff --git a/bolt/expression/tests/ExpressionRunnerUnitTest.cpp b/bolt/expression/tests/ExpressionRunnerUnitTest.cpp index a2a0070b2..392317808 100644 --- a/bolt/expression/tests/ExpressionRunnerUnitTest.cpp +++ b/bolt/expression/tests/ExpressionRunnerUnitTest.cpp @@ -29,8 +29,8 @@ */ #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "bolt/expression/Expr.h" #include "bolt/expression/SignatureBinder.h" #include "bolt/expression/fuzzer/ExpressionFuzzer.h" @@ -59,9 +59,9 @@ class ExpressionRunnerUnitTest : public testing::Test, public VectorTestBase { }; TEST_F(ExpressionRunnerUnitTest, run) { - auto inputFile = exec::test::TempFilePath::create(); - auto sqlFile = exec::test::TempFilePath::create(); - auto resultFile = exec::test::TempFilePath::create(); + auto inputFile = ::bytedance::bolt::test::TempFilePath::create(); + auto sqlFile = ::bytedance::bolt::test::TempFilePath::create(); + auto resultFile = ::bytedance::bolt::test::TempFilePath::create(); const char* inputPath = inputFile->path.data(); const char* resultPath = resultFile->path.data(); const int vectorSize = 100; @@ -107,8 +107,8 @@ TEST_F(ExpressionRunnerUnitTest, persistAndReproComplexSql) { auto rowVector = makeRowVector(complexConstants); // Emulate a reproduce from complex constant SQL - auto sqlFile = exec::test::TempFilePath::create(); - auto complexConstantsFile = exec::test::TempFilePath::create(); + auto sqlFile = ::bytedance::bolt::test::TempFilePath::create(); + auto complexConstantsFile = ::bytedance::bolt::test::TempFilePath::create(); auto sqlPath = sqlFile->path.c_str(); auto complexConstantsPath = complexConstantsFile->path.c_str(); diff --git a/bolt/expression/tests/ExpressionVerifierUnitTest.cpp b/bolt/expression/tests/ExpressionVerifierUnitTest.cpp index d2b765857..7d3c011a9 100644 --- a/bolt/expression/tests/ExpressionVerifierUnitTest.cpp +++ b/bolt/expression/tests/ExpressionVerifierUnitTest.cpp @@ -36,7 +36,7 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/functions/Registerer.h" #include "bolt/parse/Expressions.h" #include "bolt/parse/ExpressionsParser.h" @@ -101,7 +101,7 @@ class ExpressionVerifierUnitTest : public testing::Test, public VectorTestBase { TEST_F(ExpressionVerifierUnitTest, persistReproInfo) { filesystems::registerLocalFileSystem(); - auto reproFolder = exec::test::TempDirectoryPath::create(); + auto reproFolder = bytedance::bolt::test::TempDirectoryPath::create(); const auto reproPath = reproFolder->path; auto localFs = filesystems::getFileSystem(reproPath, nullptr); diff --git a/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.cpp b/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.cpp index 741d8c8cc..006eea5b4 100644 --- a/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.cpp +++ b/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.cpp @@ -29,18 +29,18 @@ */ #include "bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h" -#include -#include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/connectors/hive/HiveConnector.h" #include "bolt/connectors/hive/HiveConnectorSplit.h" +#include "bolt/core/QueryConfig.h" #include "bolt/dwio/common/tests/utils/BatchMaker.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/AggregateCompanionSignatures.h" #include "bolt/exec/PlanNodeStats.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" -#include "bolt/exec/tests/utils/TempFilePath.h" #include "bolt/expression/Expr.h" #include "bolt/expression/SignatureBinder.h" @@ -65,19 +65,6 @@ void enableAbandonPartialAggregation(AssertQueryBuilder& queryBuilder) { } // namespace -std::vector AggregationTestBase::makeVectors( - const RowTypePtr& rowType, - vector_size_t size, - int numVectors) { - std::vector vectors; - for (int32_t i = 0; i < numVectors; ++i) { - auto vector = std::dynamic_pointer_cast( - bolt::test::BatchMaker::createBatch(rowType, size, *pool_)); - vectors.push_back(vector); - } - return vectors; -} - }; // namespace bytedance::bolt::functions::aggregate::test namespace bytedance::bolt::BaseStatsReporter { @@ -408,7 +395,7 @@ void AggregationTestBase::testAggregationsWithCompanion( builder.project(postAggregationProjections); } - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); AssertQueryBuilder queryBuilder(builder.planNode(), duckDbQueryRunner_); queryBuilder.configs(config) @@ -543,21 +530,6 @@ void AggregationTestBase::testAggregationsWithCompanion( namespace { -void writeToFile( - const std::string& path, - const VectorPtr& vector, - memory::MemoryPool* pool) { - dwrf::WriterOptions options; - options.schema = vector->type(); - options.memoryPool = pool; - auto writeFile = std::make_unique(path, true, false); - auto sink = - std::make_unique(std::move(writeFile), path); - dwrf::Writer writer(std::move(sink), options); - writer.write(vector); - writer.close(); -} - template class ScopedChange { public: @@ -595,11 +567,11 @@ void AggregationTestBase::testReadFromFiles( auto size2 = input->size() - size1; auto input1 = input->slice(0, size1); auto input2 = input->slice(size1, size2); - std::vector> files; + std::vector> files; std::vector splits; auto writerPool = rootPool_->addAggregateChild("AggregationTestBase.writer"); for (auto& vector : {input1, input2}) { - auto file = exec::test::TempFilePath::create(); + auto file = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(file->path, vector, writerPool.get()); files.push_back(file); splits.emplace_back(std::make_shared( @@ -790,7 +762,7 @@ void AggregationTestBase::testAggregationsImpl( builder.project(postAggregationProjections); } - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); ASSERT_EQ(memory::spillMemoryPool()->stats().currentBytes, 0); const auto peakSpillMemoryUsage = @@ -838,7 +810,7 @@ void AggregationTestBase::testAggregationsImpl( builder.project(postAggregationProjections); } - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); ASSERT_EQ(memory::spillMemoryPool()->stats().currentBytes, 0); const auto peakSpillMemoryUsage = @@ -887,7 +859,7 @@ void AggregationTestBase::testAggregationsImpl( builder.project(postAggregationProjections); } - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); ASSERT_EQ(memory::spillMemoryPool()->stats().currentBytes, 0); const auto peakSpillMemoryUsage = @@ -981,7 +953,7 @@ void AggregationTestBase::testAggregationsImpl( builder.project(postAggregationProjections); } - auto spillDirectory = exec::test::TempDirectoryPath::create(); + auto spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); AssertQueryBuilder queryBuilder(builder.planNode(), duckDbQueryRunner_); queryBuilder.configs(config).config(core::QueryConfig::kTestingSpillPct, "100") diff --git a/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h b/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h index 90555d064..b62a02a54 100644 --- a/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h +++ b/bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h @@ -45,9 +45,6 @@ class AggregationTestBase : public exec::test::OperatorTestBase { static void SetUpTestCase(); static void TearDownTestCase(); - std::vector - makeVectors(const RowTypePtr& rowType, vector_size_t size, int numVectors); - /// Generates a variety of logically equivalent plans to compute aggregations /// using combinations of partial, final, single, and intermediate /// aggregations with and without local exchanges. Runs all these plans and diff --git a/bolt/functions/prestosql/aggregates/benchmarks/JoinBenchmarks.cpp b/bolt/functions/prestosql/aggregates/benchmarks/JoinBenchmarks.cpp index 6088b8dd4..f40674367 100644 --- a/bolt/functions/prestosql/aggregates/benchmarks/JoinBenchmarks.cpp +++ b/bolt/functions/prestosql/aggregates/benchmarks/JoinBenchmarks.cpp @@ -23,6 +23,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/Task.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" @@ -39,6 +40,7 @@ DEFINE_int32(jit_level, 0, "jit level"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::common::testutil; static constexpr int32_t kNumVectors = 4000; @@ -242,7 +244,7 @@ class VariousAggregatesBenchmark : public HiveConnectorTestBase { vectors.emplace_back(makeRowVector(inputType_->names(), children)); } - filePath_ = TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; writeToFile(filePath_, vectors); createDuckDbTable("t", vectors); std::cout << filePath_ << std::endl; diff --git a/bolt/functions/prestosql/aggregates/benchmarks/ReduceAgg.cpp b/bolt/functions/prestosql/aggregates/benchmarks/ReduceAgg.cpp index 8a1fe4ca8..4d1d8c9e7 100644 --- a/bolt/functions/prestosql/aggregates/benchmarks/ReduceAgg.cpp +++ b/bolt/functions/prestosql/aggregates/benchmarks/ReduceAgg.cpp @@ -32,6 +32,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" @@ -41,6 +42,7 @@ DEFINE_int64(fuzzer_seed, 99887766, "Seed for random input dataset generator"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 10; static constexpr int32_t kRowsPerVector = 10'000; @@ -76,7 +78,7 @@ class ReduceAggBenchmark : public HiveConnectorTestBase { vectors.emplace_back(fuzzer.fuzzInputRow(inputType_)); } - filePath_ = TempFilePath::create(); + filePath_ = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath_->path, vectors); } @@ -221,7 +223,7 @@ class ReduceAggBenchmark : public HiveConnectorTestBase { } RowTypePtr inputType_; - std::shared_ptr filePath_; + std::shared_ptr<::bytedance::bolt::test::TempFilePath> filePath_; }; std::unique_ptr benchmark; diff --git a/bolt/functions/prestosql/aggregates/benchmarks/SimpleAggregates.cpp b/bolt/functions/prestosql/aggregates/benchmarks/SimpleAggregates.cpp index fb4a0f06f..e42547fd7 100644 --- a/bolt/functions/prestosql/aggregates/benchmarks/SimpleAggregates.cpp +++ b/bolt/functions/prestosql/aggregates/benchmarks/SimpleAggregates.cpp @@ -33,6 +33,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" @@ -48,6 +49,7 @@ DEFINE_int64(k_array_size, 17, "group number of k_array"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 1'000; static constexpr int32_t kRowsPerVector = 10'000; @@ -145,7 +147,7 @@ class SimpleAggregatesBenchmark : public HiveConnectorTestBase { vectors.emplace_back(makeRowVector(inputType_->names(), children)); } - filePath_ = TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; writeToFile(filePath_, vectors); std::cout << filePath_ << std::endl; } else { diff --git a/bolt/functions/prestosql/aggregates/benchmarks/TwoStringKeys.cpp b/bolt/functions/prestosql/aggregates/benchmarks/TwoStringKeys.cpp index 727a5a177..f65c14ceb 100644 --- a/bolt/functions/prestosql/aggregates/benchmarks/TwoStringKeys.cpp +++ b/bolt/functions/prestosql/aggregates/benchmarks/TwoStringKeys.cpp @@ -32,6 +32,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" @@ -42,6 +43,7 @@ DEFINE_int64(fuzzer_seed, 99887766, "Seed for random input dataset generator"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 7'000; static constexpr int32_t kRowsPerVector = 4'000; @@ -79,7 +81,7 @@ class TwoStringKeysBenchmark : public HiveConnectorTestBase { vectors.emplace_back(fuzzer.fuzzInputFlatRow(inputType_)); } - filePath_ = TempFilePath::create(); + filePath_ = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath_->path, vectors); } @@ -145,7 +147,7 @@ class TwoStringKeysBenchmark : public HiveConnectorTestBase { } RowTypePtr inputType_; - std::shared_ptr filePath_; + std::shared_ptr<::bytedance::bolt::test::TempFilePath> filePath_; }; std::unique_ptr benchmark; diff --git a/bolt/functions/prestosql/aggregates/benchmarks/VariousAggregates.cpp b/bolt/functions/prestosql/aggregates/benchmarks/VariousAggregates.cpp index 8f1fe2843..c66938a6b 100644 --- a/bolt/functions/prestosql/aggregates/benchmarks/VariousAggregates.cpp +++ b/bolt/functions/prestosql/aggregates/benchmarks/VariousAggregates.cpp @@ -21,6 +21,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/functions/sparksql/aggregates/Register.h" @@ -37,6 +38,7 @@ DEFINE_bool(jit_row_eq_vectors, false, "enable jit row_eq_vectors"); using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 4000; static constexpr int32_t kRowsPerVector = 1024; @@ -234,7 +236,7 @@ class VariousAggregatesBenchmark : public HiveConnectorTestBase { vectors.emplace_back(makeRowVector(inputType_->names(), children)); } - filePath_ = TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; writeToFile(filePath_, vectors); std::cout << filePath_ << std::endl; } else { diff --git a/bolt/functions/prestosql/aggregates/tests/AggregationLazyInputTest.cpp b/bolt/functions/prestosql/aggregates/tests/AggregationLazyInputTest.cpp index dbf58e70e..7fb88684d 100644 --- a/bolt/functions/prestosql/aggregates/tests/AggregationLazyInputTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/AggregationLazyInputTest.cpp @@ -17,6 +17,7 @@ #include #include +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" @@ -24,6 +25,7 @@ using namespace bytedance::bolt; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; static constexpr int32_t kNumVectors = 1'0; static constexpr int32_t kRowsPerVector = 10'000; @@ -61,7 +63,7 @@ class AggregationLazyInputTest : public HiveConnectorTestBase { vectors.emplace_back(makeRowVector(inputType_->names(), children)); } - filePath_ = TempFilePath::create()->path; + filePath_ = ::bytedance::bolt::test::TempFilePath::create()->path; writeToFile(filePath_, vectors); auto plan = PlanBuilder() .tableScan(inputType_) diff --git a/bolt/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp index 8e80d3ecf..dedc6d367 100644 --- a/bolt/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/AverageAggregationTest.cpp @@ -204,7 +204,7 @@ TEST_F(AverageAggregationTest, avgNulls) { TEST_F(AverageAggregationTest, avg) { auto testFunction = [this](const std::string& functionName) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); // global aggregation diff --git a/bolt/functions/prestosql/aggregates/tests/BitwiseAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/BitwiseAggregationTest.cpp index dfa1edc73..d4b998e76 100644 --- a/bolt/functions/prestosql/aggregates/tests/BitwiseAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/BitwiseAggregationTest.cpp @@ -49,7 +49,7 @@ class BitwiseAggregationTest : public AggregationTestBase { }; TEST_F(BitwiseAggregationTest, bitwiseOr) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); // Global aggregation. @@ -76,7 +76,7 @@ TEST_F(BitwiseAggregationTest, bitwiseOr) { } TEST_F(BitwiseAggregationTest, bitwiseAnd) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); // Global aggregation. @@ -103,7 +103,7 @@ TEST_F(BitwiseAggregationTest, bitwiseAnd) { } TEST_F(BitwiseAggregationTest, bitwiseXor) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); // Global aggregation. diff --git a/bolt/functions/prestosql/aggregates/tests/CountAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/CountAggregationTest.cpp index 78da1704e..33a4c2e12 100644 --- a/bolt/functions/prestosql/aggregates/tests/CountAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/CountAggregationTest.cpp @@ -58,7 +58,7 @@ class CountAggregationTest : public AggregationTestBase { }; TEST_F(CountAggregationTest, count) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); testAggregations(vectors, {}, {"count()"}, "SELECT count(1) FROM tmp"); diff --git a/bolt/functions/prestosql/aggregates/tests/CountIfAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/CountIfAggregationTest.cpp index 9a66d4d45..a615f600e 100644 --- a/bolt/functions/prestosql/aggregates/tests/CountIfAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/CountIfAggregationTest.cpp @@ -87,7 +87,7 @@ TEST_F(CountIfAggregationTest, oneAggregateSingleGroup) { } TEST_F(CountIfAggregationTest, oneAggregateMultipleGroups) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); testAggregations( @@ -98,7 +98,7 @@ TEST_F(CountIfAggregationTest, oneAggregateMultipleGroups) { } TEST_F(CountIfAggregationTest, twoAggregatesSingleGroup) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); testAggregations( @@ -109,7 +109,7 @@ TEST_F(CountIfAggregationTest, twoAggregatesSingleGroup) { } TEST_F(CountIfAggregationTest, twoAggregatesMultipleGroups) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); testAggregations( @@ -120,7 +120,7 @@ TEST_F(CountIfAggregationTest, twoAggregatesMultipleGroups) { } TEST_F(CountIfAggregationTest, twoAggregatesMultipleGroupsWrapped) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); testAggregations( diff --git a/bolt/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp index 4ef6d3e86..2b021cd4d 100644 --- a/bolt/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp @@ -365,7 +365,7 @@ void MinMaxByAggregationTestBase::SetUp() { } } ASSERT_EQ(dataVectorsByType_.size(), kSupportedTypes.size()); - rowVectors_ = makeVectors(rowType_, 5, 10); + rowVectors_ = makeVectors(rowType_, 10, 5); createDuckDbTable(rowVectors_); }; diff --git a/bolt/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp b/bolt/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp index cc38a9fa6..b743bcf57 100644 --- a/bolt/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp +++ b/bolt/functions/prestosql/aggregates/tests/VarianceAggregationTest.cpp @@ -175,7 +175,7 @@ TEST_F(VarianceAggregationTest, varianceNulls) { // integers and enable more testing by calling allowInputShuffle() from // Setup(). TEST_F(VarianceAggregationTest, varianceWithGlobalAggregation) { - auto vectors = makeVectors(rowType_, 10, 20); + auto vectors = makeVectors(rowType_, 20, 10); createDuckDbTable(vectors); for (const auto& aggrName : aggrNames_) { @@ -206,7 +206,7 @@ TEST_F(VarianceAggregationTest, varianceWithGlobalAggregation) { } TEST_F(VarianceAggregationTest, varianceWithGlobalAggregationAndFilter) { - auto vectors = makeVectors(rowType_, 10, 20); + auto vectors = makeVectors(rowType_, 20, 10); createDuckDbTable(vectors); for (const auto& aggrName : aggrNames_) { @@ -226,7 +226,7 @@ TEST_F(VarianceAggregationTest, varianceWithGlobalAggregationAndFilter) { TEST_F(VarianceAggregationTest, varianceWithGroupBy) { // TODO: increase number of batches after fixing // https://github.com/facebookincubator/velox/issues/6505. - auto vectors = makeVectors(rowType_, 10, 8); + auto vectors = makeVectors(rowType_, 8, 10); createDuckDbTable(vectors); for (const auto& aggrName : aggrNames_) { @@ -265,7 +265,7 @@ TEST_F(VarianceAggregationTest, varianceWithGroupBy) { } TEST_F(VarianceAggregationTest, varianceWithGroupByAndFilter) { - auto vectors = makeVectors(rowType_, 10, 20); + auto vectors = makeVectors(rowType_, 20, 10); createDuckDbTable(vectors); for (const auto& aggrName : aggrNames_) { diff --git a/bolt/functions/sparksql/aggregates/tests/BitwiseXorAggregationTest.cpp b/bolt/functions/sparksql/aggregates/tests/BitwiseXorAggregationTest.cpp index b7e5d77ec..709137911 100644 --- a/bolt/functions/sparksql/aggregates/tests/BitwiseXorAggregationTest.cpp +++ b/bolt/functions/sparksql/aggregates/tests/BitwiseXorAggregationTest.cpp @@ -48,7 +48,7 @@ class BitwiseXorAggregationTest : public aggregate::test::AggregationTestBase { }; TEST_F(BitwiseXorAggregationTest, bitwiseXor) { - auto vectors = makeVectors(rowType_, 10, 100); + auto vectors = makeVectors(rowType_, 100, 10); createDuckDbTable(vectors); // Global aggregation. diff --git a/bolt/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp b/bolt/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp index a839bd812..885607f95 100644 --- a/bolt/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp +++ b/bolt/functions/sparksql/aggregates/tests/FirstAggregateTest.cpp @@ -28,9 +28,9 @@ * -------------------------------------------------------------------------- */ +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/lib/aggregates/tests/utils/AggregationTestBase.h" #include "bolt/functions/sparksql/aggregates/Register.h" using namespace bytedance::bolt::functions::aggregate::test; diff --git a/bolt/shuffle/sparksql/tests/ShuffleTestBase.cpp b/bolt/shuffle/sparksql/tests/ShuffleTestBase.cpp index badaa582d..589453368 100644 --- a/bolt/shuffle/sparksql/tests/ShuffleTestBase.cpp +++ b/bolt/shuffle/sparksql/tests/ShuffleTestBase.cpp @@ -20,13 +20,13 @@ #include "bolt/common/memory/sparksql/ExecutionMemoryPool.h" #include "bolt/common/memory/sparksql/NativeMemoryManagerFactory.h" #include "bolt/common/memory/sparksql/tests/MemoryTestUtils.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/core/PlanNode.h" #include "bolt/exec/tests/utils/Cursor.h" #include "bolt/exec/tests/utils/LocalExchangeSource.h" #include "bolt/exec/tests/utils/OperatorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" #include "bolt/exec/tests/utils/QueryAssertions.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/shuffle/sparksql/CelebornReaderStreamIterator.h" #include "bolt/shuffle/sparksql/Options.h" #include "bolt/shuffle/sparksql/ShuffleReaderNode.h" @@ -478,7 +478,7 @@ ShuffleRunResult ShuffleTestBase::runShuffle( ShuffleRunResult result; - auto tempDir = exec::test::TempDirectoryPath::create(); + auto tempDir = bytedance::bolt::test::TempDirectoryPath::create(); std::string localDir = tempDir->path + "/local_dir"; std::filesystem::create_directories(localDir); diff --git a/bolt/substrait/tests/Substrait2BoltPlanConversionTest.cpp b/bolt/substrait/tests/Substrait2BoltPlanConversionTest.cpp index d9de9d4d8..f005f793d 100644 --- a/bolt/substrait/tests/Substrait2BoltPlanConversionTest.cpp +++ b/bolt/substrait/tests/Substrait2BoltPlanConversionTest.cpp @@ -31,11 +31,11 @@ #include "bolt/substrait/tests/JsonToProtoConverter.h" #include "bolt/common/base/tests/GTestUtils.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/dwio/common/tests/utils/DataFiles.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/substrait/SubstraitToBoltPlan.h" #include "bolt/type/Type.h" using namespace bytedance::bolt; @@ -83,8 +83,8 @@ class Substrait2BoltPlanConversionTest return splits; } - std::shared_ptr tmpDir_{ - exec::test::TempDirectoryPath::create()}; + std::shared_ptr tmpDir_{ + bytedance::bolt::test::TempDirectoryPath::create()}; }; // This test will firstly generate mock TPC-H lineitem ORC file. Then, Bolt's diff --git a/bolt/tool/trace/OperatorReplayerBase.cpp b/bolt/tool/trace/OperatorReplayerBase.cpp index 21ae655a1..0512b01b7 100644 --- a/bolt/tool/trace/OperatorReplayerBase.cpp +++ b/bolt/tool/trace/OperatorReplayerBase.cpp @@ -32,13 +32,13 @@ #include +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/core/PlanNode.h" #include "bolt/exec/PlanNodeStats.h" #include "bolt/exec/TaskTraceReader.h" #include "bolt/exec/TraceUtil.h" #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/tool/trace/OperatorReplayerBase.h" #include "bolt/tool/trace/TraceReplayTaskRunner.h" @@ -91,9 +91,9 @@ OperatorReplayerBase::OperatorReplayerBase( RowVectorPtr OperatorReplayerBase::run(bool copyResults) { auto queryCtx = createQueryCtx(); - std::shared_ptr spillDirectory; + std::shared_ptr spillDirectory; if (queryCtx->queryConfig().spillEnabled()) { - spillDirectory = exec::test::TempDirectoryPath::create(); + spillDirectory = bytedance::bolt::test::TempDirectoryPath::create(); } TraceReplayTaskRunner traceTaskRunner(createPlan(), std::move(queryCtx)); diff --git a/bolt/tool/trace/tests/AggregationReplayerTest.cpp b/bolt/tool/trace/tests/AggregationReplayerTest.cpp index 791633ee6..a1d21c3f0 100644 --- a/bolt/tool/trace/tests/AggregationReplayerTest.cpp +++ b/bolt/tool/trace/tests/AggregationReplayerTest.cpp @@ -37,6 +37,8 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/PartitionFunction.h" @@ -46,7 +48,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/functions/prestosql/aggregates/RegisterAggregateFunctions.h" #include "bolt/functions/prestosql/registration/RegistrationFunctions.h" #include "bolt/serializers/PrestoSerializer.h" @@ -61,6 +62,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; @@ -198,7 +200,7 @@ TEST_F(AggregationReplayerTest, test) { const auto data = generateInput(groupingKeys_, keyTypes_); const auto planWithNames = aggregatePlans(asRowType(data[0]->type()), prefix); - const auto sourceFilePath = TempFilePath::create(); + const auto sourceFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(sourceFilePath->getPath(), data); if (!prefix.empty()) { diff --git a/bolt/tool/trace/tests/FilterProjectReplayerTest.cpp b/bolt/tool/trace/tests/FilterProjectReplayerTest.cpp index 0e83eca3c..f4df1ea66 100644 --- a/bolt/tool/trace/tests/FilterProjectReplayerTest.cpp +++ b/bolt/tool/trace/tests/FilterProjectReplayerTest.cpp @@ -38,6 +38,7 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/PartitionFunction.h" @@ -47,7 +48,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/FilterProjectReplayer.h" @@ -60,6 +60,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; diff --git a/bolt/tool/trace/tests/HashJoinReplayerTest.cpp b/bolt/tool/trace/tests/HashJoinReplayerTest.cpp index 9dab74dfa..b2c3269a6 100644 --- a/bolt/tool/trace/tests/HashJoinReplayerTest.cpp +++ b/bolt/tool/trace/tests/HashJoinReplayerTest.cpp @@ -38,6 +38,7 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/file/Utils.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/HashJoinBridge.h" @@ -50,7 +51,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/HashJoinReplayer.h" #include "bolt/tool/trace/TraceReplayRunner.h" @@ -60,6 +60,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; diff --git a/bolt/tool/trace/tests/PartitionedOutputReplayerTest.cpp b/bolt/tool/trace/tests/PartitionedOutputReplayerTest.cpp index d23f2260d..0af1bae94 100644 --- a/bolt/tool/trace/tests/PartitionedOutputReplayerTest.cpp +++ b/bolt/tool/trace/tests/PartitionedOutputReplayerTest.cpp @@ -34,12 +34,12 @@ #include "bolt/common/base/tests/GTestUtils.h" #include "bolt/common/file/FileSystems.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/exec/OperatorTraceReader.h" #include "bolt/exec/PartitionFunction.h" #include "bolt/exec/TraceUtil.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/PartitionedOutputReplayer.h" #include "folly/dynamic.h" @@ -48,6 +48,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; namespace bytedance::bolt::tool::trace::test { class PartitionedOutputReplayerTest : public HiveConnectorTestBase, diff --git a/bolt/tool/trace/tests/TableScanReplayerTest.cpp b/bolt/tool/trace/tests/TableScanReplayerTest.cpp index 8cf1338bb..fb954fecd 100644 --- a/bolt/tool/trace/tests/TableScanReplayerTest.cpp +++ b/bolt/tool/trace/tests/TableScanReplayerTest.cpp @@ -38,6 +38,8 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/OperatorTraceReader.h" #include "bolt/exec/PartitionFunction.h" #include "bolt/exec/TableWriter.h" @@ -45,7 +47,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/TableScanReplayer.h" #include "bolt/tool/trace/TraceReplayRunner.h" @@ -56,6 +57,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; @@ -120,9 +122,10 @@ TEST_F(TableScanReplayerTest, runner) { const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); const int numSplits{5}; - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < numSplits; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -189,9 +192,10 @@ TEST_F(TableScanReplayerTest, basic) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < 5; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -256,9 +260,10 @@ TEST_F(TableScanReplayerTest, columnPrunning) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < 5; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } @@ -309,7 +314,7 @@ TEST_F(TableScanReplayerTest, subfieldPrunning) { auto columnType = ROW({"c", "d"}, {innerType, BIGINT()}); auto rowType = ROW({"e"}, {columnType}); auto vectors = makeVectors(10, 1'000, rowType); - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); std::vector requiredSubfields; requiredSubfields.emplace_back("e.c"); @@ -366,9 +371,10 @@ TEST_F(TableScanReplayerTest, concurrent) { const auto testDir = TempDirectoryPath::create(); const auto traceRoot = fmt::format("{}/{}", testDir->getPath(), "traceRoot"); const auto fs = filesystems::getFileSystem(testDir->getPath(), nullptr); - std::vector> splitFiles; + std::vector> + splitFiles; for (int i = 0; i < 2; ++i) { - auto filePath = TempFilePath::create(); + auto filePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(filePath->getPath(), vectors); splitFiles.push_back(std::move(filePath)); } diff --git a/bolt/tool/trace/tests/TableWriterReplayerTest.cpp b/bolt/tool/trace/tests/TableWriterReplayerTest.cpp index 8eeda223e..4c30e5fd2 100644 --- a/bolt/tool/trace/tests/TableWriterReplayerTest.cpp +++ b/bolt/tool/trace/tests/TableWriterReplayerTest.cpp @@ -37,6 +37,8 @@ #include "bolt/common/base/Fs.h" #include "bolt/common/file/FileSystems.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/exec/OperatorTraceReader.h" #include "bolt/exec/PartitionFunction.h" #include "bolt/exec/TableWriter.h" @@ -44,7 +46,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/TableWriterReplayer.h" #include "bolt/tool/trace/TraceReplayRunner.h" @@ -55,6 +56,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; @@ -284,7 +286,7 @@ TEST_F(TableWriterReplayerTest, runner) { makeFlatVector( size, [](auto row) { return row * 2; }, nullEvery(7)), }); - auto sourceFilePath = TempFilePath::create(); + auto sourceFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(sourceFilePath->getPath(), data); std::string traceNodeId; @@ -355,7 +357,7 @@ TEST_F(TableWriterReplayerTest, basic) { makeFlatVector( size, [](auto row) { return row * 2; }, nullEvery(7)), }); - auto sourceFilePath = TempFilePath::create(); + auto sourceFilePath = ::bytedance::bolt::test::TempFilePath::create(); writeToFile(sourceFilePath->getPath(), data); std::string planNodeId; diff --git a/bolt/tool/trace/tests/TraceFileToolTest.cpp b/bolt/tool/trace/tests/TraceFileToolTest.cpp index c5f9a3216..20f2fe35f 100644 --- a/bolt/tool/trace/tests/TraceFileToolTest.cpp +++ b/bolt/tool/trace/tests/TraceFileToolTest.cpp @@ -36,6 +36,7 @@ #include "bolt/common/file/FileSystems.h" #include "bolt/common/hyperloglog/SparseHll.h" +#include "bolt/common/testutil/TempDirectoryPath.h" #include "bolt/common/testutil/TestValue.h" #include "bolt/dwio/dwrf/writer/Writer.h" #include "bolt/exec/OperatorTraceReader.h" @@ -47,7 +48,6 @@ #include "bolt/exec/tests/utils/AssertQueryBuilder.h" #include "bolt/exec/tests/utils/HiveConnectorTestBase.h" #include "bolt/exec/tests/utils/PlanBuilder.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" #include "bolt/serializers/PrestoSerializer.h" #include "bolt/tool/trace/HashJoinReplayer.h" #include "bolt/tool/trace/TraceFileToolRunner.h" @@ -57,6 +57,7 @@ using namespace bytedance::bolt::core; using namespace bytedance::bolt::common; using namespace bytedance::bolt::exec; using namespace bytedance::bolt::exec::test; +using namespace bytedance::bolt::test; using namespace bytedance::bolt::connector; using namespace bytedance::bolt::connector::hive; using namespace bytedance::bolt::dwio::common; diff --git a/bolt/vector/tests/VectorSaverTest.cpp b/bolt/vector/tests/VectorSaverTest.cpp index 5f223d519..0b4b1a163 100644 --- a/bolt/vector/tests/VectorSaverTest.cpp +++ b/bolt/vector/tests/VectorSaverTest.cpp @@ -32,8 +32,8 @@ #include #include "bolt/common/base/Fs.h" #include "bolt/common/base/tests/GTestUtils.h" -#include "bolt/exec/tests/utils/TempDirectoryPath.h" -#include "bolt/exec/tests/utils/TempFilePath.h" +#include "bolt/common/testutil/TempDirectoryPath.h" +#include "bolt/common/testutil/TempFilePath.h" #include "bolt/functions/prestosql/types/HyperLogLogType.h" #include "bolt/functions/prestosql/types/JsonType.h" #include "bolt/functions/prestosql/types/TimestampWithTimeZoneType.h" @@ -131,7 +131,7 @@ class VectorSaverTest : public testing::Test, public VectorTestBase { // Writes the passed vector to file and reads it back, returns the read // vector. VectorPtr takeRoundTrip(const VectorPtr& vector) { - auto path = exec::test::TempFilePath::create(); + auto path = ::bytedance::bolt::test::TempFilePath::create(); std::ofstream outputFile(path->path, std::ofstream::binary); saveVector(*vector, outputFile); @@ -150,7 +150,7 @@ class VectorSaverTest : public testing::Test, public VectorTestBase { } void testTypeRoundTrip(const TypePtr& type) { - auto path = exec::test::TempFilePath::create(); + auto path = ::bytedance::bolt::test::TempFilePath::create(); std::ofstream outputFile(path->path, std::ofstream::binary); saveType(type, outputFile); @@ -643,7 +643,7 @@ TEST_F(VectorSaverTest, LazyVector) { TEST_F(VectorSaverTest, stdVector) { std::vector intVector = {1, 2, 3, 4, 5}; - auto path = exec::test::TempFilePath::create(); + auto path = ::bytedance::bolt::test::TempFilePath::create(); saveStdVectorToFile(intVector, path->path.c_str()); auto copy = restoreStdVectorFromFile(path->path.c_str()); ASSERT_EQ(intVector, copy); @@ -662,7 +662,7 @@ struct VectorSaverInfo { /// A demonstration of using VectorSaver to save 'current' vector being /// processed to disk in case of an exception. TEST_F(VectorSaverTest, exceptionContext) { - auto tempDirectory = exec::test::TempDirectoryPath::create(); + auto tempDirectory = bytedance::bolt::test::TempDirectoryPath::create(); auto messageFunction = [](BoltException::Type /*exceptionType*/, auto* arg) -> std::string { diff --git a/scripts/run-clang-tidy.py b/scripts/run-clang-tidy.py index 83f9aadaf..2acd582b0 100755 --- a/scripts/run-clang-tidy.py +++ b/scripts/run-clang-tidy.py @@ -472,6 +472,37 @@ def _compute_changed_lines() -> Optional[Multimap]: f"Excluded {header_excluded} header file(s) (analyzed transitively via .cpp)." ) + # Drop files not in compile_commands.json. Mechanical refactor sweeps + # (include-path / namespace renames) can touch files whose CMake + # targets are commented out (e.g. bolt/examples/ScanAndSort.cpp). + # Without a compile entry, clang-tidy falls back to default flags + # and emits spurious "file not found" errors for project-internal + # includes. + db_path = os.path.join(build_path, "compile_commands.json") + if os.path.isfile(db_path): + try: + with open(db_path) as _f: + _db_files = {e["file"] for e in json.load(_f) if "file" in e} + _kept = [] + _orphans = [] + for _f in files_to_process: + if to_repo_abs(_f, git_root) in _db_files: + _kept.append(_f) + else: + _orphans.append(_f) + if _orphans: + print( + f"Skipped {len(_orphans)} file(s) not in compile_commands.json:" + ) + for _o in _orphans: + print(f" {_o}") + files_to_process = _kept + except (json.JSONDecodeError, KeyError, OSError) as _e: + print( + f"Warning: could not parse {db_path} for orphan filter: {_e}", + file=sys.stderr, + ) + if not files_to_process: print("No changed C/C++ lines detected for clang-tidy.") return 0