diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 14e063ac7f28d4..e54dc6681e1d00 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -2496,14 +2496,28 @@ public PlanFragment visitPhysicalSetOperation( setOperationNode.setColocate(true); } - // TODO: open comment when support `enable_local_shuffle_planner` - // for (Plan child : setOperation.children()) { - // PhysicalPlan childPhysicalPlan = (PhysicalPlan) child; - // if (JoinUtils.isStorageBucketed(childPhysicalPlan.getPhysicalProperties())) { - // setOperationNode.setDistributionMode(DistributionMode.BUCKET_SHUFFLE); - // break; - // } - // } + // A storage-bucketed child means set-op bucket shuffle was chosen by + // ChildrenPropertiesRegulator, which only does so under the FE local-shuffle planner; + // the gate here keeps the two sites explicitly consistent. Mark the node BUCKET_SHUFFLE + // so the set sink/probe align by bucket instead of execution-bucketed hash. + // + // Unlike hash join, BUCKET_SHUFFLE is not exclusive with isColocate above: for a set + // operation isColocate describes the bucket-aligned scheduling of the fragment (the + // basic child scans buckets directly), while BUCKET_SHUFFLE describes how the other + // children arrive (bucket-shuffle exchanges). Both routes converge to the same + // bucket-hash local exchange requirement in SetOperationNode.enforceAndDeriveLocalExchange. + ConnectContext setOperationConnectContext = context.getConnectContext(); + if (setOperationConnectContext != null + && setOperationConnectContext.getSessionVariable().isEnableLocalShufflePlanner() + && SessionVariable.canUseNereidsDistributePlanner(setOperationConnectContext)) { + for (Plan child : setOperation.children()) { + PhysicalPlan childPhysicalPlan = (PhysicalPlan) child; + if (JoinUtils.isStorageBucketed(childPhysicalPlan.getPhysicalProperties())) { + setOperationNode.setDistributionMode(DistributionMode.BUCKET_SHUFFLE); + break; + } + } + } return setOperationFragment; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java index 8a71581ca974dc..d5858af29d8445 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.table.TableValuedFunction; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.Union; import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalSort; import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows; import org.apache.doris.nereids.trees.plans.physical.PhysicalBucketedHashAggregate; @@ -71,9 +72,11 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -440,53 +443,56 @@ public PhysicalProperties visitPhysicalSetOperation(PhysicalSetOperation setOper return PhysicalProperties.GATHER; } - // TODO: open comment when support `enable_local_shuffle_planner` - // int distributeToChildIndex - // = setOperation.getMutableState(PhysicalSetOperation.DISTRIBUTE_TO_CHILD_INDEX).orElse(-1); - // if (distributeToChildIndex >= 0 - // && childrenDistribution.get(distributeToChildIndex) instanceof DistributionSpecHash) { - // DistributionSpecHash childDistribution - // = (DistributionSpecHash) childrenDistribution.get(distributeToChildIndex); - // List childToIndex = setOperation.getRegularChildrenOutputs().get(distributeToChildIndex); - // Map idToOutputIndex = new LinkedHashMap<>(); - // for (int j = 0; j < childToIndex.size(); j++) { - // idToOutputIndex.put(childToIndex.get(j).getExprId(), j); - // } - // - // List orderedShuffledColumns = childDistribution.getOrderedShuffledColumns(); - // List setOperationDistributeColumnIds = new ArrayList<>(); - // for (ExprId tableDistributeColumnId : orderedShuffledColumns) { - // Integer index = idToOutputIndex.get(tableDistributeColumnId); - // if (index == null) { - // break; - // } - // setOperationDistributeColumnIds.add(setOperation.getOutput().get(index).getExprId()); - // } - // // check whether the set operation output all distribution columns of the child - // if (setOperationDistributeColumnIds.size() == orderedShuffledColumns.size()) { - // boolean isUnion = setOperation instanceof Union; - // boolean shuffleToRight = distributeToChildIndex > 0; - // if (!isUnion && shuffleToRight) { - // return new PhysicalProperties( - // new DistributionSpecHash( - // setOperationDistributeColumnIds, - // ShuffleType.EXECUTION_BUCKETED - // ) - // ); - // } else { - // // keep the distribution as the child - // return new PhysicalProperties( - // new DistributionSpecHash( - // setOperationDistributeColumnIds, - // childDistribution.getShuffleType(), - // childDistribution.getTableId(), - // childDistribution.getSelectedIndexId(), - // childDistribution.getPartitionIds() - // ) - // ); - // } - // } - // } + // When set-op bucket shuffle is chosen (DISTRIBUTE_TO_CHILD_INDEX is set by + // ChildrenPropertiesRegulator, which only happens under the FE local-shuffle planner), + // the set operation keeps the basic child's bucket distribution as its own output so the + // bucket distribution propagates upward instead of being flattened to execution-bucketed. + int distributeToChildIndex + = setOperation.getMutableState(PhysicalSetOperation.DISTRIBUTE_TO_CHILD_INDEX).orElse(-1); + if (distributeToChildIndex >= 0 + && childrenDistribution.get(distributeToChildIndex) instanceof DistributionSpecHash) { + DistributionSpecHash childDistribution + = (DistributionSpecHash) childrenDistribution.get(distributeToChildIndex); + List childToIndex = setOperation.getRegularChildrenOutputs().get(distributeToChildIndex); + Map idToOutputIndex = new LinkedHashMap<>(); + for (int j = 0; j < childToIndex.size(); j++) { + idToOutputIndex.put(childToIndex.get(j).getExprId(), j); + } + + List orderedShuffledColumns = childDistribution.getOrderedShuffledColumns(); + List setOperationDistributeColumnIds = new ArrayList<>(); + for (ExprId tableDistributeColumnId : orderedShuffledColumns) { + Integer index = idToOutputIndex.get(tableDistributeColumnId); + if (index == null) { + break; + } + setOperationDistributeColumnIds.add(setOperation.getOutput().get(index).getExprId()); + } + // check whether the set operation output all distribution columns of the child + if (setOperationDistributeColumnIds.size() == orderedShuffledColumns.size()) { + boolean isUnion = setOperation instanceof Union; + boolean shuffleToRight = distributeToChildIndex > 0; + if (!isUnion && shuffleToRight) { + return new PhysicalProperties( + new DistributionSpecHash( + setOperationDistributeColumnIds, + ShuffleType.EXECUTION_BUCKETED + ) + ); + } else { + // keep the distribution as the child + return new PhysicalProperties( + new DistributionSpecHash( + setOperationDistributeColumnIds, + childDistribution.getShuffleType(), + childDistribution.getTableId(), + childDistribution.getSelectedIndexId(), + childDistribution.getPartitionIds() + ) + ); + } + } + } for (int i = 0; i < childrenDistribution.size(); i++) { DistributionSpec childDistribution = childrenDistribution.get(i); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index e5e0d9d1bd0d28..ef35962106800c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -57,6 +57,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -651,83 +652,103 @@ public List> visitPhysicalSetOperation(PhysicalSetOpera } else if (requiredDistributionSpec instanceof DistributionSpecHash) { // TODO: should use the most common hash spec as basic DistributionSpecHash basic = (DistributionSpecHash) requiredDistributionSpec; - // TODO: open comment when support `enable_local_shuffle_planner` - // int bucketShuffleBasicIndex = -1; - // double basicRowCount = -1; - - // find the bucket shuffle basic index - // try { - // ImmutableSet supportBucketShuffleTypes = ImmutableSet.of( - // ShuffleType.NATURAL, - // ShuffleType.STORAGE_BUCKETED - // ); - // for (int i = 0; i < originChildrenProperties.size(); i++) { - // PhysicalProperties originChildrenProperty = originChildrenProperties.get(i); - // DistributionSpec childDistribution = originChildrenProperty.getDistributionSpec(); - // if (childDistribution instanceof DistributionSpecHash - // && supportBucketShuffleTypes.contains( - // ((DistributionSpecHash) childDistribution).getShuffleType()) - // && !(isBucketShuffleDownGrade(setOperation.child(i)))) { - // Statistics stats = setOperation.child(i).getStats(); - // double rowCount = stats.getRowCount(); - // if (rowCount > basicRowCount) { - // basicRowCount = rowCount; - // bucketShuffleBasicIndex = i; - // } - // } - // } - // } catch (Throwable t) { - // // catch stats exception - // LOG.warn("Can not find the most (bucket num, rowCount): " + t, t); - // bucketShuffleBasicIndex = -1; - // } - - // use bucket shuffle - // if (bucketShuffleBasicIndex >= 0) { - // DistributionSpecHash notShuffleSideRequire - // = (DistributionSpecHash) requiredProperties.get(bucketShuffleBasicIndex) - // .getDistributionSpec(); - // - // DistributionSpecHash notNeedShuffleOutput - // = (DistributionSpecHash) originChildrenProperties.get(bucketShuffleBasicIndex) - // .getDistributionSpec(); - // - // for (int i = 0; i < originChildrenProperties.size(); i++) { - // DistributionSpecHash current - // = (DistributionSpecHash) originChildrenProperties.get(i).getDistributionSpec(); - // if (i == bucketShuffleBasicIndex) { - // continue; - // } - // - // DistributionSpecHash currentRequire - // = (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec(); - // - // PhysicalProperties target = calAnotherSideRequired( - // ShuffleType.STORAGE_BUCKETED, - // notNeedShuffleOutput, current, - // notShuffleSideRequire, - // currentRequire); - // updateChildEnforceAndCost(i, target); - // } - // setOperation.setMutableState( - // PhysicalSetOperation.DISTRIBUTE_TO_CHILD_INDEX, bucketShuffleBasicIndex); - // use partitioned shuffle - // } else { - for (int i = 0; i < originChildrenProperties.size(); i++) { - DistributionSpecHash current - = (DistributionSpecHash) originChildrenProperties.get(i).getDistributionSpec(); - if (current.getShuffleType() != ShuffleType.EXECUTION_BUCKETED - || !bothSideShuffleKeysAreSameOrder(basic, current, - (DistributionSpecHash) requiredProperties.get(0).getDistributionSpec(), - (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec())) { + int bucketShuffleBasicIndex = -1; + double basicRowCount = -1; + + // Bucket shuffle for set operation is only valid when the FE plans the local + // shuffle: with the BE-side local-shuffle planner the backend cannot infer the + // correct local shuffle type for the set sink/probe and computes wrong results. + // It also requires the nereids distribute planner: the legacy coordinator only + // supports bucket-shuffle-partitioned sinks whose dest fragment contains a bucket + // shuffle join, so a bucket-shuffle set operation fragment cannot be scheduled there. + // Otherwise, keep bucketShuffleBasicIndex = -1 and fall back to the + // execution-bucketed (partitioned) shuffle below. + ConnectContext setOperationContext = ConnectContext.get(); + boolean enableLocalShufflePlanner = setOperationContext != null + && setOperationContext.getSessionVariable().isEnableLocalShufflePlanner() + && SessionVariable.canUseNereidsDistributePlanner(setOperationContext); + + // find the bucket shuffle basic index: the largest natural / storage-bucketed child + // keeps its bucket distribution, every other child is bucket-shuffled to it. + // isBucketShuffleDownGrade reuses the join-side heuristics on purpose, including + // the enable_bucket_shuffle_join switch and bucket_shuffle_downgrade_ratio: bucket + // shuffle for set operation belongs to the same optimization family as bucket + // shuffle join, so the join switches govern both instead of introducing a separate + // session variable. + if (enableLocalShufflePlanner) { + try { + ImmutableSet supportBucketShuffleTypes = ImmutableSet.of( + ShuffleType.NATURAL, + ShuffleType.STORAGE_BUCKETED + ); + for (int i = 0; i < originChildrenProperties.size(); i++) { + PhysicalProperties originChildrenProperty = originChildrenProperties.get(i); + DistributionSpec childDistribution = originChildrenProperty.getDistributionSpec(); + if (childDistribution instanceof DistributionSpecHash + && supportBucketShuffleTypes.contains( + ((DistributionSpecHash) childDistribution).getShuffleType()) + && !(isBucketShuffleDownGrade(setOperation.child(i)))) { + Statistics stats = setOperation.child(i).getStats(); + double rowCount = stats.getRowCount(); + if (rowCount > basicRowCount) { + basicRowCount = rowCount; + bucketShuffleBasicIndex = i; + } + } + } + } catch (Throwable t) { + // catch stats exception + LOG.warn("Can not find the most (bucket num, rowCount): " + t, t); + bucketShuffleBasicIndex = -1; + } + } + + if (bucketShuffleBasicIndex >= 0) { + // use bucket shuffle + DistributionSpecHash notShuffleSideRequire + = (DistributionSpecHash) requiredProperties.get(bucketShuffleBasicIndex) + .getDistributionSpec(); + + DistributionSpecHash notNeedShuffleOutput + = (DistributionSpecHash) originChildrenProperties.get(bucketShuffleBasicIndex) + .getDistributionSpec(); + + for (int i = 0; i < originChildrenProperties.size(); i++) { + DistributionSpecHash current + = (DistributionSpecHash) originChildrenProperties.get(i).getDistributionSpec(); + if (i == bucketShuffleBasicIndex) { + continue; + } + + DistributionSpecHash currentRequire + = (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec(); + PhysicalProperties target = calAnotherSideRequired( - ShuffleType.EXECUTION_BUCKETED, basic, current, - (DistributionSpecHash) requiredProperties.get(0).getDistributionSpec(), - (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec()); + ShuffleType.STORAGE_BUCKETED, + notNeedShuffleOutput, current, + notShuffleSideRequire, + currentRequire); updateChildEnforceAndCost(i, target); } + setOperation.setMutableState( + PhysicalSetOperation.DISTRIBUTE_TO_CHILD_INDEX, bucketShuffleBasicIndex); + } else { + // use partitioned shuffle + for (int i = 0; i < originChildrenProperties.size(); i++) { + DistributionSpecHash current + = (DistributionSpecHash) originChildrenProperties.get(i).getDistributionSpec(); + if (current.getShuffleType() != ShuffleType.EXECUTION_BUCKETED + || !bothSideShuffleKeysAreSameOrder(basic, current, + (DistributionSpecHash) requiredProperties.get(0).getDistributionSpec(), + (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec())) { + PhysicalProperties target = calAnotherSideRequired( + ShuffleType.EXECUTION_BUCKETED, basic, current, + (DistributionSpecHash) requiredProperties.get(0).getDistributionSpec(), + (DistributionSpecHash) requiredProperties.get(i).getDistributionSpec()); + updateChildEnforceAndCost(i, target); + } + } } - // } } return ImmutableList.of(originChildrenProperties); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java index 70f2b51665b740..aaebf8ac43c637 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/RequestPropertyDeriver.java @@ -67,6 +67,7 @@ import org.apache.doris.nereids.util.AggregateUtils; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.Statistics; import com.google.common.base.Preconditions; @@ -339,14 +340,22 @@ public Void visitPhysicalSetOperation(PhysicalSetOperation setOperation, PlanCon // shuffle all column // TODO: for wide table, may be we should add a upper limit of shuffle columns - // TODO: open comment when support `enable_local_shuffle_planner` and change to REQUIRE - // intersect/except always need hash distribution, we use REQUIRE to auto select - // bucket shuffle or execution shuffle + // intersect/except always need hash distribution. Auto-selecting bucket shuffle + // (ShuffleType.REQUIRE) for set operation is only valid when the FE plans the local + // shuffle: with the BE-side local-shuffle planner the backend cannot infer the + // correct local shuffle type for the set sink/probe and computes wrong results. + // It also requires the nereids distribute planner: the legacy coordinator only + // supports bucket-shuffle-partitioned sinks whose dest fragment contains a bucket + // shuffle join. Fall back to EXECUTION_BUCKETED otherwise. + ShuffleType setOperationShuffleType = connectContext != null + && connectContext.getSessionVariable().isEnableLocalShufflePlanner() + && SessionVariable.canUseNereidsDistributePlanner(connectContext) + ? ShuffleType.REQUIRE : ShuffleType.EXECUTION_BUCKETED; addRequestPropertyToChildren(setOperation.getRegularChildrenOutputs().stream() .map(childOutputs -> childOutputs.stream() .map(SlotReference::getExprId) .collect(ImmutableList.toImmutableList())) - .map(l -> PhysicalProperties.createHash(l, ShuffleType.EXECUTION_BUCKETED)) + .map(l -> PhysicalProperties.createHash(l, setOperationShuffleType)) .collect(Collectors.toList())); } return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LocalExchangeNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LocalExchangeNode.java index f939102d699a13..66eda40079952f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/LocalExchangeNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LocalExchangeNode.java @@ -283,8 +283,15 @@ public LocalExchangeType preferType() { @Override public LocalExchangeTypeRequire autoRequireHash() { - if (requireType == LocalExchangeType.GLOBAL_EXECUTION_HASH_SHUFFLE - || requireType == LocalExchangeType.BUCKET_HASH_SHUFFLE) { + // Callers are pass-through operators (union / streaming agg / sort) that report + // resolveExchangeType(requireChild) upward while leaving row placement to their + // children. A specific hash require must therefore be forwarded as-is: degrading + // LOCAL_EXECUTION_HASH_SHUFFLE to the generic RequireHash lets a bucket-distributed + // child satisfy the requirement and keep its bucket placement, while the operator + // still claims LOCAL_EXECUTION_HASH_SHUFFLE to its parent — the parent (e.g. a + // bucket join upgraded to local hash) then skips its re-align local exchange and + // the mixed placements compute wrong results. + if (requireType.isHashShuffle()) { return this; } return RequireHash.INSTANCE; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java index 4074ab3e85ae6e..1396701d3c72dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java @@ -223,7 +223,15 @@ public Pair enforceAndDeriveLocalExchange(PlanTrans : LocalExchangeType.NOOP; } else { // Intersect / Except - if (AddLocalExchange.isColocated(this)) { + if (AddLocalExchange.isColocated(this) || isBucketShuffle()) { + // COLOCATE / BUCKET_SHUFFLE: every child is distributed by the basic child's + // storage bucket function (basic side scans buckets directly, other sides come + // from bucket-shuffle exchanges), so all children must stay aligned by that + // bucket function locally. requireBucketHash keeps bucket-distributed children + // as-is and re-aligns a serial (NOOP-claim) child with a BUCKET_HASH_SHUFFLE + // local exchange — same pattern as HashJoinNode's colocate/bucket-shuffle + // branch. An execution-hash require here would locally re-partition one side + // by a different hash function and break build/probe alignment. requireChild = LocalExchangeTypeRequire.requireBucketHash(); outputType = LocalExchangeType.BUCKET_HASH_SHUFFLE; } else { diff --git a/regression-test/data/query_p0/set_operations/bucket_shuffle_set_operation.out b/regression-test/data/query_p0/set_operations/bucket_shuffle_set_operation.out index 50a3c5cc131ef6..404cb9c3110e83 100644 --- a/regression-test/data/query_p0/set_operations/bucket_shuffle_set_operation.out +++ b/regression-test/data/query_p0/set_operations/bucket_shuffle_set_operation.out @@ -117,3 +117,21 @@ PhysicalResultSink 2 3 +-- !bucket_shuffle_join_as_basic_child_shape -- +PhysicalResultSink +--PhysicalIntersect[bucketShuffle] +----PhysicalProject +------hashJoin[INNER_JOIN broadcast] hashCondition=((a.id = b.id)) otherCondition=() +--------PhysicalProject +----------PhysicalOlapScan[bucket_shuffle_set_operation1(a)] +--------PhysicalProject +----------PhysicalOlapScan[bucket_shuffle_set_operation2(b)] +----PhysicalDistribute[DistributionSpecHash] +------PhysicalProject +--------PhysicalOlapScan[bucket_shuffle_set_operation3] + +-- !bucket_shuffle_join_as_basic_child_result -- +1 +2 +3 + diff --git a/regression-test/suites/query_p0/set_operations/bucket_shuffle_set_operation.groovy b/regression-test/suites/query_p0/set_operations/bucket_shuffle_set_operation.groovy index 5533853eaa93ad..fbc61894a4cf7c 100644 --- a/regression-test/suites/query_p0/set_operations/bucket_shuffle_set_operation.groovy +++ b/regression-test/suites/query_p0/set_operations/bucket_shuffle_set_operation.groovy @@ -16,9 +16,6 @@ // under the License. suite("bucket_shuffle_set_operation") { - // TODO: open comment when support `enable_local_shuffle_planner` and change to REQUIRE - return - multi_sql """ drop table if exists bucket_shuffle_set_operation1; create table bucket_shuffle_set_operation1(id int, value int) distributed by hash(id) buckets 10 properties('replication_num'='1'); @@ -37,6 +34,9 @@ suite("bucket_shuffle_set_operation") { // make bucket shuffle set operation stable sql "set parallel_pipeline_task_num=5" + // disable the bucket shuffle downgrade so the chosen shapes do not depend on the + // backend count / parallelism of the environment running this suite + sql "set bucket_shuffle_downgrade_ratio=0" def checkShapeAndResult = { String tag, String sqlStr -> quickTest(tag + "_shape", "explain shape plan " + sqlStr) @@ -95,6 +95,16 @@ suite("bucket_shuffle_set_operation") { select id from bucket_shuffle_set_operation2 where id=1 """) + // The basic child of a bucket-shuffle set operation can be a join output instead of a + // direct scan. In that shape the local exchange planned for the basic side must still + // partition by the storage bucket function: an execution-hash local exchange would not + // align with the bucket-distributed side and the set operation would compute wrong results. + checkShapeAndResult("bucket_shuffle_join_as_basic_child", """ + select a.id from bucket_shuffle_set_operation1 a + join bucket_shuffle_set_operation2 b on a.id = b.id + intersect + select id from bucket_shuffle_set_operation3""") + explain { sql """ select id, id as id2 from (select nullable(id) as id from bucket_shuffle_set_operation1)a