Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,27 +150,27 @@ Stage-0
File Output Operator [FS_61]
Limit [LIM_60] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_59] (rows=473 width=447)
Select Operator [SEL_59] (rows=791 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_58]
Top N Key Operator [TNK_57] (rows=473 width=447)
Top N Key Operator [TNK_57] (rows=791 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_56] (rows=473 width=447)
Map Join Operator [MAPJOIN_56] (rows=791 width=447)
BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 3 [CUSTOM_EDGE] vectorized, llap
MULTICAST [RS_53]
PartitionCols:_col0, _col1
Select Operator [SEL_52] (rows=387 width=178)
Select Operator [SEL_52] (rows=500 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_51] (rows=387 width=178)
predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null)
Filter Operator [FIL_51] (rows=500 width=178)
predicate:((key <> '0') and (key <> '100') and value is not null)
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Select Operator [SEL_55] (rows=387 width=269)
<-Select Operator [SEL_55] (rows=500 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_54] (rows=387 width=269)
predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null)
Filter Operator [FIL_54] (rows=500 width=269)
predicate:((key1 <> '0') and (key1 <> '100') and key2 is not null)
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"]

Expand Down Expand Up @@ -346,27 +346,27 @@ Stage-0
File Output Operator [FS_41]
Limit [LIM_40] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_39] (rows=473 width=447)
Select Operator [SEL_39] (rows=791 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_38]
Top N Key Operator [TNK_37] (rows=473 width=447)
Top N Key Operator [TNK_37] (rows=791 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_36] (rows=473 width=447)
Map Join Operator [MAPJOIN_36] (rows=791 width=447)
BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 3 [CUSTOM_EDGE] vectorized, llap
MULTICAST [RS_33]
PartitionCols:_col0
Select Operator [SEL_32] (rows=387 width=178)
Select Operator [SEL_32] (rows=500 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_31] (rows=387 width=178)
predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100')))
Filter Operator [FIL_31] (rows=500 width=178)
predicate:((key <> '0') and (key <> '100'))
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Select Operator [SEL_35] (rows=387 width=269)
<-Select Operator [SEL_35] (rows=500 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_34] (rows=387 width=269)
predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100')))
Filter Operator [FIL_34] (rows=500 width=269)
predicate:((key1 <> '0') and (key1 <> '100'))
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"]

Expand Down Expand Up @@ -435,40 +435,40 @@ POSTHOOK: Input: default@srcbucket_big
Plan optimized by CBO.

Vertex dependency in root stage
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
Map 1 <- Map 3 (CUSTOM_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)

Stage-0
Fetch Operator
limit:20
Stage-1
Reducer 3 vectorized, llap
Reducer 2 vectorized, llap
File Output Operator [FS_41]
Limit [LIM_40] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_39] (rows=612 width=447)
Select Operator [SEL_39] (rows=791 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 2 [SIMPLE_EDGE] vectorized, llap
<-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_38]
Top N Key Operator [TNK_37] (rows=612 width=447)
Top N Key Operator [TNK_37] (rows=791 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_36] (rows=612 width=447)
Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_33]
Map Join Operator [MAPJOIN_36] (rows=791 width=447)
BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 3 [CUSTOM_EDGE] vectorized, llap
MULTICAST [RS_33]
PartitionCols:_col0
Select Operator [SEL_32] (rows=387 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_31] (rows=387 width=269)
predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null)
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"]
<-Select Operator [SEL_35] (rows=500 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_34] (rows=500 width=178)
predicate:key is not null
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
Select Operator [SEL_32] (rows=500 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_31] (rows=500 width=178)
predicate:key is not null
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Select Operator [SEL_35] (rows=500 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_34] (rows=500 width=269)
predicate:((key2 <> 'val_0') and (key2 <> 'val_100') and key1 is not null)
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"]

PREHOOK: query: SELECT *
FROM srcbucket_big a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean)
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((a = 22) or (b) IN ('four', 'one')) (type: boolean)
Expand All @@ -93,7 +93,7 @@ STAGE PLANS:
Map-reduce partition columns: FILE__PATH (type: string)
Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,44 @@ public SearchTransformer(RexBuilder rexBuilder, RexCall search, final RexUnknown
this.unknownContext = unknownContext;
}

/**
* Transforms the SEARCH expression into an equivalent RexNode expression.
* Warning: when called from a shuttle, callers of this method should consider flattening AND/OR expressions
* afterward, to get the same result as applying {@link SearchTransformer.Shuttle}.
*/
public RexNode transform() {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.SEARCH_TRANSFORMER);

RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
RangeSets.forEach(sarg.rangeSet, consumer);

List<RexNode> orList = new ArrayList<>();
if (sarg.nullAs == RexUnknownAs.TRUE && unknownContext != RexUnknownAs.TRUE) {
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref));
}
switch (consumer.inLiterals.size()) {
case 0:
break;
case 1:
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
break;
default:
List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
operands.add(ref);
operands.addAll(consumer.inLiterals);
orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));

if (sarg.isComplementedPoints()) {
Comment thread
zabetak marked this conversation as resolved.
// Generate 'ref <> value1 AND ... AND ref <> valueN'
List<RexNode> list = sarg.rangeSet.complement().asRanges().stream().map(
range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref,
rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true))).toList();
orList.add(RexUtil.composeConjunction(rexBuilder, list));
} else {
RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
RangeSets.forEach(sarg.rangeSet, consumer);

switch (consumer.inLiterals.size()) {
case 0:
break;
case 1:
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
break;
default:
List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
operands.add(ref);
operands.addAll(consumer.inLiterals);
orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));
}
orList.addAll(consumer.nodes);
}
orList.addAll(consumer.nodes);
RexNode x = RexUtil.composeDisjunction(rexBuilder, orList);

if (sarg.nullAs == RexUnknownAs.FALSE && unknownContext != RexUnknownAs.FALSE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
Expand Down Expand Up @@ -214,6 +215,12 @@ public ExprNodeDesc visitCall(RexCall call) {
&& SqlTypeUtil.equalSansNullability(dTFactory, call.getType(),
call.operands.get(0).getType())) {
return args.get(0);
} else if (call.isA(SqlKind.AND)) {
// Make sure AND is flattened (we may have nested ANDs due to SearchTransformer conversion above)
return ExprNodeDescUtils.and(args);
} else if (call.isA(SqlKind.OR)) {
// Make sure OR is flattened (we may have nested ORs due to SearchTransformer conversion above)
return ExprNodeDescUtils.or(args);
} else {
GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(),
args.size());
Expand Down
28 changes: 26 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;


public class ExprNodeDescUtils {
Expand Down Expand Up @@ -243,6 +244,21 @@ public static ExprNodeGenericFuncDesc and(List<ExprNodeDesc> exps) {
return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", flatExps);
}

/**
 * Builds one OR expression out of the given expressions. Every input that is itself an OR
 * (at any nesting depth) is replaced by its operands, so the result has no directly nested ORs.
 * <pre>
 * Input: AND(A, B), C, OR(D, OR(E, F))
 * Output: OR(AND(A, B), C, D, E, F)
 * </pre>
 *
 * @param exps the expressions to combine
 * @return a boolean-typed "or" function descriptor over the flattened operands
 */
public static ExprNodeGenericFuncDesc or(List<ExprNodeDesc> exps) {
  List<ExprNodeDesc> disjuncts = new ArrayList<>();
  // Collect the operands in input order, unwrapping anything recognised as an OR on the way.
  exps.forEach(exp -> split(exp, disjuncts, FunctionRegistry::isOpOr));
  GenericUDFOPOr orUdf = new GenericUDFOPOr();
  return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, orUdf, "or", disjuncts);
}

/**
* Create an expression for computing a murmur hash by recursively hashing given expressions by two:
* <pre>
Expand Down Expand Up @@ -305,9 +321,17 @@ public static List<ExprNodeDesc> split(ExprNodeDesc current) {
* split predicates by AND op
*/
public static List<ExprNodeDesc> split(ExprNodeDesc current, List<ExprNodeDesc> splitted) {
if (FunctionRegistry.isOpAnd(current)) {
return split(current, splitted, FunctionRegistry::isOpAnd);
}

/**
* Splits a predicate by recursively descending into the children of every expression that satisfies the given condition.
*/
private static List<ExprNodeDesc> split(ExprNodeDesc current, List<ExprNodeDesc> splitted,
Predicate<ExprNodeDesc> condition) {
if (condition.test(current)) {
for (ExprNodeDesc child : current.getChildren()) {
split(child, splitted);
split(child, splitted, condition);
}
return splitted;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,17 @@ public void testBetweenSelectivityLeftEqualsRight_KO() {
betweenSelectivity(KLL, 2, 2);
}

@Test
public void testComputeNotEqualsPredicateSelectivity() {
  // Build "inputRef0 <> int3 AND inputRef0 <> int7" from its two comparisons.
  RexNode notEqualsFirst = REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int3);
  RexNode notEqualsSecond = REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int7);
  RexNode conjunction = REX_BUILDER.makeCall(SqlStdOperatorTable.AND, notEqualsFirst, notEqualsSecond);

  // The simplified conjunction must be a single SEARCH call; that is the shape under test.
  RexNode simplified = simplify(conjunction);
  Assert.assertEquals(SqlKind.SEARCH, simplified.getKind());

  // NOTE(review): the expected selectivity depends on the statistics behind `scan`/`mq`,
  // which are set up outside this method - confirm against the fixture if they change.
  FilterSelectivityEstimator selectivityEstimator = new FilterSelectivityEstimator(scan, mq);
  Assert.assertEquals(0.8095238095238095, selectivityEstimator.estimateSelectivity(simplified), DELTA);
}

@Test
public void testComputeRangePredicateSelectivityWhenNoStats() {
RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: predicate_fold_tb
filterExpr: (value is null or (value < 3) or (value > 3)) (type: boolean)
filterExpr: ((value <> 3) or value is null) (type: boolean)
Filter Operator
predicate: (value is null or (value < 3) or (value > 3)) (type: boolean)
predicate: ((value <> 3) or value is null) (type: boolean)
Select Operator
expressions: value (type: int)
outputColumnNames: _col0
Expand Down
Loading
Loading