谓词下推使用异常

为了更快的定位您的问题,请提供以下信息,谢谢
【详述】谓词下推测试
【背景】业务中发现谓词下推和写法关系很大
【业务影响】
【是否存算分离】 或
【StarRocks版本】例如:2.1.4
【集群规模】例如:3fe(3 follower)+3be(fe与be混部)
【机器信息】48C/128G/万兆
【联系方式】
【附件】

谓词测试

– 无法下推
EXPLAIN WITH base as (
SELECT scan_id, scan_date_time from DEFECT_WAFER dw
WHERE scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
scan_id in (500007,500008) )
select * from DEFECT_DATA join base on DEFECT_DATA.scan_id = base.scan_id and DEFECT_DATA.scan_date_time=base.scan_date_time;

– 使用 where 子查询
explain select defect_id from DEFECT_DATA WHERE scan_id in (
SELECT scan_id from DEFECT_WAFER dw
WHERE scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
scan_id in (500007,500008) );

– 两个子查询分别完成
EXPLAIN WITH base as (
SELECT scan_id, scan_date_time from DEFECT_WAFER dw
WHERE scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
scan_id in (500007,500008) ),
data as (SELECT scan_id,scan_date_time, defect_id from DEFECT_DATA)
select data.defect_id from base join data on data.scan_id = base.scan_id and data.scan_date_time=base.scan_date_time;

– 只有这个写法可以下推成功
explain WITH data as (
SELECT scan_id,scan_date_time, defect_id from DEFECT_DATA )
select data.defect_id from DEFECT_WAFER dw join data on dw.scan_id = data.scan_id and dw.scan_date_time=data.scan_date_time
WHERE dw.scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
dw.scan_id in (500007,500008) ;

explain WITH data as (
SELECT scan_id,scan_date_time, defect_id from DEFECT_DATA )
select * from DEFECT_WAFER dw join data on dw.scan_id = data.scan_id and dw.scan_date_time=data.scan_date_time
WHERE dw.scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
dw.scan_id in (500007,500008) ;

PLAN FRAGMENT 0
OUTPUT EXPRS:38: scan_id | 39: scan_date_time | 40: lot_id | 41: wafer_no | 42: test_no | 43: technology | 44: fab_id | 45: comments | 46: wafer_size | 47: step_id | 48: device_id | 49: slot_id | 50: wafer_mark_type | 51: wafer_mark_location | 52: process_equip_id | 53: inspect_prog_id | 54: inspect_equip_manufacturer | 55: inspect_equip_group | 56: inspect_equip_id | 57: total_scan_area | 58: total_die_tested | 59: setup_id | 60: setup_datetime | 61: image_count | 62: scan_sequence | 63: review_status | 64: last_modified_time | 65: adder_all_defects | 66: adder_random_defects | 67: adder_cluster_count | 68: all_defects | 69: random_defects | 70: cluster_count | 71: adder_defective_die | 72: adder_random_defective_die | 73: adder_cluster_defective_die | 74: defective_die | 75: random_defective_die | 76: cluster_defective_die | 77: test_plan | 78: classified_defect_count | 79: adder_classified_defect_count | 80: defects_withimage | 81: repeater_defect_count | 82: repeater_count | 83: x_origin | 84: y_origin | 85: x_die_size | 86: y_die_size | 87: x_die_pitch | 88: y_die_pitch | 89: x_alignment1 | 90: y_alignment1 | 91: x_alignment2 | 92: y_alignment2 | 93: x_alignment3 | 94: y_alignment3 | 95: x_alignment4 | 96: y_alignment4 | 97: x_alignment5 | 98: y_alignment5 | 99: polymerization_time | 100: load_time | 101: scan_id | 103: scan_date_time | 102: defect_id
PARTITION: UNPARTITIONED

RESULT SINK

5:EXCHANGE
limit: 200

PLAN FRAGMENT 1
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 38: scan_id, 39: scan_date_time

STREAM DATA SINK
EXCHANGE ID: 05
UNPARTITIONED

4:HASH JOIN
| join op: INNER JOIN (PARTITIONED)
| colocate: false, reason:
| equal join conjunct: 38: scan_id = 101: scan_id
| equal join conjunct: 39: scan_date_time = 103: scan_date_time
| limit: 200
|
|----3:EXCHANGE
|
1:EXCHANGE

PLAN FRAGMENT 2
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 03
HASH_PARTITIONED: 101: scan_id, 103: scan_date_time

2:OlapScanNode
TABLE: DEFECT_DATA
PREAGGREGATION: ON
PREDICATES: 101: scan_id IN (500007, 500008), 103: scan_date_time IN (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’)
partitions=2/7
rollup: DEFECT_DATA
tabletRatio=2/6
tabletList=3520426,3520398
cardinality=422
avgRowSize=20.0
numNodes=0

PLAN FRAGMENT 3
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 01
HASH_PARTITIONED: 38: scan_id, 39: scan_date_time

0:OlapScanNode
TABLE: DEFECT_WAFER
PREAGGREGATION: ON
PREDICATES: 39: scan_date_time IN (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’), 38: scan_id IN (500007, 500008)
partitions=2/8
rollup: DEFECT_WAFER
tabletRatio=2/4
tabletList=3520351,3520379
cardinality=1
avgRowSize=310.07693
numNodes=0

EXPLAIN WITH base as (
SELECT scan_id, scan_date_time from DEFECT_WAFER dw
WHERE scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
scan_id in (500007,500008) ),
datas as (
select * from DEFECT_DATA WHERE scan_date_time in (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’) AND
scan_id in (500007,500008)
)
select * from datas join base on datas.scan_id = base.scan_id and datas.scan_date_time=base.scan_date_time;

PLAN FRAGMENT 0
OUTPUT EXPRS:101: scan_id | 102: defect_id | 103: scan_date_time | 104: lot_id | 105: wafer_no | 106: repeater_flag | 107: modify_record | 108: x_coor | 109: y_coor | 110: x_rel | 111: y_rel | 112: x_index | 113: y_index | 114: x_size | 115: y_size | 116: d_size | 117: defect_area | 118: test_no | 119: image_count | 120: cluster_type | 121: cluster_no | 122: adder_flag | 123: adder_scan_id | 124: review_sample | 125: dtype1_class | 126: dtype2_class | 127: dtype3_class | 128: dtype4_class | 129: dtype5_class | 130: dtype6_class | 131: dtype7_class | 132: dtype8_class | 133: dtype9_class | 134: dtype10_class | 135: review_ext | 136: inspect_ext | 137: load_time | 138: scan_id | 139: scan_date_time
PARTITION: UNPARTITIONED

RESULT SINK

4:EXCHANGE
limit: 200

PLAN FRAGMENT 1
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 04
UNPARTITIONED

3:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| colocate: false, reason:
| equal join conjunct: 101: scan_id = 138: scan_id
| equal join conjunct: 103: scan_date_time = 139: scan_date_time
| limit: 200
|
|----2:EXCHANGE
|
0:OlapScanNode
TABLE: DEFECT_DATA
PREAGGREGATION: ON
PREDICATES: 103: scan_date_time IN (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’), 101: scan_id IN (500007, 500008)
partitions=2/7
rollup: DEFECT_DATA
tabletRatio=2/6
tabletList=3520426,3520398
cardinality=422
avgRowSize=2193.1758
numNodes=0

PLAN FRAGMENT 2
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 02
UNPARTITIONED

1:OlapScanNode
TABLE: DEFECT_WAFER
PREAGGREGATION: ON
PREDICATES: 139: scan_date_time IN (‘2023-08-20 22:33:41’, ‘2021-10-17 10:30:27’), 138: scan_id IN (500007, 500008)
partitions=2/8
rollup: DEFECT_WAFER
tabletRatio=2/4
tabletList=3520351,3520379
cardinality=1
avgRowSize=16.0
numNodes=0

上面写法那种优一些