基数比较低指的是列中包含很多不同的值,重复的值很少
不会的,有时候数据量大,SQL复杂,导致执行计划不一定正确,还是要用SQL改写的方式确保一下
基数低指重复的值多。
嗯嗯好的,理解反了
是的 from 大至小
我是这么考虑的目前hash join 的问题,我觉得是 DISTRIBUTED BY HASH(o_custkey
,o_shippriority
) BUCKETS 700 这的值也有关系,我觉是不这只留下一列就可以了
是不是把框起来的去掉
DISTRIBUTED BY HASH一个值也是可以 只要数据是分布均匀
这个方法也不行,能不能通过调整参数的方式,慢慢跑出来呢
DISTRIBUTED BY HASH 这个数据分布的话,用哪些列会更好呢
ERROR 1064 (HY000): row count of right table in hash join > 4294967295
还是这个报错信息吧?
这个需要你查一下数据,就是不倾斜,数据均匀的列
把您改后的SQL发一下还有explain cost
对,还是这个报错,这次换成sql 21 和sql 9 之前是sql 7 和sql8
sql21
select
s_name,
count(*) as numwait
from
lineitem l1,
orders,
supplier,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = ‘F’
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = ‘MOZAMBIQUE’
group by
s_name
order by
numwait desc,
s_name
limit 100;
sql5
select count(*) from (
select nation, o_year, sum(amount) as sum_profit
from
(
select
n_name as nation,
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
from lineitem, orders, partsupp, part, supplier, nation, orders
where
s_suppkey = l_suppkey
and ps_suppkey = l_suppkey
and ps_partkey = l_partkey
and p_partkey = l_partkey
and o_orderkey = l_orderkey
and s_nationkey = n_nationkey
and p_name like ‘%green%’
) as profit
group by nation, o_year
order by nation, o_year desc
) as t1;
select
s_name,
count(*) as numwait
from
lineitem l1,
orders,
supplier,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = ‘F’
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = ‘MOZAMBIQUE’
group by
s_name
order by
numwait desc,
s_name
limit 100;
PLAN FRAGMENT 0 |
| OUTPUT EXPRS:29: s_name | 77: count |
| PARTITION: UNPARTITIONED |
| |
| RESULT SINK |
| |
| 30:MERGING-EXCHANGE |
| limit: 100 |
| |
| PLAN FRAGMENT 1 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 29: s_name |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 30 |
| UNPARTITIONED |
| |
| 29:TOP-N |
| | order by: <slot 77> 77: count DESC, <slot 29> 29: s_name ASC |
| | offset: 0 |
| | limit: 100 |
| | |
| 28:AGGREGATE (update finalize) |
| | output: count(*) |
| | group by: 29: s_name |
| | |
| 27:EXCHANGE |
| |
| PLAN FRAGMENT 2 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 41: l_orderkey |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 27 |
| HASH_PARTITIONED: 29: s_name |
| |
| 26:Project |
| | <slot 29> : 29: s_name |
| | |
| 25:HASH JOIN |
| | join op: RIGHT SEMI JOIN (PARTITIONED) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 41: l_orderkey = 1: l_orderkey |
| | other join predicates: 43: l_suppkey != 3: l_suppkey |
| | |
| |----24:EXCHANGE |
| | |
| 1:EXCHANGE |
| |
| PLAN FRAGMENT 3 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 1: l_orderkey |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 24 |
| HASH_PARTITIONED: 1: l_orderkey |
| |
| 23:Project |
| | <slot 1> : 1: l_orderkey |
| | <slot 3> : 3: l_suppkey |
| | <slot 29> : 29: s_name |
| | |
| 22:HASH JOIN |
| | join op: RIGHT ANTI JOIN (BUCKET_SHUFFLE(S)) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 59: l_orderkey = 1: l_orderkey |
| | other join predicates: 61: l_suppkey != 3: l_suppkey |
| | |
| |----21:Project |
| | | <slot 1> : 1: l_orderkey |
| | | <slot 3> : 3: l_suppkey |
| | | <slot 29> : 29: s_name |
| | | |
| | 20:HASH JOIN |
| | | join op: INNER JOIN (PARTITIONED) |
| | | hash predicates: |
| | | colocate: false, reason: |
| | | equal join conjunct: 1: l_orderkey = 18: o_orderkey |
| | | |
| | |----19:EXCHANGE |
| | | |
| | 16:EXCHANGE |
| | |
| 4:EXCHANGE |
| |
| PLAN FRAGMENT 4 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 19 |
| HASH_PARTITIONED: 18: o_orderkey |
| |
| 18:Project |
| | <slot 18> : 18: o_orderkey |
| | |
| 17:OlapScanNode |
| TABLE: orders |
| PREAGGREGATION: ON |
| PREDICATES: 20: o_orderstatus = ‘F’ |
| partitions=80/82 |
| rollup: orders |
| tabletRatio=56000/56000 |
| tabletList=27958822,27958824,27958826,27958828,27958830,27958832,27958834,27958836,27958838,27958840 … |
| cardinality=7500000000 |
| avgRowSize=5.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 5 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 16 |
| HASH_PARTITIONED: 1: l_orderkey |
| |
| 15:Project |
| | <slot 1> : 1: l_orderkey |
| | <slot 3> : 3: l_suppkey |
| | <slot 29> : 29: s_name |
| | |
| 14:HASH JOIN |
| | join op: INNER JOIN (BROADCAST) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 3: l_suppkey = 28: s_suppkey |
| | |
| |----13:EXCHANGE |
| | |
| 6:Project |
| | <slot 1> : 1: l_orderkey |
| | <slot 3> : 3: l_suppkey |
| | |
| 5:OlapScanNode |
| TABLE: lineitem |
| PREAGGREGATION: ON |
| PREDICATES: 13: l_receiptdate > 12: l_commitdate |
| partitions=84/84 |
| rollup: lineitem |
| tabletRatio=58800/58800 |
| tabletList=28126052,28126054,28126056,28126058,28126060,28126062,28126064,28126066,28126068,28126070 … |
| cardinality=30000016971 |
| avgRowSize=16.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 6 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 13 |
| UNPARTITIONED |
| |
| 12:Project |
| | <slot 28> : 28: s_suppkey |
| | <slot 29> : 29: s_name |
| | |
| 11:HASH JOIN |
| | join op: INNER JOIN (BROADCAST) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 31: s_nationkey = 36: n_nationkey |
| | |
| |----10:EXCHANGE |
| | |
| 7:OlapScanNode |
| TABLE: supplier |
| PREAGGREGATION: ON |
| partitions=1/1 |
| rollup: supplier |
| tabletRatio=20/20 |
| tabletList=28028010,28028012,28028014,28028016,28028018,28028020,28028022,28028024,28028026,28028028 … |
| cardinality=100000000 |
| avgRowSize=26.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 7 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 10 |
| UNPARTITIONED |
| |
| 9:Project |
| | <slot 36> : 36: n_nationkey |
| | |
| 8:OlapScanNode |
| TABLE: nation |
| PREAGGREGATION: ON |
| PREDICATES: 37: n_name = ‘MOZAMBIQUE’ |
| partitions=1/1 |
| rollup: nation |
| tabletRatio=1/1 |
| tabletList=31555 |
| cardinality=1 |
| avgRowSize=11.08 |
| numNodes=0 |
| |
| PLAN FRAGMENT 8 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 04 |
| HASH_PARTITIONED: 59: l_orderkey |
| |
| 3:Project |
| | <slot 59> : 59: l_orderkey |
| | <slot 61> : 61: l_suppkey |
| | |
| 2:OlapScanNode |
| TABLE: lineitem |
| PREAGGREGATION: ON |
| PREDICATES: 71: l_receiptdate > 70: l_commitdate |
| partitions=84/84 |
| rollup: lineitem |
| tabletRatio=58800/58800 |
| tabletList=28126052,28126054,28126056,28126058,28126060,28126062,28126064,28126066,28126068,28126070 … |
| cardinality=30000016971 |
| avgRowSize=16.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 9 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 01 |
| HASH_PARTITIONED: 41: l_orderkey |
| |
| 0:OlapScanNode |
| TABLE: lineitem |
| PREAGGREGATION: ON |
| partitions=84/84 |
| rollup: lineitem |
| tabletRatio=58800/58800 |
| tabletList=28126052,28126054,28126056,28126058,28126060,28126062,28126064,28126066,28126068,28126070 … |
| cardinality=60000033942 |
| avgRowSize=8.0 |
| numNodes=0
partsupp,
part,
这两个表多少数据量?
MySQL [tcph10tmp]> explain select count() from ( select nation, o_year, sum(amount) as sum_profit from ( select n_name as nation, extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from lineitem, orders, partsupp, part, supplier, nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey and p_name like ‘%green%’ ) as profit group by nation, o_year order by nation, o_year desc ) as t1;
±--------------------------------------------------------------------------------------------------------------+
| Explain String |
±--------------------------------------------------------------------------------------------------------------+
| PLAN FRAGMENT 0 |
| OUTPUT EXPRS:60: count |
| PARTITION: UNPARTITIONED |
| |
| RESULT SINK |
| |
| 28:AGGREGATE (update finalize) |
| | output: count() |
| | group by: |
| | |
| 27:EXCHANGE |
| |
| PLAN FRAGMENT 1 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 53: n_name, 57: year |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 27 |
| UNPARTITIONED |
| |
| 26:Project |
| | <slot 57> : 57: year |
| | |
| 25:AGGREGATE (merge finalize) |
| | group by: 53: n_name, 57: year |
| | |
| 24:EXCHANGE |
| |
| PLAN FRAGMENT 2 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 24 |
| HASH_PARTITIONED: 53: n_name, 57: year |
| |
| 23:AGGREGATE (update serialize) |
| | STREAMING |
| | group by: 53: n_name, 57: year |
| | |
| 22:Project |
| | <slot 53> : 53: n_name |
| | <slot 57> : year(22: o_orderdate) |
| | |
| 21:HASH JOIN |
| | join op: INNER JOIN (BUCKET_SHUFFLE) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 52: n_nationkey = 47: s_nationkey |
| | |
| |----20:EXCHANGE |
| | |
| 0:OlapScanNode |
| TABLE: nation |
| PREAGGREGATION: ON |
| partitions=1/1 |
| rollup: nation |
| tabletRatio=1/1 |
| tabletList=31555 |
| cardinality=25 |
| avgRowSize=11.08 |
| numNodes=0 |
| |
| PLAN FRAGMENT 3 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 20 |
| BUCKET_SHUFFLE_HASH_PARTITIONED: 47: s_nationkey |
| |
| 19:Project |
| | <slot 22> : 22: o_orderdate |
| | <slot 47> : 47: s_nationkey |
| | |
| 18:HASH JOIN |
| | join op: INNER JOIN (BUCKET_SHUFFLE) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 44: s_suppkey = 3: l_suppkey |
| | |
| |----17:EXCHANGE |
| | |
| 1:OlapScanNode |
| TABLE: supplier |
| PREAGGREGATION: ON |
| partitions=1/1 |
| rollup: supplier |
| tabletRatio=20/20 |
| tabletList=28028010,28028012,28028014,28028016,28028018,28028020,28028022,28028024,28028026,28028028 … |
| cardinality=100000000 |
| avgRowSize=8.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 4 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 17 |
| BUCKET_SHUFFLE_HASH_PARTITIONED: 3: l_suppkey |
| |
| 16:Project |
| | <slot 3> : 3: l_suppkey |
| | <slot 22> : 22: o_orderdate |
| | |
| 15:HASH JOIN |
| | join op: INNER JOIN (BROADCAST) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 34: p_partkey = 2: l_partkey |
| | |
| |----14:EXCHANGE |
| | |
| 3:Project |
| | <slot 34> : 34: p_partkey |
| | |
| 2:OlapScanNode |
| TABLE: part |
| PREAGGREGATION: ON |
| PREDICATES: 35: p_name LIKE ‘%green%’ |
| partitions=1/1 |
| rollup: part |
| tabletRatio=360/360 |
| tabletList=27728,27730,27732,27734,27736,27738,27740,27742,27744,27746 … |
| cardinality=500000000 |
| avgRowSize=36.750008 |
| numNodes=0 |
| |
| PLAN FRAGMENT 5 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 14 |
| UNPARTITIONED |
| |
| 13:Project |
| | <slot 2> : 2: l_partkey |
| | <slot 3> : 3: l_suppkey |
| | <slot 22> : 22: o_orderdate |
| | |
| 12:HASH JOIN |
| | join op: INNER JOIN (BROADCAST) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 29: ps_suppkey = 61: cast |
| | equal join conjunct: 28: ps_partkey = 2: l_partkey |
| | |
| |----11:EXCHANGE |
| | |
| 4:OlapScanNode |
| TABLE: partsupp |
| PREAGGREGATION: ON |
| partitions=1/1 |
| rollup: partsupp |
| tabletRatio=360/360 |
| tabletList=27005,27007,27009,27011,27013,27015,27017,27019,27021,27023 … |
| cardinality=7840000000 |
| avgRowSize=11.889456 |
| numNodes=0 |
| |
| PLAN FRAGMENT 6 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 1: l_orderkey |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 11 |
| UNPARTITIONED |
| |
| 10:Project |
| | <slot 2> : 2: l_partkey |
| | <slot 3> : 3: l_suppkey |
| | <slot 22> : 22: o_orderdate |
| | <slot 61> : CAST(3: l_suppkey AS VARCHAR(1048576)) |
| | |
| 9:HASH JOIN |
| | join op: INNER JOIN (PARTITIONED) |
| | hash predicates: |
| | colocate: false, reason: |
| | equal join conjunct: 1: l_orderkey = 18: o_orderkey |
| | |
| |----8:EXCHANGE |
| | |
| 6:EXCHANGE |
| |
| PLAN FRAGMENT 7 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 08 |
| HASH_PARTITIONED: 18: o_orderkey |
| |
| 7:OlapScanNode |
| TABLE: orders |
| PREAGGREGATION: ON |
| partitions=80/82 |
| rollup: orders |
| tabletRatio=56000/56000 |
| tabletList=27958822,27958824,27958826,27958828,27958830,27958832,27958834,27958836,27958838,27958840 … |
| cardinality=15000000000 |
| avgRowSize=8.0 |
| numNodes=0 |
| |
| PLAN FRAGMENT 8 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 06 |
| HASH_PARTITIONED: 1: l_orderkey |
| |
| 5:OlapScanNode |
| TABLE: lineitem |
| PREAGGREGATION: ON |
| partitions=84/84 |
| rollup: lineitem |
| tabletRatio=58800/58800 |
| tabletList=28126052,28126054,28126056,28126058,28126060,28126062,28126064,28126066,28126068,28126070 … |
| cardinality=60000033942 |
| avgRowSize=12.0 |
| numNodes=0 |
±--------------------------------------------------------------------------------------------------------------+
223 rows in set (0.18 sec)
partsupp 80亿 part 20亿