通常业务的数据会有时间的字段,根据实际查询需求设置就好了
例子:查询一个月内的流水,通常where 时间字段, 那么就建议使用这个时间字段来作分区键,分区键建议使用每天一个分区。
我刚才建表的时候好像只能是PARTITION BY使用第一个字段,PRIMARY KEY (cs_sold_date_sk) 使用第二字段就报错了Distribution column[inv_item_sk] is not key column
PARTITION BY 分区键,是需要放在DUPLICATE KEY 第一列
那我们这个应该用不了了,我们数据第一个字段是null ,我把表为null字段删除了之后也不行
所有字段都没有时间类型的字段吗?
create table store_sales
(
ss_sold_date_sk bigint,
ss_sold_time_sk bigint,
ss_item_sk bigint,
ss_customer_sk bigint,
ss_cdemo_sk bigint,
ss_hdemo_sk bigint,
ss_addr_sk bigint,
ss_store_sk bigint,
ss_promo_sk bigint,
ss_ticket_number bigint,
ss_quantity int,
ss_wholesale_cost decimal(7,2),
ss_list_price decimal(7,2),
ss_sales_price decimal(7,2),
ss_ext_discount_amt decimal(7,2),
ss_ext_sales_price decimal(7,2),
ss_ext_wholesale_cost decimal(7,2),
ss_ext_list_price decimal(7,2),
ss_ext_tax decimal(7,2),
ss_coupon_amt decimal(7,2),
ss_net_paid decimal(7,2),
ss_net_paid_inc_tax decimal(7,2),
ss_net_profit decimal(7,2)
) PRIMARY KEY (ss_item_sk)
DISTRIBUTED BY HASH(ss_item_sk) BUCKETS 3000
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“storage_medium” = “SSD”,
“enable_persistent_index” = “true”,
“bloom_filter_columns” = “ss_sold_date_sk,ss_sold_time_sk,ss_item_sk”
); 第一个字段和第二个字段数据mull
Error: NULL value in non-nullable column ‘ss_sold_date_sk’. Row: [NULL, NULL, 382202, NULL, NULL, NULL, 32072768, NULL, 557, 43207576, NULL, 18.09, 18.99, NULL, 0.00, NULL, NULL, 1614.15, NULL, 0.00, 1178.10, 1248.78, -359.55, 0]
Error: NULL value in non-nullable column ‘ss_sold_date_sk’. Row: [NULL, NULL, 76693, NULL, 166795, NULL, NULL, NULL, 1562, 43207576, NULL, NULL, 121.65, 121.65, 0.00, 4379.40, 3589.92, NULL, NULL, 0.00, 4379.40, 4466.98, NULL, 0]
Error: NULL value in non-nullable column ‘ss_sold_date_sk’. Row: [NULL, NULL, 150252, 43032915, NULL, 683, 30685367, 700, NULL, 43207577, 19, NULL, NULL, 85.01, NULL, 1615.19, 1549.83, 2991.17, NULL, NULL, NULL, 1744.40, 65.36, 0]
Error: NULL value in non-nullable column ‘ss_sold_date_sk’. Row: [NULL, 32544, 368680, 1146273, 704493, NULL, 6036133, NULL, 36, 43207578, 41, NULL, NULL, NULL, 101.91, 566.21, NULL, NULL, NULL, 101.91, 464.30, 506.08, -2440.96, 0]
Error: NULL value in non-nullable column ‘ss_sold_date_sk’. Row: [NULL, NULL, 286185, 15156276, 1629849, 4669, NULL, 722, 972, 43207581, 10, 51.79, 86.48, NULL, 0.00, NULL, 517.90, 864.80, NULL, 0.00, 657.20, 657.20, NULL, 0]
看您的数据没有适合做范围分区的字段吧,那就不需要建立PARTITION BY
那还是使用DUPLICATE KEY DISTRIBUTED BY 或者是您这边建议呢
您这个表数据量多大
目前是三个大表 每个大表200多亿条数据,三个大表数据而且还有很多null值
我们看一下如何优化
嗯嗯好的,非常感谢,敬候佳音
还有一个疑问,ERROR 1064 (HY000): Memory of Query8cb8f860-2505-11ed-90d8-fa163e88b465 exceed limit. Pipeline Backend: 10.0.0.83, fragment: 8cb8f860-2505-11ed-90d8-fa163e88b48a Used: 438106711304, Limit: 438103947386. Mem usage has exceed the limit of single query, You can change the limit by set session variable exec_mem_limit. 这个告警的SQL能提供一下吗? 谢谢
WITH frequent_ss_items AS
(SELECT
substr(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
d_date solddate,
count() cnt
FROM store_sales, date_dim, item
WHERE ss_sold_date_sk = d_date_sk
AND ss_item_sk = i_item_sk
AND d_year IN (2000, 1999 + 1, 1999 + 2, 1999 + 3)
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count() > 4),
max_store_sales AS
(SELECT max(csales) tpcds_cmax
FROM (SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) csales
FROM store_sales, customer, date_dim
WHERE ss_customer_sk = c_customer_sk
AND ss_sold_date_sk = d_date_sk
AND d_year IN (1999, 1999 + 1, 1999 + 2, 1999 + 3)
GROUP BY c_customer_sk) x),
best_ss_customer AS
(SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) ssales
FROM store_sales, customer
WHERE ss_customer_sk = c_customer_sk
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
(SELECT *
FROM max_store_sales))
SELECT sum(sales)
FROM ((SELECT cs_quantity * cs_list_price sales
FROM catalog_sales, date_dim
WHERE d_year = 1999
AND d_moy = 2
AND cs_sold_date_sk = d_date_sk
AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer))
UNION ALL
(SELECT ws_quantity * ws_list_price sales
FROM web_sales, date_dim
WHERE d_year = 1999
AND d_moy = 2
AND ws_sold_date_sk = d_date_sk
AND ws_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND ws_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer))) y
LIMIT 100;
–Query 23B
WITH frequent_ss_items AS
(SELECT
substr(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
d_date solddate,
count() cnt
FROM store_sales, date_dim, item
WHERE ss_sold_date_sk = d_date_sk
AND ss_item_sk = i_item_sk
AND d_year IN (1999, 1999 + 1, 1999 + 2, 1999 + 3)
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count() > 4),
max_store_sales AS
(SELECT max(csales) tpcds_cmax
FROM (SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) csales
FROM store_sales, customer, date_dim
WHERE ss_customer_sk = c_customer_sk
AND ss_sold_date_sk = d_date_sk
AND d_year IN (1999, 1999 + 1, 1999 + 2, 1999 + 3)
GROUP BY c_customer_sk) x),
best_ss_customer AS
(SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) ssales
FROM store_sales
, customer
WHERE ss_customer_sk = c_customer_sk
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
(SELECT *
FROM max_store_sales))
SELECT
c_last_name,
c_first_name,
sales
FROM ((SELECT
c_last_name,
c_first_name,
sum(cs_quantity * cs_list_price) sales
FROM catalog_sales, customer, date_dim
WHERE d_year = 1999
AND d_moy = 2
AND cs_sold_date_sk = d_date_sk
AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer)
AND cs_bill_customer_sk = c_customer_sk
GROUP BY c_last_name, c_first_name)
UNION ALL
(SELECT
c_last_name,
c_first_name,
sum(ws_quantity * ws_list_price) sales
FROM web_sales, customer, date_dim
WHERE d_year = 1999
AND d_moy = 2
AND ws_sold_date_sk = d_date_sk
AND ws_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND ws_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer)
AND ws_bill_customer_sk = c_customer_sk
GROUP BY c_last_name, c_first_name)) y
ORDER BY c_last_name, c_first_name, sales
LIMIT 100;
麻烦提供一下explain costs 信息
Explain String
PLAN FRAGMENT 0
OUTPUT EXPRS:777: sum
PARTITION: UNPARTITIONED
RESULT SINK
123:AGGREGATE (merge finalize)
| output: sum(777: sum)
| group by:
| limit: 100
|
122:EXCHANGE
PLAN FRAGMENT 1
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 122
UNPARTITIONED
121:AGGREGATE (update serialize)
| output: sum(776: expr)
| group by:
|
0:UNION
|
|----120:EXCHANGE
|
60:EXCHANGE
PLAN FRAGMENT 2
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 778: cast
STREAM DATA SINK
EXCHANGE ID: 120
RANDOM
119:Project
| <slot 775> : CAST(538: ws_quantity AS DECIMAL64(9,0)) * 540: ws_list_price
|
118:HASH JOIN
| join op: LEFT SEMI JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 778: cast = 681: c_customer_sk
|
|----117:EXCHANGE
|
86:EXCHANGE
PLAN FRAGMENT 3
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 681: c_customer_sk
STREAM DATA SINK
EXCHANGE ID: 117
HASH_PARTITIONED: 681: c_customer_sk
116:Project
| <slot 681> : 681: c_customer_sk
|
115:CROSS JOIN
| cross join:
| predicates: CAST(700: sum AS DOUBLE) > CAST(0.5 * 772: max AS DOUBLE)
|
|----114:EXCHANGE
|
95:AGGREGATE (merge finalize)
| output: sum(700: sum)
| group by: 681: c_customer_sk
|
94:EXCHANGE
PLAN FRAGMENT 4
OUTPUT EXPRS:
PARTITION: UNPARTITIONED
STREAM DATA SINK
EXCHANGE ID: 114
UNPARTITIONED
113:AGGREGATE (merge finalize)
| output: max(772: max)
| group by:
|
112:EXCHANGE
PLAN FRAGMENT 5
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 724: c_customer_sk
STREAM DATA SINK
EXCHANGE ID: 112
UNPARTITIONED
111:AGGREGATE (update serialize)
| output: max(771: sum)
| group by:
|
110:Project
| <slot 771> : 771: sum
|
109:AGGREGATE (merge finalize)
| output: sum(771: sum)
| group by: 724: c_customer_sk
|
108:EXCHANGE
PLAN FRAGMENT 6
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 704: ss_customer_sk
STREAM DATA SINK
EXCHANGE ID: 108
HASH_PARTITIONED: 724: c_customer_sk
107:AGGREGATE (update serialize)
| STREAMING
| output: sum(770: expr)
| group by: 724: c_customer_sk
|
106:Project
| <slot 724> : 724: c_customer_sk
| <slot 770> : CAST(711: ss_quantity AS DECIMAL64(9,0)) * 714: ss_sales_price
|
105:HASH JOIN
| join op: INNER JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 704: ss_customer_sk = 724: c_customer_sk
|
|----104:EXCHANGE
|
102:EXCHANGE
PLAN FRAGMENT 7
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 104
HASH_PARTITIONED: 724: c_customer_sk
103:OlapScanNode
TABLE: customer
PREAGGREGATION: ON
partitions=1/1
rollup: customer
tabletRatio=8/8
tabletList=11985,11987,11989,11991,11993,11995,11997,11999
cardinality=64405602
avgRowSize=1.0
numNodes=0
PLAN FRAGMENT 8
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 102
HASH_PARTITIONED: 704: ss_customer_sk
101:Project
| <slot 704> : 704: ss_customer_sk
| <slot 711> : 711: ss_quantity
| <slot 714> : 714: ss_sales_price
|
100:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 701: ss_sold_date_sk = 742: d_date_sk
|
|----99:EXCHANGE
|
96:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=4.0
numNodes=0
PLAN FRAGMENT 9
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 99
UNPARTITIONED
98:Project
| <slot 742> : 742: d_date_sk
|
97:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: CAST(748: d_year AS BIGINT) IN (1999, 2000, 2001, 2002)
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=36525
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 10
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 661: ss_customer_sk
STREAM DATA SINK
EXCHANGE ID: 94
HASH_PARTITIONED: 681: c_customer_sk
93:AGGREGATE (update serialize)
| STREAMING
| output: sum(699: expr)
| group by: 681: c_customer_sk
|
92:Project
| <slot 681> : 681: c_customer_sk
| <slot 699> : CAST(668: ss_quantity AS DECIMAL64(9,0)) * 671: ss_sales_price
|
91:HASH JOIN
| join op: INNER JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 661: ss_customer_sk = 681: c_customer_sk
|
|----90:EXCHANGE
|
88:EXCHANGE
PLAN FRAGMENT 11
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 90
HASH_PARTITIONED: 681: c_customer_sk
89:OlapScanNode
TABLE: customer
PREAGGREGATION: ON
partitions=1/1
rollup: customer
tabletRatio=8/8
tabletList=11985,11987,11989,11991,11993,11995,11997,11999
cardinality=64405602
avgRowSize=1.0
numNodes=0
PLAN FRAGMENT 12
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 88
HASH_PARTITIONED: 661: ss_customer_sk
87:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=3.0
numNodes=0
PLAN FRAGMENT 13
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 779: cast
STREAM DATA SINK
EXCHANGE ID: 86
HASH_PARTITIONED: 778: cast
85:Project
| <slot 538> : 538: ws_quantity
| <slot 540> : 540: ws_list_price
| <slot 778> : CAST(524: ws_bill_customer_sk AS BIGINT)
|
84:HASH JOIN
| join op: LEFT SEMI JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 779: cast = 633: i_item_sk
|
|----83:EXCHANGE
|
68:EXCHANGE
PLAN FRAGMENT 14
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 655: substr, 633: i_item_sk, 607: d_date
STREAM DATA SINK
EXCHANGE ID: 83
HASH_PARTITIONED: 633: i_item_sk
82:Project
| <slot 633> : 633: i_item_sk
|
81:AGGREGATE (merge finalize)
| output: count(656: count)
| group by: 655: substr, 633: i_item_sk, 607: d_date
| having: 656: count > 4
|
80:EXCHANGE
PLAN FRAGMENT 15
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 80
HASH_PARTITIONED: 655: substr, 633: i_item_sk, 607: d_date
79:AGGREGATE (update serialize)
| STREAMING
| output: count(*)
| group by: 655: substr, 633: i_item_sk, 607: d_date
|
78:Project
| <slot 607> : 607: d_date
| <slot 633> : 633: i_item_sk
| <slot 655> : substr(637: i_item_desc, 1, 30)
|
77:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 584: ss_item_sk = 633: i_item_sk
|
|----76:EXCHANGE
|
74:Project
| <slot 584> : 584: ss_item_sk
| <slot 607> : 607: d_date
|
73:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 582: ss_sold_date_sk = 605: d_date_sk
|
|----72:EXCHANGE
|
69:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 16
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 76
UNPARTITIONED
75:OlapScanNode
TABLE: item
PREAGGREGATION: ON
partitions=1/1
rollup: item
tabletRatio=1/1
tabletList=11975
cardinality=402000
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 17
OUTPUT EXPRS:
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 72
UNPARTITIONED
71:Project
| <slot 605> : 605: d_date_sk
| <slot 607> : 607: d_date
|
70:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: CAST(611: d_year AS BIGINT) IN (2000, 2000, 2001, 2002)
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=36525
avgRowSize=3.0
numNodes=0
PLAN FRAGMENT 18
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 68
HASH_PARTITIONED: 779: cast
67:Project
| <slot 524> : 524: ws_bill_customer_sk
| <slot 538> : 538: ws_quantity
| <slot 540> : 540: ws_list_price
| <slot 779> : CAST(523: ws_item_sk AS BIGINT)
|
66:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 780: cast = 554: d_date_sk
|
|----65:EXCHANGE
|
62:Project
| <slot 523> : 523: ws_item_sk
| <slot 524> : 524: ws_bill_customer_sk
| <slot 538> : 538: ws_quantity
| <slot 540> : 540: ws_list_price
| <slot 780> : CAST(520: ws_sold_date_sk AS BIGINT)
|
61:OlapScanNode
TABLE: web_sales
PREAGGREGATION: ON
partitions=1/1
rollup: web_sales
tabletRatio=50/50
tabletList=12325,12327,12329,12331,12333,12335,12337,12339,12341,12343 …
cardinality=7199963324
avgRowSize=6.0
numNodes=0
PLAN FRAGMENT 19
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 65
UNPARTITIONED
64:Project
| <slot 554> : 554: d_date_sk
|
63:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: 560: d_year = 1999, 562: d_moy = 2
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=18262
avgRowSize=3.0
numNodes=0
PLAN FRAGMENT 20
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 781: cast
STREAM DATA SINK
EXCHANGE ID: 60
RANDOM
59:Project
| <slot 519> : CAST(282: cs_quantity AS DECIMAL64(9,0)) * 284: cs_list_price
|
58:HASH JOIN
| join op: LEFT SEMI JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 781: cast = 425: c_customer_sk
|
|----57:EXCHANGE
|
26:EXCHANGE
PLAN FRAGMENT 21
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 425: c_customer_sk
STREAM DATA SINK
EXCHANGE ID: 57
HASH_PARTITIONED: 425: c_customer_sk
56:Project
| <slot 425> : 425: c_customer_sk
|
55:CROSS JOIN
| cross join:
| predicates: CAST(444: sum AS DOUBLE) > CAST(0.5 * 516: max AS DOUBLE)
|
|----54:EXCHANGE
|
35:AGGREGATE (merge finalize)
| output: sum(444: sum)
| group by: 425: c_customer_sk
|
34:EXCHANGE
PLAN FRAGMENT 22
OUTPUT EXPRS:
PARTITION: UNPARTITIONED
STREAM DATA SINK
EXCHANGE ID: 54
UNPARTITIONED
53:AGGREGATE (merge finalize)
| output: max(516: max)
| group by:
|
52:EXCHANGE
PLAN FRAGMENT 23
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 468: c_customer_sk
STREAM DATA SINK
EXCHANGE ID: 52
UNPARTITIONED
51:AGGREGATE (update serialize)
| output: max(515: sum)
| group by:
|
50:Project
| <slot 515> : 515: sum
|
49:AGGREGATE (merge finalize)
| output: sum(515: sum)
| group by: 468: c_customer_sk
|
48:EXCHANGE
PLAN FRAGMENT 24
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 448: ss_customer_sk
STREAM DATA SINK
EXCHANGE ID: 48
HASH_PARTITIONED: 468: c_customer_sk
47:AGGREGATE (update serialize)
| STREAMING
| output: sum(514: expr)
| group by: 468: c_customer_sk
|
46:Project
| <slot 468> : 468: c_customer_sk
| <slot 514> : CAST(455: ss_quantity AS DECIMAL64(9,0)) * 458: ss_sales_price
|
45:HASH JOIN
| join op: INNER JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 448: ss_customer_sk = 468: c_customer_sk
|
|----44:EXCHANGE
|
42:EXCHANGE
PLAN FRAGMENT 25
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 44
HASH_PARTITIONED: 468: c_customer_sk
43:OlapScanNode
TABLE: customer
PREAGGREGATION: ON
partitions=1/1
rollup: customer
tabletRatio=8/8
tabletList=11985,11987,11989,11991,11993,11995,11997,11999
cardinality=64405602
avgRowSize=1.0
numNodes=0
PLAN FRAGMENT 26
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 42
HASH_PARTITIONED: 448: ss_customer_sk
41:Project
| <slot 448> : 448: ss_customer_sk
| <slot 455> : 455: ss_quantity
| <slot 458> : 458: ss_sales_price
|
40:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 445: ss_sold_date_sk = 486: d_date_sk
|
|----39:EXCHANGE
|
36:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=4.0
numNodes=0
PLAN FRAGMENT 27
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 39
UNPARTITIONED
38:Project
| <slot 486> : 486: d_date_sk
|
37:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: CAST(492: d_year AS BIGINT) IN (1999, 2000, 2001, 2002)
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=36525
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 28
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 405: ss_customer_sk
STREAM DATA SINK
EXCHANGE ID: 34
HASH_PARTITIONED: 425: c_customer_sk
33:AGGREGATE (update serialize)
| STREAMING
| output: sum(443: expr)
| group by: 425: c_customer_sk
|
32:Project
| <slot 425> : 425: c_customer_sk
| <slot 443> : CAST(412: ss_quantity AS DECIMAL64(9,0)) * 415: ss_sales_price
|
31:HASH JOIN
| join op: INNER JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 405: ss_customer_sk = 425: c_customer_sk
|
|----30:EXCHANGE
|
28:EXCHANGE
PLAN FRAGMENT 29
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 30
HASH_PARTITIONED: 425: c_customer_sk
29:OlapScanNode
TABLE: customer
PREAGGREGATION: ON
partitions=1/1
rollup: customer
tabletRatio=8/8
tabletList=11985,11987,11989,11991,11993,11995,11997,11999
cardinality=64405602
avgRowSize=1.0
numNodes=0
PLAN FRAGMENT 30
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 28
HASH_PARTITIONED: 405: ss_customer_sk
27:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=3.0
numNodes=0
PLAN FRAGMENT 31
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 782: cast
STREAM DATA SINK
EXCHANGE ID: 26
HASH_PARTITIONED: 781: cast
25:Project
| <slot 282> : 282: cs_quantity
| <slot 284> : 284: cs_list_price
| <slot 781> : CAST(267: cs_bill_customer_sk AS BIGINT)
|
24:HASH JOIN
| join op: LEFT SEMI JOIN (PARTITIONED)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 782: cast = 377: i_item_sk
|
|----23:EXCHANGE
|
8:EXCHANGE
PLAN FRAGMENT 32
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 399: substr, 377: i_item_sk, 351: d_date
STREAM DATA SINK
EXCHANGE ID: 23
HASH_PARTITIONED: 377: i_item_sk
22:Project
| <slot 377> : 377: i_item_sk
|
21:AGGREGATE (merge finalize)
| output: count(400: count)
| group by: 399: substr, 377: i_item_sk, 351: d_date
| having: 400: count > 4
|
20:EXCHANGE
PLAN FRAGMENT 33
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 20
HASH_PARTITIONED: 399: substr, 377: i_item_sk, 351: d_date
19:AGGREGATE (update serialize)
| STREAMING
| output: count(*)
| group by: 399: substr, 377: i_item_sk, 351: d_date
|
18:Project
| <slot 351> : 351: d_date
| <slot 377> : 377: i_item_sk
| <slot 399> : substr(381: i_item_desc, 1, 30)
|
17:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 328: ss_item_sk = 377: i_item_sk
|
|----16:EXCHANGE
|
14:Project
| <slot 328> : 328: ss_item_sk
| <slot 351> : 351: d_date
|
13:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 326: ss_sold_date_sk = 349: d_date_sk
|
|----12:EXCHANGE
|
9:OlapScanNode
TABLE: store_sales
PREAGGREGATION: ON
partitions=1/1
rollup: store_sales
tabletRatio=950/950
tabletList=18677,18679,18681,18683,18685,18687,18689,18691,18693,18695 …
cardinality=11519847114
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 34
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 16
UNPARTITIONED
15:OlapScanNode
TABLE: item
PREAGGREGATION: ON
partitions=1/1
rollup: item
tabletRatio=1/1
tabletList=11975
cardinality=402000
avgRowSize=2.0
numNodes=0
PLAN FRAGMENT 35
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 12
UNPARTITIONED
11:Project
| <slot 349> : 349: d_date_sk
| <slot 351> : 351: d_date
|
10:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: CAST(355: d_year AS BIGINT) IN (2000, 2000, 2001, 2002)
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=36525
avgRowSize=3.0
numNodes=0
PLAN FRAGMENT 36
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 08
HASH_PARTITIONED: 782: cast
7:Project
| <slot 267> : 267: cs_bill_customer_sk
| <slot 282> : 282: cs_quantity
| <slot 284> : 284: cs_list_price
| <slot 782> : CAST(279: cs_item_sk AS BIGINT)
|
6:HASH JOIN
| join op: INNER JOIN (BROADCAST)
| hash predicates:
| colocate: false, reason:
| equal join conjunct: 783: cast = 298: d_date_sk
|
|----5:EXCHANGE
|
2:Project
| <slot 267> : 267: cs_bill_customer_sk
| <slot 279> : 279: cs_item_sk
| <slot 282> : 282: cs_quantity
| <slot 284> : 284: cs_list_price
| <slot 783> : CAST(264: cs_sold_date_sk AS BIGINT)
|
1:OlapScanNode
TABLE: catalog_sales
PREAGGREGATION: ON
partitions=1/1
rollup: catalog_sales
tabletRatio=700/700
tabletList=12428,12430,12432,12434,12436,12438,12440,12442,12444,12446 …
cardinality=14399964710
avgRowSize=28.0
numNodes=0
PLAN FRAGMENT 37
OUTPUT EXPRS:
PARTITION: RANDOM
STREAM DATA SINK
EXCHANGE ID: 05
UNPARTITIONED
4:Project
| <slot 298> : 298: d_date_sk
|
3:OlapScanNode
TABLE: date_dim
PREAGGREGATION: ON
PREDICATES: 304: d_year = 1999, 306: d_moy = 2
partitions=1/1
rollup: date_dim
tabletRatio=2/2
tabletList=11968,11970
cardinality=18262
avgRowSize=3.0
numNodes=0
还有提供一下 这几个表的数据量 store_sales,date_dim,item,customer,catalog_sale,web_sales 谢谢
[quote=“LIANGCHAOHUA, post:59, topic:3594”]
atalog_sal
[/quo |TPC-DS标准数据量|||||
| — | — | — | — | — |
|表|1G|50G|1T|10T|
|call_center|6|300|42|54|
|catalog_page|11,718|585,900|30,000|40,000|
|catalog_returns|144,067|7,203,350|143,996,756|1,440,033,112|
|catalog_sales|1,441,548|72,077,400|1,439,980,416|14,399,964,710|
|customer|100,000|5,000,000|12,000,000|65,000,000|
|customer_address|50,000|2,500,000|6,000,000|32,500,000|
|customer_demographics|1,920,800|96,040,000|1,920,800|1,920,800|
|date_dim|73,049|3,652,450|73,049|73,049|
|household_demographics|7,200|360,000|7,200|7,200|
|income_band|20|1,000|20|20|
|inventory|11,745,000|587,250,000|783,000,000|1,311,525,000|
|item|18,000|900,000|300,000|402,000|
|promotion|300|15,000|1,500|2,000|
|reason|35|1,750|65|70|
|ship_mode|20|1,000|20|20|
|store|12|600|1,002|1,500|
|store_returns|287,514|14,375,700|287,999,764|2,879,970,104|
|store_sales|2,880,404|144,020,200|2,879,987,999|28,799,983,563|
|time_dim|86,400|4,320,000|86,400|86,400|
|warehouse|5|250|20|25|
|web_page|60|3,000|3,000|4,002|
|web_returns|71,763|3,588,150|71,997,522|720,020,485|
|web_sales|719,384|35,969,200|720,000,376|7,199,963,324|
|web_site|30|1,500|54|78|