store_sales28,799,983,563 web_sales 720,000,376 customer 65,000,000 catalog_sale 14,399,964,710 其他的表就是 几万条 几千条
麻烦提供一下SR的版本
目前是2.2.1版本
StarRocks是把数据缓存到内存进行计算,您提供的SQL有几个大表全表关联查询,内存放不下这么大的数据量而出现内存不足告警了,暂时想到有以下方法解决:
1.把SQL拆分,分成2-3个步骤执行。
2.数据有时间范围,根据时间范围来建分区,查询一定时间范围内的数据,而不是全表。
3.增加服务器的内存。
4.增加多些BE节点。
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from supplier, lineitem, orders, customer, nation n1, nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
```这个sql报错是hash join 超范围了,数据量:目前这个lineitem这个表600多亿条数据,orders 15000000000条数据 customer 15亿数据量 supplier 10000000 这个表麻烦您帮忙分析一下给点建议
把or换成union all试试
l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
像这些,l_shipdate 所在的表可以考虑使用分区
对于倾斜比较严重一个表1T,另一个表7T多,您这边有什么好的建议呢
我们数据库暂时没有对数据倾斜作自动化处理的方案,需要客户对key作预处理较大的改动。
目前索引已经加了,但是效果不明显,但是在两个表在做关联join的时候就会报,ERROR 1064 (HY000): row count of right table in hash join > 4294967295,预处理的话是修改sql逻辑先在对数据处理好
麻烦提供SQL 谢谢
还是上面那句SQL吗?
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from supplier, lineitem, orders, customer, nation n1, nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
--Q8
select
o_year, sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) as mkt_share
from
(
select
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from part, supplier, lineitem, orders, customer, nation n1, nation n2, region
where
p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'AMERICA'
and s_nationkey = n2.n_nationkey
and o_orderdate between date '1995-01-01' and date '1996-12-31'
and p_type = 'ECONOMY ANODIZED STEEL'
) as all_nations
group by o_year
order by o_year;
建表语句CREATE TABLE lineitem
(
l_orderkey
int(11) NULL COMMENT “”,
l_partkey
int(11) NULL COMMENT “”,
l_suppkey
int(11) NULL COMMENT “”,
l_linenumber
int(11) NULL COMMENT “”,
l_quantity
decimal64(15, 2) NULL COMMENT “”,
l_extendedprice
decimal64(15, 2) NULL COMMENT “”,
l_discount
decimal64(15, 2) NULL COMMENT “”,
l_tax
decimal64(15, 2) NULL COMMENT “”,
l_returnflag
varchar(1) NULL COMMENT “”,
l_linestatus
varchar(1) NULL COMMENT “”,
l_shipdate
date NULL COMMENT “”,
l_commitdate
date NULL COMMENT “”,
l_receiptdate
date NULL COMMENT “”,
l_shipinstruct
varchar(25) NULL COMMENT “”,
l_shipmode
varchar(10) NULL COMMENT “”,
l_comment
varchar(44) NULL COMMENT “”,
t
varchar(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(l_orderkey
, l_partkey
, l_suppkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(l_orderkey
, l_partkey
, l_suppkey
) BUCKETS 1550
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE customer
(
c_custkey
int(11) NULL COMMENT “”,
c_name
varchar(25) NULL COMMENT “”,
c_address
varchar(40) NULL COMMENT “”,
c_nationkey
int(11) NULL COMMENT “”,
c_phone
char(15) NULL COMMENT “”,
c_acctbal
decimal64(15, 2) NULL COMMENT “”,
c_mktsegment
char(10) NULL COMMENT “”,
c_comment
varchar(117) NULL COMMENT “”,
t
char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(c_custkey
, c_name
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(c_custkey
, c_name
,c_nationkey
) BUCKETS 50
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE nation
(
n_nationkey
int(11) NULL COMMENT “”,
n_name
char(25) NULL COMMENT “”,
n_regionkey
int(11) NULL COMMENT “”,
n_comment
varchar(152) NULL COMMENT “”,
t
char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(n_nationkey
, n_name
, n_regionkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(n_nationkey
, n_name
) BUCKETS 1
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE orders
(
o_orderkey
int(11) NULL COMMENT “”,
o_custkey
int(11) NULL COMMENT “”,
o_orderstatus
char(1) NULL COMMENT “”,
o_totalprice
decimal64(15, 2) NULL COMMENT “”,
o_orderdate
date NULL COMMENT “”,
o_orderpriority
char(15) NULL COMMENT “”,
o_clerk
varchar(15) NULL COMMENT “”,
o_shippriority
int(11) NULL COMMENT “”,
o_comment
varchar(79) NULL COMMENT “”,
t
varchar(8) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(o_orderkey
, o_custkey
, o_orderstatus
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(o_orderkey
, o_custkey
,o_orderstatus
) BUCKETS 360
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE part
(
p_partkey
int(11) NULL COMMENT “”,
p_name
varchar(55) NULL COMMENT “”,
p_mfgr
char(25) NULL COMMENT “”,
p_brand
char(10) NULL COMMENT “”,
p_type
varchar(25) NULL COMMENT “”,
p_size
int(11) NULL COMMENT “”,
p_container
char(10) NULL COMMENT “”,
p_retailprice
decimal64(15, 2) NULL COMMENT “”,
p_comment
varchar(23) NULL COMMENT “”,
t
char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(p_partkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(p_partkey
,p_size
) BUCKETS 50
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE partsupp
(
ps_partkey
int(11) NULL COMMENT “”,
ps_suppkey
char(25) NULL COMMENT “”,
ps_availqty
varchar(25) NULL COMMENT “”,
ps_supplycost
int(11) NULL COMMENT “”,
ps_comment
varchar(199) NULL COMMENT “”,
t
char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(ps_partkey
, ps_suppkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(ps_partkey
, ps_supplycost
) BUCKETS 252
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE region
(
r_regionkey
int(11) NULL COMMENT “”,
r_name
char(25) NULL COMMENT “”,
r_comment
varchar(152) NULL COMMENT “”,
t
char(25) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(r_regionkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(r_regionkey
) BUCKETS 1
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE supplier
(
s_suppkey
int(11) NULL COMMENT “”,
s_name
char(25) NULL COMMENT “”,
s_address
varchar(40) NULL COMMENT “”,
s_nationkey
int(11) NULL COMMENT “”,
s_phone
char(15) NULL COMMENT “”,
s_acctbal
decimal64(15, 2) NULL COMMENT “”,
s_comment
varchar(101) NULL COMMENT “”,
t
char(25) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(s_suppkey
)
COMMENT “OLAP”
DISTRIBUTED BY HASH(s_suppkey
,s_nationkey
) BUCKETS 20
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
麻烦您帮忙看一下,给点宝贵的建议
be 如果是三副本的话是不是分布不同机器,如果有多台be 挂一台也没有关系
是的,同一个机器不会有2个副本,3个BE,3副本就是放在3个BE上。
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from lineitem l JOIN [broadcast] supplier s ON l.l_suppkey = s.s_suppkey
JOIN [shuffle] orders o ON l.l_orderkey = o.o_orderkey
JOIN [shuffle] customer c ON o.o_custkey = c.c_custkey
JOIN nation n1 ON s.s_nationkey = n1.n_nationkey
JOIN nation n2 ON n1.s_nationkey = n2.n_nationkey
WHERE n1.n_name = ‘FRANCE’ and n2.n_name = ‘GERMANY’ and l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
UNION ALL
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from lineitem l JOIN [broadcast] supplier s ON l.l_suppkey = s.s_suppkey
JOIN [shuffle] orders o ON l.l_orderkey = o.o_orderkey
JOIN [shuffle] customer c ON o.o_custkey = c.c_custkey
JOIN nation n1 ON s.s_nationkey = n1.n_nationkey
JOIN nation n2 ON n1.s_nationkey = n2.n_nationkey
WHERE n1.n_name = ‘GERMANY’ and n2.n_name = ‘FRANCE’ and l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
麻烦问一下,sql 7 和sql 8 合并成一个sql了吗