store_sales28,799,983,563 web_sales 720,000,376 customer 65,000,000 catalog_sale 14,399,964,710 其他的表就是 几万条 几千条
麻烦提供一下SR的版本
目前是2.2.1版本
StarRocks是把数据缓存到内存进行计算,您提供的SQL有几个大表全表关联查询,内存放不下这么大的数据量而出现内存不足告警了,暂时想到有以下方法解决:
1.把SQL拆分,分成2-3个步骤执行。
2.数据有时间范围,根据时间范围来建分区,查询一定时间范围内的数据,而不是全表。
3.增加服务器的内存。
4.增加多些BE节点。
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from supplier, lineitem, orders, customer, nation n1, nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
```这个sql报错是hash join 超范围了,数据量:目前这个lineitem这个表600多亿条数据,orders 15000000000条数据 customer 15亿数据量 supplier 10000000 这个表麻烦您帮忙分析一下给点建议
把or换成union all试试
l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
像这些,l_shipdate 所在的表可以考虑使用分区
对于倾斜比较严重一个表1T,另一个表7T多,您这边有什么好的建议呢
我们数据库暂时没有对数据倾斜作自动化处理的方案,需要客户对key作预处理较大的改动。
目前索引已经加了,但是效果不明显,但是在两个表在做关联join的时候就会报,ERROR 1064 (HY000): row count of right table in hash join > 4294967295,预处理的话是修改sql逻辑先在对数据处理好
麻烦提供SQL 谢谢
还是上面那句SQL吗?
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from supplier, lineitem, orders, customer, nation n1, nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
--Q8
select
o_year, sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) as mkt_share
from
(
select
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from part, supplier, lineitem, orders, customer, nation n1, nation n2, region
where
p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'AMERICA'
and s_nationkey = n2.n_nationkey
and o_orderdate between date '1995-01-01' and date '1996-12-31'
and p_type = 'ECONOMY ANODIZED STEEL'
) as all_nations
group by o_year
order by o_year;
建表语句CREATE TABLE lineitem (
l_orderkey int(11) NULL COMMENT “”,
l_partkey int(11) NULL COMMENT “”,
l_suppkey int(11) NULL COMMENT “”,
l_linenumber int(11) NULL COMMENT “”,
l_quantity decimal64(15, 2) NULL COMMENT “”,
l_extendedprice decimal64(15, 2) NULL COMMENT “”,
l_discount decimal64(15, 2) NULL COMMENT “”,
l_tax decimal64(15, 2) NULL COMMENT “”,
l_returnflag varchar(1) NULL COMMENT “”,
l_linestatus varchar(1) NULL COMMENT “”,
l_shipdate date NULL COMMENT “”,
l_commitdate date NULL COMMENT “”,
l_receiptdate date NULL COMMENT “”,
l_shipinstruct varchar(25) NULL COMMENT “”,
l_shipmode varchar(10) NULL COMMENT “”,
l_comment varchar(44) NULL COMMENT “”,
t varchar(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(l_orderkey, l_partkey, l_suppkey) BUCKETS 1550
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE customer (
c_custkey int(11) NULL COMMENT “”,
c_name varchar(25) NULL COMMENT “”,
c_address varchar(40) NULL COMMENT “”,
c_nationkey int(11) NULL COMMENT “”,
c_phone char(15) NULL COMMENT “”,
c_acctbal decimal64(15, 2) NULL COMMENT “”,
c_mktsegment char(10) NULL COMMENT “”,
c_comment varchar(117) NULL COMMENT “”,
t char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(c_custkey, c_name)
COMMENT “OLAP”
DISTRIBUTED BY HASH(c_custkey, c_name,c_nationkey) BUCKETS 50
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE nation (
n_nationkey int(11) NULL COMMENT “”,
n_name char(25) NULL COMMENT “”,
n_regionkey int(11) NULL COMMENT “”,
n_comment varchar(152) NULL COMMENT “”,
t char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(n_nationkey, n_name, n_regionkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(n_nationkey, n_name) BUCKETS 1
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE orders (
o_orderkey int(11) NULL COMMENT “”,
o_custkey int(11) NULL COMMENT “”,
o_orderstatus char(1) NULL COMMENT “”,
o_totalprice decimal64(15, 2) NULL COMMENT “”,
o_orderdate date NULL COMMENT “”,
o_orderpriority char(15) NULL COMMENT “”,
o_clerk varchar(15) NULL COMMENT “”,
o_shippriority int(11) NULL COMMENT “”,
o_comment varchar(79) NULL COMMENT “”,
t varchar(8) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(o_orderkey, o_custkey, o_orderstatus)
COMMENT “OLAP”
DISTRIBUTED BY HASH(o_orderkey, o_custkey,o_orderstatus) BUCKETS 360
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE part (
p_partkey int(11) NULL COMMENT “”,
p_name varchar(55) NULL COMMENT “”,
p_mfgr char(25) NULL COMMENT “”,
p_brand char(10) NULL COMMENT “”,
p_type varchar(25) NULL COMMENT “”,
p_size int(11) NULL COMMENT “”,
p_container char(10) NULL COMMENT “”,
p_retailprice decimal64(15, 2) NULL COMMENT “”,
p_comment varchar(23) NULL COMMENT “”,
t char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(p_partkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(p_partkey,p_size) BUCKETS 50
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE partsupp (
ps_partkey int(11) NULL COMMENT “”,
ps_suppkey char(25) NULL COMMENT “”,
ps_availqty varchar(25) NULL COMMENT “”,
ps_supplycost int(11) NULL COMMENT “”,
ps_comment varchar(199) NULL COMMENT “”,
t char(15) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(ps_partkey, ps_suppkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(ps_partkey, ps_supplycost) BUCKETS 252
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE region (
r_regionkey int(11) NULL COMMENT “”,
r_name char(25) NULL COMMENT “”,
r_comment varchar(152) NULL COMMENT “”,
t char(25) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(r_regionkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(r_regionkey) BUCKETS 1
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
CREATE TABLE supplier (
s_suppkey int(11) NULL COMMENT “”,
s_name char(25) NULL COMMENT “”,
s_address varchar(40) NULL COMMENT “”,
s_nationkey int(11) NULL COMMENT “”,
s_phone char(15) NULL COMMENT “”,
s_acctbal decimal64(15, 2) NULL COMMENT “”,
s_comment varchar(101) NULL COMMENT “”,
t char(25) NULL COMMENT “”
) ENGINE=OLAP
DUPLICATE KEY(s_suppkey)
COMMENT “OLAP”
DISTRIBUTED BY HASH(s_suppkey,s_nationkey) BUCKETS 20
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_medium” = “SSD”,
“storage_format” = “DEFAULT”
);
麻烦您帮忙看一下,给点宝贵的建议
be 如果是三副本的话是不是分布不同机器,如果有多台be 挂一台也没有关系
是的,同一个机器不会有2个副本,3个BE,3副本就是放在3个BE上。
select supp_nation, cust_nation, l_year, sum(volume) as revenue
from (
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from lineitem l JOIN [broadcast] supplier s ON l.l_suppkey = s.s_suppkey
JOIN [shuffle] orders o ON l.l_orderkey = o.o_orderkey
JOIN [shuffle] customer c ON o.o_custkey = c.c_custkey
JOIN nation n1 ON s.s_nationkey = n1.n_nationkey
JOIN nation n2 ON n1.s_nationkey = n2.n_nationkey
WHERE n1.n_name = ‘FRANCE’ and n2.n_name = ‘GERMANY’ and l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
UNION ALL
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from lineitem l JOIN [broadcast] supplier s ON l.l_suppkey = s.s_suppkey
JOIN [shuffle] orders o ON l.l_orderkey = o.o_orderkey
JOIN [shuffle] customer c ON o.o_custkey = c.c_custkey
JOIN nation n1 ON s.s_nationkey = n1.n_nationkey
JOIN nation n2 ON n1.s_nationkey = n2.n_nationkey
WHERE n1.n_name = ‘GERMANY’ and n2.n_name = ‘FRANCE’ and l_shipdate between date ‘1995-01-01’ and date ‘1996-12-31’
) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
麻烦问一下,sql 7 和sql 8 合并成一个sql了吗