【详述】starrocks中使用insert into 语句从内部表查询大概1000万数据量,再次插入到另外一张分区表。按天分区。13个分桶。集群版本是1.19.5 。跑一天的数据,io就达到100%。导致其他的查询就无法进行。
1、整个的tablet分布如下:
2、io使用截图如下:
3、根据之前了解的信息,这个说是跟tablet及分桶有关,但是不确实具体是什么问题?
4、集群规模例如:1 follower+3observer。单节点:cpu:24核,内存64G
5、建表语句如下:
CREATE TABLE dws_fact_flow_customer_goods_ymd
(
date_natural_k
bigint(20) NULL COMMENT “自然时间主键”,
o_user_id
bigint(20) NULL COMMENT “用户id”,
o_goods_code
int(11) NULL COMMENT “商品编码”,
o_user_uid
bigint(20) NULL COMMENT “用户uid”,
v_goods_exposure_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品曝光次数”,
v_goods_click_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品点击次数”,
v_goods_add_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品加购次数”,
v_goods_add_goods_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品加购数量”,
v_goods_add_goods_amount
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品加购金额”,
v_goods_detail_exposure_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品详情曝光次数”,
v_goods_exposure_duration_count
bigint(20) SUM NULL DEFAULT “0” COMMENT “商品曝光时长(曝光有值,点击及加购给默认值0)(单位:毫秒)”,
o_brand_code
int(11) REPLACE_IF_NOT_NULL NULL COMMENT “商品品牌编码”,
o_brand_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “商品品牌名称”,
o_goods_name
varchar(300) REPLACE_IF_NOT_NULL NULL COMMENT “商品名称”,
o_goods_category_code
int(11) REPLACE_IF_NOT_NULL NULL COMMENT “商品品类编码”,
o_goods_category_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “商品品类名称”,
o_one_rank_category_code
bigint(20) REPLACE_IF_NOT_NULL NULL COMMENT “后台一级类目编码”,
o_two_rank_category_code
bigint(20) REPLACE_IF_NOT_NULL NULL COMMENT “后台二级类目编码”,
o_one_rank_category_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “后台一级类目名称”,
o_two_rank_category_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “后台二级类目名称”,
o_city_code
bigint(20) REPLACE_IF_NOT_NULL NULL COMMENT “城市编码”,
o_city_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “城市名称”,
o_tra_code
int(11) REPLACE_IF_NOT_NULL NULL COMMENT “商圈编码”,
o_tra_name
varchar(100) REPLACE_IF_NOT_NULL NULL COMMENT “商圈名称”,
create_time
datetime REPLACE_IF_NOT_NULL NULL COMMENT “记录创建时间”,
update_time
datetime REPLACE_IF_NOT_NULL NULL COMMENT “记录更新时间”,
create_datetime
bigint(20) REPLACE_IF_NOT_NULL NULL COMMENT “记录创建时间戳”,
update_datetime
bigint(20) REPLACE_IF_NOT_NULL NULL COMMENT “记录更新时间戳”
) ENGINE=OLAP
AGGREGATE KEY(date_natural_k
, o_user_id
, o_goods_code
, o_user_uid
)
COMMENT “OLAP”
PARTITION BY RANGE(date_natural_k
)
(
PARTITION p20220320 VALUES [(“20220320”), (“20220321”)),
PARTITION p20220321 VALUES [(“20220321”), (“20220322”)),
PARTITION p20220322 VALUES [(“20220322”), (“20220323”)),
PARTITION p20220323 VALUES [(“20220323”), (“20220324”)),
PARTITION p20220324 VALUES [(“20220324”), (“20220325”)),
PARTITION p20220325 VALUES [(“20220325”), (“20220326”)),
PARTITION p20220326 VALUES [(“20220326”), (“20220327”)),
PARTITION p20220327 VALUES [(“20220327”), (“20220328”)),
PARTITION p20220328 VALUES [(“20220328”), (“20220329”)))
DISTRIBUTED BY HASH(o_user_id
, o_goods_code
) BUCKETS 13
PROPERTIES (
“replication_num” = “3”,
“bloom_filter_columns” = “o_goods_code, o_user_id, o_user_uid, date_natural_k”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-700”,
“dynamic_partition.end” = “3”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “13”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”
);
具体是什么原因导致的io过高,那位大神可以帮忙分析解决下!