【StarRocks版本】2.3.16
表结构如下:
CREATE TABLE table_bitmap (
section bigint(20) NULL,
today date NULL,
biz tinyint(4) NULL,
entryMode tinyint(4) NULL,
pageCate tinyint(4) NULL,
channelId int(11) NULL,
userFlag tinyint(4) NULL,
pid tinyint(4) NULL,
imei_bitmap bitmap BITMAP_UNION NULL COMMENT “imeibitmap”
) ENGINE=OLAP
AGGREGATE KEY(section, today, biz, entryMode, pageCate, channelId, userFlag, pid)
PARTITION BY RANGE(today)
(PARTITION p20240407 VALUES [(‘2024-04-07’), (‘2024-04-08’)),
PARTITION p20240408 VALUES [(‘2024-04-08’), (‘2024-04-09’)),
PARTITION p20240409 VALUES [(‘2024-04-09’), (‘2024-04-10’)),
PARTITION p20240410 VALUES [(‘2024-04-10’), (‘2024-04-11’)),
PARTITION p20240411 VALUES [(‘2024-04-11’), (‘2024-04-12’)))
DISTRIBUTED BY HASH(section, biz, entryMode, pageCate, channelId) BUCKETS 8
PROPERTIES (
“replication_num” = “3”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-8”,
“dynamic_partition.end” = “2”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “8”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“enable_persistent_index” = “false”
);
执行如下sql需要50多秒,请问为何这么慢?有什么解决方案吗?
select today,count(*) as pv, count(distinct imei_bitmap) as uv
-> fromtable_bitmap
-> where today = ‘2024-04-08’
-> group by today
-> limit 1000;
±-----------±-------±--------+
| today | pv | uv |
±-----------±-------±--------+
| 2024-04-08 | 174098 | 8728818 |
±-----------±-------±--------+
1 row in set (54.05 sec)
另外还有一个疑问,换成主键表,sql执行会快很多, 这又是什么原因呢?
CREATE TABLE table_primary (
section bigint(20) NOT NULL,
today date NOT NULL,
biz tinyint(4) NOT NULL,
entryMode tinyint(4) NOT NULL,
pageCate tinyint(4) NOT NULL,
channelId int(11) NOT NULL,
userFlag tinyint(4) NOT NULL,
imei bigint(20) NOT NULL
) ENGINE=OLAP
PRIMARY KEY(section, today, biz, entryMode, pageCate, channelId, userFlag, imei)
PARTITION BY RANGE(today)
(PARTITION p20240401 VALUES [(‘2024-04-01’), (‘2024-04-02’)),
PARTITION p20240402 VALUES [(‘2024-04-02’), (‘2024-04-03’)),
PARTITION p20240403 VALUES [(‘2024-04-03’), (‘2024-04-04’)),
PARTITION p20240404 VALUES [(‘2024-04-04’), (‘2024-04-05’)),
PARTITION p20240405 VALUES [(‘2024-04-05’), (‘2024-04-06’)),
PARTITION p20240406 VALUES [(‘2024-04-06’), (‘2024-04-07’)),
PARTITION p20240407 VALUES [(‘2024-04-07’), (‘2024-04-08’)),
PARTITION p20240408 VALUES [(‘2024-04-08’), (‘2024-04-09’)),
PARTITION p20240409 VALUES [(‘2024-04-09’), (‘2024-04-10’)),
PARTITION p20240410 VALUES [(‘2024-04-10’), (‘2024-04-11’)),
PARTITION p20240411 VALUES [(‘2024-04-11’), (‘2024-04-12’)))
DISTRIBUTED BY HASH(section, userFlag) BUCKETS 8
PROPERTIES (
“replication_num” = “3”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-8”,
“dynamic_partition.end” = “2”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “8”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“enable_persistent_index” = “false”
);
select today,count(*) as pv, count(distinct imei) as uv
-> fromtable_primary
-> where today = ‘2024-04-08’
-> group by today
-> limit 1000;
±-----------±---------±--------+
| today | pv | uv |
±-----------±---------±--------+
| 2024-04-08 | 57172762 | 8728818 |
±-----------±---------±--------+
1 row in set (0.82 sec)