【StarRocks版本】2.3.16
表结构如下:
CREATE TABLE table_bitmap
(
section
bigint(20) NULL,
today
date NULL,
biz
tinyint(4) NULL,
entryMode
tinyint(4) NULL,
pageCate
tinyint(4) NULL,
channelId
int(11) NULL,
userFlag
tinyint(4) NULL,
pid
tinyint(4) NULL,
imei_bitmap
bitmap BITMAP_UNION NULL COMMENT “imeibitmap”
) ENGINE=OLAP
AGGREGATE KEY(section
, today
, biz
, entryMode
, pageCate
, channelId
, userFlag
, pid
)
PARTITION BY RANGE(today
)
(PARTITION p20240407 VALUES [(‘2024-04-07’), (‘2024-04-08’)),
PARTITION p20240408 VALUES [(‘2024-04-08’), (‘2024-04-09’)),
PARTITION p20240409 VALUES [(‘2024-04-09’), (‘2024-04-10’)),
PARTITION p20240410 VALUES [(‘2024-04-10’), (‘2024-04-11’)),
PARTITION p20240411 VALUES [(‘2024-04-11’), (‘2024-04-12’)))
DISTRIBUTED BY HASH(section
, biz
, entryMode
, pageCate
, channelId
) BUCKETS 8
PROPERTIES (
“replication_num” = “3”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-8”,
“dynamic_partition.end” = “2”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “8”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“enable_persistent_index” = “false”
);
执行如下sql需要50多秒,请问为何这么慢?有什么解决方案吗?
select today,count(*) as pv, count(distinct imei_bitmap) as uv
-> fromtable_bitmap
-> where today = ‘2024-04-08’
-> group by today
-> limit 1000;
±-----------±-------±--------+
| today | pv | uv |
±-----------±-------±--------+
| 2024-04-08 | 174098 | 8728818 |
±-----------±-------±--------+
1 row in set (54.05 sec)
另外还有一个疑问,换成主键表,sql执行会快很多, 这又是什么原因呢?
CREATE TABLE table_primary
(
section
bigint(20) NOT NULL,
today
date NOT NULL,
biz
tinyint(4) NOT NULL,
entryMode
tinyint(4) NOT NULL,
pageCate
tinyint(4) NOT NULL,
channelId
int(11) NOT NULL,
userFlag
tinyint(4) NOT NULL,
imei
bigint(20) NOT NULL
) ENGINE=OLAP
PRIMARY KEY(section
, today
, biz
, entryMode
, pageCate
, channelId
, userFlag
, imei
)
PARTITION BY RANGE(today
)
(PARTITION p20240401 VALUES [(‘2024-04-01’), (‘2024-04-02’)),
PARTITION p20240402 VALUES [(‘2024-04-02’), (‘2024-04-03’)),
PARTITION p20240403 VALUES [(‘2024-04-03’), (‘2024-04-04’)),
PARTITION p20240404 VALUES [(‘2024-04-04’), (‘2024-04-05’)),
PARTITION p20240405 VALUES [(‘2024-04-05’), (‘2024-04-06’)),
PARTITION p20240406 VALUES [(‘2024-04-06’), (‘2024-04-07’)),
PARTITION p20240407 VALUES [(‘2024-04-07’), (‘2024-04-08’)),
PARTITION p20240408 VALUES [(‘2024-04-08’), (‘2024-04-09’)),
PARTITION p20240409 VALUES [(‘2024-04-09’), (‘2024-04-10’)),
PARTITION p20240410 VALUES [(‘2024-04-10’), (‘2024-04-11’)),
PARTITION p20240411 VALUES [(‘2024-04-11’), (‘2024-04-12’)))
DISTRIBUTED BY HASH(section
, userFlag
) BUCKETS 8
PROPERTIES (
“replication_num” = “3”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-8”,
“dynamic_partition.end” = “2”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “8”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“enable_persistent_index” = “false”
);
select today,count(*) as pv, count(distinct imei) as uv
-> fromtable_primary
-> where today = ‘2024-04-08’
-> group by today
-> limit 1000;
±-----------±---------±--------+
| today | pv | uv |
±-----------±---------±--------+
| 2024-04-08 | 57172762 | 8728818 |
±-----------±---------±--------+
1 row in set (0.82 sec)