-
导出
2.1 Mysql 协议导出
bitmap_to_string
版本: 2.5 ~ main
mysql> select c1, bitmap_to_string(c2) from t1;
+------+----------------------+
| c1 | bitmap_to_string(c2) |
+------+----------------------+
| 1 | 1,2,3,4,5,6,7,8,9,10 |
+------+----------------------+
bitmap_to_base64
版本: 2.5 ~ main
mysql> select c1, bitmap_to_base64(c2) from t1;
+------+----------------------------------------------------------------------------------------------------------------------+
| c1 | bitmap_to_base64(c2) |
+------+----------------------------------------------------------------------------------------------------------------------+
| 1 | CgoAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAAKAAAAAAAAAA== |
+------+----------------------------------------------------------------------------------------------------------------------+
1 row in set (0.02 sec)
bitmap_to_binary + hex
版本: 3.0 ~ main
不建议使用
支持,但是会导出成一些不可见字符,需要转成Hex使用。
mysql> select c1, hex(bitmap_to_binary(c2)) from t1;
+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| c1 | hex(bitmap_to_binary(c2)) |
+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 1 | 0A0A0000000100000000000000020000000000000003000000000000000400000000000000050000000000000006000000000000000700000000000000080000000000000009000000000000000A00000000000000 |
+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
bitmap_to_array + unnest
版本: 2.5 ~ main
先转成Array,然后 Array 转成明细导出
mysql> select c1, unnest from t1, unnest(bitmap_to_array(c2));
+------+--------+
| c1 | unnest |
+------+--------+
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| 1 | 4 |
| 1 | 5 |
| 1 | 6 |
| 1 | 7 |
| 1 | 8 |
| 1 | 9 |
| 1 | 10 |
+------+--------+
unnest_bitmap
版本: 3.0 ~ main
bitmap直接转成明细导出
mysql> select c1, unnest_bitmap from t1, unnest_bitmap(c2);
+------+---------------+
| c1 | unnest_bitmap |
+------+---------------+
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| 1 | 4 |
| 1 | 5 |
| 1 | 6 |
| 1 | 7 |
| 1 | 8 |
| 1 | 9 |
| 1 | 10 |
+------+---------------+
10 rows in set (0.01 sec)
subdivde_bitmap 拆成小Bitmap导出
版本: 2.5 ~ main
防止1个包太大,Mysql协议报错
mysql> select bitmap_to_string(subdivide_bitmap) from t1, subdivide_bitmap(c2, 3);
+------------------------------------+
| bitmap_to_string(subdivide_bitmap) |
+------------------------------------+
| 1,2,3 |
| 4,5,6 |
| 7,8,9 |
| 10 |
+------------------------------------+
bitmap_to_array 导出
版本: 2.5 ~ main
mysql> select c1, bitmap_to_array(c2) from t1;
+------+------------------------+
| c1 | bitmap_to_array(c2) |
+------+------------------------+
| 1 | [1,2,3,4,5,6,7,8,9,10] |
+------+------------------------+
2.2 Insert into files 导出
bitmap_to_string
版本: 3.1 ~ main
# hive 表结构
create table t3(c1 int, c2 string) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t3/", "format"="parquet", "compression" = "uncompressed")
select c1, bitmap_to_string(c2) as c2 from t1;
bitmap_to_base64
版本: 3.1 ~ main
# hive 表结构
create table t3(c1 int, c2 string) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t3/", "format"="parquet", "compression" = "uncompressed") select c1, bitmap_to_base64(c2) as c2 from t1;
bitmap_to_binary
版本: 3.1 ~ main
# hive 表结构
create table t4(c1 int, c2 binary) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t4/", "format"="parquet", "compression" = "uncompressed") select c1, bitmap_to_binary(c2) as c2 from t1;
bitmap_to_array + unnest
转成明细导出
版本: 3.1 ~ main
# hive 表结构
create table t2(c1 bigint, c2 bigint) format as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t2/", "format"="parquet", "compression" = "uncompressed") select c1, unnest as c2 from t1, unnest(bitmap_to_array(c2));
bitmap_unnest
Bitmap 直接转成明细导出
版本: 3.1 ~ main
# hive 表结构
create table t2(c1 bigint, c2 bigint) format as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxxx:9000/user/hive/warehouse/lxh.db/t2/", "format"="parquet", "compression" = "uncompressed") select c1, unnest_bitmap as c2 from t1, unnest_bitmap(c2);
bitmap_to_array
版本: 3.1 ~ main
# hive 表结构
create table t_array(c1 int, c2 array<bigint>) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
# insert into files
insert into files ("path" = "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t_array/", "format"="parquet", "compression" = "uncompressed") select c1, bitmap_to_array(c2) as c2 from t1;
2.3 select into outfile 导出
逐渐废弃,不建议使用,用 insert into files 代替
3.0 版本当前支持 select into outfile,但是不成熟
bitmap_to_string
版本: 3.1 ~ main
# hive 表结构 (parquet)
create table t3(c1 int, c2 string) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
select c1, bitmap_to_string(c2) as c2 from t1 into outfile "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t3/" format as parquet;
# hive 表结构 (csv)
create table t_string_csv(c1 int, c2 string) row format delimited fields
terminated by '\t' lines terminated by '\n' stored stored as textfile;
select c1, bitmap_to_string(c2) as c2 from t1
into outfile "hdfs://emr-header-1.cluster-49091:9000/user/hive/warehouse/lxh.db/t_string_csv/" format as csv
properties("column_separator" = "\t", "line_delimiter" = "\n");
bitmap_to_base64
版本: 3.1 ~ main
# hive 表结构 (parquet)
create table t3(c1 int, c2 string) stored as parquet;
# StarRocks 表结构
CREATE TABLE `t1` (
`c1` int(11) NULL COMMENT "",
`c2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`c1`)
DISTRIBUTED BY HASH(`c1`);
select c1, bitmap_to_base64(c2) as c2 from t1 into outfile "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t3/" format as parquet;
# hive 表结构 (csv)
create table t_string_csv(c1 int, c2 string) row format delimited fields
terminated by '\t' lines terminated by '\n' stored stored as textfile;
select c1, bitmap_to_base64(c2) as c2 from t1
into outfile "hdfs://xxx:9000/user/hive/warehouse/lxh.db/t_string_csv/" format as csv
properties("column_separator" = "\t", "line_delimiter" = "\n");
2.4 [不支持] export 导出
不支持 Bitmap 类型
2.5 外表 (catalog方式) 导出
insert into hive_catalog_hms.lxh.t4 select c1, bitmap_to_binary(c2) from ssb_20.t1;
2.6 外表(非 catalog 方式) 导出
//TODO