【详述】在 Hive 中查询 Hudi 表得到的数据，与通过 StarRocks 外表查询同一张表得到的数据不一致
【背景】
-- Hive 表结构
-- Hive metastore definition of the Hudi-backed table `hudi_cache`.
-- Columns: five `_hoodie_*` bookkeeping columns followed by the single data
-- column `id`; the table is partitioned by `age`.
-- Identifiers are backtick-quoted because unquoted Hive identifiers may not
-- begin with an underscore, and all literals use plain ASCII single quotes
-- (typographic quotes are not valid string delimiters).
CREATE EXTERNAL TABLE `hudi_cache`(
  `_hoodie_commit_time` string,
  `_hoodie_commit_seqno` string,
  `_hoodie_record_key` string,
  `_hoodie_partition_path` string,
  `_hoodie_file_name` string,
  `id` string)
PARTITIONED BY (
  `age` bigint)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
  -- NOTE(review): presumably 'false' means the table is not queried as a
  -- read-optimized table -- confirm against the Hudi docs.
  'hoodie.query.as.ro.table'='false',
  'path'='/tmp/hudi_cache')
STORED AS INPUTFORMAT
  'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'hdfs://SZ-DATA-CM1:8020/tmp/hudi_cache'
TBLPROPERTIES (
  'last_commit_time_sync'='20220706193601516',
  'spark.sql.sources.provider'='hudi',
  'spark.sql.sources.schema.numPartCols'='1',
  'spark.sql.sources.schema.numParts'='1',
  -- Spark schema JSON: must stay on one line, it is a single string literal.
  'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"age","type":"long","nullable":true,"metadata":{}}]}',
  'spark.sql.sources.schema.partCol.0'='age',
  'transient_lastDdlTime'='1657107366');
-- StarRocks Hudi 外表结构
-- StarRocks external table mapped onto the Hudi table `db_hudi.hudi_cache`
-- through the `hudi0` resource. Only `id` and `age` are declared here; the
-- `_hoodie_*` columns of the underlying table are not exposed.
-- Property keys/values use ASCII double quotes (typographic quotes are not
-- valid string delimiters) and each statement is explicitly terminated.
DROP TABLE IF EXISTS hudi_cache;
CREATE EXTERNAL TABLE hudi_cache (
    id string,
    age bigint
) ENGINE = HUDI
PROPERTIES (
    "resource" = "hudi0",
    "database" = "db_hudi",
    "table" = "hudi_cache"
);
-- Hive 查询结果
-- StarRocks Hudi 外表查询结果

【附件】starrocks版本 2.3.0 rc


