parent
c2a89c247f
commit
a001e27f01
@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2023/5/9 16:06
|
||||
# @Author : old tom
|
||||
# @File : metadat_scan.py
|
||||
# @Project : futool-tiny-datahub
|
||||
# @Desc :
|
||||
from datahub.datasource.datasource_manage import DataSourceManage
|
||||
from datahub.metadata.metadata_reader import MetadataReader
|
||||
from datahub.metadata.metadata_warehouse import MetadataWareHouse
|
||||
from datahub.metadata.constant.metadata_constant import MetaDataObjType
|
||||
from datahub.log_conf import log
|
||||
from datahub.metadata.metaversion.metadata_version import MetadataVersionKeeper
|
||||
from common.futool.core.fu_date import StopWatch
|
||||
|
||||
|
||||
class MetadataScanner(object):
|
||||
|
||||
def __init__(self):
|
||||
self.datasource_manage = DataSourceManage()
|
||||
|
||||
@staticmethod
|
||||
def _join_meta_detail(meta_datas, metadata_reader: MetadataReader, obj_type):
|
||||
"""
|
||||
组装明细结果
|
||||
:return:
|
||||
"""
|
||||
save_datas = []
|
||||
for i, t in enumerate(meta_datas):
|
||||
meta_id, obj_name = t[0], t[1]
|
||||
meta_result = metadata_reader.query_metadata_detail(obj_name, obj_type)
|
||||
# 组装插入数据
|
||||
obj_data = ([meta_id] + list(x) for x in meta_result)
|
||||
for d in obj_data:
|
||||
save_datas.append(d)
|
||||
log.info(f"剩余{len(meta_datas) - (i + 1)}")
|
||||
return save_datas
|
||||
|
||||
def _scan_detail(self, warehouse: MetadataWareHouse, metadata_reader: MetadataReader):
|
||||
"""
|
||||
获取明细数据
|
||||
:param warehouse: 元数据仓库
|
||||
:param metadata_reader: 元数据读取器
|
||||
:return:
|
||||
"""
|
||||
source_id = warehouse.source_id
|
||||
watch = StopWatch()
|
||||
# 读取表字段
|
||||
watch.start(f"{source_id}_table")
|
||||
meta_tables = warehouse.query_metadata_id_name(MetaDataObjType.Table.value)
|
||||
log.info(f"开始查询[{warehouse.source_id}] 表字段数据,共{len(meta_tables)}张表")
|
||||
table_field_datas = self._join_meta_detail(meta_tables, metadata_reader, MetaDataObjType.Table.value)
|
||||
warehouse.save_metadata_obj_field(table_field_datas)
|
||||
table_time = watch.stop(f"{source_id}_table")
|
||||
log.info(f"[{warehouse.source_id}] 表字段数据查询结束,耗时{table_time}秒")
|
||||
# 读取视图创建语句
|
||||
watch.start(f"{source_id}_view")
|
||||
meta_views = warehouse.query_metadata_id_name(MetaDataObjType.View.value)
|
||||
log.info(f"开始查询[{warehouse.source_id}] 视图创建语句,共{len(meta_views)}张视图")
|
||||
view_datas = self._join_meta_detail(meta_views, metadata_reader, MetaDataObjType.View.value)
|
||||
warehouse.save_metadata_obj_detail(view_datas)
|
||||
view_time = watch.stop(f"{source_id}_view")
|
||||
log.info(f"[{warehouse.source_id}] 视图语句查询结束,耗时{view_time}秒")
|
||||
# 读取存储过程创建语句
|
||||
watch.start(f"{source_id}_procedure")
|
||||
meta_procedure = warehouse.query_metadata_id_name(MetaDataObjType.Procedure.value)
|
||||
log.info(f"开始查询[{warehouse.source_id}] 存储过程创建语句,共{len(meta_views)}张过程")
|
||||
procedure_datas = self._join_meta_detail(meta_procedure, metadata_reader, MetaDataObjType.Procedure.value)
|
||||
warehouse.save_metadata_obj_detail(procedure_datas)
|
||||
procedure_time = watch.stop(f"{source_id}_procedure")
|
||||
log.info(f"[{warehouse.source_id}] 存储过程语句查询结束,耗时{procedure_time}秒")
|
||||
|
||||
def scan_metadata(self, source_id):
|
||||
"""
|
||||
TODO python暂时无法实现注解事务,后续使用动态代理或装饰器实现
|
||||
扫描元数据
|
||||
:param source_id:
|
||||
:return:
|
||||
"""
|
||||
# 元数据仓库API
|
||||
warehouse = MetadataWareHouse(source_id)
|
||||
# 获取待扫描数据源
|
||||
datasource = self.datasource_manage.get(source_id)
|
||||
# 初始化元数据读取器
|
||||
metadata_reader = MetadataReader(datasource)
|
||||
# 获取版本号
|
||||
|
||||
# version_keeper = MetadataVersionKeeper(source_id)
|
||||
# version_code = version_keeper.increase_version()
|
||||
# # 分别读取表\视图\存储过程并入库
|
||||
# log.info(f'开始扫描[{source_id}]元数据,版本号为[{version_code}]')
|
||||
# tables = metadata_reader.query_tables()
|
||||
# warehouse.save_metadata_obj(tables, MetaDataObjType.Table.value, version_code)
|
||||
# log.info(f'[{source_id}]读取表完毕,共{len(tables)}张')
|
||||
# views = metadata_reader.query_views()
|
||||
# warehouse.save_metadata_obj(views, MetaDataObjType.View.value, version_code)
|
||||
# log.info(f'[{source_id}]读取视图完毕,共{len(views)}张')
|
||||
# procedures = metadata_reader.query_procedure()
|
||||
# warehouse.save_metadata_obj(procedures, MetaDataObjType.Procedure.value, version_code)
|
||||
# log.info(f'[{source_id}]读取存储过程完毕,共{len(procedures)}个')
|
||||
# 查询明细数据
|
||||
self._scan_detail(warehouse, metadata_reader)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
scanner = MetadataScanner()
|
||||
scanner.scan_metadata('834164a2d62de959c0261e6239dd1e55')
|
Loading…
Reference in new issue