|
|
#!/usr/bin/env python
|
|
|
# -*- coding: utf-8 -*-
|
|
|
# @Time : 2023/4/9 9:18
|
|
|
# @Author : old tom
|
|
|
# @File : relation_analyze.py
|
|
|
# @Project : futool-tiny-datahub
|
|
|
# @Desc : 血缘关系分析
|
|
|
|
|
|
from sqllineage.runner import LineageRunner
|
|
|
|
|
|
from datahub.graph.graph_helper import MetaDataGraphBuilder
|
|
|
from datahub.local_db_conf import graph
|
|
|
|
|
|
from datahub.metadata.metadata_warehouse import MetadataWareHouse
|
|
|
from datahub.metadata.constant.metadata_constant import MetaDataObjType
|
|
|
|
|
|
|
|
|
class MetadataRelationAnalyzer(object):
    """
    Metadata relationship (lineage) analyzer.

    Intended flow, per the original notes: read views from the metadata
    store --> analyze each view --> derive the view/table relations -->
    store them in Neo4j.
    """

    def __init__(self, source_id):
        """
        :param source_id: identifier of the data source; it is stored on the
            instance and handed to MetadataWareHouse
        """
        self.source_id = source_id
        self.warehouse = MetadataWareHouse(source_id)
        self.graph_builder = MetaDataGraphBuilder(graph)

    @staticmethod
    def _source_object_name(source_table):
        """
        Return the upper-cased, unqualified object name of a lineage source.

        The string form of the source is split on its LAST dot, so both
        ``schema.table`` and ``db.schema.table`` yield ``TABLE``; a string
        without any dot is returned upper-cased as a whole instead of
        raising IndexError.

        :param source_table: any object whose ``str()`` is a (possibly
            qualified) object name
        :return: upper-cased name of the last dot-separated segment
        """
        return str(source_table).rsplit('.', 1)[-1].upper()

    def analyze(self):
        """
        Read the views of the data source and record where their data comes from.

        Taking a view as the example: its data comes from several objects,
        i.e. source: [t1, t2, t3], dest: the view. For every view the list of
        ``(source_name, source_type)`` tuples is handed, together with the
        view name, to ``graph_builder.add_view_relation``. According to the
        original notes the resulting relationships are one-directional.

        A view whose SQL cannot be analyzed is reported with ``print`` and
        skipped; the remaining views are still processed.

        :return: None
        """
        views = self.warehouse.query_metadata_id_name(MetaDataObjType.View.value)
        if not views:
            return

        for v in views:
            # v[0] is used to look up the CREATE statement, v[1] as the view name.
            create_sql = self.warehouse.query_view_create(v[0])

            # Analyze the SQL.
            try:
                analyze_result = LineageRunner(create_sql)
            except Exception as e:
                print(f'视图{v[1]}分析SQL异常,e={e}')
                continue

            # Data sources of the view.
            # NOTE(review): LineageRunner appears to parse lazily, so SQL
            # errors may only surface on this attribute access -- confirm.
            try:
                source_obj = analyze_result.source_tables
            except Exception as e:
                print(f'获取数据来源表异常,e={e}')
                continue

            # Resolve each source to (name, type); names are upper-cased and
            # the type lookup tells whether the source is a table or a view.
            source = []
            for s in source_obj:
                source_name = self._source_object_name(s)
                source_type = self.warehouse.query_metadata_type_by_name(source_name)
                source.append((source_name, source_type))

            # Create the nodes and relationships.
            self.graph_builder.add_view_relation(source, v[1])

    def analyze_view(self):
        """
        Analyze views (placeholder, not implemented yet).

        :return: None
        """
        pass

    def analyze_procedure(self):
        """
        Analyze stored procedures (placeholder, not implemented yet).

        :return: None
        """
        pass
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual entry point: run the lineage analysis for one fixed data source.
    # The graph call below was left disabled in the original script; it
    # presumably clears the graph before a fresh run -- verify before enabling.
    # graph.delete_all()
    MetadataRelationAnalyzer('834164a2d62de959c0261e6239dd1e55').analyze()
|