You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

85 lines
2.8 KiB

2 years ago
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/9 9:18
# @Author : old tom
# @File : relation_analyze.py
# @Project : futool-tiny-datahub
# @Desc : 血缘关系分析
2 years ago
from sqllineage.runner import LineageRunner
from datahub.graph.graph_helper import MetaDataGraphBuilder
from datahub.local_db_conf import graph
from datahub.metadata.metadata_warehouse import MetadataWareHouse
from datahub.metadata.constant.metadata_constant import MetaDataObjType
2 years ago
class MetadataRelationAnalyzer(object):
    """
    Metadata relationship (lineage) analyzer.

    Flow: read views from the metadata store --> analyze each view
    --> derive the view-to-table relations --> store them in Neo4j.
    """

    def __init__(self, source_id):
        """
        :param source_id: identifier passed to MetadataWareHouse; also kept on
            the instance as ``source_id``
        """
        self.source_id = source_id
        self.warehouse = MetadataWareHouse(source_id)
        self.graph_builder = MetaDataGraphBuilder(graph)

    @staticmethod
    def _bare_table_name(table):
        """
        Return the upper-cased, unqualified name of a lineage table.

        Only the last dot-separated component of ``str(table)`` is kept, so
        ``schema.table`` and ``db.schema.table`` both yield ``TABLE`` and a
        name without any qualifier is returned as-is instead of raising
        IndexError.

        :param table: object whose ``str()`` form is a (possibly qualified) name
        :return: upper-cased table name
        """
        return str(table).rsplit('.', 1)[-1].upper()

    def analyze(self):
        """
        Analyze the stored views and register their data sources in the graph.

        For every view returned by the warehouse, the creation SQL is parsed
        with LineageRunner, each source table is resolved to a (name, type)
        pair, and the pairs are handed to the graph builder together with the
        view name. A view whose SQL cannot be analyzed is reported on stdout
        and skipped; the remaining views are still processed.

        :return: None
        """
        views = self.warehouse.query_metadata_id_name(MetaDataObjType.View.value)
        if not views:
            return
        for v in views:
            # v[0] is the key used to look up the create statement,
            # v[1] is the view name used in messages and in the graph.
            view_key, view_name = v[0], v[1]
            create_sql = self.warehouse.query_view_create(view_key)
            # Parse the SQL
            try:
                analyze_result = LineageRunner(create_sql)
            except Exception as e:
                print(f'视图{view_name}分析SQL异常,e={e}')
                continue
            # Data sources of the view
            try:
                source_obj = analyze_result.source_tables
            except Exception as e:
                print(f'获取数据来源表异常,e={e}')
                continue
            # Resolve what each source is (table or view); names are upper-cased
            source = []
            for s in source_obj:
                source_name = self._bare_table_name(s)
                source_type = self.warehouse.query_metadata_type_by_name(source_name)
                # (name, type)
                source.append((source_name, source_type))
            # Create the nodes and relationships
            self.graph_builder.add_view_relation(source, view_name)

    def analyze_view(self):
        """
        Analyze views (placeholder, not implemented).

        :return: None
        """
        pass

    def analyze_procedure(self):
        """
        Analyze stored procedures (placeholder, not implemented).

        :return: None
        """
        pass
if __name__ == '__main__':
    # Script entry point: run the analysis for one hard-coded source id.
    # graph.delete_all()  # disabled in the original; presumably clears the graph first - confirm before enabling
    MetadataRelationAnalyzer('834164a2d62de959c0261e6239dd1e55').analyze()