#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/9 9:18
# @Author : old tom
# @File : relation_analyze.py
# @Project : futool-tiny-datahub
# @Desc : Data lineage (relationship) analysis

from sqllineage.runner import LineageRunner
from datahub.graph.graph_helper import MetaDataGraphBuilder
from datahub.local_db_conf import graph
from datahub.metadata.metadata_warehouse import MetadataWareHouse
from datahub.metadata.constant.metadata_constant import MetaDataObjType


class MetadataRelationAnalyzer(object):
    """
    Metadata relationship analysis.

    Flow: read views from the metadata store --> analyze each view's
    creation SQL --> derive the view-to-table relations --> hand them to
    the graph builder (Neo4j, per the original author's description).
    """

    def __init__(self, source_id):
        """
        :param source_id: identifier of the data source; it is passed
            straight to ``MetadataWareHouse``.
        """
        self.source_id = source_id
        self.warehouse = MetadataWareHouse(source_id)
        self.graph_builder = MetaDataGraphBuilder(graph)

    @staticmethod
    def _bare_table_name(table) -> str:
        """
        Return the upper-cased, unqualified name of a lineage table object.

        The last dotted component of ``str(table)`` is used, so the result
        is the table name for ``table``, ``schema.table`` and
        ``db.schema.table`` alike. Taking index ``[1]`` of a plain split,
        as the code did before, raised ``IndexError`` for an unqualified
        name and returned the schema for a three-part name.
        """
        return str(table).rsplit('.', 1)[-1].upper()

    def analyze(self) -> None:
        """
        Read every view from the warehouse and record where its data comes from.

        A view draws its data from several objects, i.e. source: [t1, t2, t3],
        dest: the view. For each view the sources are collected as
        ``(name, type)`` tuples and passed to
        ``graph_builder.add_view_relation`` together with the view name, which
        is expected to create the nodes and the one-directional relationships.

        Views whose SQL cannot be analyzed are reported with ``print`` and
        skipped. Errors raised by the warehouse lookups or by the graph
        builder are not caught here and propagate to the caller.

        :return: None
        """
        views = self.warehouse.query_metadata_id_name(MetaDataObjType.View.value)
        if not views:
            return

        for v in views:
            # Each row is indexed as (id, name).
            view_id, view_name = v[0], v[1]

            # Fetch the view's creation statement.
            create_sql = self.warehouse.query_view_create(view_id)

            # Analyze the SQL.
            # NOTE(review): LineageRunner appears to parse lazily, so most
            # SQL errors likely surface on the `source_tables` access below
            # rather than here -- confirm against the installed sqllineage.
            try:
                analyze_result = LineageRunner(create_sql)
            except Exception as e:
                print(f'视图{v[1]}分析SQL异常,e={e}')
                continue

            # Data sources of the view.
            try:
                source_obj = analyze_result.source_tables
            except Exception as e:
                print(f'获取数据来源表异常,e={e}')
                continue

            # Determine what each source is (table or view); names are
            # normalized to upper case before the type lookup.
            source_names = [self._bare_table_name(s) for s in source_obj]
            source = [
                (name, self.warehouse.query_metadata_type_by_name(name))
                for name in source_names
            ]

            # Create the nodes and relationships.
            self.graph_builder.add_view_relation(source, view_name)

    def analyze_view(self):
        """
        Analyze views (not implemented yet).

        :return: None
        """
        pass

    def analyze_procedure(self):
        """
        Analyze stored procedures (not implemented yet).

        :return: None
        """
        pass


if __name__ == '__main__':
    # graph.delete_all()
    mta = MetadataRelationAnalyzer('834164a2d62de959c0261e6239dd1e55')
    mta.analyze()