@ -6,13 +6,45 @@
# @Project : futool-tiny-datahub
# @Desc : 血缘关系分析
import sys
import time
from sqllineage . runner import LineageRunner
from datahub . graph . graph_helper import MetaDataGraphBuilder
from datahub . graph . graph_helper import Neo4jHelper
from datahub . local_db_conf import graph
from common . log_conf import Logger
from datahub . metadata . metadata_warehouse import MetadataWareHouse
from datahub . metadata . constant . metadata_constant import MetaDataObjType
from common . futool . fu_collection import is_not_empty
# Module-level logger obtained from the project's Logger wrapper.
logger = Logger ( ) . get_logger ( )
# Override the interpreter's maximum recursion depth. CPython's default is
# 1000, so this value lowers the ceiling rather than raising it.
# NOTE(review): presumably meant to make runaway view recursion fail early - confirm.
sys . setrecursionlimit ( 500 )
def analyze_view_sql(sql: str):
    """
    Parse a SQL statement and return the names of the objects it reads from.

    :param sql: SQL text handed to ``LineageRunner``.
    :return: list of upper-cased source object names (last dotted component
        of each source), which is empty when the statement has no sources;
        ``None`` when lineage extraction fails or when the first source
        equals the first target.
    """
    try:
        analyze_result = LineageRunner(sql)
    except Exception as e:
        logger.error(f' sql解析异常,e= {e} ')
        return None

    try:
        # Accessing the lineage properties is what may fail on bad SQL, so
        # only these two reads are protected.
        source_table = analyze_result.source_tables
        target_table = analyze_result.target_tables
    except Exception as e:
        logger.error(f' 获取源表异常,e= {e} ')
        return None

    # Guard against endless recursion in callers: a statement whose first
    # source is also its first target would be analysed again and again.
    # The truthiness check keeps two empty lists from hitting [0].
    if (source_table
            and len(source_table) == len(target_table)
            and source_table[0] == target_table[0]):
        logger.warning(f' 源表与目标表相同 ')
        return None

    # Names are rendered in dotted form (qualifier.table); the last component
    # is the object name regardless of how many qualifiers precede it, and
    # indexing from the end cannot raise on an unqualified name.
    return [str(s).split('.')[-1].upper() for s in source_table]
class MetadataRelationAnalyzer ( object ) :
@ -24,7 +56,7 @@ class MetadataRelationAnalyzer(object):
def __init__(self, source_id):
    """
    Set up the helpers used for lineage analysis of one data source.

    :param source_id: identifier of the data source; stored on the instance
        and passed to ``MetadataWareHouse``.
    """
    self.source_id = source_id
    # Metadata lookups (views, create statements, object types) for this source.
    self.warehouse = MetadataWareHouse(source_id)
    # Both helpers wrap the shared ``graph`` connection imported at module level.
    self.graph_builder = MetaDataGraphBuilder(graph)
    self.neo4j_helper = Neo4jHelper(graph)
def analyze(self):
    """
    Run the full relation analysis for this data source.

    A view draws its data from several objects, i.e. source: [t1, t2, t3],
    dest: the view. The analysis results are turned into graph nodes and
    one-directional relationships.

    :return: None
    """
    # Views first, stored procedures second.
    self.analyze_views()
    self.analyze_procedure()
def analyze_views(self):
    """
    Analyze every view of the data source and record its lineage.

    Each view returned by the warehouse is handed to ``_recurrence_view``.
    A failure on one view is logged and does not stop the remaining views;
    the elapsed time is logged for every view whether it succeeded or not.

    :return: None
    """
    views = self.warehouse.query_metadata_id_name(MetaDataObjType.View.value)
    if not views:
        return

    for view in views:
        # NOTE(review): rows are assumed to be (id, name) pairs, index 1
        # being the view name - confirm against query_metadata_id_name.
        view_name = view[1]
        logger.info(f' 开始分析视图 {view_name} ')
        start_time = time.time()
        try:
            self._recurrence_view(view_name)
        except Exception as e:
            # Deliberately broad: one broken view must not abort the batch.
            logger.error(f' 视图 {view_name} 分析异常,e= {e} ')
        finally:
            stop_time = time.time()
            logger.info(f' 视图 {view_name} 分析结束,耗时 {round(stop_time - start_time, 2)} 秒 ')
def analyze_view(self):
    """
    Analyze a view.

    Placeholder without behavior; retained so the name stays defined.

    :return: None
    """
    pass


def _recurrence_view(self, view_name, visited=None):
    """
    Recursively analyze a view and write its lineage into the graph.

    The view's create statement is parsed for its sources. Every source that
    the warehouse classifies as a table or a view becomes a node with a
    relationship pointing at the node of ``view_name``; sources that are
    views are then analysed the same way. Sources of any other type are
    ignored.

    :param view_name: name of the view to analyze
    :param visited: names of views already analysed during this traversal;
        callers normally omit it and a fresh set is created
    :return: None
    """
    if visited is None:
        visited = set()
    # Mark before descending so that a view reachable from itself (directly
    # or through other views) is not analysed a second time.
    visited.add(view_name)

    create_sql = self.warehouse.query_view_create_by_name(view_name)
    # The sources may be tables or views.
    source_tables = analyze_view_sql(create_sql)
    if not is_not_empty(source_tables):
        return

    tables = []
    views = []
    for source_name in source_tables:
        source_type = self.warehouse.query_metadata_type_by_name(source_name)
        if source_type == MetaDataObjType.View.value:
            views.append(source_name)
        elif source_type == MetaDataObjType.Table.value:
            tables.append(source_name)

    # Root node: the view itself. One view relates to many sources, so the
    # incoming view_name is used as the root of all relationships below.
    root_node = self.neo4j_helper.create_node_with_check(
        MetaDataObjType.View.value, self.source_id, name=view_name)

    # NOTE(review): the ' from ' and ' name ' literals carry surrounding
    # spaces - confirm that is what the graph helper expects.
    for t in tables:
        t_node = self.neo4j_helper.create_node_with_check(
            MetaDataObjType.Table.value, self.source_id, name=t)
        self.neo4j_helper.create_relationship_with_merge(
            t_node, ' from ', root_node, MetaDataObjType.Table.value, ' name ')

    for v in views:
        v_node = self.neo4j_helper.create_node_with_check(
            MetaDataObjType.View.value, self.source_id, name=v)
        self.neo4j_helper.create_relationship_with_merge(
            v_node, ' from ', root_node, MetaDataObjType.View.value, ' name ')

    # Descend only after all relationships of this level exist; the edge to
    # an already visited view has been created above, so skipping the
    # descent loses nothing.
    for v in views:
        if v not in visited:
            self._recurrence_view(v, visited)
def analyze_procedure ( self ) :
"""
@ -79,6 +135,6 @@ class MetadataRelationAnalyzer(object):
if __name__ == '__main__':
    # The module name of a directly executed script is exactly '__main__';
    # a padded literal would never match and the block would never run.
    # NOTE(review): delete_all presumably wipes the whole graph before the
    # rebuild - confirm this is intended outside local development.
    graph.delete_all()
    # NOTE(review): the source id literal carries leading/trailing spaces -
    # confirm it matches the stored id.
    analyzer = MetadataRelationAnalyzer(' 834164a2d62de959c0261e6239dd1e55 ')
    analyzer.analyze()