You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

141 lines
4.6 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/11 19:08
# @Author : old tom
# @File : scan_task.py
# @Project : futool-tiny-datahub
# @Desc : 扫描任务,通过调度扫描数据源-->获取数据源-->读取元数据并同步
from datahub.datasource.datasource_manage import DataSourceManage
from datahub.local_db_conf import local_conn
from datahub.metadata.metadata_reader import MetadataReader
from datahub.metadata.metadata_warehouse import MetadataWareHouse
from datahub.scheduletask.scandao.scan_task_dao import ScanTaskDao
from common.futool.fu_function import singleton
from datahub.scheduletask.task_executor import ScheduleExecutor
from datahub.scheduletask.schedule import CronExpTrigger
from common.log_conf import Logger
from datahub.metadata.constant.metadata_constant import MetaDataObjType
logger = Logger().get_logger()
@singleton
class ScanTaskManage(object):
def __init__(self):
self.dao = ScanTaskDao(local_conn)
def add_task(self, source_id, cron='0 0 0 1/1 * ?'):
"""
添加任务
:param source_id: 数据源ID
:param cron: cron表达式
:return:
"""
if self.dao.exist_by_id(source_id):
return False, f'[{source_id}] all ready exist'
return self.dao.add_task(source_id, cron), 'add success'
def switch_task(self, source_id, enable='N'):
"""
开关任务
:param source_id: 源ID
:param enable: 是否开启 Y开|N关
:return:
"""
return self.dao.switch_task(source_id, enable)
def edit_cron(self, source_id, cron):
"""
修改任务CRON表达式
:param source_id: 源ID
:param cron: cron表达式
:return:
"""
return self.dao.edit_task_cron(source_id, cron)
def query_task(self, enable=None):
"""
任务查询
:param enable:
:return:
"""
return self.dao.query_all_task(enable)
def query_task_by_id(self, source_id):
"""
根据ID 查任务
:return:
"""
return self.dao.query_task_by_id(source_id)
class ScanTaskRunner(object):
"""
扫描任务执行器
"""
def __init__(self):
self.dao = ScanTaskDao(local_conn)
self.executor = ScheduleExecutor()
self.scanner = ScanTaskExecutor()
def run(self):
"""
获取扫描配置提交到执行器
:return:
"""
enable_task = self.dao.query_all_task('Y')
if enable_task:
for task in enable_task:
self.executor.submit(task[0], CronExpTrigger.parse_crontab(task[1]), self.scanner.scan_metadata)
logger.info(f'task [{task[0]}] submit success')
class ScanTaskExecutor(object):
def __init__(self):
self.datasource_manage = DataSourceManage()
def scan_metadata(self, source_id):
"""
扫描元数据
:param source_id:
:return:
"""
# 元数据仓库API
warehouse = MetadataWareHouse(source_id)
# 获取待扫描数据源
datasource = self.datasource_manage.get(source_id)
# 初始化元数据读取器
metadata_reader = MetadataReader(datasource)
# 分别读取表\视图\存储过程并入库
tables = metadata_reader.query_tables()
warehouse.save_metadata_obj(tables, MetaDataObjType.Table.value)
views = metadata_reader.query_views()
warehouse.save_metadata_obj(views, MetaDataObjType.View.value)
procedures = metadata_reader.query_procedure()
warehouse.save_metadata_obj(procedures, MetaDataObjType.Procedure.value)
# TODO 临时调用
# w_procedures = warehouse.query_metadata_id_name(MetaDataObjType.Procedure.value)
# w_procedures = warehouse.query_metadata_id_name(MetaDataObjType.View.value)
# 查询视图语句
# for v in w_procedures:
# sql = metadata_reader.query_metadata_detail(v[1], MetaDataObjType.View.value)
# # warehouse.save_metadata_obj_detail([list(v)[0]] + [sql])
# warehouse.save_metadata_obj_detail([(v[0], sql)])
# w_tables = warehouse.query_metadata_id_name(MetaDataObjType.Table.value)
# for t in w_tables:
# fields = metadata_reader.query_metadata_detail(t[1], MetaDataObjType.Table.value)
# fields_data = []
# for f in fields:
# fields_data.append(tuple([list(t)[0]] + list(f)))
# warehouse.save_metadata_obj_field(fields_data)
if __name__ == '__main__':
ste = ScanTaskExecutor()
ste.scan_metadata('834164a2d62de959c0261e6239dd1e55')