You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

132 lines
4.4 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/4/11 19:08
# @Author : old tom
# @File : scan_task.py
# @Project : futool-tiny-datahub
# @Desc : 扫描任务,通过调度扫描数据源-->获取数据源-->读取元数据并同步
from datahub.datasource.datasource_manage import DataSourceManage
from datahub.local_db_conf import local_conn
from datahub.metadata.metadata_reader import MetadataReader
from datahub.metadata.metadata_warehouse import MetadataWareHouse
from datahub.scheduletask.scandao.scan_task_dao import ScanTaskDao
from datahub.scheduletask.task_executor import ScheduleExecutor
from datahub.scheduletask.schedule import CronExpTrigger
from datahub.log_conf import log
from datahub.metadata.constant.metadata_constant import MetaDataObjType
class ScanTaskManage(object):
def __init__(self):
self.dao = ScanTaskDao(local_conn)
def add_task(self, source_id, cron='0 0 0 1/1 * ?'):
"""
添加任务
:param source_id: 数据源ID
:param cron: cron表达式,默认一天一次
:return:
"""
if self.dao.exist_by_id(source_id):
return False, f'[{source_id}] all ready exist'
return self.dao.add_task(source_id, cron), 'add success'
def switch_task(self, source_id, enable='N'):
"""
开关任务
:param source_id: 源ID
:param enable: 是否开启 Y开|N关
:return:
"""
return self.dao.switch_task(source_id, enable)
def edit_cron(self, source_id, cron):
"""
修改任务CRON表达式
:param source_id: 源ID
:param cron: cron表达式
:return:
"""
return self.dao.edit_task_cron(source_id, cron)
def query_task(self, enable=None):
"""
任务查询
:param enable:
:return:
"""
return self.dao.query_all_task(enable)
def query_task_by_id(self, source_id):
"""
根据ID 查任务
:return:
"""
return self.dao.query_task_by_id(source_id)
class ScanTaskRunner(object):
"""
扫描任务执行器
"""
def __init__(self):
self.dao = ScanTaskDao(local_conn)
self.executor = ScheduleExecutor()
self.scanner = ScanTaskExecutor()
def run(self):
"""
获取扫描配置提交到执行器
:return:
"""
enable_task = self.dao.query_all_task('Y')
if enable_task:
for task in enable_task:
self.executor.submit(task[0], CronExpTrigger.parse_crontab(task[1]), self.scanner.scan_print)
log.info(f'datasource scan task [{task[0]}] submit success')
class ScanTaskExecutor(object):
def __init__(self):
self.datasource_manage = DataSourceManage()
def scan_print(self, source_id):
# 元数据仓库API
warehouse = MetadataWareHouse(source_id)
# 获取待扫描数据源
datasource = self.datasource_manage.get(source_id)
# 初始化元数据读取器
metadata_reader = MetadataReader(datasource)
# 分别读取表\视图\存储过程并入库
log.info(f'开始扫描[{source_id}]元数据')
tables = metadata_reader.query_tables()
log.info(f'[{source_id}]读取表完毕,共{len(tables)}')
def scan_metadata(self, source_id):
"""
TODO python暂时无法实现注解事务,后续使用动态代理处理
扫描元数据
:param source_id:
:return:
"""
# 元数据仓库API
warehouse = MetadataWareHouse(source_id)
# 获取待扫描数据源
datasource = self.datasource_manage.get(source_id)
# 初始化元数据读取器
metadata_reader = MetadataReader(datasource)
# 分别读取表\视图\存储过程并入库
log.info(f'开始扫描[{source_id}]元数据')
tables = metadata_reader.query_tables()
warehouse.save_metadata_obj(tables, MetaDataObjType.Table.value)
log.info(f'[{source_id}]读取表完毕,共{len(tables)}')
views = metadata_reader.query_views()
warehouse.save_metadata_obj(views, MetaDataObjType.View.value)
log.info(f'[{source_id}]读取视图完毕,共{len(views)}')
procedures = metadata_reader.query_procedure()
warehouse.save_metadata_obj(procedures, MetaDataObjType.Procedure.value)
log.info(f'[{source_id}]读取存储过程完毕,共{len(procedures)}')