You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
134 lines
4.5 KiB
134 lines
4.5 KiB
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# @Time : 2023/4/11 19:08
|
|
# @Author : old tom
|
|
# @File : scan_task.py
|
|
# @Project : futool-tiny-datahub
|
|
# @Desc : 扫描任务,通过调度扫描数据源-->获取数据源-->读取元数据并同步
|
|
|
|
from datahub.datasource.datasource_manage import DataSourceManage
|
|
from datahub.local_db_conf import local_conn
|
|
from datahub.metadata.metadata_reader import MetadataReader
|
|
from datahub.metadata.metadata_warehouse import MetadataWareHouse
|
|
from datahub.scheduletask.scandao.scan_task_dao import ScanTaskDao
|
|
from datahub.scheduletask.task_executor import ScheduleExecutor
|
|
from datahub.scheduletask.schedule import CronExpTrigger
|
|
from datahub.log_conf import Logger
|
|
from datahub.metadata.constant.metadata_constant import MetaDataObjType
|
|
|
|
logger = Logger().get_logger()
|
|
|
|
|
|
class ScanTaskManage(object):
|
|
def __init__(self):
|
|
self.dao = ScanTaskDao(local_conn)
|
|
|
|
def add_task(self, source_id, cron='0 0 0 1/1 * ?'):
|
|
"""
|
|
添加任务
|
|
:param source_id: 数据源ID
|
|
:param cron: cron表达式,默认一天一次
|
|
:return:
|
|
"""
|
|
if self.dao.exist_by_id(source_id):
|
|
return False, f'[{source_id}] all ready exist'
|
|
return self.dao.add_task(source_id, cron), 'add success'
|
|
|
|
def switch_task(self, source_id, enable='N'):
|
|
"""
|
|
开关任务
|
|
:param source_id: 源ID
|
|
:param enable: 是否开启 Y开|N关
|
|
:return:
|
|
"""
|
|
return self.dao.switch_task(source_id, enable)
|
|
|
|
def edit_cron(self, source_id, cron):
|
|
"""
|
|
修改任务CRON表达式
|
|
:param source_id: 源ID
|
|
:param cron: cron表达式
|
|
:return:
|
|
"""
|
|
return self.dao.edit_task_cron(source_id, cron)
|
|
|
|
def query_task(self, enable=None):
|
|
"""
|
|
任务查询
|
|
:param enable:
|
|
:return:
|
|
"""
|
|
return self.dao.query_all_task(enable)
|
|
|
|
def query_task_by_id(self, source_id):
|
|
"""
|
|
根据ID 查任务
|
|
:return:
|
|
"""
|
|
return self.dao.query_task_by_id(source_id)
|
|
|
|
|
|
class ScanTaskRunner(object):
|
|
"""
|
|
扫描任务执行器
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.dao = ScanTaskDao(local_conn)
|
|
self.executor = ScheduleExecutor()
|
|
self.scanner = ScanTaskExecutor()
|
|
|
|
def run(self):
|
|
"""
|
|
获取扫描配置提交到执行器
|
|
:return:
|
|
"""
|
|
enable_task = self.dao.query_all_task('Y')
|
|
if enable_task:
|
|
for task in enable_task:
|
|
self.executor.submit(task[0], CronExpTrigger.parse_crontab(task[1]), self.scanner.scan_print)
|
|
logger.info(f'datasource scan task [{task[0]}] submit success')
|
|
|
|
|
|
class ScanTaskExecutor(object):
|
|
|
|
def __init__(self):
|
|
self.datasource_manage = DataSourceManage()
|
|
|
|
def scan_print(self, source_id):
|
|
# 元数据仓库API
|
|
warehouse = MetadataWareHouse(source_id)
|
|
# 获取待扫描数据源
|
|
datasource = self.datasource_manage.get(source_id)
|
|
# 初始化元数据读取器
|
|
metadata_reader = MetadataReader(datasource)
|
|
# 分别读取表\视图\存储过程并入库
|
|
logger.info(f'开始扫描[{source_id}]元数据')
|
|
tables = metadata_reader.query_tables()
|
|
logger.info(f'[{source_id}]读取表完毕,共{len(tables)}张')
|
|
|
|
def scan_metadata(self, source_id):
|
|
"""
|
|
TODO python暂时无法实现注解事务,后续使用动态代理处理
|
|
扫描元数据
|
|
:param source_id:
|
|
:return:
|
|
"""
|
|
# 元数据仓库API
|
|
warehouse = MetadataWareHouse(source_id)
|
|
# 获取待扫描数据源
|
|
datasource = self.datasource_manage.get(source_id)
|
|
# 初始化元数据读取器
|
|
metadata_reader = MetadataReader(datasource)
|
|
# 分别读取表\视图\存储过程并入库
|
|
logger.info(f'开始扫描[{source_id}]元数据')
|
|
tables = metadata_reader.query_tables()
|
|
warehouse.save_metadata_obj(tables, MetaDataObjType.Table.value)
|
|
logger.info(f'[{source_id}]读取表完毕,共{len(tables)}张')
|
|
views = metadata_reader.query_views()
|
|
warehouse.save_metadata_obj(views, MetaDataObjType.View.value)
|
|
logger.info(f'[{source_id}]读取视图完毕,共{len(views)}张')
|
|
procedures = metadata_reader.query_procedure()
|
|
warehouse.save_metadata_obj(procedures, MetaDataObjType.Procedure.value)
|
|
logger.info(f'[{source_id}]读取存储过程完毕,共{len(procedures)}个')
|