#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2023/4/11 19:08 # @Author : old tom # @File : scan_task.py # @Project : futool-tiny-datahub # @Desc : 扫描任务,通过调度扫描数据源-->获取数据源-->读取元数据并同步 from datahub.datasource.datasource_manage import DataSourceManage from datahub.local_db_conf import local_conn from datahub.metadata.metadata_reader import MetadataReader from datahub.metadata.metadata_warehouse import MetadataWareHouse from datahub.scheduletask.scandao.scan_task_dao import ScanTaskDao from common.futool.fu_function import singleton from datahub.scheduletask.task_executor import ScheduleExecutor from datahub.scheduletask.schedule import CronExpTrigger from common.log_conf import Logger from datahub.metadata.constant.metadata_constant import MetaDataObjType logger = Logger().get_logger() @singleton class ScanTaskManage(object): def __init__(self): self.dao = ScanTaskDao(local_conn) def add_task(self, source_id, cron='0 0 0 1/1 * ?'): """ 添加任务 :param source_id: 数据源ID :param cron: cron表达式 :return: """ if self.dao.exist_by_id(source_id): return False, f'[{source_id}] all ready exist' return self.dao.add_task(source_id, cron), 'add success' def switch_task(self, source_id, enable='N'): """ 开关任务 :param source_id: 源ID :param enable: 是否开启 Y开|N关 :return: """ return self.dao.switch_task(source_id, enable) def edit_cron(self, source_id, cron): """ 修改任务CRON表达式 :param source_id: 源ID :param cron: cron表达式 :return: """ return self.dao.edit_task_cron(source_id, cron) def query_task(self, enable=None): """ 任务查询 :param enable: :return: """ return self.dao.query_all_task(enable) def query_task_by_id(self, source_id): """ 根据ID 查任务 :return: """ return self.dao.query_task_by_id(source_id) class ScanTaskRunner(object): """ 扫描任务执行器 """ def __init__(self): self.dao = ScanTaskDao(local_conn) self.executor = ScheduleExecutor() self.scanner = ScanTaskExecutor() def run(self): """ 获取扫描配置提交到执行器 :return: """ enable_task = self.dao.query_all_task('Y') if enable_task: for task in enable_task: self.executor.submit(task[0], CronExpTrigger.parse_crontab(task[1]), self.scanner.scan_metadata) logger.info(f'task [{task[0]}] submit success') class ScanTaskExecutor(object): def __init__(self): self.datasource_manage = DataSourceManage() def scan_metadata(self, source_id): """ 扫描元数据 :param source_id: :return: """ # 元数据仓库API warehouse = MetadataWareHouse(source_id) # 获取待扫描数据源 datasource = self.datasource_manage.get(source_id) # 初始化元数据读取器 metadata_reader = MetadataReader(datasource) # 分别读取表\视图\存储过程并入库 tables = metadata_reader.query_tables() warehouse.save_metadata_obj(tables, MetaDataObjType.Table.value) views = metadata_reader.query_views() warehouse.save_metadata_obj(views, MetaDataObjType.View.value) procedures = metadata_reader.query_procedure() warehouse.save_metadata_obj(procedures, MetaDataObjType.Procedure.value) # TODO 临时调用 # w_procedures = warehouse.query_metadata_id_name(MetaDataObjType.Procedure.value) # w_procedures = warehouse.query_metadata_id_name(MetaDataObjType.View.value) # 查询视图语句 # for v in w_procedures: # sql = metadata_reader.query_metadata_detail(v[1], MetaDataObjType.View.value) # # warehouse.save_metadata_obj_detail([list(v)[0]] + [sql]) # warehouse.save_metadata_obj_detail([(v[0], sql)]) # w_tables = warehouse.query_metadata_id_name(MetaDataObjType.Table.value) # for t in w_tables: # fields = metadata_reader.query_metadata_detail(t[1], MetaDataObjType.Table.value) # fields_data = [] # for f in fields: # fields_data.append(tuple([list(t)[0]] + list(f))) # warehouse.save_metadata_obj_field(fields_data) if __name__ == '__main__': ste = ScanTaskExecutor() ste.scan_metadata('834164a2d62de959c0261e6239dd1e55')