feat：提交

8 months ago · 2a6cd9e8a0
parent c669cbb12b
commit 2a6cd9e8a0
4 changed files with 57 additions and 23 deletions
--- a/llmagent/init.py
+++ b/llmagent/init.py
@ -35,5 +35,22 @@ PROMPT_TEMPLATE = {
        通过等待并仔细考虑用户在每次使用工具后的回应，你可以相应地做出反应并做出关于如何继续任务的明智决定。这个迭代过程有助于确保你工作的整体成功和准确性。
        用户的指令是：{user_input}
        """
+    },
+    'TOOL_CALLER': {
+        'description': '工具调用器',
+        'template': """
+        你是工具调用器，用户将会告诉你一条指令 {user_input}, 请根据指令推理出需要调用哪个工具。
+        # 要求
+        1.根据指令和提供的工具描述选择最合适的工具,并仔细阅读工具参数说明。
+        2.你需要根据工具参数说明进行参数校验，评估用户输入的参数是否满足条件，如果不满足需要返回参数校验错误信息。
+        3.在任何情况下，都不要修改或扩展提供的工具参数
+        4.如果用户的指令是任意表示序号的数字或者语句，比如：1、第一、第1个、第二个。你需要回顾上一轮对话中用户指令并推断出本轮用户指令。
+        例如上一轮对话中的用户指令为：打开相机,你的回复是:请确认您要查看的相机具体名称：
+                                    1. 北卡口入境摄像头出场1号通道
+                                    2. 北卡口出口道路监控
+                                    3. 北卡口入境摄像头出场2号通道
+                                    您需要选择哪个选项？（请回复选项前的数字）
+        本轮用户指令为：1, 你需要推断出本轮用户指令为：打开北卡口入境摄像头出场1号通道相机
+        """
    }
 }
--- a/llmagent/llm_agent.py
+++ b/llmagent/llm_agent.py
@ -11,6 +11,7 @@ from abc import ABC
 from llmtools import TOOLS_BIND_FUNCTION, STRUCT_TOOLS
 from llmagent import PROMPT_TEMPLATE
 from langchain_openai import ChatOpenAI
+from langchain.globals import set_debug, set_verbose
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
@ -18,9 +19,13 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_core.messages import HumanMessage
 from log_conf import log

+# debug模式,有更多输出
+set_debug(True)
+set_verbose(False)
 # 默认系统提示词
 DEFAULT_SYS_PROMPT = ''

+# 字符串解析器 会自动提取 content 字段
 parser = StrOutputParser()

 # 模型初始化，注意修改env.toml中的配置
@ -73,7 +78,7 @@ class BaseChatAgent(ABC):

    def invoke(self, user_input: str) -> str:
        """
-        请求模型并一次性返回
+        单轮对话并一次性返回
        :param user_input: 用户输入
        :return:
        """
@ -84,7 +89,7 @@ class BaseChatAgent(ABC):

    def invoke_by_stream(self, user_input: str):
        """
-        请求模型并流式返回（同步流）
+        单轮对话并流式返回（同步流）
        :param user_input: 用户输入
        :return:
        """
@ -93,7 +98,7 @@ class BaseChatAgent(ABC):
        for chunk in response:
            print(chunk, flush=True, end='')

-    def multi_round_with_stream(self, user_input: str, session_id: int):
+    def multi_with_stream(self, user_input: str, session_id: int):
        """
        多轮对话
        :param user_input: 用户输入
@ -112,7 +117,7 @@ class BaseChatAgent(ABC):
    @staticmethod
    def invoke_with_tool(user_input: str):
        """
-        工具调用,function calling时system prompt不会生效，并且不支持流式返回
+        工具调用,用于测试模型选择工具
        :param user_input:
        :return: 这里返回的是LLM推理出的tool信息，格式如下：
            [{'name': 'get_current_weather', 'args': {'location': 'Beijing, China'}, 'id': 'call_xeeq4q52fw9x61lkrqwy9cr6', 'type': 'tool_call'}]
@ -120,28 +125,37 @@ class BaseChatAgent(ABC):
        llm_with_tools = llm.bind_tools(STRUCT_TOOLS)
        return llm_with_tools.invoke(user_input).tool_calls

-    @staticmethod
-    def invoke_with_tool_call(user_input: str):
-        """
-        单轮对话，调用工具并返给LLM
-        :param user_input:
-        :return:
+    def multi_with_tool_call_stream(self, user_input: str, session_id: int):
        """
-        # 自定义的提示词
+        多轮对话，包含工具调用
+        :param session_id: 对话sessionId
+        :param user_input: 用户输入
+        :return: 流式输出
+        """
+        config = {"configurable": {"session_id": session_id}}
+        # 总体任务描述及提示词
        user_msg = PROMPT_TEMPLATE.get('VOICE_ASSISTANT')['template'].format(user_input=user_input)
        messages = [HumanMessage(user_msg)]
        llm_with_tools = llm.bind_tools(STRUCT_TOOLS)
-        # 这里是判断使用哪个工具，需要加提示限制模型不能修改参数
+        # 判断使用哪个工具，需要加提示词让模型判断参数是否符合规则
+        user_input = PROMPT_TEMPLATE.get('TOOL_CALLER')['template'].format(user_input=user_input)
        call_msg = llm_with_tools.invoke(user_input)
+        # 如果参数不满足要求，call_msg 的 content 可能会包含参数校验失败信息，例：参数错误：分屏数量必须为大于0的整数。请检查指令中的"分屏数量"参数。
+        # 用模型进行参数校验很不稳定，不是每次都能输出错误信息。还是在tool中手动校验靠谱。
        messages.append(call_msg)
        for tool_call in call_msg.tool_calls:
            selected_tool = TOOLS_BIND_FUNCTION[tool_call["name"].lower()]
-            # 使用 tool_call 调用会生成ToolMessage
+            # 执行工具调用（同步）,返回ToolMessage
            tool_msg = selected_tool.invoke(tool_call)
            messages.append(tool_msg)
        log.info('【function call】构造输入为{}', messages)
-        # messages 中包含了 人类指令、AI指令、工具指令
-        return llm_with_tools.invoke(messages).content
+        # messages 中包含了 人类指令、AI指令、工具指令, 模型根据历史聊天组装成最后的回答
+        chain = self.multi_round_prompt | llm_with_tools | parser
+        # RunnableWithMessageHistory 会调用历史对话
+        with_message_history = RunnableWithMessageHistory(chain, get_session_history, input_messages_key="messages")
+        response = with_message_history.stream({'messages': messages}, config=config)
+        for chunk in response:
+            print(chunk, flush=True, end='')


 class ChatAgent(BaseChatAgent):
--- a/llmtools/tool_impl.py
+++ b/llmtools/tool_impl.py
@ -69,7 +69,7 @@ def query_camera_from_db(camera_name: str, top_n: int = 3) -> str:
    """
    rt = query_vector_db(camera_name)
    if rt:
-        log.info('【function call】相机相似度检索查询 {},返回 {}', camera_name, rt)
+        log.info('【function call】相机相似度检索查询[{}],返回 {}', camera_name, rt)
        # 判断相似度最高的相机是否超过阈值
        top_one = rt['hits'][0]
        # 相似度评分
--- a/main.py
+++ b/main.py
@ -11,11 +11,11 @@ from llmagent.llm_agent import ChatAgent
 dsr = ChatAgent()

 if __name__ == '__main__':
-    #print(dsr.invoke_with_tool_call('今天昆明天怎么样'))
+    # print(dsr.invoke_with_tool_call('今天昆明天怎么样'))
    ##########  测试 function call #########
-    # print(dsr.invoke_with_tool_call('播放南卡口出境2号相机'))
+    # print(dsr.invoke_with_tool_call('播放南卡口相机'))
    ## [{'name': 'play_video', 'args': {'camera_name': '北卡口1号道相机'}, 'id': 'call_apnb8fiqdkaz313katcs3tjf', 'type': 'tool_call'}]
-    # print(dsr.invoke_with_tool_call('将大屏切换为-1分屏'))
+    # dsr.multi_with_tool_call_stream('将大屏切换为-1分屏', 1)
    ## [{'name': 'split_screen', 'args': {'split_n': 2}, 'id': 'call_2o7c94f591xag8p6lcyice9q', 'type': 'tool_call'}]
    # print(dsr.invoke_with_tool('播放北卡口入境1号道录像,从今天到2025-03-16 02:09:31'))
    ## 由于大模型没有联网，所以无法判断‘今天’
@ -34,7 +34,10 @@ if __name__ == '__main__':
    ## [{'name': 'view_flight_details', 'args': {'airport_name': '成都天府机场', 'flight_code': 'K00', 'flight_date': '2004-01-01', 'ie_type': '入境'}, 'id': 'call_igummeorjq4r2pqjyr9tq6xq', 'type': 'tool_call'}]

    ##########  测试 多轮对话 #########
-    dsr.multi_round_with_stream('你是什么模型', 1)
-    dsr.multi_round_with_stream('你能做什么', 1)
-    dsr.multi_round_with_stream('我的上一个问题是什么?请直接返回问题,不要有多余输出及思考过程', 1)
-    dsr.multi_round_with_stream('我的第一个问题是什么?请直接返回问题,不要有多余输出及思考过程', 1)
+    # dsr.multi_with_stream('你是什么模型', 1)
+    # dsr.multi_with_stream('你能做什么', 1)
+    # dsr.multi_with_stream('我的上一个问题是什么?请直接返回问题,不要有多余输出及思考过程', 1)
+    # dsr.multi_with_stream('我的第一个问题是什么?请直接返回问题,不要有多余输出及思考过程', 1)
+
+    dsr.multi_with_tool_call_stream('播放南卡口相机', 1)
+    dsr.multi_with_tool_call_stream('1', 1)