Skip to content

Commit ff580fa

Browse files
committed
add qwen-audio support and allow overriding default settings via environment variables
1 parent 6037e4e commit ff580fa

File tree

12 files changed

+105
-35
lines changed

12 files changed

+105
-35
lines changed

‎examples/assistant_audio.py‎

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from qwen_agent.agents import Assistant
2+
from qwen_agent.gui import WebUI
3+
4+
5+
def test():
6+
bot = Assistant(llm={'model_type': 'qwenaudio_dashscope', 'model': 'qwen-audio-turbo-latest'})
7+
messages = [{
8+
'role':
9+
'user',
10+
'content': [{
11+
'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/welcome.mp3'
12+
}, {
13+
'text': '这段音频在说什么?'
14+
}]
15+
}]
16+
for rsp in bot.run(messages):
17+
print(rsp)
18+
19+
20+
def app_gui():
21+
# Define the agent
22+
bot = Assistant(llm={'model': 'qwen-audio-turbo-latest'})
23+
WebUI(bot).run()
24+
25+
26+
if __name__ == '__main__':
27+
# test()
28+
app_gui()

‎qwen_agent/agents/assistant.py‎

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import copy
22
import datetime
3+
import json
34
from typing import Dict, Iterator, List, Literal, Optional, Union
45

5-
import json5
6-
76
from qwen_agent.agents.fncall_agent import FnCallAgent
87
from qwen_agent.llm import BaseChatModel
98
from qwen_agent.llm.schema import CONTENT, DEFAULT_SYSTEM_MESSAGE, ROLE, SYSTEM, Message
@@ -41,7 +40,7 @@ def format_knowledge_to_source_and_content(result: Union[str, List[dict]]) -> Li
4140
if isinstance(result, str):
4241
result = f'{result}'.strip()
4342
try:
44-
docs = json5.loads(result)
43+
docs = json.loads(result)
4544
except Exception:
4645
print_traceback()
4746
knowledge.append({'source': '上传的文档', 'content': result})

‎qwen_agent/agents/dialogue_retrieval_agent.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _run(self,
7070
new_content = [ContentItem(text=query), ContentItem(file=file_path)]
7171
if isinstance(messages[-1].content, list):
7272
for item in messages[-1].content:
73-
if item.file or item.image:
73+
if item.file or item.image or item.audio:
7474
new_content.append(item)
7575
new_messages.append(Message(role=USER, content=new_content))
7676

‎qwen_agent/gui/web_ui.py‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from qwen_agent.agents.user_agent import PENDING_USER_INPUT
88
from qwen_agent.gui.gradio_utils import format_cover_html
99
from qwen_agent.gui.utils import convert_fncall_to_text, convert_history_to_chatbot, get_avatar_image
10-
from qwen_agent.llm.schema import CONTENT, FILE, IMAGE, NAME, ROLE, USER, Message
10+
from qwen_agent.llm.schema import AUDIO, CONTENT, FILE, IMAGE, NAME, ROLE, USER, Message
1111
from qwen_agent.log import logger
1212
from qwen_agent.utils.utils import print_traceback
1313

@@ -212,6 +212,8 @@ def add_text(self, _input, _chatbot, _history):
212212
for file in _input.files:
213213
if file.mime_type.startswith('image/'):
214214
_history[-1][CONTENT].append({IMAGE: 'file://' + file.path})
215+
elif file.mime_type.startswith('audio/'):
216+
_history[-1][CONTENT].append({AUDIO: 'file://' + file.path})
215217
else:
216218
_history[-1][CONTENT].append({FILE: file.path})
217219

‎qwen_agent/llm/__init__.py‎

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .oai import TextChatAtOAI
77
from .openvino import OpenVINO
88
from .qwen_dashscope import QwenChatAtDS
9+
from .qwenaudio_dashscope import QwenAudioChatAtDS
910
from .qwenvl_dashscope import QwenVLChatAtDS
1011
from .qwenvl_oai import QwenVLChatAtOAI
1112

@@ -62,11 +63,15 @@ def get_chat_model(cfg: Union[dict, str] = 'qwen-plus') -> BaseChatModel:
6263

6364
model = cfg.get('model', '')
6465

65-
if 'qwen-vl' in model:
66+
if '-vl' in model.lower():
6667
model_type = 'qwenvl_dashscope'
6768
return LLM_REGISTRY[model_type](cfg)
6869

69-
if 'qwen' in model:
70+
if '-audio' in model.lower():
71+
model_type = 'qwenaudio_dashscope'
72+
return LLM_REGISTRY[model_type](cfg)
73+
74+
if 'qwen' in model.lower():
7075
model_type = 'qwen_dashscope'
7176
return LLM_REGISTRY[model_type](cfg)
7277

@@ -80,6 +85,7 @@ def get_chat_model(cfg: Union[dict, str] = 'qwen-plus') -> BaseChatModel:
8085
'TextChatAtAzure',
8186
'QwenVLChatAtDS',
8287
'QwenVLChatAtOAI',
88+
'QwenAudioChatAtDS',
8389
'OpenVINO',
8490
'get_chat_model',
8591
'ModelServiceError',
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from typing import Dict, Optional
2+
3+
from qwen_agent.llm.base import register_llm
4+
from qwen_agent.llm.qwenvl_dashscope import QwenVLChatAtDS
5+
6+
7+
@register_llm('qwenaudio_dashscope')
8+
class QwenAudioChatAtDS(QwenVLChatAtDS):
9+
10+
def __init__(self, cfg: Optional[Dict] = None):
11+
super().__init__(cfg)
12+
self.model = self.model or 'qwen-audio-turbo-latest'

‎qwen_agent/llm/qwenvl_dashscope.py‎

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,22 @@ def _format_local_files(messages: List[Message]) -> List[Message]:
9393
fname = fname.replace('\\', '/')
9494
fname = 'file://' + fname
9595
item.image = fname
96+
if item.audio:
97+
fname = item.audio
98+
if not fname.startswith((
99+
'http://',
100+
'https://',
101+
'file://',
102+
'data:', # base64 data URI, e.g. f"data:audio/mpeg;base64,{audio_base64}"
103+
)):
104+
if fname.startswith('~'):
105+
fname = os.path.expanduser(fname)
106+
fname = os.path.abspath(fname)
107+
if os.path.isfile(fname):
108+
if re.match(r'^[A-Za-z]:\\', fname):
109+
fname = fname.replace('\\', '/')
110+
fname = 'file://' + fname
111+
item.audio = fname
96112
return messages
97113

98114

‎qwen_agent/llm/schema.py‎

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
FILE = 'file'
1717
IMAGE = 'image'
18+
AUDIO = 'audio'
1819

1920

2021
class BaseModelCompatibleDict(BaseModel):
@@ -64,9 +65,14 @@ class ContentItem(BaseModelCompatibleDict):
6465
text: Optional[str] = None
6566
image: Optional[str] = None
6667
file: Optional[str] = None
68+
audio: Optional[str] = None
6769

68-
def __init__(self, text: Optional[str] = None, image: Optional[str] = None, file: Optional[str] = None):
69-
super().__init__(text=text, image=image, file=file)
70+
def __init__(self,
71+
text: Optional[str] = None,
72+
image: Optional[str] = None,
73+
file: Optional[str] = None,
74+
audio: Optional[str] = None):
75+
super().__init__(text=text, image=image, file=file, audio=audio)
7076

7177
@model_validator(mode='after')
7278
def check_exclusivity(self):
@@ -77,21 +83,23 @@ def check_exclusivity(self):
7783
provided_fields += 1
7884
if self.file:
7985
provided_fields += 1
86+
if self.audio:
87+
provided_fields += 1
8088

8189
if provided_fields != 1:
82-
raise ValueError("Exactly one of 'text', 'image', or 'file' must be provided.")
90+
raise ValueError("Exactly one of 'text', 'image', 'file', or 'audio' must be provided.")
8391
return self
8492

8593
def __repr__(self):
8694
return f'ContentItem({self.model_dump()})'
8795

88-
def get_type_and_value(self) -> Tuple[Literal['text', 'image', 'file'], str]:
96+
def get_type_and_value(self) -> Tuple[Literal['text', 'image', 'file', 'audio'], str]:
8997
(t, v), = self.model_dump().items()
90-
assert t in ('text', 'image', 'file')
98+
assert t in ('text', 'image', 'file', 'audio')
9199
return t, v
92100

93101
@property
94-
def type(self) -> Literal['text', 'image', 'file']:
102+
def type(self) -> Literal['text', 'image', 'file', 'audio']:
95103
t, v = self.get_type_and_value()
96104
return t
97105

‎qwen_agent/settings.py‎

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,25 @@
1+
import ast
2+
import os
13
from typing import List, Literal
24

35
# Settings for LLMs
4-
DEFAULT_MAX_INPUT_TOKENS: int = 28000 # The LLM will truncate the input messages if they exceed this limit
6+
DEFAULT_MAX_INPUT_TOKENS: int = int(os.getenv(
7+
'QWEN_AGENT_DEFAULT_MAX_INPUT_TOKENS', 30000)) # The LLM will truncate the input messages if they exceed this limit
58

69
# Settings for agents
7-
MAX_LLM_CALL_PER_RUN: int = 8
10+
MAX_LLM_CALL_PER_RUN: int = int(os.getenv('QWEN_AGENT_MAX_LLM_CALL_PER_RUN', 8))
811

912
# Settings for tools
10-
DEFAULT_WORKSPACE: str = 'workspace'
13+
DEFAULT_WORKSPACE: str = os.getenv('QWEN_AGENT_DEFAULT_WORKSPACE', 'workspace')
1114

1215
# Settings for RAG
13-
DEFAULT_MAX_REF_TOKEN: int = 4000 # The window size reserved for RAG materials
14-
DEFAULT_PARSER_PAGE_SIZE: int = 500 # Max tokens per chunk when doing RAG
16+
DEFAULT_MAX_REF_TOKEN: int = int(os.getenv('QWEN_AGENT_DEFAULT_MAX_REF_TOKEN',
17+
20000)) # The window size reserved for RAG materials
18+
DEFAULT_PARSER_PAGE_SIZE: int = int(os.getenv('QWEN_AGENT_DEFAULT_PARSER_PAGE_SIZE',
19+
500)) # Max tokens per chunk when doing RAG
1520
DEFAULT_RAG_KEYGEN_STRATEGY: Literal['None', 'GenKeyword', 'SplitQueryThenGenKeyword', 'GenKeywordWithKnowledge',
16-
'SplitQueryThenGenKeywordWithKnowledge'] = 'SplitQueryThenGenKeyword'
17-
DEFAULT_RAG_SEARCHERS: List[str] = ['keyword_search', 'front_page_search'] # Sub-searchers for hybrid retrieval
21+
'SplitQueryThenGenKeywordWithKnowledge'] = os.getenv(
22+
'QWEN_AGENT_DEFAULT_RAG_KEYGEN_STRATEGY', 'GenKeyword')
23+
DEFAULT_RAG_SEARCHERS: List[str] = ast.literal_eval(
24+
os.getenv('QWEN_AGENT_DEFAULT_RAG_SEARCHERS',
25+
"['keyword_search', 'front_page_search']")) # Sub-searchers for hybrid retrieval

‎qwen_agent/tools/doc_parser.py‎

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import time
55
from typing import Dict, List, Optional, Union
66

7-
import json5
87
from pydantic import BaseModel
98

109
from qwen_agent.log import logger
@@ -90,12 +89,7 @@ def call(self, params: Union[str, dict], **kwargs) -> dict:
9089
try:
9190
# Directly load the chunked doc
9291
record = self.db.get(cached_name_chunking)
93-
try:
94-
record = json5.loads(record)
95-
except ValueError:
96-
logger.warning(
97-
f'Encountered ValueError raised by json5. Fall back to json. File: {cached_name_chunking}')
98-
record = json.loads(record)
92+
record = json.loads(record)
9993
logger.info(f'Read chunked {url} from cache.')
10094
return record
10195
except KeyNotExistsError:

0 commit comments

Comments
 (0)