76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
from typing import Optional
|
|
from decimal import Decimal
|
|
from dashscope import MultiModalConversation
|
|
from models.logger import setup_logger
|
|
|
|
# Module-level logger; Generator uses it to report failed API calls.
logger = setup_logger()
|
|
|
|
class Generator:
    """Ask a DashScope multimodal model to describe an image.

    Each request sends one image (a local file or an http(s) URL) together
    with a fixed text prompt, and the model's text reply is returned along
    with the token usage reported for the call.
    """

    def __init__(
        self,
        api_key: str,
        model: str = 'qwen3-vl-plus',
        text: str = '使用简洁语言描述该表情包 ,例如 在吗 ,生气,?',
    ):
        """Store the settings shared by every request.

        Args:
            api_key: DashScope API key forwarded with each model call.
            model: Name of the multimodal model to invoke.
            text: Prompt sent alongside the image. The default is a Chinese
                prompt asking for a concise description of a sticker/meme.
        """
        self.api_key = api_key
        self.model = model
        self.text = text

    def _build_messages(self, image_url: str) -> list:
        """Return the single-turn user message pairing the image with the prompt."""
        return [
            {
                'role': 'user',
                'content': [
                    {'image': image_url},
                    {'text': self.text},
                ],
            }
        ]

    def _call_model(self, image_url: str):
        """Send one image reference to the model.

        Returns:
            The raw response object from ``MultiModalConversation.call``, or
            ``None`` if the call raised (the failure is logged).
        """
        try:
            return MultiModalConversation.call(
                model=self.model,
                messages=self._build_messages(image_url),
                # An empty key is passed as None so the SDK can fall back to
                # its own default key lookup instead of sending "".
                api_key=self.api_key or None,
            )
        except Exception as e:
            # Best-effort boundary: callers get None rather than an exception.
            logger.error(f"API调用失败: {e}")
            return None

    def process_local_image(self, image_path: str):
        """Describe a single local image file.

        Args:
            image_path: Filesystem path of the image; it is sent to the SDK
                as ``file://<image_path>``.

        Returns:
            The raw model response, or ``None`` if the call failed.
        """
        return self._call_model(f"file://{image_path}")

    def process_link_image(self, image_url: str):
        """Describe an image referenced by URL.

        Args:
            image_url: Link to the image, passed to the model unchanged.

        Returns:
            The raw model response, or ``None`` if the call failed.
        """
        return self._call_model(image_url)

    def get_data(self, image_string: str) -> Optional[tuple[str, tuple]]:
        """Describe an image given either a URL or a local path.

        Args:
            image_string: An ``http://`` / ``https://`` link, otherwise it is
                treated as a local file path.

        Returns:
            ``(text, (input_tokens, output_tokens))`` on success, or ``None``
            when the call failed, the response carries no usable choice, or
            the first content part of the reply is not a dict.
        """
        if image_string.startswith(('http://', 'https://')):
            response = self.process_link_image(image_string)
        else:
            response = self.process_local_image(image_string)

        # The process_* methods return None after logging a failed call.
        if response is None:
            return None

        try:
            first_part = response.output.choices[0].message.content[0]
            if not isinstance(first_part, dict):
                return None
            tokens = (response.usage.input_tokens, response.usage.output_tokens)
        except (AttributeError, IndexError, KeyError, TypeError):
            # A rejected request or an empty reply lacks output/choices/usage;
            # report it instead of crashing the caller.
            logger.error(f"API调用失败: {response}")
            return None

        return first_part.get("text", ""), tokens
|