diff --git a/models/generator.py b/models/generator.py
index 865cdaa..03c2e8f 100644
--- a/models/generator.py
+++ b/models/generator.py
@@ -1,4 +1,3 @@
-import os
 from typing import Optional
 from dashscope import MultiModalConversation
 from models.logger import setup_logger
@@ -6,12 +5,13 @@ from models.logger import setup_logger
 logger = setup_logger()
 
 class Generator:
-    def __init__(self, api_key: str, model: str = 'qwen3-vl-plus'):
+    def __init__(self, api_key: str, model: str = 'qwen3-vl-plus', text: str = '使用简洁语言描述该表情包 ,例如 在吗 ,生气,?'):
         self.api_key = api_key
         self.model = model
+        self.text = text
 
-    def process_single_image(self, image_path: str) -> Optional[str]:
-        """处理单张图片,生成描述词"""
+    def process_local_image(self, image_path: str) -> Optional[str]:
+        """Process a single local image and generate a description for it."""
         try:
             image_url = f"file://{image_path}"
             messages = [
@@ -19,7 +19,7 @@ class Generator:
                     'role': 'user',
                     'content': [
                         {'image': image_url},
-                        {'text': '使用简洁语言描述该表情包 ,例如 在吗 ,生气,?'}
+                        {'text': self.text}
                     ]
                 }
             ]
@@ -43,4 +43,49 @@ class Generator:
             logger.error(f"API调用失败: {e}")
             return None
 
-    
+    def process_link_image(self, image_url: str) -> Optional[str]:
+        """Generate a description for an image referenced by a link.
+
+        Args:
+            image_url: Image location; passed to the model unchanged.
+
+        Returns:
+            The text of the first content item of the first choice, or
+            ``None`` when no usable text is present or the call fails.
+
+        Raises:
+            AttributeError: If the response lacks an expected attribute.
+        """
+        try:
+            messages = [
+                {
+                    'role': 'user',
+                    'content': [
+                        {'image': image_url},
+                        {'text': self.text}
+                    ]
+                }
+            ]
+
+            response = MultiModalConversation.call(
+                # Forward the key stored by __init__ so this request uses
+                # it; a falsy key becomes None (key lookup left to the SDK).
+                api_key=self.api_key or None,
+                model=self.model,
+                messages=messages
+            )
+
+            if response and hasattr(response, 'output'):
+                first_item = response.output.choices[0].message.content[0]
+                if hasattr(first_item, 'text'):
+                    return first_item["text"]
+                elif isinstance(first_item, dict):
+                    return first_item.get("text", "")
+            return None
+
+        except AttributeError:
+            # Bare raise keeps the original exception and its traceback.
+            raise
+        except Exception as e:
+            logger.error(f"API调用失败: {e}")
+            return None