from decimal import Decimal
from typing import Optional

from dashscope import MultiModalConversation

from models.logger import setup_logger

logger = setup_logger()


class Generator:
    """Ask a DashScope multimodal model to describe an image.

    The prompt text is sent together with a single image (a local file or an
    HTTP(S) URL), and the model's textual answer plus token usage is returned.
    """

    def __init__(self, api_key: str, model: str = 'qwen3-vl-plus',
                 text: str = '使用简洁语言描述该表情包 ,例如 在吗 ,生气,?'):
        """Store the request configuration.

        Args:
            api_key: DashScope API key used for every request.
            model: Name of the multimodal model to call.
            text: Prompt sent alongside the image.
        """
        self.api_key = api_key
        self.model = model
        self.text = text

    def _call_model(self, image_url: str):
        """Send one image plus the prompt to the model.

        Returns the raw response object, or ``None`` if the call raised.
        """
        messages = [
            {
                'role': 'user',
                'content': [
                    {'image': image_url},
                    {'text': self.text}
                ]
            }
        ]
        try:
            return MultiModalConversation.call(
                model=self.model,
                messages=messages,
                # An empty key falls back to the SDK's own key lookup
                # instead of sending an invalid credential.
                api_key=self.api_key or None,
            )
        except Exception as e:
            # Best-effort boundary: callers receive None rather than an
            # SDK/network exception.
            logger.error(f"API调用失败: {e}")
            return None

    def process_local_image(self, image_path: str):
        """Process a single local image.

        Args:
            image_path: Filesystem path of the image; it is sent to the SDK
                as ``file://<image_path>``.

        Returns:
            The raw response object, or ``None`` if the call raised.
        """
        return self._call_model(f"file://{image_path}")

    def process_link_image(self, image_url: str):
        """Process an image referenced by URL.

        Args:
            image_url: URL of the image.

        Returns:
            The raw response object, or ``None`` if the call raised.
        """
        return self._call_model(image_url)

    def get_data(self, image_string: str) -> Optional[tuple[str, tuple]]:
        """Describe an image given either a URL or a local path.

        Args:
            image_string: An ``http://`` / ``https://`` URL, or otherwise a
                local file path.

        Returns:
            ``(text, (input_tokens, output_tokens))`` on success, or ``None``
            when the call failed or the response does not have the expected
            shape.
        """
        if image_string.startswith(('http://', 'https://')):
            response = self.process_link_image(image_string)
        else:
            response = self.process_local_image(image_string)

        # The process_* methods return None when the API call raised.
        if response is None:
            return None

        try:
            first_part = response.output.choices[0].message.content[0]
            if not isinstance(first_part, dict):
                return None
            usage = response.usage
            tokens = (usage.input_tokens, usage.output_tokens)
        except (AttributeError, IndexError, KeyError, TypeError) as e:
            # A failed or malformed response (missing output, empty choices
            # or content, missing usage) yields None instead of crashing.
            logger.error(f"API响应解析失败: {e}")
            return None

        return first_part.get("text", ""), tokens