123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- # -*- encoding: utf-8 -*-
- """
- @File : NovaImageTagObject.py
- @Time : 2025/8/29 09:03
- @Author : stephen
- @Email : zhangdongming@asj6.wecom.work
- @Software: PyCharm
- """
- import base64
- import imghdr
- import json
- import logging
- import re
- import boto3
- LOGGER = logging.getLogger('time')
- # --- Configuration ---
- MODEL_ID = "us.amazon.nova-lite-v1:0"
class NovaImageTagObject(object):
    """Detect objects in batches of images through the Bedrock Converse API.

    A whole batch of base64-encoded images is sent in a single request; the
    model is asked to answer with one JSON object keyed ``image_<k>``, which
    is then converted to per-file detection lists keyed ``file_<i>``.
    """

    # Formats forwarded to the model; any other detected type is skipped.
    _SUPPORTED_FORMATS = ("jpeg", "png", "webp")

    def __init__(self, aws_access_key_id, secret_access_key, region_name):
        """Create the ``bedrock-runtime`` client used for all requests.

        Args:
            aws_access_key_id: AWS access key id passed to ``boto3.client``.
            secret_access_key: AWS secret access key passed to ``boto3.client``.
            region_name: AWS region name passed to ``boto3.client``.
        """
        self.bedrock = boto3.client(
            'bedrock-runtime',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=secret_access_key,
            region_name=region_name
        )

    @staticmethod
    def _repair_json_text(text):
        """Apply best-effort fixes to almost-JSON text and return the result.

        Fixes applied, in order:
          * bare keys are quoted, but only where a key can appear (right
            after ``{`` or ``,``) so that ``word:`` sequences inside string
            values such as ``http://`` are left alone;
          * single quotes are replaced with double quotes;
          * trailing commas before ``}`` or ``]`` are removed.
        """
        text = re.sub(r'([{,]\s*)(\w+)(\s*:)', r'\1"\2"\3', text)
        text = text.replace("'", '"')
        text = re.sub(r',\s*([}\]])', r'\1', text)
        return text

    @staticmethod
    def safe_json_load(json_string):
        """Parse JSON from model output, tolerating common formatting noise.

        The text inside a ```` ```json ```` fence is preferred when present;
        then the outermost ``{...}`` or ``[...]`` span is extracted and
        parsed. If strict parsing fails, one repair pass is attempted.

        Args:
            json_string: Raw text returned by the model.

        Returns:
            The decoded JSON value, or ``None`` when parsing fails (the
            failure is logged, never raised).
        """
        try:
            # Prefer JSON wrapped in a fenced code block.
            fenced = re.search(r'```json\s*([\s\S]*?)\s*```', json_string)
            if fenced:
                json_string = fenced.group(1)
            # Then narrow down to the outermost JSON object or array.
            embedded = re.search(r'\{.*\}|\[.*\]', json_string, re.DOTALL)
            if embedded:
                json_string = embedded.group(0)
            return json.loads(json_string)
        except json.JSONDecodeError:
            LOGGER.error("JSON解析失败,尝试修复...")
            try:
                return json.loads(NovaImageTagObject._repair_json_text(json_string))
            except Exception as e:
                LOGGER.error(f"无法解析模型返回的JSON: {e}")
                return None
        except Exception as e:
            # e.g. a non-string argument handed to re.search
            LOGGER.error(f"发生未知解析错误: {e}")
            return None

    @staticmethod
    def _is_valid_box(box):
        """Return True when *box* is a sequence of exactly four real numbers."""
        if not isinstance(box, (list, tuple)) or len(box) != 4:
            return False
        return all(
            isinstance(value, (int, float)) and not isinstance(value, bool)
            for value in box
        )

    @staticmethod
    def format_and_convert_detections(nova_detections: list) -> list:
        """Convert model detections into the detailed output records.

        Each input item is expected to be a dict mapping a label to
        ``[x1, y1, x2, y2]`` on the 1000x1000 grid requested in the prompt.
        Every output record keeps the raw coordinates (``x1``/``y1``/``x2``/
        ``y2``) and adds ``Left``/``Top``/``Width``/``Height`` as ratios
        (coordinate / 1000) formatted with five decimals, plus ``class``.

        Items that are not dicts, and label entries whose value is not a
        list/tuple of four numbers, are skipped so that one malformed
        detection cannot discard the rest of the batch.

        Args:
            nova_detections: List of detections for a single image.

        Returns:
            A list of record dicts; empty when the input is not a list.
        """
        formatted_results = []
        if not isinstance(nova_detections, list):
            return formatted_results

        for item in nova_detections:
            if not isinstance(item, dict):
                continue
            for label, box in item.items():
                if not NovaImageTagObject._is_valid_box(box):
                    continue
                nx1, ny1, nx2, ny2 = box
                left = nx1 / 1000.0
                top = ny1 / 1000.0
                width = (nx2 - nx1) / 1000.0
                height = (ny2 - ny1) / 1000.0
                formatted_results.append({
                    "x1": nx1, "x2": nx2, "y1": ny1, "y2": ny2,
                    "Width": f"{width:.5f}", "Height": f"{height:.5f}",
                    "Top": f"{top:.5f}", "Left": f"{left:.5f}",
                    "class": label
                })
        return formatted_results

    @staticmethod
    def _detect_image_format(img_bytes):
        """Identify the image type from its leading magic bytes.

        Used instead of ``imghdr``, which is removed in Python 3.13 and does
        not recognise JPEG data lacking a JFIF/Exif marker.

        Returns:
            ``"jpeg"``, ``"png"``, ``"webp"``, ``"gif"``, or ``None`` when the
            signature is not recognised.
        """
        if img_bytes[:3] == b"\xff\xd8\xff":
            return "jpeg"
        if img_bytes[:8] == b"\x89PNG\r\n\x1a\n":
            return "png"
        if img_bytes[:4] == b"RIFF" and img_bytes[8:12] == b"WEBP":
            return "webp"
        if img_bytes[:6] in (b"GIF87a", b"GIF89a"):
            return "gif"
        return None

    @staticmethod
    def _build_prompt(num_images, category_str):
        """Build the multi-image detection prompt sent after the images."""
        lines = [
            "",
            f"You have been provided with {num_images} images. Analyze each image sequentially.",
            f"For each image, detect bounding boxes of objects from the following categories: {category_str}.",
            "Your output MUST be a single, valid JSON object.",
            f'The keys of this object should be "image_0", "image_1", ..., "image_{num_images - 1}", '
            "corresponding to the first, second, and subsequent images provided.",
            "The value for each key must be a list of detected objects for that specific image. "
            "If no objects are detected in an image, the value should be an empty list [].",
            "Use a 1000x1000 coordinate system for the bounding boxes.",
            f"Example output format for {num_images} images:",
            "{",
            '"image_0": [{"person": [100, 150, 200, 350]}, {"car": [400, 500, 600, 700]}],',
            '"image_1": [],',
            '"image_2": [{"package": [300, 300, 400, 400]}]',
            "}",
            "",
        ]
        return "\n".join(lines)

    @staticmethod
    def _assemble_results(batch_results, sent_indices, total_images):
        """Map the model's ``image_<k>`` results back to ``file_<i>`` keys.

        The model numbers only the images that were actually sent, so the
        k-th sent image must be mapped back to its original position.

        Args:
            batch_results: Dict decoded from the model output.
            sent_indices: Original positions of the images that were sent,
                in sending order.
            total_images: Number of images originally supplied.

        Returns:
            Dict with one ``file_<i>`` key per supplied image; images that
            were skipped or not mentioned by the model get an empty list.
        """
        final_output = {f"file_{i}": [] for i in range(total_images)}
        for position, original_index in enumerate(sent_indices):
            detections = batch_results.get(f"image_{position}", [])
            final_output[f"file_{original_index}"] = (
                NovaImageTagObject.format_and_convert_detections(detections)
            )
        return final_output

    def process_image_batch(self, base64_images: list, categories: list, uid=''):
        """Run detection on a batch of images with a single API call.

        Args:
            base64_images: Base64-encoded images. Entries that cannot be
                decoded or are not JPEG/PNG/WebP are skipped (and logged).
            categories: Category names to detect; lower-cased in the prompt.
            uid: Optional identifier prefixed to log messages.

        Returns:
            Dict mapping ``file_<i>`` (``i`` is the position in
            *base64_images*) to a list of detection records. An empty dict is
            returned when no image is usable, the model output is not a JSON
            object, or the request fails.
        """
        if not base64_images:
            LOGGER.error(f"{uid}错误: 未提供图片数据。")
            return {}

        image_contents = []
        # Original position of every image actually sent to the model.
        sent_indices = []
        for index, b64_image in enumerate(base64_images):
            try:
                # Log only the size: the payload itself can be megabytes.
                LOGGER.info('{}:image[{}] base64 length={}'.format(uid, index, len(b64_image)))
                img_bytes = base64.b64decode(b64_image)
                img_type = self._detect_image_format(img_bytes)
                if img_type not in self._SUPPORTED_FORMATS:
                    raise ValueError(f"不支持的图片格式: {img_type}")
                image_contents.append({"image": {"format": img_type, "source": {"bytes": img_bytes}}})
                sent_indices.append(index)
            except Exception as e:
                LOGGER.error(f"{uid}处理图片时出错,已跳过: {repr(e)}")

        if not image_contents:
            LOGGER.error(f"{uid}错误: 所有图片均无法处理。")
            return {}

        category_str = ", ".join(f'"{str(cat).lower()}"' for cat in categories)
        prompt = self._build_prompt(len(image_contents), category_str)
        messages = [{"role": "user", "content": image_contents + [{"text": prompt}]}]

        try:
            response = self.bedrock.converse(
                modelId=MODEL_ID,
                messages=messages,
                inferenceConfig={"temperature": 0.0, "maxTokens": 4096, "topP": 1.0},
            )
            model_output = response["output"]["message"]["content"][0]["text"]
            LOGGER.info(f"\n--- {uid}模型对整个批次的原始输出 ---\n{model_output}")

            # The model answers with one JSON object covering every image.
            batch_results = self.safe_json_load(model_output)
            if not batch_results or not isinstance(batch_results, dict):
                LOGGER.error(f"{uid}模型未返回预期的字典格式结果。")
                return {}

            return self._assemble_results(batch_results, sent_indices, len(base64_images))
        except Exception as e:
            LOGGER.error(f"{uid}调用Bedrock模型或处理过程中发生错误: {repr(e)}")
            return {}
|