|
@@ -90,15 +90,21 @@ class NovaImageTagObject(object):
|
|
|
|
|
|
@staticmethod
def normalize_b64(b64_str: str) -> str:
    """Clean a base64 image string and normalize its padding.

    The cleaning steps, in order:

    1. Surrounding whitespace is stripped so that the anchored data-URL
       pattern can match even when the payload starts with blanks or
       newlines.
    2. A leading ``data:image/<type>;base64,`` prefix is removed.
    3. Every character outside the standard base64 alphabet
       (``A-Z a-z 0-9 + / =``) is removed, including embedded spaces and
       line breaks.
    4. Trailing ``=`` characters are dropped and the string is re-padded
       to a multiple of four characters.

    This function does not verify that the result decodes; a payload whose
    data length is one more than a multiple of four cannot be valid base64
    and will still be rejected by the decoder.

    Args:
        b64_str: Raw base64 text, optionally wrapped in an image data URL.

    Returns:
        The cleaned, padded base64 string, or ``""`` when the input is
        empty or ``None``.
    """
    if not b64_str:
        return ""

    # Strip first: the prefix pattern is anchored with "^", so leading
    # whitespace would otherwise leave the prefix in place and its letters
    # would survive the alphabet filter below, corrupting the payload.
    b64_str = re.sub(r"^data:image/[^;]+;base64,", "", b64_str.strip())

    # Remove everything that is not a base64 character (spaces, newlines, ...).
    b64_str = re.sub(r"[^A-Za-z0-9+/=]", "", b64_str)

    # Normalize trailing padding: discard whatever is there (it may be
    # partial or excessive) and add exactly what the length requires.
    b64_str = b64_str.rstrip("=")
    remainder = len(b64_str) % 4
    if remainder:
        b64_str += "=" * (4 - remainder)

    return b64_str
|
|
|
|
|
|
def process_image_batch(self, base64_images: list, categories: list, uid=''):
|
|
@@ -110,13 +116,23 @@ class NovaImageTagObject(object):
|
|
|
for idx, b64_image in enumerate(base64_images, start=1):
|
|
|
try:
|
|
|
# 规范化base64
|
|
|
+ original_b64 = b64_image # 保存原始值用于调试
|
|
|
b64_image = self.normalize_b64(b64_image)
|
|
|
+
|
|
|
if not b64_image:
|
|
|
raise ValueError("空的base64字符串")
|
|
|
|
|
|
+ # 调试输出
|
|
|
+ LOGGER.debug(f"{uid} 第{idx}张图处理前: {original_b64[:50]}...")
|
|
|
+ LOGGER.debug(f"{uid} 第{idx}张图处理后: {b64_image[:50]}...")
|
|
|
+
|
|
|
# 解码为二进制
|
|
|
img_bytes = base64.b64decode(b64_image)
|
|
|
|
|
|
+ # 验证解码后的数据
|
|
|
+ if len(img_bytes) == 0:
|
|
|
+ raise ValueError("解码后得到空字节数据")
|
|
|
+
|
|
|
# 使用PIL处理图像
|
|
|
image = Image.open(io.BytesIO(img_bytes))
|
|
|
|
|
@@ -127,18 +143,19 @@ class NovaImageTagObject(object):
|
|
|
# 转换为WebP格式
|
|
|
buffer = io.BytesIO()
|
|
|
image.save(buffer, format="webp", quality=90)
|
|
|
- img_bytes = buffer.getvalue()
|
|
|
+ webp_bytes = buffer.getvalue()
|
|
|
|
|
|
- # 直接传递二进制数据给Bedrock
|
|
|
image_contents.append({
|
|
|
- "image": {"format": "webp", "source": {"bytes": img_bytes}}
|
|
|
+ "image": {"format": "webp", "source": {"bytes": webp_bytes}}
|
|
|
})
|
|
|
|
|
|
- LOGGER.info(f"{uid} 第{idx}张图处理成功, 格式=webp, 大小={len(img_bytes)}B")
|
|
|
+ LOGGER.info(f"{uid} 第{idx}张图处理成功, 格式=webp, 大小={len(webp_bytes)}B")
|
|
|
|
|
|
except Exception as e:
|
|
|
- LOGGER.error(f"{uid} 第{idx}张图处理失败,已跳过: {repr(e)}")
|
|
|
- image_contents.append(None) # 添加占位符
|
|
|
+ LOGGER.error(f"{uid} 第{idx}张图处理失败: {repr(e)}")
|
|
|
+ LOGGER.debug(f"{uid}失败图像的Base64前100字符: {b64_image[:100]}")
|
|
|
+ # 不要添加None,而是跳过或使用占位符图像
|
|
|
+ continue # 直接跳过这张图
|
|
|
|
|
|
if not image_contents:
|
|
|
LOGGER.error(f"{uid}错误: 所有图片均无法处理。")
|