jjsos_JJdetection/check_dataset_match.py

39 lines
1.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
def check_dataset_match(images_dir, labels_dir):
    """Report whether every image has an annotation file and vice versa.

    Lists the ``.jpg`` files in *images_dir* and the ``.txt`` files in
    *labels_dir* (extensions are compared case-insensitively and
    ``classes.txt`` is skipped), pairs them by base name, and prints the
    two counts, an overall match status, and any unpaired names.

    Args:
        images_dir: Directory containing the image files.
        labels_dir: Directory containing the annotation files.

    Returns:
        None. All results are printed to standard output.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot
            be listed.
    """
    # Collect the image files.
    image_files = [
        f for f in os.listdir(images_dir) if f.lower().endswith('.jpg')
    ]
    # Collect the annotation files, excluding classes.txt. The exclusion is
    # case-insensitive so it stays consistent with the extension check.
    label_files = [
        f for f in os.listdir(labels_dir)
        if f.lower().endswith('.txt') and f.lower() != 'classes.txt'
    ]

    # Base names (without extension) are what pair an image with its label.
    image_names = {os.path.splitext(f)[0] for f in image_files}
    label_names = {os.path.splitext(f)[0] for f in label_files}

    # Names present on one side only.
    missing_images = label_names - image_names
    missing_labels = image_names - label_names

    # Equal counts alone do not prove a match: the two sides can hold the
    # same number of files under different names. Require both equal counts
    # and no unpaired names before reporting a match.
    is_match = (
        len(image_files) == len(label_files)
        and not missing_images
        and not missing_labels
    )

    print(f"图片数量: {len(image_files)}")
    print(f"标注数量: {len(label_files)}")
    print(f"匹配状态: {'匹配' if is_match else '不匹配'}")

    if missing_images:
        print("\n缺少对应的图片文件:")
        for name in sorted(missing_images):
            print(f"- {name}.jpg")
    if missing_labels:
        print("\n缺少对应的标注文件:")
        for name in sorted(missing_labels):
            print(f"- {name}.txt")
if __name__ == "__main__":
    # Script entry point: check the hard-coded training split and frame the
    # report with start/finish messages.
    dataset_dirs = {
        "images_dir": r'F:\myprojects\啾啾救援识别系统\JJCarDetection\datasets\猫狗\train\images',
        "labels_dir": r'F:\myprojects\啾啾救援识别系统\JJCarDetection\datasets\猫狗\train\labels',
    }
    print("正在检查数据集匹配情况...")
    check_dataset_match(**dataset_dirs)
    print("\n检查完成")