qwen_lora_test/csv2json.py

27 lines
699 B
Python

import pandas as pd
import json
# Load the CSV file; the code below reads its 'image_path' and 'caption' columns.
df = pd.read_csv('qwen-test/coco-2024-dataset.csv')

# Build one two-turn conversation record per CSV row: the user turn carries the
# image path wrapped in the vision markers, the assistant turn carries the caption.
# The two columns are walked in lockstep rather than indexing df.iloc[i] twice
# per row, which would build a temporary row Series on every access.
conversations = [
    {
        "id": f"identity_{row_number}",  # ids are 1-based
        "conversations": [
            {
                "from": "user",
                "value": f"COCO Yes: <|vision_start|>{image_path}<|vision_end|>",
            },
            {
                "from": "assistant",
                "value": caption,
            },
        ],
    }
    for row_number, (image_path, caption) in enumerate(
        zip(df['image_path'], df['caption']), start=1
    )
]
# Save the conversation list as JSON. ensure_ascii=False keeps non-ASCII
# characters unescaped in the output; indent=2 pretty-prints it.
output_path = '/root/PMN_WS/qwen-test/data_vl.json'
with open(output_path, mode='w', encoding='utf-8') as out_file:
    json.dump(conversations, fp=out_file, indent=2, ensure_ascii=False)