27 lines
699 B
Python
27 lines
699 B
Python
import pandas as pd
|
|
import json
|
|
|
|
# Load the source CSV; the code below reads its `image_path` and `caption`
# columns.
df = pd.read_csv('qwen-test/coco-2024-dataset.csv')

# Build one two-turn conversation per CSV row: the user turn carries the image
# path wrapped in the vision markers, the assistant turn carries the caption.
# The two columns are iterated in lockstep (positional order, ids are 1-based)
# instead of calling `df.iloc[i]` twice per row, which would materialise a full
# row Series on every lookup.
# NOTE(review): pandas reads an empty caption cell as NaN, and json.dump would
# then emit a bare NaN token (not strict JSON) -- confirm the CSV has no
# missing captions.
conversations = [
    {
        "id": f"identity_{row_number}",
        "conversations": [
            {
                "from": "user",
                "value": f"COCO Yes: <|vision_start|>{image_path}<|vision_end|>",
            },
            {
                "from": "assistant",
                "value": caption,
            },
        ],
    }
    for row_number, (image_path, caption) in enumerate(
        zip(df['image_path'], df['caption']), start=1
    )
]

# Save as JSON; ensure_ascii=False writes non-ASCII caption text verbatim
# rather than as \uXXXX escapes.
with open('/root/PMN_WS/qwen-test/data_vl.json', 'w', encoding='utf-8') as f:
    json.dump(conversations, f, ensure_ascii=False, indent=2)