Python实现对比不同版本数据之间的好坏

发布 : 2017-06-15 分类 : Python 浏览 :
1
2
3
4
5
6
7
8
9
10
对测试集中人工进行标注的skill_type与程序中意图识别出的skill进行对比
如果skill相同,则score标为1
如果skill不相同,则score标为0
最后得出这个版本的skill评分结果集
迭代下一个版本的时候使用测试集再次跑出skill评分结果集
再将这次的评分结果集与上一次的评分结果集相对比
分别用Good/Same/Bad进行统计
Same:如果上一次的score与这一次的score相同,则same+1
Bad:如果上一次的score是1,这一次的score是0,则bad+1
Good:如果上一次的score是0,这一次的score是1,则good+1

测试集

Markdown

读写操作Excel(xlrd/xlwt 处理的是 .xls 格式,即 Excel 97-2003 工作簿)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import xlrd
import xlwt

@main.route('/get_testset_score', methods=['GET', 'POST'])
def get_api():
    """Score the labelled test set and save the result as an .xls workbook.

    Column 0 of the first sheet of the test-set workbook is read as the
    query and column 1 as the hand-labelled skill. Several acceptable
    skills may be given in one cell, separated by "/". Each query is run
    through a fresh ``Session``; the score is "1" when the class name of
    the selected skill equals one of the labelled names, otherwise "0".

    Returns:
        The literal string "test".
    """
    # Open the Excel test set.
    data = xlrd.open_workbook('/data/python_server/code/wanghaodi/lalabot/data/test/机器人测试集.xls')
    write_content = xlwt.Workbook(encoding='utf8')
    table = data.sheet_by_index(0)

    # Row 0 is treated as the header row (type_score skips it for the same
    # workbook), so it is not sent through the skill pipeline.
    labelled_rows = list(zip(table.col_values(0), table.col_values(1)))[1:]

    # Results are kept as (query, skill_name, score) tuples rather than
    # "|"-joined strings, so a query that contains "|" cannot shift columns.
    results = []
    for query, label in labelled_rows:
        print(query, label)
        # Initialise the user.
        u = User("lalabot", "123456")
        # Initialise the session and let it pick a skill.
        session = Session(u, query)
        session.select_skill_and_process()

        skill_name = session.skill.__class__.__name__
        # Splitting unconditionally covers both single and "/"-separated
        # labels; the old `label.find("/")` truthiness test was false only
        # when the slash was the first character.
        accepted_skills = label.split("/")
        score = "1" if skill_name in accepted_skills else "0"
        results.append((session.get_last_interaction().user_query, skill_name, score))

    print(results)

    # Sheet that receives the scoring result.
    write_Excel = write_content.add_sheet('打分结果', cell_overwrite_ok=True)

    # Font used for every cell.
    font = xlwt.Font()
    font.name = '微软雅黑'

    # Font size.
    font.height = 200

    # Cell style carrying the font.
    style = xlwt.XFStyle()
    style.font = font

    write_Excel.write(0, 0, "query", style)
    write_Excel.write(0, 1, "type", style)
    write_Excel.write(0, 2, "score", style)
    # Data rows start at row 1, directly below the header.
    for row_index, (user_query, skill_name, score) in enumerate(results, 1):
        write_Excel.write(row_index, 0, user_query, style)
        write_Excel.write(row_index, 1, skill_name, style)
        write_Excel.write(row_index, 2, score, style)
    write_content.save('/data/python_server/code/wanghaodi/lalabot/data/test/机器人QA评分v1.xls')
    return "test"

将打分结果保存到csv文件中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import csv
import codecs
@main.route('/get_type_score', methods=['GET', 'POST'])
def type_score():
    """Score the labelled test set and save the result as the next CSV version.

    Column 0 of the first sheet of the test-set workbook is read as the
    query and column 1 as the hand-labelled skill (several acceptable
    skills may be separated by "/"); row 0 is skipped as the header. Each
    query is run through a fresh ``Session``; the score is "1" when the
    class name of the selected skill equals one of the labelled names,
    otherwise "0".

    The rows are written to ``机器人qa评分结果集v<N>.csv``, where N is the
    first version number, counting up from 1, for which no file exists.

    Returns:
        The literal string "ok".
    """
    data = xlrd.open_workbook('/data/python_server/code/wanghaodi/lalabot/data/test/机器人测试集.xls')
    table = data.sheet_by_index(0)

    # Skip row 0, the header row.
    labelled_rows = list(zip(table.col_values(0), table.col_values(1)))[1:]

    # Rows are built directly as dicts rather than "|"-joined strings, so a
    # query that contains "|" cannot shift columns.
    data_list = []
    for query, label in labelled_rows:
        u = User("lalabot", "123456")
        session = Session(u, query)
        session.select_skill_and_process()

        skill_name = session.skill.__class__.__name__
        # Splitting unconditionally covers both single and "/"-separated
        # labels; the old `label.find("/")` truthiness test was false only
        # when the slash was the first character.
        accepted_skills = label.split("/")
        score = "1" if skill_name in accepted_skills else "0"
        data_list.append({
            'query': session.get_last_interaction().user_query,
            'type': skill_name,
            'score': score,
        })

    headers = ['query', 'type', 'score']
    path_prefix = '/data/python_server/code/wanghaodi/lalabot/data/test/机器人qa评分结果集v'

    # Find the first unused version number. The previous loop only wrote a
    # file when v1 already existed, so the very first run saved nothing.
    num = 1
    while os.path.exists(path_prefix + str(num) + '.csv'):
        num = num + 1

    # utf_8_sig writes a BOM at the start of the file.
    with codecs.open(path_prefix + str(num) + '.csv', 'w', 'utf_8_sig') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(data_list)

    return "ok"

打分结果

Markdown

对比不同版本数据之间的好坏

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import csv
import codecs
def _read_scores(path):
    """Return the integer scores (third column) of one result CSV, in file order."""
    scores = []
    # The result files are written with utf_8_sig; reading with the same
    # codec strips the BOM and does not depend on the platform default.
    with codecs.open(path, 'r', 'utf_8_sig') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            if len(row) < 3:
                # Blank or truncated line: there is no score to read.
                continue
            scores.append(int(row[2]))
    return scores


@main.route('/contrast_score', methods=['GET', 'POST'])
def gsb():
    """Compare the two most recent score files and count Good/Same/Bad.

    Result files named ``机器人qa评分结果集v<N>.csv`` are looked up directly
    in the test-data directory. The two highest version numbers are
    compared row by row, in file order:

    * same: both versions have the same score
    * bad:  the older score is higher than the newer one (1 -> 0)
    * good: the older score is lower than the newer one (0 -> 1)

    When fewer than two result files exist there is nothing to compare and
    all three counts are 0.

    Returns:
        ``fmt.format_final_response`` applied to
        ``{"response": {"good": ..., "same": ..., "bad": ...}}``.
    """
    result_dir = '/data/python_server/code/wanghaodi/lalabot/data/test'
    prefix = '机器人qa评分结果集v'
    suffix = '.csv'

    # Map version number -> file name. os.listdir looks at this directory
    # only; the version comes from the file name rather than from how many
    # names happen to match.
    result_files = {}
    for name in os.listdir(result_dir):
        if name.startswith(prefix) and name.endswith(suffix):
            version = name[len(prefix):-len(suffix)]
            if version.isdigit():
                result_files[int(version)] = name
    print(result_files)

    good = 0
    same = 0
    bad = 0
    versions = sorted(result_files)
    if len(versions) >= 2:
        previous = _read_scores(os.path.join(result_dir, result_files[versions[-2]]))
        current = _read_scores(os.path.join(result_dir, result_files[versions[-1]]))
        # Rows are paired by position; zip stops at the shorter file.
        for old_score, new_score in zip(previous, current):
            if old_score == new_score:
                same = same + 1
            elif old_score > new_score:
                bad = bad + 1
            else:
                good = good + 1

    rows = {"good": good, "same": same, "bad": bad}
    return fmt.format_final_response({"response": rows})
本文作者 : Matrix
原文链接 : https://matrixsparse.github.io/2017/06/15/Python实现对比不同版本数据之间的好坏/
版权声明 : 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明出处!

知识 & 情怀 | 二者兼得

微信扫一扫, 向我投食

微信扫一扫, 向我投食

支付宝扫一扫, 向我投食

支付宝扫一扫, 向我投食

留下足迹