diff --git a/tests/outputs/results (CoT).CSV b/tests/outputs/results (CoT).CSV new file mode 100644 index 0000000..95b2c48 --- /dev/null +++ b/tests/outputs/results (CoT).CSV @@ -0,0 +1,13 @@ +,model name,dataname,expected,human,AI,unsure,invalid +0,Microsoft Phi,LDAG,human,13,14,12,20 +1,Microsoft Phi,HLTC,human,12,11,19,17 +2,Microsoft Phi,LDAG,AI,10,18,14,17 +3,Microsoft Phi,HLTC,AI,14,13,23,9 +4,Gemma 2,LDAG,human,35,24,0,0 +5,Gemma 2,HLTC,human,45,14,0,0 +6,Gemma 2,LDAG,AI,51,8,0,0 +7,Gemma 2,HLTC,AI,42,17,0,0 +8,Tinyllama,LDAG,human,11,4,34,10 +9,Tinyllama,HLTC,human,13,5,30,11 +10,Tinyllama,LDAG,AI,11,5,31,12 +11,Tinyllama,HLTC,AI,11,3,35,10 diff --git a/tests/outputs/results (Multi-Shot).CSV b/tests/outputs/results (Multi-Shot).CSV new file mode 100644 index 0000000..74264db --- /dev/null +++ b/tests/outputs/results (Multi-Shot).CSV @@ -0,0 +1,13 @@ +,model name,dataname,expected,human,AI,unsure,invalid +0,Microsoft Phi,LDAG,human,1,10,46,2 +1,Microsoft Phi,HLTC,human,7,6,44,2 +2,Microsoft Phi,LDAG,AI,7,4,43,5 +3,Microsoft Phi,HLTC,AI,4,7,47,1 +4,Gemma 2,LDAG,human,24,35,0,0 +5,Gemma 2,HLTC,human,48,11,0,0 +6,Gemma 2,LDAG,AI,49,10,0,0 +7,Gemma 2,HLTC,AI,48,11,0,0 +8,Tinyllama,LDAG,human,24,5,28,2 +9,Tinyllama,HLTC,human,17,3,36,3 +10,Tinyllama,LDAG,AI,14,3,37,5 +11,Tinyllama,HLTC,AI,17,1,39,2 diff --git a/tests/outputs/results (classic).CSV b/tests/outputs/results (classic).CSV new file mode 100644 index 0000000..afb562c --- /dev/null +++ b/tests/outputs/results (classic).CSV @@ -0,0 +1,13 @@ +,model name,dataname,expected,human,AI,unsure,invalid +0,Microsoft Phi,LDAG,human,3,4,48,4 +1,Microsoft Phi,HLTC,human,7,6,42,4 +2,Microsoft Phi,LDAG,AI,5,9,43,2 +3,Microsoft Phi,HLTC,AI,4,4,49,2 +4,Gemma 2,LDAG,human,38,21,0,0 +5,Gemma 2,HLTC,human,47,12,0,0 +6,Gemma 2,LDAG,AI,51,8,0,0 +7,Gemma 2,HLTC,AI,46,13,0,0 +8,Tinyllama,LDAG,human,13,3,37,6 +9,Tinyllama,HLTC,human,9,3,40,7 +10,Tinyllama,LDAG,AI,8,5,40,6 +11,Tinyllama,HLTC,AI,11,4,37,7