'''
Analyse LLM "human or AI?" authorship verdicts and grade each model's accuracy.

Expects three command-line arguments: a responses JSON file, a models JSON
file, and an output CSV path.
'''
import json
import re
import sys

import pandas


class ResultsAnalysis:

    def analyse(self):
        '''
        Get the final verdicts by the LLMs.

        Returns:
            (dict) the verdicts, as one DataFrame per author type and source
        '''
        self.results['verdicts'] = {}
        for type_name in self.imports['data']:
            self.results['verdicts'][type_name] = {}
            # Loop through each source.
            for source_name in self.imports['data'][type_name]:
                verdicts = pandas.DataFrame(columns=list(self.imports['models'].keys()))
                self.results['verdicts'][type_name][source_name] = verdicts
                # Get the response of each LLM.
                for responses_all in self.imports['data'][type_name][source_name]:
                    responses_row = []
                    for responses in responses_all.values():
                        # Split the last line of the response into fragments
                        # (\s already covers tabs and spaces).
                        fragments = re.split(r'[-/\s]+', responses[-1].lower().strip())
                        valid = False
                        if 'maybe' in fragments or 'or' in fragments:
                            # Hedged answer: record it as "unsure".
                            responses_row.append('')
                        else:
                            # Scan fragments until a recognised verdict appears.
                            while fragments and not valid:
                                fragment = fragments.pop(0)
                                if fragment in ('human', 'professional', 'expert'):
                                    valid = True
                                    responses_row.append('human')
                                elif fragment in ('ai', 'llm', 'model'):
                                    valid = True
                                    responses_row.append('AI')
                            if not valid:
                                # No recognisable verdict: record as invalid.
                                responses_row.append(False)
                    verdicts.loc[len(verdicts)] = responses_row
        return self.results['verdicts']

    def grade(self):
        '''
        Determine the accuracy of the responses by tallying each model's
        verdicts against the expected author type.

        Returns:
            (pandas.DataFrame) the accuracy table
        '''
        self.results['accuracy'] = pandas.DataFrame(
            columns=['model name', 'dataname', 'expected',
                     'human', 'AI', 'unsure', 'invalid'])
        for model_name in self.imports['models']:
            for author_type in self.results['verdicts']:
                for dataname in self.results['verdicts'][author_type]:
                    counts = self.results['verdicts'][author_type][dataname][model_name].value_counts()
                    row = [model_name, dataname, author_type,
                           counts.get('human', 0), counts.get('AI', 0),
                           counts.get('', 0), counts.get(False, 0)]
                    self.results['accuracy'].loc[len(self.results['accuracy'])] = row
        return self.results['accuracy']

    def export(self, path):
        '''Write the accuracy table to a CSV file.'''
        self.results['accuracy'].to_csv(path)

    def __import(self, parameters):
        '''
        Import the necessary files.

        Parameters:
            parameters: the launch parameters (sys.argv)
        '''
        config = {'data': 1, 'models': 2}
        for name, index in config.items():
            with open(parameters[index]) as handle:
                self.imports[name] = json.load(handle)

    def __init__(self, parameters):
        '''
        Begin the results analysis.

        Parameters:
            parameters: launch parameters (sys.argv)
        '''
        self.results = {}
        self.output = {}
        self.imports = {}
        if len(parameters) != 4:
            raise ValueError("You must pass exactly the following (in order):\n"
                             "\t1.\tthe responses file,\n"
                             "\t2.\tthe models file, and\n"
                             "\t3.\tthe output file.")
        # Select the paths.
        self.__import(parameters)
        self.output['path'] = parameters[3]
        # Analyse the responses.
        self.analyse()
        self.grade()
        print(self.results['accuracy'])
        # Export the responses.
        self.export(self.output['path'])


if __name__ == "__main__":
    ResultsAnalysis(sys.argv)
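
# ----------------------------------------------------------------------
# Usage sketch. The exact JSON layouts are not documented in this file,
# so the shapes below are assumptions inferred from how analyse() and
# __import() index the data; all file and model names are hypothetical.
#
# Assumed responses file (argument 1): keyed by author type, then by
# source/dataset name, holding one dict of per-model responses per
# sample. Each response is a list of lines, and only the last line is
# parsed for a verdict:
#
#   {
#       "human": {
#           "essays": [
#               {"model-a": ["...reasoning...", "Human"],
#                "model-b": ["...reasoning...", "AI"]}
#           ]
#       }
#   }
#
# Assumed models file (argument 2): only its top-level keys are used,
# as the verdict DataFrame's columns:
#
#   {"model-a": {}, "model-b": {}}
#
# Note that rows are assigned to columns positionally, so each
# per-sample dict must list one response per model, in the same order
# as the models file's keys.
#
# Example invocation (argument 3 is the output CSV path):
#
#   python results_analysis.py responses.json models.json accuracy.csv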