add: testing program
This script contains prompt generation and LLM testing.
parent c9efe75b77
commit f5c6380b77
1 changed file with 273 additions and 0 deletions
tests/testing.PY (executable file): 273 additions
@@ -0,0 +1,273 @@
#! /Library/Frameworks/Python.framework/Versions/3.12/bin/python3
# testing.py
# To fulfill the test of asking the LLMs to describe the passages.

# Import modules.
import ollama;
import json;
import os;
import datetime;
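
# Note: the `ollama` package talks to an Ollama server (a local one by default);
# the configured models are pulled and queried through it.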

# Add source files.
IMPORTED = {'Strings': "data/datasets/strings.JSON", 'Prompts': "tests/config/prompts.json", 'Models': 'tests/config/models.JSON'}
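# Expected layouts, inferred from how this script reads the files (not validated here):
#   Strings: {"testing": {author type: {source: [passages]}}, "training": {generation type: [texts]}}
#   Prompts: a dict with the keys "sample", "bridge", "introduction", "classify", "judge", and "answer format"
#   Models:  a dict mapping a display name to an Ollama model ID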

# Set up the main variables.
RESPONSES = {};
PROMPTS = {};
RESULTS = {};

# This is the testing configuration.
TEST_CONFIG = {};

# Read the files.
for NAME in list(IMPORTED.keys()):
    try:
        # Parse the file as JSON.
        DATA = json.load(open(IMPORTED[NAME]))
    except ValueError:
        # Fall back to the raw text if the file is not valid JSON.
        DATA = open(IMPORTED[NAME]).read()

    IMPORTED[NAME] = DATA;


# Download the models.
def download_models():
    for MODEL_NAME in IMPORTED["Models"].keys():
        MODEL_ID = IMPORTED["Models"][MODEL_NAME];
        ollama.pull(MODEL_ID);


# Let the user choose the testing type.
def select_testing_type():
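    # TEST_CONFIG ends up holding two booleans, 'CoT' and 'multi-shot',
    # which drive prompt construction and the output filename.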
    if not('CoT' in list(TEST_CONFIG.keys())):
        RESPONSE = abs(int(input("Type “1” for a chain-of-thought test or “0” for a classic test: ")));
        TEST_CONFIG['CoT'] = RESPONSE > 0;

    if not('multi-shot' in list(TEST_CONFIG.keys())):
        RESPONSE = abs(int(input("Type “1” for a multi-shot test or “0” for a zero-shot test: ")));
        TEST_CONFIG['multi-shot'] = RESPONSE > 0;

    # Report the chosen configuration.
    for CONFIGURATION_TYPE in list(TEST_CONFIG.keys()):
        if (TEST_CONFIG[CONFIGURATION_TYPE]):
            print(f"{CONFIGURATION_TYPE}:\tEnabled")
        else:
            print(f"{CONFIGURATION_TYPE}:\tDisabled")

    return (TEST_CONFIG);


'''
Check for a valid questions cache.

Returns: (bool) cache validity
'''
def test_questions_cache():
    CACHE_VALID = False;

    # Check that the cache files exist.
    SOURCES = ["tests/cache/config.JSON", "tests/cache/prompts.JSON"];
    for FILE_NAME in SOURCES:
        CACHE_VALID = os.path.isfile(FILE_NAME);

        if (not(CACHE_VALID)):
            break;

    if (CACHE_VALID):
        try:
            # Open the last configuration file and check that the testing configuration it used matches the current one.
            CACHED_DATA = json.load(open('tests/cache/config.JSON'));
            CACHE_VALID = CACHED_DATA == TEST_CONFIG;
        except (OSError, ValueError):
            CACHE_VALID = False;

    if (not(CACHE_VALID)):
        for FILE_NAME in ["tests/cache/config.JSON", "tests/cache/prompts.JSON", "tests/cache/responses.JSON"]:
            if (os.path.isfile(FILE_NAME)):
                # Remove the invalid caches.
                os.remove(FILE_NAME);

    return (CACHE_VALID);


'''
Format the questions.

Returns: (dict) the prompts
'''
def format_questions():
    CACHE_USABLE = test_questions_cache();

    if (CACHE_USABLE):
        print("Using cache.");
        IMPORTED['Cached Prompts'] = json.load(open("tests/cache/prompts.JSON"));
        for TYPE_NAME in list(IMPORTED['Strings']['testing'].keys()):
            if (type(TYPE_NAME) is str):
                PROMPTS[TYPE_NAME.strip()] = IMPORTED['Cached Prompts'][TYPE_NAME];
            else:
                PROMPTS[TYPE_NAME] = IMPORTED['Cached Prompts'][TYPE_NAME];
    else:
        print("Generating questions…")

        # Loop through each author type.
        for TYPE_NAME in list(IMPORTED['Strings']['testing'].keys()):
            PROMPTS[TYPE_NAME] = {};

            # Loop through each source.
            for SOURCE_NAME in list(IMPORTED['Strings']['testing'][TYPE_NAME].keys()):
                PROMPTS[TYPE_NAME][SOURCE_NAME] = [];

                for PASSAGE in IMPORTED['Strings']['testing'][TYPE_NAME][SOURCE_NAME]:
                    PROMPT = "";
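                    # Assemble the prompt: in a multi-shot test, prepend the sample header, the
                    # labelled training excerpts, and the bridge text; then always append the
                    # introduction, the quoted passage, and the classification question.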
|
||||
|
||||
if TEST_CONFIG['multi-shot']:
|
||||
PROMPT = f"{IMPORTED['Prompts']["sample"]}"
|
||||
|
||||
for GENERATION_TYPE in IMPORTED['Strings']['training'].keys():
|
||||
for TEXT_NUMBER in range(len(IMPORTED['Strings']['training'][GENERATION_TYPE])):
|
||||
PROMPT = f"{PROMPT}\n\n{GENERATION_TYPE}-written #{str(TEXT_NUMBER + 1)}: \n“{'\n\n\t'.join(IMPORTED['Strings']['training'][GENERATION_TYPE][TEXT_NUMBER].strip().split("\n\n"))}”";
|
||||
|
||||
PROMPT = f"{PROMPT}\n\n{IMPORTED['Prompts']['bridge']}\n\n";
|
||||
|
||||
PROMPT = f"{PROMPT}{IMPORTED['Prompts']["introduction"]}\n\n“{'\n\t'.join(PASSAGE.strip().split("\n"))}”\n\n{IMPORTED['Prompts']["classify"]}"
|
||||
|
||||
PROMPTS[TYPE_NAME][SOURCE_NAME].append(PROMPT);
|
||||
|
||||
create_cache(exclude=['responses']);
|
||||
|
||||
return(IMPORTED['Prompts']);


def asking_execution():
    if (screen_asking()):
        ask_AI();
        save_responses();


'''
Request the user's final confirmation before running the LLMs, and make sure the testing conditions are satisfied before executing.

Returns: (bool) the user's proceed state
'''
def screen_asking():
    RESPONSE = '';

    # Check the testing conditions.
    CONTINUE = len(list(IMPORTED['Models'].keys())) > 0;

    if (CONTINUE):
        try:
            RESPONSE = input("\n\nDo you now want to begin interaction with the LLMs? \nThis process will take about 20 minutes. \n");
        except KeyboardInterrupt:
            CONTINUE = False;
        else:
            if ("n" in RESPONSE.lower().strip().rstrip('.').rstrip('!')):
                CONTINUE = False;
    else:
        print("No testing models configured. Change that configuration and run this script again once you're ready.")

    return CONTINUE;


'''
Ask the AI.

Uses the models configured in IMPORTED['Models'].

Returns: (dict) the responses
'''
def ask_AI():
    # Loop through each author type.
    for TYPE_NAME in list(PROMPTS.keys()):
        RESPONSES[TYPE_NAME] = {};

        # Loop through each source.
        for SOURCE_NAME in list(PROMPTS[TYPE_NAME].keys()):
            RESPONSES[TYPE_NAME][SOURCE_NAME] = [];

            print("\n");
            TARGET_LENGTH = len(PROMPTS[TYPE_NAME][SOURCE_NAME]);

            for PROMPT_NUMBER in range(TARGET_LENGTH):
                print(f"\033[FAnswering prompt {PROMPT_NUMBER + 1} of {TARGET_LENGTH} from {TYPE_NAME} work in {SOURCE_NAME}…");
                PROMPT = PROMPTS[TYPE_NAME][SOURCE_NAME][PROMPT_NUMBER];
                MODEL_RESPONSES = {};

                for MODEL_NAME in list(IMPORTED['Models'].keys()):
                    # Get the model ID.
                    MODEL_ID = IMPORTED['Models'][MODEL_NAME];

                    # Update the status line with the model being queried.
                    print(f"\033[FAnswering prompt {PROMPT_NUMBER + 1} of {TARGET_LENGTH} from {TYPE_NAME} work in {SOURCE_NAME} using {MODEL_NAME}…");

                    # Prepare the messages: 'User' queues the user turns, 'Model' collects the replies, and 'History' is the running transcript sent to the model.
                    MESSAGES = {};
                    MESSAGES['User'] = [];
                    MESSAGES['Model'] = [];
                    MESSAGES['History'] = [];

                    # Queue the user turns in order: the prompt, the chain-of-thought 'judge' instruction (if enabled), then the answer-format instruction.
                    MESSAGES['User'].append(PROMPT);
                    if (TEST_CONFIG['CoT']):
                        MESSAGES['User'].append(IMPORTED['Prompts']['judge']);
                    MESSAGES['User'].append(IMPORTED['Prompts']['answer format']);
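
                    # Each queued user turn is sent in sequence; the model's reply to each turn is
                    # appended to the history, so later turns see the full conversation so far.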
                    for MESSAGE in (MESSAGES['User']):
                        # Add the user message to the history.
                        MESSAGES['History'].append({'role': 'user', 'content': MESSAGE});

                        # Ask the model and record its reply.
                        MESSAGE_LAST = ((ollama.chat(model=MODEL_ID, messages=MESSAGES['History']))['message']['content']).strip("\t\n").strip();
                        MESSAGES['Model'].append(MESSAGE_LAST);
                        MESSAGES['History'].append({'role': 'assistant', 'content': MESSAGE_LAST});

                    # Associate the replies with the correct LLM model.
                    del MESSAGES['User'];
                    del MESSAGES['History'];
                    MODEL_RESPONSES[MODEL_NAME] = MESSAGES['Model'];

                # Append the messages.
                RESPONSES[TYPE_NAME][SOURCE_NAME].append(MODEL_RESPONSES);

                # Cache the responses.
                create_cache(include=['responses']);

                # Update the status.
                print(f"\033[FAnswered prompt {PROMPT_NUMBER + 1} of {TARGET_LENGTH} from {TYPE_NAME} work in {SOURCE_NAME}.");

            print(f"\033[FFinished answering all {TARGET_LENGTH} prompts consisting of {TYPE_NAME} work in {SOURCE_NAME}.");

    return (RESPONSES);


'''
Generate a cache.

Parameters:
    exclude (list): items to skip
    include (list): items to save
'''
def create_cache(**params):
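    # When 'exclude' is given, write every cache file not listed in it; when only 'include'
    # is given, write only the listed items; with neither argument, write everything.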
    if (not('test config' in params['exclude']) if ('exclude' in list(params.keys())) else (('test config' in params['include']) if ('include' in list(params.keys())) else True)):
        save_data(dictionary=TEST_CONFIG, filename='tests/cache/config.JSON');
    if (not('prompts' in params['exclude']) if ('exclude' in list(params.keys())) else (('prompts' in params['include']) if ('include' in list(params.keys())) else True)):
        save_data(dictionary=PROMPTS, filename='tests/cache/prompts.JSON');
    if (not('responses' in params['exclude']) if ('exclude' in list(params.keys())) else (('responses' in params['include']) if ('include' in list(params.keys())) else True)):
        save_data(dictionary=RESPONSES, filename="tests/cache/responses.JSON");


'''
Export the responses.
'''
def save_responses():
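    # Write the collected responses to tests/outputs/, tagging the filename with the enabled
    # test options and the current time, then remove the working cache file.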
    save_data(dictionary=RESPONSES, filename=f"tests/outputs/responses{' multi-shot' if (TEST_CONFIG['multi-shot'] if 'multi-shot' in list(TEST_CONFIG.keys()) else False) else ''}{' CoT' if (TEST_CONFIG['CoT'] if 'CoT' in list(TEST_CONFIG.keys()) else False) else ''} {str(datetime.datetime.now().time())}.JSON");
    os.remove("tests/cache/responses.JSON");


'''
Save the data.

Parameters:
    dictionary (dict): the data to save
    filename (str): the file name
'''
def save_data(**parameters):
    if (parameters['filename'].strip()):
        with open(parameters['filename'], 'w') as file:
            # print(f"Saving {parameters['filename']}…");
            json.dump(parameters['dictionary'], file);


# Run the code.
def main():
    select_testing_type();
    download_models();
    format_questions();
    asking_execution();


main();