From 9ee26671b19a32b3195c1dfc4d5d0be06cdb2426 Mon Sep 17 00:00:00 2001 From: buzz-lightsnack-2007 <73412182+buzz-lightsnack-2007@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:49:16 +0000 Subject: [PATCH] change local provider from gpt4all to ollama --- dependencies.txt | 2 +- main.ipynb | 128 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 111 insertions(+), 19 deletions(-) diff --git a/dependencies.txt b/dependencies.txt index 1073d41..e882966 100644 --- a/dependencies.txt +++ b/dependencies.txt @@ -1,6 +1,6 @@ pip kaggle kagglehub -gpt4all +ollama google-generativeai pandas \ No newline at end of file diff --git a/main.ipynb b/main.ipynb index 94ad2da..a410f9c 100644 --- a/main.ipynb +++ b/main.ipynb @@ -27,13 +27,73 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "vscode": { "languageId": "shellscript" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pip in /home/codespace/.python/current/lib/python3.12/site-packages (from -r dependencies.txt (line 1)) (24.2)\n", + "Requirement already satisfied: kaggle in /home/codespace/.python/current/lib/python3.12/site-packages (from -r dependencies.txt (line 2)) (1.6.17)\n", + "Requirement already satisfied: kagglehub in /home/codespace/.python/current/lib/python3.12/site-packages (from -r dependencies.txt (line 3)) (0.2.9)\n", + "Collecting ollama (from -r dependencies.txt (line 4))\n", + " Downloading ollama-0.3.1-py3-none-any.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: google-generativeai in /home/codespace/.python/current/lib/python3.12/site-packages (from -r dependencies.txt (line 5)) (0.7.2)\n", + "Requirement already satisfied: pandas in /home/codespace/.local/lib/python3.12/site-packages (from -r dependencies.txt (line 6)) (2.2.2)\n", + "Requirement already satisfied: six>=1.10 in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (1.16.0)\n", + "Requirement already satisfied: certifi>=2023.7.22 in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (2024.7.4)\n", + "Requirement already satisfied: python-dateutil in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (2.9.0.post0)\n", + "Requirement already satisfied: requests in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (2.32.3)\n", + "Requirement already satisfied: tqdm in /home/codespace/.python/current/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (4.66.5)\n", + "Requirement already satisfied: python-slugify in /home/codespace/.python/current/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (8.0.4)\n", + "Requirement already satisfied: urllib3 in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (2.2.2)\n", + "Requirement already satisfied: bleach in /home/codespace/.local/lib/python3.12/site-packages (from kaggle->-r dependencies.txt (line 2)) (6.1.0)\n", + "Requirement already satisfied: packaging in /home/codespace/.local/lib/python3.12/site-packages (from kagglehub->-r dependencies.txt (line 3)) (24.1)\n", + "Requirement already satisfied: httpx<0.28.0,>=0.27.0 in /home/codespace/.local/lib/python3.12/site-packages (from ollama->-r dependencies.txt (line 4)) (0.27.0)\n", + "Requirement already satisfied: google-ai-generativelanguage==0.6.6 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (0.6.6)\n", + "Requirement already satisfied: google-api-core in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (2.19.1)\n", + "Requirement already satisfied: google-api-python-client in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (2.142.0)\n", + "Requirement already satisfied: google-auth>=2.15.0 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (2.34.0)\n", + "Requirement already satisfied: protobuf in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (4.25.4)\n", + "Requirement already satisfied: pydantic in /home/codespace/.python/current/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (2.8.2)\n", + "Requirement already satisfied: typing-extensions in /home/codespace/.local/lib/python3.12/site-packages (from google-generativeai->-r dependencies.txt (line 5)) (4.12.2)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-ai-generativelanguage==0.6.6->google-generativeai->-r dependencies.txt (line 5)) (1.24.0)\n", + "Requirement already satisfied: numpy>=1.26.0 in /home/codespace/.local/lib/python3.12/site-packages (from pandas->-r dependencies.txt (line 6)) (2.0.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/codespace/.local/lib/python3.12/site-packages (from pandas->-r dependencies.txt (line 6)) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/codespace/.local/lib/python3.12/site-packages (from pandas->-r dependencies.txt (line 6)) (2024.1)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-core->google-generativeai->-r dependencies.txt (line 5)) (1.63.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-auth>=2.15.0->google-generativeai->-r dependencies.txt (line 5)) (5.5.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-auth>=2.15.0->google-generativeai->-r dependencies.txt (line 5)) (0.4.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-auth>=2.15.0->google-generativeai->-r dependencies.txt (line 5)) (4.9)\n", + "Requirement already satisfied: anyio in /home/codespace/.local/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama->-r dependencies.txt (line 4)) (4.4.0)\n", + "Requirement already satisfied: httpcore==1.* in /home/codespace/.local/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama->-r dependencies.txt (line 4)) (1.0.5)\n", + "Requirement already satisfied: idna in /home/codespace/.local/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama->-r dependencies.txt (line 4)) (3.7)\n", + "Requirement already satisfied: sniffio in /home/codespace/.local/lib/python3.12/site-packages (from httpx<0.28.0,>=0.27.0->ollama->-r dependencies.txt (line 4)) (1.3.1)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /home/codespace/.local/lib/python3.12/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama->-r dependencies.txt (line 4)) (0.14.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/codespace/.local/lib/python3.12/site-packages (from requests->kaggle->-r dependencies.txt (line 2)) (3.3.2)\n", + "Requirement already satisfied: webencodings in /home/codespace/.local/lib/python3.12/site-packages (from bleach->kaggle->-r dependencies.txt (line 2)) (0.5.1)\n", + "Requirement already satisfied: httplib2<1.dev0,>=0.19.0 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-python-client->google-generativeai->-r dependencies.txt (line 5)) (0.22.0)\n", + "Requirement already satisfied: google-auth-httplib2<1.0.0,>=0.2.0 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-python-client->google-generativeai->-r dependencies.txt (line 5)) (0.2.0)\n", + "Requirement already satisfied: uritemplate<5,>=3.0.1 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-python-client->google-generativeai->-r dependencies.txt (line 5)) (4.1.1)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /home/codespace/.python/current/lib/python3.12/site-packages (from pydantic->google-generativeai->-r dependencies.txt (line 5)) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.20.1 in /home/codespace/.python/current/lib/python3.12/site-packages (from pydantic->google-generativeai->-r dependencies.txt (line 5)) (2.20.1)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /home/codespace/.python/current/lib/python3.12/site-packages (from python-slugify->kaggle->-r dependencies.txt (line 2)) (1.3)\n", + "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.6->google-generativeai->-r dependencies.txt (line 5)) (1.65.5)\n", + "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /home/codespace/.python/current/lib/python3.12/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.6->google-generativeai->-r dependencies.txt (line 5)) (1.62.3)\n", + "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /home/codespace/.local/lib/python3.12/site-packages (from httplib2<1.dev0,>=0.19.0->google-api-python-client->google-generativeai->-r dependencies.txt (line 5)) (3.1.2)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /home/codespace/.python/current/lib/python3.12/site-packages (from pyasn1-modules>=0.2.1->google-auth>=2.15.0->google-generativeai->-r dependencies.txt (line 5)) (0.6.0)\n", + "Downloading ollama-0.3.1-py3-none-any.whl (10 kB)\n", + "Installing collected packages: ollama\n", + "Successfully installed ollama-0.3.1\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "pip install --upgrade -r dependencies.txt" ] @@ -61,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -89,9 +149,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attempting to download Training_Essay_Data.csv from dataset sunilthite/llm-detect-ai-generated-text-dataset/versions/1\n", + "Finished downloading Training_Essay_Data.csv from dataset sunilthite/llm-detect-ai-generated-text-dataset/versions/1\n", + "The file Training_Essay_Data.csv from dataset sunilthite/llm-detect-ai-generated-text-dataset/versions/1 has been linked.\n" + ] + } + ], "source": [ "def download_datasets(): \n", " # Read the dataset listings. \n", @@ -126,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -160,9 +230,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " text generated\n", + "0 Car-free cities have become a subject of incre... 1\n", + "1 Car Free Cities Car-free cities, a concept ga... 1\n", + "2 A Sustainable Urban Future Car-free cities ... 1\n", + "3 Pioneering Sustainable Urban Living In an e... 1\n", + "4 The Path to Sustainable Urban Living In an ... 1\n", + "... ... ...\n", + "29140 There has been a fuss about the Elector Colleg... 0\n", + "29141 Limiting car usage has many advantages. Such a... 0\n", + "29142 There's a new trend that has been developing f... 0\n", + "29143 As we all know cars are a big part of our soci... 0\n", + "29144 Cars have been around since the 1800's and hav... 0\n", + "\n", + "[29145 rows x 2 columns]\n" + ] + } + ], "source": [ "print(DATAFRAMES[\"all\"]);" ] @@ -183,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +304,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -249,8 +340,8 @@ "metadata": {}, "source": [ "The models in use for this study are the following: \n", - "- `Meta-Llama-3-8B-Instruct.Q4_0.gguf`\n", - "- `Phi-3-mini-4k-instruct.Q4_0.gguf`\n", + "- `llama3.1`\n", + "- `phi3:3.8b`\n", "- `gemini-1.5-pro`\n", "\n", "The former two are yet to be downloaded, which is to be done here. " @@ -258,11 +349,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "from gpt4all import GPT4All\n", + "import ollama\n", "import google.generativeai as Gemini" ] }, @@ -273,9 +364,10 @@ "outputs": [], "source": [ "MODELS = {};\n", - "for MODEL_NAME in [\"Meta-Llama-3-8B-Instruct.Q4_0.gguf\", \"Phi-3-mini-4k-instruct.Q4_0.gguf\"]: \n", - " MODELS[MODEL_NAME] = GPT4All(MODEL_NAME);\n", - "MODELS[\"gemini-1.5-pro\"] = Gemini.GenerativeModel('gemini-1.5-pro-exp-0801');\n" + "MODELS[\"gemini-1.5-pro\"] = Gemini.GenerativeModel('gemini-1.5-pro-exp-0801');\n", + "\n", + "for MODEL_NAME in [\"llama3.1\",\"phi3:latest\"]: \n", + " ollama.pull(MODEL_NAME);" ] } ],