From 5a38131e9a95f6eabeb729dcae51c91899641491 Mon Sep 17 00:00:00 2001 From: Henry Dowd Date: Sat, 22 Nov 2025 17:33:24 +0000 Subject: [PATCH] Formatting changes for notebooks --- notebooks/01_data_exploration.ipynb | 117 ++++++++++++++++------------ notebooks/03_semantic_methods.ipynb | 41 +++++++--- 2 files changed, 100 insertions(+), 58 deletions(-) diff --git a/notebooks/01_data_exploration.ipynb b/notebooks/01_data_exploration.ipynb index 1a80103..ddf0a9a 100644 --- a/notebooks/01_data_exploration.ipynb +++ b/notebooks/01_data_exploration.ipynb @@ -77,8 +77,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T16:32:45.216663Z", - "start_time": "2025-11-21T16:32:42.601290Z" + "end_time": "2025-11-21T23:26:23.172796Z", + "start_time": "2025-11-21T23:26:21.541542Z" } }, "cell_type": "code", @@ -129,17 +129,29 @@ "processed_syntactic = []\n", "\n", "for sentence in test_sentences:\n", - " processed_direct[iter] = preprocessor.semantic_analysis(sentence)\n", - "#print(preprocessor.syntactic_analysis(\"A completely different sentence about something else.\"))\n", + " print(\"-\" * 50)\n", + " print(f\"Sentence: {sentence}\")\n", + " direct = preprocessor.direct_detection(sentence)\n", + " processed_direct.append(direct)\n", + " print(\"--- Direct Sentence ---\")\n", + " print(f\"{direct}\")\n", + " semantic = preprocessor.semantic_analysis(sentence)\n", + " processed_semantic.append(semantic)\n", + " print(\"--- Semantic Sentence ---\")\n", + " print(f\"{semantic}\")\n", + " syntactic = preprocessor.syntactic_analysis(sentence)\n", + " processed_syntactic.append(syntactic)\n", + " print(\"--- Syntactic Sentence ---\")\n", + " print(f\"{syntactic}\")\n", "\n", - "\n", - "for sent in test_sentences:\n", - " print(f\"Original Sentence: {sent}\")\n", - " print(\"--- Semantic Analysis ---\")\n", - " print(f\"Preprocessed Sentence: {preprocessor.semantic_analysis(sent)}\")\n", - " print(\"--- Syntactic Analysis ---\")\n", - " print(f\"Preprocessed Sentence: 
{preprocessor.syntactic_analysis(sent)}\")\n", - " print(\"-\" * 50)" + "print(\"-\" * 50)\n", + "#for sent in test_sentences:\n", + "# print(f\"Original Sentence: {sent}\")\n", + "# print(\"--- Semantic Analysis ---\")\n", + "# print(f\"Preprocessed Sentence: {preprocessor.semantic_analysis(sent)}\")\n", + "# print(\"--- Syntactic Analysis ---\")\n", + "# print(f\"Preprocessed Sentence: {preprocessor.syntactic_analysis(sent)}\")\n", + "# print(\"-\" * 50)" ], "id": "5e488a878a5cfccb", "outputs": [ @@ -147,34 +159,41 @@ "name": "stdout", "output_type": "stream", "text": [ - "Original Sentence: The cat sat on the mat.\n", - "--- Semantic Analysis ---\n", - "Preprocessed Sentence: cat sit mat\n", - "--- Syntactic Analysis ---\n", - "Preprocessed Sentence: the cat sit on the mat .\n", "--------------------------------------------------\n", - "Original Sentence: On the mat, the cat was sitting.\n", - "--- Semantic Analysis ---\n", - "Preprocessed Sentence: mat cat sit\n", - "--- Syntactic Analysis ---\n", - "Preprocessed Sentence: on the mat , the cat be sit .\n", + "Sentence: The cat sat on the mat.\n", + "--- Direct Sentence ---\n", + "the cat sat on the mat.\n", + "--- Semantic Sentence ---\n", + "cat sit mat\n", + "--- Syntactic Sentence ---\n", + "the cat sit on the mat .\n", "--------------------------------------------------\n", - "Original Sentence: A completely different sentence about something else.\n", - "--- Semantic Analysis ---\n", - "Preprocessed Sentence: completely different sentence\n", - "--- Syntactic Analysis ---\n", - "Preprocessed Sentence: a completely different sentence about something else .\n", + "Sentence: On the mat, the cat was sitting.\n", + "--- Direct Sentence ---\n", + "on the mat, the cat was sitting.\n", + "--- Semantic Sentence ---\n", + "mat cat sit\n", + "--- Syntactic Sentence ---\n", + "on the mat , the cat be sit .\n", + "--------------------------------------------------\n", + "Sentence: A completely different sentence about 
something else.\n", + "--- Direct Sentence ---\n", + "a completely different sentence about something else.\n", + "--- Semantic Sentence ---\n", + "completely different sentence\n", + "--- Syntactic Sentence ---\n", + "a completely different sentence about something else .\n", "--------------------------------------------------\n" ] } ], - "execution_count": 17 + "execution_count": 37 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-11-20T15:51:42.074798Z", - "start_time": "2025-11-20T15:51:42.050593Z" + "end_time": "2025-11-21T23:39:48.443022Z", + "start_time": "2025-11-21T23:39:48.411766Z" } }, "cell_type": "code", @@ -192,7 +211,7 @@ "\n", " return doc\n", "\n", - "for sentence in test_sentences:\n", + "for sentence in processed_syntactic:\n", " doc = extract_parse_tree(sentence)\n", " print(\"\\n\" + \"=\"*60 + \"\\n\")" ], @@ -202,44 +221,44 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sentence: The cat sat on the mat.\n", + "Sentence: the cat sit on the mat .\n", "\n", "Dependenct Parse Tree:\n", "--------------------------------------------------\n", - "The det cat []\n", - "cat nsubj sat ['The']\n", - "sat ROOT sat ['cat', 'on', '.']\n", - "on prep sat ['mat']\n", + "the det cat []\n", + "cat nsubj sit ['the']\n", + "sit ROOT sit ['cat', 'on', '.']\n", + "on prep sit ['mat']\n", "the det mat []\n", "mat pobj on ['the']\n", - ". punct sat []\n", + ". 
punct sit []\n", "\n", "============================================================\n", "\n", - "Sentence: On the mat, the cat was sitting.\n", + "Sentence: on the mat , the cat be sit .\n", "\n", "Dependenct Parse Tree:\n", "--------------------------------------------------\n", - "On prep sitting ['mat']\n", + "on prep sit ['mat']\n", "the det mat []\n", - "mat pobj On ['the']\n", - ", punct sitting []\n", + "mat pobj on ['the']\n", + ", punct sit []\n", "the det cat []\n", - "cat nsubj sitting ['the']\n", - "was aux sitting []\n", - "sitting ROOT sitting ['On', ',', 'cat', 'was', '.']\n", - ". punct sitting []\n", + "cat nsubj sit ['the']\n", + "be aux sit []\n", + "sit ROOT sit ['on', ',', 'cat', 'be', '.']\n", + ". punct sit []\n", "\n", "============================================================\n", "\n", - "Sentence: A completely different sentence about something else.\n", + "Sentence: a completely different sentence about something else .\n", "\n", "Dependenct Parse Tree:\n", "--------------------------------------------------\n", - "A det sentence []\n", + "a det sentence []\n", "completely advmod different []\n", "different amod sentence ['completely']\n", - "sentence ROOT sentence ['A', 'different', 'about', '.']\n", + "sentence ROOT sentence ['a', 'different', 'about', '.']\n", "about prep sentence ['something']\n", "something pobj about ['else']\n", "else advmod something []\n", @@ -250,7 +269,7 @@ ] } ], - "execution_count": 15 + "execution_count": 39 }, { "metadata": {}, diff --git a/notebooks/03_semantic_methods.ipynb b/notebooks/03_semantic_methods.ipynb index 640b581..6b1b734 100644 --- a/notebooks/03_semantic_methods.ipynb +++ b/notebooks/03_semantic_methods.ipynb @@ -1,5 +1,21 @@ { "cells": [ + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-11-22T11:40:21.711998Z", + "start_time": "2025-11-22T11:40:20.129376Z" + } + }, + "cell_type": "code", + "source": [ + "import spacy\n", + "nlp = spacy.load(\"en_core_web_md\") # Medium model" + 
], + "id": "1638b7b97e3bd6f", + "outputs": [], + "execution_count": 11 + }, { "metadata": {}, "cell_type": "markdown", @@ -9,19 +25,16 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-11-18T23:28:55.563335Z", - "start_time": "2025-11-18T23:28:53.763429Z" + "end_time": "2025-11-22T11:47:39.286432Z", + "start_time": "2025-11-22T11:47:39.271377Z" } }, "cell_type": "code", "source": [ - "import import_ipynb\n", - "#from notebooks. import *\n", + "def test_word_vectors(word):\n", + " print(word, nlp.vocab[word].vector.shape)\n", "\n", - "import spacy\n", - "nlp = spacy.load(\"en_core_web_md\")\n", - "\n", - "words = [\"cat\", \"dog\", \"feline\", \"vehicle\", \"car\"]\n", + "words = [\"cat\", \"dog\", \"feline\", \"feral\", \"vehicle\", \"car\"]\n", "# Test work similarities\n", "for word1 in words:\n", " for word2 in words:\n", @@ -39,28 +52,38 @@ "text": [ "cat - dog: 1.000\n", "cat - feline: 0.363\n", + "cat - feral: 0.483\n", "cat - vehicle: 0.078\n", "cat - car: 0.193\n", "dog - cat: 1.000\n", "dog - feline: 0.363\n", + "dog - feral: 0.483\n", "dog - vehicle: 0.078\n", "dog - car: 0.193\n", "feline - cat: 0.363\n", "feline - dog: 0.363\n", + "feline - feral: 0.412\n", "feline - vehicle: 0.180\n", "feline - car: 0.050\n", + "feral - cat: 0.483\n", + "feral - dog: 0.483\n", + "feral - feline: 0.412\n", + "feral - vehicle: 0.175\n", + "feral - car: 0.161\n", "vehicle - cat: 0.078\n", "vehicle - dog: 0.078\n", "vehicle - feline: 0.180\n", + "vehicle - feral: 0.175\n", "vehicle - car: 0.205\n", "car - cat: 0.193\n", "car - dog: 0.193\n", "car - feline: 0.050\n", + "car - feral: 0.161\n", "car - vehicle: 0.205\n" ] } ], - "execution_count": 1 + "execution_count": 15 }, { "metadata": {},