Expanded on Baseline experiments, analyzed and compared

2025-11-30 19:38:42 +00:00
parent 37ccc03ac9
commit fe2c087093
1 changed files with 236 additions and 145 deletions
--- a/notebooks/02_baseline_experiments.ipynb
+++ b/notebooks/02_baseline_experiments.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
   "id": "d2aa2997",
   "metadata": {},
   "outputs": [
@@ -56,7 +56,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 58,
   "id": "e60d024e969254a",
   "metadata": {
    "ExecuteTime": {
@@ -69,30 +69,20 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "'The cat sat on the mat.' vs 'The cat sat on the mat.': 1.000\n",
+      "Sentence 1                                Sentence 2:                              Similarity Score:\n",
-      "--------------------------------------------------\n",
+      "====================================================================================================\n",
-      "'The cat sat on the mat.' vs 'The cat sat on the mat': 0.667\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat.                  1.000\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat                   0.667\n",
-      "'The cat sat on the mat.' vs 'The  cat  sat  on  the  mat.': 1.000\n",
+      "The cat sat on the mat.        vs         The  cat  sat  on  the  mat.             1.000\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         On the mat, the cat was sitting.         0.375\n",
-      "'The cat sat on the mat.' vs 'On the mat, the cat was sitting.': 0.375\n",
+      "The cat sat on the mat.        vs         The feline rested on the rug.            0.250\n",
-      "--------------------------------------------------\n",
+      "The quick brown fox jumps.     vs         A fast brown fox leaps.                  0.250\n",
-      "'The cat sat on the mat.' vs 'The feline rested on the rug.': 0.250\n",
+      "The cat sat on the mat.        vs         The dog ran in the park.                 0.111\n",
-      "--------------------------------------------------\n",
+      "I love programming.            vs         She enjoys reading books.                0.000\n",
-      "'The quick brown fox jumps.' vs 'A fast brown fox leaps.': 0.250\n",
+      "The weather is nice today.     vs         It's raining outside.                    0.000\n",
-      "--------------------------------------------------\n",
+      "Short.                         vs         Short.                                   1.000\n",
-      "'The cat sat on the mat.' vs 'The dog ran in the park.': 0.111\n",
+      "A B C D E F G                  vs         A B C D E F G                            1.000\n",
-      "--------------------------------------------------\n",
+      "                               vs                                                  0.000\n"
      "'I love programming.' vs 'She enjoys reading books.': 0.000\n",
      "--------------------------------------------------\n",
      "'The weather is nice today.' vs 'It's raining outside.': 0.000\n",
      "--------------------------------------------------\n",
      "'Short.' vs 'Short.': 1.000\n",
      "--------------------------------------------------\n",
      "'A B C D E F G' vs 'A B C D E F G': 1.000\n",
      "--------------------------------------------------\n",
      "'' vs '': 0.000\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
@@ -111,10 +101,13 @@
    "    (\"The cat sat on the mat.\", \"The dog ran in the park\")      # Different\n",
    "]\n",
    "\n",
    "print(f\"{'Sentence 1':<41} {'Sentence 2:':<40} {'Similarity Score:'}\")\n",
    "print(\"=\" * 100)\n",
    "\n",
    "for sent1, sent2 in test_pairs:\n",
    "    similarity = jaccard_similarity(sent1, sent2)\n",
-    "    print(f\"'{sent1}' vs '{sent2}': {similarity:.3f}\") # 3 decimal places\n",
+    "    print(f\"{sent1:<30} {'vs':<10} {sent2:<40} {similarity:.3f}\") # 3 decimal places\n",
-    "    print(\"-\"* 50)\n"
+    "    #print(\"-\"* 50)\n"
   ]
  },
  {
@@ -128,7 +121,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 57,
   "id": "0b68fdcd",
   "metadata": {},
   "outputs": [
@@ -136,42 +129,20 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "'The cat sat on the mat.' vs 'The cat sat on the mat.':\n",
+      "Sentence 1                                Sentence 2:          Similarities ->     Char:      Word:\n",
-      " Char similarity: 1.000 --- Word similarity: 1.000\n",
+      "====================================================================================================\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat.                  1.000      1.000 \n",
-      "'The cat sat on the mat.' vs 'The cat sat on the mat':\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat                   0.957      0.833 \n",
-      " Char similarity: 0.957 --- Word similarity: 0.833\n",
+      "The cat sat on the mat.        vs         The  cat  sat  on  the  mat.             0.821      1.000 \n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         On the mat, the cat was sitting.         0.344      0.143 \n",
-      "'The cat sat on the mat.' vs 'The  cat  sat  on  the  mat.':\n",
+      "The cat sat on the mat.        vs         The feline rested on the rug.            0.517      0.500 \n",
-      " Char similarity: 0.821 --- Word similarity: 1.000\n",
+      "The quick brown fox jumps.     vs         A fast brown fox leaps.                  0.577      0.400 \n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         The dog ran in the park.                 0.625      0.333 \n",
-      "'The cat sat on the mat.' vs 'On the mat, the cat was sitting.':\n",
+      "I love programming.            vs         She enjoys reading books.                0.200      0.000 \n",
-      " Char similarity: 0.344 --- Word similarity: 0.143\n",
+      "The weather is nice today.     vs         It's raining outside.                    0.192      0.000 \n",
-      "--------------------------------------------------\n",
+      "Short.                         vs         Short.                                   1.000      1.000 \n",
-      "'The cat sat on the mat.' vs 'The feline rested on the rug.':\n",
+      "A B C D E F G                  vs         A B C D E F G                            1.000      1.000 \n",
-      " Char similarity: 0.517 --- Word similarity: 0.500\n",
+      "                               vs                                                  1.000      1.000 \n"
      "--------------------------------------------------\n",
      "'The quick brown fox jumps.' vs 'A fast brown fox leaps.':\n",
      " Char similarity: 0.577 --- Word similarity: 0.400\n",
      "--------------------------------------------------\n",
      "'The cat sat on the mat.' vs 'The dog ran in the park.':\n",
      " Char similarity: 0.625 --- Word similarity: 0.333\n",
      "--------------------------------------------------\n",
      "'I love programming.' vs 'She enjoys reading books.':\n",
      " Char similarity: 0.200 --- Word similarity: 0.000\n",
      "--------------------------------------------------\n",
      "'The weather is nice today.' vs 'It's raining outside.':\n",
      " Char similarity: 0.192 --- Word similarity: 0.000\n",
      "--------------------------------------------------\n",
      "'Short.' vs 'Short.':\n",
      " Char similarity: 1.000 --- Word similarity: 1.000\n",
      "--------------------------------------------------\n",
      "'A B C D E F G' vs 'A B C D E F G':\n",
      " Char similarity: 1.000 --- Word similarity: 1.000\n",
      "--------------------------------------------------\n",
      "'' vs '':\n",
      " Char similarity: 1.000 --- Word similarity: 1.000\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
@@ -201,12 +172,16 @@
    "    edit_distance = distance.Levenshtein.distance(words1, words2)\n",
    "    return 1 - (edit_distance / max_len)\n",
    "\n",
    "    \n",
    "\n",
    "print(f\"{'Sentence 1':<41} {'Sentence 2:':<20} {'Similarities ->':<19} {'Char:':<10} {'Word:'}\")\n",
    "print(\"=\" * 100)\n",
    "\n",
    "for sent1, sent2 in test_pairs:\n",
    "    char_similarity = char_levenshtein_similarity(sent1, sent2)\n",
    "    word_similarity = word_levenshtein_similarity(sent1, sent2)\n",
-    "    print(f\"'{sent1}' vs '{sent2}':\") \n",
+    "    print(f\"{sent1:<30} {'vs':<10} {sent2:<40} {char_similarity:.3f}{'':<5} {word_similarity:.3F} \") # 3 decimal places\n",
-    "    print(f\" Char similarity: {char_similarity:.3f} --- Word similarity: {word_similarity:.3f}\") # 3 decimal place\n",
+    "    #print(\"-\"* 50"
    "    print(\"-\"* 50)"
   ]
  },
  {
@@ -219,7 +194,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 38,
   "id": "46a985b4",
   "metadata": {},
   "outputs": [
@@ -227,30 +202,20 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "'The cat sat on the mat.' vs 'The cat sat on the mat.': 1.000\n",
+      "Sentence 1                                Sentence 2:                              Similarity Score:\n",
-      "--------------------------------------------------\n",
+      "====================================================================================================\n",
-      "'The cat sat on the mat.' vs 'The cat sat on the mat': 1.000\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat.                  1.000\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat                   1.000\n",
-      "'The cat sat on the mat.' vs 'The  cat  sat  on  the  mat.': 1.000\n",
+      "The cat sat on the mat.        vs         The  cat  sat  on  the  mat.             1.000\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         On the mat, the cat was sitting.         0.825\n",
-      "'The cat sat on the mat.' vs 'On the mat, the cat was sitting.': 0.825\n",
+      "The cat sat on the mat.        vs         The feline rested on the rug.            0.625\n",
-      "--------------------------------------------------\n",
+      "The quick brown fox jumps.     vs         A fast brown fox leaps.                  0.400\n",
-      "'The cat sat on the mat.' vs 'The feline rested on the rug.': 0.625\n",
+      "The cat sat on the mat.        vs         The dog ran in the park.                 0.500\n",
-      "--------------------------------------------------\n",
+      "I love programming.            vs         She enjoys reading books.                0.000\n",
-      "'The quick brown fox jumps.' vs 'A fast brown fox leaps.': 0.400\n",
+      "The weather is nice today.     vs         It's raining outside.                    0.000\n",
-      "--------------------------------------------------\n",
+      "Short.                         vs         Short.                                   1.000\n",
-      "'The cat sat on the mat.' vs 'The dog ran in the park.': 0.500\n",
+      "A B C D E F G                  vs         A B C D E F G                            1.000\n",
-      "--------------------------------------------------\n",
+      "                               vs                                                  1.000\n"
      "'I love programming.' vs 'She enjoys reading books.': 0.000\n",
      "--------------------------------------------------\n",
      "'The weather is nice today.' vs 'It's raining outside.': 0.000\n",
      "--------------------------------------------------\n",
      "'Short.' vs 'Short.': 1.000\n",
      "--------------------------------------------------\n",
      "'A B C D E F G' vs 'A B C D E F G': 1.000\n",
      "--------------------------------------------------\n",
      "'' vs '': 1.000\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
@@ -285,10 +250,13 @@
    "    \n",
    "    return dot_product / (norm1 * norm2)\n",
    "\n",
    "print(f\"{'Sentence 1':<41} {'Sentence 2:':<40} {'Similarity Score:'}\")\n",
    "print(\"=\" * 100)\n",
    "\n",
    "for sent1, sent2 in test_pairs:\n",
    "    similarity = cosine_similarity_bow(sent1, sent2)\n",
-    "    print(f\"'{sent1}' vs '{sent2}': {similarity:.3f}\") # 3 decimal places\n",
+    "    print(f\"{sent1:<30} {'vs':<10} {sent2:<40} {similarity:.3f}\") # 3 decimal places\n",
-    "    print(\"-\"* 50)"
+    "    #print(\"-\"* 50)"
   ]
  },
  {
@@ -301,7 +269,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 5,
   "id": "7dc7ac2e",
   "metadata": {},
   "outputs": [
@@ -379,7 +347,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 37,
   "id": "e6a4d4e2",
   "metadata": {},
   "outputs": [
@@ -387,30 +355,20 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "'The cat sat on the mat.' vs 'The cat sat on the mat.': 1.000\n",
+      "Sentence 1                                Sentence 2:                              Similarity Score:\n",
-      "--------------------------------------------------\n",
+      "====================================================================================================\n",
-      "'The cat sat on the mat.' vs 'The cat sat on the mat': 1.000\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat.                  1.000\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         The cat sat on the mat                   1.000\n",
-      "'The cat sat on the mat.' vs 'The  cat  sat  on  the  mat.': 0.815\n",
+      "The cat sat on the mat.        vs         The  cat  sat  on  the  mat.             0.815\n",
-      "--------------------------------------------------\n",
+      "The cat sat on the mat.        vs         On the mat, the cat was sitting.         0.433\n",
-      "'The cat sat on the mat.' vs 'On the mat, the cat was sitting.': 0.433\n",
+      "The cat sat on the mat.        vs         The feline rested on the rug.            0.536\n",
-      "--------------------------------------------------\n",
+      "The quick brown fox jumps.     vs         A fast brown fox leaps.                  0.560\n",
-      "'The cat sat on the mat.' vs 'The feline rested on the rug.': 0.536\n",
+      "The cat sat on the mat.        vs         The dog ran in the park.                 0.609\n",
-      "--------------------------------------------------\n",
+      "I love programming.            vs         She enjoys reading books.                0.333\n",
-      "'The quick brown fox jumps.' vs 'A fast brown fox leaps.': 0.560\n",
+      "The weather is nice today.     vs         It's raining outside.                    0.360\n",
-      "--------------------------------------------------\n",
+      "Short.                         vs         Short.                                   1.000\n",
-      "'The cat sat on the mat.' vs 'The dog ran in the park.': 0.609\n",
+      "A B C D E F G                  vs         A B C D E F G                            1.000\n",
-      "--------------------------------------------------\n",
+      "                               vs                                                  0.000\n"
      "'I love programming.' vs 'She enjoys reading books.': 0.333\n",
      "--------------------------------------------------\n",
      "'The weather is nice today.' vs 'It's raining outside.': 0.360\n",
      "--------------------------------------------------\n",
      "'Short.' vs 'Short.': 1.000\n",
      "--------------------------------------------------\n",
      "'A B C D E F G' vs 'A B C D E F G': 1.000\n",
      "--------------------------------------------------\n",
      "'' vs '': 0.000\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
@@ -437,10 +395,13 @@
    "    lcs_length = dp[m][n]\n",
    "    return lcs_length / max(m, n) if max(m, n) > 0 else 0.0\n",
    "\n",
    "print(f\"{'Sentence 1':<41} {'Sentence 2:':<40} {'Similarity Score:'}\")\n",
    "print(\"=\" * 100)\n",
    "\n",
    "for sent1, sent2 in test_pairs:\n",
    "    similarity = longest_common_subsequence(sent1, sent2)\n",
-    "    print(f\"'{sent1}' vs '{sent2}': {similarity:.3f}\") # 3 decimal places\n",
+    "    print(f\"{sent1:<30} {'vs':<10} {sent2:<40} {similarity:.3f}\") # 3 decimal places\n",
-    "    print(\"-\"* 50)"
+    "    #print(\"-\"* 50)"
   ]
  },
  {
@@ -471,8 +432,7 @@
    "    \n",
    "    common = words1.intersection(words2)\n",
    "    return len(common) / len(words1)\n",
-    "\n",
+    "\n"
    " "
   ]
  },
  {
@@ -485,7 +445,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
   "id": "b3d07562",
   "metadata": {},
   "outputs": [
@@ -495,19 +455,150 @@
     "text": [
      "Pair                           Sentence 1                     Sentence 2                    \n",
      "====================================================================================================\n",
-      "Pair 1:                                  The cat sat on the mat.        The cat sat on the mat.       \n"
+      "Pair 1:                        The cat sat on the mat.        The cat sat on the mat.       \n",
-     ]
+      "                               Jaccard:             1.000\n",
-    },
+      "                               Levenshtein (char):  1.000\n",
-    {
+      "                               Levenshtein (word):  1.000\n",
-     "ename": "TypeError",
+      "                               Cosine BOW:          1.000\n",
-     "evalue": "tuple indices must be integers or slices, not dict",
+      "                               Fuzzy Ratio:         1.000\n",
-     "output_type": "error",
+      "                               Fuzzy Partial:       1.000\n",
-     "traceback": [
+      "                               Fuzzy Token Sort:    1.000\n",
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "                               Fuzzy Token Set:     1.000\n",
-      "\u001b[31mTypeError\u001b[39m                                 Traceback (most recent call last)",
+      "                               LCS:                 1.000\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[28]\u001b[39m\u001b[32m, line 44\u001b[39m\n\u001b[32m     41\u001b[39m         \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33m-\u001b[39m\u001b[33m\"\u001b[39m * \u001b[32m100\u001b[39m)\n\u001b[32m     43\u001b[39m results = evaluate_baseline_methods(test_pairs)\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m \u001b[43mprint_comparison_table\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_pairs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "                               Containment:         1.000\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[28]\u001b[39m\u001b[32m, line 39\u001b[39m, in \u001b[36mprint_comparison_table\u001b[39m\u001b[34m(results, pairs)\u001b[39m\n\u001b[32m     37\u001b[39m \u001b[38;5;66;03m# Print similarities for this pair\u001b[39;00m\n\u001b[32m     38\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m method_name \u001b[38;5;129;01min\u001b[39;00m results:\n\u001b[32m---> \u001b[39m\u001b[32m39\u001b[39m     similarity = \u001b[43mresults\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmethod_name\u001b[49m\u001b[43m]\u001b[49m[i]\n\u001b[32m     40\u001b[39m     \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m:\u001b[39;00m\u001b[33m<40\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod_name\u001b[38;5;250m \u001b[39m+\u001b[38;5;250m \u001b[39m\u001b[33m'\u001b[39m\u001b[33m:\u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m:\u001b[39;00m\u001b[33m<20\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msimilarity\u001b[38;5;132;01m:\u001b[39;00m\u001b[33m.3f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m     41\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33m-\u001b[39m\u001b[33m\"\u001b[39m * \u001b[32m100\u001b[39m)\n",
+      "----------------------------------------------------------------------------------------------------\n",
-      "\u001b[31mTypeError\u001b[39m: tuple indices must be integers or slices, not dict"
+      "Pair 2:                        The cat sat on the mat.        The cat sat on the mat        \n",
      "                               Jaccard:             0.667\n",
      "                               Levenshtein (char):  0.957\n",
      "                               Levenshtein (word):  0.833\n",
      "                               Cosine BOW:          1.000\n",
      "                               Fuzzy Ratio:         0.978\n",
      "                               Fuzzy Partial:       1.000\n",
      "                               Fuzzy Token Sort:    0.978\n",
      "                               Fuzzy Token Set:     0.973\n",
      "                               LCS:                 1.000\n",
      "                               Containment:         1.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 3:                        The cat sat on the mat.        The  cat  sat  on  the  mat.  \n",
      "                               Jaccard:             1.000\n",
      "                               Levenshtein (char):  0.821\n",
      "                               Levenshtein (word):  1.000\n",
      "                               Cosine BOW:          1.000\n",
      "                               Fuzzy Ratio:         0.902\n",
      "                               Fuzzy Partial:       0.826\n",
      "                               Fuzzy Token Sort:    1.000\n",
      "                               Fuzzy Token Set:     1.000\n",
      "                               LCS:                 0.815\n",
      "                               Containment:         1.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 4:                        The cat sat on the mat.        On the mat, the cat was sit...\n",
      "                               Jaccard:             0.375\n",
      "                               Levenshtein (char):  0.344\n",
      "                               Levenshtein (word):  0.143\n",
      "                               Cosine BOW:          0.825\n",
      "                               Fuzzy Ratio:         0.509\n",
      "                               Fuzzy Partial:       0.619\n",
      "                               Fuzzy Token Sort:    0.764\n",
      "                               Fuzzy Token Set:     0.723\n",
      "                               LCS:                 0.433\n",
      "                               Containment:         0.800\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 5:                        The cat sat on the mat.        The feline rested on the rug. \n",
      "                               Jaccard:             0.250\n",
      "                               Levenshtein (char):  0.517\n",
      "                               Levenshtein (word):  0.500\n",
      "                               Cosine BOW:          0.625\n",
      "                               Fuzzy Ratio:         0.615\n",
      "                               Fuzzy Partial:       0.605\n",
      "                               Fuzzy Token Sort:    0.538\n",
      "                               Fuzzy Token Set:     0.480\n",
      "                               LCS:                 0.536\n",
      "                               Containment:         0.400\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 6:                        The quick brown fox jumps.     A fast brown fox leaps.       \n",
      "                               Jaccard:             0.250\n",
      "                               Levenshtein (char):  0.577\n",
      "                               Levenshtein (word):  0.400\n",
      "                               Cosine BOW:          0.400\n",
      "                               Fuzzy Ratio:         0.612\n",
      "                               Fuzzy Partial:       0.700\n",
      "                               Fuzzy Token Sort:    0.531\n",
      "                               Fuzzy Token Set:     0.562\n",
      "                               LCS:                 0.560\n",
      "                               Containment:         0.400\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 7:                        The cat sat on the mat.        The dog ran in the park.      \n",
      "                               Jaccard:             0.111\n",
      "                               Levenshtein (char):  0.625\n",
      "                               Levenshtein (word):  0.333\n",
      "                               Cosine BOW:          0.500\n",
      "                               Fuzzy Ratio:         0.638\n",
      "                               Fuzzy Partial:       0.636\n",
      "                               Fuzzy Token Sort:    0.553\n",
      "                               Fuzzy Token Set:     0.462\n",
      "                               LCS:                 0.609\n",
      "                               Containment:         0.200\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 8:                        I love programming.            She enjoys reading books.     \n",
      "                               Jaccard:             0.000\n",
      "                               Levenshtein (char):  0.200\n",
      "                               Levenshtein (word):  0.000\n",
      "                               Cosine BOW:          0.000\n",
      "                               Fuzzy Ratio:         0.409\n",
      "                               Fuzzy Partial:       0.432\n",
      "                               Fuzzy Token Sort:    0.364\n",
      "                               Fuzzy Token Set:     0.364\n",
      "                               LCS:                 0.333\n",
      "                               Containment:         0.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 9:                        The weather is nice today.     It's raining outside.         \n",
      "                               Jaccard:             0.000\n",
      "                               Levenshtein (char):  0.192\n",
      "                               Levenshtein (word):  0.000\n",
      "                               Cosine BOW:          0.000\n",
      "                               Fuzzy Ratio:         0.426\n",
      "                               Fuzzy Partial:       0.514\n",
      "                               Fuzzy Token Sort:    0.340\n",
      "                               Fuzzy Token Set:     0.340\n",
      "                               LCS:                 0.360\n",
      "                               Containment:         0.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 10:                       Short.                         Short.                        \n",
      "                               Jaccard:             1.000\n",
      "                               Levenshtein (char):  1.000\n",
      "                               Levenshtein (word):  1.000\n",
      "                               Cosine BOW:          1.000\n",
      "                               Fuzzy Ratio:         1.000\n",
      "                               Fuzzy Partial:       1.000\n",
      "                               Fuzzy Token Sort:    1.000\n",
      "                               Fuzzy Token Set:     1.000\n",
      "                               LCS:                 1.000\n",
      "                               Containment:         1.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 11:                       A B C D E F G                  A B C D E F G                 \n",
      "                               Jaccard:             1.000\n",
      "                               Levenshtein (char):  1.000\n",
      "                               Levenshtein (word):  1.000\n",
      "                               Cosine BOW:          1.000\n",
      "                               Fuzzy Ratio:         1.000\n",
      "                               Fuzzy Partial:       1.000\n",
      "                               Fuzzy Token Sort:    1.000\n",
      "                               Fuzzy Token Set:     1.000\n",
      "                               LCS:                 1.000\n",
      "                               Containment:         1.000\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Pair 12:                                                                                    \n",
      "                               Jaccard:             0.000\n",
      "                               Levenshtein (char):  1.000\n",
      "                               Levenshtein (word):  1.000\n",
      "                               Cosine BOW:          1.000\n",
      "                               Fuzzy Ratio:         1.000\n",
      "                               Fuzzy Partial:       1.000\n",
      "                               Fuzzy Token Sort:    1.000\n",
      "                               Fuzzy Token Set:     0.000\n",
      "                               LCS:                 0.000\n",
      "                               Containment:         0.000\n",
      "----------------------------------------------------------------------------------------------------\n"
     ]
    }
   ],
@@ -538,23 +629,23 @@
    "\n",
    "def print_comparison_table(results, pairs):\n",
    "    \"\"\" Print a formatted comparison table \"\"\"\n",
-    "    print(f\"{'Pair':<40} {'Sentence 1':<30} {'Sentence 2':<30}\")\n",
+    "    print(f\"{'Pair':<30} {'Sentence 1':<30} {'Sentence 2':<30}\")\n",
    "    print(\"=\" * 100)\n",
    "    \n",
    "    for i, (sent1, sent2) in enumerate(pairs):\n",
-    "        # Truncate long sentences for display\n",
+    "        # if sentence too long\n",
    "        display_sent1 = sent1[:27] + \"...\" if len(sent1) > 30 else sent1\n",
    "        display_sent2 = sent2[:27] + \"...\" if len(sent2) > 30 else sent2\n",
    "        \n",
-    "        print(f\"{f'Pair {i+1}:':<40} {display_sent1:<30} {display_sent2:<30}\")\n",
+    "        print(f\"{f'Pair {i+1}:':<30} {display_sent1:<30} {display_sent2:<30}\")\n",
    "        \n",
    "        # Print similarities for this pair\n",
    "        for method_name in results:\n",
    "            similarity = results[method_name][i]\n",
-    "            print(f\"{'':<40} {method_name + ':':<20} {similarity:.3f}\")\n",
+    "            print(f\"{'':<30} {method_name + ':':<20} {similarity:.3f}\")\n",
    "        print(\"-\" * 100)\n",
    "\n",
-    "results = evaluate_baseline_methods(test_pairs)\n",
+    "results, methods = evaluate_baseline_methods(test_pairs)\n",
    "print_comparison_table(results, test_pairs)"
   ]
  }