frflccg/test.ipynb
2023-11-28 17:38:25 +01:00

1492 lines
50 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from nltk.ccg import chart, lexicon\n",
"from nltk.ccg.lexicon import CCGLexicon, Token, augParseCategory\n",
"from nltk.ccg.chart import CCGChart,CCGLeafEdge,BinaryCombinatorRule,CCGEdge\n",
"from nltk.tree import Tree\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Weighed Lexicon"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from warnings import warn\n",
"\n",
"class WeighedToken(Token):\n",
" def __init__(self, token, categ, semantics=None, weight = 1.0):\n",
" super().__init__(token, categ, semantics= semantics)\n",
" self._weight = weight\n",
" def weight(self):\n",
" \"\"\"1.0 is considered the default weight for any token\"\"\"\n",
" try:\n",
" return self._weight\n",
" except AttributeError:\n",
" warn(f\"[{self.token} : {str(self)}] : this token has no weight attribute, defaulted to 1.0.\")\n",
" return 1.0\n",
"\n",
"class WeighedLexicon(CCGLexicon):\n",
" def __init__(self, start, primitives, families, entries):\n",
" super().__init__(start, primitives, families, entries)\n",
"\n",
" def weight(self, entry):\n",
" return entry.weight()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CYK"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We define the weight associated to each reduction rule.\n",
"`rweight(rule)` should return the weight associated to the rul, using its string representation (i.e. the name of the rule)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"valz = {\n",
" '>' : 0.8,\n",
" '<' : 0.7\n",
"}\n",
"def rweight(rule):\n",
" s = rule.__str__()\n",
" if s in valz:\n",
" return valz[s]\n",
" else:\n",
" return 1.0 # Base rules weight"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`weightedParse` implements the CKY algorithm, based on the implementation in the nltk library.\n",
"We take the weight from the weighted lexicon for the leafs, and we compute it using the formula for each reduction rule.\n",
"$$ w_{node} = \\phi_r \\times w_{child1} \\times w_{child2}$$"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Implements the CYK algorithm, code partly taken from nltk\n",
"def weightedParse(tokens, lex, rules):\n",
" \"\"\"made to take weighed tokens and lexicons\"\"\"\n",
" chart = CCGChart(list(tokens))\n",
" \n",
" # Initialize leaf edges.\n",
" for index in range(chart.num_leaves()):\n",
" for token in lex.categories(chart.leaf(index)):\n",
" new_edge = CCGLeafEdge(index, token, chart.leaf(index))\n",
" new_edge.weight = token.weight()\n",
" chart.insert(new_edge, ())\n",
"\n",
" # Select a span for the new edges\n",
" for span in range(2, chart.num_leaves() + 1):\n",
" for start in range(0, chart.num_leaves() - span + 1):\n",
" \n",
" print(\"==>\",span,start)\n",
" \n",
" bestedge = None\n",
" nedg = 0\n",
" # edges[s] is the best edge generating the category s\n",
" edges = dict()\n",
" \n",
" # Try all possible pairs of edges that could generate\n",
" # an edge for that span\n",
" for part in range(1, span):\n",
" lstart = start\n",
" mid = start + part\n",
" rend = start + span\n",
" \n",
" for left in chart.select(span=(lstart, mid)):\n",
" for right in chart.select(span=(mid, rend)):\n",
" # Generate all possible combinations of the two edges\n",
" for rule in rules:\n",
" # Can we apply the rule\n",
" if rule.can_combine(left.categ(), right.categ()):\n",
" for res in rule.combine(left.categ(), right.categ()):\n",
" # res is the new category\n",
" edge = CCGEdge(\n",
" span=(left.start(), right.end()),\n",
" categ=res,\n",
" rule=BinaryCombinatorRule(rule),\n",
" )\n",
" edge.weight = rweight(rule) * left.weight * right.weight\n",
" edge.triple = (rule,left,right)\n",
" if not(res in edges and edges[res].weight<=edge.weight):\n",
" edges[res] = edge\n",
" # end for rule loop\n",
" # end for right loop\n",
" # end for left loop\n",
" # end for part loop\n",
" for cat in edges:\n",
" chart.insert(edges[cat], (edges[cat].triple[1], edges[cat].triple[2]))\n",
" return chart"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def wpToTree(edge):\n",
" if isinstance(edge,CCGLeafEdge):\n",
" return Tree((edge.token(),\"Leaf\"),[Tree(edge.token(),[edge.leaf()])])\n",
" else:\n",
" return Tree(\n",
" (chart.Token(None,edge.categ()),edge.triple[0].__str__()),\n",
" [wpToTree(t) for t in (edge.triple[1:])])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def bestTree(tokens, lex, rules):\n",
" # We build the weighgted parse tree using cky\n",
" wChart = weightedParse(tokens, lex, rules)\n",
" # We get the biggest edge\n",
" e = list(wChart.select(start=0,end=len(tokens)))[0]\n",
" print(\"Edge count:\",len(list(wChart.select(start=0,end=len(tokens)))))\n",
" # We get the tree that brought us to this edge\n",
" t = wChart._trees(e, True, dict(), Tree)[0]\n",
" # (wpToTree(e),e.weight)\n",
" return (t,e.weight)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Application"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from numbers import Number\n",
"from nltk.sem.logic import Expression\n",
"from nltk.ccg.api import PrimitiveCategory\n",
"\n",
"def to_pseudo_entries(table, consider_semantics = True):\n",
" \"\"\"returns a list of lists in the format ['word', 'category', 'weight', None]\n",
" if consider_semantics == false else ['word', 'category', weight, 'semantic']\n",
" that is left to be converted into tokens by to_wlex_entries\"\"\"\n",
"\n",
" entries = list()\n",
" for line in range(len(table['MOT'])):\n",
" for wdi, word in enumerate(table['MOT'][line].replace(\" \", \"\").split('/')):\n",
" for j in range(3):\n",
" if isinstance(table['Cat'+str(j)][line],str):\n",
" category = table['Cat'+str(j)][line]\n",
" weight = float(table['Weights'+str(j)][line]) if isinstance(table['Weights'+str(j)][line], Number) else 1.0\n",
" if consider_semantics:\n",
" semantic = (table['Sem'+str(j)][line].replace('\\\\\\\\', '\\\\').split('/'))[wdi]\n",
" else:\n",
" semantic = None\n",
" entries.append([word, category, weight, semantic])\n",
" return entries\n",
"\n",
"def to_wlex_entries(pseudo_entries, primitives, families, var=None):\n",
" \"\"\"returns the entries to a weighed lexicon from pseudo_entries generated by to_pseudo_entries\"\"\"\n",
" entries = dict()\n",
" for entry in pseudo_entries:\n",
" if entry[0] not in entries:\n",
" entries[entry[0]] = list()\n",
" categ, _ = augParseCategory(entry[1], primitives, families, var)\n",
" token = WeighedToken(token= entry[0],\n",
" categ= categ,\n",
" semantics= None if entry[-1] is None else Expression.fromstring(entry[-1]),\n",
" weight= entry[2])\n",
" entries[entry[0]].append(token)\n",
" return entries\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We create our lexicon using the data from the server"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Catégories primitives et familles\n",
"primitives = ['S', 'N', 'Pp', 'pN']\n",
"V = augParseCategory(\"S\\\\N\", primitives = primitives, families={})\n",
"families = {'V': V}\n",
"\n",
"# On importe notre lexique sous forme de tableur\n",
"table = pd.read_excel(\"CategoriesGramaticalesCombinatoire.ods\", engine=\"odf\")\n",
"#print(table.keys())\n",
"\n",
"# On le convertit en Lexique pondéré\n",
"pe = to_pseudo_entries(table, consider_semantics = False)\n",
"#print(pe)\n",
"wEntries = to_wlex_entries(pseudo_entries= pe, primitives= primitives, families= families)\n",
"#print([list(map(lambda x: f\"{k} : \"+ str(x) + str(x._semantics), L)) for k, L in wEntries.items()])\n",
"lex = WeighedLexicon(start= 'S', primitives= primitives, families= families, entries= wEntries)\n",
"\n",
"\n",
"# On crée le parser, on donne l'ensemble des règles qu'il est cencé connaître\n",
"from nltk.ccg.combinator import (\n",
" BackwardApplication,\n",
" BackwardBx,\n",
" BackwardComposition,\n",
" BackwardSx,\n",
" ForwardApplication,\n",
" ForwardComposition,\n",
" ForwardSubstitution\n",
")\n",
"rulesC = [ForwardApplication,BackwardApplication] \n",
"rulesC += [ForwardComposition,BackwardComposition,BackwardBx]\n",
"rulesC += [ForwardSubstitution,BackwardSx]\n",
"rulesR = [BinaryCombinatorRule(c) for c in rulesC]\n",
"# chart.ApplicationRuleSet for only < and >\n",
"\n",
"parser = chart.CCGChartParser(lex, rulesR)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On lit les phrases depuis le fichier `phrases.txt`, et pour chacune, on imprime le nombre de dérivations trouvées, ainsi que le meilleur arbre de dérivation (i.e. de meilleur poids)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n",
"2 found derivation for sentence: le méchant chat dort\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
"----------------->B\n",
" (N/pN)\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"==> 2 0\n",
"==> 2 1\n",
"==> 2 2\n",
"==> 3 0\n",
"==> 3 1\n",
"==> 4 0\n",
"Edge count: 1\n",
"Best derivation tree has weight 0.24957917865181148\n",
" le méchant chat dort\n",
" (N/pN) (pN/pN) pN (S\\N)\n",
" --------------->\n",
" pN\n",
"----------------------->\n",
" N\n",
"------------------------------<\n",
" S\n",
"##########################################\n"
]
}
],
"source": [
"# Parse every sentence of `phrases.txt` (plus extra test sentences): print all\n",
"# derivations found by `parser`, then the best-weighted one from `bestTree`.\n",
"\n",
"# Read the sentences, one per line. The file is closed as soon as its content\n",
"# is in memory, before any parsing starts.\n",
"# NOTE(review): assumes phrases.txt is UTF-8 encoded (accented French text) - confirm.\n",
"with open('phrases.txt', encoding='utf-8') as f:\n",
"    lines = f.readlines()\n",
"\n",
"# Add extra test sentences\n",
"lines.append(\"le chat et la souris dorment\")\n",
"\n",
"for phrase in lines:\n",
"    # Normalise: lower-case, and strip the trailing newline / surrounding blanks\n",
"    phrase = phrase.lower().strip()\n",
"    if not phrase:\n",
"        # Blank line in the file: nothing to parse\n",
"        continue\n",
"    tokens = phrase.split()\n",
"\n",
"    # Parse once and keep the derivations so they can be both counted and printed\n",
"    derivations = list(parser.parse(tokens))\n",
"    print(len(derivations), \"found derivation for sentence:\", phrase)\n",
"    for tree in derivations:\n",
"        chart.printCCGDerivation(tree)\n",
"\n",
"    # Show the best derivation only when the sentence has at least one parse\n",
"    if derivations:\n",
"        best_tree, best_weight = bestTree(tokens, lex, rulesC)\n",
"        print(\"Best derivation tree has weight\", best_weight)\n",
"        chart.printCCGDerivation(best_tree)\n",
"\n",
"    print(\"#\"*42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(lex)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}