Skip to content

Commit 513e491

Browse files
istvanspace, istvanlorincz-code, and baskaryan
authored
experimental: LLMGraphTransformer - added relationship properties. (#21856)
- **Description:** The generated relationships in the graph had no properties, even though the Relationship class was properly defined with properties. This made it very difficult to transform conditional sentences into a graph. Adding properties to relationships can solve this issue elegantly. The changes expand on the existing LLMGraphTransformer implementation and add the possibility to define allowed relationship properties like this: `LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"],)`
- **Issue:** no issue found
- **Dependencies:** n/a
- **Twitter handle:** @istvanspace

Quick Test
=================================================================

```python
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

load_dotenv()
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")

graph = Neo4jGraph()
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)

#text = "Harry potter likes pies, but only if it rains outside"
text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside."
documents = [Document(page_content=text)]

llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    relationship_properties=["Condition"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
graph.add_graph_documents(graph_documents_props)
```

---------

Co-authored-by: Istvan Lorincz <[email protected]>
Co-authored-by: Bagatur <[email protected]>
1 parent 694ae87 commit 513e491

File tree

1 file changed

+128
-42
lines changed
  • libs/experimental/langchain_experimental/graph_transformers

1 file changed

+128
-42
lines changed

libs/experimental/langchain_experimental/graph_transformers/llm.py

Lines changed: 128 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@
7272
"You are a top-tier algorithm designed for extracting information in structured "
7373
"formats to build a knowledge graph.\n"
7474
"Try to capture as much information from the text as possible without "
75-
"sacrifing accuracy. Do not add any information that is not explicitly "
76-
"mentioned in the text\n"
75+
"sacrificing accuracy. Do not add any information that is not explicitly "
76+
"mentioned in the text.\n"
7777
"- **Nodes** represent entities and concepts.\n"
7878
"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
7979
"accessible for a vast audience.\n"
@@ -82,8 +82,8 @@
8282
"Ensure you use basic or elementary types for node labels.\n"
8383
"- For example, when you identify an entity representing a person, "
8484
"always label it as **'person'**. Avoid using more specific terms "
85-
"like 'mathematician' or 'scientist'"
86-
" - **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
85+
"like 'mathematician' or 'scientist'."
86+
"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
8787
"names or human-readable identifiers found in the text.\n"
8888
"- **Relationships** represent connections between entities or concepts.\n"
8989
"Ensure consistency and generality in relationship types when constructing "
@@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
138138
elif input_type == "relationship":
139139
return (
140140
"Instead of using specific and momentary types such as "
141-
"'BECAME_PROFESSOR', use more general and timeless relationship types like "
142-
"'PROFESSOR'. However, do not sacrifice any accuracy for generality"
141+
"'BECAME_PROFESSOR', use more general and timeless relationship types "
142+
"like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
143143
)
144144
elif input_type == "property":
145145
return ""
@@ -280,10 +280,32 @@ def create_simple_model(
280280
rel_types: Optional[List[str]] = None,
281281
node_properties: Union[bool, List[str]] = False,
282282
llm_type: Optional[str] = None,
283+
relationship_properties: Union[bool, List[str]] = False,
283284
) -> Type[_Graph]:
284285
"""
285-
Simple model allows to limit node and/or relationship types.
286-
Doesn't have any node or relationship properties.
286+
Create a simple graph model with optional constraints on node
287+
and relationship types.
288+
289+
Args:
290+
node_labels (Optional[List[str]]): Specifies the allowed node types.
291+
Defaults to None, allowing all node types.
292+
rel_types (Optional[List[str]]): Specifies the allowed relationship types.
293+
Defaults to None, allowing all relationship types.
294+
node_properties (Union[bool, List[str]]): Specifies if node properties should
295+
be included. If a list is provided, only properties with keys in the list
296+
will be included. If True, all properties are included. Defaults to False.
297+
relationship_properties (Union[bool, List[str]]): Specifies if relationship
298+
properties should be included. If a list is provided, only properties with
299+
keys in the list will be included. If True, all properties are included.
300+
Defaults to False.
301+
llm_type (Optional[str]): The type of the language model. Defaults to None.
302+
Only openai supports enum param: openai-chat.
303+
304+
Returns:
305+
Type[_Graph]: A graph model with the specified constraints.
306+
307+
Raises:
308+
ValueError: If 'id' is included in the node or relationship properties list.
287309
"""
288310

289311
node_fields: Dict[str, Tuple[Any, Any]] = {
@@ -325,39 +347,80 @@ class Property(BaseModel):
325347
)
326348
SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore
327349

328-
class SimpleRelationship(BaseModel):
329-
"""Represents a directed relationship between two nodes in a graph."""
330-
331-
source_node_id: str = Field(
332-
description="Name or human-readable unique identifier of source node"
333-
)
334-
source_node_type: str = optional_enum_field(
335-
node_labels,
336-
description="The type or label of the source node.",
337-
input_type="node",
338-
llm_type=llm_type,
339-
)
340-
target_node_id: str = Field(
341-
description="Name or human-readable unique identifier of target node"
342-
)
343-
target_node_type: str = optional_enum_field(
344-
node_labels,
345-
description="The type or label of the target node.",
346-
input_type="node",
347-
llm_type=llm_type,
350+
relationship_fields: Dict[str, Tuple[Any, Any]] = {
351+
"source_node_id": (
352+
str,
353+
Field(
354+
...,
355+
description="Name or human-readable unique identifier of source node",
356+
),
357+
),
358+
"source_node_type": (
359+
str,
360+
optional_enum_field(
361+
node_labels,
362+
description="The type or label of the source node.",
363+
input_type="node",
364+
),
365+
),
366+
"target_node_id": (
367+
str,
368+
Field(
369+
...,
370+
description="Name or human-readable unique identifier of target node",
371+
),
372+
),
373+
"target_node_type": (
374+
str,
375+
optional_enum_field(
376+
node_labels,
377+
description="The type or label of the target node.",
378+
input_type="node",
379+
),
380+
),
381+
"type": (
382+
str,
383+
optional_enum_field(
384+
rel_types,
385+
description="The type of the relationship.",
386+
input_type="relationship",
387+
),
388+
),
389+
}
390+
if relationship_properties:
391+
if (
392+
isinstance(relationship_properties, list)
393+
and "id" in relationship_properties
394+
):
395+
raise ValueError(
396+
"The relationship property 'id' is reserved and cannot be used."
397+
)
398+
# Map True to empty array
399+
relationship_properties_mapped: List[str] = (
400+
[] if relationship_properties is True else relationship_properties
348401
)
349-
type: str = optional_enum_field(
350-
rel_types,
351-
description="The type of the relationship.",
352-
input_type="relationship",
353-
llm_type=llm_type,
402+
403+
class RelationshipProperty(BaseModel):
404+
"""A single property consisting of key and value"""
405+
406+
key: str = optional_enum_field(
407+
relationship_properties_mapped,
408+
description="Property key.",
409+
input_type="property",
410+
)
411+
value: str = Field(..., description="value")
412+
413+
relationship_fields["properties"] = (
414+
Optional[List[RelationshipProperty]],
415+
Field(None, description="List of relationship properties"),
354416
)
417+
SimpleRelationship = create_model("SimpleRelationship", **relationship_fields) # type: ignore
355418

356419
class DynamicGraph(_Graph):
357420
"""Represents a graph document consisting of nodes and relationships."""
358421

359422
nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore
360-
relationships: Optional[List[SimpleRelationship]] = Field(
423+
relationships: Optional[List[SimpleRelationship]] = Field( # type: ignore
361424
description="List of relationships"
362425
)
363426

@@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
377440
"""Map the SimpleRelationship to the base Relationship."""
378441
source = Node(id=rel.source_node_id, type=rel.source_node_type)
379442
target = Node(id=rel.target_node_id, type=rel.target_node_type)
380-
return Relationship(source=source, target=target, type=rel.type)
443+
properties = {}
444+
if hasattr(rel, "properties") and rel.properties:
445+
for p in rel.properties:
446+
properties[format_property_key(p.key)] = p.value
447+
return Relationship(
448+
source=source, target=target, type=rel.type, properties=properties
449+
)
381450

382451

383452
def _parse_and_clean_json(
@@ -387,10 +456,15 @@ def _parse_and_clean_json(
387456
for node in argument_json["nodes"]:
388457
if not node.get("id"): # Id is mandatory, skip this node
389458
continue
459+
node_properties = {}
460+
if "properties" in node and node["properties"]:
461+
for p in node["properties"]:
462+
node_properties[format_property_key(p["key"])] = p["value"]
390463
nodes.append(
391464
Node(
392465
id=node["id"],
393466
type=node.get("type"),
467+
properties=node_properties,
394468
)
395469
)
396470
relationships = []
@@ -423,6 +497,11 @@ def _parse_and_clean_json(
423497
except IndexError:
424498
rel["target_node_type"] = None
425499

500+
rel_properties = {}
501+
if "properties" in rel and rel["properties"]:
502+
for p in rel["properties"]:
503+
rel_properties[format_property_key(p["key"])] = p["value"]
504+
426505
source_node = Node(
427506
id=rel["source_node_id"],
428507
type=rel["source_node_type"],
@@ -436,6 +515,7 @@ def _parse_and_clean_json(
436515
source=source_node,
437516
target=target_node,
438517
type=rel["type"],
518+
properties=rel_properties,
439519
)
440520
)
441521
return nodes, relationships
@@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
458538
source=_format_nodes([el.source])[0],
459539
target=_format_nodes([el.target])[0],
460540
type=el.type.replace(" ", "_").upper(),
541+
properties=el.properties,
461542
)
462543
for el in rels
463544
]
@@ -513,8 +594,8 @@ class LLMGraphTransformer:
513594
"""Transform documents into graph-based documents using a LLM.
514595
515596
It allows specifying constraints on the types of nodes and relationships to include
516-
in the output graph. The class doesn't support neither extract and node or
517-
relationship properties
597+
in the output graph. The class supports extracting properties for both nodes and
598+
relationships.
518599
519600
Args:
520601
llm (BaseLanguageModel): An instance of a language model supporting structured
@@ -553,6 +634,7 @@ def __init__(
553634
prompt: Optional[ChatPromptTemplate] = None,
554635
strict_mode: bool = True,
555636
node_properties: Union[bool, List[str]] = False,
637+
relationship_properties: Union[bool, List[str]] = False,
556638
) -> None:
557639
self.allowed_nodes = allowed_nodes
558640
self.allowed_relationships = allowed_relationships
@@ -564,14 +646,14 @@ def __init__(
564646
except NotImplementedError:
565647
self._function_call = False
566648
if not self._function_call:
567-
if node_properties:
649+
if node_properties or relationship_properties:
568650
raise ValueError(
569-
"The 'node_properties' parameter cannot be used "
570-
"in combination with a LLM that doesn't support "
651+
"The 'node_properties' and 'relationship_properties' parameters "
652+
"cannot be used in combination with a LLM that doesn't support "
571653
"native function calling."
572654
)
573655
try:
574-
import json_repair
656+
import json_repair # type: ignore
575657

576658
self.json_repair = json_repair
577659
except ImportError:
@@ -590,7 +672,11 @@ def __init__(
590672
except AttributeError:
591673
llm_type = None
592674
schema = create_simple_model(
593-
allowed_nodes, allowed_relationships, node_properties, llm_type
675+
allowed_nodes,
676+
allowed_relationships,
677+
node_properties,
678+
llm_type,
679+
relationship_properties,
594680
)
595681
structured_llm = llm.with_structured_output(schema, include_raw=True)
596682
prompt = prompt or default_prompt

0 commit comments

Comments
 (0)