7272 "You are a top-tier algorithm designed for extracting information in structured "
7373 "formats to build a knowledge graph.\n "
7474 "Try to capture as much information from the text as possible without "
75- "sacrifing accuracy. Do not add any information that is not explicitly "
76- "mentioned in the text\n "
75+ "sacrificing accuracy. Do not add any information that is not explicitly "
76+ "mentioned in the text. \n "
7777 "- **Nodes** represent entities and concepts.\n "
7878 "- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n "
7979 "accessible for a vast audience.\n "
8282 "Ensure you use basic or elementary types for node labels.\n "
8383 "- For example, when you identify an entity representing a person, "
8484 "always label it as **'person'**. Avoid using more specific terms "
85- "like 'mathematician' or 'scientist'"
86- " - **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
85+ "like 'mathematician' or 'scientist'. "
86+ "- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
8787 "names or human-readable identifiers found in the text.\n "
8888 "- **Relationships** represent connections between entities or concepts.\n "
8989 "Ensure consistency and generality in relationship types when constructing "
@@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
138138 elif input_type == "relationship" :
139139 return (
140140 "Instead of using specific and momentary types such as "
141- "'BECAME_PROFESSOR', use more general and timeless relationship types like "
142- "'PROFESSOR'. However, do not sacrifice any accuracy for generality"
141+ "'BECAME_PROFESSOR', use more general and timeless relationship types "
142+ "like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
143143 )
144144 elif input_type == "property" :
145145 return ""
@@ -280,10 +280,32 @@ def create_simple_model(
280280 rel_types : Optional [List [str ]] = None ,
281281 node_properties : Union [bool , List [str ]] = False ,
282282 llm_type : Optional [str ] = None ,
283+ relationship_properties : Union [bool , List [str ]] = False ,
283284) -> Type [_Graph ]:
284285 """
285- Simple model allows to limit node and/or relationship types.
286- Doesn't have any node or relationship properties.
286+ Create a simple graph model with optional constraints on node
287+ and relationship types.
288+
289+ Args:
290+ node_labels (Optional[List[str]]): Specifies the allowed node types.
291+ Defaults to None, allowing all node types.
292+ rel_types (Optional[List[str]]): Specifies the allowed relationship types.
293+ Defaults to None, allowing all relationship types.
294+ node_properties (Union[bool, List[str]]): Specifies if node properties should
295+ be included. If a list is provided, only properties with keys in the list
296+ will be included. If True, all properties are included. Defaults to False.
297+ relationship_properties (Union[bool, List[str]]): Specifies if relationship
298+ properties should be included. If a list is provided, only properties with
299+ keys in the list will be included. If True, all properties are included.
300+ Defaults to False.
301+ llm_type (Optional[str]): The type of the language model. Defaults to None.
302+ Only openai supports enum param: openai-chat.
303+
304+ Returns:
305+ Type[_Graph]: A graph model with the specified constraints.
306+
307+ Raises:
308+ ValueError: If 'id' is included in the node or relationship properties list.
287309 """
288310
289311 node_fields : Dict [str , Tuple [Any , Any ]] = {
@@ -325,39 +347,80 @@ class Property(BaseModel):
325347 )
326348 SimpleNode = create_model ("SimpleNode" , ** node_fields ) # type: ignore
327349
328- class SimpleRelationship (BaseModel ):
329- """Represents a directed relationship between two nodes in a graph."""
330-
331- source_node_id : str = Field (
332- description = "Name or human-readable unique identifier of source node"
333- )
334- source_node_type : str = optional_enum_field (
335- node_labels ,
336- description = "The type or label of the source node." ,
337- input_type = "node" ,
338- llm_type = llm_type ,
339- )
340- target_node_id : str = Field (
341- description = "Name or human-readable unique identifier of target node"
342- )
343- target_node_type : str = optional_enum_field (
344- node_labels ,
345- description = "The type or label of the target node." ,
346- input_type = "node" ,
347- llm_type = llm_type ,
350+ relationship_fields : Dict [str , Tuple [Any , Any ]] = {
351+ "source_node_id" : (
352+ str ,
353+ Field (
354+ ...,
355+ description = "Name or human-readable unique identifier of source node" ,
356+ ),
357+ ),
358+ "source_node_type" : (
359+ str ,
360+ optional_enum_field (
361+ node_labels ,
362+ description = "The type or label of the source node." ,
363+ input_type = "node" ,
364+ ),
365+ ),
366+ "target_node_id" : (
367+ str ,
368+ Field (
369+ ...,
370+ description = "Name or human-readable unique identifier of target node" ,
371+ ),
372+ ),
373+ "target_node_type" : (
374+ str ,
375+ optional_enum_field (
376+ node_labels ,
377+ description = "The type or label of the target node." ,
378+ input_type = "node" ,
379+ ),
380+ ),
381+ "type" : (
382+ str ,
383+ optional_enum_field (
384+ rel_types ,
385+ description = "The type of the relationship." ,
386+ input_type = "relationship" ,
387+ ),
388+ ),
389+ }
390+ if relationship_properties :
391+ if (
392+ isinstance (relationship_properties , list )
393+ and "id" in relationship_properties
394+ ):
395+ raise ValueError (
396+ "The relationship property 'id' is reserved and cannot be used."
397+ )
398+ # Map True to empty array
399+ relationship_properties_mapped : List [str ] = (
400+ [] if relationship_properties is True else relationship_properties
348401 )
349- type : str = optional_enum_field (
350- rel_types ,
351- description = "The type of the relationship." ,
352- input_type = "relationship" ,
353- llm_type = llm_type ,
402+
403+ class RelationshipProperty (BaseModel ):
404+ """A single property consisting of key and value"""
405+
406+ key : str = optional_enum_field (
407+ relationship_properties_mapped ,
408+ description = "Property key." ,
409+ input_type = "property" ,
410+ )
411+ value : str = Field (..., description = "value" )
412+
413+ relationship_fields ["properties" ] = (
414+ Optional [List [RelationshipProperty ]],
415+ Field (None , description = "List of relationship properties" ),
354416 )
417+ SimpleRelationship = create_model ("SimpleRelationship" , ** relationship_fields ) # type: ignore
355418
356419 class DynamicGraph (_Graph ):
357420 """Represents a graph document consisting of nodes and relationships."""
358421
359422 nodes : Optional [List [SimpleNode ]] = Field (description = "List of nodes" ) # type: ignore
360- relationships : Optional [List [SimpleRelationship ]] = Field (
423+ relationships : Optional [List [SimpleRelationship ]] = Field ( # type: ignore
361424 description = "List of relationships"
362425 )
363426
@@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
377440 """Map the SimpleRelationship to the base Relationship."""
378441 source = Node (id = rel .source_node_id , type = rel .source_node_type )
379442 target = Node (id = rel .target_node_id , type = rel .target_node_type )
380- return Relationship (source = source , target = target , type = rel .type )
443+ properties = {}
444+ if hasattr (rel , "properties" ) and rel .properties :
445+ for p in rel .properties :
446+ properties [format_property_key (p .key )] = p .value
447+ return Relationship (
448+ source = source , target = target , type = rel .type , properties = properties
449+ )
381450
382451
383452def _parse_and_clean_json (
@@ -387,10 +456,15 @@ def _parse_and_clean_json(
387456 for node in argument_json ["nodes" ]:
388457 if not node .get ("id" ): # Id is mandatory, skip this node
389458 continue
459+ node_properties = {}
460+ if "properties" in node and node ["properties" ]:
461+ for p in node ["properties" ]:
462+ node_properties [format_property_key (p ["key" ])] = p ["value" ]
390463 nodes .append (
391464 Node (
392465 id = node ["id" ],
393466 type = node .get ("type" ),
467+ properties = node_properties ,
394468 )
395469 )
396470 relationships = []
@@ -423,6 +497,11 @@ def _parse_and_clean_json(
423497 except IndexError :
424498 rel ["target_node_type" ] = None
425499
500+ rel_properties = {}
501+ if "properties" in rel and rel ["properties" ]:
502+ for p in rel ["properties" ]:
503+ rel_properties [format_property_key (p ["key" ])] = p ["value" ]
504+
426505 source_node = Node (
427506 id = rel ["source_node_id" ],
428507 type = rel ["source_node_type" ],
@@ -436,6 +515,7 @@ def _parse_and_clean_json(
436515 source = source_node ,
437516 target = target_node ,
438517 type = rel ["type" ],
518+ properties = rel_properties ,
439519 )
440520 )
441521 return nodes , relationships
@@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
458538 source = _format_nodes ([el .source ])[0 ],
459539 target = _format_nodes ([el .target ])[0 ],
460540 type = el .type .replace (" " , "_" ).upper (),
541+ properties = el .properties ,
461542 )
462543 for el in rels
463544 ]
@@ -513,8 +594,8 @@ class LLMGraphTransformer:
513594 """Transform documents into graph-based documents using a LLM.
514595
515596 It allows specifying constraints on the types of nodes and relationships to include
516- in the output graph. The class doesn't support neither extract and node or
517- relationship properties
597+ in the output graph. The class supports extracting properties for both nodes and
598+ relationships.
518599
519600 Args:
520601 llm (BaseLanguageModel): An instance of a language model supporting structured
@@ -553,6 +634,7 @@ def __init__(
553634 prompt : Optional [ChatPromptTemplate ] = None ,
554635 strict_mode : bool = True ,
555636 node_properties : Union [bool , List [str ]] = False ,
637+ relationship_properties : Union [bool , List [str ]] = False ,
556638 ) -> None :
557639 self .allowed_nodes = allowed_nodes
558640 self .allowed_relationships = allowed_relationships
@@ -564,14 +646,14 @@ def __init__(
564646 except NotImplementedError :
565647 self ._function_call = False
566648 if not self ._function_call :
567- if node_properties :
649+ if node_properties or relationship_properties :
568650 raise ValueError (
569- "The 'node_properties' parameter cannot be used "
570- "in combination with a LLM that doesn't support "
651+ "The 'node_properties' and 'relationship_properties' parameters "
652+ "cannot be used in combination with a LLM that doesn't support "
571653 "native function calling."
572654 )
573655 try :
574- import json_repair
656+ import json_repair # type: ignore
575657
576658 self .json_repair = json_repair
577659 except ImportError :
@@ -590,7 +672,11 @@ def __init__(
590672 except AttributeError :
591673 llm_type = None
592674 schema = create_simple_model (
593- allowed_nodes , allowed_relationships , node_properties , llm_type
675+ allowed_nodes ,
676+ allowed_relationships ,
677+ node_properties ,
678+ llm_type ,
679+ relationship_properties ,
594680 )
595681 structured_llm = llm .with_structured_output (schema , include_raw = True )
596682 prompt = prompt or default_prompt
0 commit comments