Skip to main content

Merging Examples

This page provides practical examples of merging extracted data into knowledge graphs using the Graphora client library.

Basic Merging Operation

After transforming documents, you can merge the extracted data into a knowledge graph:
import os

from graphora import GraphoraClient

# Authenticate with the token held in the GRAPHORA_AUTH_TOKEN environment variable.
token = os.environ["GRAPHORA_AUTH_TOKEN"]
client = GraphoraClient(auth_token=token)

# Identifiers of a previously completed transformation
session_id = "ont_123456789"  # Ontology ID
transform_id = "transform_123456789"

# Kick off the merge and report the ID it was assigned
merge = client.start_merge(session_id, transform_id)
merge_id = merge.merge_id
print(f"Merge started with ID: {merge_id}")

# Query the merge status once and show it
state = client.get_merge_status(merge_id)
print("Merge state:", state)

Monitoring Merge Progress

For large datasets, merging can take some time. Here’s how to monitor the progress:
import os
import time

from graphora import GraphoraClient, MergeState

client = GraphoraClient(
    auth_token=os.environ["GRAPHORA_AUTH_TOKEN"],
)

# Start a merge (assuming you have already completed a transformation)
session_id = "ont_123456789"  # Ontology ID
transform_id = "transform_123456789"
merge_response = client.start_merge(session_id, transform_id)
merge_id = merge_response.merge_id

# Polling settings. The deadline keeps the loop from running forever when a
# merge never reaches one of the terminal states checked below.
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 600  # adjust to the size of your dataset
TERMINAL_STATES = (MergeState.COMPLETED, MergeState.FAILED)

# Monitor the merge progress
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    state = client.get_merge_status(merge_id)
    print(f"Merge state: {state}")

    if state in TERMINAL_STATES:
        break
    if time.monotonic() >= deadline:
        # Stop polling; the merge may still be running on the server.
        print(f"Gave up waiting for merge {merge_id} after {MAX_WAIT_SECONDS} seconds")
        break
    time.sleep(POLL_INTERVAL_SECONDS)

if state == MergeState.COMPLETED:
    graph = client.get_merged_graph(merge_id, transform_id)
    print(f"Retrieved graph with {len(graph.nodes)} nodes and {len(graph.edges)} edges")
elif state == MergeState.FAILED:
    # Surface the failure instead of ending silently.
    print(f"Merge {merge_id} failed")

Handling Merge Conflicts

When merging data, conflicts may arise. Here’s how to handle them:
import os

from graphora import GraphoraClient, ResolutionStrategy

# Authenticate with the token held in the GRAPHORA_AUTH_TOKEN environment variable.
token = os.environ["GRAPHORA_AUTH_TOKEN"]
client = GraphoraClient(auth_token=token)

# Fetch the conflicts recorded for a merge that has detected conflicts
merge_id = "merge_123456789"
conflicts = client.get_conflicts(merge_id)
conflict_count = len(conflicts)

print(f"Found {conflict_count} conflicts")

# Show both sides of every conflict, then resolve it in favour of staging
for item in conflicts:
    conflict_id = item.id
    print(f"Conflict ID: {conflict_id}")

    staging_props = item.staging_node.properties
    print(f"Staging props: {staging_props}")

    production_props = item.prod_node.properties
    print(f"Production props: {production_props}")

    # No property overrides are supplied; the staging record is kept as-is.
    client.resolve_conflict(
        merge_id,
        conflict_id,
        changed_props={},
        resolution=ResolutionStrategy.KEEP_STAGING,
        learning_comment="Prefer staging record",
    )

Getting Merge Statistics

You can get detailed statistics about a merge operation:
import os

from graphora import GraphoraClient

# Authenticate the same way as the other examples on this page: read the token
# from the environment instead of hard-coding credentials in the script.
client = GraphoraClient(
    auth_token=os.environ["GRAPHORA_AUTH_TOKEN"],
)

# Get merge statistics
merge_id = "merge_123456789"
stats = client.get_merge_statistics(merge_id)

# Label/key pairs to report; a missing key is shown as 0.
STAT_FIELDS = (
    ("Total entities processed", "total_entities"),
    ("Entities merged", "merged_entities"),
    ("New entities created", "new_entities"),
    ("Conflicts detected", "conflicts_detected"),
    ("Conflicts resolved", "conflicts_resolved"),
)

print("Merge Statistics:")
for label, key in STAT_FIELDS:
    print(f"{label}: {stats.get(key, 0)}")

Retrieving and Using the Merged Graph

Once a merge is complete, you can retrieve and use the graph data:
import os

from graphora import GraphoraClient

# Authenticate the same way as the other examples on this page: read the token
# from the environment instead of hard-coding credentials in the script.
client = GraphoraClient(
    auth_token=os.environ["GRAPHORA_AUTH_TOKEN"],
)

# Get the merged graph
merge_id = "merge_123456789"
transform_id = "transform_123456789"
graph_response = client.get_merged_graph(merge_id, transform_id)

# Print some statistics
print(f"Graph contains {len(graph_response.nodes)} nodes and {len(graph_response.edges)} edges")

# Examine nodes
for node in graph_response.nodes[:5]:  # First 5 nodes
    print(f"Node {node.id}: {node.labels} - {node.properties}")

# Examine edges
for edge in graph_response.edges[:5]:  # First 5 edges
    print(f"Edge {edge.id}: {edge.source} -> {edge.target} ({edge.type})")
These examples demonstrate the basic operations for merging data with the Graphora client library. For more detailed information about merging concepts and options, see the Merging Concepts page.