56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
import os
|
|
from dotenv import load_dotenv
|
|
from neo4j import GraphDatabase
|
|
|
|
# Load variables from the .env file before the settings below are read.
load_dotenv()

# Neo4j connection settings taken from the environment; each one is None
# when the corresponding variable is not set.
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USER = os.environ.get('NEO4J_USER')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
|
|
|
|
# Function to connect to the Neo4j database and remove duplicate relationships while keeping one of each type
|
|
def remove_duplicate_relationships(uri, user, password):
    """Delete duplicate relationships, keeping one per group.

    Relationships are grouped by start node, end node and relationship
    type. For every group that holds more than one relationship, the first
    collected relationship ID is kept and all the others are deleted. The
    deleted and remaining IDs are reported with ``print``.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: User name used for authentication.
        password: Password used for authentication.

    Returns:
        None.
    """
    # NOTE(review): id() is deprecated on recent Neo4j servers in favour of
    # elementId(); confirm the target server version before switching.
    identify_query = """
        MATCH (n)-[r]->(m)
        WITH type(r) AS relType, n AS startNode, m AS endNode, collect(id(r)) AS relIds
        WHERE size(relIds) > 1
        RETURN relType, relIds
        ORDER BY size(relIds) DESC
    """
    delete_query = "MATCH ()-[r]->() WHERE id(r) IN $relIds DELETE r"

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Read every duplicate group up front so that no delete runs
            # while the identifying result is still being iterated.
            duplicate_groups = [
                record['relIds'] for record in session.run(identify_query)
            ]

            for rel_ids in duplicate_groups:
                keep_rel_id = rel_ids[0]
                delete_rel_ids = rel_ids[1:]
                if not delete_rel_ids:
                    # The query already filters these out; kept as a guard.
                    continue

                # One round trip per group instead of one per relationship.
                session.run(delete_query, relIds=delete_rel_ids).consume()

                for del_rel_id in delete_rel_ids:
                    print(f"Deleted relationship with ID: {del_rel_id}")

                print("Remaining Relationship ID:", keep_rel_id)
                print("\n")
    finally:
        # Release the driver's connections even when a query fails.
        driver.close()
|
|
|
|
# Run the duplicate clean-up with the connection settings defined above.
remove_duplicate_relationships(
    uri=NEO4J_URI,
    user=NEO4J_USER,
    password=NEO4J_PASSWORD,
)
|