add bill sponsors and dedupe script
This commit is contained in:
parent
c1e456aedc
commit
b012392fc9
55
add_bill_sponsors.py
Normal file
55
add_bill_sponsors.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from neo4j import GraphDatabase
|
||||||
|
|
||||||
|
# Populate the process environment from a local .env file, if one exists
load_dotenv()

# Neo4j connection settings; each value is None when the variable is unset
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USER = os.environ.get('NEO4J_USER')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
|
||||||
|
|
||||||
|
# Function to connect to the Neo4j database and fetch the "sponsors.0.bioguideId" property of all Bill nodes
|
||||||
|
def get_sponsors_bioguide_id(uri, user, password):
    """Link each Bill to its first-listed sponsor via a SPONSORED relationship.

    For every ``Bill`` node whose ``sponsors.0.bioguideId`` property equals the
    ``bioguideId`` of a ``Person`` node, make sure a
    ``(Person)-[:SPONSORED]->(Bill)`` relationship exists. Bills without a
    sponsor id, bills with no matching Person, and pairs that are already
    linked are skipped, so re-running the script does not add duplicates.

    The properties of each newly linked Person are printed to stdout.

    Args:
        uri: Neo4j connection URI.
        user: Neo4j user name.
        password: Neo4j password.

    Returns:
        None.
    """
    # A single statement finds and links the missing (Person, Bill) pairs:
    #  * the "already linked" predicate uses the same direction as the MERGE,
    #    so existing relationships are actually detected;
    #  * MERGE acts on the exact pair bound in each row, so a sponsor with
    #    many bills gets one relationship per bill instead of one per
    #    (bill, bill) combination;
    #  * the IS NOT NULL guard skips bills that carry no sponsor id.
    query = """
    MATCH (b:Bill)
    WITH b, b.`sponsors.0.bioguideId` AS bioguideId
    WHERE bioguideId IS NOT NULL
    MATCH (p:Person {bioguideId: bioguideId})
    WHERE NOT (p)-[:SPONSORED]->(b)
    MERGE (p)-[:SPONSORED]->(b)
    RETURN bioguideId AS sponsorBioguideId, p
    """

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            for record in session.run(query):
                matched_person = record['p']

                print("Matched Person Node:")
                for key, value in matched_person.items():
                    print(f"{key}: {value}")

                print("Created SPONSORED relationship.")
    finally:
        # Release the connection pool even when a query fails.
        driver.close()
|
||||||
|
|
||||||
|
# Script entry: link bills to sponsors using the connection settings read above
get_sponsors_bioguide_id(uri=NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD)
|
55
dedupe_relationships.py
Normal file
55
dedupe_relationships.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from neo4j import GraphDatabase
|
||||||
|
|
||||||
|
# Populate the process environment from a local .env file, if one exists
load_dotenv()

# Neo4j connection settings; each value is None when the variable is unset
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USER = os.environ.get('NEO4J_USER')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
|
||||||
|
|
||||||
|
# Function to connect to the Neo4j database and remove duplicate relationships while keeping one of each type
|
||||||
|
def remove_duplicate_relationships(uri, user, password):
    """Delete duplicate relationships, keeping one per (start, type, end).

    Relationships are grouped by start node, end node and relationship type.
    In every group with more than one member, the first collected relationship
    is kept and the rest are deleted. Relationship properties are not
    compared: two relationships of the same type between the same two nodes
    count as duplicates even if their properties differ.

    The ids of the deleted and the surviving relationships are printed to
    stdout.

    Args:
        uri: Neo4j connection URI.
        user: Neo4j user name.
        password: Neo4j password.

    Returns:
        None.
    """
    # Only the relationship ids are returned; the nodes and the type are
    # needed as grouping keys but not on the client side.
    identify_query = """
    MATCH (n)-[r]->(m)
    WITH type(r) AS relType, n AS startNode, m AS endNode, collect(id(r)) AS relIds
    WHERE size(relIds) > 1
    RETURN relIds
    ORDER BY size(relIds) DESC
    """
    delete_query = "MATCH ()-[r]->() WHERE id(r) IN $relIds DELETE r"

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Read all groups up front so no result stream is still open
            # when the deletes are issued on the same session.
            duplicate_groups = [
                record['relIds'] for record in session.run(identify_query)
            ]

            for rel_ids in duplicate_groups:
                # The query guarantees at least two ids per group.
                keep_rel_id, *delete_rel_ids = rel_ids

                # One round-trip per group; consume() waits for completion
                # so a failed delete surfaces here.
                session.run(delete_query, relIds=delete_rel_ids).consume()

                for del_rel_id in delete_rel_ids:
                    print(f"Deleted relationship with ID: {del_rel_id}")

                print("Remaining Relationship ID:", keep_rel_id)
                print("\n")
    finally:
        # Release the connection pool even when a query fails.
        driver.close()
|
||||||
|
|
||||||
|
# Script entry: remove duplicate relationships using the connection settings read above
remove_duplicate_relationships(uri=NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD)
|
Loading…
Reference in New Issue
Block a user