"""add_bill_sponsors.py: link each Bill node to the Person node that sponsored it."""
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables from .env file
load_dotenv()

# Get Neo4j connection info from environment variables
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USER = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')


def get_sponsors_bioguide_id(uri, user, password):
    """Create missing ``(Person)-[:SPONSORED]->(Bill)`` relationships.

    For every ``Bill`` node, the ``sponsors.0.bioguideId`` property is matched
    against ``Person.bioguideId``. Each (bill, person) pair that is not yet
    connected by a ``SPONSORED`` relationship is printed and then linked.

    Despite its name, this function writes to the database. Running it more
    than once is safe: pairs that are already linked are skipped, and the
    write uses ``MERGE``.

    Args:
        uri: Neo4j connection URI.
        user: Neo4j user name.
        password: Neo4j password.

    Returns:
        None. Progress is reported on stdout.
    """
    # The existence check uses the same direction as the relationship that is
    # created below (Person -> Bill); otherwise it can never match and every
    # run would add duplicates.
    find_query = """
    MATCH (b:Bill)
    WITH b, b.`sponsors.0.bioguideId` AS bioguideId
    WHERE bioguideId IS NOT NULL
    MATCH (p:Person {bioguideId: bioguideId})
    WHERE NOT EXISTS((p)-[:SPONSORED]->(b))
    RETURN id(b) AS billId, id(p) AS personId, p
    """

    # Address exactly one bill and one person by internal id, so a sponsor
    # with several bills does not get a relationship to every bill on every
    # iteration. MERGE keeps the write idempotent.
    # NOTE: id() matches the usage elsewhere in this project; on Neo4j 5+
    # elementId() is the recommended replacement.
    link_query = """
    MATCH (b:Bill), (p:Person)
    WHERE id(b) = $billId AND id(p) = $personId
    MERGE (p)-[:SPONSORED]->(b)
    """

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Buffer the read result completely before issuing writes on the
            # same session.
            rows = list(session.run(find_query))

            for row in rows:
                person = row['p']

                print("Matched Person Node:")
                for key, value in person.items():
                    print(f"{key}: {value}")

                # consume() forces execution so that a failure surfaces before
                # success is reported.
                session.run(
                    link_query,
                    billId=row['billId'],
                    personId=row['personId'],
                ).consume()
                print("Created SPONSORED relationship.")
    finally:
        # Always release the connection pool, even if a query fails.
        driver.close()


# Call the function with your connection info
get_sponsors_bioguide_id(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
"""dedupe_relationships.py: remove duplicate relationships, keeping one of each type per node pair."""
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables from .env file
load_dotenv()

# Get Neo4j connection info from environment variables
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USER = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')


def remove_duplicate_relationships(uri, user, password):
    """Delete duplicate relationships, keeping one per (start, type, end).

    Relationships are grouped by start node, end node and relationship type.
    In every group with more than one member, the first collected
    relationship is kept and the rest are deleted. Relationship properties
    are not compared.

    Args:
        uri: Neo4j connection URI.
        user: Neo4j user name.
        password: Neo4j password.

    Returns:
        None. Deleted and retained relationship ids are printed to stdout.
    """
    # Only the id lists are returned; the nodes are needed for grouping but
    # not on the client side.
    identify_query = """
    MATCH (n)-[r]->(m)
    WITH type(r) AS relType, n AS startNode, m AS endNode, collect(id(r)) AS relIds
    WHERE size(relIds) > 1
    RETURN relIds
    ORDER BY size(relIds) DESC
    """

    # One statement per duplicate group instead of one per relationship.
    delete_query = "MATCH ()-[r]->() WHERE id(r) IN $relIds DELETE r"

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Buffer the read result completely before issuing deletes on the
            # same session.
            duplicate_groups = [record['relIds'] for record in session.run(identify_query)]

            for rel_ids in duplicate_groups:
                # The query guarantees at least two ids per group: keep the
                # first, delete the remainder.
                keep_rel_id, *delete_rel_ids = rel_ids

                # consume() forces execution so that a failure surfaces before
                # the deletions are reported.
                session.run(delete_query, relIds=delete_rel_ids).consume()

                for del_rel_id in delete_rel_ids:
                    print(f"Deleted relationship with ID: {del_rel_id}")

                print("Remaining Relationship ID:", keep_rel_id)
                print("\n")
    finally:
        # Always release the connection pool, even if a query fails.
        driver.close()


# Call the function with your connection info
remove_duplicate_relationships(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)