"""Import sponsored legislation from the Congress.gov API into Neo4j.

For every ``Person`` node that has a ``bioguideId``, the script fetches the
member's sponsored legislation, upserts one ``Legislation`` node per item,
links it to the sponsoring person with a ``SPONSORED`` relationship and
finally verifies that the nodes and relationships exist.
"""
import os

import requests
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables from .env file
load_dotenv()

# Retrieve Neo4j connection details and API key
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USER = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
CONGRESS_API_KEY = os.getenv('CONGRESS_API_KEY')

if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
    raise ValueError("Neo4j connection details not found in .env file")

if not CONGRESS_API_KEY:
    raise ValueError("Congress API key not found in .env file")

# Upper bound (seconds) for a single HTTP request so a stalled connection
# cannot hang the whole import.
REQUEST_TIMEOUT_SECONDS = 30

# All Cypher statements use query parameters instead of string formatting so
# that quotes inside titles/ids can neither break nor alter the query.
CHECK_LEGISLATION_QUERY = "MATCH (l:Legislation {id: $legislation_id}) RETURN l"

# `SET l = $props` replaces the node's properties with the freshly fetched
# ones; `props` always contains `id`, so the merge key is never wiped.
UPSERT_SPONSORED_QUERY = (
    "MATCH (p:Person {bioguideId: $bioguide_id}) "
    "MERGE (l:Legislation {id: $legislation_id}) "
    "SET l = $props "
    "MERGE (p)-[:SPONSORED]->(l) "
    "RETURN l"
)

CHECK_RELATIONSHIP_QUERY = (
    "MATCH (p:Person {bioguideId: $bioguide_id})-[:SPONSORED]->"
    "(l:Legislation {id: $legislation_id}) "
    "RETURN p, l"
)


def get_person_bioguide_ids(driver):
    """Return the ``bioguideId`` of every ``Person`` node.

    Args:
        driver: An open Neo4j driver.

    Returns:
        A list with one entry per ``Person`` node; entries are ``None`` for
        nodes that have no ``bioguideId`` property.
    """
    with driver.session() as session:
        result = session.run("MATCH (p:Person) RETURN p.bioguideId")
        return [record['p.bioguideId'] for record in result]


def fetch_sponsored_legislation(bioguide_id, api_key):
    """Fetch the sponsored legislation of one member from the Congress API.

    Only the first page (up to 250 items) is requested.
    NOTE(review): members with more than 250 sponsored items would need
    pagination; confirm whether that matters for this data set.

    Args:
        bioguide_id: Bioguide identifier of the member.
        api_key: Congress API key, sent in the ``X-API-Key`` header.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
    headers = {
        'X-API-Key': api_key,
        'Content-Type': 'application/json'
    }
    params = {
        'format': 'json',  # ask for JSON explicitly instead of relying on the default
        'limit': 250  # Maximum limit allowed is 250
    }

    try:
        response = requests.get(
            url,
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        print(f"Failed to retrieve sponsored legislation for bioguideId {bioguide_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve sponsored legislation for bioguideId {bioguide_id}: {response.status_code}")
        # response.text never raises on undecodable bytes, unlike
        # response.content.decode('utf-8').
        print("Response Content:", response.text)
        return None

    try:
        return response.json()
    except ValueError as exc:
        print(f"Failed to retrieve sponsored legislation for bioguideId {bioguide_id}: {exc}")
        return None


def _extract_legislation_items(payload):
    """Return the list of legislation entries contained in an API payload.

    The Congress.gov API documents the list under ``sponsoredLegislation``;
    the ``legislations`` key this script originally looked for is still
    accepted. Anything else yields an empty list.
    """
    if not isinstance(payload, dict):
        return []
    for key in ('sponsoredLegislation', 'legislations'):
        items = payload.get(key)
        if isinstance(items, list) and items:
            return [item for item in items if isinstance(item, dict)]
    return []


def _legislation_key(legislation):
    """Return a stable string identifier for a legislation entry, or None.

    Uses the entry's own ``id`` when present. Otherwise falls back to
    ``<congress>-<type>-<number>`` and then to the entry's ``url`` so that
    entries without an ``id`` do not all collapse onto the same key.
    """
    explicit_id = legislation.get('id')
    if explicit_id not in (None, ''):
        return str(explicit_id)

    parts = [legislation.get(name) for name in ('congress', 'type', 'number')]
    if all(part not in (None, '') for part in parts):
        return '-'.join(str(part) for part in parts)

    return legislation.get('url') or None


def _legislation_properties(legislation_id, legislation):
    """Build the property map stored on a ``Legislation`` node.

    Empty and ``None`` values are dropped and every remaining value is
    stored as its string form, matching what the string-built query of the
    original script produced. ``id`` is always set to ``legislation_id``.
    """
    props = {
        key: str(value)
        for key, value in legislation.items()
        if value is not None and value != ''
    }
    props['id'] = legislation_id
    return props


def collect_sponsored_legislation(bioguide_ids, api_key):
    """Fetch sponsored legislation for every given member.

    Args:
        bioguide_ids: Iterable of bioguide ids; ``None`` entries are skipped.
        api_key: Congress API key.

    Returns:
        A ``(legislation_by_id, sponsors_by_legislation)`` tuple. The first
        maps legislation id to the raw API entry, the second maps legislation
        id to the list of bioguide ids whose response contained that entry.
    """
    legislation_by_id = {}
    sponsors_by_legislation = {}

    for bioguide_id in bioguide_ids:
        if bioguide_id is None:  # Person node without a bioguideId
            continue

        print(f"Fetching sponsored legislation for member with bioguideId {bioguide_id}")
        json_response = fetch_sponsored_legislation(bioguide_id, api_key)

        for legislation in _extract_legislation_items(json_response):
            legislation_id = _legislation_key(legislation)
            if legislation_id is None:
                print(f"Skipping legislation without a usable identifier: {legislation}")
                continue

            legislation_by_id[legislation_id] = legislation
            # Remember who sponsored what; relying on the loop variable
            # after this loop would attribute everything to the last member.
            sponsors = sponsors_by_legislation.setdefault(legislation_id, [])
            if bioguide_id not in sponsors:
                sponsors.append(bioguide_id)

    return legislation_by_id, sponsors_by_legislation


def upsert_sponsored_legislation(session, bioguide_id, legislation_id, legislation):
    """Create or update one ``Legislation`` node and link it to its sponsor.

    Args:
        session: An open Neo4j session.
        bioguide_id: Bioguide id of the sponsoring ``Person``.
        legislation_id: Identifier used as the node's ``id`` property.
        legislation: Raw API entry whose fields become node properties.

    Returns:
        True when the person was found and the node and relationship were
        written, False otherwise.
    """
    # Looked up first only to report "Created" versus "Updated".
    legislation_exists = bool(
        session.run(CHECK_LEGISLATION_QUERY, {'legislation_id': legislation_id}).data()
    )

    record = session.run(
        UPSERT_SPONSORED_QUERY,
        {
            'bioguide_id': bioguide_id,
            'legislation_id': legislation_id,
            'props': _legislation_properties(legislation_id, legislation),
        },
    ).single()

    if record is None:
        # The MATCH on Person found nothing, so nothing was written.
        print(f"Error: Person with bioguideId {bioguide_id} does not exist; "
              f"Legislation {legislation_id} was not stored.")
        return False

    if legislation_exists:
        print(f"Updated Legislation Node for member {bioguide_id}: {record}")
    else:
        print(f"Created Legislation Node for member {bioguide_id}: {record}")
    print(f"Created Sponsored Relationship for Person {bioguide_id} and Legislation {legislation_id}")
    return True


def verify_sponsored_legislation(session, bioguide_id, legislation_id):
    """Check that a ``Legislation`` node and its ``SPONSORED`` link exist.

    Prints an error for each missing piece and a success message only when
    both are present.

    Returns:
        True when both the node and the relationship exist.
    """
    node_exists = bool(
        session.run(CHECK_LEGISLATION_QUERY, {'legislation_id': legislation_id}).data()
    )
    if not node_exists:
        print(f"Error: Legislation Node with ID {legislation_id} does not exist.")

    relationship_exists = bool(
        session.run(
            CHECK_RELATIONSHIP_QUERY,
            {'bioguide_id': bioguide_id, 'legislation_id': legislation_id},
        ).data()
    )
    if not relationship_exists:
        print(f"Error: Sponsored Relationship for Person {bioguide_id} and Legislation {legislation_id} does not exist.")

    verified = node_exists and relationship_exists
    if verified:
        print(f"Success: Node and Relationship for Person {bioguide_id} and Legislation {legislation_id} are created properly.")
    return verified


# Initialize Neo4j driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

try:
    # Get list of person bioguideIds
    person_bioguide_ids = get_person_bioguide_ids(driver)
    print("List of Person Bioguide IDs:", person_bioguide_ids)

    # Fetch and process sponsored legislation for each person
    legislation_dict, sponsors_by_legislation = collect_sponsored_legislation(
        person_bioguide_ids, CONGRESS_API_KEY
    )

    # Create legislation nodes and sponsored relationships
    with driver.session() as session:
        for legislation_id, legislation in legislation_dict.items():
            for bioguide_id in sponsors_by_legislation[legislation_id]:
                upsert_sponsored_legislation(session, bioguide_id, legislation_id, legislation)

    # Test that nodes and relationships were created properly
    with driver.session() as session:
        for legislation_id, sponsor_ids in sponsors_by_legislation.items():
            for bioguide_id in sponsor_ids:
                verify_sponsored_legislation(session, bioguide_id, legislation_id)
finally:
    # Close the Neo4j driver connection even when a step above fails
    driver.close()