From b9d22bede14b278b8d12ac10228ded228bb3f08af611c655b22856376d87891b Mon Sep 17 00:00:00 2001
From: Moses Rolston
Date: Thu, 6 Mar 2025 14:29:01 -0800
Subject: [PATCH] checkpoint before converting to api

---
 .gitignore           |   2 +
 add_bill_node.py     |  64 +++++++++++++++++++++++++
 add_csv.py           |  12 ++---
 api.py               |  80 ++++++++++++++++++++++++++++++++
 cosponsored_add.py   | 104 +++++++++++++++++++++++++++++++++++++++++
 executive_orders.csv |  66 ++++++++++++++++++++++++++
 get_members.py       |  80 ++++++++++++++++++++++++++++++++
 members_add_node.py  |  64 +++++++++++++++++++++++++
 retrieve_bills.py    |  66 ++++++++++++++++++++++++++
 sponsored_add.py     | 108 +++++++++++++++++++++++++++++++++++++++++++
 write_json_to_csv.py |  78 +++++++++++++++++++++++++++++++
 11 files changed, 717 insertions(+), 7 deletions(-)
 create mode 100644 add_bill_node.py
 create mode 100644 api.py
 create mode 100644 cosponsored_add.py
 create mode 100644 get_members.py
 create mode 100644 members_add_node.py
 create mode 100644 retrieve_bills.py
 create mode 100644 sponsored_add.py
 create mode 100644 write_json_to_csv.py

diff --git a/.gitignore b/.gitignore
index 48646a8..ecb4608 100644
--- a/.gitignore
+++ b/.gitignore
@@ -248,3 +248,5 @@ inc/
 /pm_to_blib
 /*.zip
 
+# Data Files
+/*.csv
diff --git a/add_bill_node.py b/add_bill_node.py
new file mode 100644
index 0000000..1ce26fb
--- /dev/null
+++ b/add_bill_node.py
@@ -0,0 +1,64 @@
+import csv
+from neo4j import GraphDatabase
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve Neo4j connection details from environment variables
+NEO4J_URI = os.getenv('NEO4J_URI')
+NEO4J_USER = os.getenv('NEO4J_USER')
+NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
+
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    raise ValueError("Neo4j connection details not found in .env file")
+
+TEST_FLAG = False
+
+# MERGE one Bill node keyed on the row's 'number'; returns the record, or None when TEST_FLAG only prints the query
+def merge_bill_node(driver, bill):
+    with driver.session() as session:
+        query = """
+        MERGE (b:Bill {id: $id})
+        ON CREATE SET b += $properties
+        ON MATCH SET b += $properties
+        RETURN b
+        """
+
+        # Filter out empty or None values and ensure keys are strings
+        properties = {str(key): value for key, value in bill.items() if value is not None and value != ""}
+
+        if TEST_FLAG:
+            print(f"MERGE (b:Bill {{id: '{bill['number']}'}}) ON CREATE SET b += ${properties} ON MATCH SET b += ${properties}")
+        else:
+            result = session.run(query, id=bill['number'], properties=properties)
+            return result.single()
+
+# Read the '^'-delimited CSV at file_path and MERGE one Bill node per row
+def read_csv_and_merge_nodes(file_path):
+    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+    try:
+        with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile:
+            reader = csv.DictReader(csvfile, delimiter='^')
+
+            for row in reader:
+                bill = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}
+
+                # Debugging print statement to check keys and values
+                print(f"Processing row with keys: {bill.keys()} and values: {bill.values()}")
+
+                result = merge_bill_node(driver, bill)
+                if not TEST_FLAG:
+                    print(f"Created/Updated node: {result}")
+
+    finally:
+        # Always release the driver; it is created in TEST_FLAG mode too and was previously left open
+        driver.close()
+
+# Path to the CSV file
+csv_file_path = 'bills.csv'
+
+if __name__ == "__main__":
+    read_csv_and_merge_nodes(csv_file_path)
diff --git a/add_csv.py b/add_csv.py
index 0ecbb1f..177926d 100644
--- a/add_csv.py
+++ b/add_csv.py
@@ -10,10 +10,10 @@ load_dotenv()
 def create_neo4j_session(uri, user, password):
     return GraphDatabase.driver(uri, auth=(user, password))
 
-def create_node(tx, node_type, properties):
-    # Create a Cypher query to create a node with given type and properties
+def create_or_merge_node(tx, node_type, properties):
+    # Create a Cypher query to merge a node with given type and properties
     properties_string = ', '.join([f"{key}: '{value}'" for key, value in properties.items()])
-    query = f"CREATE 
(n:{node_type} {{{properties_string}}})"
+    query = f"MERGE (n:{node_type} {{{properties_string}}})"
     tx.run(query)
 
 def main(csv_file_path):
@@ -42,15 +42,13 @@ def main(csv_file_path):
                 'date_enacted': pd.to_datetime(row['date_enacted']).strftime('%Y-%m-%d') if pd.notnull(row['date_enacted']) else None,
                 'date_repealed': pd.to_datetime(row['date_repealed']).strftime('%Y-%m-%d') if pd.notnull(row['date_repealed']) else None
             }
-
-            # Create the node in Neo4j
-            session.write_transaction(create_node, node_type, properties)
+            # Create or merge the node in Neo4j
+            session.write_transaction(create_or_merge_node, node_type, properties)
 
     driver.close()
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Read a CSV file and create nodes in Neo4j.")
     parser.add_argument("csv_file_path", type=str, help="Path to the CSV file")
-
     args = parser.parse_args()
     main(args.csv_file_path)
diff --git a/api.py b/api.py
new file mode 100644
index 0000000..16b006d
--- /dev/null
+++ b/api.py
@@ -0,0 +1,80 @@
+from flask import Flask, request, jsonify
+import requests
+from neo4j import GraphDatabase
+from dotenv import load_dotenv
+import os
+import logging
+from cachetools import TTLCache
+
+# Set up basic configuration for logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve Neo4j connection details and API key
+NEO4J_URI = os.getenv('NEO4J_URI')
+NEO4J_USER = os.getenv('NEO4J_USER')
+NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
+CONGRESS_API_KEY = os.getenv('CONGRESS_API_KEY')
+
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    raise ValueError("Neo4j connection details not found in .env file")
+if not CONGRESS_API_KEY:
+    raise ValueError("Congress API key not found in .env file")
+
+# Initialize Neo4j driver
+driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+# Initialize caches
+neo4j_cache = TTLCache(maxsize=100, ttl=3600) # Cache for 1 hour
+congress_api_cache = TTLCache(maxsize=100, ttl=3600) # Cache for 1 hour
+
+app = Flask(__name__)
+
+@app.route('/sponsored_legislation', methods=['GET'])
+def get_sponsored_legislation():
+    bioguide_id = request.args.get('bioguideId')
+    if not bioguide_id:
+        return jsonify({"error": "bioguideId is required"}), 400
+
+    logger.info(f"Fetching sponsored legislation for member with bioguideId {bioguide_id}")
+
+    # Only cached results can be served: the Congress API fetch is not implemented in this checkpoint
+    cached_legislation = congress_api_cache.get(bioguide_id)
+    if cached_legislation:
+        logger.info(f"Using cached sponsored legislation for bioguideId {bioguide_id}")
+        return jsonify({"message": "Sponsored legislation retrieved from cache", "legislations": cached_legislation})
+    return jsonify({"error": "Sponsored legislation lookup is not implemented yet"}), 501  # a view must not return None
+
+@app.route('/persons', methods=['GET'])
+def list_persons():
+    logger.info("Listing all person nodes from Neo4j")
+
+    # Check cache before querying Neo4j
+    cached_persons = neo4j_cache.get('all_persons')
+    if cached_persons:
+        logger.info("Using cached list of persons")
+        return jsonify({"message": "Persons retrieved from cache", "persons": cached_persons})
+
+    with driver.session() as session:
+        query = "MATCH (p:Person) RETURN p"
+        logger.info(f"Executing Neo4j query to list all person nodes: {query}")
+        result = session.run(query)
+
+        persons = [record['p'] for record in result]
+        person_list = []
+        for person in persons:
+            node_properties = {key: value for key, value in person.items()}
+            person_list.append(node_properties)
+
+    # Cache the response
+    neo4j_cache['all_persons'] = person_list
+    logger.info("Cached list of all persons")
+
+    return jsonify({"message": "Persons listed successfully", "persons": person_list})
+
+if __name__ == '__main__':
+    app.run(debug=os.getenv('FLASK_DEBUG') == '1', host='0.0.0.0', port=5000)  # debugger is opt-in: it allows code execution and this binds every interface
diff --git a/cosponsored_add.py b/cosponsored_add.py
new file mode 100644
index 0000000..5f04f7a
--- /dev/null
+++ b/cosponsored_add.py
@@ -0,0 +1,104 @@
+import requests
+from neo4j import GraphDatabase
+from dotenv import 
load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve API key and Neo4j connection details from environment variables
+CONGRESS_API_KEY = os.getenv('CONGRESS_API_KEY')
+if not CONGRESS_API_KEY:
+    raise ValueError("API key not found in .env file")
+
+NEO4J_URI = os.getenv('NEO4J_URI')
+NEO4J_USER = os.getenv('NEO4J_USER')
+NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    raise ValueError("Neo4j connection details not found in .env file")
+
+BASE_URL_MEMBERSHIP = 'https://api.congress.gov/v3/member'
+BASE_URL_COSPONSORED_LEGISLATION = 'https://api.congress.gov/v3/member/{}/cosponsored-legislation'
+
+# Return the bioguideId of every Person node that has one (a null id cannot be used in the MERGE below)
+def get_person_nodes(driver):
+    with driver.session() as session:
+        result = session.run("MATCH (p:Person) WHERE p.bioguideId IS NOT NULL RETURN p.bioguideId")
+        return [record['p.bioguideId'] for record in result]
+
+# Fetch the first page (up to 250 items) of a member's cosponsored legislation; returns [] on any non-200 reply
+def fetch_cosponsored_legislation(bioguideId, api_key):
+    url = BASE_URL_COSPONSORED_LEGISLATION.format(bioguideId)
+    params = {
+        'api_key': api_key,
+        'limit': 250 # Maximum limit allowed is 250
+    }
+    response = requests.get(url, params=params, timeout=30)
+    if response.status_code == 200:
+        payload = response.json()  # NOTE(review): container key assumed to be 'cosponsoredLegislation' - confirm; old key kept as fallback
+        return payload.get('cosponsoredLegislation') or payload.get('legislations', [])
+    print(f"Failed to retrieve cosponsored legislation for {bioguideId}: {response.status_code}")
+    return []
+
+# Function to create or update a person node in Neo4j using MERGE
+def merge_person_node(driver, bioguideId):
+    with driver.session() as session:
+        query = """
+        MERGE (p:Person {bioguideId: $bioguideId})
+        RETURN p.bioguideId
+        """
+        result = session.run(query, bioguideId=bioguideId)
+        return result.single()[0]
+
+# Create or update a Legislation node from one API item and return the node's labels
+def create_legislation_node(driver, legislation):
+    with driver.session() as session:
+        legislation_id = legislation.get('id') or legislation.get('url')  # NOTE(review): assumes items without 'id' carry a unique 'url' - confirm
+        properties = {str(key): value for key, value in legislation.items() if value is not None and value != "" and not isinstance(value, dict)}  # nested maps are not valid property values
+        query = """
+        MERGE (l:Legislation {id: $id})
+        SET l += $properties
+        RETURN labels(l)
+        """
+        result = session.run(query, id=legislation_id, properties=properties)
+        return result.single()[0]
+
+# Link a Person to a Legislation node; 'number' may be the legislation dict (as the caller below passes) or the id itself
+def create_cosponsored_relationship(driver, bioguideId, number):
+    with driver.session() as session:
+        query = """
+        MATCH (p:Person {bioguideId: $bioguideId})
+        MATCH (l:Legislation)
+        WHERE l.id = $number
+        MERGE (p)-[:COSPONSORED]->(l)
+        """
+        session.run(query, bioguideId=bioguideId, number=(number.get('id') or number.get('url')) if isinstance(number, dict) else number)
+
+# Function to process each person node and fetch cosponsored legislation
+def process_person_nodes(driver):
+    person_nodes = get_person_nodes(driver)
+    for bioguideId in person_nodes:
+        print(f"Processing member with bioguideId: {bioguideId}")
+
+        # Ensure the person node exists in Neo4j
+        merge_person_node(driver, bioguideId)
+
+        # Fetch cosponsored legislation
+        legislations = fetch_cosponsored_legislation(bioguideId, CONGRESS_API_KEY)
+        for legislation in legislations:
+            print(f"Processing legislation: {legislation.get('number')}")
+
+            # Create or update the legislation node
+            create_legislation_node(driver, legislation)
+
+            # Create a cosponsored relationship from the person to the legislation
+            create_cosponsored_relationship(driver, bioguideId, legislation)
+
+# Main execution block
+if __name__ == "__main__":
+    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+    try:
+        process_person_nodes(driver)
+    finally:
+        driver.close()
diff --git a/executive_orders.csv b/executive_orders.csv
index a786cb2..25791f2 100644
--- a/executive_orders.csv
+++ b/executive_orders.csv
@@ -8,3 +8,69 @@ order^EO 14216^Expanding Access to In Vitro Fertilization^^https://www.federalre
 order^EO 14215^Ensuring 
Accountability for All Agencies^^https://www.federalregister.gov/d/2025-03063^February 18, 2025^ order^EO 14214^Keeping Education Accessible and Ending COVID-19 Vaccine Mandates in Schools^^https://www.federalregister.gov/d/2025-02931^February 14, 2025^ order^EO 14213^Establishing the National Energy Dominance Council^^https://www.federalregister.gov/d/2025-02928^February 14, 2025^ +order^EO 14212^Establishing the Presidents Make America Healthy Again Commission^^https://www.federalregister.gov/d/2025-02871^February 13, 2025^ +order^EO 14211^One Voice for Americas Foreign Relations^^https://www.federalregister.gov/d/2025-02841^February 12, 2025^ +order^EO 14210^"Implementing the Presidents ""Department of Government Efficiency"" Workforce Optimization Initiative"^^https://www.federalregister.gov/d/2025-02762^February 11, 2025^ +order^EO 14209^Pausing Foreign Corrupt Practices Act Enforcement To Further American Economic and National Security^^https://www.federalregister.gov/d/2025-02736^February 10, 2025^ +order^EO 14208^Ending Procurement and Forced Use of Paper Straws^^https://www.federalregister.gov/d/2025-02735^February 10, 2025^ +order^EO 14207^Eliminating the Federal Executive Institute ^^https://www.federalregister.gov/d/2025-02734^February 10, 2025^ +order^EO 14206^Protecting Second Amendment Rights^^https://www.federalregister.gov/d/2025-02636^February 7, 2025^ +order^EO 14205^Establishment of the White House Faith Office^^https://www.federalregister.gov/d/2025-02635^February 7, 2025^ +order^EO 14204^Addressing Egregious Actions of the Republic of South Africa^^https://www.federalregister.gov/d/2025-02630^February 7, 2025^ +order^EO 14203^Imposing Sanctions on the International Criminal Court^^https://www.federalregister.gov/d/2025-02612^February 6, 2025^ +order^EO 14202^Eradicating Anti-Christian Bias^^https://www.federalregister.gov/d/2025-02611^February 6, 2025^ +order^EO 14201^Keeping Men Out of Womens Sports 
^^https://www.federalregister.gov/d/2025-02513^February 5, 2025^ +order^EO 14200^Amendment to Duties Addressing the Synthetic Opioid Supply Chain in the Peoples Republic of China^^https://www.federalregister.gov/d/2025-02512^February 5, 2025^ +order^EO 14199^Withdrawing the United States From and Ending Funding to Certain United Nations Organizations and Reviewing United States Support to All International Organizations^^https://www.federalregister.gov/d/2025-02504^February 4, 2025^ +order^EO 14198^Progress on the Situation at Our Southern Border^^https://www.federalregister.gov/d/2025-02479^February 3, 2025^ +order^EO 14197^Progress on the Situation at Our Northern Border^^https://www.federalregister.gov/d/2025-02478^February 3, 2025^ +order^EO 14196^A Plan for Establishing a United States Sovereign Wealth Fund^^https://www.federalregister.gov/d/2025-02477^February 3, 2025^ +order^EO 14195^Imposing Duties To Address the Synthetic Opioid Supply Chain in the Peoples Republic of China^^https://www.federalregister.gov/d/2025-02408^February 1, 2025^ +order^EO 14194^Imposing Duties To Address the Situation at Our Southern Border^^https://www.federalregister.gov/d/2025-02407^February 1, 2025^ +order^EO 14193^Imposing Duties To Address the Flow of Illicit Drugs Across Our Northern Border^^https://www.federalregister.gov/d/2025-02406^February 1, 2025^ +order^EO 14192^Unleashing Prosperity Through Deregulation ^^https://www.federalregister.gov/d/2025-02345^January 31, 2025^ +order^EO 14191^Expanding Educational Freedom and Opportunity for Families^^https://www.federalregister.gov/d/2025-02233^January 29, 2025^ +order^EO 14190^Ending Radical Indoctrination in K-12 Schooling ^^https://www.federalregister.gov/d/2025-02232^January 29, 2025^ +order^EO 14189^Celebrating Americas 250th Birthday^^https://www.federalregister.gov/d/2025-02231^January 29, 2025^ +order^EO 14188^Additional Measures To Combat Anti-Semitism^^https://www.federalregister.gov/d/2025-02230^January 29, 2025^ 
+order^EO 14187^Protecting Children From Chemical and Surgical Mutilation^^https://www.federalregister.gov/d/2025-02194^January 28, 2025^ +order^EO 14186^The Iron Dome for America ^^https://www.federalregister.gov/d/2025-02182^January 27, 2025^ +order^EO 14185^Restoring Americas Fighting Force^^https://www.federalregister.gov/d/2025-02181^January 27, 2025^ +order^EO 14184^Reinstating Service Members Discharged Under the Militarys COVID-19 Vaccination Mandate^^https://www.federalregister.gov/d/2025-02180^January 27, 2025^ +order^EO 14183^Prioritizing Military Excellence and Readiness^^https://www.federalregister.gov/d/2025-02178^January 27, 2025^ +order^EO 14182^Enforcing the Hyde Amendment^^https://www.federalregister.gov/d/2025-02175^January 24, 2025^ +order^EO 14181^Emergency Measures To Provide Water Resources in California and Improve Disaster Response in Certain Areas^^https://www.federalregister.gov/d/2025-02174^January 24, 2025^ +order^EO 14180^Council To Assess the Federal Emergency Management Agency^^https://www.federalregister.gov/d/2025-02173^January 24, 2025^ +order^EO 14179^Removing Barriers to American Leadership in Artificial Intelligence^^https://www.federalregister.gov/d/2025-02172^January 23, 2025^ +order^EO 14178^Strengthening American Leadership in Digital Financial Technology^^https://www.federalregister.gov/d/2025-02123^January 23, 2025^ +order^EO 14177^Presidents Council of Advisors on Science and Technology^^https://www.federalregister.gov/d/2025-02121^January 23, 2025^ +order^EO 14176^Declassification of Records Concerning the Assassinations of President John F. Kennedy, Senator Robert F. Kennedy, and the Reverend Dr. 
Martin Luther King, Jr.^^https://www.federalregister.gov/d/2025-02116^January 23, 2025^ +order^EO 14175^Designation of Ansar Allah as a Foreign Terrorist Organization^^https://www.federalregister.gov/d/2025-02103^January 22, 2025^ +order^EO 14174^Revocation of Certain Executive Orders ^^https://www.federalregister.gov/d/2025-02098^January 21, 2025^ +order^EO 14173^Ending Illegal Discrimination and Restoring Merit-Based Opportunity^^https://www.federalregister.gov/d/2025-02097^January 21, 2025^ +order^EO 14172^Restoring Names That Honor American Greatness^^https://www.federalregister.gov/d/2025-02096^January 20, 2025^ +order^EO 14171^Restoring Accountability to Policy-Influencing Positions Within the Federal Workforce^^https://www.federalregister.gov/d/2025-02095^January 20, 2025^ +order^EO 14170^Reforming the Federal Hiring Process and Restoring Merit to Government Service^^https://www.federalregister.gov/d/2025-02094^January 20, 2025^ +order^EO 14169^Reevaluating and Realigning United States Foreign Aid^^https://www.federalregister.gov/d/2025-02091^January 20, 2025^ +order^EO 14168^Defending Women From Gender Ideology Extremism and Restoring Biological Truth to the Federal Government^^https://www.federalregister.gov/d/2025-02090^January 20, 2025^ +order^EO 14167^Clarifying the Militarys Role in Protecting the Territorial Integrity of the United States^^https://www.federalregister.gov/d/2025-02089^January 20, 2025^ +order^EO 14166^Application of Protecting Americans From Foreign Adversary Controlled Applications Act to TikTok^^https://www.federalregister.gov/d/2025-02087^January 20, 2025^ +order^EO 14165^Securing Our Borders^^https://www.federalregister.gov/d/2025-02015^January 20, 2025^ +order^EO 14164^Restoring the Death Penalty and Protecting Public Safety^^https://www.federalregister.gov/d/2025-02012^January 20, 2025^ +order^EO 14163^Realigning the United States Refugee Admissions Program^^https://www.federalregister.gov/d/2025-02011^January 20, 2025^ +order^EO 
14162^Putting America First in International Environmental Agreements^^https://www.federalregister.gov/d/2025-02010^January 20, 2025^ +order^EO 14161^Protecting the United States From Foreign Terrorists and Other National Security and Public Safety Threats^^https://www.federalregister.gov/d/2025-02009^January 20, 2025^ +order^EO 14160^Protecting the Meaning and Value of American Citizenship^^https://www.federalregister.gov/d/2025-02007^January 20, 2025^ +order^EO 14159^Protecting the American People Against Invasion^^https://www.federalregister.gov/d/2025-02006^January 20, 2025^ +order^EO 14158^"Establishing and Implementing the Presidents ""Department of Government Efficiency"""^^https://www.federalregister.gov/d/2025-02005^January 20, 2025^ +order^EO 14157^Designating Cartels and Other Organizations as Foreign Terrorist Organizations and Specially Designated Global Terrorists^^https://www.federalregister.gov/d/2025-02004^January 20, 2025^ +order^EO 14156^Declaring a National Energy Emergency^^https://www.federalregister.gov/d/2025-02003^January 20, 2025^ +order^EO 14155^Withdrawing the United States From the World Health Organization ^^https://www.federalregister.gov/d/2025-01957^January 20, 2025^ +order^EO 14154^Unleashing American Energy^^https://www.federalregister.gov/d/2025-01956^January 20, 2025^ +order^EO 14153^Unleashing Alaskas Extraordinary Resource Potential^^https://www.federalregister.gov/d/2025-01955^January 20, 2025^ +order^EO 14152^Holding Former Government Officials Accountable for Election Interference and Improper Disclosure of Sensitive Governmental Information^^https://www.federalregister.gov/d/2025-01954^January 20, 2025^ +order^EO 14151^Ending Radical and Wasteful Government DEI Programs and Preferencing^^https://www.federalregister.gov/d/2025-01953^January 20, 2025^ +order^EO 14150^America First Policy Directive to the Secretary of State^^https://www.federalregister.gov/d/2025-01952^January 20, 2025^ +order^EO 14149^Restoring Freedom of 
Speech and Ending Federal Censorship^^https://www.federalregister.gov/d/2025-01902^January 20, 2025^
+order^EO 14148^Initial Rescissions of Harmful Executive Orders and Actions^^https://www.federalregister.gov/d/2025-01901^January 20, 2025^
+order^EO 14147^Ending the Weaponization of the Federal Government^^https://www.federalregister.gov/d/2025-01900^January 20, 2025^
diff --git a/get_members.py b/get_members.py
new file mode 100644
index 0000000..8a2a20f
--- /dev/null
+++ b/get_members.py
@@ -0,0 +1,80 @@
+import requests
+import csv
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve API key from environment variables
+API_KEY = os.getenv('CONGRESS_API_KEY')
+if not API_KEY:
+    raise ValueError("API key not found in .env file")
+
+BASE_URL = 'https://api.congress.gov/v3/member'
+
+# Parameters for the request
+params = {
+    'offset': 0,
+    'limit': 250, # Maximum limit allowed is 250
+    'api_key': API_KEY
+}
+
+# Open a CSV file to write the data
+with open('members.csv', 'w', newline='', encoding='utf-8') as csvfile:
+    csvwriter = csv.writer(csvfile, delimiter='^')
+
+    # Placeholder for headers
+    headers = None
+    row_count = 0
+
+    while True:
+        response = requests.get(BASE_URL, params=params, timeout=30)
+
+        # Print the raw body: parsing JSON here would raise on a non-JSON error reply before the status check
+        print("Retrieved Data:", response.text)
+
+        if response.status_code == 200:
+            data = response.json()
+            members = data.get('members', [])
+
+            for member in members:
+                depiction = member.pop('depiction', None) or {}
+                first_term = ((member.get('terms') or {}).get('item') or [{}])[0]  # tolerate missing, null or empty terms
+                # Ensure all keys are present by setting missing keys to None
+                flattened_member = {
+                    'bioguideId': member.get('bioguideId'),
+                    'attribution': depiction.get('attribution'),
+                    'imageUrl': depiction.get('imageUrl'),
+                    'district': member.get('district', ''),
+                    'name': member.get('name'),
+                    'partyName': member.get('partyName'),
+                    'state': member.get('state'),
+                    'chamber': first_term.get('chamber'),
+                    'startYear': first_term.get('startYear'),
+                    'endYear': first_term.get('endYear'),
+                    'updateDate': member.get('updateDate'),
+                    'url': member.get('url')
+                }
+
+                # Extract keys to use as headers if not set yet
+                if headers is None:
+                    headers = list(flattened_member.keys())
+                    csvwriter.writerow(headers)
+
+                # Write each row to the CSV file
+                csvwriter.writerow(list(flattened_member.values()))
+                row_count += 1
+                print(f"Writing Row {row_count}: {flattened_member}")
+
+            # Check if there are more members to retrieve
+            if len(members) < params['limit']:
+                break
+
+            # Increment offset for the next batch of members
+            params['offset'] += params['limit']
+        else:
+            print(f"Failed to retrieve data: {response.status_code}")
+            break
+
+print(f"Total rows written: {row_count}")
diff --git a/members_add_node.py b/members_add_node.py
new file mode 100644
index 0000000..ec45c8c
--- /dev/null
+++ b/members_add_node.py
@@ -0,0 +1,64 @@
+import csv
+from neo4j import GraphDatabase
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve Neo4j connection details from environment variables
+NEO4J_URI = os.getenv('NEO4J_URI')
+NEO4J_USER = os.getenv('NEO4J_USER')
+NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
+
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    raise ValueError("Neo4j connection details not found in .env file")
+
+TEST_FLAG = False
+
+# MERGE one Person node keyed on bioguideId; returns the record, or None when TEST_FLAG only prints the query
+def merge_person_node(driver, member):
+    with driver.session() as session:
+        query = """
+        MERGE (p:Person {bioguideId: $bioguideId})
+        ON CREATE SET p += $properties
+        ON MATCH SET p += $properties
+        RETURN p
+        """
+
+        # Filter out empty or None values and ensure keys are strings
+        properties = {str(key): value for key, value in member.items() if value is not None and value != ""}
+
+        if TEST_FLAG:
+            print(f"MERGE (p:Person {{bioguideId: '{member['bioguideId']}'}}) ON CREATE SET p += ${properties} ON MATCH SET p += ${properties}")
+        else:
+            result = session.run(query, bioguideId=member['bioguideId'], properties=properties)
+            return result.single()
+
+# Read the '^'-delimited CSV at file_path and MERGE one Person node per row
+def read_csv_and_merge_nodes(file_path):
+    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+    try:
+        with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile:
+            reader = csv.DictReader(csvfile, delimiter='^')
+
+            for row in reader:
+                member = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}
+
+                # Debugging print statement to check keys and values
+                print(f"Processing row with keys: {member.keys()} and values: {member.values()}")
+
+                result = merge_person_node(driver, member)
+                if not TEST_FLAG:
+                    print(f"Created/Updated node: {result}")
+
+    finally:
+        # Always release the driver; it is created in TEST_FLAG mode too and was previously left open
+        driver.close()
+
+# Path to the CSV file
+csv_file_path = 'members.csv'
+
+if __name__ == "__main__":
+    read_csv_and_merge_nodes(csv_file_path)
diff --git a/retrieve_bills.py b/retrieve_bills.py
new file mode 100644
index 0000000..78b7c50
--- /dev/null
+++ b/retrieve_bills.py
@@ -0,0 +1,66 @@
+import requests
+import csv
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve API key from environment variables
+API_KEY = os.getenv('CONGRESS_API_KEY')
+if not API_KEY:
+    raise ValueError("API key not found in .env file")
+
+BASE_URL = 'https://api.congress.gov/v3/bill'
+
+# Parameters for the request
+params = {
+    'offset': 0,
+    'limit': 250, # Maximum limit allowed is 250
+    'api_key': API_KEY
+}
+
+# Open a CSV file to write the data
+with open('bills.csv', 'w', newline='', encoding='utf-8') as csvfile:
+    csvwriter = csv.writer(csvfile, delimiter='^')
+
+    # Placeholder for headers
+    headers = None
+    row_count = 0
+
+    while True:
+        response = requests.get(BASE_URL, params=params, timeout=30)
+
+        # Print the 
retrieved data from the request
+        print("Retrieved Data:", response.text)
+
+        if response.status_code == 200:
+            data = response.json()
+            bills = data.get('bills', [])
+
+            for bill in bills:
+                # Flatten the nested 'latestAction' dictionary
+                latest_action = bill.pop('latestAction', None) or {}
+                flattened_bill = {**bill, **latest_action}
+
+                # Extract keys to use as headers if not set yet
+                if headers is None:
+                    headers = list(flattened_bill.keys())
+                    csvwriter.writerow(headers)
+
+                # Write each row in header order so a bill with different keys cannot shift columns
+                csvwriter.writerow([flattened_bill.get(header, '') for header in headers])
+                row_count += 1
+                print(f"Writing Row {row_count}: {flattened_bill}")
+
+            # Check if there are more bills to retrieve
+            if len(bills) < params['limit']:
+                break
+
+            # Increment offset for the next batch of bills
+            params['offset'] += params['limit']
+        else:
+            print(f"Failed to retrieve data: {response.status_code}")
+            break
+
+print(f"Total rows written: {row_count}")
diff --git a/sponsored_add.py b/sponsored_add.py
new file mode 100644
index 0000000..efdc4f2
--- /dev/null
+++ b/sponsored_add.py
@@ -0,0 +1,108 @@
+import requests
+from neo4j import GraphDatabase
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve Neo4j connection details and API key
+NEO4J_URI = os.getenv('NEO4J_URI')
+NEO4J_USER = os.getenv('NEO4J_USER')
+NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
+CONGRESS_API_KEY = os.getenv('CONGRESS_API_KEY')
+
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    raise ValueError("Neo4j connection details not found in .env file")
+if not CONGRESS_API_KEY:
+    raise ValueError("Congress API key not found in .env file")
+
+# Initialize Neo4j driver
+driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+# Function to get all person nodes and their bioguideIds
+def get_person_bioguide_ids(driver):
+    with driver.session() as session:
+        result = session.run("MATCH (p:Person) RETURN p.bioguideId")
+        return [record['p.bioguideId'] for record in result]
+
+# Get list of person bioguideIds
+person_bioguide_ids = get_person_bioguide_ids(driver)
+print("List of Person Bioguide IDs:", person_bioguide_ids)
+
+# Fetch the first page (up to 250 items) of a member's sponsored legislation; returns the parsed JSON or None on failure
+def fetch_sponsored_legislation(bioguide_id, api_key):
+    url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
+    headers = {
+        'X-API-Key': api_key,
+        'Content-Type': 'application/json'
+    }
+    params = {
+        'limit': 250 # Maximum limit allowed is 250
+    }
+    response = requests.get(url, headers=headers, params=params, timeout=30)
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print(f"Failed to retrieve sponsored legislation for bioguideId {bioguide_id}: {response.status_code}")
+        print("Response Content:", response.content.decode('utf-8'))
+        return None
+
+# Fetch and process sponsored legislation for each person
+legislation_dict = {}
+for bioguide_id in person_bioguide_ids:
+    if bioguide_id is not None: # Check if the bioguideId is not None
+        print(f"Fetching sponsored legislation for member with bioguideId {bioguide_id}")
+        json_response = fetch_sponsored_legislation(bioguide_id, CONGRESS_API_KEY) or {}
+        # Key by (sponsor, legislation id) so the sponsor survives this loop; NOTE(review): 'sponsoredLegislation' and 'url' are assumed API field names - confirm
+        for legislation in json_response.get('sponsoredLegislation') or json_response.get('legislations', []):
+            legislation_dict[(bioguide_id, legislation.get('id') or legislation.get('url'))] = legislation
+
+# Create legislation nodes and sponsored relationships
+with driver.session() as session:
+    for (bioguide_id, legislation_id), legislation in legislation_dict.items():
+        properties = {str(key): value for key, value in legislation.items() if value is not None and value != '' and not isinstance(value, dict)}  # nested maps are not valid property values
+
+        # Check if the legislation node already exists
+        query_check = "MATCH (l:Legislation {id: $id}) RETURN l"
+        result_check = session.run(query_check, id=legislation_id)
+        legislation_exists = bool(result_check.data())
+
+        # Create or update the legislation node (values are passed as parameters, never interpolated into the query)
+        query_create_update = "MATCH (p:Person {bioguideId: $bioguideId}) MERGE (l:Legislation {id: $id}) SET l += $properties RETURN l"
+        result_create_update = session.run(query_create_update, bioguideId=bioguide_id, id=legislation_id, properties=properties)
+
+        # Print the creation or update status
+        if legislation_exists:
+            print(f"Updated Legislation Node for member {bioguide_id}: {result_create_update.single()}")
+        else:
+            print(f"Created Legislation Node for member {bioguide_id}: {result_create_update.single()}")
+
+        # Create the sponsored relationship
+        query_relationship = "MATCH (p:Person {bioguideId: $bioguideId}), (l:Legislation {id: $id}) MERGE (p)-[:SPONSORED]->(l)"
+        session.run(query_relationship, bioguideId=bioguide_id, id=legislation_id)
+        print(f"Created Sponsored Relationship for Person {bioguide_id} and Legislation {legislation_id}")
+
+# Test that nodes and relationships were created properly
+with driver.session() as session:
+    for bioguide_id, legislation_id in legislation_dict:
+        # Both ids come from the dictionary key built while fetching
+
+        # Check if the legislation node exists
+        query_check_legislation = "MATCH (l:Legislation {id: $id}) RETURN l"
+        result_check_legislation = session.run(query_check_legislation, id=legislation_id)
+        if not result_check_legislation.data():
+            print(f"Error: Legislation Node with ID {legislation_id} does not exist.")
+
+        # Check if the sponsored relationship exists
+        query_check_relationship = "MATCH (p:Person {bioguideId: $bioguideId})-[:SPONSORED]->(l:Legislation {id: $id}) RETURN p, l"
+        result_check_relationship = session.run(query_check_relationship, bioguideId=bioguide_id, id=legislation_id)
+        if not result_check_relationship.data():
+            print(f"Error: Sponsored Relationship for Person {bioguide_id} and Legislation {legislation_id} does not exist.")
+
+        # Print success message
+        print(f"Success: Node and Relationship for Person {bioguide_id} and Legislation {legislation_id} are created properly.")
+
+# Close the Neo4j driver connection
+driver.close()
diff --git a/write_json_to_csv.py b/write_json_to_csv.py
new file mode 100644
index 0000000..252ced1
--- 
/dev/null
+++ b/write_json_to_csv.py
@@ -0,0 +1,78 @@
+import csv
+import requests
+import sys
+
+def get_api_endpoint_map():
+    import os  # local import; the API key is read from the environment instead of being committed (raises KeyError if unset)
+    return {
+        "member": {
+            "url": "https://api.congress.gov/v3/member",
+            "key": os.environ["CONGRESS_API_KEY"]
+        },
+        # Add more endpoints as needed
+    }
+
+def fetch_json_data(endpoint_name):
+    api_endpoint_map = get_api_endpoint_map()
+
+    if endpoint_name not in api_endpoint_map:
+        print(f"Error: Endpoint '{endpoint_name}' is not defined.")
+        sys.exit(1)
+
+    endpoint_info = api_endpoint_map[endpoint_name]
+    url = endpoint_info["url"]
+    key = endpoint_info["key"]
+
+    # Make the HTTP GET request to fetch JSON data
+    headers = {
+        "X-API-KEY": key
+    }
+    response = requests.get(url, headers=headers, timeout=30)
+
+    if response.status_code != 200:
+        print(f"Error: Failed to fetch data. Status code {response.status_code}")
+        sys.exit(1)
+
+    json_data = response.json()
+
+    # Print the JSON response for debugging
+    print("JSON Response:")
+    print(json_data)
+
+    return json_data
+
+def write_json_to_csv(json_data, filename="output.csv"):
+    # Open the CSV file for writing
+    with open(filename, mode='w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.writer(csvfile, delimiter='^')
+
+        # The header comes from the first item; every row is then written in that column order
+        data_list = json_data.get('members') if isinstance(json_data, dict) else None
+        if data_list:
+            headers = list(data_list[0].keys())
+            writer.writerow(headers)
+
+            # Write each item to a new row, filling keys an item lacks with ''
+            for item in data_list:
+                row = [item.get(header, '') for header in headers]
+                print(f"Writing row: {row}") # Print statement added here
+                writer.writerow(row)
+        else:
+            print("No data to write.")
+
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python write_json_to_csv.py ENDPOINT_NAME")
+        sys.exit(1)
+
+    endpoint_name = sys.argv[1]
+    try:
+        json_data = fetch_json_data(endpoint_name)
+        write_json_to_csv(json_data)
+        print(f"Data has been written to output.csv.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()