Compare commits


No commits in common. "faeea6e4e0acf7bb7525e33d9d8d1ddac9616ea7c51b8eea5b20ecb1f2038434" and "084ddc26d399e43601d2d985e39ddcbf22c1726b2bb49267454ea3876643bc01" have entirely different histories.

5 changed files with 196 additions and 314 deletions

View File

@@ -1,62 +0,0 @@
# endpoints/create_cosponsored_relationship.py
from flask import Blueprint, jsonify
import logging
import csv
import os
from neo4j import GraphDatabase
from app import get_driver, neo4j_logger  # Ensure relative imports work

bp = Blueprint('create_cosponsored_relationship', __name__)

@bp.route('/create_cosponsored_relationship', methods=['GET'])
def create_cosponsored_relationship():
    try:
        # Path to the CSV file
        csv_file_path = os.path.join(os.path.dirname(__file__), '..', 'cosponsored_legislation.csv')
        if not os.path.exists(csv_file_path):
            return jsonify({"status": "error", "message": "CSV file not found"}), 404

        driver = get_driver()
        with driver.session() as session:
            # Read the CSV data
            with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
                csv_reader = csv.DictReader(csv_file)
                for row in csv_reader:
                    cosponsored_by = row.get('cosponsored_by')
                    number = row.get('number')
                    if cosponsored_by and number:
                        person_node = session.run(
                            "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                            {"bioguideId": cosponsored_by}
                        ).single()
                        legislation_node = session.run(
                            "MATCH (l:Legislation {number: $number}) RETURN l",
                            {"number": number}
                        ).single()
                        if person_node and legislation_node:
                            person = person_node['p']
                            legislation = legislation_node['l']
                            session.run(
                                "MATCH (p:Person), (l:Legislation) "
                                "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                                "CREATE (p)-[:COSPONSORED]->(l)",
                                {"person_id": person.id, "legislation_id": legislation.id}
                            )
                            neo4j_logger.info(f"Created COSPONSORED relationship from Person {person['name']} to Legislation {legislation['title']}")
                        else:
                            if not person_node:
                                neo4j_logger.warning(f"No Person node found for bioguideId: {cosponsored_by}")
                            if not legislation_node:
                                neo4j_logger.warning(f"No Legislation node found for number: {number}")

        return jsonify({"status": "success", "message": "COSPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating COSPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
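Note: the loop above issues two MATCH lookups per CSV row and then a CREATE keyed on internal id() values, which also allows duplicate COSPONSORED edges if the CSV is imported twice. A minimal alternative sketch, assuming the same Person.bioguideId and Legislation.number properties (link_cosponsor is a hypothetical helper, not part of this diff):

# Hypothetical helper: one MERGE per CSV row instead of two MATCHes plus a CREATE.
COSPONSOR_QUERY = (
    "MATCH (p:Person {bioguideId: $bioguideId}) "
    "MATCH (l:Legislation {number: $number}) "
    "MERGE (p)-[:COSPONSORED]->(l) "
    "RETURN count(*) AS linked"
)

def link_cosponsor(session, bioguide_id, number):
    # MERGE keeps the import idempotent: re-running the CSV does not
    # create duplicate COSPONSORED relationships.
    record = session.run(COSPONSOR_QUERY, bioguideId=bioguide_id, number=number).single()
    return record["linked"] if record else 0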

View File

@@ -1,43 +0,0 @@
# endpoints/create_sponsored_relationship.py
from flask import Blueprint, jsonify
from neo4j import GraphDatabase
import logging
from app import get_driver, neo4j_logger  # Use absolute import

bp = Blueprint('create_sponsored_relationship', __name__)

@bp.route('/create_sponsored_relationship', methods=['GET'])
def create_sponsored_relationship():
    try:
        driver = get_driver()
        with driver.session() as session:
            # Step 3: Implement the endpoint logic
            legislation_nodes = session.run("MATCH (l:Legislation) RETURN l")
            for record in legislation_nodes:
                legislation = record['l']
                bioguide_id = legislation.get('sponsored_by')
                if bioguide_id:
                    person_node = session.run(
                        "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                        {"bioguideId": bioguide_id}
                    ).single()
                    if person_node:
                        person = person_node['p']
                        session.run(
                            "MATCH (p:Person), (l:Legislation) "
                            "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                            "CREATE (p)-[:SPONSORED]->(l)",
                            {"person_id": person.id, "legislation_id": legislation.id}
                        )
                        neo4j_logger.info(f"Created SPONSORED relationship from Person {person['name']} to Legislation {legislation['title']}")
                    else:
                        neo4j_logger.warning(f"No Person node found for bioguideId: {bioguide_id}")

        return jsonify({"status": "success", "message": "SPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating SPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
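Since each Legislation node already carries a sponsored_by property, the per-node loop above could in principle be collapsed into a single Cypher statement. A rough sketch under that assumption (link_all_sponsors is a made-up name for illustration):

# Hypothetical one-shot variant: let Cypher do the per-node lookup and linking.
SPONSOR_BATCH_QUERY = (
    "MATCH (l:Legislation) WHERE l.sponsored_by IS NOT NULL "
    "MATCH (p:Person {bioguideId: l.sponsored_by}) "
    "MERGE (p)-[:SPONSORED]->(l) "
    "RETURN count(*) AS linked"
)

def link_all_sponsors(driver):
    with driver.session() as session:
        # Single round trip; MERGE avoids duplicate SPONSORED edges on re-runs.
        return session.run(SPONSOR_BATCH_QUERY).single()["linked"]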

View File

@@ -1,99 +0,0 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json

bp = Blueprint('get_cosponsored', __name__)

# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    CACHE_PATH = os.getenv("CACHE_PATH")
    if not CACHE_PATH:
        logging.error("CACHE_PATH not found in .env file")
        return None

    if not os.path.exists(CACHE_PATH):
        logging.error("Cache file not found at specified path")
        return None

    try:
        with open(CACHE_PATH, 'r') as file:
            cache_data = json.load(file)
            bioguideIds = cache_data.get("bioguideIds", [])
            if not bioguideIds:
                logging.error("bioguideIds not found in cache.json")
                return None
            return bioguideIds
    except Exception as e:
        logging.error(f"Failed to read cache file: {str(e)}")
        return None

# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, item in enumerate(v):
                items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)

# Function to write data to CSV
def write_to_csv(data, filename):
    keys = set()
    for item in data:
        keys.update(item.keys())

    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        all_data = []
        for bioguideId in bioguideIds:
            # Make request to Congress API
            api_key = os.getenv("CONGRESS_API_KEY")
            url = f"https://api.congress.gov/v3/member/{bioguideId}/cosponsored-legislation?api_key={api_key}"
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            data = response.json().get("cosponsoredLegislation", [])
            # Add cosponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                flattened_item["cosponsored_by"] = bioguideId
                if not any(flattened_item.values()):
                    continue  # Skip empty rows
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)

        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500
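One caveat in the helper above: flatten_dict assumes every list element is itself a dict and stores the nested result as a single value, so DictWriter can end up writing a dict repr into a CSV cell. A hedged alternative sketch that flattens list elements into scalar columns as well (flatten_record is a hypothetical name, reusing the same key scheme):

# Hypothetical variant: never hand DictWriter a nested dict as a cell value.
def flatten_record(d, parent_key='', sep='_'):
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_record(v, new_key, sep=sep))
        elif isinstance(v, list):
            for i, elem in enumerate(v):
                if isinstance(elem, dict):
                    # Recurse into dict elements, indexing them in the column name.
                    items.update(flatten_record(elem, f"{new_key}{sep}{i}", sep=sep))
                else:
                    items[f"{new_key}{sep}{i}"] = elem if elem not in (None, "") else "NONE"
        else:
            items[new_key] = v if v not in (None, "") else "NONE"
    return items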

View File

@@ -1,99 +1,144 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json

bp = Blueprint('get_sponsored', __name__)

# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    CACHE_PATH = os.getenv("CACHE_PATH")
    if not CACHE_PATH:
        logging.error("CACHE_PATH not found in .env file")
        return None

    if not os.path.exists(CACHE_PATH):
        logging.error("Cache file not found at specified path")
        return None

    try:
        with open(CACHE_PATH, 'r') as file:
            cache_data = json.load(file)
            bioguideIds = cache_data.get("bioguideIds", [])
            if not bioguideIds:
                logging.error("bioguideIds not found in cache.json")
                return None
            return bioguideIds
    except Exception as e:
        logging.error(f"Failed to read cache file: {str(e)}")
        return None

# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, item in enumerate(v):
                items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)

# Function to write data to CSV
def write_to_csv(data, filename):
    keys = set()
    for item in data:
        keys.update(item.keys())

    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

@bp.route('/get_sponsored', methods=['GET'])
def get_sponsored_legislation():
    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        all_data = []
        for bioguideId in bioguideIds:
            # Make request to Congress API
            api_key = os.getenv("CONGRESS_API_KEY")
            url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            data = response.json().get("sponsoredLegislation", [])
            # Add sponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                flattened_item["sponsored_by"] = bioguideId
                if not any(flattened_item.values()):
                    continue  # Skip empty rows
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "sponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)

        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

from flask import Blueprint, jsonify
import requests
import json
import os
import pandas as pd
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger

bp = Blueprint('get_sponsored', __name__)

CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'

def load_cache():
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, 'r') as f:
            return json.load(f)
    else:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}

def save_cache(cache_data):
    with open(CACHE_FILE, 'w') as f:
        json.dump(cache_data, f)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")

def write_to_csv(legislation_data, csv_file):
    # Flatten the nested dictionaries
    flattened_legislation = []
    for item in legislation_data:
        flattened_item = {}
        flatten_dict(item, "", flattened_item)
        flattened_legislation.append(flattened_item)

    df = pd.DataFrame(flattened_legislation)

    # Debugging: Print the first few entries of the DataFrame to inspect its structure
    print("Debugging DataFrame:")
    if not df.empty:
        print(df.head())
    else:
        print("DataFrame is empty.")

    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")

def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            # Debugging: Print the key and value when entering a nested dictionary
            print(f"Entering nested dictionary with key: {new_key}")
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            # Handle lists by converting them to strings or other appropriate representation
            if v:
                items.append((new_key, ', '.join(map(str, v))))
            else:
                # If the list is empty, add an empty string or a placeholder
                items.append((new_key, ''))
        elif v is not None:
            # Debugging: Print the key and value when adding a non-dict, non-list item
            print(f"Adding {new_key}: {v}")
            items.append((new_key, v))
        else:
            # Handle None values appropriately (e.g., add an empty string or a placeholder)
            items.append((new_key, ''))
    return dict(items)

@bp.route('/get_sponsored')
def get_sponsored():
    # Load bioguideIds and legislation from the same cache
    cache = load_cache()
    if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # Print the number of items found in the cache initially
    initial_bioguideIds_count = len(cache['bioguideIds'])
    print(f"Initial bioguideIds count: {initial_bioguideIds_count}")

    processed_legislation_count = 0
    all_legislation_data = []

    while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
        # Step 1: Retrieve a sponsor from the cache
        current_bioguideId = cache['bioguideIds'].pop(0)
        if current_bioguideId is None:
            continue

        print(f"Processing bioguideId: {current_bioguideId}")

        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"

        # Include API key in headers (if required)
        api_key = os.getenv('CONGRESS_API_KEY')
        if not api_key:
            neo4j_logger.error("Congress API key not found in environment variables")
            continue

        headers = {
            'X-API-KEY': api_key
        }

        # Step 2: Fetch sponsored legislation for the member
        response = requests.get(congress_api_url, headers=headers)
        print(f"Response Status Code: {response.status_code}")
        print(f"Response Text: {response.text}")

        if response.status_code != 200:
            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
            continue

        response_data = response.json()
        # Debugging statement to check the raw API response
        print("Raw API Response:")
        print(json.dumps(response_data, indent=4))

        # Extract legislation data from the response
        if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
            for result in response_data['sponsoredLegislation']:
                all_legislation_data.append(result)

    # Debugging statement to check the number of legislation items collected
    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    if len(all_legislation_data) > 0:
        # Print first few items to ensure data is structured correctly
        for i, item in enumerate(all_legislation_data[:5]):
            print(f"\nLegislation Item {i+1}:")
            print(json.dumps(item, indent=4))

    # Write the extracted legislation data to a CSV file
    write_to_csv(all_legislation_data, CSV_FILE)
    return jsonify({"message": "Legislation data written to CSV successfully"}), 200

View File

@@ -1,54 +1,95 @@
from flask import Blueprint, jsonify, request
import os
import csv
from neo4j import GraphDatabase
import logging

bp = Blueprint('process_sponsored', __name__)

# Custom logger for the process_sponsored blueprint
process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
process_sponsored_logger.setLevel(logging.INFO)
process_sponsored_handler = logging.StreamHandler()
process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
process_sponsored_handler.setFormatter(process_sponsored_formatter)
process_sponsored_logger.addHandler(process_sponsored_handler)

@bp.route('/process_sponsored', methods=['GET'])
def process_sponsored():
    csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")

    if not csv_file_path:
        return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400

    try:
        with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))
            session = driver.session()

            for row in reader:
                properties = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}

                # Log the CSV row
                process_sponsored_logger.info(f"Processing row: {properties}")

                query = (
                    "MERGE (l:legislation {"
                    + ", ".join(f"{key}: $props.{key}" for key in properties)
                    + "})"
                )

                # Log the MERGE query
                process_sponsored_logger.info(f"Executing query: {query}")

                session.run(query, props=properties)

            session.close()
            driver.close()

        return jsonify({"message": f"Processed {reader.line_num - 1} sponsored legislations"}), 200
    except Exception as e:
        process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
        return jsonify({"error": str(e)}), 500

# endpoints/process_sponsors.py
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import json
import os

bp = Blueprint('process_sponsors', __name__)

CACHE_FILE = 'cache.json'

def load_cache():
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, 'r') as f:
            return json.load(f)
    return {}

def save_cache(cache_data):
    with open(CACHE_FILE, 'w') as f:
        json.dump(cache_data, f)

@bp.route('/process_sponsors')
def process_sponsors():
    cache = load_cache()

    if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0:
        return jsonify({"message": "No legislation entries found in cache"}), 404

    # Print the number of items found in the cache initially
    initial_legislation_entries_count = len(cache['legislation_entries'])
    print(f"Initial legislation entries count: {initial_legislation_entries_count}")

    processed_legislation_count = 0

    while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0:
        # Step 1: Retrieve a legislation entry from the cache
        legislation_entry = cache['legislation_entries'].pop(0)
        if not legislation_entry or 'bioguideId' not in legislation_entry:
            continue

        bioguideId = legislation_entry['bioguideId']
        legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'}

        # Step 2: Create a legislation node with the properties
        driver = get_driver()
        with driver.session() as session:
            legislation_id = legislation_properties['id']
            query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
            neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
            existing_legislation = session.run(query, legislation_id=legislation_id).single()
            if not existing_legislation:
                properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties])
                query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
                neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
                result = session.run(query, **legislation_properties)
                # Convert the created node to a dictionary
                new_legislation_node = {
                    'id': result.single()['n'].id,
                    'labels': list(result.single()['n'].labels),
                    **{key: value for key, value in result.single()['n'].items()}
                }
                neo4j_logger.info(f"Created legislation node: {new_legislation_node}")

        # Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
        with driver.session() as session:
            person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
            neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
            sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
            if not sponsor_node:
                neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
                continue

            legislation_id = legislation_properties['id']
            relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
            neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
            result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
            relationship_node = {
                'id': result.single()['r'].id,
                'type': "sponsored",
                **{key: value for key, value in result.single()['r'].items()}
            }
            neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")

        processed_legislation_count += 1
        save_cache(cache)

    # Print the total number of legislation items processed
    print(f"Total processed legislation count: {processed_legislation_count}")

    return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200
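Two details in the CREATE path of the new endpoint are worth flagging: the property string interpolates "$key" inside quotes, so Cypher stores the literal text rather than the bound parameter values, and result.single() is called several times even though a result can only be consumed once. A hedged sketch of a parameterized alternative (create_legislation_node is a hypothetical helper, not part of this diff):

def create_legislation_node(driver, legislation_properties):
    # MERGE on the id, then copy the remaining properties from a map parameter,
    # so values are bound by the driver instead of being spliced into the query text.
    query = (
        "MERGE (n:Legislation {id: $id}) "
        "SET n += $props "
        "RETURN n"
    )
    with driver.session() as session:
        record = session.run(
            query, id=legislation_properties["id"], props=legislation_properties
        ).single()  # consume the result exactly once
        node = record["n"]
        return {"labels": list(node.labels), **dict(node)}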