policymap/api/endpoints/get_sponsored.py

from flask import Blueprint, jsonify
import requests
import json
import os
import pandas as pd

# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger

bp = Blueprint('get_sponsored', __name__)
CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'

def load_cache():
    """Read the JSON cache from ``CACHE_FILE``.

    Returns:
        The decoded JSON content of the cache file, or an empty dict when the
        file does not exist (an info message is logged in that case).
    """
    # Guard clause: nothing on disk yet, so start from an empty cache.
    if not os.path.exists(CACHE_FILE):
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}

    with open(CACHE_FILE, 'r') as cache_handle:
        cached = json.load(cache_handle)
    return cached

def save_cache(cache_data):
    """Serialize ``cache_data`` as JSON into ``CACHE_FILE``, replacing its contents.

    Args:
        cache_data: JSON-serializable object to persist.
    """
    with open(CACHE_FILE, 'w') as cache_handle:
        json.dump(cache_data, cache_handle)

    # Logged only after the file has been closed without error.
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")

def write_to_csv(legislation_data, csv_file):
    """Flatten each legislation record and write the rows to ``csv_file``.

    Every item is passed through ``flatten_dict`` so nested dictionaries
    become single-level rows, then the rows are loaded into a pandas
    DataFrame and written without the index column.

    When the DataFrame is empty, a warning is logged, up to five flattened
    items are printed for debugging, and no file is written.

    Args:
        legislation_data: Iterable of dict records to export.
        csv_file: Destination path passed to ``DataFrame.to_csv``.
    """
    # flatten_dict has no output parameter: it returns a new dict, so the
    # return value must be kept. (Passing a dict as a third positional
    # argument would bind it to ``sep`` and leave every row empty.)
    flattened_legislation = [flatten_dict(item) for item in legislation_data]

    df = pd.DataFrame(flattened_legislation)

    # Debugging: show the DataFrame's structure before deciding what to do.
    print("Debugging DataFrame:")
    if df.empty:
        print("DataFrame is empty.")
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    print(df.head())

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")

def flatten_dict(d, parent_key='', sep='_'):
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are prefixed with their parent key and
    ``sep``. List values are rendered as one ``', '``-joined string, and
    ``None`` values become empty strings. Progress is printed for nested
    dictionaries and for plain values.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix for every key produced from ``d``; when falsy the
            keys of ``d`` are used unchanged.
        sep: Separator placed between ``parent_key`` and each key.

    Returns:
        A new flat dictionary; ``d`` is not modified.
    """
    flat = {}
    for name, value in d.items():
        if parent_key:
            full_key = f"{parent_key}{sep}{name}"
        else:
            full_key = name

        if isinstance(value, dict):
            # Debugging: announce the descent into a nested dictionary.
            print(f"Entering nested dictionary with key: {full_key}")
            flat.update(flatten_dict(value, full_key, sep=sep))
            continue

        if isinstance(value, list):
            # Joining an empty list already yields '', so no special case.
            flat[full_key] = ', '.join(str(element) for element in value)
            continue

        if value is None:
            # Missing values are stored as empty strings.
            flat[full_key] = ''
            continue

        # Debugging: report each plain value as it is stored.
        print(f"Adding {full_key}: {value}")
        flat[full_key] = value
    return flat

@bp.route('/get_sponsored')
def get_sponsored():
    """Fetch sponsored legislation for every cached bioguideId and export it to CSV.

    Reads the ``bioguideIds`` list from the cache, requests each member's
    ``sponsored-legislation`` resource, gathers the ``sponsoredLegislation``
    entries of every successful response, and passes them to
    ``write_to_csv`` with ``CSV_FILE`` as the destination.

    Members whose fetch fails are logged and skipped. If the
    ``CONGRESS_API_KEY`` environment variable is not set, the error is
    logged once and no requests are made.

    Returns:
        A ``(response, status)`` tuple: 404 with a message when the cache
        holds no bioguideIds; otherwise 200. The 200 response is returned
        whether or not any rows were collected.
    """
    # Load bioguideIds from the cache
    cache = load_cache()
    if 'bioguideIds' not in cache or not cache['bioguideIds']:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    bioguide_ids = cache['bioguideIds']
    print(f"Initial bioguideIds count: {len(bioguide_ids)}")

    all_legislation_data = []

    # The key does not change between members, so look it up once.
    api_key = os.getenv('CONGRESS_API_KEY')
    if api_key:
        # The cache is not saved back in this function, so iterating the
        # list visits the same ids as repeatedly popping from its front.
        for current_bioguideId in bioguide_ids:
            if current_bioguideId is None:
                continue

            print(f"Processing bioguideId: {current_bioguideId}")
            all_legislation_data.extend(
                _fetch_sponsored_legislation(current_bioguideId, api_key)
            )
    else:
        neo4j_logger.error("Congress API key not found in environment variables")

    # Debugging statement to check the number of legislation items collected
    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    # Print first few items to ensure data is structured correctly
    for position, item in enumerate(all_legislation_data[:5], start=1):
        print(f"\nLegislation Item {position}:")
        print(json.dumps(item, indent=4))

    # Write the extracted legislation data to a CSV file
    write_to_csv(all_legislation_data, CSV_FILE)

    return jsonify({"message": "Legislation data written to CSV successfully"}), 200


def _fetch_sponsored_legislation(bioguide_id, api_key, timeout=30):
    """Return the ``sponsoredLegislation`` entries for one member, or ``[]`` on failure.

    Args:
        bioguide_id: Member identifier interpolated into the request URL.
        api_key: Value sent in the ``X-API-KEY`` request header.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the endpoint indefinitely.

    Returns:
        The list found under ``sponsoredLegislation`` in the JSON body, or an
        empty list when the request fails, the status is not 200, the body is
        not a JSON object, or the key is missing or empty.
    """
    congress_api_url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
    headers = {
        'X-API-KEY': api_key
    }

    try:
        response = requests.get(congress_api_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are treated like a non-200 status:
        # log and skip this member instead of failing the whole request.
        neo4j_logger.error(f"Request for sponsored legislation failed for bioguideId {bioguide_id}: {exc}")
        return []

    print(f"Response Status Code: {response.status_code}")
    print(f"Response Text: {response.text}")

    if response.status_code != 200:
        neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {bioguide_id}: Status Code {response.status_code}, Response: {response.text}")
        return []

    try:
        response_data = response.json()
    except ValueError as exc:
        # A 200 response with a body that is not valid JSON.
        neo4j_logger.error(f"Invalid JSON in sponsored legislation response for bioguideId {bioguide_id}: {exc}")
        return []

    # Debugging statement to check the raw API response
    print("Raw API Response:")
    print(json.dumps(response_data, indent=4))

    if not isinstance(response_data, dict):
        return []

    # NOTE(review): only one request is made per member, so anything beyond
    # this single response is not collected - confirm whether the API
    # paginates this resource.
    return response_data.get('sponsoredLegislation') or []
add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`from flask import Blueprint, jsonify`
			`import requests`
			`import json`
			`import os`
checkpoint 2025-03-09 01:47:27 -08:00			`import pandas as pd`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`# Assuming you have these functions and configurations in your app.py`
			`from app import get_driver, neo4j_logger`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`bp = Blueprint('get_sponsored', __name__)`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`CACHE_FILE = 'cache.json'`
checkpoint 2025-03-09 01:47:27 -08:00			`CSV_FILE = 'legislation.csv'`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
			`def load_cache():`
			`if os.path.exists(CACHE_FILE):`
			`with open(CACHE_FILE, 'r') as f:`
			`return json.load(f)`
checkpoint 2025-03-08 13:22:48 -08:00			`else:`
			`neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")`
			`return {}`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
			`def save_cache(cache_data):`
			`with open(CACHE_FILE, 'w') as f:`
			`json.dump(cache_data, f)`
checkpoint 2025-03-08 13:22:48 -08:00			`neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`def write_to_csv(legislation_data, csv_file):`
			`# Flatten the nested dictionaries`
			`flattened_legislation = []`
			`for item in legislation_data:`
			`flattened_item = {}`
			`flatten_dict(item, "", flattened_item)`
			`flattened_legislation.append(flattened_item)`

			`df = pd.DataFrame(flattened_legislation)`
checkpoint 2025-03-09 13:07:53 -07:00
			`# Debugging: Print the first few entries of the DataFrame to inspect its structure`
			`print("Debugging DataFrame:")`
			`if not df.empty:`
			`print(df.head())`
			`else:`
			`print("DataFrame is empty.")`

checkpoint 2025-03-09 01:47:27 -08:00			`if df.empty:`
			`neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")`
			`print("DataFrame is empty. Debugging information:")`
			`for item in flattened_legislation[:5]: # Print first 5 items`
			`print(json.dumps(item, indent=4))`
			`return`

			`df.to_csv(csv_file, index=False)`
			`neo4j_logger.info(f"Data written to CSV file: {csv_file}")`

			`def flatten_dict(d, parent_key='', sep='_'):`
			`items = []`
			`for k, v in d.items():`
			`new_key = f"{parent_key}{sep}{k}" if parent_key else k`
			`if isinstance(v, dict):`
			`# Debugging: Print the key and value when entering a nested dictionary`
			`print(f"Entering nested dictionary with key: {new_key}")`
			`items.extend(flatten_dict(v, new_key, sep=sep).items())`
			`elif isinstance(v, list):`
			`# Handle lists by converting them to strings or other appropriate representation`
			`if v:`
			`items.append((new_key, ', '.join(map(str, v))))`
			`else:`
			`# If the list is empty, add an empty string or a placeholder`
			`items.append((new_key, ''))`
			`elif v is not None:`
			`# Debugging: Print the key and value when adding a non-dict, non-list item`
			`print(f"Adding {new_key}: {v}")`
			`items.append((new_key, v))`
			`else:`
			`# Handle None values appropriately (e.g., add an empty string or a placeholder)`
			`items.append((new_key, ''))`
			`return dict(items)`

add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`@bp.route('/get_sponsored')`
			`def get_sponsored():`
checkpoint 2025-03-08 13:22:48 -08:00			`# Load bioguideIds and legislation from the same cache`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`cache = load_cache()`
checkpoint 2025-03-08 13:22:48 -08:00			`if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:`
			`return jsonify({"message": "No bioguideIds found in cache"}), 404`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
			`# Print the number of items found in the cache initially`
checkpoint 2025-03-08 13:22:48 -08:00			`initial_bioguideIds_count = len(cache['bioguideIds'])`
			`print(f"Initial bioguideIds count: {initial_bioguideIds_count}")`
checkpoint 2025-03-09 01:47:27 -08:00
add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`processed_legislation_count = 0`
checkpoint 2025-03-09 01:47:27 -08:00			`all_legislation_data = []`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-08 13:22:48 -08:00			`while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:`
checkpoint before fixing a dumb mistake with dumbeer mistakes 2025-03-06 21:01:42 -08:00			`# Step 1: Retrieve a sponsor from the cache`
checkpoint 2025-03-08 13:22:48 -08:00			`current_bioguideId = cache['bioguideIds'].pop(0)`
			`if current_bioguideId is None:`
checkpoint before fixing a dumb mistake with dumbeer mistakes 2025-03-06 21:01:42 -08:00			`continue`
checkpoint 2025-03-09 01:47:27 -08:00
checkpoint 2025-03-08 13:22:48 -08:00			`print(f"Processing bioguideId: {current_bioguideId}")`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-08 13:22:48 -08:00			`congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"`

			`# Include API key in headers (if required)`
			`api_key = os.getenv('CONGRESS_API_KEY')`
			`if not api_key:`
			`neo4j_logger.error("Congress API key not found in environment variables")`
			`continue`

			`headers = {`
			`'X-API-KEY': api_key`
			`}`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
			`# Step 2: Fetch sponsored legislation for the member`
checkpoint 2025-03-08 13:22:48 -08:00			`response = requests.get(congress_api_url, headers=headers)`
			`print(f"Response Status Code: {response.status_code}")`
			`print(f"Response Text: {response.text}")`

add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`if response.status_code != 200:`
checkpoint 2025-03-08 13:22:48 -08:00			`neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00			`continue`

legislation cache working now too 2025-03-08 15:13:55 -08:00			`response_data = response.json()`

checkpoint 2025-03-09 01:47:27 -08:00			`# Debugging statement to check the raw API response`
			`print("Raw API Response:")`
			`print(json.dumps(response_data, indent=4))`
checkpoint 2025-03-08 13:22:48 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`# Extract legislation data from the response`
			`if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:`
			`for result in response_data['sponsoredLegislation']:`
			`all_legislation_data.append(result)`
checkpoint 2025-03-08 13:22:48 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`# Debugging statement to check the number of legislation items collected`
			`print(f"Number of legislation items collected: {len(all_legislation_data)}")`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`if len(all_legislation_data) > 0:`
			`# Print first few items to ensure data is structured correctly`
			`for i, item in enumerate(all_legislation_data[:5]):`
			`print(f"\nLegislation Item {i+1}:")`
			`print(json.dumps(item, indent=4))`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`# Write the extracted legislation data to a CSV file`
			`write_to_csv(all_legislation_data, CSV_FILE)`
add initial api work and endpoints 2025-03-06 20:00:45 -08:00
checkpoint 2025-03-09 01:47:27 -08:00			`return jsonify({"message": "Legislation data written to CSV successfully"}), 200`