policymap/api/endpoints/get_sponsored.py

145 lines
5.4 KiB
Python
Raw Normal View History

2025-03-06 20:00:45 -08:00
from flask import Blueprint, jsonify
import requests
import json
import os
2025-03-09 01:47:27 -08:00
import pandas as pd
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
# get_driver and neo4j_logger are provided by the application module (app.py).
from app import get_driver, neo4j_logger
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
# Flask blueprint that the /get_sponsored route in this module is registered on.
bp = Blueprint('get_sponsored', __name__)
2025-03-06 20:00:45 -08:00
# Path of the JSON cache read by load_cache() and written by save_cache().
# It is a relative path, so it resolves against the process working directory.
CACHE_FILE = 'cache.json'
2025-03-09 01:47:27 -08:00
# Path of the CSV output that get_sponsored() passes to write_to_csv().
# It is a relative path, so it resolves against the process working directory.
CSV_FILE = 'legislation.csv'
2025-03-06 20:00:45 -08:00
def load_cache():
    """Load the JSON cache stored at ``CACHE_FILE``.

    Returns:
        The decoded JSON content of the cache file, or an empty dict when
        the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    # Open the file directly instead of testing os.path.exists() first: the
    # file could disappear between the check and the open, and a single
    # attempt avoids a second filesystem lookup.
    try:
        # JSON text is UTF-8; do not depend on the platform locale encoding.
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}
2025-03-06 20:00:45 -08:00
def save_cache(cache_data):
    """Serialize ``cache_data`` as JSON and write it to ``CACHE_FILE``.

    Args:
        cache_data: JSON-serializable object to persist.

    Raises:
        TypeError: If ``cache_data`` contains a value ``json`` cannot
            serialize. This is raised before the cache file is touched.
    """
    # Serialize before opening: mode 'w' truncates the file immediately, so a
    # failure in the middle of dumping would leave an empty or partial cache
    # and destroy the previous contents.
    serialized = json.dumps(cache_data)
    with open(CACHE_FILE, 'w', encoding='utf-8') as f:
        f.write(serialized)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
def write_to_csv(legislation_data, csv_file):
    """Flatten legislation records and write them to a CSV file.

    Each record is flattened with ``flatten_dict`` so that nested
    dictionaries become single-level columns, then the rows are written
    with pandas (no index column).

    Args:
        legislation_data: Iterable of dict records to export.
        csv_file: Destination path handed to ``DataFrame.to_csv``.

    Returns:
        None. When there is nothing to write, a warning is logged and the
        file is left untouched.
    """
    # flatten_dict returns the flattened mapping; it does not fill an output
    # argument. The previous call passed the target dict as the `sep`
    # parameter and dropped the return value, so every row stayed empty and
    # the CSV was never written.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]
    df = pd.DataFrame(flattened_legislation)

    # Debugging: show the DataFrame structure before deciding whether to write.
    print("Debugging DataFrame:")
    if df.empty:
        print("DataFrame is empty.")
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    print(df.head())
    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
def flatten_dict(d, parent_key='', sep='_'):
    """Return a single-level copy of a possibly nested dictionary.

    Keys of nested dictionaries are joined to their parent key with ``sep``.
    Lists are rendered as a ``', '``-joined string of their elements (an
    empty list becomes ``''``), and ``None`` values become ``''``.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix applied to every key of ``d``; empty for the
            top-level call.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new dict without nested dictionaries.
    """
    flat = {}
    for key, value in d.items():
        full_key = f"{parent_key}{sep}{key}" if parent_key else key

        if isinstance(value, dict):
            # Debugging: report descent into a nested dictionary.
            print(f"Entering nested dictionary with key: {full_key}")
            flat.update(flatten_dict(value, full_key, sep=sep))
            continue

        if isinstance(value, list):
            # Joining an empty list yields '', so no separate empty case.
            flat[full_key] = ', '.join(map(str, value))
            continue

        if value is None:
            # Missing values are exported as empty strings.
            flat[full_key] = ''
            continue

        # Debugging: report every scalar value that is kept as-is.
        print(f"Adding {full_key}: {value}")
        flat[full_key] = value
    return flat
2025-03-06 20:00:45 -08:00
def _fetch_sponsored_legislation(bioguide_id, api_key, timeout=30):
    """Fetch the sponsored legislation of one member from the Congress API.

    Args:
        bioguide_id: Member identifier inserted into the request URL.
        api_key: Value sent in the ``X-API-KEY`` request header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under ``sponsoredLegislation`` in the JSON response,
        or an empty list when the request fails, the status is not 200, the
        body is not JSON, or the key is absent or empty.
    """
    # NOTE(review): only the first response page is consumed here; the API
    # appears to paginate its results - confirm whether following pages
    # must be fetched as well.
    congress_api_url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
    headers = {
        'X-API-KEY': api_key
    }

    # A timeout keeps a stalled connection from blocking the request forever,
    # and a network error for one member must not abort the whole export.
    try:
        response = requests.get(congress_api_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        neo4j_logger.error(f"Request for sponsored legislation failed for bioguideId {bioguide_id}: {exc}")
        return []

    print(f"Response Status Code: {response.status_code}")
    print(f"Response Text: {response.text}")

    if response.status_code != 200:
        neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {bioguide_id}: Status Code {response.status_code}, Response: {response.text}")
        return []

    # The JSON decoding errors raised by requests derive from ValueError.
    try:
        response_data = response.json()
    except ValueError:
        neo4j_logger.error(f"Response for bioguideId {bioguide_id} is not valid JSON")
        return []

    # Debugging statement to check the raw API response
    print("Raw API Response:")
    print(json.dumps(response_data, indent=4))

    if not isinstance(response_data, dict):
        return []
    return response_data.get('sponsoredLegislation') or []


@bp.route('/get_sponsored')
def get_sponsored():
    """Export the sponsored legislation of every cached member to CSV.

    Reads the ``bioguideIds`` list from the cache, fetches the sponsored
    legislation of each member from the Congress API and writes all
    collected items to ``CSV_FILE``.

    Returns:
        A ``(response, status)`` tuple:
        * 404 when the cache holds no ``bioguideIds``;
        * 500 when the ``CONGRESS_API_KEY`` environment variable is unset;
        * 200 otherwise, with a message telling whether any legislation
          was collected.
    """
    # Load bioguideIds from the cache
    cache = load_cache()
    if 'bioguideIds' not in cache or not cache['bioguideIds']:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # The key does not change between members, so check it once up front
    # instead of skipping every member and still reporting success.
    api_key = os.getenv('CONGRESS_API_KEY')
    if not api_key:
        neo4j_logger.error("Congress API key not found in environment variables")
        return jsonify({"message": "Congress API key not found in environment variables"}), 500

    bioguide_ids = cache['bioguideIds']
    print(f"Initial bioguideIds count: {len(bioguide_ids)}")

    all_legislation_data = []
    # The cache is never saved back in this function, so plain iteration is
    # equivalent to draining the list with pop(0) and avoids the O(n) shifts.
    for current_bioguideId in bioguide_ids:
        if current_bioguideId is None:
            continue
        print(f"Processing bioguideId: {current_bioguideId}")
        all_legislation_data.extend(
            _fetch_sponsored_legislation(current_bioguideId, api_key)
        )

    # Debugging statement to check the number of legislation items collected
    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    if not all_legislation_data:
        # Nothing was collected, so no CSV is produced; say so rather than
        # claiming a successful write.
        neo4j_logger.warning("No sponsored legislation collected; CSV file not written")
        return jsonify({"message": "No legislation data collected; CSV not written"}), 200

    # Print first few items to ensure data is structured correctly
    for i, item in enumerate(all_legislation_data[:5]):
        print(f"\nLegislation Item {i+1}:")
        print(json.dumps(item, indent=4))

    # Write the extracted legislation data to a CSV file
    write_to_csv(all_legislation_data, CSV_FILE)

    return jsonify({"message": "Legislation data written to CSV successfully"}), 200