policymap/api/endpoints/get_sponsored.py

131 lines
5.1 KiB
Python
Raw Normal View History

2025-03-06 20:00:45 -08:00
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import requests
import json
import os
# Flask blueprint that the /get_sponsored route below is registered on.
bp = Blueprint('get_sponsored', __name__)
# Path of the JSON cache file read by load_cache() and written by save_cache().
# It is a relative path, so it resolves against the process's working directory.
CACHE_FILE = 'cache.json'
def load_cache():
    """Load the JSON cache stored in ``CACHE_FILE``.

    Returns:
        The deserialized contents of the cache file, or an empty dict when
        the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    try:
        # Open the file directly instead of testing os.path.exists() first,
        # so the file cannot vanish between the check and the read. The
        # encoding is explicit so the result does not depend on the locale.
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}
2025-03-06 20:00:45 -08:00
def save_cache(cache_data):
    """Persist ``cache_data`` to ``CACHE_FILE`` as JSON.

    The data is first written to a temporary sibling file and then moved
    over the cache file with ``os.replace``. If serialization fails or the
    process dies mid-write, the previous cache file is left intact instead
    of being truncated.

    Args:
        cache_data: JSON-serializable object to store.

    Raises:
        TypeError: If ``cache_data`` contains values ``json.dump`` cannot
            serialize.
        OSError: If the temporary file cannot be written or moved into place.
    """
    tmp_path = f"{CACHE_FILE}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(cache_data, f)
    # Swap the fully written file into place in a single step.
    os.replace(tmp_path, CACHE_FILE)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
2025-03-06 20:00:45 -08:00
# Seconds to wait on the Congress API before giving up on a request. Without
# a timeout, requests.get() may block the worker indefinitely.
_CONGRESS_API_TIMEOUT_SECONDS = 30


def _fetch_sponsored_legislation(bioguide_id, api_key):
    """Return the 'sponsoredLegislation' value for one member, or None on failure.

    Failures (non-200 status, missing 'sponsoredLegislation' key) are logged
    and reported as None. Network errors and timeouts raised by ``requests``
    propagate to the caller.
    """
    # NOTE(review): the Congress API appears to paginate list responses; only
    # the first page is read here -- confirm whether later pages are needed.
    congress_api_url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
    headers = {
        'X-API-KEY': api_key
    }
    response = requests.get(
        congress_api_url,
        headers=headers,
        timeout=_CONGRESS_API_TIMEOUT_SECONDS,
    )
    print(f"Response Status Code: {response.status_code}")
    print(f"Response Text: {response.text}")

    if response.status_code != 200:
        neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {bioguide_id}: Status Code {response.status_code}, Response: {response.text}")
        return None

    response_data = response.json()
    if 'sponsoredLegislation' not in response_data:
        neo4j_logger.error(f"Missing 'sponsoredLegislation' key in response for bioguideId {bioguide_id}: {response_data}")
        return None
    return response_data['sponsoredLegislation']


def _build_legislation_record(bioguide_id, legislation):
    """Build the cache entry for one piece of legislation sponsored by ``bioguide_id``."""
    # Both nested objects may be absent OR explicitly null in the API
    # response; ``or {}`` covers both so the .get() calls below are safe.
    latest_action = legislation.get('latestAction') or {}
    policy_area = legislation.get('policyArea') or {}
    return {
        'bioguideId': current_or_default(bioguide_id) if False else bioguide_id,
        **legislation,
        'latestAction': {
            'actionDate': latest_action.get('actionDate'),
            'text': latest_action.get('text')
        },
        'policyArea': {
            'name': policy_area.get('name')
        }
    }


@bp.route('/get_sponsored')
def get_sponsored():
    """Fetch and cache the legislation sponsored by every queued member.

    Drains the ``bioguideIds`` list stored in the cache. For each id, the
    member's sponsored legislation is requested from the Congress API and
    every item not yet cached is stored under ``legislation_<number>``. The
    cache is saved once per successfully processed member and once more at
    the end.

    Returns:
        A ``(response, status)`` tuple:
        * 404 when the cache holds no bioguideIds,
        * 500 when the ``CONGRESS_API_KEY`` environment variable is not set,
        * 200 with the number of newly cached legislation items otherwise.

    Raises:
        requests.RequestException: If a request to the Congress API fails at
            the network level or times out.
    """
    # bioguideIds and legislation entries share the same cache.
    cache = load_cache()
    pending = cache.get('bioguideIds')
    if not pending:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # Check the API key BEFORE touching the queue. Checking it inside the
    # loop would pop and discard every queued id, then report success.
    api_key = os.getenv('CONGRESS_API_KEY')
    if not api_key:
        neo4j_logger.error("Congress API key not found in environment variables")
        return jsonify({"message": "Congress API key not found in environment variables"}), 500

    print(f"Initial bioguideIds count: {len(pending)}")
    processed_legislation_count = 0

    # ``pending`` is the very list stored in ``cache``, so popping from it
    # also removes the sponsor from the cached queue.
    while pending:
        # Step 1: Retrieve a sponsor from the cache
        current_bioguideId = pending.pop(0)
        if current_bioguideId is None:
            continue
        print(f"Processing bioguideId: {current_bioguideId}")

        # Step 2: Fetch sponsored legislation for the member
        # NOTE(review): a failed fetch still drops the id from the queue
        # (the final save persists the removal) -- confirm this is intended.
        sponsored_legislation = _fetch_sponsored_legislation(current_bioguideId, api_key)
        if sponsored_legislation is None:
            continue

        # Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
        for legislation in sponsored_legislation:
            if 'number' not in legislation:
                neo4j_logger.error(f"Missing 'number' field in legislation data for bioguideId {current_bioguideId}: {legislation}")
                continue

            # NOTE(review): keys use only the bill number; verify that numbers
            # cannot collide across bill types or congresses.
            key = f"legislation_{legislation['number']}"
            print(f"About to write to cache: {key} - {legislation}")
            if key not in cache:
                cache[key] = _build_legislation_record(current_bioguideId, legislation)
                processed_legislation_count += 1

        # Step 4: Persist once per sponsor. Saving here (not after every item)
        # means the sponsor's removal from the queue and all of their
        # legislation reach disk together, and the file is not rewritten
        # once per bill.
        save_cache(cache)
        print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
        neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
        print(f"Remaining bioguideIds count after processing {current_bioguideId}: {len(pending)}")

    # Save again so ids that were skipped or failed are also persisted as removed.
    save_cache(cache)
    print(f"Total processed legislation count: {processed_legislation_count}")
    print(f"Overall items in cache: {len(cache)}")

    return jsonify({
        "message": "Sponsored legislation processed successfully",
        "processed_legislation_count": processed_legislation_count
    }), 200