2025-03-06 20:00:45 -08:00
from flask import Blueprint , jsonify
from app import get_driver , neo4j_logger
import requests
import json
import os
# Flask blueprint on which this module's /get_sponsored route is registered.
# NOTE(review): the padding spaces inside the quoted literals below look like
# an extraction artifact — confirm the intended values are 'get_sponsored'
# and 'cache.json'.
bp = Blueprint ( ' get_sponsored ' , __name__ )
# Path of the JSON cache file read by load_cache() and written by save_cache().
# It is a relative path, so it is resolved against the process working directory.
CACHE_FILE = ' cache.json '
def load_cache():
    """Return the cache dictionary stored in ``CACHE_FILE``.

    Returns:
        The decoded JSON content of the cache file, or an empty dict when
        the file does not exist yet.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    # Open first and handle the miss afterwards: a separate existence check
    # could race with another process creating or deleting the file.
    try:
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}
2025-03-06 20:00:45 -08:00
def save_cache(cache_data):
    """Persist ``cache_data`` as JSON in ``CACHE_FILE``.

    The data is serialized to a sibling temporary file which then replaces
    the cache file in a single rename. If serialization fails part-way
    (for example on a non-serializable value), the previously saved cache
    is left untouched instead of being truncated.

    Args:
        cache_data: JSON-serializable object to store.

    Raises:
        TypeError: If ``cache_data`` contains a value ``json`` cannot encode.
        OSError: If the temporary file cannot be written or renamed.
    """
    tmp_path = f"{CACHE_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(cache_data, f)
        # Swap the complete file in; readers never observe a partial write.
        os.replace(tmp_path, CACHE_FILE)
    finally:
        # Only present when the dump or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
2025-03-06 20:00:45 -08:00
def _fetch_sponsored_legislation(bioguide_id, api_key, timeout=30):
    """Fetch the sponsored-legislation list for one member.

    Args:
        bioguide_id: Member identifier inserted into the request URL.
        api_key: Value sent in the ``X-API-KEY`` request header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list found under ``'sponsoredLegislation'`` in the response
        (empty list when that value is null), or ``None`` when the request
        failed or the response was unusable. Every failure is logged.
    """
    # NOTE(review): only the first response page is read; the Congress.gov
    # API appears to paginate this endpoint — confirm whether further pages
    # need to be followed.
    congress_api_url = f"https://api.congress.gov/v3/member/{bioguide_id}/sponsored-legislation"
    headers = {'X-API-KEY': api_key}

    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(congress_api_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        neo4j_logger.error(f"Request for sponsored legislation of bioguideId {bioguide_id} failed: {exc}")
        return None

    print(f"Response Status Code: {response.status_code}")
    print(f"Response Text: {response.text}")

    if response.status_code != 200:
        neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {bioguide_id}: Status Code {response.status_code}, Response: {response.text}")
        return None

    try:
        response_data = response.json()
    except ValueError:
        neo4j_logger.error(f"Response for bioguideId {bioguide_id} is not valid JSON: {response.text}")
        return None

    # Ensure the response contains the 'sponsoredLegislation' key
    if not isinstance(response_data, dict) or 'sponsoredLegislation' not in response_data:
        neo4j_logger.error(f"Missing 'sponsoredLegislation' key in response for bioguideId {bioguide_id}: {response_data}")
        return None

    return response_data['sponsoredLegislation'] or []


def _build_legislation_entry(bioguide_id, legislation):
    """Build the cache record for one piece of legislation.

    The record is the API item itself plus the sponsor's ``bioguideId``,
    with ``latestAction`` and ``policyArea`` reduced to the fields that are
    kept. A missing or null nested object yields ``None`` for its fields.
    """
    latest_action = legislation.get('latestAction') or {}
    policy_area = legislation.get('policyArea') or {}
    return {
        'bioguideId': bioguide_id,
        **legislation,
        'latestAction': {
            'actionDate': latest_action.get('actionDate'),
            'text': latest_action.get('text'),
        },
        'policyArea': {
            'name': policy_area.get('name'),
        },
    }


@bp.route('/get_sponsored')
def get_sponsored():
    """Fetch sponsored legislation for every queued member and cache it.

    The cache holds a work queue under ``'bioguideIds'`` and one
    ``'legislation_<number>'`` entry per stored item. Each queued member is
    looked up through the Congress API; legislation not yet in the cache is
    added, and the member is removed from the queue once handled. Members
    whose lookup fails stay on the queue so that a later call can retry
    them.

    Returns:
        A ``(response, status)`` tuple:
        404 when the cache has no queued bioguideIds,
        500 when ``CONGRESS_API_KEY`` is not set (the queue is untouched),
        200 otherwise, with the number of newly stored items and the list
        of bioguideIds that could not be fetched.
    """
    # Load bioguideIds and legislation from the same cache
    cache = load_cache()
    queued_ids = cache.get('bioguideIds')
    if not queued_ids:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # Check the key once, before any member is taken off the queue: nothing
    # can be fetched without it, so the queue must survive for a later run.
    api_key = os.getenv('CONGRESS_API_KEY')
    if not api_key:
        neo4j_logger.error("Congress API key not found in environment variables")
        return jsonify({"message": "Congress API key not found in environment variables"}), 500

    print(f"Initial bioguideIds count: {len(queued_ids)}")

    processed_legislation_count = 0
    failed_bioguide_ids = []
    queue = list(queued_ids)

    for position, current_bioguideId in enumerate(queue, start=1):
        remaining = queue[position:]
        if current_bioguideId is None:
            continue
        print(f"Processing bioguideId: {current_bioguideId}")

        sponsored_legislation = _fetch_sponsored_legislation(current_bioguideId, api_key)
        if sponsored_legislation is None:
            failed_bioguide_ids.append(current_bioguideId)
            continue

        for legislation in sponsored_legislation:
            if not isinstance(legislation, dict) or 'number' not in legislation:
                neo4j_logger.error(f"Missing 'number' field in legislation data for bioguideId {current_bioguideId}: {legislation}")
                continue

            # NOTE(review): 'number' alone may not be unique across bill
            # types or congresses; an existing key wins — confirm this key
            # scheme against whatever reads the cache.
            key = f"legislation_{legislation['number']}"
            print(f"About to write to cache: {key} - {legislation}")
            if key not in cache:
                cache[key] = _build_legislation_entry(current_bioguideId, legislation)
                processed_legislation_count += 1

        # Persist once per member: the saved snapshot then always pairs a
        # member's legislation with its removal from the queue, and the
        # file is not rewritten for every single item.
        cache['bioguideIds'] = failed_bioguide_ids + remaining
        save_cache(cache)

        neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
        print(f"Remaining bioguideIds count after processing {current_bioguideId}: {len(remaining)}")

    # Whatever could not be fetched is the queue for the next call.
    cache['bioguideIds'] = failed_bioguide_ids
    save_cache(cache)

    print(f"Total processed legislation count: {processed_legislation_count}")
    print(f"Overall items in cache: {len(cache)}")

    return jsonify({
        "message": "Sponsored legislation processed successfully",
        "processed_legislation_count": processed_legislation_count,
        "failed_bioguideIds": failed_bioguide_ids,
    }), 200