2025-03-06 20:00:45 -08:00
from flask import Blueprint , jsonify
import requests
import json
import os
2025-03-09 01:47:27 -08:00
import pandas as pd
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
# Assuming you have these functions and configurations in your app.py
from app import get_driver , neo4j_logger
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
# Blueprint that groups the sponsored-legislation route defined in this module.
bp = Blueprint('get_sponsored', __name__)
2025-03-06 20:00:45 -08:00
# JSON file read by load_cache() and written by save_cache().
CACHE_FILE = 'cache.json'
# Destination of the flattened legislation export.
CSV_FILE = 'legislation.csv'
2025-03-06 20:00:45 -08:00
def load_cache():
    """Load the JSON cache stored at ``CACHE_FILE``.

    Returns:
        The deserialized contents of the cache file, or an empty dict when
        the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    # Open directly instead of checking os.path.exists() first: a file that
    # disappears between the check and the open would otherwise raise.
    try:
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}
2025-03-06 20:00:45 -08:00
def save_cache(cache_data):
    """Serialize ``cache_data`` as JSON and write it to ``CACHE_FILE``.

    Args:
        cache_data: JSON-serializable object to persist.

    Raises:
        TypeError, ValueError: If ``cache_data`` cannot be serialized. The
            existing cache file is left untouched in that case.
    """
    # Serialize before opening the file: open(..., 'w') truncates
    # immediately, so a serialization failure inside the `with` block would
    # wipe out the previous cache contents.
    payload = json.dumps(cache_data)
    with open(CACHE_FILE, 'w', encoding='utf-8') as f:
        f.write(payload)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
2025-03-06 20:00:45 -08:00
2025-03-09 01:47:27 -08:00
def write_to_csv(legislation_data, csv_file):
    """Flatten legislation records and write them to a CSV file.

    Args:
        legislation_data: Iterable of (possibly nested) dicts, one per
            legislation item.
        csv_file: Path of the CSV file to write.

    Returns:
        None. When the resulting DataFrame is empty, a warning is logged and
        no file is written.
    """
    # flatten_dict() returns a new dict and its third parameter is the key
    # separator, so the result must be captured; passing an accumulator as
    # the third argument leaves every row empty.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]
    df = pd.DataFrame(flattened_legislation)

    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        return

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten a nested dict into a single-level dict.

    Keys of nested dicts are joined to their parent key with ``sep``.

    Args:
        d: Dict to flatten; values may be dicts, lists, scalars or None.
        parent_key: Prefix applied to every key of ``d``; empty for no prefix.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new dict in which:
          * nested dicts are expanded recursively,
          * lists become one comma-separated string of ``str()`` of their
            elements (an empty list becomes ``''``),
          * ``None`` becomes ``''``,
          * every other value is kept unchanged.
    """
    flat = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            flat.update(flatten_dict(v, new_key, sep=sep))
        elif isinstance(v, list):
            # Joining an empty list yields '', so no special case is needed.
            flat[new_key] = ', '.join(map(str, v))
        elif v is None:
            flat[new_key] = ''
        else:
            # Falsy scalars such as 0 or False are real values; keep them.
            flat[new_key] = v
    return flat
2025-03-06 20:00:45 -08:00
@bp.route('/get_sponsored')
def get_sponsored():
    """Fetch sponsored legislation for every cached bioguideId and export it to CSV.

    Reads the ``bioguideIds`` list from the cache, requests
    ``/v3/member/<bioguideId>/sponsored-legislation`` from api.congress.gov
    for each ID, collects the ``sponsoredLegislation`` entries of every
    successful response and passes them to ``write_to_csv``.

    Returns:
        A ``(response, status)`` tuple:
          * 404 when the cache holds no bioguideIds,
          * 500 when the ``CONGRESS_API_KEY`` environment variable is unset,
          * 200 once all IDs have been processed. Individual IDs whose
            request fails are logged and skipped.
    """
    cache = load_cache()
    bioguide_ids = cache.get('bioguideIds')
    if not bioguide_ids:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    print(f"Initial bioguideIds count: {len(bioguide_ids)}")

    # The key does not change between iterations, so check it once. Without
    # it no request can succeed, and reporting success would be misleading.
    api_key = os.getenv('CONGRESS_API_KEY')
    if not api_key:
        neo4j_logger.error("Congress API key not found in environment variables")
        return jsonify({"message": "Congress API key not found in environment variables"}), 500
    headers = {'X-API-KEY': api_key}

    all_legislation_data = []
    for current_bioguideId in bioguide_ids:
        if current_bioguideId is None:
            continue

        print(f"Processing bioguideId: {current_bioguideId}")
        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"

        # A timeout keeps one stalled connection from blocking the request
        # handler indefinitely; network errors skip this ID only.
        try:
            response = requests.get(congress_api_url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            neo4j_logger.error(f"Request for bioguideId {current_bioguideId} failed: {exc}")
            continue

        print(f"Response Status Code: {response.status_code}")
        if response.status_code != 200:
            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
            continue

        # response.json() raises a ValueError subclass on a non-JSON body.
        try:
            response_data = response.json()
        except ValueError:
            neo4j_logger.error(f"Invalid JSON in response for bioguideId {current_bioguideId}")
            continue

        # A missing or empty 'sponsoredLegislation' entry contributes nothing.
        all_legislation_data.extend(response_data.get('sponsoredLegislation') or [])

    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    # write_to_csv logs a warning and writes nothing when the data is empty.
    write_to_csv(all_legislation_data, CSV_FILE)

    return jsonify({"message": "Legislation data written to CSV successfully"}), 200