add sponsored and cosponsored endpoints

This commit is contained in:
Moses Rolston 2025-03-09 16:41:33 -07:00
parent 084ddc26d3
commit 33dd47226b
2 changed files with 169 additions and 115 deletions

View File

@ -0,0 +1,99 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json
# Flask blueprint for the cosponsored-legislation endpoint; registered with the app elsewhere.
bp = Blueprint('get_cosponsored', __name__)
# Load the cached member bioguideIds from the JSON file named by CACHE_PATH.
def get_bioguideIds_from_cache():
    """Return the list of bioguideIds stored in the cache file, or None.

    The cache location is taken from the CACHE_PATH environment variable.
    None is returned (with an error logged) when the variable is unset,
    the file does not exist, the file cannot be parsed, or it contains
    no "bioguideIds" entry.
    """
    cache_path = os.getenv("CACHE_PATH")
    if not cache_path:
        logging.error("CACHE_PATH not found in .env file")
        return None
    if not os.path.exists(cache_path):
        logging.error("Cache file not found at specified path")
        return None
    try:
        with open(cache_path, 'r') as fh:
            ids = json.load(fh).get("bioguideIds", [])
    except Exception as exc:
        logging.error(f"Failed to read cache file: {str(exc)}")
        return None
    if not ids:
        logging.error("bioguideIds not found in cache.json")
        return None
    return ids
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten a nested dict (including lists) into a single-level dict.

    Nested dict keys are joined with *sep*; list elements get an index
    suffix (e.g. "latestAction_0_text"). Scalar values that are None or
    the empty string are replaced with the placeholder "NONE" so empty
    cells are visible in the CSV output.
    """
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, elem in enumerate(v):
                indexed_key = f"{new_key}_{i}"
                if isinstance(elem, dict):
                    # Bug fix: flatten nested dicts into individual columns
                    # instead of storing the whole dict as one cell value
                    # (which broke the downstream CSV writer).
                    items.extend(flatten_dict(elem, indexed_key, sep=sep).items())
                else:
                    # Bug fix: scalar list elements used to crash with
                    # AttributeError because .items() was called on them.
                    items.append((indexed_key, elem if elem is not None and elem != "" else "NONE"))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)
# Function to write data to CSV
def write_to_csv(data, filename):
    """Write a list of flat dicts to *filename* as a CSV file.

    The header is the union of keys across all rows; rows missing a key
    get an empty cell. Bug fix: the fieldnames were previously an
    unsorted set, so the column order changed from run to run — they are
    now sorted for a deterministic, diff-friendly header.
    """
    fieldnames = sorted({key for row in data for key in row})
    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(data)
@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    """Fetch cosponsored legislation for every cached member and dump it to CSV.

    Reads bioguideIds from the cache file, queries the Congress API once
    per member, flattens each legislation record, tags it with the
    member's id, and writes everything to one CSV file. Returns 404 when
    no ids or no legislation are found, 500 on any API or internal error.
    """
    try:
        member_ids = get_bioguideIds_from_cache()
        if not member_ids:
            return jsonify({"error": "bioguideIds not found"}), 404

        api_key = os.getenv("CONGRESS_API_KEY")
        rows = []
        for member_id in member_ids:
            url = (
                f"https://api.congress.gov/v3/member/{member_id}"
                f"/cosponsored-legislation?api_key={api_key}"
            )
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {member_id}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {member_id}"}), 500

            for record in response.json().get("cosponsoredLegislation", []):
                flat = flatten_dict(record)
                flat["cosponsored_by"] = member_id
                # Skip records that flattened to nothing but empty values.
                if any(flat.values()):
                    rows.append(flat)

        if not rows:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(rows, csv_filename)
        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

View File

@ -1,144 +1,99 @@
from flask import Blueprint, jsonify from flask import Blueprint, jsonify, request
import requests import requests
import json import csv
import os import os
import pandas as pd import logging
import json
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger
bp = Blueprint('get_sponsored', __name__) bp = Blueprint('get_sponsored', __name__)
CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'
def load_cache(): # Function to retrieve bioguideIds from cache.json file
if os.path.exists(CACHE_FILE): def get_bioguideIds_from_cache():
with open(CACHE_FILE, 'r') as f: CACHE_PATH = os.getenv("CACHE_PATH")
return json.load(f) if not CACHE_PATH:
else: logging.error("CACHE_PATH not found in .env file")
neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.") return None
return {}
def save_cache(cache_data): if not os.path.exists(CACHE_PATH):
with open(CACHE_FILE, 'w') as f: logging.error("Cache file not found at specified path")
json.dump(cache_data, f) return None
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
def write_to_csv(legislation_data, csv_file): try:
# Flatten the nested dictionaries with open(CACHE_PATH, 'r') as file:
flattened_legislation = [] cache_data = json.load(file)
for item in legislation_data: bioguideIds = cache_data.get("bioguideIds", [])
flattened_item = {} if not bioguideIds:
flatten_dict(item, "", flattened_item) logging.error("bioguideIds not found in cache.json")
flattened_legislation.append(flattened_item) return None
return bioguideIds
df = pd.DataFrame(flattened_legislation) except Exception as e:
logging.error(f"Failed to read cache file: {str(e)}")
# Debugging: Print the first few entries of the DataFrame to inspect its structure return None
print("Debugging DataFrame:")
if not df.empty:
print(df.head())
else:
print("DataFrame is empty.")
if df.empty:
neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
print("DataFrame is empty. Debugging information:")
for item in flattened_legislation[:5]: # Print first 5 items
print(json.dumps(item, indent=4))
return
df.to_csv(csv_file, index=False)
neo4j_logger.info(f"Data written to CSV file: {csv_file}")
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'): def flatten_dict(d, parent_key='', sep='_'):
items = [] items = []
for k, v in d.items(): for k, v in d.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict): if isinstance(v, dict):
# Debugging: Print the key and value when entering a nested dictionary
print(f"Entering nested dictionary with key: {new_key}")
items.extend(flatten_dict(v, new_key, sep=sep).items()) items.extend(flatten_dict(v, new_key, sep=sep).items())
elif isinstance(v, list): elif isinstance(v, list):
# Handle lists by converting them to strings or other appropriate representation for i, item in enumerate(v):
if v: items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
items.append((new_key, ', '.join(map(str, v))))
else:
# If the list is empty, add an empty string or a placeholder
items.append((new_key, ''))
elif v is not None:
# Debugging: Print the key and value when adding a non-dict, non-list item
print(f"Adding {new_key}: {v}")
items.append((new_key, v))
else: else:
# Handle None values appropriately (e.g., add an empty string or a placeholder) items.append((new_key, v if v is not None and v != "" else "NONE"))
items.append((new_key, ''))
return dict(items) return dict(items)
@bp.route('/get_sponsored') # Function to write data to CSV
def get_sponsored(): def write_to_csv(data, filename):
# Load bioguideIds and legislation from the same cache keys = set()
cache = load_cache() for item in data:
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0: keys.update(item.keys())
return jsonify({"message": "No bioguideIds found in cache"}), 404
# Print the number of items found in the cache initially with open(filename, 'w', newline='') as output_file:
initial_bioguideIds_count = len(cache['bioguideIds']) dict_writer = csv.DictWriter(output_file, fieldnames=keys)
print(f"Initial bioguideIds count: {initial_bioguideIds_count}") dict_writer.writeheader()
dict_writer.writerows(data)
processed_legislation_count = 0 @bp.route('/get_sponsored', methods=['GET'])
all_legislation_data = [] def get_sponsored_legislation():
try:
# Retrieve bioguideIds from cache.json
bioguideIds = get_bioguideIds_from_cache()
if not bioguideIds:
return jsonify({"error": "bioguideIds not found"}), 404
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0: all_data = []
# Step 1: Retrieve a sponsor from the cache
current_bioguideId = cache['bioguideIds'].pop(0)
if current_bioguideId is None:
continue
print(f"Processing bioguideId: {current_bioguideId}") for bioguideId in bioguideIds:
# Make request to Congress API
api_key = os.getenv("CONGRESS_API_KEY")
url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
response = requests.get(url)
congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation" if response.status_code != 200:
logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500
# Include API key in headers (if required) data = response.json().get("sponsoredLegislation", [])
api_key = os.getenv('CONGRESS_API_KEY')
if not api_key:
neo4j_logger.error("Congress API key not found in environment variables")
continue
headers = { # Add sponsored_by column and handle nested items
'X-API-KEY': api_key for item in data:
} flattened_item = flatten_dict(item)
flattened_item["sponsored_by"] = bioguideId
if not any(flattened_item.values()):
continue # Skip empty rows
all_data.append(flattened_item)
# Step 2: Fetch sponsored legislation for the member if not all_data:
response = requests.get(congress_api_url, headers=headers) return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404
print(f"Response Status Code: {response.status_code}")
print(f"Response Text: {response.text}")
if response.status_code != 200: # Write data to CSV
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}") csv_filename = f"sponsored_legislation.csv"
continue write_to_csv(all_data, csv_filename)
response_data = response.json() return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
# Debugging statement to check the raw API response except Exception as e:
print("Raw API Response:") logging.error(f"An error occurred: {str(e)}")
print(json.dumps(response_data, indent=4)) return jsonify({"error": str(e)}), 500
# Extract legislation data from the response
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
for result in response_data['sponsoredLegislation']:
all_legislation_data.append(result)
# Debugging statement to check the number of legislation items collected
print(f"Number of legislation items collected: {len(all_legislation_data)}")
if len(all_legislation_data) > 0:
# Print first few items to ensure data is structured correctly
for i, item in enumerate(all_legislation_data[:5]):
print(f"\nLegislation Item {i+1}:")
print(json.dumps(item, indent=4))
# Write the extracted legislation data to a CSV file
write_to_csv(all_legislation_data, CSV_FILE)
return jsonify({"message": "Legislation data written to CSV successfully"}), 200