"""Flask blueprint that exports members' sponsored legislation to a CSV file.

The ``/get_sponsored`` route reads a list of bioguide ids from a JSON cache
file, fetches each member's sponsored legislation from the Congress.gov API,
flattens the nested records and writes them to ``CSV_FILE``.
"""

import json
import os

import pandas as pd
import requests
from flask import Blueprint, jsonify

# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger

bp = Blueprint('get_sponsored', __name__)

CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'

# Upper bound (seconds) for a single Congress API call so that a stalled
# connection cannot block the request handler indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def load_cache():
    """Return the parsed JSON content of ``CACHE_FILE``.

    Returns:
        The decoded JSON value, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    if not os.path.exists(CACHE_FILE):
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}
    with open(CACHE_FILE, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_cache(cache_data):
    """Serialize ``cache_data`` as JSON into ``CACHE_FILE``, replacing its content."""
    with open(CACHE_FILE, 'w', encoding='utf-8') as f:
        json.dump(cache_data, f)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")


def write_to_csv(legislation_data, csv_file):
    """Flatten legislation records and write them to ``csv_file``.

    Args:
        legislation_data: Iterable of (possibly nested) dicts, one per record.
        csv_file: Destination path of the CSV file.

    Returns:
        None. When there is nothing to write, a warning is logged and the
        file is left untouched.
    """
    # Use the dict *returned* by flatten_dict. The previous call passed an
    # empty accumulator dict in the ``sep`` position and ignored the result,
    # so every row was empty and the CSV was never produced.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]

    df = pd.DataFrame(flattened_legislation)

    # Debugging: Print the first few entries of the DataFrame to inspect its structure
    print("Debugging DataFrame:")
    if df.empty:
        print("DataFrame is empty.")
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    print(df.head())
    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")


def flatten_dict(d, parent_key='', sep='_'):
    """Return a flat copy of ``d`` with nested keys joined by ``sep``.

    Args:
        d: Dictionary to flatten; values may be dicts, lists or scalars.
        parent_key: Prefix applied to every key (used by the recursion).
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new single-level dict. Nested dicts are expanded recursively,
        lists become one comma-separated string of ``str(element)`` values
        (an empty list becomes ``''``) and ``None`` becomes ``''``.
    """
    flat = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            # Debugging: Print the key and value when entering a nested dictionary
            print(f"Entering nested dictionary with key: {new_key}")
            flat.update(flatten_dict(v, new_key, sep=sep))
        elif isinstance(v, list):
            # Joining an empty list yields '', which is the empty-list placeholder.
            flat[new_key] = ', '.join(map(str, v))
        elif v is not None:
            # Debugging: Print the key and value when adding a non-dict, non-list item
            print(f"Adding {new_key}: {v}")
            flat[new_key] = v
        else:
            # None is written as an empty cell.
            flat[new_key] = ''
    return flat


@bp.route('/get_sponsored')
def get_sponsored():
    """Fetch sponsored legislation for every cached bioguide id and export it.

    Returns:
        A ``(json, status)`` tuple: 404 when the cache holds no bioguide ids,
        500 when ``CONGRESS_API_KEY`` is not set, otherwise 200 after the
        collected records were handed to ``write_to_csv``.
    """
    # Load bioguideIds and legislation from the same cache
    cache = load_cache()
    if 'bioguideIds' not in cache or not cache['bioguideIds']:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # The key does not change between members: read it once, and fail loudly
    # instead of skipping every id and then reporting success.
    api_key = os.getenv('CONGRESS_API_KEY')
    if not api_key:
        neo4j_logger.error("Congress API key not found in environment variables")
        return jsonify({"message": "Congress API key not found in environment variables"}), 500
    headers = {'X-API-KEY': api_key}

    bioguide_ids = cache['bioguideIds']
    # Print the number of items found in the cache initially
    print(f"Initial bioguideIds count: {len(bioguide_ids)}")

    all_legislation_data = []

    # NOTE(review): ids are consumed from the in-memory cache only; save_cache()
    # is never called here, so the cache file is unchanged — confirm intended.
    while bioguide_ids:
        # Step 1: Retrieve a sponsor from the cache
        current_bioguideId = bioguide_ids.pop(0)
        if current_bioguideId is None:
            continue

        print(f"Processing bioguideId: {current_bioguideId}")

        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"

        # Step 2: Fetch sponsored legislation for the member
        try:
            response = requests.get(
                congress_api_url,
                headers=headers,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            # A network failure for one member must not abort the whole export.
            neo4j_logger.error(f"Request for sponsored legislation of bioguideId {current_bioguideId} failed: {exc}")
            continue

        print(f"Response Status Code: {response.status_code}")
        print(f"Response Text: {response.text}")

        if response.status_code != 200:
            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
            continue

        try:
            response_data = response.json()
        except ValueError as exc:
            # The JSON decode errors raised by requests derive from ValueError.
            neo4j_logger.error(f"Invalid JSON in sponsored legislation response for bioguideId {current_bioguideId}: {exc}")
            continue

        # Debugging statement to check the raw API response
        print("Raw API Response:")
        print(json.dumps(response_data, indent=4))

        # Extract legislation data from the response
        # NOTE(review): only the records in this single response are collected;
        # further result pages, if the API paginates, are not followed.
        if 'sponsoredLegislation' in response_data and response_data['sponsoredLegislation']:
            all_legislation_data.extend(response_data['sponsoredLegislation'])

    # Debugging statement to check the number of legislation items collected
    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    # Print first few items to ensure data is structured correctly
    for i, item in enumerate(all_legislation_data[:5]):
        print(f"\nLegislation Item {i+1}:")
        print(json.dumps(item, indent=4))

    # Write the extracted legislation data to a CSV file
    # NOTE(review): write_to_csv only logs a warning when nothing was
    # collected, yet the success message below is returned either way.
    write_to_csv(all_legislation_data, CSV_FILE)

    return jsonify({"message": "Legislation data written to CSV successfully"}), 200