diff --git a/api/endpoints/get_sponsored.py b/api/endpoints/get_sponsored.py
index f78991f..b1efaf7 100644
--- a/api/endpoints/get_sponsored.py
+++ b/api/endpoints/get_sponsored.py
@@ -1,12 +1,15 @@
 from flask import Blueprint, jsonify
-from app import get_driver, neo4j_logger
 import requests
 import json
 import os
+import pandas as pd
+
+# Assuming you have these functions and configurations in your app.py
+from app import get_driver, neo4j_logger
 
 bp = Blueprint('get_sponsored', __name__)
-
 CACHE_FILE = 'cache.json'
+CSV_FILE = 'legislation.csv'
 
 def load_cache():
     if os.path.exists(CACHE_FILE):
@@ -21,6 +24,55 @@ def save_cache(cache_data):
         json.dump(cache_data, f)
     neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
 
+def write_to_csv(legislation_data, csv_file):
+    """Flatten each legislation record and write the rows to csv_file."""
+    # Flatten the nested dictionaries
+    flattened_legislation = []
+    for item in legislation_data:
+        # flatten_dict returns a new dict; its third argument is the key separator
+        flattened_item = flatten_dict(item)
+        flattened_legislation.append(flattened_item)
+
+    df = pd.DataFrame(flattened_legislation)
+    if df.empty:
+        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
+        print("DataFrame is empty. Debugging information:")
+        for item in flattened_legislation[:5]:  # Print first 5 items
+            print(json.dumps(item, indent=4))
+        return
+
+    # Debugging statement to check DataFrame contents
+    print("DataFrame contents:")
+    print(df.head())
+
+    df.to_csv(csv_file, index=False)
+    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
+
+def flatten_dict(d, parent_key='', sep='_'):
+    """Return a flat copy of d: nested keys joined by sep, lists comma-joined, None as ''."""
+    items = []
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            # Debugging: Print the key and value when entering a nested dictionary
+            print(f"Entering nested dictionary with key: {new_key}")
+            items.extend(flatten_dict(v, new_key, sep=sep).items())
+        elif isinstance(v, list):
+            # Handle lists by converting them to strings or other appropriate representation
+            if v:
+                items.append((new_key, ', '.join(map(str, v))))
+            else:
+                # If the list is empty, add an empty string or a placeholder
+                items.append((new_key, ''))
+        elif v is not None:
+            # Debugging: Print the key and value when adding a non-dict, non-list item
+            print(f"Adding {new_key}: {v}")
+            items.append((new_key, v))
+        else:
+            # Handle None values appropriately (e.g., add an empty string or a placeholder)
+            items.append((new_key, ''))
+    return dict(items)
+
 @bp.route('/get_sponsored')
 def get_sponsored():
     # Load bioguideIds and legislation from the same cache
@@ -31,13 +83,16 @@ def get_sponsored():
     # Print the number of items found in the cache initially
     initial_bioguideIds_count = len(cache['bioguideIds'])
     print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
+
     processed_legislation_count = 0
+    all_legislation_data = []
 
     while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
         # Step 1: Retrieve a sponsor from the cache
         current_bioguideId = cache['bioguideIds'].pop(0)
         if current_bioguideId is None:
             continue
+        print(f"Processing bioguideId: {current_bioguideId}")
 
         congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"
 
@@ -63,68 +118,25 @@ def get_sponsored():
 
         response_data = response.json()
 
-        # Ensure the response contains 'sponsoredLegislation' key
-        if 'sponsoredLegislation' not in response_data:
-            neo4j_logger.error(f"Missing 'sponsoredLegislation' key in response for bioguideId {current_bioguideId}: {response_data}")
-            continue
+        # Debugging statement to check the raw API response
+        print("Raw API Response:")
+        print(json.dumps(response_data, indent=4))
 
-        sponsored_legislation = response_data['sponsoredLegislation']
+        # Extract legislation data from the response
+        if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
+            for result in response_data['sponsoredLegislation']:
+                all_legislation_data.append(result)
 
-        # Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
-        for legislation in sponsored_legislation:
-            if 'number' not in legislation:
-                neo4j_logger.error(f"Missing 'number' field in legislation data for bioguideId {current_bioguideId}: {legislation}")
-                continue
+    # Debugging statement to check the number of legislation items collected
+    print(f"Number of legislation items collected: {len(all_legislation_data)}")
 
-            key = f"legislation_{legislation['number']}"
-            print(f"About to write to cache: {key} - {legislation}")
+    if len(all_legislation_data) > 0:
+        # Print first few items to ensure data is structured correctly
+        for i, item in enumerate(all_legislation_data[:5]):
+            print(f"\nLegislation Item {i+1}:")
+            print(json.dumps(item, indent=4))
 
-            if key not in cache:
-                # Ensure all nested dictionaries are handled properly
-                latest_action = legislation.get('latestAction')
+    # Write the extracted legislation data to a CSV file
+    write_to_csv(all_legislation_data, CSV_FILE)
 
-                # Check if latest_action is None before calling .get()
-                if latest_action is None:
-                    latest_action = {}
-
-                policy_area = legislation.get('policyArea', {})
-
-                legislation_info = {
-                    'bioguideId': current_bioguideId,
-                    **legislation,
-                    'latestAction': {
-                        'actionDate': latest_action.get('actionDate'),
-                        'text': latest_action.get('text')
-                    },
-                    'policyArea': {
-                        'name': policy_area.get('name')
-                    }
-                }
-
-                cache[key] = legislation_info
-                processed_legislation_count += 1
-
-                # Save the cache immediately after writing each entry
-                save_cache(cache)
-
-                # Print the updated legislation cache for debugging
-                print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
-
-        # Step 4: Delete the sponsor from the bioguideIds list (already done by popping)
-        neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
-
-        # Print the number of items left in the bioguide cache
-        remaining_bioguideIds_count = len(cache['bioguideIds'])
-        print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}")
-
-    # Save the cache again to ensure all changes are persisted
-    save_cache(cache)
-
-    # Print the total number of legislation items stored and overall items added to the cache
-    print(f"Total processed legislation count: {processed_legislation_count}")
-    print(f"Overall items in cache: {len(cache)}")
-
-    return jsonify({
-        "message": "Sponsored legislation processed successfully",
-        "processed_legislation_count": processed_legislation_count
-    }), 200
+    return jsonify({"message": "Legislation data written to CSV successfully"}), 200