checkpoint

2025-03-09 01:47:27 -08:00 · 2025-03-09 01:47:27 -08:00 · 97c96ed0fc
commit 97c96ed0fc
parent 207449ee26
1 changed files with 72 additions and 62 deletions
--- a/api/endpoints/get_sponsored.py
+++ b/api/endpoints/get_sponsored.py
@ -1,12 +1,15 @@
 from flask import Blueprint, jsonify
 from app import get_driver, neo4j_logger
 import requests
 import json
 import os
 import pandas as pd
 # Assuming you have these functions and configurations in your app.py
 from app import get_driver, neo4j_logger
 # Flask blueprint on which this module's '/get_sponsored' route is registered.
 bp = Blueprint('get_sponsored', __name__)
 # JSON cache file read and written by load_cache()/save_cache(); the path is
 # relative, so it resolves against the process's working directory.
 CACHE_FILE = 'cache.json'
 # Destination handed to write_to_csv() for the collected legislation rows
 # (also a relative path).
 CSV_FILE = 'legislation.csv'
 def load_cache():
    if os.path.exists(CACHE_FILE):
@ -21,6 +24,53 @@ def save_cache(cache_data):
        json.dump(cache_data, f)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
 def write_to_csv(legislation_data, csv_file):
    """Flatten legislation records and write them to a CSV file.

    Each record in ``legislation_data`` is passed through ``flatten_dict`` so
    that nested fields become individual columns, and the resulting table is
    written to ``csv_file`` without an index column.  When there is nothing
    to write, a warning is logged and ``csv_file`` is not written.

    Args:
        legislation_data: Iterable of (possibly nested) dictionaries, one per
            legislation item.  ``None`` is treated as "no records".
        csv_file: Destination path handed to ``DataFrame.to_csv``.
    """
    # flatten_dict builds and RETURNS a new dict; its third positional
    # parameter is the key separator, not an output argument.  Collect the
    # return value instead of passing in a dict to be filled, otherwise every
    # row stays empty and the DataFrame below is always empty.
    flattened_legislation = [
        flatten_dict(item) for item in (legislation_data or [])
    ]
    df = pd.DataFrame(flattened_legislation)
    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return
    # Debugging statement to check DataFrame contents
    print("DataFrame contents:")
    print(df.head())
    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
 def flatten_dict(d, parent_key='', sep='_'):
    """Collapse a nested mapping into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent's key with
    ``sep``.  A list value becomes one string holding the ``str()`` form of
    its elements separated by ``', '`` (an empty list becomes ``''``), and a
    ``None`` value becomes ``''``.  Every other value is kept unchanged.
    Progress messages are printed to stdout for debugging.

    Args:
        d: Mapping to flatten.
        parent_key: Prefix prepended to every key; empty means no prefix.
        sep: Text placed between the prefix and each key.

    Returns:
        A new flat ``dict``; ``d`` itself is not modified.
    """
    flat = {}
    for key, value in d.items():
        if parent_key:
            full_key = f"{parent_key}{sep}{key}"
        else:
            full_key = key

        if isinstance(value, dict):
            # Debugging: announce each nested dictionary before descending
            print(f"Entering nested dictionary with key: {full_key}")
            flat.update(flatten_dict(value, full_key, sep=sep))
            continue

        if isinstance(value, list):
            # Joining an empty list already yields '', so one expression
            # covers both the populated and the empty case.
            flat[full_key] = ', '.join(str(element) for element in value)
            continue

        if value is None:
            # Missing values are represented by an empty string
            flat[full_key] = ''
            continue

        # Debugging: announce each plain value as it is recorded
        print(f"Adding {full_key}: {value}")
        flat[full_key] = value
    return flat
@bp.route('/get_sponsored')
 def get_sponsored():
    # Load bioguideIds and legislation from the same cache
@ -31,13 +81,16 @@ def get_sponsored():
    # Print the number of items found in the cache initially
    initial_bioguideIds_count = len(cache['bioguideIds'])
    print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
    processed_legislation_count = 0
    all_legislation_data = []
    while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
        # Step 1: Retrieve a sponsor from the cache
        current_bioguideId = cache['bioguideIds'].pop(0)
        if current_bioguideId is None:
            continue
        print(f"Processing bioguideId: {current_bioguideId}")
        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"
@ -63,68 +116,25 @@ def get_sponsored():
        response_data = response.json()
-        # Ensure the response contains 'sponsoredLegislation' key
+        # Debugging statement to check the raw API response
-        if 'sponsoredLegislation' not in response_data:
+        print("Raw API Response:")
-            neo4j_logger.error(f"Missing 'sponsoredLegislation' key in response for bioguideId {current_bioguideId}: {response_data}")
+        print(json.dumps(response_data, indent=4))
            continue
-        sponsored_legislation = response_data['sponsoredLegislation']
+        # Extract legislation data from the response
        if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
            for result in response_data['sponsoredLegislation']:
                all_legislation_data.append(result)
-        # Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
+    # Debugging statement to check the number of legislation items collected
-        for legislation in sponsored_legislation:
+    print(f"Number of legislation items collected: {len(all_legislation_data)}")
            if 'number' not in legislation:
                neo4j_logger.error(f"Missing 'number' field in legislation data for bioguideId {current_bioguideId}: {legislation}")
                continue
-            key = f"legislation_{legislation['number']}"
+    if len(all_legislation_data) > 0:
-            print(f"About to write to cache: {key} - {legislation}")
+        # Print first few items to ensure data is structured correctly
        for i, item in enumerate(all_legislation_data[:5]):
            print(f"\nLegislation Item {i+1}:")
            print(json.dumps(item, indent=4))
-            if key not in cache:
+    # Write the extracted legislation data to a CSV file
-                # Ensure all nested dictionaries are handled properly
+    write_to_csv(all_legislation_data, CSV_FILE)
                latest_action = legislation.get('latestAction')
-                # Check if latest_action is None before calling .get()
+    return jsonify({"message": "Legislation data written to CSV successfully"}), 200
                if latest_action is None:
                    latest_action = {}
                policy_area = legislation.get('policyArea', {})
                legislation_info = {
                    'bioguideId': current_bioguideId,
                    **legislation,
                    'latestAction': {
                        'actionDate': latest_action.get('actionDate'),
                        'text': latest_action.get('text')
                    },
                    'policyArea': {
                        'name': policy_area.get('name')
                    }
                }
                cache[key] = legislation_info
                processed_legislation_count += 1
                # Save the cache immediately after writing each entry
                save_cache(cache)
                # Print the updated legislation cache for debugging
                print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
        # Step 4: Delete the sponsor from the bioguideIds list (already done by popping)
        neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
        # Print the number of items left in the bioguide cache
        remaining_bioguideIds_count = len(cache['bioguideIds'])
        print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}")
    # Save the cache again to ensure all changes are persisted
    save_cache(cache)
    # Print the total number of legislation items stored and overall items added to the cache
    print(f"Total processed legislation count: {processed_legislation_count}")
    print(f"Overall items in cache: {len(cache)}")
    return jsonify({
        "message": "Sponsored legislation processed successfully",
        "processed_legislation_count": processed_legislation_count
    }), 200