add sponsored and cosponsored endpoints

This commit is contained in:
Moses Rolston 2025-03-09 16:41:33 -07:00
parent 084ddc26d3
commit 33dd47226b
2 changed files with 169 additions and 115 deletions

View File

@ -0,0 +1,99 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json
# Flask blueprint for the cosponsored-legislation endpoint; registered with the app elsewhere.
bp = Blueprint('get_cosponsored', __name__)
# Load the cached member bioguideIds from the JSON file named by CACHE_PATH.
def get_bioguideIds_from_cache():
    """Return the list of bioguideIds stored in the cache file, or None.

    The cache location is taken from the CACHE_PATH environment variable.
    None is returned (with an error logged) when the variable is unset,
    the file does not exist, the file cannot be parsed, or it contains
    no "bioguideIds" entry.
    """
    cache_path = os.getenv("CACHE_PATH")
    if not cache_path:
        logging.error("CACHE_PATH not found in .env file")
        return None
    if not os.path.exists(cache_path):
        logging.error("Cache file not found at specified path")
        return None
    try:
        with open(cache_path, 'r') as fh:
            ids = json.load(fh).get("bioguideIds", [])
    except Exception as exc:
        logging.error(f"Failed to read cache file: {str(exc)}")
        return None
    if not ids:
        logging.error("bioguideIds not found in cache.json")
        return None
    return ids
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten a nested dict (including lists) into a single-level dict.

    Nested dict keys are joined with *sep*; list elements get an index
    suffix (e.g. "latestAction_0_text"). Scalar values that are None or
    the empty string are replaced with the placeholder "NONE" so empty
    cells are visible in the CSV output.
    """
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, elem in enumerate(v):
                indexed_key = f"{new_key}_{i}"
                if isinstance(elem, dict):
                    # Bug fix: flatten nested dicts into individual columns
                    # instead of storing the whole dict as one cell value
                    # (which broke the downstream CSV writer).
                    items.extend(flatten_dict(elem, indexed_key, sep=sep).items())
                else:
                    # Bug fix: scalar list elements used to crash with
                    # AttributeError because .items() was called on them.
                    items.append((indexed_key, elem if elem is not None and elem != "" else "NONE"))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)
# Function to write data to CSV
def write_to_csv(data, filename):
    """Write a list of flat dicts to *filename* as a CSV file.

    The header is the union of keys across all rows; rows missing a key
    get an empty cell. Bug fix: the fieldnames were previously an
    unsorted set, so the column order changed from run to run — they are
    now sorted for a deterministic, diff-friendly header.
    """
    fieldnames = sorted({key for row in data for key in row})
    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(data)
@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    """Fetch cosponsored legislation for every cached member and dump it to CSV.

    Reads bioguideIds from the cache file, queries the Congress API once
    per member, flattens each legislation record, tags it with the
    member's id, and writes everything to one CSV file. Returns 404 when
    no ids or no legislation are found, 500 on any API or internal error.
    """
    try:
        member_ids = get_bioguideIds_from_cache()
        if not member_ids:
            return jsonify({"error": "bioguideIds not found"}), 404

        api_key = os.getenv("CONGRESS_API_KEY")
        rows = []
        for member_id in member_ids:
            url = (
                f"https://api.congress.gov/v3/member/{member_id}"
                f"/cosponsored-legislation?api_key={api_key}"
            )
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {member_id}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {member_id}"}), 500

            for record in response.json().get("cosponsoredLegislation", []):
                flat = flatten_dict(record)
                flat["cosponsored_by"] = member_id
                # Skip records that flattened to nothing but empty values.
                if any(flat.values()):
                    rows.append(flat)

        if not rows:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(rows, csv_filename)
        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

View File

@ -1,144 +1,99 @@
from flask import Blueprint, jsonify from flask import Blueprint, jsonify, request
import requests import requests
import json import csv
import os import os
import pandas as pd import logging
import json
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger
bp = Blueprint('get_sponsored', __name__) bp = Blueprint('get_sponsored', __name__)
CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'
def load_cache(): # Function to retrieve bioguideIds from cache.json file
if os.path.exists(CACHE_FILE): def get_bioguideIds_from_cache():
with open(CACHE_FILE, 'r') as f: CACHE_PATH = os.getenv("CACHE_PATH")
return json.load(f) if not CACHE_PATH:
else: logging.error("CACHE_PATH not found in .env file")
neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.") return None
return {}
def save_cache(cache_data): if not os.path.exists(CACHE_PATH):
with open(CACHE_FILE, 'w') as f: logging.error("Cache file not found at specified path")
json.dump(cache_data, f) return None
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
def write_to_csv(legislation_data, csv_file): try:
# Flatten the nested dictionaries with open(CACHE_PATH, 'r') as file:
flattened_legislation = [] cache_data = json.load(file)
for item in legislation_data: bioguideIds = cache_data.get("bioguideIds", [])
flattened_item = {} if not bioguideIds:
flatten_dict(item, "", flattened_item) logging.error("bioguideIds not found in cache.json")
flattened_legislation.append(flattened_item) return None
return bioguideIds
df = pd.DataFrame(flattened_legislation) except Exception as e:
logging.error(f"Failed to read cache file: {str(e)}")
# Debugging: Print the first few entries of the DataFrame to inspect its structure return None
print("Debugging DataFrame:")
if not df.empty:
print(df.head())
else:
print("DataFrame is empty.")
if df.empty:
neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
print("DataFrame is empty. Debugging information:")
for item in flattened_legislation[:5]: # Print first 5 items
print(json.dumps(item, indent=4))
return
df.to_csv(csv_file, index=False)
neo4j_logger.info(f"Data written to CSV file: {csv_file}")
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'): def flatten_dict(d, parent_key='', sep='_'):
items = [] items = []
for k, v in d.items(): for k, v in d.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict): if isinstance(v, dict):
# Debugging: Print the key and value when entering a nested dictionary
print(f"Entering nested dictionary with key: {new_key}")
items.extend(flatten_dict(v, new_key, sep=sep).items()) items.extend(flatten_dict(v, new_key, sep=sep).items())
elif isinstance(v, list): elif isinstance(v, list):
# Handle lists by converting them to strings or other appropriate representation for i, item in enumerate(v):
if v: items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
items.append((new_key, ', '.join(map(str, v))))
else:
# If the list is empty, add an empty string or a placeholder
items.append((new_key, ''))
elif v is not None:
# Debugging: Print the key and value when adding a non-dict, non-list item
print(f"Adding {new_key}: {v}")
items.append((new_key, v))
else: else:
# Handle None values appropriately (e.g., add an empty string or a placeholder) items.append((new_key, v if v is not None and v != "" else "NONE"))
items.append((new_key, ''))
return dict(items) return dict(items)
@bp.route('/get_sponsored') # Function to write data to CSV
def get_sponsored(): def write_to_csv(data, filename):
# Load bioguideIds and legislation from the same cache keys = set()
cache = load_cache() for item in data:
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0: keys.update(item.keys())
return jsonify({"message": "No bioguideIds found in cache"}), 404
# Print the number of items found in the cache initially with open(filename, 'w', newline='') as output_file:
initial_bioguideIds_count = len(cache['bioguideIds']) dict_writer = csv.DictWriter(output_file, fieldnames=keys)
print(f"Initial bioguideIds count: {initial_bioguideIds_count}") dict_writer.writeheader()
dict_writer.writerows(data)
processed_legislation_count = 0 @bp.route('/get_sponsored', methods=['GET'])
all_legislation_data = [] def get_sponsored_legislation():
try:
# Retrieve bioguideIds from cache.json
bioguideIds = get_bioguideIds_from_cache()
if not bioguideIds:
return jsonify({"error": "bioguideIds not found"}), 404
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0: all_data = []
# Step 1: Retrieve a sponsor from the cache
current_bioguideId = cache['bioguideIds'].pop(0)
if current_bioguideId is None:
continue
print(f"Processing bioguideId: {current_bioguideId}") for bioguideId in bioguideIds:
# Make request to Congress API
api_key = os.getenv("CONGRESS_API_KEY")
url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
response = requests.get(url)
congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation" if response.status_code != 200:
logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500
# Include API key in headers (if required) data = response.json().get("sponsoredLegislation", [])
api_key = os.getenv('CONGRESS_API_KEY')
if not api_key:
neo4j_logger.error("Congress API key not found in environment variables")
continue
headers = { # Add sponsored_by column and handle nested items
'X-API-KEY': api_key for item in data:
} flattened_item = flatten_dict(item)
flattened_item["sponsored_by"] = bioguideId
if not any(flattened_item.values()):
continue # Skip empty rows
all_data.append(flattened_item)
# Step 2: Fetch sponsored legislation for the member if not all_data:
response = requests.get(congress_api_url, headers=headers) return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404
print(f"Response Status Code: {response.status_code}")
print(f"Response Text: {response.text}")
if response.status_code != 200: # Write data to CSV
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}") csv_filename = f"sponsored_legislation.csv"
continue write_to_csv(all_data, csv_filename)
response_data = response.json() return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
# Debugging statement to check the raw API response except Exception as e:
print("Raw API Response:") logging.error(f"An error occurred: {str(e)}")
print(json.dumps(response_data, indent=4)) return jsonify({"error": str(e)}), 500
# Extract legislation data from the response
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
for result in response_data['sponsoredLegislation']:
all_legislation_data.append(result)
# Debugging statement to check the number of legislation items collected
print(f"Number of legislation items collected: {len(all_legislation_data)}")
if len(all_legislation_data) > 0:
# Print first few items to ensure data is structured correctly
for i, item in enumerate(all_legislation_data[:5]):
print(f"\nLegislation Item {i+1}:")
print(json.dumps(item, indent=4))
# Write the extracted legislation data to a CSV file
write_to_csv(all_legislation_data, CSV_FILE)
return jsonify({"message": "Legislation data written to CSV successfully"}), 200