Compare commits

..

4 Commits

Author SHA256 Message Date
97c96ed0fc checkpoint 2025-03-09 01:47:27 -08:00
207449ee26 add create_legislation_node endpiont 2025-03-08 22:57:53 -08:00
84156a1ce9 legislation cache working now too 2025-03-08 15:13:55 -08:00
be101beac8 cache working 2025-03-08 14:16:40 -08:00
3 changed files with 148 additions and 38 deletions

View File

@ -1,4 +1,3 @@
# app.py
from flask import Flask, jsonify, request
import os
import logging
@ -7,6 +6,7 @@ from dotenv import load_dotenv
from werkzeug.middleware.proxy_fix import ProxyFix
import importlib.util
# Load environment variables from .env file
load_dotenv()

# Flask application object; blueprints from endpoints/ are registered onto it below.
app = Flask(__name__)
@ -40,7 +40,6 @@ def load_blueprints_from_directory(directory):
if filename.endswith('.py') and not filename.startswith('__'):
module_name = filename[:-3] # Remove .py extension
file_path = os.path.join(directory, filename)
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
@ -51,5 +50,23 @@ def load_blueprints_from_directory(directory):
# Load blueprints: import every endpoints/*.py module and register its `bp`.
load_blueprints_from_directory('endpoints')
# Sitemap endpoint function
def sitemap():
    """Return a JSON array describing every registered route.

    Each entry carries the URL rule, the allowed HTTP methods, and the
    endpoint name. The sitemap rule itself is excluded so the listing
    never references itself.
    """
    visible_routes = [
        {
            'rule': rule.rule,
            'methods': list(rule.methods),
            'endpoint': rule.endpoint
        }
        for rule in app.url_map.iter_rules()
        if rule.endpoint != 'sitemap'
    ]
    return jsonify(visible_routes)

# Register the sitemap endpoint
app.add_url_rule('/sitemap', view_func=sitemap, methods=['GET'])
# Run the Flask development server only when executed directly
# (a production WSGI server imports `app` instead).
if __name__ == '__main__':
    app.run(debug=True)

View File

@ -0,0 +1,58 @@
from flask import Blueprint, jsonify
import json
import os
import logging
from neo4j import GraphDatabase
from dotenv import load_dotenv

# get_driver / neo4j_logger are used below but were never imported here
# (the sibling endpoint modules import them the same way).
from app import get_driver, neo4j_logger
bp = Blueprint('create_legislation_node', __name__)

@bp.route('/create_legislation_node', methods=['GET'])
def create_legislation_nodes():
    """Create or merge a Neo4j Legislation node for every cached record.

    Reads the JSON file named by the LEGISLATION_CACHE_PATH environment
    variable (expected: a list of dicts, each with at least an 'id') and
    MERGEs one `:Legislation` node per record, keyed on `id`.

    Returns:
        201 with a summary of the nodes created/merged, or 4xx/5xx JSON
        errors for a missing path, missing file, bad JSON, or a non-list
        payload.
    """
    # Assuming the legislation cache is stored in a JSON file
    legislation_cache_path = os.getenv("LEGISLATION_CACHE_PATH")
    if not legislation_cache_path:
        return jsonify({"error": "Legislation cache path is not set"}), 400

    try:
        with open(legislation_cache_path, 'r') as f:
            legislation_data = json.load(f)
    except FileNotFoundError:
        return jsonify({"error": "Legislation cache file not found"}), 404
    except json.JSONDecodeError:
        return jsonify({"error": "Failed to decode JSON from the legislation cache"}), 500

    # Ensure legislation_data is a list of dictionaries
    if not isinstance(legislation_data, list):
        return jsonify({"error": "Legislation data should be a list"}), 400

    created_nodes = []
    # Open one driver/session for the whole batch; the original opened and
    # closed a fresh driver for every single record.
    driver = get_driver()
    try:
        with driver.session() as session:
            for item in legislation_data:
                if not item.get("id"):
                    continue  # 'id' is the MERGE key; skip records without one

                properties = dict(item)
                # Pass properties as a map parameter instead of interpolating
                # dict keys into the query text: interpolation breaks on keys
                # that aren't valid identifiers and is a Cypher-injection risk.
                query = (
                    "MERGE (l:Legislation {id: $id}) "
                    "ON CREATE SET l += $props"
                )
                session.run(query, id=item["id"], props=properties)

                # Log the creation or merging of the node
                neo4j_logger.info(f"Created/Merged Legislation node with ID: {item.get('id')} and properties: {properties}")
                created_nodes.append({"id": item.get("id"), "properties": properties})
    finally:
        driver.close()

    return jsonify({"message": "Legislation nodes created/merged successfully", "nodes": created_nodes}), 201

View File

@ -1,13 +1,15 @@
# endpoints/get_sponsored.py
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import requests
import json
import os
import pandas as pd
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger
bp = Blueprint('get_sponsored', __name__)

# JSON cache shared with the bioguideId pipeline (read/written by load_cache/save_cache).
CACHE_FILE = 'cache.json'
# Destination file for the flattened legislation export.
CSV_FILE = 'legislation.csv'
def load_cache():
if os.path.exists(CACHE_FILE):
@ -22,11 +24,57 @@ def save_cache(cache_data):
json.dump(cache_data, f)
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
def write_to_csv(legislation_data, csv_file):
    """Flatten each legislation record and write the batch to `csv_file`.

    Nested dicts become underscore-joined column names via flatten_dict.
    When the batch flattens to nothing, a warning is logged and no file
    is written.

    Args:
        legislation_data: list of (possibly nested) legislation dicts.
        csv_file: path of the CSV file to create/overwrite.
    """
    # Flatten the nested dictionaries.
    # BUG FIX: the original called flatten_dict(item, "", flattened_item),
    # passing a dict as the `sep` argument and discarding the returned
    # mapping — so every row ended up empty. Use the return value instead.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]

    df = pd.DataFrame(flattened_legislation)
    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    # Debugging statement to check DataFrame contents
    print("DataFrame contents:")
    print(df.head())

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten a nested dict into a single-level dict.

    Nested keys are joined with `sep`, e.g. {'a': {'b': 1}} -> {'a_b': 1}.
    Lists become comma-joined strings; None values and empty lists become
    '' so that every CSV cell holds a scalar.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Key prefix carried through recursive calls.
        sep: Separator placed between joined key segments.

    Returns:
        A flat dict mapping joined key paths to scalar values.
    """
    # Per-item debug prints removed: this helper runs once per record and
    # the prints flooded stdout without aiding the CSV output.
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            # Lists can't live in a flat CSV cell; join elements as a string
            # (empty list -> empty string placeholder).
            items.append((new_key, ', '.join(map(str, v)) if v else ''))
        elif v is not None:
            items.append((new_key, v))
        else:
            # Represent missing values as empty strings so columns stay aligned.
            items.append((new_key, ''))
    return dict(items)
@bp.route('/get_sponsored')
def get_sponsored():
# Load bioguideIds and legislation from the same cache
cache = load_cache()
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
return jsonify({"message": "No bioguideIds found in cache"}), 404
@ -35,11 +83,11 @@ def get_sponsored():
print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
processed_legislation_count = 0
all_legislation_data = []
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
# Step 1: Retrieve a sponsor from the cache
current_bioguideId = cache['bioguideIds'].pop(0)
if current_bioguideId is None:
continue
@ -59,7 +107,6 @@ def get_sponsored():
# Step 2: Fetch sponsored legislation for the member
response = requests.get(congress_api_url, headers=headers)
print(f"Response Status Code: {response.status_code}")
print(f"Response Text: {response.text}")
@ -67,39 +114,27 @@ def get_sponsored():
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
continue
legislations = response.json().get('results', [])
response_data = response.json()
# Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
for legislation in legislations:
key = f"legislation_{legislation['id']}"
# Debugging statement to check the raw API response
print("Raw API Response:")
print(json.dumps(response_data, indent=4))
print(f"About to write to cache: {key} - {legislation}")
# Extract legislation data from the response
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
for result in response_data['sponsoredLegislation']:
all_legislation_data.append(result)
if key not in cache:
cache[key] = {
'bioguideId': current_bioguideId,
**legislation
}
processed_legislation_count += 1
# Debugging statement to check the number of legislation items collected
print(f"Number of legislation items collected: {len(all_legislation_data)}")
# Save the cache immediately after writing each entry
save_cache(cache)
if len(all_legislation_data) > 0:
# Print first few items to ensure data is structured correctly
for i, item in enumerate(all_legislation_data[:5]):
print(f"\nLegislation Item {i+1}:")
print(json.dumps(item, indent=4))
# Print the updated legislation cache for debugging
print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
# Write the extracted legislation data to a CSV file
write_to_csv(all_legislation_data, CSV_FILE)
# Step 4: Delete the sponsor from the bioguideIds list (already done by popping)
neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
# Print the number of items left in the bioguide cache
remaining_bioguideIds_count = len(cache['bioguideIds'])
print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}")
# Save the cache again to ensure all changes are persisted
save_cache(cache)
# Print the total number of legislation items stored and overall items added to the cache
print(f"Total processed legislation count: {processed_legislation_count}")
print(f"Overall items in cache: {len(cache)}")
return jsonify({"message": "Sponsored legislation processed successfully", "processed_legislation_count": processed_legislation_count}), 200
return jsonify({"message": "Legislation data written to CSV successfully"}), 200