add relationship endpoints

add endpoint process sponsors
add sponsored and cosponsored endpoints
2025-03-09 21:06:18 -07:00 · 2025-03-09 18:58:00 -07:00 · 2025-03-09 16:41:33 -07:00
5 changed files with 314 additions and 196 deletions
--- a/api/endpoints/create_cosponsored_relationship.py
+++ b/api/endpoints/create_cosponsored_relationship.py
@ -0,0 +1,62 @@
+# endpoints/create_cosponsored_relationship.py
+
+from flask import Blueprint, jsonify
+import logging
+import csv
+import os
+from neo4j import GraphDatabase
+from app import get_driver, neo4j_logger  # Ensure relative imports work
+
+bp = Blueprint('create_cosponsored_relationship', __name__)
+
+@bp.route('/create_cosponsored_relationship', methods=['GET'])
+def create_cosponsored_relationship():
+    """Create COSPONSORED relationships from the cosponsored-legislation CSV.
+
+    Reads ``cosponsored_legislation.csv`` from the parent directory of this
+    module. For each row that has both ``cosponsored_by`` and ``number``, the
+    Person (by ``bioguideId``) and the Legislation (by ``number``) are looked
+    up and linked with ``(Person)-[:COSPONSORED]->(Legislation)``.
+
+    The relationship is written with MERGE rather than CREATE so that calling
+    this endpoint again does not add duplicate relationships.
+
+    Returns:
+        A ``(response, status)`` tuple: 201 once the file has been processed,
+        404 if the CSV file does not exist, 500 if any exception is raised.
+    """
+    try:
+        # Path to the CSV file
+        csv_file_path = os.path.join(os.path.dirname(__file__), '..', 'cosponsored_legislation.csv')
+
+        if not os.path.exists(csv_file_path):
+            return jsonify({"status": "error", "message": "CSV file not found"}), 404
+
+        driver = get_driver()
+        with driver.session() as session:
+            with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
+                for row in csv.DictReader(csv_file):
+                    cosponsored_by = row.get('cosponsored_by')
+                    number = row.get('number')
+
+                    # A row missing either key cannot be linked to anything
+                    if not (cosponsored_by and number):
+                        continue
+
+                    person_node = session.run(
+                        "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
+                        {"bioguideId": cosponsored_by}
+                    ).single()
+
+                    legislation_node = session.run(
+                        "MATCH (l:Legislation {number: $number}) RETURN l",
+                        {"number": number}
+                    ).single()
+
+                    # Report every missing endpoint before skipping the row
+                    if not person_node:
+                        neo4j_logger.warning(f"No Person node found for bioguideId: {cosponsored_by}")
+                    if not legislation_node:
+                        neo4j_logger.warning(f"No Legislation node found for number: {number}")
+                    if not (person_node and legislation_node):
+                        continue
+
+                    person = person_node['p']
+                    legislation = legislation_node['l']
+                    session.run(
+                        "MATCH (p:Person), (l:Legislation) "
+                        "WHERE id(p) = $person_id AND id(l) = $legislation_id "
+                        "MERGE (p)-[:COSPONSORED]->(l)",
+                        {"person_id": person.id, "legislation_id": legislation.id}
+                    )
+                    # .get() instead of [] so a node without a name/title
+                    # property cannot abort the whole import with a KeyError
+                    neo4j_logger.info(f"Merged COSPONSORED relationship from Person {person.get('name')} to Legislation {legislation.get('title')}")
+
+            return jsonify({"status": "success", "message": "COSPONSORED relationships created successfully"}), 201
+
+    except Exception as e:
+        neo4j_logger.error(f"Error creating COSPONSORED relationships: {e}")
+        return jsonify({"status": "error", "message": str(e)}), 500
--- a/api/endpoints/create_sponsored_relationship.py
+++ b/api/endpoints/create_sponsored_relationship.py
@ -0,0 +1,43 @@
+# endpoints/create_sponsored_relationship.py
+
+from flask import Blueprint, jsonify
+from neo4j import GraphDatabase
+import logging
+from app import get_driver, neo4j_logger  # Use absolute import
+
+bp = Blueprint('create_sponsored_relationship', __name__)
+
+@bp.route('/create_sponsored_relationship', methods=['GET'])
+def create_sponsored_relationship():
+    """Create SPONSORED relationships from each Legislation's ``sponsored_by``.
+
+    Every Legislation node is read; when it has a ``sponsored_by`` property,
+    the Person with that ``bioguideId`` is looked up and linked with
+    ``(Person)-[:SPONSORED]->(Legislation)``.
+
+    The relationship is written with MERGE rather than CREATE so that calling
+    this endpoint again does not add duplicate relationships.
+
+    Returns:
+        A ``(response, status)`` tuple: 201 once all nodes have been
+        processed, 500 if any exception is raised.
+    """
+    try:
+        driver = get_driver()
+        with driver.session() as session:
+            # Materialise the records up front: further queries are issued on
+            # this same session inside the loop below.
+            legislation_records = list(session.run("MATCH (l:Legislation) RETURN l"))
+            for record in legislation_records:
+                legislation = record['l']
+                bioguide_id = legislation.get('sponsored_by')
+
+                # Legislation without a recorded sponsor has nothing to link
+                if not bioguide_id:
+                    continue
+
+                person_node = session.run(
+                    "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
+                    {"bioguideId": bioguide_id}
+                ).single()
+
+                if not person_node:
+                    neo4j_logger.warning(f"No Person node found for bioguideId: {bioguide_id}")
+                    continue
+
+                person = person_node['p']
+                session.run(
+                    "MATCH (p:Person), (l:Legislation) "
+                    "WHERE id(p) = $person_id AND id(l) = $legislation_id "
+                    "MERGE (p)-[:SPONSORED]->(l)",
+                    {"person_id": person.id, "legislation_id": legislation.id}
+                )
+                # .get() instead of [] so a node without a name/title
+                # property cannot abort the whole run with a KeyError
+                neo4j_logger.info(f"Merged SPONSORED relationship from Person {person.get('name')} to Legislation {legislation.get('title')}")
+
+            return jsonify({"status": "success", "message": "SPONSORED relationships created successfully"}), 201
+
+    except Exception as e:
+        neo4j_logger.error(f"Error creating SPONSORED relationships: {e}")
+        return jsonify({"status": "error", "message": str(e)}), 500
--- a/api/endpoints/get_cosponsored.py
+++ b/api/endpoints/get_cosponsored.py
@ -0,0 +1,99 @@
+from flask import Blueprint, jsonify, request
+import requests
+import csv
+import os
+import logging
+import json
+
+bp = Blueprint('get_cosponsored', __name__)
+
+# Load the bioguideIds list from the JSON cache file
+def get_bioguideIds_from_cache():
+    """Return the ``bioguideIds`` list stored in the JSON cache file.
+
+    The file location comes from the ``CACHE_PATH`` environment variable.
+    ``None`` is returned (and an error is logged) when the variable is unset,
+    the file does not exist, the file cannot be read or parsed, or the
+    ``bioguideIds`` entry is missing or empty.
+    """
+    cache_path = os.getenv("CACHE_PATH")
+    if not cache_path:
+        logging.error("CACHE_PATH not found in .env file")
+        return None
+    if not os.path.exists(cache_path):
+        logging.error("Cache file not found at specified path")
+        return None
+
+    try:
+        with open(cache_path, 'r') as handle:
+            payload = json.load(handle)
+        ids = payload.get("bioguideIds", [])
+    except Exception as e:
+        logging.error(f"Failed to read cache file: {str(e)}")
+        return None
+
+    if ids:
+        return ids
+    logging.error("bioguideIds not found in cache.json")
+    return None
+
+# Function to flatten nested dictionaries and lists
+def flatten_dict(d, parent_key='', sep='_'):
+    """Flatten nested dictionaries and lists into a single-level dict.
+
+    Nested dict keys are joined to their parent key with ``sep``. List
+    elements are keyed by their index (``key_0``, ``key_1``, ...); elements
+    that are themselves dicts or lists are flattened recursively, so the
+    result never contains a dict or list value. ``None`` and ``""`` values
+    are replaced by the string ``"NONE"``.
+
+    Args:
+        d: The dictionary to flatten.
+        parent_key: Prefix prepended to every key (used by the recursion).
+        sep: Separator placed between key segments.
+
+    Returns:
+        A new flat ``dict``.
+    """
+    items = []
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_dict(v, new_key, sep=sep).items())
+        elif isinstance(v, list):
+            for i, item in enumerate(v):
+                item_key = f"{new_key}{sep}{i}"
+                if isinstance(item, (dict, list)):
+                    # View a nested list as an index-keyed dict so one
+                    # recursive call handles both container shapes
+                    nested = item if isinstance(item, dict) else dict(enumerate(item))
+                    items.extend(flatten_dict(nested, item_key, sep=sep).items())
+                else:
+                    # Scalar list elements have no .items(); store them directly
+                    items.append((item_key, item if item is not None and item != "" else "NONE"))
+        else:
+            items.append((new_key, v if v is not None and v != "" else "NONE"))
+    return dict(items)
+
+# Function to write data to CSV
+def write_to_csv(data, filename):
+    """Write a list of dict rows to ``filename`` as CSV.
+
+    The header is the union of all row keys in first-seen order, so the
+    column layout is the same on every run. Rows lacking a column get an
+    empty cell. The file is written as UTF-8.
+
+    Args:
+        data: Iterable of dictionaries, one per CSV row.
+        filename: Path of the file to create or overwrite.
+    """
+    # dict.fromkeys de-duplicates while keeping insertion order; a set would
+    # give a different column order from one run to the next
+    fieldnames = list(dict.fromkeys(key for item in data for key in item))
+
+    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
+        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
+        dict_writer.writeheader()
+        dict_writer.writerows(data)
+
+@bp.route('/get_cosponsored', methods=['GET'])
+def get_cosponsored_legislation():
+    """Fetch cosponsored legislation for every cached member and save it as CSV.
+
+    For each bioguideId returned by ``get_bioguideIds_from_cache`` the
+    Congress API ``cosponsored-legislation`` endpoint is queried, each item is
+    flattened, tagged with a ``cosponsored_by`` column and collected. All rows
+    are then written to ``cosponsored_legislation.csv``.
+
+    Returns:
+        A JSON response with the CSV filename on success; 404 when no
+        bioguideIds or no legislation are found; 500 when the API key is
+        missing, an API call does not return 200, or any exception is raised.
+    """
+    try:
+        # Retrieve bioguideIds from cache.json
+        bioguideIds = get_bioguideIds_from_cache()
+        if not bioguideIds:
+            return jsonify({"error": "bioguideIds not found"}), 404
+
+        # Read the key once and fail early instead of sending "api_key=None"
+        api_key = os.getenv("CONGRESS_API_KEY")
+        if not api_key:
+            logging.error("CONGRESS_API_KEY environment variable is not set")
+            return jsonify({"error": "CONGRESS_API_KEY environment variable is not set"}), 500
+
+        all_data = []
+
+        for bioguideId in bioguideIds:
+            # Make request to Congress API
+            url = f"https://api.congress.gov/v3/member/{bioguideId}/cosponsored-legislation"
+            # The key travels in params so it is not part of the URL string;
+            # the timeout keeps one stalled call from hanging the request
+            response = requests.get(url, params={"api_key": api_key}, timeout=30)
+
+            if response.status_code != 200:
+                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {bioguideId}: {response.text}")
+                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500
+
+            # NOTE(review): only this single response is read; if the API
+            # pages its results, later pages are not fetched — confirm.
+            data = response.json().get("cosponsoredLegislation", [])
+
+            # Add cosponsored_by column and handle nested items
+            for item in data:
+                flattened_item = flatten_dict(item)
+                # Test for emptiness before tagging: once cosponsored_by is
+                # set the row always has a truthy value
+                if not any(flattened_item.values()):
+                    continue  # Skip empty rows
+                flattened_item["cosponsored_by"] = bioguideId
+                all_data.append(flattened_item)
+
+        if not all_data:
+            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404
+
+        # Write data to CSV
+        csv_filename = "cosponsored_legislation.csv"
+        write_to_csv(all_data, csv_filename)
+
+        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
+
+    except Exception as e:
+        logging.error(f"An error occurred: {str(e)}")
+        return jsonify({"error": str(e)}), 500
--- a/api/endpoints/get_sponsored.py
+++ b/api/endpoints/get_sponsored.py
@ -1,144 +1,99 @@
-from flask import Blueprint, jsonify
+from flask import Blueprint, jsonify, request
 import requests
-import json
+import csv
 import os
-import pandas as pd
-
-# Assuming you have these functions and configurations in your app.py
-from app import get_driver, neo4j_logger
+import logging
+import json

 bp = Blueprint('get_sponsored', __name__)
-CACHE_FILE = 'cache.json'
-CSV_FILE = 'legislation.csv'

-def load_cache():
-    if os.path.exists(CACHE_FILE):
-        with open(CACHE_FILE, 'r') as f:
-            return json.load(f)
-    else:
-        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
-        return {}
+# Function to retrieve bioguideIds from cache.json file
+def get_bioguideIds_from_cache():
+    """Return the ``bioguideIds`` list stored in the JSON cache file.
+
+    The file location comes from the ``CACHE_PATH`` environment variable.
+    ``None`` is returned (and an error is logged) when the variable is unset,
+    the file does not exist, the file cannot be read or parsed, or the
+    ``bioguideIds`` entry is missing or empty.
+    """
+    CACHE_PATH = os.getenv("CACHE_PATH")
+    if not CACHE_PATH:
+        logging.error("CACHE_PATH not found in .env file")
+        return None

-def save_cache(cache_data):
-    with open(CACHE_FILE, 'w') as f:
-        json.dump(cache_data, f)
-    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
+    if not os.path.exists(CACHE_PATH):
+        logging.error("Cache file not found at specified path")
+        return None

-def write_to_csv(legislation_data, csv_file):
-    # Flatten the nested dictionaries
-    flattened_legislation = []
-    for item in legislation_data:
-        flattened_item = {}
-        flatten_dict(item, "", flattened_item)
-        flattened_legislation.append(flattened_item)
-
-    df = pd.DataFrame(flattened_legislation)
-
-    # Debugging: Print the first few entries of the DataFrame to inspect its structure
-    print("Debugging DataFrame:")
-    if not df.empty:
-        print(df.head())
-    else:
-        print("DataFrame is empty.")
-
-    if df.empty:
-        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
-        print("DataFrame is empty. Debugging information:")
-        for item in flattened_legislation[:5]:  # Print first 5 items
-            print(json.dumps(item, indent=4))
-        return
-
-    df.to_csv(csv_file, index=False)
-    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
+    # Any read/parse failure is logged and reported to the caller as None
+    try:
+        with open(CACHE_PATH, 'r') as file:
+            cache_data = json.load(file)
+        bioguideIds = cache_data.get("bioguideIds", [])
+        if not bioguideIds:
+            logging.error("bioguideIds not found in cache.json")
+            return None
+        return bioguideIds
+    except Exception as e:
+        logging.error(f"Failed to read cache file: {str(e)}")
+        return None

+# Function to flatten nested dictionaries and lists
 def flatten_dict(d, parent_key='', sep='_'):
+    """Flatten nested dictionaries and lists into a single-level dict.
+
+    Nested dict keys are joined to their parent key with ``sep``. List
+    elements are keyed by their index (``key_0``, ``key_1``, ...); elements
+    that are themselves dicts or lists are flattened recursively, so the
+    result never contains a dict or list value. ``None`` and ``""`` values
+    are replaced by the string ``"NONE"``.
+
+    Args:
+        d: The dictionary to flatten.
+        parent_key: Prefix prepended to every key (used by the recursion).
+        sep: Separator placed between key segments.
+
+    Returns:
+        A new flat ``dict``.
+    """
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
-            # Debugging: Print the key and value when entering a nested dictionary
-            print(f"Entering nested dictionary with key: {new_key}")
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
-            # Handle lists by converting them to strings or other appropriate representation
-            if v:
-                items.append((new_key, ', '.join(map(str, v))))
-            else:
-                # If the list is empty, add an empty string or a placeholder
-                items.append((new_key, ''))
-        elif v is not None:
-            # Debugging: Print the key and value when adding a non-dict, non-list item
-            print(f"Adding {new_key}: {v}")
-            items.append((new_key, v))
+            for i, item in enumerate(v):
+                item_key = f"{new_key}{sep}{i}"
+                if isinstance(item, (dict, list)):
+                    # View a nested list as an index-keyed dict so one
+                    # recursive call handles both container shapes
+                    nested = item if isinstance(item, dict) else dict(enumerate(item))
+                    items.extend(flatten_dict(nested, item_key, sep=sep).items())
+                else:
+                    # Scalar list elements have no .items(); store them directly
+                    items.append((item_key, item if item is not None and item != "" else "NONE"))
        else:
-            # Handle None values appropriately (e.g., add an empty string or a placeholder)
-            items.append((new_key, ''))
+            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)

-@bp.route('/get_sponsored')
-def get_sponsored():
-    # Load bioguideIds and legislation from the same cache
-    cache = load_cache()
-    if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
-        return jsonify({"message": "No bioguideIds found in cache"}), 404
+# Function to write data to CSV
+def write_to_csv(data, filename):
+    """Write a list of dict rows to ``filename`` as CSV.
+
+    The header is the union of all row keys in first-seen order, so the
+    column layout is the same on every run. Rows lacking a column get an
+    empty cell. The file is written as UTF-8.
+
+    Args:
+        data: Iterable of dictionaries, one per CSV row.
+        filename: Path of the file to create or overwrite.
+    """
+    # dict.fromkeys de-duplicates while keeping insertion order; a set would
+    # give a different column order from one run to the next
+    fieldnames = list(dict.fromkeys(key for item in data for key in item))

-    # Print the number of items found in the cache initially
-    initial_bioguideIds_count = len(cache['bioguideIds'])
-    print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
+    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
+        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
+        dict_writer.writeheader()
+        dict_writer.writerows(data)

-    processed_legislation_count = 0
-    all_legislation_data = []
+@bp.route('/get_sponsored', methods=['GET'])
+def get_sponsored_legislation():
+    """Fetch sponsored legislation for every cached member and save it as CSV.
+
+    For each bioguideId returned by ``get_bioguideIds_from_cache`` the
+    Congress API ``sponsored-legislation`` endpoint is queried, each item is
+    flattened, tagged with a ``sponsored_by`` column and collected. All rows
+    are then written to ``sponsored_legislation.csv``.
+
+    Returns:
+        A JSON response with the CSV filename on success; 404 when no
+        bioguideIds or no legislation are found; 500 when the API key is
+        missing, an API call does not return 200, or any exception is raised.
+    """
+    try:
+        # Retrieve bioguideIds from cache.json
+        bioguideIds = get_bioguideIds_from_cache()
+        if not bioguideIds:
+            return jsonify({"error": "bioguideIds not found"}), 404

-    while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
-        # Step 1: Retrieve a sponsor from the cache
-        current_bioguideId = cache['bioguideIds'].pop(0)
-        if current_bioguideId is None:
-            continue
+        # Read the key once and fail early instead of sending "api_key=None"
+        api_key = os.getenv("CONGRESS_API_KEY")
+        if not api_key:
+            logging.error("CONGRESS_API_KEY environment variable is not set")
+            return jsonify({"error": "CONGRESS_API_KEY environment variable is not set"}), 500
+
+        all_data = []

-        print(f"Processing bioguideId: {current_bioguideId}")
+        for bioguideId in bioguideIds:
+            # Make request to Congress API
+            url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation"
+            # The key travels in params so it is not part of the URL string;
+            # the timeout keeps one stalled call from hanging the request
+            response = requests.get(url, params={"api_key": api_key}, timeout=30)

-        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"
+            if response.status_code != 200:
+                logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
+                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

-        # Include API key in headers (if required)
-        api_key = os.getenv('CONGRESS_API_KEY')
-        if not api_key:
-            neo4j_logger.error("Congress API key not found in environment variables")
-            continue
+            # NOTE(review): only this single response is read; if the API
+            # pages its results, later pages are not fetched — confirm.
+            data = response.json().get("sponsoredLegislation", [])

-        headers = {
-            'X-API-KEY': api_key
-        }
+            # Add sponsored_by column and handle nested items
+            for item in data:
+                flattened_item = flatten_dict(item)
+                # Test for emptiness before tagging: once sponsored_by is
+                # set the row always has a truthy value
+                if not any(flattened_item.values()):
+                    continue  # Skip empty rows
+                flattened_item["sponsored_by"] = bioguideId
+                all_data.append(flattened_item)

-        # Step 2: Fetch sponsored legislation for the member
-        response = requests.get(congress_api_url, headers=headers)
-        print(f"Response Status Code: {response.status_code}")
-        print(f"Response Text: {response.text}")
+        if not all_data:
+            return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404

-        if response.status_code != 200:
-            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
-            continue
+        # Write data to CSV
+        csv_filename = "sponsored_legislation.csv"
+        write_to_csv(all_data, csv_filename)

-        response_data = response.json()
+        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})

-        # Debugging statement to check the raw API response
-        print("Raw API Response:")
-        print(json.dumps(response_data, indent=4))
-
-        # Extract legislation data from the response
-        if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
-            for result in response_data['sponsoredLegislation']:
-                all_legislation_data.append(result)
-
-    # Debugging statement to check the number of legislation items collected
-    print(f"Number of legislation items collected: {len(all_legislation_data)}")
-
-    if len(all_legislation_data) > 0:
-        # Print first few items to ensure data is structured correctly
-        for i, item in enumerate(all_legislation_data[:5]):
-            print(f"\nLegislation Item {i+1}:")
-            print(json.dumps(item, indent=4))
-
-    # Write the extracted legislation data to a CSV file
-    write_to_csv(all_legislation_data, CSV_FILE)
-
-    return jsonify({"message": "Legislation data written to CSV successfully"}), 200
+    except Exception as e:
+        logging.error(f"An error occurred: {str(e)}")
+        return jsonify({"error": str(e)}), 500
--- a/api/endpoints/process_sponsors.py
+++ b/api/endpoints/process_sponsors.py
@ -1,95 +1,54 @@
-# endpoints/process_sponsors.py
-from flask import Blueprint, jsonify
-from app import get_driver, neo4j_logger
-import json
+from flask import Blueprint, jsonify, request
 import os
+import csv
+from neo4j import GraphDatabase
+import logging

-bp = Blueprint('process_sponsors', __name__)
+bp = Blueprint('process_sponsored', __name__)

-CACHE_FILE = 'cache.json'
+# Custom logger for the process_sponsored blueprint
+# A named logger at INFO level with its own StreamHandler; each record is
+# formatted as "<timestamp> - <level> - <message>".
+process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
+process_sponsored_logger.setLevel(logging.INFO)
+process_sponsored_handler = logging.StreamHandler()
+process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+process_sponsored_handler.setFormatter(process_sponsored_formatter)
+process_sponsored_logger.addHandler(process_sponsored_handler)

-def load_cache():
-    if os.path.exists(CACHE_FILE):
-        with open(CACHE_FILE, 'r') as f:
-            return json.load(f)
-    return {}
+@bp.route('/process_sponsored', methods=['GET'])
+def process_sponsored():
+    """Merge one Legislation node per row of the sponsored-legislation CSV.
+
+    The CSV path is read from ``SPONSORED_LEGISLATION_CSV``; the Neo4j
+    connection settings from ``NEO4J_URI``, ``NEO4J_USER`` and
+    ``NEO4J_PASSWORD``. Each row's columns become the node's properties.
+
+    Nodes are merged with the ``Legislation`` label, the same label the
+    relationship endpoints MATCH on (labels are case-sensitive in Cypher).
+
+    Returns:
+        A ``(response, status)`` tuple: 200 with the number of rows merged,
+        400 if ``SPONSORED_LEGISLATION_CSV`` is unset, 500 on any exception.
+    """
+    csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")

-def save_cache(cache_data):
-    with open(CACHE_FILE, 'w') as f:
-        json.dump(cache_data, f)
+    if not csv_file_path:
+        return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400

-@bp.route('/process_sponsors')
-def process_sponsors():
-    cache = load_cache()
+    try:
+        # Count rows explicitly: reader.line_num counts physical lines
+        # (header included) and is 0 for an empty file
+        processed_count = 0
+        with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
+            reader = csv.DictReader(file)
+            driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))

-    if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0:
-        return jsonify({"message": "No legislation entries found in cache"}), 404
+            # try/finally so the driver is closed even when a row fails
+            try:
+                with driver.session() as session:
+                    for row in reader:
+                        # Keep only named columns holding text: ragged rows
+                        # yield a None key or None values, and MERGE rejects
+                        # null property values
+                        properties = {
+                            key: value.strip()
+                            for key, value in row.items()
+                            if key is not None and isinstance(value, str)
+                        }

-    # Print the number of items found in the cache initially
-    initial_legislation_entries_count = len(cache['legislation_entries'])
-    print(f"Initial legislation entries count: {initial_legislation_entries_count}")
+                        # An empty property map would match any existing node
+                        if not properties:
+                            continue
+
+                        # Log the CSV row
+                        process_sponsored_logger.info(f"Processing row: {properties}")

-    processed_legislation_count = 0
+                        # Header names come from the file, so backtick-quote
+                        # them (doubling embedded backticks) and bind values
+                        # through positional parameters p0, p1, ...
+                        quoted_keys = ["`" + key.replace("`", "``") + "`" for key in properties]
+                        query = (
+                            "MERGE (l:Legislation {"
+                            + ", ".join(f"{name}: $p{index}" for index, name in enumerate(quoted_keys))
+                            + "})"
+                        )
+                        parameters = {f"p{index}": value for index, value in enumerate(properties.values())}

-    while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0:
-        # Step 1: Retrieve a legislation entry from the cache
-        legislation_entry = cache['legislation_entries'].pop(0)
+                        # Log the MERGE query
+                        process_sponsored_logger.info(f"Executing query: {query}")

-        if not legislation_entry or 'bioguideId' not in legislation_entry:
-            continue
+                        session.run(query, parameters)
+                        processed_count += 1

-        bioguideId = legislation_entry['bioguideId']
-        legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'}
+            finally:
+                driver.close()

-        # Step 2: Create a legislation node with the properties
-        driver = get_driver()
-        with driver.session() as session:
-            legislation_id = legislation_properties['id']
-            query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
-            neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
+        return jsonify({"message": f"Processed {processed_count} sponsored legislations"}), 200

-            existing_legislation = session.run(query, legislation_id=legislation_id).single()
-            if not existing_legislation:
-                properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties])
-                query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
-                neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
-                result = session.run(query, **legislation_properties)
-
-                # Convert the created node to a dictionary
-                new_legislation_node = {
-                    'id': result.single()['n'].id,
-                    'labels': list(result.single()['n'].labels),
-                    **{key: value for key, value in result.single()['n'].items()}
-                }
-                neo4j_logger.info(f"Created legislation node: {new_legislation_node}")
-
-        # Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
-        with driver.session() as session:
-            person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
-            neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
-
-            sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
-            if not sponsor_node:
-                neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
-                continue
-
-            legislation_id = legislation_properties['id']
-            relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
-            neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
-
-            result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
-            relationship_node = {
-                'id': result.single()['r'].id,
-                'type': "sponsored",
-                **{key: value for key, value in result.single()['r'].items()}
-            }
-            neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")
-
-        processed_legislation_count += 1
-
-    save_cache(cache)
-
-    # Print the total number of legislation items processed
-    print(f"Total processed legislation count: {processed_legislation_count}")
-
-    return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200
+    except Exception as e:
+        process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
+        return jsonify({"error": str(e)}), 500
Author	SHA256	Message	Date
Moses Rolston	faeea6e4e0	add relationship endpoints	2025-03-09 21:06:18 -07:00
Moses Rolston	2d039f7b8f	add endpoint process sponsors	2025-03-09 18:58:00 -07:00
Moses Rolston	33dd47226b	add sponsored and cosponsored endpoints	2025-03-09 16:41:33 -07:00