add endpoint process sponsors

This commit is contained in:
Moses Rolston 2025-03-09 18:58:00 -07:00
parent 33dd47226b
commit 2d039f7b8f

View File

@@ -1,95 +1,54 @@
# endpoints/process_sponsors.py from flask import Blueprint, jsonify, request
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import json
import os import os
import csv
from neo4j import GraphDatabase
import logging
bp = Blueprint('process_sponsors', __name__) bp = Blueprint('process_sponsored', __name__)
CACHE_FILE = 'cache.json' # Custom logger for the process_sponsored blueprint
process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
process_sponsored_logger.setLevel(logging.INFO)
process_sponsored_handler = logging.StreamHandler()
process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
process_sponsored_handler.setFormatter(process_sponsored_formatter)
process_sponsored_logger.addHandler(process_sponsored_handler)
def load_cache(): @bp.route('/process_sponsored', methods=['GET'])
if os.path.exists(CACHE_FILE): def process_sponsored():
with open(CACHE_FILE, 'r') as f: csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")
return json.load(f)
return {}
def save_cache(cache_data): if not csv_file_path:
with open(CACHE_FILE, 'w') as f: return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400
json.dump(cache_data, f)
@bp.route('/process_sponsors') try:
def process_sponsors(): with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
cache = load_cache() reader = csv.DictReader(file)
driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))
session = driver.session()
if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0: for row in reader:
return jsonify({"message": "No legislation entries found in cache"}), 404 properties = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}
# Print the number of items found in the cache initially # Log the CSV row
initial_legislation_entries_count = len(cache['legislation_entries']) process_sponsored_logger.info(f"Processing row: {properties}")
print(f"Initial legislation entries count: {initial_legislation_entries_count}")
processed_legislation_count = 0 query = (
"MERGE (l:legislation {"
+ ", ".join(f"{key}: $props.{key}" for key in properties)
+ "})"
)
while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0: # Log the MERGE query
# Step 1: Retrieve a legislation entry from the cache process_sponsored_logger.info(f"Executing query: {query}")
legislation_entry = cache['legislation_entries'].pop(0)
if not legislation_entry or 'bioguideId' not in legislation_entry: session.run(query, props=properties)
continue
bioguideId = legislation_entry['bioguideId'] session.close()
legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'} driver.close()
# Step 2: Create a legislation node with the properties return jsonify({"message": f"Processed {reader.line_num - 1} sponsored legislations"}), 200
driver = get_driver()
with driver.session() as session:
legislation_id = legislation_properties['id']
query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
existing_legislation = session.run(query, legislation_id=legislation_id).single() except Exception as e:
if not existing_legislation: process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties]) return jsonify({"error": str(e)}), 500
query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
result = session.run(query, **legislation_properties)
# Convert the created node to a dictionary
new_legislation_node = {
'id': result.single()['n'].id,
'labels': list(result.single()['n'].labels),
**{key: value for key, value in result.single()['n'].items()}
}
neo4j_logger.info(f"Created legislation node: {new_legislation_node}")
# Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
with driver.session() as session:
person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
if not sponsor_node:
neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
continue
legislation_id = legislation_properties['id']
relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
relationship_node = {
'id': result.single()['r'].id,
'type': "sponsored",
**{key: value for key, value in result.single()['r'].items()}
}
neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")
processed_legislation_count += 1
save_cache(cache)
# Print the total number of legislation items processed
print(f"Total processed legislation count: {processed_legislation_count}")
return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200