From 21aa013cb848b165c5fc2b7ec82832056c2dc89b11019a889dab16d9966cc4bc Mon Sep 17 00:00:00 2001
From: Moses Rolston
Date: Thu, 6 Mar 2025 20:00:45 -0800
Subject: [PATCH] add initial api work and endpoints

---
 api/endpoints/get_sponsored.py    |  99 +++++++++++++++++++++++++
 api/endpoints/members.py          |  69 ++++++++++++++++++
 api/endpoints/nodes.py            |  61 +++++++++++++++-
 api/endpoints/process_sponsors.py | 106 +++++++++++++++++++++++++++
 api/endpoints/relationships.py    | 113 +++++++++++++++++++++++++++++
 api/endpoints/store_sponsors.py   |  50 +++++++++++++
 api/requirements.txt              |   5 ++
 7 files changed, 502 insertions(+), 1 deletion(-)
 create mode 100644 api/endpoints/get_sponsored.py
 create mode 100644 api/endpoints/members.py
 create mode 100644 api/endpoints/process_sponsors.py
 create mode 100644 api/endpoints/relationships.py
 create mode 100644 api/endpoints/store_sponsors.py
 create mode 100644 api/requirements.txt

diff --git a/api/endpoints/get_sponsored.py b/api/endpoints/get_sponsored.py
new file mode 100644
index 0000000..eb2e0ed
--- /dev/null
+++ b/api/endpoints/get_sponsored.py
@@ -0,0 +1,99 @@
+# endpoints/get_sponsored.py
+from flask import Blueprint, jsonify
+from app import get_driver, neo4j_logger
+import requests
+import json
+import os
+
+bp = Blueprint('get_sponsored', __name__)
+
+CACHE_FILE = 'cache.json'
+
+# Seconds to wait on api.congress.gov before abandoning a request
+REQUEST_TIMEOUT = 30
+
+def load_cache():
+    """Return the cached JSON data, or an empty dict if no cache file exists."""
+    if os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, 'r') as f:
+            return json.load(f)
+    return {}
+
+def save_cache(cache_data):
+    """Overwrite the cache file with cache_data serialized as JSON."""
+    with open(CACHE_FILE, 'w') as f:
+        json.dump(cache_data, f)
+
+@bp.route('/get_sponsored')
+def get_sponsored():
+    """Fetch sponsored legislation for every cached bioguideid.
+
+    Each bioguideid is popped from cache['bioguideids'] and the legislation
+    returned for it is appended to cache['legislation_entries'], the list
+    that the /process_sponsors endpoint reads.
+
+    Returns 404 when no bioguideids are cached, otherwise 200 with the number
+    of legislation entries added.
+    """
+    cache = load_cache()
+
+    if not cache.get('bioguideids'):
+        return jsonify({"message": "No bioguideids found in cache"}), 404
+
+    # Print the number of items found in the cache initially
+    initial_bioguideids_count = len(cache['bioguideids'])
+    print(f"Initial bioguideids count: {initial_bioguideids_count}")
+
+    # Queue entries where process_sponsors looks for them, skipping any
+    # legislation id that is already queued.
+    entries = cache.setdefault('legislation_entries', [])
+    seen_ids = {entry.get('id') for entry in entries}
+    processed_legislation_count = 0
+
+    while cache['bioguideids']:
+        # Print the current bioguideid being processed
+        current_bioguideid = cache['bioguideids'].pop(0)
+        print(f"Processing bioguideid: {current_bioguideid}")
+
+        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideid}/sponsored-legislation"
+
+        # Step 2: Fetch sponsored legislation for the member
+        # NOTE(review): the Congress.gov API documentation describes an api_key
+        # parameter and a 'sponsoredLegislation' response key -- confirm this
+        # request and the 'results' lookup below against it.
+        try:
+            response = requests.get(congress_api_url, timeout=REQUEST_TIMEOUT)
+        except requests.RequestException as exc:
+            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideid {current_bioguideid}: {exc}")
+            continue
+        if response.status_code != 200:
+            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideid {current_bioguideid}")
+            continue
+
+        legislations = response.json().get('results', [])
+
+        # Step 3: Queue each piece of legislation along with the sponsor bioguideid
+        for legislation in legislations:
+            legislation_id = legislation.get('id')
+            if legislation_id is None:
+                neo4j_logger.error(f"Skipping legislation without an id for bioguideid {current_bioguideid}")
+                continue
+            if legislation_id in seen_ids:
+                continue
+            seen_ids.add(legislation_id)
+            entries.append({
+                'bioguideid': current_bioguideid,
+                **legislation
+            })
+            processed_legislation_count += 1
+
+        # Step 4: Delete the sponsor from the cache (already done by popping)
+        neo4j_logger.info(f"Processed sponsored legislation for bioguideid {current_bioguideid}")
+
+    save_cache(cache)
+
+    # Print the total number of legislation items stored and overall items added to the cache
+    print(f"Total processed legislation count: {processed_legislation_count}")
+    print(f"Overall items added to cache: {len(cache)}")
+
+    return jsonify({"message": "Sponsored legislation processed successfully", "processed_legislation_count": processed_legislation_count}), 200
diff --git a/api/endpoints/members.py b/api/endpoints/members.py
new file mode 100644
index 0000000..c80edd8
--- /dev/null
+++ b/api/endpoints/members.py
@@ -0,0 +1,69 @@
+# endpoints/members.py
+from flask import Blueprint, request, jsonify
+from app import get_driver, neo4j_logger
+
+bp = Blueprint('members', __name__)
+
+@bp.route('/members', methods=['GET', 'POST'])
+def members():
+    """Dispatch /members: GET lists Person nodes, POST creates one."""
+    if request.method == 'GET':
+        return get_persons()
+    elif request.method == 'POST':
+        return create_person()
+
+def get_persons():
+    """Return every Person node as JSON under the "persons" key."""
+    driver = get_driver()
+    with driver.session() as session:
+        query = "MATCH (n:Person) RETURN n"
+        neo4j_logger.info(f"Executing query: {query}")
+        nodes = session.run(query)
+
+        # Convert the nodes to a list of dictionaries
+        persons_list = [
+            {
+                'id': record['n'].id,
+                'labels': list(record['n'].labels),
+                **{key: value for key, value in record['n'].items()}
+            }
+            for record in nodes
+        ]
+
+    return jsonify({"persons": persons_list})
+
+def create_person():
+    """Create a Person node from the JSON body and return it with status 201.
+
+    Every key of the JSON object becomes a node property. Returns 400 when
+    the body is empty or a key is not a plain identifier.
+    """
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+
+    # Property names are interpolated into the query text, so reject anything
+    # that is not a plain identifier; values travel as query parameters.
+    invalid_keys = [key for key in data if not key.isidentifier()]
+    if invalid_keys:
+        return jsonify({"error": f"Invalid property names: {invalid_keys}"}), 400
+
+    # Reference each value as $key; quoting it would store the literal text "$key"
+    properties = ', '.join([f'{key}: ${key}' for key in data])
+    query = f"CREATE (n:Person {{{properties}}}) RETURN n"
+
+    driver = get_driver()
+    with driver.session() as session:
+        neo4j_logger.info(f"Executing query: {query} with data: {data}")
+        # Pass the parameters as one dict so a key named "query" cannot clash
+        # with run()'s own arguments, and read the single record only once.
+        node = session.run(query, data).single()['n']
+
+        # Convert the created node to a dictionary
+        person = {
+            'id': node.id,
+            'labels': list(node.labels),
+            **{key: value for key, value in node.items()}
+        }
+
+    return jsonify(person), 201
diff --git a/api/endpoints/nodes.py b/api/endpoints/nodes.py
index c5b776b..e71d76d 100644
--- a/api/endpoints/nodes.py
+++ b/api/endpoints/nodes.py
@@ -4,13 +4,28 @@ from app import get_driver, neo4j_logger
 
 bp = Blueprint('nodes', __name__)
 
-@bp.route('/nodes')
+@bp.route('/nodes', methods=['GET', 'POST'])
+def nodes():
+    """Dispatch /nodes: GET lists nodes of a type, POST creates one."""
+    if request.method == 'GET':
+        return get_nodes()
+    elif request.method == 'POST':
+        return create_node()
+
 def get_nodes():
+    """Return nodes of the requested type; "filter" narrows each node to one property."""
     node_type = request.args.get('type')
 
     if not node_type:
         return jsonify({"error": "Node type is required"}), 400
 
+    # The label is interpolated into the query text, so accept plain identifiers only
+    if not node_type.isidentifier():
+        return jsonify({"error": "Invalid node type"}), 400
+
+    # Get the filter parameter
+    filter_property = request.args.get('filter')
+
     driver = get_driver()
     with driver.session() as session:
         query = f"MATCH (n:{node_type}) RETURN n"
@@ -27,4 +42,48 @@ def get_nodes():
             for record in nodes
         ]
 
+    if filter_property:
+        # Filter the results to only include the specified property
+        filtered_nodes_list = [{filter_property: node.get(filter_property)} for node in nodes_list]
+        return jsonify({"nodes": filtered_nodes_list})
+
     return jsonify({"nodes": nodes_list})
+
+def create_node():
+    """Create a node from the JSON body and return it with status 201.
+
+    The "type" field supplies the node label; every other field becomes a
+    property. Returns 400 when the body is empty, "type" is missing, or the
+    label or a property name is not a plain identifier.
+    """
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+
+    node_type = data.pop('type', None)
+    if not node_type:
+        return jsonify({"error": "Node type is required in JSON data"}), 400
+
+    # The label and property names are interpolated into the query text, so
+    # accept plain identifiers only; values travel as query parameters.
+    if not str(node_type).isidentifier() or not all(key.isidentifier() for key in data):
+        return jsonify({"error": "Invalid node type or property name"}), 400
+
+    # Reference each value as $key; quoting it would store the literal text "$key"
+    properties = ', '.join([f'{key}: ${key}' for key in data])
+    query = f"CREATE (n:{node_type} {{{properties}}}) RETURN n"
+
+    driver = get_driver()
+    with driver.session() as session:
+        neo4j_logger.info(f"Executing query: {query} with data: {data}")
+        # Pass the parameters as one dict and read the single record only once
+        node = session.run(query, data).single()['n']
+
+        # Convert the created node to a dictionary
+        new_node = {
+            'id': node.id,
+            'labels': list(node.labels),
+            **{key: value for key, value in node.items()}
+        }
+
+    return jsonify(new_node), 201
diff --git a/api/endpoints/process_sponsors.py b/api/endpoints/process_sponsors.py
new file mode 100644
index 0000000..a8fefbf
--- /dev/null
+++ b/api/endpoints/process_sponsors.py
@@ -0,0 +1,106 @@
+# endpoints/process_sponsors.py
+from flask import Blueprint, jsonify
+from app import get_driver, neo4j_logger
+import json
+import os
+
+bp = Blueprint('process_sponsors', __name__)
+
+CACHE_FILE = 'cache.json'
+
+def load_cache():
+    """Return the cached JSON data, or an empty dict if no cache file exists."""
+    if os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, 'r') as f:
+            return json.load(f)
+    return {}
+
+def save_cache(cache_data):
+    """Overwrite the cache file with cache_data serialized as JSON."""
+    with open(CACHE_FILE, 'w') as f:
+        json.dump(cache_data, f)
+
+@bp.route('/process_sponsors')
+def process_sponsors():
+    """Turn cached legislation entries into Legislation nodes and sponsorships.
+
+    Each entry is popped from cache['legislation_entries']. A Legislation
+    node is created for it unless one with the same id already exists, and a
+    "sponsored" relationship is created from the Person whose bioguideid
+    matches the entry. Entries without a matching Person are logged and get
+    no relationship.
+
+    Returns 404 when no entries are cached, otherwise 200 with the number of
+    entries for which a relationship was created.
+    """
+    cache = load_cache()
+
+    if not cache.get('legislation_entries'):
+        return jsonify({"message": "No legislation entries found in cache"}), 404
+
+    # Print the number of items found in the cache initially
+    initial_legislation_entries_count = len(cache['legislation_entries'])
+    print(f"Initial legislation entries count: {initial_legislation_entries_count}")
+
+    processed_legislation_count = 0
+    driver = get_driver()
+
+    while cache['legislation_entries']:
+        # Step 1: Retrieve a legislation entry from the cache
+        legislation_entry = cache['legislation_entries'].pop(0)
+        bioguideid = legislation_entry['bioguideid']
+        legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideid'}
+        legislation_id = legislation_properties['id']
+
+        # Step 2: Create a legislation node with the properties
+        with driver.session() as session:
+            query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
+            neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
+
+            existing_legislation = session.run(query, legislation_id=legislation_id).single()
+            if not existing_legislation:
+                # Reference each value as $key; quoting it would store the literal text "$key"
+                properties = ', '.join([f'{key}: ${key}' for key in legislation_properties])
+                query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
+                neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
+                # Pass the parameters as one dict and read the single record only once
+                node = session.run(query, legislation_properties).single()['n']
+
+                # Convert the created node to a dictionary
+                new_legislation_node = {
+                    'id': node.id,
+                    'labels': list(node.labels),
+                    **{key: value for key, value in node.items()}
+                }
+                neo4j_logger.info(f"Created legislation node: {new_legislation_node}")
+
+        # Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
+        with driver.session() as session:
+            person_query = f"MATCH (a:Person {{bioguideid: $bioguideid}}) RETURN a"
+            neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideid: '{bioguideid}'}}")
+
+            sponsor_node = session.run(person_query, bioguideid=bioguideid).single()
+            if not sponsor_node:
+                neo4j_logger.error(f"Person node does not exist for bioguideid {bioguideid}")
+                continue
+
+            relationship_query = f"MATCH (a:Person {{bioguideid: $bioguideid}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
+            neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideid: '{bioguideid}', legislation_id: '{legislation_id}'}}")
+
+            # single() exhausts the result, so read the record once
+            relationship = session.run(relationship_query, bioguideid=bioguideid, legislation_id=legislation_id).single()['r']
+            relationship_node = {
+                'id': relationship.id,
+                'type': "sponsored",
+                **{key: value for key, value in relationship.items()}
+            }
+            neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")
+
+        processed_legislation_count += 1
+
+    save_cache(cache)
+
+    # Print the total number of legislation items processed
+    print(f"Total processed legislation count: {processed_legislation_count}")
+
+    return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200
diff --git a/api/endpoints/relationships.py b/api/endpoints/relationships.py
new file mode 100644
index 0000000..6721a32
--- /dev/null
+++ b/api/endpoints/relationships.py
@@ -0,0 +1,113 @@
+# endpoints/relationships.py
+from flask import Blueprint, request, jsonify
+from app import get_driver, neo4j_logger
+
+bp = Blueprint('relationships', __name__)
+
+@bp.route('/relationships', methods=['GET', 'POST'])
+def relationships():
+    """Dispatch /relationships: GET lists types or relationships, POST creates one."""
+    if request.method == 'GET':
+        relationship_type = request.args.get('type')
+
+        if not relationship_type:
+            return list_relationship_types()
+        else:
+            return list_relationships_by_type(relationship_type)
+    elif request.method == 'POST':
+        return create_relationship()
+
+def list_relationship_types():
+    """Return the database's relationship type names under "relationship_types"."""
+    driver = get_driver()
+    with driver.session() as session:
+        query = "CALL db.relationshipTypes()"
+        neo4j_logger.info(f"Executing query: {query}")
+        result = session.run(query)
+
+        # Convert the result to a list of dictionaries
+        relationship_types_list = [record['relationshipType'] for record in result]
+
+    return jsonify({"relationship_types": relationship_types_list})
+
+def list_relationships_by_type(relationship_type):
+    """Return every relationship of the given type under the "relationships" key.
+
+    Returns 400 when relationship_type is not a plain identifier.
+    """
+    # The type is interpolated into the query text, so accept plain identifiers only
+    if not relationship_type.isidentifier():
+        return jsonify({"error": "Invalid relationship type"}), 400
+
+    driver = get_driver()
+    with driver.session() as session:
+        query = f"MATCH ()-[r:{relationship_type}]->() RETURN r"
+        neo4j_logger.info(f"Executing query: {query}")
+        relationships = session.run(query)
+
+        # Convert the relationships to a list of dictionaries
+        relationships_list = [
+            {
+                'id': record['r'].id,
+                'type': relationship_type,
+                **{key: value for key, value in record['r'].items()}
+            }
+            for record in relationships
+        ]
+
+    return jsonify({"relationships": relationships_list})
+
+def create_relationship():
+    """Create a relationship between two existing nodes and return it with status 201.
+
+    The JSON body must contain start_node_id, end_node_id and type; every
+    other field becomes a relationship property. Returns 400 for a missing or
+    malformed field and 404 when either node does not exist.
+    """
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+
+    required_keys = ['start_node_id', 'end_node_id', 'type']
+    for key in required_keys:
+        if key not in data:
+            return jsonify({"error": f"Missing required field: {key}"}), 400
+
+    # Node ids are compared as integers in the query
+    try:
+        start_node_id = int(data['start_node_id'])
+        end_node_id = int(data['end_node_id'])
+    except (TypeError, ValueError):
+        return jsonify({"error": "Node ids must be integers"}), 400
+    relationship_type = data['type']
+
+    # The type and property names are interpolated into the query text, so
+    # accept plain identifiers only; everything else travels as a parameter.
+    property_keys = [key for key in data if key not in required_keys]
+    if not str(relationship_type).isidentifier() or not all(key.isidentifier() for key in property_keys):
+        return jsonify({"error": "Invalid relationship type or property name"}), 400
+
+    # Optional properties; $key references the parameter (quoting it would store the literal text)
+    properties = ', '.join([f'{key}: ${key}' for key in property_keys])
+    property_clause = f"{{{properties}}}" if properties else ""
+
+    query = f"MATCH (a) WHERE id(a) = $start_node_id MATCH (b) WHERE id(b) = $end_node_id CREATE (a)-[r:{relationship_type}{property_clause}]->(b) RETURN r"
+    parameters = {**data, 'start_node_id': start_node_id, 'end_node_id': end_node_id}
+
+    driver = get_driver()
+    with driver.session() as session:
+        neo4j_logger.info(f"Executing query: {query} with data: {data}")
+        # single() exhausts the result, so read the record once
+        record = session.run(query, parameters).single()
+        if record is None:
+            return jsonify({"error": "Start or end node not found"}), 404
+        relationship = record['r']
+
+        # Convert the created relationship to a dictionary
+        new_relationship = {
+            'id': relationship.id,
+            'type': relationship_type,
+            **{key: value for key, value in relationship.items()}
+        }
+
+    return jsonify(new_relationship), 201
diff --git a/api/endpoints/store_sponsors.py b/api/endpoints/store_sponsors.py
new file mode 100644
index 0000000..7c977c7
--- /dev/null
+++ b/api/endpoints/store_sponsors.py
@@ -0,0 +1,50 @@
+# endpoints/store_sponsors.py
+from flask import Blueprint, jsonify
+from app import get_driver, neo4j_logger
+import json
+import os
+
+bp = Blueprint('store_sponsors', __name__)
+
+CACHE_FILE = 'cache.json'
+
+def load_cache():
+    """Return the cached JSON data, or an empty dict if no cache file exists."""
+    if os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, 'r') as f:
+            return json.load(f)
+    return {}
+
+def save_cache(cache_data):
+    """Overwrite the cache file with cache_data serialized as JSON."""
+    with open(CACHE_FILE, 'w') as f:
+        json.dump(cache_data, f)
+
+@bp.route('/store_sponsors')
+def store_sponsors():
+    """Cache the bioguideid of every Person node under cache['bioguideids'].
+
+    Does nothing when the cache already holds a non-empty bioguideids list.
+    Always responds with status 200.
+    """
+    cache = load_cache()
+
+    if cache.get('bioguideids'):
+        return jsonify({"message": "Bioguideids already cached"}), 200
+
+    driver = get_driver()
+    with driver.session() as session:
+        # Skip Person nodes without a bioguideid; they would be cached as null
+        query = "MATCH (n:Person) WHERE n.bioguideid IS NOT NULL RETURN n.bioguideid"
+        neo4j_logger.info(f"Executing query: {query}")
+        nodes = session.run(query)
+
+        # Convert the nodes to a list of bioguideids
+        bioguideids = [record['n.bioguideid'] for record in nodes]
+        cache['bioguideids'] = bioguideids
+
+    neo4j_logger.info(f"Cached bioguideids: {len(bioguideids)}")
+
+    save_cache(cache)
+
+    return jsonify({"message": "Bioguideids cached successfully", "cached_bioguideids_count": len(cache['bioguideids'])}), 200
diff --git a/api/requirements.txt b/api/requirements.txt
new file mode 100644
index 0000000..2dd4bbb
--- /dev/null
+++ b/api/requirements.txt
@@ -0,0 +1,5 @@
+flask
+neo4j
+python-dotenv
+requests
+werkzeug