diff --git a/api/api.py b/api/api.py deleted file mode 100644 index b824eb7..0000000 --- a/api/api.py +++ /dev/null @@ -1,44 +0,0 @@ -# app.py -from flask import Flask, jsonify, request -import os -import logging -from neo4j import GraphDatabase -from dotenv import load_dotenv -from werkzeug.middleware.proxy_fix import ProxyFix -import importlib.util - -load_dotenv() - -app = Flask(__name__) -app.wsgi_app = ProxyFix(app.wsgi_app) - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -# Neo4j configuration -NEO4J_URI = os.getenv("NEO4J_URI") -NEO4J_USER = os.getenv("NEO4J_USER") -NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD") - -def get_driver(): - return GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) - -# Function to dynamically import and register blueprints -def load_blueprints_from_directory(directory): - for filename in os.listdir(directory): - if filename.endswith('.py') and not filename.startswith('__'): - module_name = filename[:-3] # Remove .py extension - file_path = os.path.join(directory, filename) - - spec = importlib.util.spec_from_file_location(module_name, file_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - if hasattr(module, 'bp'): - app.register_blueprint(module.bp) - -# Load blueprints -load_blueprints_from_directory('endpoints') - -if __name__ == '__main__': - app.run(debug=True) diff --git a/api/endpoints/get_sponsored.py b/api/endpoints/get_sponsored.py index 67adced..78aecc2 100644 --- a/api/endpoints/get_sponsored.py +++ b/api/endpoints/get_sponsored.py @@ -13,61 +13,93 @@ def load_cache(): if os.path.exists(CACHE_FILE): with open(CACHE_FILE, 'r') as f: return json.load(f) - return {} + else: + neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.") + return {} def save_cache(cache_data): with open(CACHE_FILE, 'w') as f: json.dump(cache_data, f) + neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}") @bp.route('/get_sponsored') def get_sponsored(): + # Load bioguideIds and legislation from the same cache cache = load_cache() - if 'bioguideids' not in cache or len(cache['bioguideids']) == 0: - return jsonify({"message": "No bioguideids found in cache"}), 404 + if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0: + return jsonify({"message": "No bioguideIds found in cache"}), 404 # Print the number of items found in the cache initially - initial_bioguideids_count = len(cache['bioguideids']) - print(f"Initial bioguideids count: {initial_bioguideids_count}") + initial_bioguideIds_count = len(cache['bioguideIds']) + print(f"Initial bioguideIds count: {initial_bioguideIds_count}") processed_legislation_count = 0 - while 'bioguideids' in cache and len(cache['bioguideids']) > 0: + while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0: # Step 1: Retrieve a sponsor from the cache - current_bioguideid = cache['bioguideids'].pop(0) + current_bioguideId = cache['bioguideIds'].pop(0) - if current_bioguideid is None: + if current_bioguideId is None: continue - print(f"Processing bioguideid: {current_bioguideid}") + print(f"Processing bioguideId: {current_bioguideId}") - congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideid}/sponsored-legislation" + congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation" + + # Include API key in headers (if required) + api_key = os.getenv('CONGRESS_API_KEY') + if not api_key: + neo4j_logger.error("Congress API key not found in environment variables") + continue + + headers = { + 'X-API-KEY': api_key + } # Step 2: Fetch sponsored legislation for the member - response = requests.get(congress_api_url) + response = requests.get(congress_api_url, headers=headers) + + print(f"Response Status Code: {response.status_code}") + print(f"Response Text: {response.text}") + if response.status_code != 200: - neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideid {current_bioguideid}") + neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}") continue legislations = response.json().get('results', []) - # Step 3: Store each piece of legislation in the cache along with the sponsor bioguideid + # Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId for legislation in legislations: key = f"legislation_{legislation['id']}" + + print(f"About to write to cache: {key} - {legislation}") + if key not in cache: cache[key] = { - 'bioguideid': current_bioguideid, + 'bioguideId': current_bioguideId, **legislation } processed_legislation_count += 1 - # Step 4: Delete the sponsor from the cache (already done by popping) - neo4j_logger.info(f"Processed sponsored legislation for bioguideid {current_bioguideid}") + # Save the cache immediately after writing each entry + save_cache(cache) + # Print the updated legislation cache for debugging + print(f"Updated legislation cache: {json.dumps(cache, indent=2)}") + + # Step 4: Delete the sponsor from the bioguideIds list (already done by popping) + neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}") + + # Print the number of items left in the bioguide cache + remaining_bioguideIds_count = len(cache['bioguideIds']) + print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}") + + # Save the cache again to ensure all changes are persisted save_cache(cache) # Print the total number of legislation items stored and overall items added to the cache print(f"Total processed legislation count: {processed_legislation_count}") - print(f"Overall items added to cache: {len(cache)}") + print(f"Overall items in cache: {len(cache)}") return jsonify({"message": "Sponsored legislation processed successfully", "processed_legislation_count": processed_legislation_count}), 200 diff --git a/api/endpoints/process_sponsors.py b/api/endpoints/process_sponsors.py index b294c5f..a8fe1b1 100644 --- a/api/endpoints/process_sponsors.py +++ b/api/endpoints/process_sponsors.py @@ -35,11 +35,11 @@ def process_sponsors(): # Step 1: Retrieve a legislation entry from the cache legislation_entry = cache['legislation_entries'].pop(0) - if not legislation_entry or 'bioguideid' not in legislation_entry: + if not legislation_entry or 'bioguideId' not in legislation_entry: continue - bioguideid = legislation_entry['bioguideid'] - legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideid'} + bioguideId = legislation_entry['bioguideId'] + legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'} # Step 2: Create a legislation node with the properties driver = get_driver() @@ -65,19 +65,19 @@ def process_sponsors(): # Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation with driver.session() as session: - person_query = f"MATCH (a:Person {{bioguideid: $bioguideid}}) RETURN a" - neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideid: '{bioguideid}'}}") + person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a" + neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}") - sponsor_node = session.run(person_query, bioguideid=bioguideid).single() + sponsor_node = session.run(person_query, bioguideId=bioguideId).single() if not sponsor_node: - neo4j_logger.error(f"Person node does not exist for bioguideid {bioguideid}") + neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}") continue legislation_id = legislation_properties['id'] - relationship_query = f"MATCH (a:Person {{bioguideid: $bioguideid}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r" - neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideid: '{bioguideid}', legislation_id: '{legislation_id}'}}") + relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r" + neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}") - result = session.run(relationship_query, bioguideid=bioguideid, legislation_id=legislation_id) + result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id) relationship_node = { 'id': result.single()['r'].id, 'type': "sponsored", diff --git a/api/endpoints/store_sponsors.py b/api/endpoints/store_sponsors.py index 206ae8b..0ad83d5 100644 --- a/api/endpoints/store_sponsors.py +++ b/api/endpoints/store_sponsors.py @@ -22,32 +22,32 @@ def save_cache(cache_data): def store_sponsors(): cache = load_cache() - if 'bioguideids' in cache and len(cache['bioguideids']) > 0: - return jsonify({"message": "Bioguideids already cached"}), 200 + if 'bioguideIds' in cache and len(cache['bioguideIds']) > 0: + return jsonify({"message": "bioguideIds already cached"}), 200 driver = get_driver() with driver.session() as session: - query = "MATCH (n:Person) RETURN n.bioguideid" + query = "MATCH (n:Person) RETURN n.bioguideId" neo4j_logger.info(f"Executing query: {query}") # Fetch the records records = session.run(query) - bioguideids = [] + bioguideIds = [] for record in records: - bioguideid = record['n.bioguideid'] - if bioguideid is not None: - bioguideids.append(bioguideid) - print(f"Storing bioguideid: {bioguideid}") # Print each bioguideid as it's added + bioguideId = record['n.bioguideId'] + if bioguideId is not None: + bioguideIds.append(bioguideId) + print(f"Storing bioguideId: {bioguideId}") # Print each bioguideId as it's added else: - print("Found a record with None bioguideid") # Log any None values + print("Found a record with None bioguideId") # Log any None values - cache['bioguideids'] = bioguideids + cache['bioguideIds'] = bioguideIds - neo4j_logger.info(f"Cached bioguideids: {len(bioguideids)}") + neo4j_logger.info(f"Cached bioguideIds: {len(bioguideIds)}") # Save the updated cache save_cache(cache) - return jsonify({"message": "Bioguideids cached successfully", "cached_bioguideids_count": len(cache['bioguideids'])}), 200 + return jsonify({"message": "bioguideIds cached successfully", "cached_bioguideIds_count": len(cache['bioguideIds'])}), 200