Compare commits
4 Commits
574c889258
...
97c96ed0fc
Author | SHA256 | Date | |
---|---|---|---|
97c96ed0fc | |||
207449ee26 | |||
84156a1ce9 | |||
be101beac8 |
21
api/app.py
21
api/app.py
@ -1,4 +1,3 @@
|
|||||||
# app.py
|
|
||||||
from flask import Flask, jsonify, request
|
from flask import Flask, jsonify, request
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
@ -7,6 +6,7 @@ from dotenv import load_dotenv
|
|||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
@ -40,7 +40,6 @@ def load_blueprints_from_directory(directory):
|
|||||||
if filename.endswith('.py') and not filename.startswith('__'):
|
if filename.endswith('.py') and not filename.startswith('__'):
|
||||||
module_name = filename[:-3] # Remove .py extension
|
module_name = filename[:-3] # Remove .py extension
|
||||||
file_path = os.path.join(directory, filename)
|
file_path = os.path.join(directory, filename)
|
||||||
|
|
||||||
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
||||||
module = importlib.util.module_from_spec(spec)
|
module = importlib.util.module_from_spec(spec)
|
||||||
spec.loader.exec_module(module)
|
spec.loader.exec_module(module)
|
||||||
@ -51,5 +50,23 @@ def load_blueprints_from_directory(directory):
|
|||||||
# Load blueprints
|
# Load blueprints
|
||||||
load_blueprints_from_directory('endpoints')
|
load_blueprints_from_directory('endpoints')
|
||||||
|
|
||||||
|
# Sitemap endpoint function
|
||||||
|
def sitemap():
    """Return a JSON list describing every registered route except this one.

    Each entry holds the URL rule string, the HTTP methods the rule accepts
    and the name of the endpoint it dispatches to.

    Returns:
        A Flask JSON response whose body is a list of
        ``{"rule": ..., "methods": [...], "endpoint": ...}`` objects.
    """
    routes = [
        {
            'rule': rule.rule,
            # ``rule.methods`` is a set (or None when unrestricted); sort it so
            # the response is stable between requests.
            'methods': sorted(rule.methods or ()),
            'endpoint': rule.endpoint,
        }
        for rule in app.url_map.iter_rules()
        # Leave the sitemap's own route out of the listing.
        if rule.endpoint != 'sitemap'
    ]

    return jsonify(routes)
|
||||||
|
|
||||||
|
# Register the sitemap endpoint
|
||||||
|
app.add_url_rule('/sitemap', view_func=sitemap, methods=['GET'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(debug=True)
|
app.run(debug=True)
|
||||||
|
58
api/endpoints/create_legislation_node.py
Normal file
58
api/endpoints/create_legislation_node.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
from flask import Blueprint, jsonify
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from neo4j import GraphDatabase
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
|
||||||
|
bp = Blueprint('create_legislation_node', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route('/create_legislation_node', methods=['GET'])
def create_legislation_nodes():
    """Create or merge a ``Legislation`` node in Neo4j for each cached item.

    The JSON file named by the ``LEGISLATION_CACHE_PATH`` environment variable
    is loaded and must contain a list. Every dict item with a truthy ``id`` is
    merged on that ``id``; its remaining keys are written as node properties
    only when the node is newly created. Items that are not dicts or have no
    ``id`` are skipped.

    Returns:
        A ``(response, status)`` tuple:

        * 400 if the environment variable is unset or the data is not a list,
        * 404 if the cache file does not exist,
        * 500 if the cache file is not valid JSON,
        * 201 with the processed ``nodes`` on success.
    """
    # This module has no top-level import of these helpers, so without this
    # the first valid item would raise NameError.
    from app import get_driver, neo4j_logger

    legislation_cache_path = os.getenv("LEGISLATION_CACHE_PATH")

    if not legislation_cache_path:
        return jsonify({"error": "Legislation cache path is not set"}), 400

    try:
        with open(legislation_cache_path, 'r', encoding='utf-8') as f:
            legislation_data = json.load(f)
    except FileNotFoundError:
        return jsonify({"error": "Legislation cache file not found"}), 404
    except json.JSONDecodeError:
        return jsonify({"error": "Failed to decode JSON from the legislation cache"}), 500

    # Ensure legislation_data is a list of dictionaries
    if not isinstance(legislation_data, list):
        return jsonify({"error": "Legislation data should be a list"}), 400

    # Property names travel inside the ``$props`` map parameter instead of
    # being interpolated into the query text, so keys coming from the cache
    # file can neither break nor inject Cypher.
    query = "MERGE (l:Legislation {id: $id}) ON CREATE SET l += $props"

    created_nodes = []

    # One driver and session serve the whole batch; ``finally`` guarantees the
    # driver is closed even when a query fails part-way through.
    # NOTE(review): assumes get_driver() returns a driver this function owns
    # (the previous code closed it after every item) - confirm.
    driver = get_driver()
    try:
        with driver.session() as session:
            for item in legislation_data:
                if not isinstance(item, dict) or not item.get("id"):
                    continue

                properties = dict(item)
                extra_properties = {
                    key: value
                    for key, value in properties.items()
                    if key != 'id'
                }

                # consume() forces execution so a failure surfaces for the
                # item that caused it.
                # NOTE(review): Neo4j properties cannot hold nested maps;
                # nested values in the cache will be rejected by the server.
                session.run(
                    query, id=properties['id'], props=extra_properties
                ).consume()

                # Log the creation or merging of the node
                neo4j_logger.info(f"Created/Merged Legislation node with ID: {item.get('id')} and properties: {properties}")

                created_nodes.append({"id": item.get("id"), "properties": properties})
    finally:
        driver.close()

    return jsonify({"message": "Legislation nodes created/merged successfully", "nodes": created_nodes}), 201
|
@ -1,13 +1,15 @@
|
|||||||
# endpoints/get_sponsored.py
|
|
||||||
from flask import Blueprint, jsonify
|
from flask import Blueprint, jsonify
|
||||||
from app import get_driver, neo4j_logger
|
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Assuming you have these functions and configurations in your app.py
|
||||||
|
from app import get_driver, neo4j_logger
|
||||||
|
|
||||||
bp = Blueprint('get_sponsored', __name__)
|
bp = Blueprint('get_sponsored', __name__)
|
||||||
|
|
||||||
CACHE_FILE = 'cache.json'
|
CACHE_FILE = 'cache.json'
|
||||||
|
CSV_FILE = 'legislation.csv'
|
||||||
|
|
||||||
def load_cache():
|
def load_cache():
|
||||||
if os.path.exists(CACHE_FILE):
|
if os.path.exists(CACHE_FILE):
|
||||||
@ -22,11 +24,57 @@ def save_cache(cache_data):
|
|||||||
json.dump(cache_data, f)
|
json.dump(cache_data, f)
|
||||||
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
|
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
|
||||||
|
|
||||||
|
def write_to_csv(legislation_data, csv_file):
    """Flatten each legislation record and write them all to a CSV file.

    Args:
        legislation_data: Iterable of (possibly nested) dictionaries, one per
            piece of legislation.
        csv_file: Path of the CSV file to write.

    Returns:
        None. When there is nothing to write, a warning is logged and no file
        is produced.
    """
    # flatten_dict returns the flattened mapping; it does not fill an output
    # argument. Passing a dict as its third positional argument (``sep``) and
    # ignoring the return value left every row empty.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]

    df = pd.DataFrame(flattened_legislation)
    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        return

    # Preview goes to the logger at debug level rather than stdout.
    neo4j_logger.debug("DataFrame contents:\n%s", df.head())

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
|
||||||
|
|
||||||
|
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent key with ``sep``.
    Lists are rendered as a comma-separated string of their items, and
    ``None`` values become empty strings.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix applied to every key of ``d``; empty at the top
            level.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new flat dictionary; ``d`` is not modified.

    >>> flatten_dict({"a": {"b": 1}, "c": [1, 2], "d": None})
    {'a_b': 1, 'c': '1, 2', 'd': ''}
    """
    flat = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            flat.update(flatten_dict(v, new_key, sep=sep))
        elif isinstance(v, list):
            # An empty list joins to '', the placeholder used for no values.
            flat[new_key] = ', '.join(map(str, v))
        elif v is None:
            flat[new_key] = ''
        else:
            flat[new_key] = v
    return flat
|
||||||
|
|
||||||
@bp.route('/get_sponsored')
|
@bp.route('/get_sponsored')
|
||||||
def get_sponsored():
|
def get_sponsored():
|
||||||
# Load bioguideIds and legislation from the same cache
|
# Load bioguideIds and legislation from the same cache
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
|
||||||
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
|
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
|
||||||
return jsonify({"message": "No bioguideIds found in cache"}), 404
|
return jsonify({"message": "No bioguideIds found in cache"}), 404
|
||||||
|
|
||||||
@ -35,11 +83,11 @@ def get_sponsored():
|
|||||||
print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
|
print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
|
||||||
|
|
||||||
processed_legislation_count = 0
|
processed_legislation_count = 0
|
||||||
|
all_legislation_data = []
|
||||||
|
|
||||||
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
|
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
|
||||||
# Step 1: Retrieve a sponsor from the cache
|
# Step 1: Retrieve a sponsor from the cache
|
||||||
current_bioguideId = cache['bioguideIds'].pop(0)
|
current_bioguideId = cache['bioguideIds'].pop(0)
|
||||||
|
|
||||||
if current_bioguideId is None:
|
if current_bioguideId is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -59,7 +107,6 @@ def get_sponsored():
|
|||||||
|
|
||||||
# Step 2: Fetch sponsored legislation for the member
|
# Step 2: Fetch sponsored legislation for the member
|
||||||
response = requests.get(congress_api_url, headers=headers)
|
response = requests.get(congress_api_url, headers=headers)
|
||||||
|
|
||||||
print(f"Response Status Code: {response.status_code}")
|
print(f"Response Status Code: {response.status_code}")
|
||||||
print(f"Response Text: {response.text}")
|
print(f"Response Text: {response.text}")
|
||||||
|
|
||||||
@ -67,39 +114,27 @@ def get_sponsored():
|
|||||||
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
|
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
legislations = response.json().get('results', [])
|
response_data = response.json()
|
||||||
|
|
||||||
# Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
|
# Debugging statement to check the raw API response
|
||||||
for legislation in legislations:
|
print("Raw API Response:")
|
||||||
key = f"legislation_{legislation['id']}"
|
print(json.dumps(response_data, indent=4))
|
||||||
|
|
||||||
print(f"About to write to cache: {key} - {legislation}")
|
# Extract legislation data from the response
|
||||||
|
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
|
||||||
|
for result in response_data['sponsoredLegislation']:
|
||||||
|
all_legislation_data.append(result)
|
||||||
|
|
||||||
if key not in cache:
|
# Debugging statement to check the number of legislation items collected
|
||||||
cache[key] = {
|
print(f"Number of legislation items collected: {len(all_legislation_data)}")
|
||||||
'bioguideId': current_bioguideId,
|
|
||||||
**legislation
|
|
||||||
}
|
|
||||||
processed_legislation_count += 1
|
|
||||||
|
|
||||||
# Save the cache immediately after writing each entry
|
if len(all_legislation_data) > 0:
|
||||||
save_cache(cache)
|
# Print first few items to ensure data is structured correctly
|
||||||
|
for i, item in enumerate(all_legislation_data[:5]):
|
||||||
|
print(f"\nLegislation Item {i+1}:")
|
||||||
|
print(json.dumps(item, indent=4))
|
||||||
|
|
||||||
# Print the updated legislation cache for debugging
|
# Write the extracted legislation data to a CSV file
|
||||||
print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
|
write_to_csv(all_legislation_data, CSV_FILE)
|
||||||
|
|
||||||
# Step 4: Delete the sponsor from the bioguideIds list (already done by popping)
|
return jsonify({"message": "Legislation data written to CSV successfully"}), 200
|
||||||
neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
|
|
||||||
|
|
||||||
# Print the number of items left in the bioguide cache
|
|
||||||
remaining_bioguideIds_count = len(cache['bioguideIds'])
|
|
||||||
print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}")
|
|
||||||
|
|
||||||
# Save the cache again to ensure all changes are persisted
|
|
||||||
save_cache(cache)
|
|
||||||
|
|
||||||
# Print the total number of legislation items stored and overall items added to the cache
|
|
||||||
print(f"Total processed legislation count: {processed_legislation_count}")
|
|
||||||
print(f"Overall items in cache: {len(cache)}")
|
|
||||||
|
|
||||||
return jsonify({"message": "Sponsored legislation processed successfully", "processed_legislation_count": processed_legislation_count}), 200
|
|
||||||
|
Loading…
Reference in New Issue
Block a user