Compare commits

...

3 Commits

5 changed files with 314 additions and 196 deletions

View File

@ -0,0 +1,62 @@
# endpoints/create_cosponsored_relationship.py
from flask import Blueprint, jsonify
import logging
import csv
import os
from neo4j import GraphDatabase
from app import get_driver, neo4j_logger # Ensure relative imports work
bp = Blueprint('create_cosponsored_relationship', __name__)
@bp.route('/create_cosponsored_relationship', methods=['GET'])
def create_cosponsored_relationship():
    """Link Person nodes to Legislation nodes listed in the cosponsorship CSV.

    Reads ``cosponsored_legislation.csv`` from the directory above this
    module. For every row that has both a ``cosponsored_by`` and a ``number``
    value, the Person node with that ``bioguideId`` and the Legislation node
    with that ``number`` are looked up and connected with
    ``(Person)-[:COSPONSORED]->(Legislation)``. Rows whose Person or
    Legislation node cannot be found are logged as warnings and skipped.

    The relationship is written with ``MERGE`` so that calling the endpoint
    more than once does not pile up duplicate edges.

    Returns:
        A ``(response, status)`` tuple: 201 on success, 404 when the CSV file
        does not exist, 500 with the exception text on any other failure.
    """
    try:
        # Path to the CSV file
        csv_file_path = os.path.join(os.path.dirname(__file__), '..', 'cosponsored_legislation.csv')
        if not os.path.exists(csv_file_path):
            return jsonify({"status": "error", "message": "CSV file not found"}), 404

        driver = get_driver()
        with driver.session() as session:
            with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
                for row in csv.DictReader(csv_file):
                    cosponsored_by = row.get('cosponsored_by')
                    number = row.get('number')
                    if not (cosponsored_by and number):
                        continue

                    person_node = session.run(
                        "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                        {"bioguideId": cosponsored_by}
                    ).single()
                    legislation_node = session.run(
                        "MATCH (l:Legislation {number: $number}) RETURN l",
                        {"number": number}
                    ).single()

                    # Report each missing side separately, then skip the row.
                    if not person_node:
                        neo4j_logger.warning(f"No Person node found for bioguideId: {cosponsored_by}")
                    if not legislation_node:
                        neo4j_logger.warning(f"No Legislation node found for number: {number}")
                    if not (person_node and legislation_node):
                        continue

                    person = person_node['p']
                    legislation = legislation_node['l']
                    # MERGE instead of CREATE: re-running the import must not
                    # add a second identical relationship.
                    session.run(
                        "MATCH (p:Person), (l:Legislation) "
                        "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                        "MERGE (p)-[:COSPONSORED]->(l)",
                        {"person_id": person.id, "legislation_id": legislation.id}
                    )
                    # .get() so a node without name/title cannot fail the
                    # whole request just while building a log line.
                    neo4j_logger.info(
                        f"Created COSPONSORED relationship from Person {person.get('name')} "
                        f"to Legislation {legislation.get('title')}"
                    )
        return jsonify({"status": "success", "message": "COSPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating COSPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

View File

@ -0,0 +1,43 @@
# endpoints/create_sponsored_relationship.py
from flask import Blueprint, jsonify
from neo4j import GraphDatabase
import logging
from app import get_driver, neo4j_logger # Use absolute import
bp = Blueprint('create_sponsored_relationship', __name__)
@bp.route('/create_sponsored_relationship', methods=['GET'])
def create_sponsored_relationship():
    """Link every Legislation node to the Person named in its ``sponsored_by``.

    Iterates over all Legislation nodes. For each one that has a
    ``sponsored_by`` property, the Person node with that ``bioguideId`` is
    looked up and connected with ``(Person)-[:SPONSORED]->(Legislation)``.
    Legislation whose sponsor has no Person node is logged as a warning.

    The relationship is written with ``MERGE`` so that calling the endpoint
    more than once does not pile up duplicate edges.

    Returns:
        A ``(response, status)`` tuple: 201 on success, 500 with the
        exception text on any failure.
    """
    try:
        driver = get_driver()
        with driver.session() as session:
            # Materialise the result before issuing further queries on the
            # same session inside the loop.
            legislation_records = list(session.run("MATCH (l:Legislation) RETURN l"))
            for record in legislation_records:
                legislation = record['l']
                bioguide_id = legislation.get('sponsored_by')
                if not bioguide_id:
                    continue

                person_node = session.run(
                    "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                    {"bioguideId": bioguide_id}
                ).single()
                if not person_node:
                    neo4j_logger.warning(f"No Person node found for bioguideId: {bioguide_id}")
                    continue

                person = person_node['p']
                # MERGE instead of CREATE: re-running must not add a second
                # identical relationship.
                session.run(
                    "MATCH (p:Person), (l:Legislation) "
                    "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                    "MERGE (p)-[:SPONSORED]->(l)",
                    {"person_id": person.id, "legislation_id": legislation.id}
                )
                # .get() so a node without name/title cannot fail the whole
                # request just while building a log line.
                neo4j_logger.info(
                    f"Created SPONSORED relationship from Person {person.get('name')} "
                    f"to Legislation {legislation.get('title')}"
                )
        return jsonify({"status": "success", "message": "SPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating SPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

View File

@ -0,0 +1,99 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json
bp = Blueprint('get_cosponsored', __name__)
# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    """Return the ``bioguideIds`` value stored in the JSON cache file.

    The cache file location is read from the ``CACHE_PATH`` environment
    variable; the ids are read from the top-level ``"bioguideIds"`` key.

    Returns:
        The non-empty ``bioguideIds`` value, or ``None`` when the variable is
        unset, the file is missing or unreadable, the JSON is not an object,
        or the key is absent or empty. Every ``None`` case is logged.
    """
    cache_path = os.getenv("CACHE_PATH")
    if not cache_path:
        logging.error("CACHE_PATH not found in .env file")
        return None

    if not os.path.exists(cache_path):
        logging.error("Cache file not found at specified path")
        return None

    try:
        # Explicit UTF-8: JSON text should not depend on the platform locale.
        with open(cache_path, 'r', encoding='utf-8') as file:
            cache_data = json.load(file)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        logging.error(f"Failed to read cache file: {str(e)}")
        return None

    if not isinstance(cache_data, dict):
        logging.error("Failed to read cache file: top-level JSON value is not an object")
        return None

    bioguideIds = cache_data.get("bioguideIds", [])
    if not bioguideIds:
        logging.error("bioguideIds not found in cache.json")
        return None
    return bioguideIds
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten nested dictionaries and lists into a single-level dict.

    Nested keys are joined with ``sep``; list elements are keyed by their
    index. For example ``{"a": {"b": [1, {"c": 2}]}}`` becomes
    ``{"a_b_0": 1, "a_b_1_c": 2}``.

    Args:
        d: Mapping to flatten.
        parent_key: Prefix for every produced key (used by the recursion).
        sep: Separator placed between key components.

    Returns:
        A new flat dict. Leaf values that are ``None`` or ``""`` are replaced
        by the placeholder string ``"NONE"``. Empty dicts and empty lists
        contribute no keys.
    """
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            # Re-key the list by index and recurse, so dict, list and scalar
            # elements all go through the same branches as dict values do.
            # Recursing on the element directly would fail for scalars and
            # would leave a nested dict as the stored value.
            indexed = {str(i): element for i, element in enumerate(v)}
            items.extend(flatten_dict(indexed, new_key, sep=sep).items())
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)
# Function to write data to CSV
def write_to_csv(data, filename):
    """Write a list of flat dicts to ``filename`` as UTF-8 CSV.

    The header is the union of all keys in ``data``, in the order in which
    each key is first seen. Rows that lack a column get an empty cell.

    Args:
        data: Iterable of dicts, one per CSV row.
        filename: Path of the CSV file to create or overwrite.
    """
    # A dict keeps first-seen order; a set would give a column order that can
    # differ from one run to the next.
    fieldnames = {}
    for item in data:
        fieldnames.update(dict.fromkeys(item))

    # Explicit UTF-8 so the file does not depend on the platform locale.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=list(fieldnames))
        dict_writer.writeheader()
        dict_writer.writerows(data)
@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    """Fetch cosponsored legislation for every cached member and write a CSV.

    For each bioguideId returned by ``get_bioguideIds_from_cache`` the
    Congress API ``cosponsored-legislation`` endpoint is queried. Every
    returned item is flattened with ``flatten_dict``, tagged with a
    ``cosponsored_by`` column holding the bioguideId, and all rows are written
    to ``cosponsored_legislation.csv`` with ``write_to_csv``.

    Returns:
        The default success response with the CSV file name when rows were
        written; 404 when no bioguideIds or no legislation were found; 500
        when the API key is missing, an API call does not return 200, or any
        exception occurs.
    """
    # Upper bound, in seconds, for each API call so an unresponsive server
    # cannot block the request forever.
    request_timeout = 30

    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        # Read the key once, and fail early instead of sending "api_key=None".
        api_key = os.getenv("CONGRESS_API_KEY")
        if not api_key:
            logging.error("CONGRESS_API_KEY environment variable is not set")
            return jsonify({"error": "CONGRESS_API_KEY environment variable is not set"}), 500

        all_data = []
        for bioguideId in bioguideIds:
            # NOTE(review): one request per member; if this endpoint
            # paginates, only the first page is read - confirm.
            url = f"https://api.congress.gov/v3/member/{bioguideId}/cosponsored-legislation"
            response = requests.get(url, params={"api_key": api_key}, timeout=request_timeout)

            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            # "or []" guards against an explicit null in the payload.
            data = response.json().get("cosponsoredLegislation", []) or []

            # Add cosponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                # Skip empty rows. This must be tested before the
                # cosponsored_by column is added, otherwise no row is ever
                # empty.
                if not flattened_item:
                    continue
                flattened_item["cosponsored_by"] = bioguideId
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)

        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

View File

@ -1,144 +1,99 @@
from flask import Blueprint, jsonify
from flask import Blueprint, jsonify, request
import requests
import json
import csv
import os
import pandas as pd
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger
import logging
import json
bp = Blueprint('get_sponsored', __name__)
CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'
def load_cache():
    """Return the parsed JSON content of CACHE_FILE, or {} when it is absent.

    A missing cache file is not an error: it is logged at info level and an
    empty dict is returned.
    """
    if not os.path.exists(CACHE_FILE):
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}

    with open(CACHE_FILE, 'r') as cache_handle:
        cached = json.load(cache_handle)
    return cached
# Function to retrieve bioguideIds from cache.json file
# NOTE(review): the statements of get_bioguideIds_from_cache below are
# interleaved with two other complete definitions (save_cache and a
# pandas-based write_to_csv). This looks like removed and added diff lines
# rendered without +/- markers - confirm against the real file before editing.
def get_bioguideIds_from_cache():
# The cache file location is read from the CACHE_PATH environment variable;
# when it is unset the error is logged and None is returned.
CACHE_PATH = os.getenv("CACHE_PATH")
if not CACHE_PATH:
logging.error("CACHE_PATH not found in .env file")
return None
# save_cache: dumps cache_data as JSON into CACHE_FILE and logs the file name.
def save_cache(cache_data):
with open(CACHE_FILE, 'w') as f:
json.dump(cache_data, f)
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
# (get_bioguideIds_from_cache, continued) a non-existent cache path is logged
# and None is returned.
if not os.path.exists(CACHE_PATH):
logging.error("Cache file not found at specified path")
return None
# write_to_csv (pandas variant): builds a DataFrame from the per-item dicts and
# writes it with DataFrame.to_csv; when the DataFrame is empty it logs a
# warning, prints debugging output and returns without writing.
def write_to_csv(legislation_data, csv_file):
# Flatten the nested dictionaries
flattened_legislation = []
for item in legislation_data:
flattened_item = {}
# NOTE(review): flatten_dict is declared as (d, parent_key='', sep='_') and
# returns a new dict. Here flattened_item is passed in the `sep` position and
# the return value is discarded, so flattened_item appears to stay empty for
# every item - confirm.
flatten_dict(item, "", flattened_item)
flattened_legislation.append(flattened_item)
df = pd.DataFrame(flattened_legislation)
# Debugging: Print the first few entries of the DataFrame to inspect its structure
print("Debugging DataFrame:")
if not df.empty:
print(df.head())
else:
print("DataFrame is empty.")
if df.empty:
neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
print("DataFrame is empty. Debugging information:")
for item in flattened_legislation[:5]: # Print first 5 items
print(json.dumps(item, indent=4))
return
df.to_csv(csv_file, index=False)
neo4j_logger.info(f"Data written to CSV file: {csv_file}")
# (get_bioguideIds_from_cache, continued) parse the cache file and return the
# value under "bioguideIds"; an empty/missing value or any exception is logged
# and None is returned.
try:
with open(CACHE_PATH, 'r') as file:
cache_data = json.load(file)
bioguideIds = cache_data.get("bioguideIds", [])
if not bioguideIds:
logging.error("bioguideIds not found in cache.json")
return None
return bioguideIds
except Exception as e:
logging.error(f"Failed to read cache file: {str(e)}")
return None
# Function to flatten nested dictionaries and lists
# NOTE(review): two variants of the list / scalar / None handling appear to be
# interleaved below (diff lines without +/- markers), so the branch structure
# cannot be read as one coherent function - confirm against the real file.
#  - one variant joins list elements into a ", "-separated string and maps
#    None (and empty lists) to '';
#  - the other calls flatten_dict on every list element and maps None / ""
#    to the placeholder "NONE".
def flatten_dict(d, parent_key='', sep='_'):
# Collected (key, value) pairs; turned into a dict on return.
items = []
for k, v in d.items():
# Nested keys are prefixed with the parent key, joined by sep.
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
# Debugging: Print the key and value when entering a nested dictionary
print(f"Entering nested dictionary with key: {new_key}")
# Recurse with new_key as the prefix and merge the flattened pairs.
items.extend(flatten_dict(v, new_key, sep=sep).items())
elif isinstance(v, list):
# Handle lists by converting them to strings or other appropriate representation
if v:
items.append((new_key, ', '.join(map(str, v))))
else:
# If the list is empty, add an empty string or a placeholder
items.append((new_key, ''))
elif v is not None:
# Debugging: Print the key and value when adding a non-dict, non-list item
print(f"Adding {new_key}: {v}")
items.append((new_key, v))
# NOTE(review): flatten_dict calls .items() on its first argument, so this
# loop requires every list element to be a dict; the dict it returns is stored
# as the value itself (not merged into items), and the index is joined with a
# literal "_" rather than sep.
for i, item in enumerate(v):
items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
else:
# Handle None values appropriately (e.g., add an empty string or a placeholder)
items.append((new_key, ''))
items.append((new_key, v if v is not None and v != "" else "NONE"))
return dict(items)
# NOTE(review): three definitions appear to be interleaved in this span (diff
# lines without +/- markers) - confirm against the real file before editing:
#  - get_sponsored: takes ids from load_cache(), sends the API key in an
#    X-API-KEY header, skips a member (continue) when a call fails, and writes
#    to CSV_FILE;
#  - write_to_csv: csv.DictWriter variant;
#  - get_sponsored_legislation: takes ids from get_bioguideIds_from_cache(),
#    sends the API key as an api_key query parameter, returns 500 on the first
#    failed call, and writes to sponsored_legislation.csv.
# Both handlers are registered on the same '/get_sponsored' rule.
@bp.route('/get_sponsored')
def get_sponsored():
# Load bioguideIds and legislation from the same cache
cache = load_cache()
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
return jsonify({"message": "No bioguideIds found in cache"}), 404
# Function to write data to CSV
def write_to_csv(data, filename):
# Header = union of all keys; a set has no defined order, so the column order
# is not fixed.
keys = set()
for item in data:
keys.update(item.keys())
# Print the number of items found in the cache initially
initial_bioguideIds_count = len(cache['bioguideIds'])
print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
with open(filename, 'w', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, fieldnames=keys)
dict_writer.writeheader()
dict_writer.writerows(data)
processed_legislation_count = 0
all_legislation_data = []
@bp.route('/get_sponsored', methods=['GET'])
def get_sponsored_legislation():
try:
# Retrieve bioguideIds from cache.json
bioguideIds = get_bioguideIds_from_cache()
if not bioguideIds:
return jsonify({"error": "bioguideIds not found"}), 404
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
# Step 1: Retrieve a sponsor from the cache
current_bioguideId = cache['bioguideIds'].pop(0)
if current_bioguideId is None:
continue
all_data = []
print(f"Processing bioguideId: {current_bioguideId}")
for bioguideId in bioguideIds:
# Make request to Congress API
# The key is re-read from the environment on every iteration and placed in
# the URL; requests.get is called without a timeout argument.
api_key = os.getenv("CONGRESS_API_KEY")
url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
response = requests.get(url)
congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"
if response.status_code != 200:
logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500
# Include API key in headers (if required)
api_key = os.getenv('CONGRESS_API_KEY')
if not api_key:
neo4j_logger.error("Congress API key not found in environment variables")
continue
data = response.json().get("sponsoredLegislation", [])
headers = {
'X-API-KEY': api_key
}
# Add sponsored_by column and handle nested items
for item in data:
flattened_item = flatten_dict(item)
flattened_item["sponsored_by"] = bioguideId
# NOTE(review): sponsored_by was assigned on the previous line, so any(...)
# is true whenever bioguideId is truthy and this skip does not trigger.
if not any(flattened_item.values()):
continue # Skip empty rows
all_data.append(flattened_item)
# Step 2: Fetch sponsored legislation for the member
response = requests.get(congress_api_url, headers=headers)
print(f"Response Status Code: {response.status_code}")
print(f"Response Text: {response.text}")
if not all_data:
return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404
if response.status_code != 200:
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
continue
# Write data to CSV
csv_filename = f"sponsored_legislation.csv"
write_to_csv(all_data, csv_filename)
response_data = response.json()
return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
# Debugging statement to check the raw API response
print("Raw API Response:")
print(json.dumps(response_data, indent=4))
# Extract legislation data from the response
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
for result in response_data['sponsoredLegislation']:
all_legislation_data.append(result)
# Debugging statement to check the number of legislation items collected
print(f"Number of legislation items collected: {len(all_legislation_data)}")
if len(all_legislation_data) > 0:
# Print first few items to ensure data is structured correctly
for i, item in enumerate(all_legislation_data[:5]):
print(f"\nLegislation Item {i+1}:")
print(json.dumps(item, indent=4))
# Write the extracted legislation data to a CSV file
write_to_csv(all_legislation_data, CSV_FILE)
return jsonify({"message": "Legislation data written to CSV successfully"}), 200
# Any exception is logged and returned to the client as a 500 with its text.
except Exception as e:
logging.error(f"An error occurred: {str(e)}")
return jsonify({"error": str(e)}), 500

View File

@ -1,95 +1,54 @@
# endpoints/process_sponsors.py
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import json
from flask import Blueprint, jsonify, request
import os
import csv
from neo4j import GraphDatabase
import logging
bp = Blueprint('process_sponsors', __name__)
bp = Blueprint('process_sponsored', __name__)
CACHE_FILE = 'cache.json'
# Dedicated logger for this blueprint: INFO and above are sent to a stream
# handler (stderr by default) as "<timestamp> - <level> - <message>".
process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
process_sponsored_handler = logging.StreamHandler()
process_sponsored_handler.setFormatter(process_sponsored_formatter)

process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
process_sponsored_logger.addHandler(process_sponsored_handler)
process_sponsored_logger.setLevel(logging.INFO)
def load_cache():
    """Return the parsed JSON content of CACHE_FILE, or {} when it is absent."""
    if not os.path.exists(CACHE_FILE):
        return {}

    with open(CACHE_FILE, 'r') as cache_handle:
        cached = json.load(cache_handle)
    return cached
# NOTE(review): three definitions appear to be interleaved in this span (diff
# lines without +/- markers) - confirm against the real file before editing:
#  - process_sponsored: reads the CSV named by SPONSORED_LEGISLATION_CSV and
#    runs one MERGE per row;
#  - save_cache: dumps the cache dict as JSON into CACHE_FILE;
#  - process_sponsors: pops entries from cache['legislation_entries'],
#    creates Legislation nodes and "sponsored" relationships, then saves the
#    cache.
@bp.route('/process_sponsored', methods=['GET'])
def process_sponsored():
csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")
def save_cache(cache_data):
with open(CACHE_FILE, 'w') as f:
json.dump(cache_data, f)
if not csv_file_path:
return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400
@bp.route('/process_sponsors')
def process_sponsors():
cache = load_cache()
try:
with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
reader = csv.DictReader(file)
# A new driver is built from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD here;
# it and the session are closed by explicit close() calls further down.
driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))
session = driver.session()
if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0:
return jsonify({"message": "No legislation entries found in cache"}), 404
for row in reader:
# Strip surrounding whitespace from every string cell of the row.
properties = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}
# Print the number of items found in the cache initially
initial_legislation_entries_count = len(cache['legislation_entries'])
print(f"Initial legislation entries count: {initial_legislation_entries_count}")
# Log the CSV row
process_sponsored_logger.info(f"Processing row: {properties}")
processed_legislation_count = 0
# NOTE(review): the CSV column names are interpolated into the Cypher text
# (only the values go through $props), and the label here is lowercase
# `legislation` while the other queries in this span use `Legislation` - confirm
# both are intended.
query = (
"MERGE (l:legislation {"
+ ", ".join(f"{key}: $props.{key}" for key in properties)
+ "})"
)
while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0:
# Step 1: Retrieve a legislation entry from the cache
legislation_entry = cache['legislation_entries'].pop(0)
# Log the MERGE query
process_sponsored_logger.info(f"Executing query: {query}")
if not legislation_entry or 'bioguideId' not in legislation_entry:
continue
session.run(query, props=properties)
bioguideId = legislation_entry['bioguideId']
# Every entry key except bioguideId becomes a node property.
legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'}
session.close()
driver.close()
# Step 2: Create a legislation node with the properties
driver = get_driver()
with driver.session() as session:
legislation_id = legislation_properties['id']
query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
return jsonify({"message": f"Processed {reader.line_num - 1} sponsored legislations"}), 200
existing_legislation = session.run(query, legislation_id=legislation_id).single()
if not existing_legislation:
# NOTE(review): each placeholder is emitted inside double quotes ("$key"),
# so the query text presumably contains string literals rather than
# parameter references - verify what ends up stored on the node.
properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties])
query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
result = session.run(query, **legislation_properties)
# Convert the created node to a dictionary
# NOTE(review): result.single() is called three times on the same result
# object - verify it still yields the record after the first call.
new_legislation_node = {
'id': result.single()['n'].id,
'labels': list(result.single()['n'].labels),
**{key: value for key, value in result.single()['n'].items()}
}
neo4j_logger.info(f"Created legislation node: {new_legislation_node}")
# Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
with driver.session() as session:
person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
if not sponsor_node:
neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
continue
legislation_id = legislation_properties['id']
# CREATE (not MERGE): every execution adds a new `sponsored` relationship.
relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
# NOTE(review): result.single() is called twice on the same result here too.
relationship_node = {
'id': result.single()['r'].id,
'type': "sponsored",
**{key: value for key, value in result.single()['r'].items()}
}
neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")
processed_legislation_count += 1
# Persist the cache after entries have been popped from it.
save_cache(cache)
# Print the total number of legislation items processed
print(f"Total processed legislation count: {processed_legislation_count}")
return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200
# Any exception is logged and returned to the client as a 500 with its text.
except Exception as e:
process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
return jsonify({"error": str(e)}), 500