Compare commits

...

3 Commits

5 changed files with 314 additions and 196 deletions

View File

@ -0,0 +1,62 @@
# endpoints/create_cosponsored_relationship.py
from flask import Blueprint, jsonify
import logging
import csv
import os
from neo4j import GraphDatabase
from app import get_driver, neo4j_logger # Ensure relative imports work
bp = Blueprint('create_cosponsored_relationship', __name__)


@bp.route('/create_cosponsored_relationship', methods=['GET'])
def create_cosponsored_relationship():
    """Link Person nodes to Legislation nodes from the cosponsorship CSV.

    Reads ``cosponsored_legislation.csv`` from the parent directory of this
    module. For every row that has both a ``cosponsored_by`` and a ``number``
    value, the Person is looked up by ``bioguideId`` and the Legislation by
    ``number``; when both exist a ``COSPONSORED`` relationship is ensured
    between them. Rows whose nodes cannot be found are logged and skipped.

    Returns:
        A ``(json_response, status)`` tuple: 404 when the CSV file is missing,
        201 once all rows have been processed, 500 if any exception is raised.
    """
    try:
        # Path to the CSV file
        csv_file_path = os.path.join(os.path.dirname(__file__), '..', 'cosponsored_legislation.csv')
        if not os.path.exists(csv_file_path):
            return jsonify({"status": "error", "message": "CSV file not found"}), 404

        driver = get_driver()
        with driver.session() as session:
            with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
                for row in csv.DictReader(csv_file):
                    cosponsored_by = row.get('cosponsored_by')
                    number = row.get('number')
                    if not (cosponsored_by and number):
                        continue  # incomplete row: nothing to link

                    person_node = session.run(
                        "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                        {"bioguideId": cosponsored_by}
                    ).single()
                    legislation_node = session.run(
                        "MATCH (l:Legislation {number: $number}) RETURN l",
                        {"number": number}
                    ).single()

                    if person_node is None:
                        neo4j_logger.warning(f"No Person node found for bioguideId: {cosponsored_by}")
                    if legislation_node is None:
                        neo4j_logger.warning(f"No Legislation node found for number: {number}")
                    if person_node is None or legislation_node is None:
                        continue

                    person = person_node['p']
                    legislation = legislation_node['l']
                    # MERGE instead of CREATE: calling this endpoint again must
                    # not add a second, duplicate COSPONSORED relationship.
                    session.run(
                        "MATCH (p:Person), (l:Legislation) "
                        "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                        "MERGE (p)-[:COSPONSORED]->(l)",
                        {"person_id": person.id, "legislation_id": legislation.id}
                    )
                    # .get() so a node lacking 'name'/'title' cannot abort the
                    # whole import from inside a log statement.
                    neo4j_logger.info(
                        f"Created COSPONSORED relationship from Person {person.get('name')} "
                        f"to Legislation {legislation.get('title')}"
                    )
        return jsonify({"status": "success", "message": "COSPONSORED relationships created successfully"}), 201
    except Exception as e:
        # Top-level boundary of the request: report the failure as a 500.
        neo4j_logger.error(f"Error creating COSPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

View File

@ -0,0 +1,43 @@
# endpoints/create_sponsored_relationship.py
from flask import Blueprint, jsonify
from neo4j import GraphDatabase
import logging
from app import get_driver, neo4j_logger # Use absolute import
bp = Blueprint('create_sponsored_relationship', __name__)


@bp.route('/create_sponsored_relationship', methods=['GET'])
def create_sponsored_relationship():
    """Link each Legislation node to the Person named in its ``sponsored_by``.

    Every Legislation node is read; when its ``sponsored_by`` property is set,
    the Person with that ``bioguideId`` is looked up and a ``SPONSORED``
    relationship from the Person to the Legislation is ensured. Legislation
    whose sponsor cannot be found is logged and skipped.

    Returns:
        A ``(json_response, status)`` tuple: 201 once all nodes have been
        processed, 500 if any exception is raised.
    """
    try:
        driver = get_driver()
        with driver.session() as session:
            # Materialise the result up front so the loop does not depend on a
            # still-open result while further queries run on the same session.
            legislation_records = list(session.run("MATCH (l:Legislation) RETURN l"))
            for record in legislation_records:
                legislation = record['l']
                bioguide_id = legislation.get('sponsored_by')
                if not bioguide_id:
                    continue  # no sponsor recorded on this node

                person_node = session.run(
                    "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                    {"bioguideId": bioguide_id}
                ).single()
                if person_node is None:
                    neo4j_logger.warning(f"No Person node found for bioguideId: {bioguide_id}")
                    continue

                person = person_node['p']
                # MERGE instead of CREATE: calling this endpoint again must not
                # add a second, duplicate SPONSORED relationship.
                session.run(
                    "MATCH (p:Person), (l:Legislation) "
                    "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                    "MERGE (p)-[:SPONSORED]->(l)",
                    {"person_id": person.id, "legislation_id": legislation.id}
                )
                # .get() so a node lacking 'name'/'title' cannot abort the
                # whole run from inside a log statement.
                neo4j_logger.info(
                    f"Created SPONSORED relationship from Person {person.get('name')} "
                    f"to Legislation {legislation.get('title')}"
                )
        return jsonify({"status": "success", "message": "SPONSORED relationships created successfully"}), 201
    except Exception as e:
        # Top-level boundary of the request: report the failure as a 500.
        neo4j_logger.error(f"Error creating SPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

View File

@ -0,0 +1,99 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json
# Flask blueprint on which this module registers its /get_cosponsored route.
bp = Blueprint('get_cosponsored', __name__)
# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    """Return the ``bioguideIds`` list stored in the JSON cache file.

    The file location is taken from the ``CACHE_PATH`` environment variable.

    Returns:
        The non-empty value stored under the ``"bioguideIds"`` key, or
        ``None`` when the variable is unset, the file is missing or
        unreadable, the JSON root is not an object, or the list is absent or
        empty. Every failure is logged with ``logging.error``.
    """
    cache_path = os.getenv("CACHE_PATH")
    if not cache_path:
        logging.error("CACHE_PATH not found in .env file")
        return None
    if not os.path.exists(cache_path):
        logging.error("Cache file not found at specified path")
        return None
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(cache_path, 'r', encoding='utf-8') as file:
            cache_data = json.load(file)
        # A JSON root that is not an object (e.g. a bare list) has no keys.
        bioguideIds = cache_data.get("bioguideIds", []) if isinstance(cache_data, dict) else []
        if not bioguideIds:
            logging.error("bioguideIds not found in cache.json")
            return None
        return bioguideIds
    except Exception as e:
        # Best-effort reader: callers only distinguish "ids" from "no ids".
        logging.error(f"Failed to read cache file: {str(e)}")
        return None
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten nested dictionaries and lists into a single-level dict.

    Nested dict keys are joined to their parent key with ``sep``; list
    elements use their index as the key segment. ``None`` and ``""`` leaf
    values are replaced by the placeholder string ``"NONE"``. Empty dicts and
    empty lists contribute no keys.

    Args:
        d: The dictionary to flatten.
        parent_key: Prefix prepended (with ``sep``) to every produced key.
        sep: Separator placed between key segments.

    Returns:
        A new dict whose values are all leaves (never a dict or a list).
    """
    flat = {}
    _flatten_into(flat, parent_key, d, sep)
    return flat


def _flatten_into(out, key, value, sep):
    """Store the flattened leaves of *value* in *out* under the prefix *key*."""
    if isinstance(value, dict):
        for child_key, child_value in value.items():
            # An empty prefix means "top level": use the child key unchanged.
            full_key = f"{key}{sep}{child_key}" if key else child_key
            _flatten_into(out, full_key, child_value, sep)
    elif isinstance(value, list):
        # Elements may be dicts, lists or plain scalars; recursing handles all
        # three instead of assuming every element is a dict.
        for index, element in enumerate(value):
            _flatten_into(out, f"{key}{sep}{index}", element, sep)
    else:
        out[key] = value if value is not None and value != "" else "NONE"
# Function to write data to CSV
def write_to_csv(data, filename):
    """Write a sequence of row dicts to *filename* as a CSV file with a header.

    The header is the union of all keys found in *data*, in the order each
    key is first seen, so the column layout is the same on every run. Rows
    lacking a column get an empty cell. The file is written as UTF-8.

    Args:
        data: Re-iterable collection of dicts (it is traversed twice).
        filename: Path of the CSV file to create or overwrite.
    """
    # dict.fromkeys acts as an ordered set: de-duplicates, keeps first-seen order.
    fieldnames = list(dict.fromkeys(key for item in data for key in item))
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(data)
@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    """Fetch cosponsored legislation for every cached member and save it as CSV.

    For each bioguideId returned by ``get_bioguideIds_from_cache`` the
    Congress API ``cosponsored-legislation`` endpoint is queried. Each returned
    item is flattened, tagged with a ``cosponsored_by`` column holding the
    member's bioguideId, and all rows are written to
    ``cosponsored_legislation.csv``.

    Returns:
        A Flask JSON response, with status 404 when no bioguideIds or no
        legislation are found, 500 when the API key is missing, an API call
        fails or any exception is raised, and the default status on success.
    """
    # Upper bound, in seconds, on each API call so a stalled connection
    # cannot block the request indefinitely.
    request_timeout = 30
    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        # Read the key once; without it the URL would carry the literal "None".
        api_key = os.getenv("CONGRESS_API_KEY")
        if not api_key:
            logging.error("CONGRESS_API_KEY not found in environment")
            return jsonify({"error": "CONGRESS_API_KEY is not set"}), 500

        all_data = []
        for bioguideId in bioguideIds:
            # Make request to Congress API
            # NOTE(review): only the first response page is read — confirm
            # whether further pages need to be followed for complete data.
            url = f"https://api.congress.gov/v3/member/{bioguideId}/cosponsored-legislation"
            response = requests.get(url, params={"api_key": api_key}, timeout=request_timeout)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            data = response.json().get("cosponsoredLegislation", [])
            # Add cosponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                # Test for emptiness BEFORE adding the extra column; afterwards
                # the row is never empty and the check could not trigger.
                if not flattened_item:
                    continue  # Skip empty rows
                flattened_item["cosponsored_by"] = bioguideId
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)
        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        # Top-level boundary of the request: report the failure as a 500.
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

View File

@ -1,144 +1,99 @@
from flask import Blueprint, jsonify from flask import Blueprint, jsonify, request
import requests import requests
import json import csv
import os import os
import pandas as pd import logging
import json
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger
bp = Blueprint('get_sponsored', __name__) bp = Blueprint('get_sponsored', __name__)
CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'
def load_cache(): # Function to retrieve bioguideIds from cache.json file
if os.path.exists(CACHE_FILE): def get_bioguideIds_from_cache():
with open(CACHE_FILE, 'r') as f: CACHE_PATH = os.getenv("CACHE_PATH")
return json.load(f) if not CACHE_PATH:
else: logging.error("CACHE_PATH not found in .env file")
neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.") return None
return {}
def save_cache(cache_data): if not os.path.exists(CACHE_PATH):
with open(CACHE_FILE, 'w') as f: logging.error("Cache file not found at specified path")
json.dump(cache_data, f) return None
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
def write_to_csv(legislation_data, csv_file): try:
# Flatten the nested dictionaries with open(CACHE_PATH, 'r') as file:
flattened_legislation = [] cache_data = json.load(file)
for item in legislation_data: bioguideIds = cache_data.get("bioguideIds", [])
flattened_item = {} if not bioguideIds:
flatten_dict(item, "", flattened_item) logging.error("bioguideIds not found in cache.json")
flattened_legislation.append(flattened_item) return None
return bioguideIds
df = pd.DataFrame(flattened_legislation) except Exception as e:
logging.error(f"Failed to read cache file: {str(e)}")
# Debugging: Print the first few entries of the DataFrame to inspect its structure return None
print("Debugging DataFrame:")
if not df.empty:
print(df.head())
else:
print("DataFrame is empty.")
if df.empty:
neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
print("DataFrame is empty. Debugging information:")
for item in flattened_legislation[:5]: # Print first 5 items
print(json.dumps(item, indent=4))
return
df.to_csv(csv_file, index=False)
neo4j_logger.info(f"Data written to CSV file: {csv_file}")
# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'): def flatten_dict(d, parent_key='', sep='_'):
items = [] items = []
for k, v in d.items(): for k, v in d.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict): if isinstance(v, dict):
# Debugging: Print the key and value when entering a nested dictionary
print(f"Entering nested dictionary with key: {new_key}")
items.extend(flatten_dict(v, new_key, sep=sep).items()) items.extend(flatten_dict(v, new_key, sep=sep).items())
elif isinstance(v, list): elif isinstance(v, list):
# Handle lists by converting them to strings or other appropriate representation for i, item in enumerate(v):
if v: items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
items.append((new_key, ', '.join(map(str, v))))
else:
# If the list is empty, add an empty string or a placeholder
items.append((new_key, ''))
elif v is not None:
# Debugging: Print the key and value when adding a non-dict, non-list item
print(f"Adding {new_key}: {v}")
items.append((new_key, v))
else: else:
# Handle None values appropriately (e.g., add an empty string or a placeholder) items.append((new_key, v if v is not None and v != "" else "NONE"))
items.append((new_key, ''))
return dict(items) return dict(items)
@bp.route('/get_sponsored') # Function to write data to CSV
def get_sponsored(): def write_to_csv(data, filename):
# Load bioguideIds and legislation from the same cache keys = set()
cache = load_cache() for item in data:
if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0: keys.update(item.keys())
return jsonify({"message": "No bioguideIds found in cache"}), 404
# Print the number of items found in the cache initially with open(filename, 'w', newline='') as output_file:
initial_bioguideIds_count = len(cache['bioguideIds']) dict_writer = csv.DictWriter(output_file, fieldnames=keys)
print(f"Initial bioguideIds count: {initial_bioguideIds_count}") dict_writer.writeheader()
dict_writer.writerows(data)
processed_legislation_count = 0 @bp.route('/get_sponsored', methods=['GET'])
all_legislation_data = [] def get_sponsored_legislation():
try:
# Retrieve bioguideIds from cache.json
bioguideIds = get_bioguideIds_from_cache()
if not bioguideIds:
return jsonify({"error": "bioguideIds not found"}), 404
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0: all_data = []
# Step 1: Retrieve a sponsor from the cache
current_bioguideId = cache['bioguideIds'].pop(0)
if current_bioguideId is None:
continue
print(f"Processing bioguideId: {current_bioguideId}") for bioguideId in bioguideIds:
# Make request to Congress API
api_key = os.getenv("CONGRESS_API_KEY")
url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
response = requests.get(url)
congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation" if response.status_code != 200:
logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500
# Include API key in headers (if required) data = response.json().get("sponsoredLegislation", [])
api_key = os.getenv('CONGRESS_API_KEY')
if not api_key:
neo4j_logger.error("Congress API key not found in environment variables")
continue
headers = { # Add sponsored_by column and handle nested items
'X-API-KEY': api_key for item in data:
} flattened_item = flatten_dict(item)
flattened_item["sponsored_by"] = bioguideId
if not any(flattened_item.values()):
continue # Skip empty rows
all_data.append(flattened_item)
# Step 2: Fetch sponsored legislation for the member if not all_data:
response = requests.get(congress_api_url, headers=headers) return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404
print(f"Response Status Code: {response.status_code}")
print(f"Response Text: {response.text}")
if response.status_code != 200: # Write data to CSV
neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}") csv_filename = f"sponsored_legislation.csv"
continue write_to_csv(all_data, csv_filename)
response_data = response.json() return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
# Debugging statement to check the raw API response except Exception as e:
print("Raw API Response:") logging.error(f"An error occurred: {str(e)}")
print(json.dumps(response_data, indent=4)) return jsonify({"error": str(e)}), 500
# Extract legislation data from the response
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
for result in response_data['sponsoredLegislation']:
all_legislation_data.append(result)
# Debugging statement to check the number of legislation items collected
print(f"Number of legislation items collected: {len(all_legislation_data)}")
if len(all_legislation_data) > 0:
# Print first few items to ensure data is structured correctly
for i, item in enumerate(all_legislation_data[:5]):
print(f"\nLegislation Item {i+1}:")
print(json.dumps(item, indent=4))
# Write the extracted legislation data to a CSV file
write_to_csv(all_legislation_data, CSV_FILE)
return jsonify({"message": "Legislation data written to CSV successfully"}), 200

View File

@ -1,95 +1,54 @@
# endpoints/process_sponsors.py from flask import Blueprint, jsonify, request
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import json
import os import os
import csv
from neo4j import GraphDatabase
import logging
bp = Blueprint('process_sponsors', __name__) bp = Blueprint('process_sponsored', __name__)
CACHE_FILE = 'cache.json' # Custom logger for the process_sponsored blueprint
process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
process_sponsored_logger.setLevel(logging.INFO)
process_sponsored_handler = logging.StreamHandler()
process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
process_sponsored_handler.setFormatter(process_sponsored_formatter)
process_sponsored_logger.addHandler(process_sponsored_handler)
def load_cache(): @bp.route('/process_sponsored', methods=['GET'])
if os.path.exists(CACHE_FILE): def process_sponsored():
with open(CACHE_FILE, 'r') as f: csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")
return json.load(f)
return {}
def save_cache(cache_data): if not csv_file_path:
with open(CACHE_FILE, 'w') as f: return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400
json.dump(cache_data, f)
@bp.route('/process_sponsors') try:
def process_sponsors(): with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
cache = load_cache() reader = csv.DictReader(file)
driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))
session = driver.session()
if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0: for row in reader:
return jsonify({"message": "No legislation entries found in cache"}), 404 properties = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}
# Print the number of items found in the cache initially # Log the CSV row
initial_legislation_entries_count = len(cache['legislation_entries']) process_sponsored_logger.info(f"Processing row: {properties}")
print(f"Initial legislation entries count: {initial_legislation_entries_count}")
processed_legislation_count = 0 query = (
"MERGE (l:legislation {"
+ ", ".join(f"{key}: $props.{key}" for key in properties)
+ "})"
)
while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0: # Log the MERGE query
# Step 1: Retrieve a legislation entry from the cache process_sponsored_logger.info(f"Executing query: {query}")
legislation_entry = cache['legislation_entries'].pop(0)
if not legislation_entry or 'bioguideId' not in legislation_entry: session.run(query, props=properties)
continue
bioguideId = legislation_entry['bioguideId'] session.close()
legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'} driver.close()
# Step 2: Create a legislation node with the properties return jsonify({"message": f"Processed {reader.line_num - 1} sponsored legislations"}), 200
driver = get_driver()
with driver.session() as session:
legislation_id = legislation_properties['id']
query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
existing_legislation = session.run(query, legislation_id=legislation_id).single() except Exception as e:
if not existing_legislation: process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties]) return jsonify({"error": str(e)}), 500
query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
result = session.run(query, **legislation_properties)
# Convert the created node to a dictionary
new_legislation_node = {
'id': result.single()['n'].id,
'labels': list(result.single()['n'].labels),
**{key: value for key, value in result.single()['n'].items()}
}
neo4j_logger.info(f"Created legislation node: {new_legislation_node}")
# Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
with driver.session() as session:
person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
if not sponsor_node:
neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
continue
legislation_id = legislation_properties['id']
relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
relationship_node = {
'id': result.single()['r'].id,
'type': "sponsored",
**{key: value for key, value in result.single()['r'].items()}
}
neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")
processed_legislation_count += 1
save_cache(cache)
# Print the total number of legislation items processed
print(f"Total processed legislation count: {processed_legislation_count}")
return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200