Compare commits


No commits in common. "faeea6e4e0acf7bb7525e33d9d8d1ddac9616ea7c51b8eea5b20ecb1f2038434" and "084ddc26d399e43601d2d985e39ddcbf22c1726b2bb49267454ea3876643bc01" have entirely different histories.

5 changed files with 196 additions and 314 deletions

View File

@@ -1,62 +0,0 @@
# endpoints/create_cosponsored_relationship.py
from flask import Blueprint, jsonify
import logging
import csv
import os
from neo4j import GraphDatabase
from app import get_driver, neo4j_logger  # Ensure relative imports work

bp = Blueprint('create_cosponsored_relationship', __name__)

@bp.route('/create_cosponsored_relationship', methods=['GET'])
def create_cosponsored_relationship():
    try:
        # Path to the CSV file
        csv_file_path = os.path.join(os.path.dirname(__file__), '..', 'cosponsored_legislation.csv')
        if not os.path.exists(csv_file_path):
            return jsonify({"status": "error", "message": "CSV file not found"}), 404

        driver = get_driver()
        with driver.session() as session:
            # Read the CSV data
            with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
                csv_reader = csv.DictReader(csv_file)
                for row in csv_reader:
                    cosponsored_by = row.get('cosponsored_by')
                    number = row.get('number')
                    if cosponsored_by and number:
                        person_node = session.run(
                            "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                            {"bioguideId": cosponsored_by}
                        ).single()
                        legislation_node = session.run(
                            "MATCH (l:Legislation {number: $number}) RETURN l",
                            {"number": number}
                        ).single()
                        if person_node and legislation_node:
                            person = person_node['p']
                            legislation = legislation_node['l']
                            session.run(
                                "MATCH (p:Person), (l:Legislation) "
                                "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                                "CREATE (p)-[:COSPONSORED]->(l)",
                                {"person_id": person.id, "legislation_id": legislation.id}
                            )
                            neo4j_logger.info(f"Created COSPONSORED relationship from Person {person['name']} to Legislation {legislation['title']}")
                        else:
                            if not person_node:
                                neo4j_logger.warning(f"No Person node found for bioguideId: {cosponsored_by}")
                            if not legislation_node:
                                neo4j_logger.warning(f"No Legislation node found for number: {number}")

        return jsonify({"status": "success", "message": "COSPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating COSPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
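Note: the loop above issues two MATCH lookups per CSV row and then a CREATE keyed on internal id() values, which also allows duplicate COSPONSORED edges if the CSV is imported twice. A minimal alternative sketch, assuming the same Person.bioguideId and Legislation.number properties (link_cosponsor is a hypothetical helper, not part of this diff):

# Hypothetical helper: one MERGE per CSV row instead of two MATCHes plus a CREATE.
COSPONSOR_QUERY = (
    "MATCH (p:Person {bioguideId: $bioguideId}) "
    "MATCH (l:Legislation {number: $number}) "
    "MERGE (p)-[:COSPONSORED]->(l) "
    "RETURN count(*) AS linked"
)

def link_cosponsor(session, bioguide_id, number):
    # MERGE keeps the import idempotent: re-running the CSV does not
    # create duplicate COSPONSORED relationships.
    record = session.run(COSPONSOR_QUERY, bioguideId=bioguide_id, number=number).single()
    return record["linked"] if record else 0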

View File

@@ -1,43 +0,0 @@
# endpoints/create_sponsored_relationship.py
from flask import Blueprint, jsonify
from neo4j import GraphDatabase
import logging
from app import get_driver, neo4j_logger  # Use absolute import

bp = Blueprint('create_sponsored_relationship', __name__)

@bp.route('/create_sponsored_relationship', methods=['GET'])
def create_sponsored_relationship():
    try:
        driver = get_driver()
        with driver.session() as session:
            # Step 3: Implement the endpoint logic
            legislation_nodes = session.run("MATCH (l:Legislation) RETURN l")
            for record in legislation_nodes:
                legislation = record['l']
                bioguide_id = legislation.get('sponsored_by')
                if bioguide_id:
                    person_node = session.run(
                        "MATCH (p:Person {bioguideId: $bioguideId}) RETURN p",
                        {"bioguideId": bioguide_id}
                    ).single()
                    if person_node:
                        person = person_node['p']
                        session.run(
                            "MATCH (p:Person), (l:Legislation) "
                            "WHERE id(p) = $person_id AND id(l) = $legislation_id "
                            "CREATE (p)-[:SPONSORED]->(l)",
                            {"person_id": person.id, "legislation_id": legislation.id}
                        )
                        neo4j_logger.info(f"Created SPONSORED relationship from Person {person['name']} to Legislation {legislation['title']}")
                    else:
                        neo4j_logger.warning(f"No Person node found for bioguideId: {bioguide_id}")

        return jsonify({"status": "success", "message": "SPONSORED relationships created successfully"}), 201
    except Exception as e:
        neo4j_logger.error(f"Error creating SPONSORED relationships: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
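Since each Legislation node already carries a sponsored_by property, the per-node loop above could in principle be collapsed into a single Cypher statement. A rough sketch under that assumption (link_all_sponsors is a made-up name for illustration):

# Hypothetical one-shot variant: let Cypher do the per-node lookup and linking.
SPONSOR_BATCH_QUERY = (
    "MATCH (l:Legislation) WHERE l.sponsored_by IS NOT NULL "
    "MATCH (p:Person {bioguideId: l.sponsored_by}) "
    "MERGE (p)-[:SPONSORED]->(l) "
    "RETURN count(*) AS linked"
)

def link_all_sponsors(driver):
    with driver.session() as session:
        # Single round trip; MERGE avoids duplicate SPONSORED edges on re-runs.
        return session.run(SPONSOR_BATCH_QUERY).single()["linked"]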

View File

@@ -1,99 +0,0 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json

bp = Blueprint('get_cosponsored', __name__)

# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    CACHE_PATH = os.getenv("CACHE_PATH")
    if not CACHE_PATH:
        logging.error("CACHE_PATH not found in .env file")
        return None

    if not os.path.exists(CACHE_PATH):
        logging.error("Cache file not found at specified path")
        return None

    try:
        with open(CACHE_PATH, 'r') as file:
            cache_data = json.load(file)
            bioguideIds = cache_data.get("bioguideIds", [])
            if not bioguideIds:
                logging.error("bioguideIds not found in cache.json")
                return None
            return bioguideIds
    except Exception as e:
        logging.error(f"Failed to read cache file: {str(e)}")
        return None

# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, item in enumerate(v):
                items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)

# Function to write data to CSV
def write_to_csv(data, filename):
    keys = set()
    for item in data:
        keys.update(item.keys())

    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

@bp.route('/get_cosponsored', methods=['GET'])
def get_cosponsored_legislation():
    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        all_data = []
        for bioguideId in bioguideIds:
            # Make request to Congress API
            api_key = os.getenv("CONGRESS_API_KEY")
            url = f"https://api.congress.gov/v3/member/{bioguideId}/cosponsored-legislation?api_key={api_key}"
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve cosponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            data = response.json().get("cosponsoredLegislation", [])
            # Add cosponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                flattened_item["cosponsored_by"] = bioguideId
                if not any(flattened_item.values()):
                    continue  # Skip empty rows
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No cosponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "cosponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)

        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500
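One caveat in the helper above: flatten_dict assumes every list element is itself a dict and stores the nested result as a single value, so DictWriter can end up writing a dict repr into a CSV cell. A hedged alternative sketch that flattens list elements into scalar columns as well (flatten_record is a hypothetical name, reusing the same key scheme):

# Hypothetical variant: never hand DictWriter a nested dict as a cell value.
def flatten_record(d, parent_key='', sep='_'):
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_record(v, new_key, sep=sep))
        elif isinstance(v, list):
            for i, elem in enumerate(v):
                if isinstance(elem, dict):
                    # Recurse into dict elements, indexing them in the column name.
                    items.update(flatten_record(elem, f"{new_key}{sep}{i}", sep=sep))
                else:
                    items[f"{new_key}{sep}{i}"] = elem if elem not in (None, "") else "NONE"
        else:
            items[new_key] = v if v not in (None, "") else "NONE"
    return items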

View File

@@ -1,99 +1,144 @@
from flask import Blueprint, jsonify, request
import requests
import csv
import os
import logging
import json

bp = Blueprint('get_sponsored', __name__)

# Function to retrieve bioguideIds from cache.json file
def get_bioguideIds_from_cache():
    CACHE_PATH = os.getenv("CACHE_PATH")
    if not CACHE_PATH:
        logging.error("CACHE_PATH not found in .env file")
        return None

    if not os.path.exists(CACHE_PATH):
        logging.error("Cache file not found at specified path")
        return None

    try:
        with open(CACHE_PATH, 'r') as file:
            cache_data = json.load(file)
            bioguideIds = cache_data.get("bioguideIds", [])
            if not bioguideIds:
                logging.error("bioguideIds not found in cache.json")
                return None
            return bioguideIds
    except Exception as e:
        logging.error(f"Failed to read cache file: {str(e)}")
        return None

# Function to flatten nested dictionaries and lists
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, item in enumerate(v):
                items.append((f"{new_key}_{i}", flatten_dict(item, '', sep=sep)))
        else:
            items.append((new_key, v if v is not None and v != "" else "NONE"))
    return dict(items)

# Function to write data to CSV
def write_to_csv(data, filename):
    keys = set()
    for item in data:
        keys.update(item.keys())

    with open(filename, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

@bp.route('/get_sponsored', methods=['GET'])
def get_sponsored_legislation():
    try:
        # Retrieve bioguideIds from cache.json
        bioguideIds = get_bioguideIds_from_cache()
        if not bioguideIds:
            return jsonify({"error": "bioguideIds not found"}), 404

        all_data = []
        for bioguideId in bioguideIds:
            # Make request to Congress API
            api_key = os.getenv("CONGRESS_API_KEY")
            url = f"https://api.congress.gov/v3/member/{bioguideId}/sponsored-legislation?api_key={api_key}"
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"Failed to retrieve sponsored legislation for bioguideId {bioguideId}: {response.text}")
                return jsonify({"error": f"Failed to retrieve data from Congress API for bioguideId {bioguideId}"}), 500

            data = response.json().get("sponsoredLegislation", [])
            # Add sponsored_by column and handle nested items
            for item in data:
                flattened_item = flatten_dict(item)
                flattened_item["sponsored_by"] = bioguideId
                if not any(flattened_item.values()):
                    continue  # Skip empty rows
                all_data.append(flattened_item)

        if not all_data:
            return jsonify({"error": "No sponsored legislation found for the given bioguideIds"}), 404

        # Write data to CSV
        csv_filename = "sponsored_legislation.csv"
        write_to_csv(all_data, csv_filename)

        return jsonify({"message": "Data written to CSV successfully", "filename": csv_filename})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return jsonify({"error": str(e)}), 500

from flask import Blueprint, jsonify
import requests
import json
import os
import pandas as pd
# Assuming you have these functions and configurations in your app.py
from app import get_driver, neo4j_logger

bp = Blueprint('get_sponsored', __name__)

CACHE_FILE = 'cache.json'
CSV_FILE = 'legislation.csv'

def load_cache():
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, 'r') as f:
            return json.load(f)
    else:
        neo4j_logger.info(f"Cache file {CACHE_FILE} does not exist. Initializing empty cache.")
        return {}

def save_cache(cache_data):
    with open(CACHE_FILE, 'w') as f:
        json.dump(cache_data, f)
    neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")

def write_to_csv(legislation_data, csv_file):
    # Flatten the nested dictionaries
    flattened_legislation = []
    for item in legislation_data:
        flattened_item = {}
        flatten_dict(item, "", flattened_item)
        flattened_legislation.append(flattened_item)

    df = pd.DataFrame(flattened_legislation)

    # Debugging: Print the first few entries of the DataFrame to inspect its structure
    print("Debugging DataFrame:")
    if not df.empty:
        print(df.head())
    else:
        print("DataFrame is empty.")

    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")

def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            # Debugging: Print the key and value when entering a nested dictionary
            print(f"Entering nested dictionary with key: {new_key}")
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            # Handle lists by converting them to strings or other appropriate representation
            if v:
                items.append((new_key, ', '.join(map(str, v))))
            else:
                # If the list is empty, add an empty string or a placeholder
                items.append((new_key, ''))
        elif v is not None:
            # Debugging: Print the key and value when adding a non-dict, non-list item
            print(f"Adding {new_key}: {v}")
            items.append((new_key, v))
        else:
            # Handle None values appropriately (e.g., add an empty string or a placeholder)
            items.append((new_key, ''))
    return dict(items)

@bp.route('/get_sponsored')
def get_sponsored():
    # Load bioguideIds and legislation from the same cache
    cache = load_cache()
    if 'bioguideIds' not in cache or len(cache['bioguideIds']) == 0:
        return jsonify({"message": "No bioguideIds found in cache"}), 404

    # Print the number of items found in the cache initially
    initial_bioguideIds_count = len(cache['bioguideIds'])
    print(f"Initial bioguideIds count: {initial_bioguideIds_count}")

    processed_legislation_count = 0
    all_legislation_data = []

    while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
        # Step 1: Retrieve a sponsor from the cache
        current_bioguideId = cache['bioguideIds'].pop(0)
        if current_bioguideId is None:
            continue

        print(f"Processing bioguideId: {current_bioguideId}")

        congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"

        # Include API key in headers (if required)
        api_key = os.getenv('CONGRESS_API_KEY')
        if not api_key:
            neo4j_logger.error("Congress API key not found in environment variables")
            continue

        headers = {
            'X-API-KEY': api_key
        }

        # Step 2: Fetch sponsored legislation for the member
        response = requests.get(congress_api_url, headers=headers)
        print(f"Response Status Code: {response.status_code}")
        print(f"Response Text: {response.text}")

        if response.status_code != 200:
            neo4j_logger.error(f"Failed to fetch sponsored legislation for bioguideId {current_bioguideId}: Status Code {response.status_code}, Response: {response.text}")
            continue

        response_data = response.json()
        # Debugging statement to check the raw API response
        print("Raw API Response:")
        print(json.dumps(response_data, indent=4))

        # Extract legislation data from the response
        if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
            for result in response_data['sponsoredLegislation']:
                all_legislation_data.append(result)

    # Debugging statement to check the number of legislation items collected
    print(f"Number of legislation items collected: {len(all_legislation_data)}")

    if len(all_legislation_data) > 0:
        # Print first few items to ensure data is structured correctly
        for i, item in enumerate(all_legislation_data[:5]):
            print(f"\nLegislation Item {i+1}:")
            print(json.dumps(item, indent=4))

    # Write the extracted legislation data to a CSV file
    write_to_csv(all_legislation_data, CSV_FILE)
    return jsonify({"message": "Legislation data written to CSV successfully"}), 200

View File

@@ -1,54 +1,95 @@
from flask import Blueprint, jsonify, request
import os
import csv
from neo4j import GraphDatabase
import logging

bp = Blueprint('process_sponsored', __name__)

# Custom logger for the process_sponsored blueprint
process_sponsored_logger = logging.getLogger('ProcessSponsoredLogger')
process_sponsored_logger.setLevel(logging.INFO)
process_sponsored_handler = logging.StreamHandler()
process_sponsored_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
process_sponsored_handler.setFormatter(process_sponsored_formatter)
process_sponsored_logger.addHandler(process_sponsored_handler)

@bp.route('/process_sponsored', methods=['GET'])
def process_sponsored():
    csv_file_path = os.getenv("SPONSORED_LEGISLATION_CSV")

    if not csv_file_path:
        return jsonify({"error": "SPONSORED_LEGISLATION_CSV environment variable is not set"}), 400

    try:
        with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            driver = GraphDatabase.driver(os.getenv("NEO4J_URI"), auth=(os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")))
            session = driver.session()

            for row in reader:
                properties = {key: value.strip() if isinstance(value, str) else value for key, value in row.items()}

                # Log the CSV row
                process_sponsored_logger.info(f"Processing row: {properties}")

                query = (
                    "MERGE (l:legislation {"
                    + ", ".join(f"{key}: $props.{key}" for key in properties)
                    + "})"
                )

                # Log the MERGE query
                process_sponsored_logger.info(f"Executing query: {query}")

                session.run(query, props=properties)

            session.close()
            driver.close()

        return jsonify({"message": f"Processed {reader.line_num - 1} sponsored legislations"}), 200
    except Exception as e:
        process_sponsored_logger.error(f"Error processing sponsored legislation: {e}")
        return jsonify({"error": str(e)}), 500

# endpoints/process_sponsors.py
from flask import Blueprint, jsonify
from app import get_driver, neo4j_logger
import json
import os

bp = Blueprint('process_sponsors', __name__)

CACHE_FILE = 'cache.json'

def load_cache():
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, 'r') as f:
            return json.load(f)
    return {}

def save_cache(cache_data):
    with open(CACHE_FILE, 'w') as f:
        json.dump(cache_data, f)

@bp.route('/process_sponsors')
def process_sponsors():
    cache = load_cache()

    if 'legislation_entries' not in cache or len(cache['legislation_entries']) == 0:
        return jsonify({"message": "No legislation entries found in cache"}), 404

    # Print the number of items found in the cache initially
    initial_legislation_entries_count = len(cache['legislation_entries'])
    print(f"Initial legislation entries count: {initial_legislation_entries_count}")

    processed_legislation_count = 0

    while 'legislation_entries' in cache and len(cache['legislation_entries']) > 0:
        # Step 1: Retrieve a legislation entry from the cache
        legislation_entry = cache['legislation_entries'].pop(0)
        if not legislation_entry or 'bioguideId' not in legislation_entry:
            continue

        bioguideId = legislation_entry['bioguideId']
        legislation_properties = {key: value for key, value in legislation_entry.items() if key != 'bioguideId'}

        # Step 2: Create a legislation node with the properties
        driver = get_driver()
        with driver.session() as session:
            legislation_id = legislation_properties['id']
            query = f"MATCH (n:Legislation {{id: $legislation_id}}) RETURN n"
            neo4j_logger.info(f"Executing query: {query} with params: {{legislation_id: '{legislation_id}'}}")
            existing_legislation = session.run(query, legislation_id=legislation_id).single()
            if not existing_legislation:
                properties = ', '.join([f'{key}: "${key}"' for key in legislation_properties])
                query = f"CREATE (n:Legislation {{{properties}}}) RETURN n"
                neo4j_logger.info(f"Executing query: {query} with data: {legislation_properties}")
                result = session.run(query, **legislation_properties)
                # Convert the created node to a dictionary
                new_legislation_node = {
                    'id': result.single()['n'].id,
                    'labels': list(result.single()['n'].labels),
                    **{key: value for key, value in result.single()['n'].items()}
                }
                neo4j_logger.info(f"Created legislation node: {new_legislation_node}")

        # Step 3: Create a relationship of type "sponsored" from the sponsor to the legislation
        with driver.session() as session:
            person_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}) RETURN a"
            neo4j_logger.info(f"Executing query: {person_query} with params: {{bioguideId: '{bioguideId}'}}")
            sponsor_node = session.run(person_query, bioguideId=bioguideId).single()
            if not sponsor_node:
                neo4j_logger.error(f"Person node does not exist for bioguideId {bioguideId}")
                continue

            legislation_id = legislation_properties['id']
            relationship_query = f"MATCH (a:Person {{bioguideId: $bioguideId}}), (b:Legislation {{id: $legislation_id}}) CREATE (a)-[r:sponsored]->(b) RETURN r"
            neo4j_logger.info(f"Executing query: {relationship_query} with params: {{bioguideId: '{bioguideId}', legislation_id: '{legislation_id}'}}")
            result = session.run(relationship_query, bioguideId=bioguideId, legislation_id=legislation_id)
            relationship_node = {
                'id': result.single()['r'].id,
                'type': "sponsored",
                **{key: value for key, value in result.single()['r'].items()}
            }
            neo4j_logger.info(f"Created sponsored relationship: {relationship_node}")

        processed_legislation_count += 1
        save_cache(cache)

    # Print the total number of legislation items processed
    print(f"Total processed legislation count: {processed_legislation_count}")

    return jsonify({"message": "Sponsorship processing completed successfully", "processed_legislation_count": processed_legislation_count}), 200
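Two details in the CREATE path of the new endpoint are worth flagging: the property string interpolates "$key" inside quotes, so Cypher stores the literal text rather than the bound parameter values, and result.single() is called several times even though a result can only be consumed once. A hedged sketch of a parameterized alternative (create_legislation_node is a hypothetical helper, not part of this diff):

def create_legislation_node(driver, legislation_properties):
    # MERGE on the id, then copy the remaining properties from a map parameter,
    # so values are bound by the driver instead of being spliced into the query text.
    query = (
        "MERGE (n:Legislation {id: $id}) "
        "SET n += $props "
        "RETURN n"
    )
    with driver.session() as session:
        record = session.run(
            query, id=legislation_properties["id"], props=legislation_properties
        ).single()  # consume the result exactly once
        node = record["n"]
        return {"labels": list(node.labels), **dict(node)}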