checkpoint
This commit is contained in:
parent
207449ee26
commit
97c96ed0fc
@ -1,12 +1,15 @@
|
||||
from flask import Blueprint, jsonify
|
||||
from app import get_driver, neo4j_logger
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
# Assuming you have these functions and configurations in your app.py
|
||||
from app import get_driver, neo4j_logger
|
||||
|
||||
# Flask blueprint on which the '/get_sponsored' route below is registered.
bp = Blueprint('get_sponsored', __name__)
|
||||
|
||||
# Path of the JSON cache file checked by load_cache() and written by save_cache().
CACHE_FILE = 'cache.json'
|
||||
# Path that get_sponsored() passes to write_to_csv() as the CSV output file.
CSV_FILE = 'legislation.csv'
|
||||
|
||||
def load_cache():
|
||||
if os.path.exists(CACHE_FILE):
|
||||
@ -21,6 +24,53 @@ def save_cache(cache_data):
|
||||
json.dump(cache_data, f)
|
||||
neo4j_logger.info(f"Saved data to cache file: {CACHE_FILE}")
|
||||
|
||||
def write_to_csv(legislation_data, csv_file):
    """Flatten legislation records and write them to a CSV file.

    Each record is flattened with ``flatten_dict`` so that nested
    dictionaries become single-level columns, then the rows are loaded into
    a pandas DataFrame and written without the index column.

    Args:
        legislation_data: Iterable of dictionaries, one per legislation item.
        csv_file: Path of the CSV file to write.

    Returns:
        None. When the resulting DataFrame is empty, a warning is logged,
        up to five flattened rows are printed for debugging, and no file
        is written.
    """
    # flatten_dict returns a new dict and never mutates its arguments, so
    # its return value must be collected. Its third positional parameter is
    # the key separator, not an output dict.
    flattened_legislation = [flatten_dict(item) for item in legislation_data]

    df = pd.DataFrame(flattened_legislation)
    if df.empty:
        neo4j_logger.warning(f"No data to write to CSV file: {csv_file}")
        print("DataFrame is empty. Debugging information:")
        for item in flattened_legislation[:5]:  # Print first 5 items
            print(json.dumps(item, indent=4))
        return

    # Debugging statement to check DataFrame contents
    print("DataFrame contents:")
    print(df.head())

    df.to_csv(csv_file, index=False)
    neo4j_logger.info(f"Data written to CSV file: {csv_file}")
|
||||
|
||||
def flatten_dict(d, parent_key='', sep='_'):
    """Return a single-level copy of a possibly nested dictionary.

    Keys of nested dictionaries are prefixed with their parent's key and
    ``sep``. Lists are rendered as one string with their elements joined by
    ``', '`` (an empty list becomes ``''``), and ``None`` values become
    ``''``. All other values are copied unchanged.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix applied to every key of ``d``; empty for no prefix.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new flat dictionary; ``d`` itself is not modified.
    """
    flat = {}
    for name, value in d.items():
        full_key = name if not parent_key else f"{parent_key}{sep}{name}"

        if isinstance(value, dict):
            # Debugging: announce the key before descending into the nested dictionary
            print(f"Entering nested dictionary with key: {full_key}")
            flat.update(flatten_dict(value, full_key, sep=sep))
            continue

        if isinstance(value, list):
            # Lists collapse to one string; joining an empty list yields ''
            flat[full_key] = ', '.join(str(element) for element in value)
            continue

        if value is None:
            # None is stored as an empty-string placeholder
            flat[full_key] = ''
            continue

        # Debugging: show each plain value as it is added
        print(f"Adding {full_key}: {value}")
        flat[full_key] = value

    return flat
|
||||
|
||||
@bp.route('/get_sponsored')
|
||||
def get_sponsored():
|
||||
# Load bioguideIds and legislation from the same cache
|
||||
@ -31,13 +81,16 @@ def get_sponsored():
|
||||
# Print the number of items found in the cache initially
|
||||
initial_bioguideIds_count = len(cache['bioguideIds'])
|
||||
print(f"Initial bioguideIds count: {initial_bioguideIds_count}")
|
||||
|
||||
processed_legislation_count = 0
|
||||
all_legislation_data = []
|
||||
|
||||
while 'bioguideIds' in cache and len(cache['bioguideIds']) > 0:
|
||||
# Step 1: Retrieve a sponsor from the cache
|
||||
current_bioguideId = cache['bioguideIds'].pop(0)
|
||||
if current_bioguideId is None:
|
||||
continue
|
||||
|
||||
print(f"Processing bioguideId: {current_bioguideId}")
|
||||
|
||||
congress_api_url = f"https://api.congress.gov/v3/member/{current_bioguideId}/sponsored-legislation"
|
||||
@ -63,68 +116,25 @@ def get_sponsored():
|
||||
|
||||
response_data = response.json()
|
||||
|
||||
# Ensure the response contains 'sponsoredLegislation' key
|
||||
if 'sponsoredLegislation' not in response_data:
|
||||
neo4j_logger.error(f"Missing 'sponsoredLegislation' key in response for bioguideId {current_bioguideId}: {response_data}")
|
||||
continue
|
||||
# Debugging statement to check the raw API response
|
||||
print("Raw API Response:")
|
||||
print(json.dumps(response_data, indent=4))
|
||||
|
||||
sponsored_legislation = response_data['sponsoredLegislation']
|
||||
# Extract legislation data from the response
|
||||
if 'sponsoredLegislation' in response_data and len(response_data['sponsoredLegislation']) > 0:
|
||||
for result in response_data['sponsoredLegislation']:
|
||||
all_legislation_data.append(result)
|
||||
|
||||
# Step 3: Store each piece of legislation in the cache along with the sponsor bioguideId
|
||||
for legislation in sponsored_legislation:
|
||||
if 'number' not in legislation:
|
||||
neo4j_logger.error(f"Missing 'number' field in legislation data for bioguideId {current_bioguideId}: {legislation}")
|
||||
continue
|
||||
# Debugging statement to check the number of legislation items collected
|
||||
print(f"Number of legislation items collected: {len(all_legislation_data)}")
|
||||
|
||||
key = f"legislation_{legislation['number']}"
|
||||
print(f"About to write to cache: {key} - {legislation}")
|
||||
if len(all_legislation_data) > 0:
|
||||
# Print first few items to ensure data is structured correctly
|
||||
for i, item in enumerate(all_legislation_data[:5]):
|
||||
print(f"\nLegislation Item {i+1}:")
|
||||
print(json.dumps(item, indent=4))
|
||||
|
||||
if key not in cache:
|
||||
# Ensure all nested dictionaries are handled properly
|
||||
latest_action = legislation.get('latestAction')
|
||||
# Write the extracted legislation data to a CSV file
|
||||
write_to_csv(all_legislation_data, CSV_FILE)
|
||||
|
||||
# Check if latest_action is None before calling .get()
|
||||
if latest_action is None:
|
||||
latest_action = {}
|
||||
|
||||
policy_area = legislation.get('policyArea', {})
|
||||
|
||||
legislation_info = {
|
||||
'bioguideId': current_bioguideId,
|
||||
**legislation,
|
||||
'latestAction': {
|
||||
'actionDate': latest_action.get('actionDate'),
|
||||
'text': latest_action.get('text')
|
||||
},
|
||||
'policyArea': {
|
||||
'name': policy_area.get('name')
|
||||
}
|
||||
}
|
||||
|
||||
cache[key] = legislation_info
|
||||
processed_legislation_count += 1
|
||||
|
||||
# Save the cache immediately after writing each entry
|
||||
save_cache(cache)
|
||||
|
||||
# Print the updated legislation cache for debugging
|
||||
print(f"Updated legislation cache: {json.dumps(cache, indent=2)}")
|
||||
|
||||
# Step 4: Delete the sponsor from the bioguideIds list (already done by popping)
|
||||
neo4j_logger.info(f"Processed sponsored legislation for bioguideId {current_bioguideId}")
|
||||
|
||||
# Print the number of items left in the bioguide cache
|
||||
remaining_bioguideIds_count = len(cache['bioguideIds'])
|
||||
print(f"Remaining bioguideIds count after processing {current_bioguideId}: {remaining_bioguideIds_count}")
|
||||
|
||||
# Save the cache again to ensure all changes are persisted
|
||||
save_cache(cache)
|
||||
|
||||
# Print the total number of legislation items stored and overall items added to the cache
|
||||
print(f"Total processed legislation count: {processed_legislation_count}")
|
||||
print(f"Overall items in cache: {len(cache)}")
|
||||
|
||||
return jsonify({
|
||||
"message": "Sponsored legislation processed successfully",
|
||||
"processed_legislation_count": processed_legislation_count
|
||||
}), 200
|
||||
return jsonify({"message": "Legislation data written to CSV successfully"}), 200
|
||||
|
Loading…
Reference in New Issue
Block a user