policymap/add_csv.py

55 lines
2.2 KiB
Python
Raw Normal View History

2025-03-01 22:11:51 -08:00
import pandas as pd
from neo4j import GraphDatabase
import argparse
import os
from dotenv import load_dotenv
# Load environment variables from the .env file at import time, so the
# os.getenv() lookups in main() can find the NEO4J_* connection settings.
load_dotenv()
def create_neo4j_session(uri, user, password):
    """Open a Neo4j driver for ``uri`` using basic ``user``/``password`` auth.

    Note that, despite the function's name, the value returned is whatever
    ``GraphDatabase.driver`` produces (a driver), not a session; the caller
    obtains sessions from it and is responsible for closing it.

    Args:
        uri: Connection URI of the Neo4j server.
        user: User name to authenticate with.
        password: Password for ``user``.

    Returns:
        The object returned by ``GraphDatabase.driver``.
    """
    credentials = (user, password)
    return GraphDatabase.driver(uri, auth=credentials)
2025-03-06 14:29:01 -08:00
def _is_missing(value):
    """Return True for ``None`` and float NaN, the markers of an absent value."""
    return value is None or (isinstance(value, float) and value != value)


def _quote_identifier(name):
    """Backtick-quote a Cypher label or property key, doubling inner backticks."""
    return "`" + str(name).replace("`", "``") + "`"


def create_or_merge_node(tx, node_type, properties):
    """MERGE a node labelled ``node_type`` that carries ``properties``.

    Property values are passed to the server as query parameters rather than
    being spliced into the Cypher text, so quotes or Cypher fragments inside
    the data can neither break nor alter the statement. Labels and property
    keys cannot be parameterized in Cypher, so they are backtick-quoted
    instead (embedded backticks are doubled).

    Properties whose value is missing (``None`` or a float NaN) are left out
    of the MERGE pattern instead of being stored as the literal text
    ``'None'`` / ``'nan'``. The remaining values are converted with ``str()``
    so they are stored as strings, exactly as the previous quoted-literal
    query stored them.

    Args:
        tx: Transaction-like object exposing ``run(query, parameters)``.
        node_type: Label to give the node.
        properties: Mapping of property name to value.

    Raises:
        ValueError: If ``node_type`` is missing or blank.
    """
    if _is_missing(node_type) or not str(node_type).strip():
        raise ValueError("node_type must be a non-empty label")

    present = [
        (key, value) for key, value in properties.items() if not _is_missing(value)
    ]
    # Positional parameter names (p0, p1, ...) stay valid whatever characters
    # the property keys themselves contain.
    parameters = {
        f"p{index}": str(value) for index, (_, value) in enumerate(present)
    }
    pattern = ", ".join(
        f"{_quote_identifier(key)}: $p{index}"
        for index, (key, _) in enumerate(present)
    )

    label = _quote_identifier(node_type)
    if pattern:
        query = f"MERGE (n:{label} {{{pattern}}})"
    else:
        query = f"MERGE (n:{label})"
    tx.run(query, parameters)
def _format_date(value):
    """Return ``value`` formatted as ``YYYY-MM-DD``, or None when it is missing."""
    if pd.isnull(value):
        return None
    return pd.to_datetime(value).strftime('%Y-%m-%d')


def _none_if_missing(value):
    """Map a missing CSV cell (NaN/None) to None; return other values unchanged."""
    return None if pd.isnull(value) else value


def main(csv_file_path):
    """Load a '^'-delimited CSV and MERGE one Neo4j node per row.

    Connection settings are read from the ``NEO4J_URI``, ``NEO4J_USER`` and
    ``NEO4J_PASSWORD`` environment variables. The CSV must provide the
    columns ``type``, ``id``, ``title``, ``description``, ``url``,
    ``date_enacted`` and ``date_repealed``; ``type`` becomes the node label
    and the remaining columns become node properties.

    Args:
        csv_file_path: Path to the CSV file to import.

    Raises:
        ValueError: If any of the three Neo4j environment variables is unset
            or empty.
    """
    neo4j_uri = os.getenv("NEO4J_URI")
    neo4j_user = os.getenv("NEO4J_USER")
    neo4j_password = os.getenv("NEO4J_PASSWORD")
    if not all([neo4j_uri, neo4j_user, neo4j_password]):
        raise ValueError("Neo4j URI, user, and password must be set in the environment variables.")

    # Read the CSV before connecting so a bad file fails without opening a driver.
    df = pd.read_csv(
        csv_file_path,
        delimiter='^',
        usecols=['type', 'id', 'title', 'description', 'url', 'date_enacted', 'date_repealed'],
    )

    driver = create_neo4j_session(neo4j_uri, neo4j_user, neo4j_password)
    try:
        with driver.session() as session:
            for _, row in df.iterrows():
                properties = {
                    'id': str(row['id']),
                    # Empty text cells arrive as NaN; represent them as None,
                    # the same way missing dates are represented.
                    'title': _none_if_missing(row['title']),
                    'description': _none_if_missing(row['description']),
                    'url': _none_if_missing(row['url']),
                    'date_enacted': _format_date(row['date_enacted']),
                    'date_repealed': _format_date(row['date_repealed']),
                }
                # Create or merge the node in Neo4j, one write transaction per row.
                session.write_transaction(create_or_merge_node, row['type'], properties)
    finally:
        # Close the driver even when a row fails, so connections are not leaked.
        driver.close()
if __name__ == "__main__":
    # Command-line entry point: a single positional argument names the CSV
    # file, which is handed straight to main().
    cli = argparse.ArgumentParser(
        description="Read a CSV file and create nodes in Neo4j."
    )
    cli.add_argument("csv_file_path", type=str, help="Path to the CSV file")
    main(cli.parse_args().csv_file_path)