"""Load rows from a '^'-delimited CSV file into Neo4j as nodes.

Each CSV row becomes one ``MERGE`` statement: the ``type`` column supplies the
node label and the remaining columns supply the node properties. Connection
settings are read from the ``NEO4J_URI``, ``NEO4J_USER`` and ``NEO4J_PASSWORD``
environment variables (optionally provided through a ``.env`` file).
"""

import argparse
import os

import pandas as pd
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables from .env file
load_dotenv()

# Columns read from the CSV file; every other column is ignored.
_CSV_COLUMNS = [
    'type',
    'id',
    'title',
    'description',
    'url',
    'date_enacted',
    'date_repealed',
]


def create_neo4j_session(uri, user, password):
    """Return a Neo4j driver for *uri* authenticated with *user*/*password*.

    Despite the name, the returned object is a driver (what
    ``GraphDatabase.driver`` returns), not a session; the caller is
    responsible for closing it.
    """
    return GraphDatabase.driver(uri, auth=(user, password))


def _is_missing(value):
    """Return True when *value* is None or a scalar NaN/NaT placeholder."""
    return value is None or (pd.api.types.is_scalar(value) and pd.isna(value))


def _escape_identifier(name):
    """Return *name* as a backtick-quoted Cypher identifier.

    Labels and property keys cannot be passed as query parameters, so they
    are quoted instead. Embedded backticks (including the ``\\u0060`` escape
    form) are doubled so the identifier cannot terminate the quoting early.

    Raises:
        ValueError: if *name* is empty.
    """
    text = str(name)
    if not text:
        raise ValueError("Cypher identifiers must not be empty.")
    return "`" + text.replace("\\u0060", "`").replace("`", "``") + "`"


def create_or_merge_node(tx, node_type, properties):
    """MERGE a node labelled *node_type* carrying *properties*.

    Property values are sent as query parameters rather than being spliced
    into the query text, so quotes or other special characters in the data
    can neither break the statement nor inject Cypher.

    Args:
        tx: Transaction object exposing ``run(query, parameters)``.
        node_type: Label for the node.
        properties: Mapping of property name to value. Entries whose value
            is None/NaN are omitted (MERGE does not accept null property
            values); all other values are stored as strings.

    Raises:
        ValueError: if *node_type* is missing or empty.
    """
    if _is_missing(node_type) or not str(node_type):
        raise ValueError("node_type must be a non-empty label.")

    parameters = {}
    assignments = []
    present = ((k, v) for k, v in properties.items() if not _is_missing(v))
    for index, (key, value) in enumerate(present):
        placeholder = f"p{index}"
        # Values are stringified to keep the stored representation stable
        # regardless of the dtype pandas inferred for the column.
        parameters[placeholder] = str(value)
        assignments.append(f"{_escape_identifier(key)}: ${placeholder}")

    property_map = ', '.join(assignments)
    query = f"MERGE (n:{_escape_identifier(node_type)} {{{property_map}}})"
    tx.run(query, parameters)


def _format_date(value):
    """Return *value* formatted as ``YYYY-MM-DD``, or None when it is missing."""
    if pd.notnull(value):
        return pd.to_datetime(value).strftime('%Y-%m-%d')
    return None


def main(csv_file_path):
    """Read *csv_file_path* and MERGE one Neo4j node per row.

    Args:
        csv_file_path: Path to a '^'-delimited CSV file containing at least
            the columns listed in ``_CSV_COLUMNS``.

    Raises:
        ValueError: if any of the Neo4j connection environment variables is
            unset or empty.
    """
    # Read environment variables
    uri = os.getenv("NEO4J_URI")
    user = os.getenv("NEO4J_USER")
    password = os.getenv("NEO4J_PASSWORD")

    if not all([uri, user, password]):
        raise ValueError("Neo4j URI, user, and password must be set in the environment variables.")

    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_file_path, delimiter='^', usecols=_CSV_COLUMNS)

    # Connect to Neo4j
    driver = create_neo4j_session(uri, user, password)
    try:
        with driver.session() as session:
            # write_transaction is deprecated since driver 5.0; prefer its
            # replacement when available and fall back on older drivers.
            run_write = getattr(session, "execute_write", None) or session.write_transaction

            for _, row in df.iterrows():
                properties = {
                    'id': str(row['id']),
                    'title': row['title'],
                    'description': row['description'],
                    'url': row['url'],
                    'date_enacted': _format_date(row['date_enacted']),
                    'date_repealed': _format_date(row['date_repealed']),
                }

                # Create or merge the node in Neo4j
                run_write(create_or_merge_node, row['type'], properties)
    finally:
        # Release the connection pool even when a row fails to load.
        driver.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Read a CSV file and create nodes in Neo4j.")
    parser.add_argument("csv_file_path", type=str, help="Path to the CSV file")
    args = parser.parse_args()

    main(args.csv_file_path)