{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mRunning cells with 'Python 3.12.4' requires the ipykernel package.\n", "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", "\u001b[1;31mCommand: '/bin/python3.12 -m pip install ipykernel -U --user --force-reinstall'" ] } ], "source": [ "from pyspark.sql import SparkSession\n", "from pyspark.sql.functions import *\n", "from pyspark.sql.types import *\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mRunning cells with 'Python 3.12.4' requires the ipykernel package.\n", "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", "\u001b[1;31mCommand: '/bin/python3.12 -m pip install ipykernel -U --user --force-reinstall'" ] } ], "source": [ "\n", "spark = SparkSession.builder.appName(\"finance_dwh\").config(\"spark.memory.offHeap.enabled\",\"true\").config(\"spark.memory.offHeap.size\",\"10g\").getOrCreate()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "accounts_data = spark.read.json(\"data/20240728094708.json\")\n", "accounts_data.printSchema()\n", "#accounts_data.show()\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }