/**
 * Twenty Twenty-Four functions and definitions
 *
 * @link https://developer.wordpress.org/themes/basics/theme-functions/
 *
 * @package Twenty Twenty-Four
 * @since Twenty Twenty-Four 1.0
 */

/**
 * Register block styles.
 */

if ( ! function_exists( 'twentytwentyfour_block_styles' ) ) :
	/**
	 * Adds the theme's custom block style variations.
	 *
	 * The variations are collected in a map of block name => style properties
	 * (name, translated label, inline CSS) and then handed one by one, in the
	 * order listed, to register_block_style().
	 *
	 * @since Twenty Twenty-Four 1.0
	 * @return void
	 */
	function twentytwentyfour_block_styles() {
		$block_styles = array(
			// Details block: arrow characters used as the summary marker.
			'core/details' => array(
				'name'         => 'arrow-icon-details',
				'label'        => __( 'Arrow icon', 'twentytwentyfour' ),
				'inline_style' => '
				.is-style-arrow-icon-details {
					padding-top: var(--wp--preset--spacing--10);
					padding-bottom: var(--wp--preset--spacing--10);
				}

				.is-style-arrow-icon-details summary {
					list-style-type: "\2193\00a0\00a0\00a0";
				}

				.is-style-arrow-icon-details[open]>summary {
					list-style-type: "\2192\00a0\00a0\00a0";
				}',
			),

			/*
			 * Post Terms block: each term drawn as a rounded "pill".
			 * https://github.com/WordPress/gutenberg/issues/24956
			 */
			'core/post-terms' => array(
				'name'         => 'pill',
				'label'        => __( 'Pill', 'twentytwentyfour' ),
				'inline_style' => '
				.is-style-pill a,
				.is-style-pill span:not([class], [data-rich-text-placeholder]) {
					display: inline-block;
					background-color: var(--wp--preset--color--base-2);
					padding: 0.375rem 0.875rem;
					border-radius: var(--wp--preset--spacing--20);
				}

				.is-style-pill a:hover {
					background-color: var(--wp--preset--color--contrast-3);
				}',
			),

			/*
			 * List block: checkmark character as the list marker.
			 * https://github.com/WordPress/gutenberg/issues/51480
			 */
			'core/list' => array(
				'name'         => 'checkmark-list',
				'label'        => __( 'Checkmark', 'twentytwentyfour' ),
				'inline_style' => '
				ul.is-style-checkmark-list {
					list-style-type: "\2713";
				}

				ul.is-style-checkmark-list li {
					padding-inline-start: 1ch;
				}',
			),

			// Navigation Link block: arrow character appended after the label.
			'core/navigation-link' => array(
				'name'         => 'arrow-link',
				'label'        => __( 'With arrow', 'twentytwentyfour' ),
				'inline_style' => '
				.is-style-arrow-link .wp-block-navigation-item__label:after {
					content: "\2197";
					padding-inline-start: 0.25rem;
					vertical-align: middle;
					text-decoration: none;
					display: inline-block;
				}',
			),

			// Heading block: clip-path asterisk shape rendered before the heading.
			'core/heading' => array(
				'name'         => 'asterisk',
				'label'        => __( 'With asterisk', 'twentytwentyfour' ),
				'inline_style' => "
				.is-style-asterisk:before {
					content: '';
					width: 1.5rem;
					height: 3rem;
					background: var(--wp--preset--color--contrast-2, currentColor);
					clip-path: path('M11.93.684v8.039l5.633-5.633 1.216 1.23-5.66 5.66h8.04v1.737H13.2l5.701 5.701-1.23 1.23-5.742-5.742V21h-1.737v-8.094l-5.77 5.77-1.23-1.217 5.743-5.742H.842V9.98h8.162l-5.701-5.7 1.23-1.231 5.66 5.66V.684h1.737Z');
					display: block;
				}

				/* Hide the asterisk if the heading has no content, to avoid using empty headings to display the asterisk only, which is an A11Y issue */
				.is-style-asterisk:empty:before {
					content: none;
				}

				.is-style-asterisk:-moz-only-whitespace:before {
					content: none;
				}

				.is-style-asterisk.has-text-align-center:before {
					margin: 0 auto;
				}

				.is-style-asterisk.has-text-align-right:before {
					margin-left: auto;
				}

				.rtl .is-style-asterisk.has-text-align-left:before {
					margin-right: auto;
				}",
			),
		);

		// Register every variation against its block, preserving the listed order.
		foreach ( $block_styles as $block_name => $style_properties ) {
			register_block_style( $block_name, $style_properties );
		}
	}
endif;

// Hook the block style registration callback onto the 'init' action.
add_action( 'init', 'twentytwentyfour_block_styles' );

/**
 * Enqueue block stylesheets.
 */

if ( ! function_exists( 'twentytwentyfour_block_stylesheets' ) ) :
	/**
	 * Attaches the theme's outline-button stylesheet to the core Button block.
	 *
	 * @since Twenty Twenty-Four 1.0
	 * @return void
	 */
	function twentytwentyfour_block_stylesheets() {
		/*
		 * wp_enqueue_block_style() binds a stylesheet to one specific block, so
		 * the file is only loaded when that block is rendered (in the editor and
		 * on the front end) instead of on every request.
		 *
		 * See https://make.wordpress.org/core/2021/12/15/using-multiple-stylesheets-per-block/ for more info.
		 */
		$stylesheet = 'assets/css/button-outline.css';

		// Both the URL and the filesystem path are resolved against the parent theme.
		$button_outline_style = array(
			'handle' => 'twentytwentyfour-button-style-outline',
			'src'    => get_parent_theme_file_uri( $stylesheet ),
			'ver'    => wp_get_theme( get_template() )->get( 'Version' ),
			'path'   => get_parent_theme_file_path( $stylesheet ),
		);

		wp_enqueue_block_style( 'core/button', $button_outline_style );
	}
endif;

// Hook the block stylesheet enqueue callback onto the 'init' action.
add_action( 'init', 'twentytwentyfour_block_stylesheets' );

/**
 * Register pattern categories.
 */

if ( ! function_exists( 'twentytwentyfour_pattern_categories' ) ) :
	/**
	 * Adds the theme's "Pages" block pattern category.
	 *
	 * @since Twenty Twenty-Four 1.0
	 * @return void
	 */
	function twentytwentyfour_pattern_categories() {
		// Translated label and description for the category.
		$page_category = array(
			'label'       => _x( 'Pages', 'Block pattern category', 'twentytwentyfour' ),
			'description' => __( 'A collection of full page layouts.', 'twentytwentyfour' ),
		);

		register_block_pattern_category( 'twentytwentyfour_page', $page_category );
	}
endif;

add_action( 'init', 'twentytwentyfour_pattern_categories' );
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Blog &#8211; SANDIP KUMAR SINGH</title>
	<atom:link href="https://sksiddha.com/blog/feed/" rel="self" type="application/rss+xml" />
	<link>https://sksiddha.com</link>
	<description>Assistant Professor Computer Science @RRIMT Lucknow</description>
	<lastBuildDate>Sun, 17 Aug 2025 18:46:33 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.8.5</generator>

<image>
	<url>https://sksiddha.com/wp-content/uploads/2024/08/sksiddha-logo-100x100.png</url>
	<title>Blog &#8211; SANDIP KUMAR SINGH</title>
	<link>https://sksiddha.com</link>
	<width>32</width>
	<height>32</height>
</image> 
	<item>
		<title>Unit 3 Feature Generation &#038; Feature Selection</title>
		<link>https://sksiddha.com/unit-3-feature-generation-feature-selection/</link>
					<comments>https://sksiddha.com/unit-3-feature-generation-feature-selection/#respond</comments>
		
		<dc:creator><![CDATA[Sandip Kumar Singh]]></dc:creator>
		<pubDate>Sun, 27 Jul 2025 15:17:55 +0000</pubDate>
				<category><![CDATA[Blog]]></category>
		<guid isPermaLink="false">https://sksiddha.com/?p=1544</guid>

					<description><![CDATA[<p>Extracting Meaning from Data Using Data Science In the digital age, data is everywhere—generated by smartphones, social media, websites, sensors, and machines. But data alone is not valuable until we can make sense of it. That’s where data science comes in. It helps us extract meaning, patterns, and insights from raw information, transforming it into a powerful tool for [&#8230;]</p>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-3-feature-generation-feature-selection/">Unit 3 Feature Generation &amp; Feature Selection</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></description>
										<content:encoded><![CDATA[
<p><strong>Extracting Meaning from Data Using Data Science</strong><br>In the digital age, data is everywhere—generated by smartphones, social media, websites,<br>sensors, and machines. But data alone is not valuable until we can make sense of it. That’s<br>where data science comes in. It helps us extract meaning, patterns, and insights from raw<br>information, transforming it into a powerful tool for decision-making, innovation, and<br>understanding the world.</p>


<p><strong>What Is Data Science?</strong><br>Data science is an interdisciplinary field that combines statistics, computer science, and<br>domain knowledge to analyze data and generate actionable insights. It involves collecting,<br>cleaning, processing, analyzing, and visualizing data to answer questions or solve problems.<br>Think of it as a modern-day detective work—finding hidden clues in massive piles of<br>information to uncover the story behind the numbers.</p>


<p><strong>How Data Science Extracts Meaning from Data</strong><br>Let’s break down how data science turns data into knowledge:</p>


<ol class="wp-block-list">
<li><strong>Data Collection</strong><br>Everything starts with data—collected from sources like apps, surveys, sensors, websites, or<br>databases. For example, an e-commerce platform collects user clicks, purchase history, and<br>product reviews.</li>


<li><strong>Data Cleaning and Preparation</strong><br>Raw data is often messy or incomplete. Data scientists clean it by removing errors, handling<br>missing values, and formatting it correctly. This step is crucial for ensuring accurate analysis.</li>


<li><strong>Data Analysis and Exploration</strong><br>Using statistical techniques and tools like Python, R, or SQL, data scientists explore the data to<br>find patterns, trends, and anomalies. For example, they might find that sales drop on certain<br>weekdays or that users from a particular city spend more.</li>


<li><strong>Machine Learning and Modeling</strong><br>To make predictions or classifications, data scientists build machine learning models. These<br>models &#8220;learn&#8221; from historical data to make future decisions—for instance, predicting customer<br>churn or recommending products.</li>


<li><strong>Data Visualization</strong><br>Charts, graphs, and dashboards are used to visually present the results in a clear and<br>understandable way. Tools like Tableau, Power BI, or Matplotlib help turn complex insights<br>into stories anyone can understand.</li>


<li><strong>Interpretation and Decision-Making</strong><br>The final and most important step: drawing conclusions and making informed decisions.<br>Whether it’s a business strategy, healthcare diagnosis, or policy development, the goal is to use<br>data insights to act smarter and faster.<br>Real-Life Example: Retail Industry<br>Imagine you run an online clothing store. You want to know:<br> Which products are most popular?<br> What time of year do customers buy the most?<br> What kind of promotions increase sales?<br>Using data science, you can:<br> Analyze customer behavior and trends<br> Segment customers based on preferences<br> Forecast future demand<br> Personalize recommendations<br> With these insights, you can optimize inventory, improve marketing, and enhance the<br>customer experience.<br><strong>The Responsibility of Interpretation</strong><br>Extracting meaning from data comes with responsibility. Data must be interpreted ethically<br>and accurately, keeping in mind privacy, bias, and fairness. Misinterpreted or biased data can<br>lead to wrong decisions or unfair outcomes.<br>Quote: Data is the new oil, but data science is the refinery that turns it into value.<br>How to Get Customer Retention Using Data Science<br>Here’s a step-by-step breakdown:</li>


<li><strong>Collect the Right Data</strong><br>Start with data related to customer behavior and interaction:<br> Transactional data (purchases, frequency, amount)<br> Engagement data (website visits, clicks, time spent)<br> Support data (complaints, tickets raised, response time)<br> Demographics (age, location, gender)<br> Feedback and reviews</li>


<li><strong>Analyze Retention Metrics</strong><br>Use key metrics to understand how loyal your customers are:<br> Churn rate = (Customers lost / Total customers) × 100<br> Customer Lifetime Value (CLTV) = Revenue expected from a customer over the<br>relationship<br> Repeat purchase rate<br> Time between purchases<br>These metrics provide a baseline to monitor improvements.</li>


<li><strong>Predict Customer Churn (Who Might Leave?)</strong><br>Use machine learning models to predict churn (customers likely to stop buying). Common<br>models:<br> Logistic Regression<br> Random Forest<br> XGBoost<br> Neural Networks<br>Features used in churn models might include:<br> Drop in usage frequency<br> Late payments<br> No logins for a long time<br> Negative reviews or support tickets<br>Label your past data as &#8220;churned&#8221; vs. &#8220;retained&#8221; to train supervised models.</li>


<li><strong>Segment Customers (Who Needs Attention?)</strong><br>Use clustering algorithms like K-Means or DBSCAN to segment customers:<br> High-value loyal customers<br> At-risk customers<br> New customers with high potential<br>This allows targeted retention strategies.</li>


<li><strong>Personalize Retention Strategies</strong><br>Once insights are clear, apply them:<br> Personalized offers or loyalty rewards<br> Timely reminders or re-engagement emails<br> Better customer support for at-risk users<br> Product recommendations based on browsing and purchase history<br>Data science helps automate and optimize these actions.</li>


<li><strong>A/B Test Retention Campaigns</strong><br>Run A/B tests to see which retention strategies work best. Compare two customer groups:<br> Group A: receives a 10% discount<br> Group B: receives personalized recommendations<br>Use statistical analysis to determine which group had better retention.</li>


<li><strong>Monitor and Improve Continuously</strong><br>Use dashboards and KPIs to track customer retention over time. Tools like:<br> Power BI<br> Tableau<br> Google Data Studio<br> Python (Plotly, Seaborn)<br>Regular monitoring ensures early detection of churn patterns.<br>Example Use Case: E-commerce<br>An e-commerce company used data science to:<br> Identify customers with declining purchases<br> Predict churn with a Random Forest model<br> Send targeted discounts to at-risk users<br> Improve website speed based on behavior data<br>Result: 15% increase in customer retention within 3 months.<br>Brainstorming in Feature Generation (Feature Engineering)<br>Feature generation is a critical step in data science and machine learning where we create<br>new input variables (features) from raw data to improve model performance. Brainstorming in<br>this context means creatively thinking about what extra or derived features can help the model<br>better understand patterns and relationships in the data.<br>What is Brainstorming in Feature Generation?<br>It’s the idea generation phase where data scientists explore, discuss, and invent new features<br>from existing data using:<br> Domain knowledge<br> Statistical thinking<br> Business goals<br> Logical combinations and transformations<br>This helps models &#8220;learn&#8221; more from the data by giving them richer and more meaningful<br>inputs</li>
</ol>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-3-feature-generation-feature-selection/">Unit 3 Feature Generation &amp; Feature Selection</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://sksiddha.com/unit-3-feature-generation-feature-selection/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Unit 2  Data Analysis Process</title>
		<link>https://sksiddha.com/unit-2-data-analysis-process/</link>
					<comments>https://sksiddha.com/unit-2-data-analysis-process/#respond</comments>
		
		<dc:creator><![CDATA[Sandip Kumar Singh]]></dc:creator>
		<pubDate>Sun, 27 Jul 2025 14:48:26 +0000</pubDate>
				<category><![CDATA[Blog]]></category>
		<guid isPermaLink="false">https://sksiddha.com/?p=1541</guid>

					<description><![CDATA[<p>Introduction The Data Analytics Process is a structured method to explore, analyze, and interpret data to make better decisions. 1. Define the Problem / Objective Clearly understand what question you are trying to answer. Example: Why are sales dropping in the last 3 months? 2. Collect the Data Gather data from various sources like databases, [&#8230;]</p>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-2-data-analysis-process/">Unit 2  Data Analysis Process</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></description>
										<content:encoded><![CDATA[
<p><strong>Introduction</strong></p>


<p>The <strong>Data Analytics Process</strong> is a structured method to explore, analyze, and interpret data to make better decisions.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>1. Define the Problem / Objective</strong></p>


<p>Clearly understand what question you are trying to answer.</p>


<p><strong>Example</strong>: Why are sales dropping in the last 3 months?</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>2. Collect the Data</strong></p>


<p>Gather data from various sources like databases, websites, sensors, or surveys.</p>


<p><strong>Example</strong>: Collect sales reports, customer feedback, and market trends.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>3. Clean and Prepare the Data</strong></p>


<p>Remove duplicates, fix missing values, and organize data for analysis.</p>


<p><strong>Example</strong>: Remove entries with no price or incorrect dates.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>4. Analyze the Data</strong></p>


<p>Use statistical tools and programming (like Python, Excel, or R) to find patterns and insights.</p>


<p><strong>Example</strong>: Find which product categories have low sales and in which regions.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>5. Interpret and Visualize Results</strong></p>


<p>Create charts, graphs, and dashboards to explain findings in a clear way.</p>


<p><strong>Example</strong>: Use a bar chart to show the drop in sales per region.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>6. Make Decisions / Take Action</strong></p>


<p>Use the insights to improve business strategies, operations, or performance.</p>


<p><strong>Example</strong>: Increase marketing in low-performing areas or offer discounts on slow-selling items.</p>


<p><strong>Notes</strong>: <strong>Data Analytics = Ask → Gather → Clean → Analyze → Visualize → Act</strong></p>


<p>It&#8217;s all about turning raw data into smart decisions.</p>


<p><strong>Knowledge Check in Data Science</strong></p>


<p>To check your <strong>knowledge in data analytics</strong>, you can evaluate your understanding and skills through the following methods:</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>1. Concept Understanding</strong></p>


<p>Test your knowledge of key topics like:</p>


<ul class="wp-block-list">
<li>Types of data (structured/unstructured)</li>


<li>Data analytics process</li>


<li>Descriptive, diagnostic, predictive, and prescriptive analytics</li>


<li>Basic statistics (mean, median, standard deviation)</li>
</ul>


<p><em>Example Question:</em><br><strong>What is the difference between descriptive and predictive analytics?</strong></p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>2. Tools and Skills</strong></p>


<p>Check your practical knowledge of tools like:</p>


<ul class="wp-block-list">
<li><strong>Excel</strong> (formulas, pivot tables)</li>


<li><strong>SQL</strong> (queries to retrieve data)</li>


<li><strong>Python or R</strong> (data handling with Pandas/Numpy)</li>


<li><strong>Power BI or Tableau</strong> (creating dashboards)</li>
</ul>


<p><em>Example Task:</em><br><strong>Use Excel to create a dashboard showing sales trends by region.</strong></p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>3. Hands-on Projects</strong></p>


<p>Practice with small datasets to solve real-world problems.</p>


<p><em>Example Activity:</em><br><strong>Analyze a CSV file to find which product had the highest returns.</strong></p>


<p><strong>Exploratory Data Analysis (EDA) – In Brief</strong></p>


<p><strong>Exploratory Data Analysis (EDA)</strong> is the process of <strong>examining and visualizing data</strong> to understand its structure, patterns, and key features <strong>before applying any models or making decisions</strong>.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Purpose of EDA:</strong></p>


<ul class="wp-block-list">
<li>Identify <strong>patterns</strong>, <strong>relationships</strong>, and <strong>trends</strong> in the data</li>


<li>Detect <strong>missing values</strong>, <strong>outliers</strong>, or <strong>errors</strong></li>


<li>Get a basic idea of how data is distributed</li>


<li>Choose the right <strong>analysis or model</strong> for further processing</li>
</ul>


<p><strong>Common EDA Techniques:</strong></p>


<figure class="wp-block-table"><table class="has-fixed-layout"><thead><tr><td><strong>Technique</strong></td><td><strong>Purpose</strong></td><td><strong>Example Tool</strong></td></tr></thead><tbody><tr><td>Summary Statistics</td><td>Mean, Median, Mode, Standard Deviation</td><td>Pandas.describe() in Python</td></tr><tr><td>Data Visualization</td><td>Plot graphs for insights</td><td>Matplotlib, Seaborn</td></tr><tr><td>Correlation Analysis</td><td>Find relationships between variables</td><td>corr() function</td></tr><tr><td>Value Counts</td><td>Frequency of categorical values</td><td>value_counts() in Pandas</td></tr></tbody></table></figure>


<h3 class="wp-block-heading"><strong>Example:</strong></h3>


<p>You have a dataset of student marks.</p>


<ul class="wp-block-list">
<li>Use <strong>histograms</strong> to see score distribution</li>


<li>Use <strong>box plots</strong> to spot outliers</li>


<li>Use <strong>scatter plots</strong> to check relationships (e.g., hours studied vs. marks scored)</li>
</ul>


<p><strong>Notes:</strong> EDA helps you understand your data deeply before applying any machine learning or business decisions.</p>


<p><strong>Types of Exploratory Data Analysis</strong></p>


<p><strong>A. </strong>Quantitative Analysis Technique</p>


<p><strong>B. </strong>Graphical Analysis Technique</p>


<p><strong>Quantitative Data Analysis</strong></p>


<p><strong>Quantitative Data Analysis</strong> is the process of analyzing <strong>numerical data</strong> (data that can be measured or counted) using <strong>statistical techniques</strong> to uncover patterns, relationships, and trends.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Key Features of Quantitative Data:</strong></p>


<ul class="wp-block-list">
<li>Expressed in numbers (e.g., marks, sales, age)</li>


<li>Can be analyzed using <strong>mean</strong>, <strong>median</strong>, <strong>standard deviation</strong>, <strong>correlation</strong>, etc.</li>


<li>Often displayed with charts like histograms, scatter plots, line graphs</li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Example Use-Case:</strong></p>


<p>Suppose we have data on <strong>students&#8217; hours studied</strong> and <strong>exam scores</strong>. We want to analyze the relationship between them.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Python Program for Quantitative Data Analysis</strong></p>


<p># Import necessary libraries</p>


<p>import pandas as pd</p>


<p>import numpy as np</p>


<p>import matplotlib.pyplot as plt</p>


<p>import seaborn as sns</p>


<p># Sample dataset</p>


<p>data = {</p>


<p>'Hours_Studied': [1, 2, 3, 4, 5, 6, 7, 8, 9],</p>


<p>'Exam_Score': [35, 40, 45, 50, 55, 65, 70, 75, 80]</p>


<p>}</p>


<p># Create DataFrame</p>


<p>df = pd.DataFrame(data)</p>


<p># 1. View basic statistics</p>


<p>print("Summary Statistics:\n", df.describe())</p>


<p># 2. Calculate correlation</p>


<p>correlation = df['Hours_Studied'].corr(df['Exam_Score'])</p>


<p>print("\nCorrelation between hours studied and score:", correlation)</p>


<p># 3. Plot the data</p>


<p>plt.figure(figsize=(8,5))</p>


<p>sns.scatterplot(x='Hours_Studied', y='Exam_Score', data=df)</p>


<p>plt.title('Hours Studied vs Exam Score')</p>


<p>plt.xlabel('Hours Studied')</p>


<p>plt.ylabel('Exam Score')</p>


<p>plt.grid(True)</p>


<p>plt.show()</p>


<h3 class="wp-block-heading"><strong>Graphical Data Analytics</strong></h3>


<p><strong>Graphical Analysis</strong> is a method of visualizing data using <strong>charts and graphs</strong> to identify trends, patterns, outliers, and relationships.</p>


<p>Below are the most commonly used graphical techniques</p>


<p><strong>Python examples</strong>:</p>


<p>Histogram</p>


<p>Shows the <strong>distribution</strong> of a single numeric variable.</p>


<p>import seaborn as sns</p>


<p>import matplotlib.pyplot as plt</p>


<p>data = [55, 60, 61, 62, 65, 65, 66, 68, 70, 75, 80, 85, 90, 95]</p>


<p>sns.histplot(data, bins=5, kde=True)</p>


<p>plt.title("Histogram of Test Scores")</p>


<p>plt.xlabel("Score")</p>


<p>plt.ylabel("Frequency")</p>


<p>plt.show()</p>


<h2 class="wp-block-heading"><strong>Scatter Plot</strong></h2>


<h3 class="wp-block-heading">Purpose:</h3>


<p>Shows the <strong>relationship between two numeric variables</strong></p>


<p>import seaborn as sns</p>


<p>df = sns.load_dataset("iris")</p>


<p>sns.scatterplot(x='sepal_length', y='sepal_width', hue='species', data=df)</p>


<p>plt.title("Sepal Length vs Width")</p>


<p>plt.show()</p>


<h2 class="wp-block-heading"><strong>Bar Chart</strong></h2>


<h3 class="wp-block-heading">Purpose:</h3>


<p>Compares <strong>categorical variables</strong> or grouped data.</p>


<p>import pandas as pd</p>


<p>import matplotlib.pyplot as plt</p>


<p>df = pd.DataFrame({</p>


<p>&nbsp;&nbsp;&nbsp; 'Department': ['IT', 'HR', 'Sales', 'Marketing'],</p>


<p>&nbsp;&nbsp;&nbsp; 'Employees': [40, 15, 25, 30]</p>


<p>})</p>


<p>df.plot(kind='bar', x='Department', y='Employees', legend=False)</p>


<p>plt.title("Number of Employees by Department")</p>


<p>plt.ylabel("Employees")</p>


<p>plt.show()</p>


<h2 class="wp-block-heading"><strong>Pie Chart</strong></h2>


<h3 class="wp-block-heading">Purpose:</h3>


<p>Displays the <strong>percentage</strong> or <strong>proportion</strong> of parts to a whole.</p>


<p>labels = ['Python', 'Java', 'C++', 'JavaScript']</p>


<p>sizes = [40, 25, 20, 15]</p>


<p>plt.pie(sizes, labels=labels, autopct='%1.1f%%')</p>


<p>plt.title("Programming Language Usage")</p>


<p>plt.show()</p>


<h2 class="wp-block-heading"><strong>Line Chart</strong></h2>


<h3 class="wp-block-heading">Purpose:</h3>


<p>Shows <strong>trends over time</strong>.</p>


<p>import pandas as pd</p>


<p>import matplotlib.pyplot as plt</p>


<p>df = pd.DataFrame({</p>


<p>&nbsp;&nbsp;&nbsp; 'Month': ['Jan', 'Feb', 'Mar', 'Apr'],</p>


<p>&nbsp;&nbsp;&nbsp; 'Revenue': [1000, 1500, 1300, 1700]</p>


<p>})</p>


<p>plt.plot(df['Month'], df['Revenue'], marker='o')</p>


<p>plt.title("Monthly Revenue")</p>


<p>plt.xlabel("Month")</p>


<p>plt.ylabel("Revenue in USD")</p>


<p>plt.grid(True)</p>


<p>plt.show()</p>


<p><strong>Summary Table:</strong></p>


<figure class="wp-block-table"><table class="has-fixed-layout"><thead><tr><td><strong>Technique</strong></td><td><strong>Best For</strong></td><td><strong>Python Tool</strong></td></tr></thead><tbody><tr><td>Histogram</td><td>Data distribution</td><td>seaborn, matplotlib</td></tr><tr><td>Box Plot</td><td>Outliers, spread, quartiles</td><td>seaborn</td></tr><tr><td>Scatter Plot</td><td>Relationship between variables</td><td>seaborn, matplotlib</td></tr><tr><td>Bar Chart</td><td>Categorical comparison</td><td>pandas, matplotlib</td></tr><tr><td>Pie Chart</td><td>Part-to-whole visualization</td><td>matplotlib</td></tr><tr><td>Line Chart</td><td>Trend over time</td><td>matplotlib, pandas</td></tr></tbody></table></figure>


<h3 class="wp-block-heading"><br></h3>


<h3 class="wp-block-heading"><strong>Data Analytics: Conclusion and Prediction</strong></h3>


<p>In data analytics, the <strong>final goal</strong> is to extract <strong>meaningful insights</strong> from data that can help in making <strong>informed decisions</strong>. Two important outcomes are:</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h2 class="wp-block-heading"><strong>Conclusion (Descriptive Analytics)</strong></h2>


<h3 class="wp-block-heading">What is it?</h3>


<p>A <strong>conclusion</strong> summarizes what the data tells us <strong>after analysis</strong>. It answers:</p>


<p>“What happened?” or “What is happening?”</p>


<h3 class="wp-block-heading">Purpose:</h3>


<ul class="wp-block-list">
<li>Identify trends and patterns</li>


<li>Discover relationships or differences</li>


<li>Highlight key findings</li>
</ul>


<h3 class="wp-block-heading">&nbsp;</h3>


<h3 class="wp-block-heading">Example:</h3>


<p>After analyzing sales data for 12 months:</p>


<p>“Sales increased by 20% in the second half of the year, with the highest revenue in December.”</p>


<h3 class="wp-block-heading">Techniques Used:</h3>


<ul class="wp-block-list">
<li>Charts &amp; visualizations</li>


<li>Descriptive statistics (mean, median)</li>


<li>Correlation and comparison</li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h2 class="wp-block-heading"><strong>Prediction (Predictive Analytics)</strong></h2>


<h3 class="wp-block-heading">What is it?</h3>


<p>A <strong>prediction</strong> uses past data and mathematical models to forecast <strong>future outcomes</strong>. It answers:</p>


<p>“What is likely to happen next?”</p>


<h3 class="wp-block-heading">Purpose:</h3>


<ul class="wp-block-list">
<li>Estimate future values (e.g., sales, stock prices, user behavior)</li>


<li>Help in planning and decision-making</li>
</ul>


<h3 class="wp-block-heading">Example:</h3>


<p>Using student attendance and study hours to predict:</p>


<p>“This student has a 90% chance of scoring above 75% in the exam.”</p>


<h3 class="wp-block-heading">Techniques Used:</h3>


<ul class="wp-block-list">
<li><strong>Machine Learning models</strong> (Linear Regression, Decision Trees, etc.)</li>


<li><strong>Time Series forecasting</strong></li>


<li><strong>Predictive modeling libraries</strong> like <code>scikit-learn</code></li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h2 class="wp-block-heading"><strong>Conclusion vs. Prediction – Quick Comparison</strong></h2>


<figure class="wp-block-table"><table class="has-fixed-layout"><thead><tr><td><strong>Feature</strong></td><td><strong>Conclusion</strong></td><td><strong>Prediction</strong></td></tr></thead><tbody><tr><td>Based on</td><td>Existing data</td><td>Existing + future (inference) data</td></tr><tr><td>Answers</td><td>What happened</td><td>What will happen</td></tr><tr><td>Examples</td><td>&#8220;Most sales happened in June&#8221;</td><td>&#8220;Sales will rise 10% next quarter&#8221;</td></tr><tr><td>Tools</td><td>Summary stats, EDA, visuals</td><td>Regression, ML models, forecasting</td></tr></tbody></table></figure>


<p><strong>Unit 3</strong> <strong>Feature Generation and</strong></p>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-2-data-analysis-process/">Unit 2  Data Analysis Process</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://sksiddha.com/unit-2-data-analysis-process/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
		<item>
		<title>Unit 1  Introduction to Data Science</title>
		<link>https://sksiddha.com/unit-1-introduction-to-data-science/</link>
					<comments>https://sksiddha.com/unit-1-introduction-to-data-science/#respond</comments>
		
		<dc:creator><![CDATA[Sandip Kumar Singh]]></dc:creator>
		<pubDate>Sun, 27 Jul 2025 14:46:38 +0000</pubDate>
				<category><![CDATA[Blog]]></category>
		<guid isPermaLink="false">https://sksiddha.com/?p=1538</guid>

					<description><![CDATA[<p>Introduction: Data science is the domain of study that deals with vast volumes of data using modern tools and techniques to find unseen patterns, derive meaningful information, and make business decisions. Data science uses complex machine learning algorithms to build predictive models. The Data Science Lifecycle Data science’s lifecycle consists of five distinct stages, each [&#8230;]</p>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-1-introduction-to-data-science/">Unit 1  Introduction to Data Science</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></description>
										<content:encoded><![CDATA[
<p><strong>Introduction</strong>: Data science is the domain of study that deals with vast volumes of data using modern tools and techniques to find unseen patterns, derive meaningful information, and make business decisions. Data science uses complex machine learning algorithms to build predictive models.</p>


<p><strong>The Data Science Lifecycle</strong>: Data science’s lifecycle consists of five distinct stages, each with its own tasks:</p>


<p><strong>Capture</strong>: Data Acquisition, Data Entry, Signal Reception, Data Extraction. This stage involves gathering raw structured and unstructured data.</p>


<p><strong>Maintain</strong>: Data Warehousing, Data Cleansing, Data Staging, Data Processing, Data Architecture. This stage covers taking the raw data and putting it in a form that can be used.</p>


<p><strong>Process</strong>: Data Mining, Clustering/Classification, Data Modeling, Data Summarization. Data scientists take the prepared data and examine its patterns, ranges, and biases to determine how useful it will be in predictive analysis.</p>


<p><strong>Analyze</strong>: Exploratory/Confirmatory, Predictive Analysis, Regression, Text Mining, Qualitative Analysis. Here is the real meat of the lifecycle. This stage involves performing the various analyses on the data.</p>


<p><strong>Communicate</strong>: Data Reporting, Data Visualization, Business Intelligence, Decision Making. In this final step, analysts prepare the analyses in easily readable forms such as charts, graphs, and reports.</p>


<p><strong>Data Science</strong> is the process of collecting, analyzing, and using data to make decisions or predictions. It combines math, statistics, programming, and domain knowledge.</p>


<p><strong>Example:</strong></p>


<ul class="wp-block-list">
<li>An online store (like Amazon) uses <strong>data science</strong> to suggest products based on your previous searches and purchases.</li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Key Steps in Data Science</strong></p>


<ol class="wp-block-list">
<li><strong>Collect Data</strong> – Example: User clicks on a website.</li>


<li><strong>Clean Data</strong> – Fix errors or missing info.</li>


<li><strong>Analyze Data</strong> – Find patterns or trends.</li>


<li><strong>Build Models</strong> – Use machine learning to predict.</li>


<li><strong>Visualize Data</strong> – Show results using charts or graphs.</li>
</ol>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Where is Data Science Used?</strong></p>


<p><strong>Healthcare</strong></p>


<ul class="wp-block-list">
<li><strong>Use</strong>: Predict diseases, analyze medical images.</li>


<li><strong>Example</strong>: AI predicts chances of diabetes using patient data.</li>
</ul>


<p><strong>Finance</strong></p>


<ul class="wp-block-list">
<li><strong>Use</strong>: Detect fraud, credit scoring.</li>


<li><strong>Example</strong>: Banks use data science to approve or reject loans.</li>
</ul>


<p><strong>Marketing</strong></p>


<ul class="wp-block-list">
<li><strong>Use</strong>: Customer segmentation, personalized ads.</li>


<li><strong>Example</strong>: Facebook shows ads based on your interests.</li>
</ul>


<p><strong>Transport</strong></p>


<ul class="wp-block-list">
<li><strong>Use</strong>: Optimize delivery routes.</li>


<li><strong>Example</strong>: Zomato uses data science to assign the nearest delivery person.</li>
</ul>


<p><strong>Education</strong></p>


<ul class="wp-block-list">
<li><strong>Use</strong>: Track student performance, personalize learning.</li>


<li><strong>Example</strong>: Online courses suggest next lessons based on your progress.</li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Why Should You Learn It?</strong></p>


<ul class="wp-block-list">
<li>High demand in jobs (Data Scientist, Analyst).</li>


<li>Useful in every industry.</li>


<li>Helps solve real-life problems with smart solutions.</li>
</ul>


<p><strong>Purpose of Data Science</strong></p>


<p>The <strong>main purpose of Data Science</strong> is to <strong>extract useful knowledge and insights from data</strong> to help individuals and organizations make better decisions.</p>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<p><strong>Key Purposes of Data Science:</strong></p>


<ol class="wp-block-list">
<li><strong>Understand Patterns and Trends</strong>
<ol class="wp-block-list">
<li>Example: An e-commerce company analyzes customer behavior to find which products are popular.</li>
</ol>
</li>


<li><strong>Make Predictions</strong>
<ol class="wp-block-list">
<li>Example: Weather apps use past climate data to predict rainfall or temperature.</li>
</ol>
</li>


<li><strong>Improve Decision-Making</strong>
<ol class="wp-block-list">
<li>Example: Hospitals use data to decide the best treatment plans for patients.</li>
</ol>
</li>


<li><strong>Automate Processes</strong>
<ol class="wp-block-list">
<li>Example: Self-driving cars use data science to automatically detect obstacles and decide actions.</li>
</ol>
</li>


<li><strong>Solve Complex Problems</strong>
<ol class="wp-block-list">
<li>Example: Banks use data science to detect fraudulent transactions instantly.</li>
</ol>
</li>


<li><strong>Personalize User Experience</strong>
<ol class="wp-block-list">
<li>Example: Netflix recommends shows based on your watch history.</li>
</ol>
</li>
</ol>


<h1 class="wp-block-heading">Basic Components of Python in Data Science</h1>


<h2 class="wp-block-heading">Python Basics</h2>


<p>Python is a popular programming language used in data science because of its <strong>simplicity</strong>, <strong>readability</strong>, and <strong>powerful libraries</strong>.</p>


<h3 class="wp-block-heading">Key Features:</h3>


<ul class="wp-block-list">
<li>Easy syntax (like English)</li>


<li>Large community support</li>


<li>Tons of libraries for data handling and analysis</li>
</ul>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h2 class="wp-block-heading">Essential Python Components for Data Science</h2>


<h3 class="wp-block-heading"><strong>Variables and Data Types</strong></h3>


<p>Used to store and handle different types of data.</p>


<pre class="wp-block-preformatted"><code>age = 25          # Integer
price = 99.99     # Float
name = "Alice"    # String
is_valid = True   # Boolean</code></pre>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>Control Structures</strong></h3>


<p>Used to make decisions and repeat tasks.</p>


<pre class="wp-block-preformatted"><code># If statement
if age &gt; 18:
    print("Adult")

# Loop
for i in range(5):
    print(i)</code></pre>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>Functions</strong></h3>


<p>Reusable blocks of code.</p>


<pre class="wp-block-preformatted"><code>def greet(name):
    return "Hello " + name</code></pre>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>Popular Libraries in Data Science</strong></h3>


<figure class="wp-block-table"><table class="has-fixed-layout"><thead><tr><td><strong>Library</strong></td><td><strong>Purpose</strong></td></tr></thead><tbody><tr><td><code>NumPy</code></td><td>Numerical operations (arrays, math)</td></tr><tr><td><code>Pandas</code></td><td>Data manipulation (tables, CSVs)</td></tr><tr><td><code>Matplotlib</code></td><td>Data visualization (charts/graphs)</td></tr><tr><td><code>Seaborn</code></td><td>Advanced data visualization</td></tr><tr><td><code>Scikit-learn</code></td><td>Machine learning models</td></tr></tbody></table></figure>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>DataFrames (Pandas)</strong></h3>


<p>Used to store and manipulate data in table format (like Excel).</p>


<pre class="wp-block-preformatted"><code>import pandas as pd

data = {"Name": ["John", "Alice"], "Age": [28, 24]}
df = pd.DataFrame(data)
print(df)</code></pre>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>Visualization</strong></h3>


<p>Used to see data trends using charts and graphs.</p>


<pre class="wp-block-preformatted"><code>import matplotlib.pyplot as plt

x = [1, 2, 3]
y = [2, 4, 6]
plt.plot(x, y)
plt.show()</code></pre>


<hr class="wp-block-separator has-alpha-channel-opacity"/>


<h3 class="wp-block-heading"><strong>Machine Learning</strong></h3>


<p>Use libraries like <code>Scikit-learn</code> to train models on data.</p>


<pre class="wp-block-preformatted"><code>from sklearn.linear_model import LinearRegression

model = LinearRegression()
# model.fit(X, y)  # Fit model to data</code></pre>


<p><strong>Note</strong>: Python is the <strong>foundation of modern data science</strong>, and knowing its basics — variables, control structures, functions, libraries — is key to starting a successful journey in data analysis and machine learning.</p>
<p>The post <a rel="nofollow" href="https://sksiddha.com/unit-1-introduction-to-data-science/">Unit 1  Introduction to Data Science</a> first appeared on <a rel="nofollow" href="https://sksiddha.com">SANDIP KUMAR SINGH</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://sksiddha.com/unit-1-introduction-to-data-science/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
			</item>
	</channel>
</rss>