/**
 * Twenty Twenty-Four functions and definitions
 *
 * @link https://developer.wordpress.org/themes/basics/theme-functions/
 *
 * @package Twenty Twenty-Four
 * @since Twenty Twenty-Four 1.0
 */

/**
 * Register block styles.
 */
if ( ! function_exists( 'twentytwentyfour_block_styles' ) ) {
    /**
     * Registers the theme's custom block style variations.
     *
     * Every variation is passed to register_block_style() together with the
     * inline CSS that implements it. The function is only declared when no
     * function of the same name exists yet.
     *
     * @since Twenty Twenty-Four 1.0
     * @return void
     */
    function twentytwentyfour_block_styles() {
        // Details block: "Arrow icon" variation (arrow glyphs as the summary marker).
        $arrow_details_css = ' .is-style-arrow-icon-details { padding-top: var(--wp--preset--spacing--10); padding-bottom: var(--wp--preset--spacing--10); } .is-style-arrow-icon-details summary { list-style-type: "\2193\00a0\00a0\00a0"; } .is-style-arrow-icon-details[open]>summary { list-style-type: "\2192\00a0\00a0\00a0"; }';
        register_block_style(
            'core/details',
            array(
                'name'         => 'arrow-icon-details',
                'label'        => __( 'Arrow icon', 'twentytwentyfour' ),
                'inline_style' => $arrow_details_css,
            )
        );

        // Post terms block: "Pill" variation.
        // See https://github.com/WordPress/gutenberg/issues/24956
        $pill_css = ' .is-style-pill a, .is-style-pill span:not([class], [data-rich-text-placeholder]) { display: inline-block; background-color: var(--wp--preset--color--base-2); padding: 0.375rem 0.875rem; border-radius: var(--wp--preset--spacing--20); } .is-style-pill a:hover { background-color: var(--wp--preset--color--contrast-3); }';
        register_block_style(
            'core/post-terms',
            array(
                'name'         => 'pill',
                'label'        => __( 'Pill', 'twentytwentyfour' ),
                'inline_style' => $pill_css,
            )
        );

        // List block: "Checkmark" variation.
        // See https://github.com/WordPress/gutenberg/issues/51480
        $checkmark_css = ' ul.is-style-checkmark-list { list-style-type: "\2713"; } ul.is-style-checkmark-list li { padding-inline-start: 1ch; }';
        register_block_style(
            'core/list',
            array(
                'name'         => 'checkmark-list',
                'label'        => __( 'Checkmark', 'twentytwentyfour' ),
                'inline_style' => $checkmark_css,
            )
        );

        // Navigation link block: "With arrow" variation (arrow appended after the label).
        $arrow_link_css = ' .is-style-arrow-link .wp-block-navigation-item__label:after { content: "\2197"; padding-inline-start: 0.25rem; vertical-align: middle; text-decoration: none; display: inline-block; }';
        register_block_style(
            'core/navigation-link',
            array(
                'name'         => 'arrow-link',
                'label'        => __( 'With arrow', 'twentytwentyfour' ),
                'inline_style' => $arrow_link_css,
            )
        );

        // Heading block: "With asterisk" variation. The CSS itself hides the
        // asterisk on empty headings (see the comment embedded in the stylesheet).
        $asterisk_css = " .is-style-asterisk:before { content: ''; width: 1.5rem; height: 3rem; background: var(--wp--preset--color--contrast-2, currentColor); clip-path: path('M11.93.684v8.039l5.633-5.633 1.216 1.23-5.66 5.66h8.04v1.737H13.2l5.701 5.701-1.23 1.23-5.742-5.742V21h-1.737v-8.094l-5.77 5.77-1.23-1.217 5.743-5.742H.842V9.98h8.162l-5.701-5.7 1.23-1.231 5.66 5.66V.684h1.737Z'); display: block; } /* Hide the asterisk if the heading has no content, to avoid using empty headings to display the asterisk only, which is an A11Y issue */ .is-style-asterisk:empty:before { content: none; } .is-style-asterisk:-moz-only-whitespace:before { content: none; } .is-style-asterisk.has-text-align-center:before { margin: 0 auto; } .is-style-asterisk.has-text-align-right:before { margin-left: auto; } .rtl .is-style-asterisk.has-text-align-left:before { margin-right: auto; }";
        register_block_style(
            'core/heading',
            array(
                'name'         => 'asterisk',
                'label'        => __( 'With asterisk', 'twentytwentyfour' ),
                'inline_style' => $asterisk_css,
            )
        );
    }
}

add_action( 'init', 'twentytwentyfour_block_styles' );

/**
 * Enqueue block stylesheets.
 */
if ( ! function_exists( 'twentytwentyfour_block_stylesheets' ) ) {
    /**
     * Enqueues the theme's per-block stylesheets.
     *
     * @since Twenty Twenty-Four 1.0
     * @return void
     */
    function twentytwentyfour_block_stylesheets() {
        /*
         * wp_enqueue_block_style() enqueues a stylesheet for one specific block.
         * Such a stylesheet is only loaded when the block is rendered (both in
         * the editor and on the front end), improving performance and reducing
         * the amount of data requested by visitors.
         *
         * See https://make.wordpress.org/core/2021/12/15/using-multiple-stylesheets-per-block/ for more info.
         */
        $stylesheet = 'assets/css/button-outline.css';

        wp_enqueue_block_style(
            'core/button',
            array(
                'handle' => 'twentytwentyfour-button-style-outline',
                'src'    => get_parent_theme_file_uri( $stylesheet ),
                'ver'    => wp_get_theme( get_template() )->get( 'Version' ),
                'path'   => get_parent_theme_file_path( $stylesheet ),
            )
        );
    }
}

add_action( 'init', 'twentytwentyfour_block_stylesheets' );

/**
 * Register pattern categories.
 */
if ( ! function_exists( 'twentytwentyfour_pattern_categories' ) ) {
    /**
     * Registers the theme's block pattern categories.
     *
     * @since Twenty Twenty-Four 1.0
     * @return void
     */
    function twentytwentyfour_pattern_categories() {
        $page_category = array(
            'label'       => _x( 'Pages', 'Block pattern category', 'twentytwentyfour' ),
            'description' => __( 'A collection of full page layouts.', 'twentytwentyfour' ),
        );

        register_block_pattern_category( 'twentytwentyfour_page', $page_category );
    }
}

add_action( 'init', 'twentytwentyfour_pattern_categories' );

Blog – SANDIP KUMAR SINGH https://sksiddha.com Assistant Professor Computer Science @RRIMT Lucknow Sun, 17 Aug 2025 18:46:33 +0000 en-US hourly 1 https://wordpress.org/?v=6.8.5 https://sksiddha.com/wp-content/uploads/2024/08/sksiddha-logo-100x100.png Blog – SANDIP KUMAR SINGH https://sksiddha.com 32 32 Unit 3 Feature Generation & Feature Selection https://sksiddha.com/unit-3-feature-generation-feature-selection/ https://sksiddha.com/unit-3-feature-generation-feature-selection/#respond Sun, 27 Jul 2025 15:17:55 +0000 https://sksiddha.com/?p=1544 Extracting Meaning from Data Using Data ScienceIn the digital age, data is everywhere—generated by smartphones, social media, websites,sensors, and machines. But data alone is not valuable until we can make sense of it. That’swhere data science comes in. It helps us extract meaning, patterns, and insights from rawinformation, transforming it into a powerful tool for […]

<p>The post Unit 3 Feature Generation & Feature Selection first appeared on SANDIP KUMAR SINGH.</p>

]]>
Extracting Meaning from Data Using Data Science
In the digital age, data is everywhere—generated by smartphones, social media, websites,
sensors, and machines. But data alone is not valuable until we can make sense of it. That’s
where data science comes in. It helps us extract meaning, patterns, and insights from raw
information, transforming it into a powerful tool for decision-making, innovation, and
understanding the world.

What Is Data Science?
Data science is an interdisciplinary field that combines statistics, computer science, and
domain knowledge to analyze data and generate actionable insights. It involves collecting,
cleaning, processing, analyzing, and visualizing data to answer questions or solve problems.
Think of it as modern-day detective work—finding hidden clues in massive piles of
information to uncover the story behind the numbers.

How Data Science Extracts Meaning from Data
Let’s break down how data science turns data into knowledge:

  1. Data Collection
    Everything starts with data—collected from sources like apps, surveys, sensors, websites, or
    databases. For example, an e-commerce platform collects user clicks, purchase history, and
    product reviews.
  2. Data Cleaning and Preparation
    Raw data is often messy or incomplete. Data scientists clean it by removing errors, handling
    missing values, and formatting it correctly. This step is crucial for ensuring accurate analysis.
  3. Data Analysis and Exploration
    Using statistical techniques and tools like Python, R, or SQL, data scientists explore the data to
    find patterns, trends, and anomalies. For example, they might find that sales drop on certain
    weekdays or that users from a particular city spend more.
  4. Machine Learning and Modeling
    To make predictions or classifications, data scientists build machine learning models. These
    models “learn” from historical data to make future decisions—for instance, predicting customer
    churn or recommending products.
  5. Data Visualization
    Charts, graphs, and dashboards are used to visually present the results in a clear and
    understandable way. Tools like Tableau, Power BI, or Matplotlib help turn complex insights
    into stories anyone can understand.
  6. Interpretation and Decision-Making
    The final and most important step: drawing conclusions and making informed decisions.
    Whether it’s a business strategy, healthcare diagnosis, or policy development, the goal is to use
    data insights to act smarter and faster.
    Real-Life Example: Retail Industry
    Imagine you run an online clothing store. You want to know:
     • Which products are most popular?
     • What time of year do customers buy the most?
     • What kind of promotions increase sales?
    Using data science, you can:
     • Analyze customer behavior and trends
     • Segment customers based on preferences
     • Forecast future demand
     • Personalize recommendations
    With these insights, you can optimize inventory, improve marketing, and enhance the
    customer experience.
    The Responsibility of Interpretation
    Extracting meaning from data comes with responsibility. Data must be interpreted ethically
    and accurately, keeping in mind privacy, bias, and fairness. Misinterpreted or biased data can
    lead to wrong decisions or unfair outcomes.
    Quote: Data is the new oil, but data science is the refinery that turns it into value.
    How to Get Customer Retention Using Data Science
    Here’s a step-by-step breakdown:
  7. Collect the Right Data
    Start with data related to customer behavior and interaction:
     • Transactional data (purchases, frequency, amount)
     • Engagement data (website visits, clicks, time spent)
     • Support data (complaints, tickets raised, response time)
     • Demographics (age, location, gender)
     • Feedback and reviews
  8. Analyze Retention Metrics
    Use key metrics to understand how loyal your customers are:
     • Churn rate = (Customers lost / Total customers) × 100
     • Customer Lifetime Value (CLTV) = Revenue expected from a customer over the
    entire relationship
     • Repeat purchase rate
     • Time between purchases
    These metrics provide a baseline to monitor improvements.
  9. Predict Customer Churn (Who Might Leave?)
    Use machine learning models to predict churn (customers likely to stop buying). Common
    models:
     • Logistic Regression
     • Random Forest
     • XGBoost
     • Neural Networks
    Features used in churn models might include:
     • Drop in usage frequency
     • Late payments
     • No logins for a long time
     • Negative reviews or support tickets
    Label your past data as “churned” vs. “retained” to train supervised models.
  10. Segment Customers (Who Needs Attention?)
    Use clustering algorithms like K-Means or DBSCAN to segment customers:
     • High-value loyal customers
     • At-risk customers
     • New customers with high potential
    This allows targeted retention strategies.
  11. Personalize Retention Strategies
    Once insights are clear, apply them:
     • Personalized offers or loyalty rewards
     • Timely reminders or re-engagement emails
     • Better customer support for at-risk users
     • Product recommendations based on browsing and purchase history
    Data science helps automate and optimize these actions.
  12. A/B Test Retention Campaigns
    Run A/B tests to see which retention strategies work best. Compare two customer groups:
     • Group A: receives a 10% discount
     • Group B: receives personalized recommendations
    Use statistical analysis to determine which group had better retention.
  13. Monitor and Improve Continuously
    Use dashboards and KPIs to track customer retention over time. Useful tools include:
     • Power BI
     • Tableau
     • Google Data Studio
     • Python (Plotly, Seaborn)
    Regular monitoring ensures early detection of churn patterns.
    Example Use Case: E-commerce
    An e-commerce company used data science to:
     • Identify customers with declining purchases
     • Predict churn with a Random Forest model
     • Send targeted discounts to at-risk users
     • Improve website speed based on behavior data
    Result: 15% increase in customer retention within 3 months.
    Brainstorming in Feature Generation (Feature Engineering)
    Feature generation is a critical step in data science and machine learning where we create
    new input variables (features) from raw data to improve model performance. Brainstorming in
    this context means creatively thinking about what extra or derived features can help the model
    better understand patterns and relationships in the data.
    What is Brainstorming in Feature Generation?
    It’s the idea generation phase where data scientists explore, discuss, and invent new features
    from existing data using:
     • Domain knowledge
     • Statistical thinking
     • Business goals
     • Logical combinations and transformations
    This helps models “learn” more from the data by giving them richer and more meaningful
    inputs.

<p>The post Unit 3 Feature Generation & Feature Selection first appeared on SANDIP KUMAR SINGH.</p>

]]>
https://sksiddha.com/unit-3-feature-generation-feature-selection/feed/ 0
Unit 2 Data Analysis Process https://sksiddha.com/unit-2-data-analysis-process/ https://sksiddha.com/unit-2-data-analysis-process/#respond Sun, 27 Jul 2025 14:48:26 +0000 https://sksiddha.com/?p=1541 Introduction The Data Analytics Process is a structured method to explore, analyze, and interpret data to make better decisions. 1. Define the Problem / Objective Clearly understand what question you are trying to answer. Example: Why are sales dropping in the last 3 months? 2. Collect the Data Gather data from various sources like databases, […]

<p>The post Unit 2 Data Analysis Process first appeared on SANDIP KUMAR SINGH.</p>

]]>
Introduction

The Data Analytics Process is a structured method to explore, analyze, and interpret data to make better decisions.


1. Define the Problem / Objective

Clearly understand what question you are trying to answer.

Example: Why are sales dropping in the last 3 months?


2. Collect the Data

Gather data from various sources like databases, websites, sensors, or surveys.

Example: Collect sales reports, customer feedback, and market trends.


3. Clean and Prepare the Data

Remove duplicates, fix missing values, and organize data for analysis.

Example: Remove entries with no price or incorrect dates.


4. Analyze the Data

Use statistical tools and programming (like Python, Excel, or R) to find patterns and insights.

Example: Find which product categories have low sales and in which regions.


5. Interpret and Visualize Results

Create charts, graphs, and dashboards to explain findings in a clear way.

Example: Use a bar chart to show the drop in sales per region.


6. Make Decisions / Take Action

Use the insights to improve business strategies, operations, or performance.

Example: Increase marketing in low-performing areas or offer discounts on slow-selling items.

Notes: Data Analytics = Ask → Gather → Clean → Analyze → Visualize → Act

It’s all about turning raw data into smart decisions.

Knowledge Check in Data Science

To check your knowledge in data analytics, you can evaluate your understanding and skills through the following methods:


1. Concept Understanding

Test your knowledge of key topics like:

  • Types of data (structured/unstructured)
  • Data analytics process
  • Descriptive, diagnostic, predictive, and prescriptive analytics
  • Basic statistics (mean, median, standard deviation)

Example Question:
What is the difference between descriptive and predictive analytics?


2. Tools and Skills

Check your practical knowledge of tools like:

  • Excel (formulas, pivot tables)
  • SQL (queries to retrieve data)
  • Python or R (data handling with Pandas/Numpy)
  • Power BI or Tableau (creating dashboards)

Example Task:
Use Excel to create a dashboard showing sales trends by region.


3. Hands-on Projects

Practice with small datasets to solve real-world problems.

Example Activity:
Analyze a CSV file to find which product had the highest returns.

Exploratory Data Analysis (EDA) – In Brief

Exploratory Data Analysis (EDA) is the process of examining and visualizing data to understand its structure, patterns, and key features before applying any models or making decisions.


Purpose of EDA:

  • Identify patterns, relationships, and trends in the data
  • Detect missing values, outliers, or errors
  • Get a basic idea of how data is distributed
  • Choose the right analysis or model for further processing

Common EDA Techniques:

Technique | Purpose | Example Tool
Summary Statistics | Mean, Median, Mode, Standard Deviation | DataFrame.describe() in Pandas
Data Visualization | Plot graphs for insights | Matplotlib, Seaborn
Correlation Analysis | Find relationships between variables | corr() function
Value Counts | Frequency of categorical values | value_counts() in Pandas

Example:

You have a dataset of student marks.

  • Use histograms to see score distribution
  • Use box plots to spot outliers
  • Use scatter plots to check relationships (e.g., hours studied vs. marks scored)

Notes: EDA helps you understand your data deeply before applying any machine learning or business decisions.

Types of Exploratory Data Analysis

A. Quantitative Analysis Technique

B. Graphical Analysis Technique

Quantitative Data Analysis

Quantitative Data Analysis is the process of analyzing numerical data (data that can be measured or counted) using statistical techniques to uncover patterns, relationships, and trends.


Key Features of Quantitative Data:

  • Expressed in numbers (e.g., marks, sales, age)
  • Can be analyzed using mean, median, standard deviation, correlation, etc.
  • Often displayed with charts like histograms, scatter plots, line graphs

Example Use-Case:

Suppose we have data on students’ hours studied and exam scores. We want to analyze the relationship between them.


Python Program for Quantitative Data Analysis

# Quantitative analysis example: summarise a small dataset of study hours and
# exam scores, measure how strongly the two columns are related, and plot them.

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sample dataset: one entry per student, the two lists are aligned by position
data = {
    'Hours_Studied': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    'Exam_Score': [35, 40, 45, 50, 55, 65, 70, 75, 80]
}

# Create DataFrame (each dictionary key becomes a column)
df = pd.DataFrame(data)

# 1. View basic statistics (count, mean, std, min, quartiles, max per column)
print("Summary Statistics:\n", df.describe())

# 2. Calculate correlation between the two columns
#    (Series.corr uses the Pearson method unless told otherwise)
correlation = df['Hours_Studied'].corr(df['Exam_Score'])
print("\nCorrelation between hours studied and score:", correlation)

# 3. Plot the data as a scatter plot: one point per student
plt.figure(figsize=(8,5))
sns.scatterplot(x='Hours_Studied', y='Exam_Score', data=df)
plt.title('Hours Studied vs Exam Score')
plt.xlabel('Hours Studied')
plt.ylabel('Exam Score')
plt.grid(True)
plt.show()

Graphical Data Analytics

Graphical Analysis is a method of visualizing data using charts and graphs to identify trends, patterns, outliers, and relationships.

Below are the most commonly used graphical techniques, each with a Python example:

Histogram

Shows the distribution of a single numeric variable.

# Histogram example: show how a list of test scores is distributed.
import seaborn as sns
import matplotlib.pyplot as plt

# Test scores to be grouped into bins
data = [55, 60, 61, 62, 65, 65, 66, 68, 70, 75, 80, 85, 90, 95]

# bins=5 splits the score range into five bars; kde=True overlays a smoothed
# density curve on top of the bars
sns.histplot(data, bins=5, kde=True)
plt.title("Histogram of Test Scores")
plt.xlabel("Score")
plt.ylabel("Frequency")
plt.show()

Scatter Plot

Purpose:

Shows the relationship between two numeric variables.

# Scatter plot example: compare sepal length and sepal width in the iris dataset.
import seaborn as sns
import matplotlib.pyplot as plt  # needed for plt.title / plt.show below

# load_dataset fetches the sample data from seaborn's online example repository,
# so an internet connection is needed the first time it runs
df = sns.load_dataset("iris")

# hue='species' colours each point by the species it belongs to
sns.scatterplot(x='sepal_length', y='sepal_width', hue='species', data=df)
plt.title("Sepal Length vs Width")
plt.show()

Bar Chart

Purpose:

Compares categorical variables or grouped data.

# Bar chart example: compare the number of employees in each department.
import pandas as pd
import matplotlib.pyplot as plt

# One row per department
df = pd.DataFrame({
    'Department': ['IT', 'HR', 'Sales', 'Marketing'],
    'Employees': [40, 15, 25, 30]
})

# Departments go on the x-axis and head counts on the y-axis; the legend is
# switched off because there is only one series
df.plot(kind='bar', x='Department', y='Employees', legend=False)
plt.title("Number of Employees by Department")
plt.ylabel("Employees")
plt.show()

Pie Chart

Purpose:

Displays the percentage or proportion of parts to a whole.

# Pie chart example: show each programming language's share of the whole.
import matplotlib.pyplot as plt  # needed for plt.pie / plt.title / plt.show

# labels and sizes are aligned by position: sizes[i] is the share of labels[i]
labels = ['Python', 'Java', 'C++', 'JavaScript']
sizes = [40, 25, 20, 15]

# autopct prints each slice's percentage with one decimal place
plt.pie(sizes, labels=labels, autopct='%1.1f%%')
plt.title("Programming Language Usage")
plt.show()

Line Chart

Purpose:

Shows trends over time.

# Line chart example: show how revenue changes from month to month.
import pandas as pd
import matplotlib.pyplot as plt

# One row per month
df = pd.DataFrame({
    'Month': ['Jan', 'Feb', 'Mar', 'Apr'],
    'Revenue': [1000, 1500, 1300, 1700]
})

# marker='o' draws a dot at every data point on the line
plt.plot(df['Month'], df['Revenue'], marker='o')
plt.title("Monthly Revenue")
plt.xlabel("Month")
plt.ylabel("Revenue in USD")
plt.grid(True)
plt.show()

Summary Table:

Technique | Best For | Python Tool
Histogram | Data distribution | seaborn, matplotlib
Box Plot | Outliers, spread, quartiles | seaborn
Scatter Plot | Relationship between variables | seaborn, matplotlib
Bar Chart | Categorical comparison | pandas, matplotlib
Pie Chart | Part-to-whole visualization | matplotlib
Line Chart | Trend over time | matplotlib, pandas


Data Analytics: Conclusion and Prediction

In data analytics, the final goal is to extract meaningful insights from data that can help in making informed decisions. Two important outcomes are:


Conclusion (Descriptive Analytics)

What is it?

A conclusion summarizes what the data tells us after analysis. It answers:

“What happened?” or “What is happening?”

Purpose:

  • Identify trends and patterns
  • Discover relationships or differences
  • Highlight key findings

 

Example:

After analyzing sales data for 12 months:

“Sales increased by 20% in the second half of the year, with the highest revenue in December.”

Techniques Used:

  • Charts & visualizations
  • Descriptive statistics (mean, median)
  • Correlation and comparison

Prediction (Predictive Analytics)

What is it?

A prediction uses past data and mathematical models to forecast future outcomes. It answers:

“What is likely to happen next?”

Purpose:

  • Estimate future values (e.g., sales, stock prices, user behavior)
  • Help in planning and decision-making

Example:

Using student attendance and study hours to predict:

“This student has a 90% chance of scoring above 75% in the exam.”

Techniques Used:

  • Machine Learning models (Linear Regression, Decision Trees, etc.)
  • Time Series forecasting
  • Predictive modeling libraries like scikit-learn

Conclusion vs. Prediction – Quick Comparison

Feature | Conclusion | Prediction
Based on | Existing data | Existing + future (inference) data
Answers | What happened | What will happen
Examples | “Most sales happened in June” | “Sales will rise 10% next quarter”
Tools | Summary stats, EDA, visuals | Regression, ML models, forecasting

Unit 3: Feature Generation and Feature Selection

<p>The post Unit 2 Data Analysis Process first appeared on SANDIP KUMAR SINGH.</p>

]]>
https://sksiddha.com/unit-2-data-analysis-process/feed/ 0
Unit 1 Introduction to Data Science https://sksiddha.com/unit-1-introduction-to-data-science/ https://sksiddha.com/unit-1-introduction-to-data-science/#respond Sun, 27 Jul 2025 14:46:38 +0000 https://sksiddha.com/?p=1538 Introduction: Data science is the domain of study that deals with vast volumes of data using modern tools and techniques to find unseen patterns, derive meaningful information, and make business decisions. Data science uses complex machine learning algorithms to build predictive models. The Data Science Lifecycle Data science’s lifecycle consists of five distinct stages, each […]

<p>The post Unit 1 Introduction to Data Science first appeared on SANDIP KUMAR SINGH.</p>

]]>
Introduction: Data science is the domain of study that deals with vast volumes of data using modern tools and techniques to find unseen patterns, derive meaningful information, and make business decisions. Data science uses complex machine learning algorithms to build predictive models.

The Data Science Lifecycle: Data science’s lifecycle consists of five distinct stages, each with its own tasks:

Capture: Data Acquisition, Data Entry, Signal Reception, Data Extraction. This stage involves gathering raw structured and unstructured data.

Maintain: Data Warehousing, Data Cleansing, Data Staging, Data Processing, Data Architecture. This stage covers taking the raw data and putting it in a form that can be used.

Process: Data Mining, Clustering/Classification, Data Modeling, Data Summarization. Data scientists take the prepared data and examine its patterns, ranges, and biases to determine how useful it will be in predictive analysis.

Analyze: Exploratory/Confirmatory, Predictive Analysis, Regression, Text Mining, Qualitative Analysis. Here is the real meat of the lifecycle. This stage involves performing the various analyses on the data.

Communicate: Data Reporting, Data Visualization, Business Intelligence, Decision Making. In this final step, analysts prepare the analyses in easily readable forms such as charts, graphs, and reports.

Data Science is the process of collecting, analyzing, and using data to make decisions or predictions. It combines math, statistics, programming, and domain knowledge.

Example:

  • An online store (like Amazon) uses data science to suggest products based on your previous searches and purchases.

Key Steps in Data Science

  1. Collect Data – Example: User clicks on a website.
  2. Clean Data – Fix errors or missing info.
  3. Analyze Data – Find patterns or trends.
  4. Build Models – Use machine learning to predict.
  5. Visualize Data – Show results using charts or graphs.

Where is Data Science Used?

Healthcare

  • Use: Predict diseases, analyze medical images.
  • Example: AI predicts chances of diabetes using patient data.

Finance

  • Use: Detect fraud, credit scoring.
  • Example: Banks use data science to approve or reject loans.

Marketing

  • Use: Customer segmentation, personalized ads.
  • Example: Facebook shows ads based on your interests.

Transport

  • Use: Optimize delivery routes.
  • Example: Zomato uses data science to assign the nearest delivery person.

Education

  • Use: Track student performance, personalize learning.
  • Example: Online courses suggest next lessons based on your progress.

Why Should You Learn It?

  • High demand in jobs (Data Scientist, Analyst).
  • Useful in every industry.
  • Helps solve real-life problems with smart solutions.

Purpose of Data Science

The main purpose of Data Science is to extract useful knowledge and insights from data to help individuals and organizations make better decisions.


Key Purposes of Data Science:

  1. Understand Patterns and Trends
    1. Example: An e-commerce company analyzes customer behavior to find which products are popular.
  2. Make Predictions
    1. Example: Weather apps use past climate data to predict rainfall or temperature.
  3. Improve Decision-Making
    1. Example: Hospitals use data to decide the best treatment plans for patients.
  4. Automate Processes
    1. Example: Self-driving cars use data science to automatically detect obstacles and decide actions.
  5. Solve Complex Problems
    1. Example: Banks use data science to detect fraudulent transactions instantly.
  6. Personalize User Experience

Example: Netflix recommends shows based on your watch history.

Basic Components of Python in Data Science

Python Basics

Python is a popular programming language used in data science because of its simplicity, readability, and powerful libraries.

Key Features:

  • Easy syntax (like English)
  • Large community support
  • Tons of libraries for data handling and analysis

Essential Python Components for Data Science

Variables and Data Types

Used to store and handle different types of data.

Python code:
# Each variable below holds a value of a different built-in type.
age = 25          # int: a whole number
price = 99.99     # float: a number with a decimal point
name = "Alice"    # str: text written between quotes
is_valid = True   # bool: either True or False

Control Structures

Used to make decisions and repeat tasks.

Python code:
# If statement: the indented line runs only when the condition is true.
# NOTE: `age` is not defined in this snippet; presumably it is the variable
# from the "Variables and Data Types" example, so define it before running this.
if age > 18:
    print("Adult")

# Loop: range(5) yields 0, 1, 2, 3, 4, so each of those numbers is printed in turn.
for i in range(5):
    print(i)

 

Functions

Reusable blocks of code.

Python code:
def greet(name):
    """Build a greeting by appending ``name`` to the text "Hello ".

    Args:
        name: The string to greet.

    Returns:
        The string "Hello " followed by ``name``.
    """
    greeting = "Hello " + name
    return greeting

Popular Libraries in Data Science

LibraryPurpose
NumPyNumerical operations (arrays, math)
PandasData manipulation (tables, CSVs)
MatplotlibData visualization (charts/graphs)
SeabornAdvanced data visualization
Scikit-learnMachine learning models

DataFrames (Pandas)

Used to store and manipulate data in table format (like Excel).

Python code:
import pandas as pd

# Column name -> list of values; every list holds one entry per row.
data = {
    "Name": ["John", "Alice"],
    "Age": [28, 24],
}

# Build the table column by column from the dictionary, then display it.
df = pd.DataFrame.from_dict(data)
print(df)

 

 

 

 Visualization

Used to see data trends using charts and graphs.

Python code:
import matplotlib.pyplot as plt

# x and y hold the coordinates of the points to plot; they are paired by position.
x = [1, 2, 3]
y = [2, 4, 6]
plt.plot(x, y)  # draw a line through the (x, y) points
plt.show()      # display the figure
 

Machine Learning

Use libraries like Scikit-learn to train models on data.

Python code:
from sklearn.linear_model import LinearRegression
# Create a linear regression model; it has not been trained on any data yet.
model = LinearRegression()
# model.fit(X, y)  # Fit model to data (X and y are not defined in this snippet; supply your own inputs and targets)

Note: Python is the foundation of modern data science, and knowing its basics — variables, control structures, functions, libraries — is key to starting a successful journey in data analysis and machine learning.

<p>The post Unit 1 Introduction to Data Science first appeared on SANDIP KUMAR SINGH.</p>

]]>
https://sksiddha.com/unit-1-introduction-to-data-science/feed/ 0