Data Science
https://tinyurl.com/Allinonebhai
http://tinyurl.com/tybscds
pip install numpy pandas matplotlib scikit-learn
pip install statsmodels
Logistic Regression
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix
# Binary logistic regression demo on the iris data set: predict whether a
# sample belongs to class 0 or not, then report the usual metrics.
iris = load_iris()
X = iris.data
# Collapse the multi-class target into a binary label: 1 for class 0, else 0.
y = (iris.target == 0).astype(int)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)

print("Logistic Regression Metrics")
print("Accuracy:", accuracy_score(y_test, y_pred_logistic))
print("Precision:", precision_score(y_test, y_pred_logistic))
print("Recall:", recall_score(y_test, y_pred_logistic))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_logistic))
print("\nClassification Report:\n", classification_report(y_test, y_pred_logistic))
Linear Regression (change the data values as given)
import pandas as pd
from sklearn.linear_model import LinearRegression
# Fit a one-feature linear regression of Salary on Experience and print the
# fitted salary for the first few training rows.
data = {
    "Experience": [2, 10, 4, 20, 8, 12, 22],
    "Salary": [30000, 95000, 45000, 178000, 84000, 120000, 200000],
}
df = pd.DataFrame(data)
X = df[['Experience']]  # 2-D feature frame, as the estimator expects
y = df['Salary']

model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)

print("First few predictions:")
# head(5) caps the listing at five rows but also copes with shorter data sets,
# so editing the sample data above cannot cause an out-of-range index.
for years, predicted in zip(X['Experience'].head(5), y_pred):
    print(f"Years of Experience: {years} → Predicted Salary: Rs. {predicted:.0f}")
Linear Regression (predicting petal width from petal length)
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Simple linear regression between two iris feature columns: column 3 is the
# response and column 2 the single predictor (labelled petal width / petal
# length in the plot below).
data = load_iris()
X = data.data[:, [2]]  # list index keeps the 2-D (n_samples, 1) shape
y = data.data[:, 3]

model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
print("R² Score:", model.score(X, y))

# Raw observations in blue, fitted line in red.
plt.scatter(X, y, color='blue')
plt.plot(X, y_pred, color='red')
plt.title("Linear Regression")
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.show()
Create CSV file from given data.
import pandas as pd
# Build a small student table, impute its missing values, and drop rows whose
# Marks are outliers by the 1.5 * IQR rule.
data = {
    'Rollno': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    'Name': ['Sudin', 'Shaima', 'Raina', 'Paul', 'Rahul', 'Gopal', 'Yatin', 'Jim', 'Nima'],
    'Age': [44, 46, 27, 38, 46, None, 59, 36, 45],
    'Marks': [47, 86, 45, None, 45, 67, 45, 34, 32],
    'Class': ['FY', 'SY', 'TY', 'SY', 'FY', 'TY', 'FY', 'FY', 'TY'],
}
df = pd.DataFrame(data)

# Impute: rounded mean for Age, median for Marks.
age_fill = df['Age'].mean().round()
marks_fill = df['Marks'].median()
df['Age'] = df['Age'].fillna(age_fill)
df['Marks'] = df['Marks'].fillna(marks_fill)

# Quartiles are taken after imputation, so the filled value takes part in them.
Q1 = df['Marks'].quantile(0.25)
Q3 = df['Marks'].quantile(0.75)
IQR = Q3 - Q1

# Keep only rows whose Marks lie within the inclusive fences.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
within_fences = (df['Marks'] >= lower_fence) & (df['Marks'] <= upper_fence)
df = df[within_fences]
print("\nCleaned Data:\n", df)
Feature Scaling Standardization/normalization
# Feature scaling: standardization / normalization
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Demonstrate min-max and standard scaling on the first five iris rows.
# Both scalers are fitted on that five-row sample only, not on the full data.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
sample = df.head()
print(sample)

minmax = MinMaxScaler().fit(sample).transform(sample)
print("\nData after Min-Max scaling:\n", minmax)

standard = StandardScaler().fit(sample).transform(sample)
print("\nData after Standard scaling:\n", standard)
Principal component Analysis (PCA)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# PCA on the standardized iris features: plot the cumulative explained
# variance, pick the smallest number of components reaching 95%, and show the
# data projected onto the first two retained components.
iris = load_iris()
X = iris.data
y = iris.target

# Standardize before fitting the PCA.
X_scaled = StandardScaler().fit_transform(X)

pca = PCA()
X_pca = pca.fit_transform(X_scaled)
explained = np.cumsum(pca.explained_variance_ratio_)

# Plot against 1..n so the x-axis really is the number of components;
# plotting `explained` alone would start the axis at 0.
component_counts = np.arange(1, len(explained) + 1)
plt.plot(component_counts, explained, marker='o')
plt.xticks(component_counts)
plt.title("Cumulative Explained Variance")
plt.xlabel("Number of Components")
plt.ylabel("Explained Variance")
plt.grid(True)
plt.show()

# argmax returns the first index where the threshold is met; +1 turns that
# zero-based index into a component count.
n_components = np.argmax(explained >= 0.95) + 1
print("Components needed to explain 95% variance:", n_components)

X_reduced = PCA(n_components=n_components).fit_transform(X_scaled)

# NOTE: the scatter indexes columns 0 and 1, so it assumes n_components >= 2.
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap='viridis')
plt.title("PCA (Reduced to 2D)")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar(label="Target")
plt.grid(True)
plt.show()
One-Sample T-Test
from scipy import stats
# One-sample t-test: compare the sample mean of `scores` with a hypothesized
# population mean and print the resulting statistic and p-value.
scores = [72, 88, 64, 74, 67, 79, 85, 75, 89, 77]
hypothesized_mean = 70

result = stats.ttest_1samp(scores, popmean=hypothesized_mean)
t_stat = result.statistic
p_value = result.pvalue

print("T-statistic:", t_stat)
print("P-value:", p_value)
Comments
Post a Comment