AK Python
1.86K subscribers
39 photos
2 videos
11 files
236 links
Join here to unlock your programming ability
Download Telegram
New video, check it out!
import numpy as np
import pandas as pd
import wordcloud


import os
# List the data directory, then load the raw SMS dataset.
# Raw strings keep the Windows backslashes literal: '\d' and '\s' are not
# valid escape sequences, so a plain string literal triggers a warning.
print(os.listdir(r'A:\data'))

data = pd.read_csv(r'A:\data\spamdata.csv', encoding='latin-1')
# Notebook-style inspection: these bare expressions only display a result
# when they are the last statement of an interactive cell.
data.shape
data.head()

# Discard the three 'Unnamed' columns, then give the remaining 'v1' / 'v2'
# columns the descriptive names used by the rest of the script.
data = (
    data
    .drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'])
    .rename(columns={'v1': 'Type', 'v2': 'Messages'})
)

# Notebook-style inspection of the resulting column labels.
data.columns

# Show every row whose Type is 'spam'.
df = pd.DataFrame(data)
Spamfilter = df[df['Type'].eq('spam')]
print(Spamfilter)

# Show every row whose Type is 'ham'.
df = pd.DataFrame(data)
hamfilter = df[df['Type'].eq('ham')]
print(hamfilter)

# Draw a word cloud of the most frequent words across all messages.

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Feed the cloud the full text of every message. str(data['Messages']) would
# only give pandas' abbreviated display of the Series (index labels, an elided
# middle for a long Series, and a Name/dtype footer), not the messages.
wordcloud = WordCloud(
    background_color='White', width=1000, height=1000, max_words=50
).generate(' '.join(data['Messages'].astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in the dataset', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)

# Draw a word cloud of the most frequent words in spam messages.

from wordcloud import WordCloud

# Use the message text only. str([Spamfilter]) would render pandas'
# abbreviated printout of the whole frame, which mixes in the index labels
# and the Type column and cuts off long messages.
wordcloud = WordCloud(
    background_color='white', width=1000, height=1000, max_words=50
).generate(' '.join(Spamfilter['Messages'].astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in spam', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)

# Draw a word cloud of the most frequent words in ham messages.

from wordcloud import WordCloud

# Use the message text only. str([hamfilter]) would render pandas'
# abbreviated printout of the whole frame, which mixes in the index labels
# and the Type column and cuts off long messages.
wordcloud = WordCloud(
    background_color='white', width=1000, height=1000, max_words=50
).generate(' '.join(hamfilter['Messages'].astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in ham', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
First part of the code (spam SMS prediction machine learning project) 👆
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the raw SMS dataset. The raw string keeps the Windows backslashes
# literal: '\d' and '\s' are not valid escape sequences, so a plain string
# literal triggers a warning.
data = pd.read_csv(r"A:\data\spamdata.csv", encoding='latin-1')
data.head()

# Give the label and text columns descriptive names.
data = data.rename(columns={'v1': 'Type', 'v2': 'Messages'})

# Notebook-style inspection of the resulting column labels.
data.columns

import re
import nltk

nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build the stop-word set and the stemmer once; neither changes between
# messages, so recreating them per word / per row is wasted work.
stop_words = set(stopwords.words('english'))
ps = PorterStemmer()

# One cleaned, stemmed, space-joined string per message.
corpus = []

# Iterate over every message actually present instead of a hard-coded row
# count, so the corpus always lines up with the loaded data.
for message in data['Messages']:
    # Replace everything except ASCII letters with spaces, lower-case the
    # text, and split it into words.
    review = re.sub('[^a-zA-Z]', ' ', message).lower().split()

    # stemming, skipping English stop words
    review = [ps.stem(word) for word in review if word not in stop_words]

    # joining them back with space
    corpus.append(' '.join(review))


from sklearn.feature_extraction.text import CountVectorizer

# Turn the cleaned corpus into a dense matrix of per-message term counts.
cv = CountVectorizer()
x = cv.fit_transform(corpus).toarray()

# The target is the first column of the frame.
y = data[data.columns[0]]

# Report the feature-matrix and target shapes, one per line.
print(x.shape, y.shape, sep='\n')

from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

# Report the shape of each partition, one per line.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')


from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply
# the same transformation to both partitions.
sc = StandardScaler().fit(x_train)

x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

from sklearn.ensemble import RandomForestClassifier
# NOTE(review): pandas_confusion appears to be unmaintained; sklearn.metrics
# (confusion_matrix / ConfusionMatrixDisplay) is a candidate replacement —
# confirm before switching.
from pandas_confusion import ConfusionMatrix
import matplotlib.pyplot as plt

# Seed the forest the same way the train/test split is seeded, so the
# accuracies printed below are repeatable from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Mean accuracy on the data the model was fitted on and on the held-out data.
print("Training Accuracy :", model.score(x_train, y_train))
print("Testing Accuracy :", model.score(x_test, y_test))

# Compare the true test labels with the predicted ones.
confusion_matrix = ConfusionMatrix(y_test, y_pred)
print("Confusion matrix:\n%s" % confusion_matrix)


confusion_matrix.plot()
Second part of the code (spam SMS detection machine learning project) 👆
spamdata.csv
491.9 KB
Dataset file
Suggestion: Use a Jupyter notebook to run this machine learning project ⚠️
Keep support & share❤️
Check out our playlists ...!
New series 👍‼️
‼️ It's 5K now..! On YouTube
Thanks for your support & Share
#YKYG
Breadth-first search (BFS) uses which data structure?
Anonymous Quiz
25%
Stack
30%
Queue
21%
Linked lists
24%
Trees
New video : Today @ 8:00Pm👍
How many valid built-in data types are there in Python?
Anonymous Quiz
31%
4
28%
5
29%
6
12%
3