# AK Python

import numpy as np
import pandas as pd
import wordcloud

import os
# List the contents of the data directory.
# Raw strings keep the Windows backslashes literal; in a normal string
# '\d' and '\s' are invalid escape sequences that newer Python versions
# warn about.
print(os.listdir(r'A:\data'))

# Read the CSV into a DataFrame, decoding the file as latin-1.
data = pd.read_csv(r'A:\data\spamdata.csv', encoding='latin-1')

# Bare expressions: they display the shape and first rows only when run
# interactively (e.g. in a notebook cell); in a plain script they are no-ops.
data.shape
data.head()

# Remove the three 'Unnamed' columns, then rename 'v1' -> 'Type' and
# 'v2' -> 'Messages', in a single chained expression.
data = (
    data
    .drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'])
    .rename(columns={'v1': 'Type', 'v2': 'Messages'})
)

# Bare expression: shows the resulting column index when run interactively.
data.columns

# Select and print the rows whose 'Type' is 'spam'.
df = pd.DataFrame(data)
Spamfilter = df[df['Type'].eq('spam')]
print(Spamfilter)

# Select and print the rows whose 'Type' is 'ham'.
df = pd.DataFrame(data)
hamfilter = df[df['Type'].eq('ham')]
print(hamfilter)

# Plot a word cloud of the most common words across all messages.

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Build the cloud from the actual message text. str(data['Messages']) would
# only give pandas' display repr, which is truncated for a Series longer than
# the display limit and also contains index labels and the Name/dtype footer.
# Missing values are dropped so they do not show up as the word 'nan'.
# NOTE: this rebinds the name `wordcloud` (imported as a module at the top of
# the file) to a WordCloud instance; the name is kept for compatibility.
wordcloud = WordCloud(
    background_color='White', width=1000, height=1000, max_words=50
).generate(' '.join(data['Messages'].dropna().astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in the dataset', fontsize = 20)
plt.axis('off')
plt.imshow(wordcloud)

# Plot a word cloud of the most common words in spam messages.

from wordcloud import WordCloud

# Build the cloud from the spam message text only. str([Spamfilter]) would
# give the repr of a list holding the DataFrame: a truncated table that also
# includes the row index and the 'Type' column, so the label 'spam' itself
# and index numbers would be counted as words.
# Missing values are dropped so they do not show up as the word 'nan'.
wordcloud = WordCloud(
    background_color='white', width=1000, height=1000, max_words=50
).generate(' '.join(Spamfilter['Messages'].dropna().astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in spam', fontsize = 20)
plt.axis('off')
plt.imshow(wordcloud)

# Plot a word cloud of the most common words in ham messages.

from wordcloud import WordCloud

# Build the cloud from the ham message text only. str([hamfilter]) would
# give the repr of a list holding the DataFrame: a truncated table that also
# includes the row index and the 'Type' column, so the label 'ham' itself
# and index numbers would be counted as words.
# Missing values are dropped so they do not show up as the word 'nan'.
wordcloud = WordCloud(
    background_color='white', width=1000, height=1000, max_words=50
).generate(' '.join(hamfilter['Messages'].dropna().astype(str)))

plt.rcParams['figure.figsize'] = (10, 10)
plt.title('Most Common words in ham', fontsize = 20)
plt.axis('off')
plt.imshow(wordcloud)

# 2.5K views · 03:37

# About

# Blog

# Apps

# Platform