import numpy as np
import pandas as pd
import wordcloud
import os
# List the data directory, then load the spam dataset into `data`.
# Raw strings keep the Windows backslashes literal: '\d' and '\s' are not
# valid escape sequences, and Python warns about them in ordinary strings.
print(os.listdir(r'A:\data'))
data = pd.read_csv(r'A:\data\spamdata.csv', encoding='latin-1')
# Bare expressions: they can only display a value in an interactive session
# or notebook cell; they have no effect when the file runs as a script.
data.shape
data.head()
# Discard the three unnamed columns, then rename 'v1' to 'Type' and
# 'v2' to 'Messages'.
unused_columns = ['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4']
column_names = {'v1': 'Type', 'v2': 'Messages'}
data = data.drop(columns=unused_columns).rename(columns=column_names)
# Bare expression: only displays the column index in an interactive session.
data.columns
# Split the dataset by its 'Type' label and print each subset.
df = pd.DataFrame(data)
spam_rows = df['Type'] == 'spam'
ham_rows = df['Type'] == 'ham'
# Rows labelled 'spam'
Spamfilter = df[spam_rows]
print(Spamfilter)
# Rows labelled 'ham'
hamfilter = df[ham_rows]
print(hamfilter)
# Plot a word cloud of the most common words across all messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Join the actual message texts. str() of a Series is only its display form:
# it carries index labels and a name/dtype footer and, under pandas' default
# display settings, elides the middle rows of a long Series, so most messages
# would never reach the word cloud.
all_text = ' '.join(data['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module at the top of the
# file) to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='White', width=1000, height=1000, max_words=50).generate(all_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in the dataset', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
# Plot a word cloud of the most common words in spam messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
# Use the message texts themselves. str([Spamfilter]) is the display form of
# the whole DataFrame: it mixes in index labels and the 'Type' column and,
# under pandas' default display settings, elides the middle rows.
spam_text = ' '.join(Spamfilter['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module at the top of the
# file) to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='white', width=1000, height=1000, max_words=50).generate(spam_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in spam', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
# Plot a word cloud of the most common words in ham messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
# Use the message texts themselves. str([hamfilter]) is the display form of
# the whole DataFrame: it mixes in index labels and the 'Type' column and,
# under pandas' default display settings, elides the middle rows.
ham_text = ' '.join(hamfilter['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module at the top of the
# file) to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='white', width=1000, height=1000, max_words=50).generate(ham_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in ham', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
import pandas as pd
import wordcloud
import os
# List the data directory, then load the spam dataset into `data`.
# NOTE(review): this repeats the load step that already appears earlier in
# this file, so the CSV is read a second time.
# Raw strings keep the Windows backslashes literal: '\d' and '\s' are not
# valid escape sequences, and Python warns about them in ordinary strings.
print(os.listdir(r'A:\data'))
data = pd.read_csv(r'A:\data\spamdata.csv', encoding='latin-1')
# Bare expressions: they can only display a value in an interactive session
# or notebook cell; they have no effect when the file runs as a script.
data.shape
data.head()
# Discard the three unnamed columns, then rename 'v1' to 'Type' and
# 'v2' to 'Messages'.
unused_columns = ['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4']
column_names = {'v1': 'Type', 'v2': 'Messages'}
data = data.drop(columns=unused_columns).rename(columns=column_names)
# Bare expression: only displays the column index in an interactive session.
data.columns
# Split the dataset by its 'Type' label and print each subset.
df = pd.DataFrame(data)
spam_rows = df['Type'] == 'spam'
ham_rows = df['Type'] == 'ham'
# Rows labelled 'spam'
Spamfilter = df[spam_rows]
print(Spamfilter)
# Rows labelled 'ham'
hamfilter = df[ham_rows]
print(hamfilter)
# Plot a word cloud of the most common words across all messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Join the actual message texts. str() of a Series is only its display form:
# it carries index labels and a name/dtype footer and, under pandas' default
# display settings, elides the middle rows of a long Series, so most messages
# would never reach the word cloud.
all_text = ' '.join(data['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module earlier in the file)
# to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='White', width=1000, height=1000, max_words=50).generate(all_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in the dataset', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
# Plot a word cloud of the most common words in spam messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
# Use the message texts themselves. str([Spamfilter]) is the display form of
# the whole DataFrame: it mixes in index labels and the 'Type' column and,
# under pandas' default display settings, elides the middle rows.
spam_text = ' '.join(Spamfilter['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module earlier in the file)
# to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='white', width=1000, height=1000, max_words=50).generate(spam_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in spam', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)
# Plot a word cloud of the most common words in ham messages
# (at most 50 words are drawn).
from wordcloud import WordCloud
# Use the message texts themselves. str([hamfilter]) is the display form of
# the whole DataFrame: it mixes in index labels and the 'Type' column and,
# under pandas' default display settings, elides the middle rows.
ham_text = ' '.join(hamfilter['Messages'].dropna().astype(str))
# NOTE: this rebinds `wordcloud` (imported as a module earlier in the file)
# to a WordCloud instance; the name is kept for existing readers of it.
wordcloud = WordCloud(background_color='white', width=1000, height=1000, max_words=50).generate(ham_text)
plt.rcParams['figure.figsize'] = (10, 10)
# Open a fresh figure so this cloud is not drawn onto a figure that another
# plot in this script is using.
plt.figure()
plt.title('Most Common words in ham', fontsize=20)
plt.axis('off')
plt.imshow(wordcloud)