Sunday 4 May 2014

Email Scrapers and Spammers

Decided to write a web spider the other day and ended up playing with Python's "smtplib" library instead. These are some of the scripts I came up with.

Scrape a page for emails and send them an email.
This script scrapes the page you feed it for email addresses and then sends an email to every address it finds. It uses Google's SMTP servers, so you need to put your own email and password in the server login section. It first downloads the source code of the webpage using "urllib", then parses it with the lxml parser via "Beautiful Soup" to create a BS object. It then uses a regex to pick out the email addresses and writes them to a file. Next it does some cleanup on the email list (sorting it and removing duplicates) and adds each address to an array. That array is used as the list of "to" addresses for the email. Next it builds the message, which is just a "Subject" header followed by the body text (the MIMEText import is not actually used), and then sends the email. There is a bit of unnecessary repetition in this script, with creating a list and then adding it to an array, but I did not start this script with this intention. Was just playing around and this is what came out :P.

Example usage: ./auto_email.py http://www.example.com

#!/usr/bin/python

import urllib
from bs4 import BeautifulSoup
import sys
import re
import os
import smtplib
from email.MIMEText import MIMEText

###This script scrapes a webpage given as a command line argument for email addresses. It then
###sends an email to each one.

def writeToFile(result, path=None):
    """Append every entry of ``result`` to the email list file, one per line.

    result -- iterable of strings (the scraped email addresses).
    path   -- file to append to; when omitted, the module-level
              ``emailLists`` name is used, as before.

    Prints how many entries were written. Returns nothing.
    """
    if path is None:
        path = emailLists
    emails = 0
    # "with" guarantees the file is flushed and closed. The original wrote
    # "file.close" without parentheses, which never closed the handle.
    with open(path, "a") as out:
        for line in result:
            out.write(line + "\n")
            emails += 1
    print(str(emails) + " results written to file")

def emailSpam(bs):
    """Collect the email-like strings found in a parsed page and save them.

    bs -- parsed page object; only its ``text`` attribute is read.

    The matches are stored in the module-level ``result`` list and then
    handed to writeToFile(). Returns nothing.
    """
    global result
    # Email filter: one or more word characters, dots or commas on each side
    # of the "@", ending in a dot followed by word characters.
    # NOTE(review): commas are accepted by the character classes, so text
    # joined to an address by a comma is captured as part of the match.
    email_re = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')
    result = []
    # Search through the page text using the email filter.
    result += email_re.findall(bs.text)
    # Write the results to file.
    writeToFile(result)

def sendEmails():
    """Send one test message to every address stored in the email list file.

    The file named by the module-level ``emailLists`` is read, a connection
    to smtp.gmail.com:587 is opened and upgraded with STARTTLS, and a single
    message is submitted with all addresses as recipients. Takes no arguments
    and returns nothing; progress is printed to stdout.
    """
    # Sort the list file in place and drop duplicate lines.
    os.system("sort -u -o emailList emailList")

    # Strip the newline that each line carries and skip blank lines, so every
    # recipient handed to smtplib is a clean address. The "with" block closes
    # the file, which the original never did.
    with open(emailLists, "r") as handle:
        emailaddrs = [line.strip() for line in handle if line.strip()]

    if not emailaddrs:
        # sendmail() fails when given no recipients; bail out before login.
        print("No email addresses found, nothing to send")
        return

    fromaddr = "anonymous@gmail.com"
    toaddr = emailaddrs
    TEXT = """Another multi message
with python"""
    SUBJECT = "Test message"
    # Minimal message: a Subject header, a blank line, then the body.
    message = 'Subject: %s\n\n%s' % (SUBJECT, TEXT)
    SERVER = "smtp.gmail.com:587"

    print("Connecting to mail server")
    server = smtplib.SMTP(SERVER)
    try:
        server.set_debuglevel(True)
        server.ehlo()
        print("Starting TTLS")
        server.starttls()
        print("TTLS started successfully")
        print("logging in to GMAIL")
        # NOTE(review): placeholder credentials hard-coded in the source;
        # replace them before use and keep real ones out of the file.
        server.login("fakeemail@gmail.com", "fakepassword")
        print("Sending emails")
        server.sendmail(fromaddr, toaddr, message)
        server.quit()
    finally:
        # Releases the socket if any step above raised before quit() ran.
        server.close()
    print("\n\nComplete")


#Get the html and parse it through BS-LXML
if len(sys.argv) < 2:
    # Without this check a missing URL surfaces as a bare IndexError.
    sys.exit("Usage: %s <url>" % sys.argv[0])
html = urllib.urlopen(sys.argv[1])

# writeToFile() appends to the list, so remove the file left by an earlier
# run. Only report the removal when a file was actually there.
if os.path.exists("emailList"):
    os.remove("emailList")
    print("Removed old list")

bs = BeautifulSoup(html.read(), "lxml")
# File name read by writeToFile() and sendEmails(). A "global" statement has
# no effect at module level, so a plain assignment is all that is needed.
emailLists = "emailList"

#Start the script
emailSpam(bs)
sendEmails()




Scrape a webpage for emails and write them to a file
Same as the above but it does not send the emails. It just writes them to a file for later use. 

#!/usr/bin/python

import urllib
from bs4 import BeautifulSoup
import sys
import mechanize
import re
import os
import smtplib
from email.MIMEText import MIMEText

###This script scrapes a webpage given as a command line argument for email addresses.

def writeToFile(result, path=None):
    """Append every entry of ``result`` to the email list file, one per line.

    result -- iterable of strings (the scraped email addresses).
    path   -- file to append to; when omitted, the module-level
              ``emailLists`` name is used, as before.

    Prints how many entries were written. Returns nothing.
    """
    if path is None:
        path = emailLists
    emails = 0
    # "with" guarantees the file is flushed and closed. The original wrote
    # "file.close" without parentheses, which never closed the handle.
    with open(path, "a") as out:
        for line in result:
            out.write(line + "\n")
            emails += 1
    print(str(emails) + " results written to file")

def emailSpam(bs):
    """Pull every email-looking string out of ``bs.text`` and save the matches.

    The matches are kept in the module-level ``result`` list and passed on
    to writeToFile(). Nothing is returned.
    """
    global result
    # Filter: word characters, dots or commas around an "@", ending in a dot
    # followed by word characters.
    pattern = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')
    result = []
    # Run the filter over the page text and keep every hit.
    result.extend(pattern.findall(bs.text))
    # Persist the hits.
    writeToFile(result)

#Get the html and parse it through BS-LXML
if len(sys.argv) < 2:
    # Without this check a missing URL surfaces as a bare IndexError.
    sys.exit("Usage: %s <url>" % sys.argv[0])
html = urllib.urlopen(sys.argv[1])
bs = BeautifulSoup(html.read(), "lxml")
# File name read by writeToFile(). A "global" statement has no effect at
# module level, so a plain assignment is all that is needed.
emailLists = "emailList"

#Start the script
emailSpam(bs)



Email a list of contacts.
This script takes an email list as input and emails each one. 

#!/usr/bin/python

#This bot takes the email list as command line argument. 

import smtplib
from email.MIMEText import MIMEText
import sys

# The email list file is the only command line argument.
if len(sys.argv) < 2:
    # Without this check a missing argument surfaces as a bare IndexError.
    sys.exit("Usage: %s <email-list-file>" % sys.argv[0])

# One address per line. Strip the newline each line carries and skip blank
# lines so that only real addresses are counted and used as recipients.
with open(sys.argv[1], "r") as handle:
    emailaddrs = [line.strip() for line in handle if line.strip()]
lineCount = len(emailaddrs)

if not emailaddrs:
    # sendmail() fails when given no recipients; stop before logging in.
    sys.exit("No email addresses found in %s" % sys.argv[1])

fromaddr = "anonymous@gmail.com"
toaddr = emailaddrs
TEXT = """This is a test message"""
SUBJECT = "It works :D"
# Minimal message: a Subject header, a blank line, then the body.
message = 'Subject: %s\n\n%s' % (SUBJECT, TEXT)
SERVER = "smtp.gmail.com:587"

print("Connecting to mail server")
server = smtplib.SMTP(SERVER)
try:
    server.set_debuglevel(True)
    server.ehlo()
    print("Starting TTLS")
    server.starttls()
    print("TTLS started successfully")
    print("logging in to GMAIL")
    # NOTE(review): placeholder credentials hard-coded in the source;
    # replace them before use and keep real ones out of the file.
    server.login("fakeemail@gmail.com", "fakepassword")
    print("Sending %s emails" % (str(lineCount)))
    server.sendmail(fromaddr, toaddr, message)
    server.quit()
finally:
    # Releases the socket if any step above raised before quit() ran.
    server.close()
print("\n\nComplete")





No comments:

Post a Comment