본문 바로가기
Python/0x01-url

html parser class ][ security focus title과 link 정보 가져오기

by SpeeDr00t 2017. 10. 5.
반응형

html parser class ][ security focus title과 link 정보 가져오기


# -*- coding: utf-8 -*-
# 
# written by kyoung chip, jang
#
# python 3.6
# pip list
# beautifulsoup4 requests
# 
# pip install bs4
# pip install requests
#
import requests
from bs4 import BeautifulSoup

class CHtmlParser :
    """Fetch pages with ``requests`` and parse them with BeautifulSoup.

    The most recent ``requests`` response is kept in ``self.req`` so callers
    can inspect it after a call.
    """

    # Seconds to wait on the server before giving up.  Without an explicit
    # timeout ``requests`` may block indefinitely on an unresponsive host.
    DEFAULT_TIMEOUT = 10

    def __init__( self ) :
        # Holds the last response; an empty string until a request is made.
        self.req = ''

    def login( self, url , user , passwd , timeout=DEFAULT_TIMEOUT ) :
        """GET *url* with ``(user, passwd)`` credentials and print the body.

        Args:
            url: Address to request.
            user: User name passed as the first element of ``auth``.
            passwd: Password passed as the second element of ``auth``.
            timeout: Seconds to wait for the server (default 10).
        """
        self.req = requests.get( url , auth=(user , passwd) , timeout=timeout )
        print( self.req.text )

    def getHtml( self , url , timeout=DEFAULT_TIMEOUT ) :
        """GET *url* and return its body parsed with ``html.parser``.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server (default 10).

        Returns:
            A ``BeautifulSoup`` object built from the response text.
        """
        self.req = requests.get( url , timeout=timeout )
        return BeautifulSoup( self.req.text, 'html.parser')
		
        		

		
class CSecurityFocus :
    """Print the title and tab links of a SecurityFocus vulnerability page.

    A page is loaded with ``getHtml`` and kept in ``self.response``; the
    ``getVulnInfo`` and ``getLink`` methods then read from that parsed page.
    """

    def __init__ ( self ) :
        self.html = CHtmlParser()
        # Parsed page; an empty string until ``getHtml`` has been called.
        self.response = ''

    def getHtml( self, url ) :
        """Fetch and parse *url*, storing the result in ``self.response``."""
        self.response = self.html.getHtml( url )

    def getLink( self ) :
        """Print the target of every ``#tabs > ul > li > a`` anchor.

        The ``href`` attribute is read directly from each tag instead of
        slicing the tag's string form, so anchors that carry other attributes
        are handled correctly.  Site-relative ``/bid/...`` targets are printed
        as absolute URLs; anchors without an ``href`` are skipped.
        """
        for anchor in self.response.select('#tabs > ul > li > a') :
            href = anchor.get('href')
            if not href :
                continue
            # Only prefix site-relative paths so an already absolute URL that
            # happens to contain "/bid/" is left untouched.
            if href.startswith('/bid/') :
                href = "http://www.securityfocus.com" + href
            print( href )

    def getVulnInfo( self ) :
        """Print the text of the first ``title`` span under ``#vulnerability``.

        Nothing is printed when the page has no such span.
        """
        for span in self.response.select('#vulnerability > span') :
            # ``class`` may be absent, hence the fallback to an empty list.
            if 'title' in ( span.get('class') or [] ) :
                print( span.get_text() )
                return

    def doWork( self , url="http://www.securityfocus.com/bid/1/info" ) :
        """Load *url*, then print its vulnerability title and tab links.

        Args:
            url: Page to scrape; defaults to the info page of bid 1.
        """
        self.getHtml( url )

        self.getVulnInfo()
        self.getLink()

		
if __name__ == '__main__':
    # Run the demo scrape only when this file is executed as a script.
    CSecurityFocus().doWork()


반응형

'Python > 0x01-url' 카테고리의 다른 글

wget  (0) 2017.10.06
html parser class ][ 보안뉴스 가장 많이 본 뉴스 keyword 가져오기  (0) 2017.10.03
request class  (0) 2017.10.03
url parser ][ CUrlParser  (0) 2017.10.03