반응형
HTML parser class ][ SecurityFocus의 title과 link 정보 가져오기
# -*- coding: utf-8 -*-
#
# written by kyoung chip, jang
#
# python 3.6
# pip list
#   beautifulsoup4 requests
#
# pip install bs4
# pip install requests
#
"""Fetch a SecurityFocus BID page and print its title and tab links."""

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root used to turn the site-relative hrefs found on a page into
# absolute URLs.
BASE_URL = "http://www.securityfocus.com/"

# Seconds to wait for the server; requests waits indefinitely when no
# timeout is given.
DEFAULT_TIMEOUT = 10


class CHtmlParser:
    """Thin wrapper around ``requests`` that returns parsed HTML."""

    def __init__(self):
        # Last ``requests.Response`` received ('' until a request is made).
        self.req = ''

    def login(self, url, user, passwd, timeout=DEFAULT_TIMEOUT):
        """GET *url* with HTTP basic auth and print the response body.

        Args:
            url: Address to request.
            user: User name for the ``auth`` tuple.
            passwd: Password for the ``auth`` tuple.
            timeout: Seconds to wait before giving up on the request.
        """
        self.req = requests.get(url, auth=(user, passwd), timeout=timeout)
        print(self.req.text)

    def getHtml(self, url, timeout=DEFAULT_TIMEOUT):
        """GET *url* and return its body parsed with ``html.parser``.

        Args:
            url: Address to request.
            timeout: Seconds to wait before giving up on the request.

        Returns:
            A ``BeautifulSoup`` document built from the response text.
        """
        self.req = requests.get(url, timeout=timeout)
        return BeautifulSoup(self.req.text, 'html.parser')


class CSecurityFocus:
    """Scrape the title and tab links of a SecurityFocus BID page."""

    def __init__(self):
        self.html = CHtmlParser()
        # Parsed document of the last fetched page ('' until getHtml runs).
        self.response = ''

    def getHtml(self, url):
        """Fetch *url* and keep the parsed document in ``self.response``."""
        self.response = self.html.getHtml(url)

    def getLink(self):
        """Print the absolute URL of every tab link and return them.

        Reads the ``href`` attribute of each ``#tabs > ul > li > a``
        element instead of slicing the serialized tag, so attribute order
        and extra attributes no longer affect the result. Anchors without
        an ``href`` are skipped.

        Returns:
            List of absolute link URLs, in document order.
        """
        links = []
        for anchor in self.response.select('#tabs > ul > li > a'):
            href = anchor.get('href')
            if not href:
                continue
            # urljoin leaves absolute hrefs untouched and resolves
            # site-relative ones ("/bid/1/info") against the site root.
            link = urljoin(BASE_URL, href)
            print(link)
            links.append(link)
        return links

    def getVulnInfo(self):
        """Print and return the vulnerability title of the fetched page.

        The title is the text of the first ``span.title`` directly under
        ``#vulnerability``.

        Returns:
            The title text, or '' when the page has no such element.
        """
        span = self.response.select_one('#vulnerability > span.title')
        title = span.get_text() if span is not None else ''
        print(title)
        return title

    def doWork(self):
        """Fetch the BID 1 info page, then print its title and tab links."""
        self.getHtml("http://www.securityfocus.com/bid/1/info")
        self.getVulnInfo()
        self.getLink()


if __name__ == '__main__':
    r = CSecurityFocus()
    r.doWork()
반응형
'Python > 0x01-url' 카테고리의 다른 글
wget (0) | 2017.10.06 |
---|---|
html parser class ][ 보안뉴스 가장 많이 본 뉴스 keyword 가져오기 (0) | 2017.10.03 |
request class (0) | 2017.10.03 |
url parser ][ CUrlParser (0) | 2017.10.03 |