#!/usr/bin/python # -*- encoding: utf-8 -*- import urllib import BeautifulSoup def coreString(tag): if isinstance(tag, BeautifulSoup.Tag): return coreString(tag.contents[0]) return tag if __name__ == '__main__': html_source=urllib.urlopen('http://search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%s'%(u'쿵야어드벤처'.encode('cp949'))) soup=BeautifulSoup.BeautifulSoup(html_source, fromEncoding='cp949') rank=soup('div', {'id': 'ranking'})[0] print coreString(rank.h3) ranking_list=rank('ul', {'id': 'ranking_list'})[0] for i in ranking_list('li'): print '\t'+coreString(i.span)+'.\t'+coreString(i.a)