From 3af09032b1937d2ccebf0e6e8973a4ece16c22c3 Mon Sep 17 00:00:00 2001
From: Berlin
Date: Sun, 7 Oct 2018 17:27:22 +0800
Subject: [PATCH 1/2] fix parser and connection

---
Review note: the added line `desc = desc is None and desc or ""` always
evaluated to "" (both branches of the and/or chain fall through to ""), so
every result lost its description. It now reads `desc = desc or ""`, which
only blanks a missing description.

 xgoogle/browser.py | 12 ++++++------
 xgoogle/search.py  |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/xgoogle/browser.py b/xgoogle/browser.py
index c7d2618..df4177a 100755
--- a/xgoogle/browser.py
+++ b/xgoogle/browser.py
@@ -81,13 +81,13 @@ def __init__(self, user_agent=BROWSERS[0], debug=False, use_pool=False):
         self.debug = debug
 
     def get_page(self, url, data=None):
-        handlers = [PoolHTTPHandler]
-        opener = urllib2.build_opener(*handlers)
-        if data: data = urllib.urlencode(data)
-        request = urllib2.Request(url, data, self.headers)
+        opener = urllib2.build_opener()
+        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
+
         try:
-            response = opener.open(request)
-            return response.read()
+            response = opener.open(url)
+            result = response.read()
+            return result
         except (urllib2.HTTPError, urllib2.URLError), e:
             raise BrowserError(url, str(e))
         except (socket.error, socket.sslerror), msg:
diff --git a/xgoogle/search.py b/xgoogle/search.py
index 98b681e..1b0a534 100755
--- a/xgoogle/search.py
+++ b/xgoogle/search.py
@@ -205,7 +205,6 @@ def _get_results_page(self):
             page = self.browser.get_page(safe_url)
         except BrowserError, e:
             raise SearchError, "Failed getting %s: %s" % (e.url, e.error)
-
         return BeautifulSoup(page)
 
     def _extract_info(self, soup):
@@ -226,7 +225,7 @@ def _extract_info(self, soup):
         return {'from': int(matches.group(1)), 'to': int(matches.group(2)), 'total': int(matches.group(3))}
 
     def _extract_results(self, soup):
-        results = soup.findAll('li', {'class': 'g'})
+        results = soup.findAll('div', {'class': 'g'})
         ret_res = []
         for result in results:
             eres = self._extract_result(result)
@@ -237,7 +236,8 @@ def _extract_results(self, soup):
     def _extract_result(self, result):
         title, url = self._extract_title_url(result)
         desc = self._extract_description(result)
-        if not title or not url or not desc:
+        desc = desc or ""
+        if not title or not url:
             return None
         return SearchResult(title, url, desc)
 
From 0c84eba9967208609ebf19ef1face05bd5dde2c8 Mon Sep 17 00:00:00 2001
From: Berlin
Date: Sun, 7 Oct 2018 17:28:43 +0800
Subject: [PATCH 2/2] refine

---
 xgoogle/browser.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/xgoogle/browser.py b/xgoogle/browser.py
index df4177a..1afb369 100755
--- a/xgoogle/browser.py
+++ b/xgoogle/browser.py
@@ -86,8 +86,7 @@ def get_page(self, url, data=None):
 
         try:
             response = opener.open(url)
-            result = response.read()
-            return result
+            return response.read()
         except (urllib2.HTTPError, urllib2.URLError), e:
             raise BrowserError(url, str(e))
         except (socket.error, socket.sslerror), msg: