Allow downloading files without a description #12

Closed
Wants to merge 10 commits
xgoogle/browser.py (2 changes: 1 addition & 1 deletion)
@@ -20,6 +20,7 @@
 # awk -F\" '{B[$6]++} END { for (b in B) { print B[b] ": " b } }' |
 # sort -rn |
 # head -20
+'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6',
 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.6) Gecko/2009011912 Firefox/3.0.6',
 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6 (.NET CLR 3.5.30729)',
@@ -32,7 +33,6 @@
 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.5) Gecko/2008121621 Ubuntu/8.04 (hardy) Firefox/3.0.5',
 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1',
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)',
-'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
 )

xgoogle/search.py (21 changes: 15 additions & 6 deletions)
@@ -237,7 +237,7 @@ def _extract_results(self, soup):
     def _extract_result(self, result):
         title, url = self._extract_title_url(result)
         desc = self._extract_description(result)
-        if not title or not url or not desc:
+        if not title or not url:
             return None
         return SearchResult(title, url, desc)
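Note: with the relaxed check above, a hit whose snippet cannot be parsed is no longer dropped; it is returned with desc left as None. A minimal usage sketch, assuming the usual xgoogle entry point (GoogleSearch / get_results), a Python 2 environment, and a live connection; the query string is made up:

from xgoogle.search import GoogleSearch, SearchError

try:
    gs = GoogleSearch("ubuntu iso download")   # hypothetical query
    for res in gs.get_results():
        # res.desc may now be None for results that had no parsable snippet
        print("%s  %s" % (res.url, res.desc or "<no description>"))
except SearchError, e:
    print("Search failed: %s" % e)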

@@ -250,7 +253,10 @@ def _extract_title_url(self, result):
         title = ''.join(title_a.findAll(text=True))
         title = self._html_unescape(title)
         url = title_a['href']
-        match = re.match(r'/url\?q=(http[^&]+)&', url)
+        match = re.match(r'/url\?q=((http|ftp|https)[^&]+)&', url)
         if match:
             url = urllib.unquote(match.group(1))
+        match = re.match(r'/interstitial\?url=((http|ftp|https)[^&]+)&', url)
+        if match:
+            url = urllib.unquote(match.group(1))
         return title, url
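The pattern above now also matches ftp:// targets, and a second pass unwraps Google's /interstitial?url= warning links. A standalone sketch of that unwrapping, written against the Python 2 urllib used elsewhere in search.py; the sample hrefs are invented for illustration:

import re
import urllib

def unwrap_google_href(url):
    # Result links may be wrapped as /url?q=<target>&... or, for flagged
    # pages, /interstitial?url=<target>&...; return the decoded target.
    for pattern in (r'/url\?q=((http|ftp|https)[^&]+)&',
                    r'/interstitial\?url=((http|ftp|https)[^&]+)&'):
        match = re.match(pattern, url)
        if match:
            return urllib.unquote(match.group(1))
    return url

print(unwrap_google_href('/url?q=ftp://example.com/pub/file.iso&sa=U&ei=abc'))
print(unwrap_google_href('/interstitial?url=http://example.com/page&sa=U'))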
@@ -260,6 +263,10 @@ def _extract_description(self, result):
         if not desc_div:
             self._maybe_raise(ParseError, "Description tag in Google search result was not found", result)
             return None
+        desc_span = desc_div.find('span', {'class': 'st'})
+        if not desc_span:
+            self._maybe_raise(ParseError, "Description tag in Google search result was not found", result)
+            return None
 
         desc_strs = []
         def looper(tag):
@@ -275,8 +282,8 @@ def looper(tag):
                 except AttributeError:
                     desc_strs.append(t)
 
-        looper(desc_div)
-        looper(desc_div.find('wbr')) # BeautifulSoup does not self-close <wbr>
+        looper(desc_span)
+        looper(desc_span.find('wbr')) # BeautifulSoup does not self-close <wbr>
 
         desc = ''.join(s for s in desc_strs if s)
         return self._html_unescape(desc)
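The two hunks above move description parsing to the <span class="st"> element and feed that span to looper(), which collects its text up to the first <br>. A small lookup sketch, assuming the BeautifulSoup 3 API that search.py already relies on; the HTML fragment is invented:

from BeautifulSoup import BeautifulSoup   # BeautifulSoup 3, as used by xgoogle

# Invented fragment of a result entry, for illustration only.
html = '''<li class="g">
  <h3 class="r"><a href="/url?q=http://example.com/&sa=U">Example title</a></h3>
  <div class="s"><span class="st">A short snippet<br>text after the break</span></div>
</li>'''

soup = BeautifulSoup(html)
desc_div = soup.find('div', {'class': 's'})
desc_span = desc_div.find('span', {'class': 'st'})
# The real code walks the span with looper() and stops at the first <br>;
# this simply joins all of its text nodes.
print(''.join(desc_span.findAll(text=True)))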
@@ -322,7 +329,8 @@ def _extract_info(self, soup):
         return {'from': int(matches.group(1)), 'to': int(matches.group(2)), 'total': int(matches.group(3))}
 
     def _extract_results(self, soup):
-        results = soup.findAll('p', {'class': 'g'})
+        #results = soup.findAll('p', {'class': 'g'})
+        results = soup.findAll('li', 'g')  # results are now in <li class="g"> elements
         ret_res = []
         for result in results:
             eres = self._extract_result(result)
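Passing a bare string as the second argument to findAll is BeautifulSoup shorthand for matching on the CSS class, so the new call selects the same tags as the dict form on the commented-out line. A tiny self-contained check, again assuming BeautifulSoup 3:

from BeautifulSoup import BeautifulSoup   # BeautifulSoup 3, as used by xgoogle

soup = BeautifulSoup('<ol><li class="g">hit one</li><li class="g">hit two</li></ol>')
# A bare string as the second argument matches on CSS class, so the two
# calls below select the same <li class="g"> tags.
print([li.string for li in soup.findAll('li', 'g')])
print([li.string for li in soup.findAll('li', {'class': 'g'})])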
@@ -352,7 +360,8 @@ def _extract_title_url(self, result):
         return title, url
 
     def _extract_description(self, result):
-        desc_td = result.findNext('td')
+        #desc_td = result.findNext('td')
+        desc_td = result.find('span', 'st')  # description now lives in a <span class="st">
         if not desc_td:
             self._maybe_raise(ParseError, "Description tag in Google search result was not found", result)
             return None