Skip to content

Commit

Permalink
Use requests_html to implement ComposerPopularityFeature
Browse files Browse the repository at this point in the history
ComposerPopularityFeature had stopped working because Google's response to the static User-Agent string we were sending no longer included result counts.
  • Loading branch information
jacobtylerwalls committed May 13, 2021
1 parent f4c1f1e commit dddbb93
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
20 changes: 8 additions & 12 deletions music21/features/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import math
from typing import Optional

from urllib.request import Request, urlopen
from urllib.parse import urlencode


Expand Down Expand Up @@ -847,20 +846,19 @@ class ComposerPopularity(featuresModule.FeatureExtractor):
Requires an internet connection.
Changed in v7 -- implementation uses the package `requests_html`, which must
be installed.
>>> #_DOCS_SHOW s = corpus.parse('mozart/k155', 2)
>>> s = stream.Score() #_DOCS_HIDE
>>> s.append(metadata.Metadata()) #_DOCS_HIDE
>>> s.metadata.composer = 'W.A. Mozart' #_DOCS_HIDE
>>> fe = features.native.ComposerPopularity(s)
>>> #_DOCS_SHOW fe.extract().vector[0] > 5.0
>>> True #_DOCS_HIDE
>>> fe.extract().vector[0] > 5.0
True
'''
id = 'MD1'
googleResultsRE = re.compile(r'([\d,]+) results')
_M21UserAgent = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) '
+ 'Gecko/20071127 Firefox/2.0.0.11')

def __init__(self, dataOrStream=None, *arguments, **keywords):
super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)
Expand Down Expand Up @@ -888,13 +886,11 @@ def process(self):
params = urlencode(paramsBasic)
urlStr = f'http://www.google.com/search?{params}'

headers = {'User-Agent': self._M21UserAgent}
req = Request(urlStr, headers=headers)
with urlopen(req) as response:
the_page = response.read()
the_page = the_page.decode('utf-8')

m = self.googleResultsRE.search(the_page)
from requests_html import HTMLSession
session = HTMLSession()
response = session.get(urlStr)
resultsDiv = response.html.find('div[@id="result-stats"]', first=True)
m = self.googleResultsRE.search(resultsDiv.text)
if m is not None and m.group(0):
totalRes = int(m.group(1).replace(',', ''))
if totalRes > 0:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ jsonpickle
matplotlib
more_itertools
numpy
requests_html
webcolors>=1.5

0 comments on commit dddbb93

Please sign in to comment.