Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v3
- name: Set up Python
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
'device': 'desktop',
# Use some data extraction rules
'extract_rules': {'title': 'h1'},
# Use AI to extract data from the page
'ai_extract_rules': {'product_name': 'The name of the product', 'price': 'The price in USD'},
# Wrap response in JSON
'json_response': False,
# Interact with the webpage you want to scrape
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ certifi==2022.12.7
charset-normalizer==3.1.0
distlib==0.3.6
filelock==3.10.0
flake8==3.9.2
flake8==6.0.0
idna==3.4
iniconfig==2.0.0
mccabe==0.6.1
mccabe==0.7.0
more-itertools==9.1.0
packaging==23.0
platformdirs==3.1.1
pluggy==0.13.1
py==1.11.0
pycodestyle==2.7.0
pyflakes==2.3.1
pycodestyle==2.10.0
pyflakes==3.0.1
pytest==7.2.2
requests==2.28.2
six==1.16.0
Expand Down
2 changes: 1 addition & 1 deletion scrapingbee/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.0.1"
__version__ = "2.0.2"
2 changes: 2 additions & 0 deletions scrapingbee/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def process_params(params: dict) -> dict:
new_params[k] = process_cookies(v)
elif k == 'extract_rules':
new_params[k] = process_json_stringify_param(v, 'extract_rules')
elif k == 'ai_extract_rules':
new_params[k] = process_json_stringify_param(v, 'ai_extract_rules')
elif k == 'js_scenario':
new_params[k] = process_json_stringify_param(v, 'js_scenario')
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
'Programming Language :: Python :: 3.11',
'Topic :: Software Development :: Libraries :: Python Modules',
],
python_requires='>=3.7',
python_requires='>=3.8',
install_requires=['requests'],
)
21 changes: 21 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ def test_get_with_js_scenario(mock_session, client):
)


@mock.patch('scrapingbee.client.Session')
def test_get_with_ai_extract_rules(mock_session, client):
    '''AI extraction rules passed as a dict end up URL-encoded in the query string'''
    # Rules are given as a plain dict of field name -> natural-language hint.
    ai_rules = {
        "product_name": "The name of the product",
        "price": "The price in USD",
    }
    # The rules are expected to appear as a JSON document, form-encoded
    # (spaces become '+', quotes/braces/colons are percent-escaped).
    expected_url = (
        'https://app.scrapingbee.com/api/v1/'
        '?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
        'ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22'
        'price%22%3A+%22The+price+in+USD%22%7D'
    )

    client.get('https://httpbin.org', params={'ai_extract_rules': ai_rules})

    mock_session.return_value.request.assert_called_with(
        'GET',
        expected_url,
        data=None,
        headers=DEFAULT_HEADERS,
    )


@mock.patch('scrapingbee.client.Session')
def test_post(mock_session, client):
'''It should make a POST request with some data'''
Expand Down
9 changes: 8 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_process_headers():
"""It should add a Spb- prefix to header names"""
output = process_headers({"Accept-Language": "En-US"})
assert output == {
"User-Agent": "ScrapingBee-Python/2.0.1",
"User-Agent": "ScrapingBee-Python/2.0.2",
"Spb-Accept-Language": "En-US",
}

Expand Down Expand Up @@ -46,6 +46,13 @@ def test_process_js_scenario():
assert output == '{"instructions": [{"click": "#buttonId"}]}'


def test_process_ai_extract_rules():
    """A dict of ai_extract_rules should come back as its JSON string form"""
    rules = {"product_name": "The name of the product", "price": "The price in USD"}
    expected = '{"product_name": "The name of the product", "price": "The price in USD"}'

    assert process_json_stringify_param(rules, "ai_extract_rules") == expected


def test_process_params():
"""It should keep boolean parameters"""
output = process_params({"render_js": True})
Expand Down
Loading