Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Package providing simple Python access to data in:
* AWS s3
* MySQL
* neo4j
* SPARQL

Sroka library was checked to work for Python **>=3.8, <=3.11**.

Expand Down
100 changes: 62 additions & 38 deletions Test APIs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# GA API\n",
"from sroka.api.ga.ga import ga_request\n",
Expand All @@ -43,10 +41,36 @@
"from sroka.api.s3_connection.s3_connection_api import s3_download_data, s3_upload_data\n",
"# MySQL API\n",
"from sroka.api.mysql.mysql import query_mysql\n",
"# SPARQL API\n",
"from sroka.api.sparql.sparql import query_sparql\n",
"\n",
"# data wrangling\n",
"import numpy as np"
]
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# SPARQL"
},
{
"metadata": {},
"cell_type": "code",
"source": [
"df = query_sparql(\"\"\"SELECT \n",
"\t?game \n",
"\t?gameLabel \n",
"\t?gameTitle\n",
"WHERE {\n",
"\t?game wdt:P31 wd:Q7889 . # instance of video game\n",
"}\n",
"LIMIT 10\"\"\", endpoint_url='https://query.wikidata.org/sparql')\n",
"df"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -57,16 +81,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = query_athena(\"\"\"\n",
" SELECT '2019-03-01' as date\n",
" \"\"\")\n",
"\n",
"df"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -77,38 +101,38 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# input a path to data on your s3, it is needed to perform any query\n",
"s3_folder = ''\n",
"\n",
"s3_download_data('s3://{}'.format(s3_folder), prefix=True, sep=';')"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# input bucket name and file path on your s3, it is needed to perform any query\n",
"s3_bucket = ''\n",
"s3_file_path = ''\n",
"\n",
"# create a test array\n",
"arr = np.array(([1,2,3,4], [4,3,2,1]))"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s3_upload_data(arr, bucket=s3_bucket, path=s3_file_path)"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -119,9 +143,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_day = '01'\n",
"end_day='04'\n",
Expand All @@ -142,7 +164,9 @@
"\n",
"df_gam = get_data_from_admanager(query, dimensions, columns, start_date, stop_date)\n",
"df_gam.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -153,9 +177,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# your account id, it is needed to perform any query\n",
"your_id = ''\n",
Expand All @@ -172,7 +194,9 @@
"\n",
"df_ga = ga_request(request, print_sample_size=True, sampling_level='FASTER')\n",
"df_ga.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -183,14 +207,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_sheet = google_drive_sheets_create('new_sheet')\n",
"\n",
"google_drive_sheets_write(df, new_sheet)"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -201,9 +225,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data_moat = {\n",
" 'start' : '20190301',\n",
Expand All @@ -213,7 +235,9 @@
"\n",
"df_moat = get_data_from_moat(input_data_moat, 'moat')\n",
"df_moat.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -224,17 +248,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"presto_query = \"\"\"\n",
" SELECT '2019-03-01' as date;\n",
" \"\"\"\n",
"\n",
"data_presto = request_qubole(presto_query, query_type='hive')\n",
"data_presto.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -245,9 +269,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data = {\n",
" 'start' : '2018-08-23T00:00:00-07:00',\n",
Expand All @@ -264,7 +286,9 @@
"\n",
"data = get_data_from_rubicon(input_data)\n",
"data.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
Expand All @@ -275,20 +299,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = query_mysql(\"SELECT * FROM clan LIMIT 10\")\n",
"df.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"source": [],
"outputs": [],
"source": []
"execution_count": null
}
],
"metadata": {
Expand Down
3 changes: 3 additions & 0 deletions config.sample.ini
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@ database: DATABASE
neo4j_username: USERNAME
neo4j_password: PASSWORD
neo4j_address: ADDRESS:PORT

[sparql]
endpoint_url: ENDPOINT_URL
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ requests>=2.20
retrying>=1.3.3
urllib3>=1.26.18
py2neo>=4.2.0
SPARQLWrapper>=2.0.0
db-dtypes
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

setuptools.setup(
name="sroka",
version="0.0.8",
version="0.0.9",
author="Ad Engineering FANDOM",
author_email="murbanek@fandom.com",
description="Package for access GA, GAM, MOAT, Qubole, Athena, S3, Rubicon APIs, BigQuery, MySQL",
description="Package for access GA, GAM, MOAT, Qubole, Athena, S3, Rubicon APIs, BigQuery, MySQL, SPARQL",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/Wikia/sroka",
Expand Down
22 changes: 22 additions & 0 deletions sroka/api/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
from pathlib import Path


def save_to_file(df, filename):
    """Export ``df`` to ``filename`` as a CSV file, creating parent dirs.

    Args:
        df: a pandas DataFrame (or any object exposing ``to_csv``).
        filename: destination path; any missing intermediate directories
            are created first.

    Filesystem errors are reported via ``print`` instead of being raised,
    keeping the call best-effort for callers that pass an optional
    ``filename``.
    """
    # Store the path in a cross-platform pathlib object to ensure compatibility
    # with DOS & UNIX-based operating systems.
    path = Path(filename)

    # Create the parent directory (and any missing ancestors). ``exist_ok``
    # makes this a no-op when it already exists. Note: the previous
    # ``str(path.parent.resolve()) != ""`` guard was dead code — ``resolve()``
    # always yields a non-empty absolute path — so the check is dropped.
    path.parent.mkdir(parents=True, exist_ok=True)

    # Export the data in a CSV file; swallow OS-level write errors with a
    # message so a failed export does not abort the caller.
    try:
        df.to_csv(filename)
    except OSError as e:
        print('Unable to write on filesystem: {}'.format(e))
23 changes: 3 additions & 20 deletions sroka/api/mysql/mysql.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import os
import mysql.connector
import pandas as pd
from configparser import NoSectionError
from pathlib import Path
from mysql.connector.errors import DatabaseError, OperationalError, InternalError
from retrying import retry
from sroka.api.mysql.mysql_helpers import validate_options, get_options_from_config
from sroka.api.helpers import save_to_file


@retry(stop_max_attempt_number=1,
Expand Down Expand Up @@ -72,21 +71,5 @@ def query_mysql(query: str, filename=None,
# Otherwise, store it in a file.
if not filename:
return df

# Store the path in a cross-platform pathlib object to ensure compatibility
# with DOS & UNIX-based operating systems.
path = Path(filename)

# Get the parent directory of the given path, if it exists.
directory_path = str(path.parent.resolve())

# If the given path points to a folder, attempt to create it. If it already
# exists, the `exist_ok` option ensures that no exception will be thrown.
if directory_path != "":
os.makedirs(directory_path, exist_ok=True)

# Export the data in a CSV file.
try:
df.to_csv(filename)
except OSError as e:
print('Unable to write on filesystem: {}'.format(e))
else:
save_to_file(df, filename)
Loading