From d5696e31957e64a20e84a48cefd02bb93d8d8263 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 09:59:48 -0400 Subject: [PATCH 1/8] Get latest target-postgres version which contains multiple performance improvements --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f3eaa00..c5e9b02 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ py_modules=['target_snowflake'], install_requires=[ 'singer-python==5.9.0', - 'singer-target-postgres==0.2.4', + 'target_postgres@git+ssh://git@github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8' 'target-redshift==0.2.4', 'botocore<1.13.0,>=1.12.253', 'snowflake-connector-python==2.2.5' From 683e470c04999a3505217c3877fd24ee4689bc69 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:02:39 -0400 Subject: [PATCH 2/8] Try https git link --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c5e9b02..c27a4ee 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ py_modules=['target_snowflake'], install_requires=[ 'singer-python==5.9.0', - 'target_postgres@git+ssh://git@github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8' + 'target_postgres@https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8' 'target-redshift==0.2.4', 'botocore<1.13.0,>=1.12.253', 'snowflake-connector-python==2.2.5' From 773ff8dc778898096a5f4de6d5fef4d0508796de Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:03:59 -0400 Subject: [PATCH 3/8] Fix typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c27a4ee..0be67a7 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ py_modules=['target_snowflake'], install_requires=[ 'singer-python==5.9.0', - 'target_postgres@https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8' + 'target_postgres@https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8', 'target-redshift==0.2.4', 'botocore<1.13.0,>=1.12.253', 'snowflake-connector-python==2.2.5' From c64cb64a56151b80be9ee607ff1c5caa9ada1804 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:05:29 -0400 Subject: [PATCH 4/8] Another try at the git link --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0be67a7..cbf2292 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ py_modules=['target_snowflake'], install_requires=[ 'singer-python==5.9.0', - 'target_postgres@https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8', + 'target_postgres@git+https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8', 'target-redshift==0.2.4', 'botocore<1.13.0,>=1.12.253', 'snowflake-connector-python==2.2.5' From 12cec7157215c9c34e0707194b6e2db4c8d30cc4 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:07:36 -0400 Subject: [PATCH 5/8] Fix package name --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cbf2292..3247731 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ py_modules=['target_snowflake'], install_requires=[ 'singer-python==5.9.0', - 'target_postgres@git+https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8', + 'singer-target-postgres@git+https://github.com/datamill-co/target-postgres.git#279cb62d2a80b1bd8e8ab1191e7a1d17c19383a8', 'target-redshift==0.2.4', 'botocore<1.13.0,>=1.12.253', 'snowflake-connector-python==2.2.5' From d9097452c55613954c804a9e12e9e20e8387ed17 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:28:27 -0400 Subject: [PATCH 6/8] Avoid doing work to insert zero-length record batches --- target_snowflake/snowflake.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/target_snowflake/snowflake.py b/target_snowflake/snowflake.py index 60fe45e..a76462f 100644 --- a/target_snowflake/snowflake.py +++ b/target_snowflake/snowflake.py @@ -545,8 +545,11 @@ def persist_csv_rows(self, subkeys) def write_table_batch(self, cur, table_batch, metadata): - remote_schema = table_batch['remote_schema'] + record_count = len(table_batch['records']) + if record_count == 0: + return 0 + remote_schema = table_batch['remote_schema'] ## Create temp table to upload new data to target_table_name = self.canonicalize_identifier('tmp_' + str(uuid.uuid4())) @@ -586,7 +589,7 @@ def transform(): csv_headers, csv_rows) - return len(table_batch['records']) + return record_count def add_column(self, cur, table_name, column_name, column_schema): cur.execute(''' From 478630a5e5eadf6717333afee2ca7646271de541 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:48:01 -0400 Subject: [PATCH 7/8] Add date time serialization cache optimization --- target_snowflake/snowflake.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/target_snowflake/snowflake.py b/target_snowflake/snowflake.py index a76462f..1485891 100644 --- a/target_snowflake/snowflake.py +++ b/target_snowflake/snowflake.py @@ -6,6 +6,7 @@ import os import re import uuid +from functools import lru_cache import arrow from psycopg2 import sql @@ -21,6 +22,16 @@ from target_snowflake.connection import connect from target_snowflake.exceptions import SnowflakeError +# copied in from optimization in TargetPostgres: https://github.com/datamill-co/target-postgres/commit/6a3da026d2bb4681fdf46bd7ca69fbb164489d8a +@lru_cache(maxsize=128) +def _format_datetime(value): + """ + Format a datetime value. This is only called from the + SnowflakeTarget.serialize_table_record_datetime_value + but this non-method version allows caching + """ + return arrow.get(value).format('YYYY-MM-DD HH:mm:ss.SSSSZZ') + class SnowflakeTarget(SQLInterface): """ Specific Snowflake implementation of a Singer Target. @@ -334,7 +345,7 @@ def serialize_table_record_null_value(self, remote_schema, streamed_schema, fiel return value def serialize_table_record_datetime_value(self, remote_schema, streamed_schema, field, value): - return arrow.get(value).format('YYYY-MM-DD HH:mm:ss.SSSSZZ') + return _format_datetime(value) def perform_update(self, cur, target_table_name, temp_table_name, key_properties, columns, subkeys): full_table_name = '{}.{}.{}'.format( From fee3dcb93b7d1762cee774adf0a03fd0341e5297 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Thu, 9 Jun 2022 10:48:49 -0400 Subject: [PATCH 8/8] Fix comment --- target_snowflake/snowflake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target_snowflake/snowflake.py b/target_snowflake/snowflake.py index 1485891..87010ea 100644 --- a/target_snowflake/snowflake.py +++ b/target_snowflake/snowflake.py @@ -22,7 +22,7 @@ from target_snowflake.connection import connect from target_snowflake.exceptions import SnowflakeError -# copied in from optimization in TargetPostgres: https://github.com/datamill-co/target-postgres/commit/6a3da026d2bb4681fdf46bd7ca69fbb164489d8a +# copied in from optimization in PostgresTarget: https://github.com/datamill-co/target-postgres/commit/6a3da026d2bb4681fdf46bd7ca69fbb164489d8a @lru_cache(maxsize=128) def _format_datetime(value): """