From b33af5a4713e6c478f007f7ddf3501af27147f18 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 7 Feb 2023 01:41:12 -0700 Subject: [PATCH 1/6] Added an INNER_LIMIT to prevent queries from stalling. --- src/main/scala/org/renci/cam/QueryService.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index f2a7745e..9e4a4e9b 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -21,6 +21,8 @@ import scala.jdk.CollectionConverters._ object QueryService extends LazyLogging { + val INNER_LIMIT = 1000 + val ProvWasDerivedFrom: IRI = IRI("http://www.w3.org/ns/prov#wasDerivedFrom") val RDFSSubClassOf: IRI = IRI("http://www.w3.org/2000/01/rdf-schema#subClassOf") @@ -494,7 +496,7 @@ object QueryService extends LazyLogging { SELECT $nodeProjections ?g WHERE { $edgePatterns - } + } LIMIT ${INNER_LIMIT} } $BigDataQueryHintPrior $BigDataQueryHintRunFirst true . } From 7785a9ff658e0efb643eab7741e4038d4f69138f Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 7 Feb 2023 02:03:58 -0700 Subject: [PATCH 2/6] Increased the inner limit based on the outer limit value. 
--- src/main/scala/org/renci/cam/QueryService.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index 9e4a4e9b..00139d21 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._ object QueryService extends LazyLogging { - val INNER_LIMIT = 1000 + val INNER_LIMIT_MULTIPLIER = 10000 val ProvWasDerivedFrom: IRI = IRI("http://www.w3.org/ns/prov#wasDerivedFrom") @@ -496,7 +496,7 @@ object QueryService extends LazyLogging { SELECT $nodeProjections ?g WHERE { $edgePatterns - } LIMIT ${INNER_LIMIT} + } LIMIT ${if (limit == 0) INNER_LIMIT_MULTIPLIER else INNER_LIMIT_MULTIPLIER * limit} } $BigDataQueryHintPrior $BigDataQueryHintRunFirst true . } From 9cf65d97dfc7f1c34c146b393d1973f1747d1a92 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 7 Feb 2023 02:06:41 -0700 Subject: [PATCH 3/6] Fixed inner limit. 
--- src/main/scala/org/renci/cam/QueryService.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index 00139d21..4d56a8ff 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -485,6 +485,7 @@ object QueryService extends LazyLogging { val nodesToDirectTypes = getNodesToDirectTypes(queryGraph.nodes) val edgePatterns = queryEdgeSparql.fold(sparql"")(_ + _) val limitSparql = if (limit > 0) sparql" LIMIT $limit" else sparql"" + val innerLimit = if (limit == 0) INNER_LIMIT_MULTIPLIER else (INNER_LIMIT_MULTIPLIER * limit) val queryString = sparql"""SELECT DISTINCT $typeProjections (GROUP_CONCAT(DISTINCT ?g; SEPARATOR='|') AS ?graphs) @@ -496,7 +497,7 @@ object QueryService extends LazyLogging { SELECT $nodeProjections ?g WHERE { $edgePatterns - } LIMIT ${if (limit == 0) INNER_LIMIT_MULTIPLIER else INNER_LIMIT_MULTIPLIER * limit} + } LIMIT ${innerLimit} } $BigDataQueryHintPrior $BigDataQueryHintRunFirst true . } From c87234245296880c08ef852f6da48bdb1de800a0 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 7 Feb 2023 02:16:42 -0700 Subject: [PATCH 4/6] Reduced the inner limit multiplier to 1000. 
--- src/main/scala/org/renci/cam/QueryService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index 4d56a8ff..49b2308d 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._ object QueryService extends LazyLogging { - val INNER_LIMIT_MULTIPLIER = 10000 + val INNER_LIMIT_MULTIPLIER = 1000 val ProvWasDerivedFrom: IRI = IRI("http://www.w3.org/ns/prov#wasDerivedFrom") From 045d50984d03346f34cd12b01114a5e244ff4695 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 7 Feb 2023 02:23:01 -0700 Subject: [PATCH 5/6] Omitted the inner LIMIT when no outer limit is set; reduced the inner limit multiplier to 100. --- src/main/scala/org/renci/cam/QueryService.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index 49b2308d..dcace374 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._ object QueryService extends LazyLogging { - val INNER_LIMIT_MULTIPLIER = 1000 + val INNER_LIMIT_MULTIPLIER = 100 val ProvWasDerivedFrom: IRI = IRI("http://www.w3.org/ns/prov#wasDerivedFrom") @@ -485,7 +485,8 @@ object QueryService extends LazyLogging { val nodesToDirectTypes = getNodesToDirectTypes(queryGraph.nodes) val edgePatterns = queryEdgeSparql.fold(sparql"")(_ + _) val limitSparql = if (limit > 0) sparql" LIMIT $limit" else sparql"" - val innerLimit = if (limit == 0) INNER_LIMIT_MULTIPLIER else (INNER_LIMIT_MULTIPLIER * limit) + val innerLimit = INNER_LIMIT_MULTIPLIER * limit + val innerLimitSparql = if (limit > 0) sparql" LIMIT $innerLimit" else sparql"" val queryString = sparql"""SELECT DISTINCT $typeProjections (GROUP_CONCAT(DISTINCT ?g; SEPARATOR='|') AS ?graphs) @@ -497,7 
+498,7 @@ object QueryService extends LazyLogging { SELECT $nodeProjections ?g WHERE { $edgePatterns - } LIMIT ${innerLimit} + } ${innerLimitSparql} } $BigDataQueryHintPrior $BigDataQueryHintRunFirst true . } From 28f4b96e7a3bef4ee2fa1ba83c48ce2a962dabac Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sun, 26 Mar 2023 16:42:13 -0400 Subject: [PATCH 6/6] Removed ?*_class from projection and GROUP BY. --- src/main/scala/org/renci/cam/QueryService.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/scala/org/renci/cam/QueryService.scala b/src/main/scala/org/renci/cam/QueryService.scala index dcace374..625e4a45 100644 --- a/src/main/scala/org/renci/cam/QueryService.scala +++ b/src/main/scala/org/renci/cam/QueryService.scala @@ -6,13 +6,13 @@ import io.circe.syntax._ import org.apache.jena.query.QuerySolution import org.apache.jena.rdf.model.Resource import org.phenoscape.sparql.SPARQLInterpolation._ -import org.renci.cam.Biolink.{biolinkData, BiolinkData} +import org.renci.cam.Biolink.{BiolinkData, biolinkData} import org.renci.cam.HttpClient.HttpClient import org.renci.cam.SPARQLQueryExecutor.SPARQLCache import org.renci.cam.Util.IterableSPARQLOps import org.renci.cam.domain.{TRAPIAttribute, _} -import zio.config.{getConfig, ZConfig} -import zio.{config => _, Has, RIO, Task, UIO, ZIO} +import zio.config.{ZConfig, getConfig} +import zio.{Has, RIO, Task, UIO, ZIO, config => _} import java.math.BigInteger import java.nio.charset.StandardCharsets @@ -569,7 +569,6 @@ object QueryService extends LazyLogging { def getProjections(queryGraph: TRAPIQueryGraph, typesInsteadOfNodes: Boolean = false): QueryText = { val projectionVariableNames = queryGraph.edges.keys ++ - queryGraph.nodes.keys.map(queryNodeID => s"${queryNodeID}_class") ++ (if (typesInsteadOfNodes) queryGraph.nodes.keys.map(queryNodeID => s"${queryNodeID}_type") else queryGraph.edges.flatMap(e => List(e._2.subject, e._2.`object`))) projectionVariableNames.map(Var(_)).map(v => 
sparql" $v ").fold(sparql"")(_ + _)