diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE new file mode 100644 index 0000000000..430dd5bcb6 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE @@ -0,0 +1,15 @@ +## Changes proposed in this pull request + +(Fill in the changes here) + +## Patch testing + +(Fill in the details about how this patch was tested) + +## ReleaseNotes.txt changes + +(Does this change require an entry in ReleaseNotes.txt? If yes, has it been added to it?) + +## Other PRs + +(Does this change require changes in other projects: store, spark, spark-jobserver, aqp? If so, add links to the related PRs in those subprojects) diff --git a/.gitignore b/.gitignore index efeb736a8a..9eb7478888 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,10 @@ *.class -snappy-aqp +aqp +benchmarking/ +hydraLogs/ +snappy-connectors/ +snappy-poc/ +vm_* # sbt specific .cache/ @@ -14,6 +19,7 @@ project/plugins/project/ # gradle specific .gradle/ +buildOutput.log # Scala-IDE specific .scala_dependencies diff --git a/.gitmodules b/.gitmodules index 57432ddc1c..409d58331a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,13 @@ [submodule "spark-jobserver"] path = spark-jobserver - url = https://github.com/SnappyData/spark-jobserver.git + url = https://github.com/SnappyDataInc/spark-jobserver.git branch = snappydata -[submodule "snappy-spark"] - path = snappy-spark - url = https://github.com/SnappyData/snappy-spark.git - branch = snappy/master [submodule "snappy-store"] - path = snappy-store - url = https://github.com/gemfire/gemxd-staging.git - branch = snappy-store + path = store + url = https://github.com/SnappyDataInc/snappy-store.git + branch = snappy/master +[submodule "spark"] + path = spark + url = https://github.com/SnappyDataInc/spark.git + branch = snappy/branch-2.1 + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..764a4d4cea --- /dev/null +++ b/LICENSE @@ -0,0 +1,299 @@ + Apache License + Version 2.0, January 2004 +
http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018 SnappyData Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +======================================================================= +Apache Spark Subcomponents: + +The Apache Spark project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for these +subcomponents is subject to the terms and conditions of the following +licenses.
+ + +======================================================================== +For heapq (pyspark/heapq3.py): +======================================================================== + +See license/LICENSE-heapq.txt + +======================================================================== +For SnapTree: +======================================================================== + +See license/LICENSE-SnapTree.txt + +======================================================================== +For jbcrypt: +======================================================================== + +See license/LICENSE-jbcrypt.txt + +======================================================================== +BSD-style licenses +======================================================================== + +The following components are provided under a BSD-style license. See project link for details. +The text of each license is also included at licenses/LICENSE-[project].txt. + + (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) + (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) + (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) + (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://www.antlr.org/) + (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) + (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) + (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) + (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net) + (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer) + (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer) + (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License) + (Interpreter classes (all .scala files
in repl/src/main/scala + except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), + and for SerializableMapWrapper in JavaUtils.scala) + (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/) + (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) + (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) + (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) + (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo) + (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) + (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) + (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) + (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) + (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) + (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/) + (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) + (BSD licence) sbt and sbt-launch-lib.bash + (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) + (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE) + (BSD 3 Clause) 
CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE) + +======================================================================== +MIT licenses +======================================================================== + +The following components are provided under the MIT License. See project link for details. +The text of each license is also included at licenses/LICENSE-[project].txt. + + (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org) + (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org) + (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org) + (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/) + (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt) + (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org) + (MIT License) jquery (https://jquery.org/license/) + (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs) + (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot) + (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3) + (MIT License) sorttable (https://github.com/stuartlangridge/sorttable) + (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE) + (MIT License) datatables (http://datatables.net/license) + (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE) + (MIT License) cookies (http://code.google.com/p/cookies/wiki/License) + (MIT License) blockUI (http://jquery.malsup.com/block/) + (MIT License) RowsGroup (http://datatables.net/license/mit) + (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) + (MIT License) modernizr 
(https://github.com/Modernizr/Modernizr/blob/master/LICENSE) diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000..d6cba4b41e --- /dev/null +++ b/NOTICE @@ -0,0 +1,1306 @@ +SnappyData OSS Edition +Copyright 2018 and onwards SnappyData Inc. + + +This is a comprehensive list of software libraries used by SnappyData in version 1.0. +More details on license types, license versions, and contributors can be found further down in this file + +HikariCP-2.7.9.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +JavaEWAH-0.3.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +RoaringBitmap-0.6.66.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +ST4-4.0.4.jar BSD License: http://antlr.org/license.html +Vis.js Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +activation-1.1.1.jar CDDL 1.0: https://glassfish.dev.java.net/public/CDDLv1.0.html +akka-actor_2.11-2.3.16.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +akka-cluster_2.11-2.3.16.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +akka-remote_2.11-2.3.16.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +akka-slf4j_2.11-2.3.16.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +ant-1.9.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +antlr-2.7.7.jar BSD License: http://antlr.org/license.html +antlr-runtime-3.4.jar BSD License: http://antlr.org/license.html +antlr4-runtime-4.5.3.jar BSD License: http://antlr.org/license.html +aopalliance-1.0.jar Public Domain +aopalliance-repackaged-2.5.0-b42.jar CDDL/GPLv2+CE: https://glassfish.java.net/nonav/public/CDDL+GPL_1_1.html +apache-log4j-extras-1.2.17.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +arpack_combined_all-0.1.jar BSD License: http://www.opensource.org/licenses/bsd-license.php +audience-annotations-0.5.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +avro-1.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +avro-ipc-1.7.7.jar Apache V2: 
http://www.apache.org/licenses/LICENSE-2.0 +avro-mapred-1.7.7-hadoop2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +aws-java-sdk-1.7.4.jar Apache V2: https://aws.amazon.com/apache2.0 +base64-2.3.8.jar Public Domain +bcprov-jdk15on-1.52.jar Bouncy Castle License: http://www.bouncycastle.org/licence.html +bonecp-0.8.0.RELEASE.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +breeze-macros_2.11-0.13.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +breeze_2.11-0.13.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +calcite-avatica-1.4.0-incubating.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +calcite-core-1.4.0-incubating.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +calcite-linq4j-1.4.0-incubating.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +cglib-2.2.1-v20090111.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +chill-java-0.8.5.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +chill_2.11-0.8.5.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-beanutils-1.9.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-cli-1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-codec-1.11.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-collections-3.2.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-compiler-3.0.11.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-compress-1.4.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-configuration-1.10.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-crypto-1.0.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-dbcp-1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-httpclient-3.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-io-2.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-lang-2.6.jar Apache V2: 
http://www.apache.org/licenses/LICENSE-2.0 +commons-lang3-3.8.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-logging-1.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-math3-3.6.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-modeler-2.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-net-3.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-pool-1.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +commons-pool2-2.6.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +compress-lzf-1.0.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +concurrentlinkedhashmap-lru-1.4.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +config-1.3.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +core-1.1.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +curator-client-2.7.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +curator-framework-2.7.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +curator-recipes-2.7.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +d3.js BSD-3: https://github.com/d3/d3/blob/master/LICENSE +datanucleus-api-jdo-3.2.8.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +datanucleus-core-3.2.15.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +datanucleus-rdbms-3.2.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +derby-10.14.2.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +eclipse-collections-api-9.2.0.jar EPL-1.0: https://www.eclipse.org/legal/epl-v10.html +eclipse-collections-9.2.0.jar EPL-1.0: https://www.eclipse.org/legal/epl-v10.html +eigenbase-properties-1.1.5.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +flyway-core-3.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +gemfire-core-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +gemfire-jgroups-1.6.2.1.jar LGPL2.1: 
http://www.opensource.org/licenses/lgpl-2.1.php +gemfire-shared-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +gemfire-trove-1.6.2.1.jar LGPL2.1: http://www.opensource.org/licenses/lgpl-2.1.php +gemfire-util-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +gson-2.2.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +guava-14.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +guice-3.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +guice-servlet-3.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +h2-1.3.176.jar H2 License V1: http://h2database.com/html/license.html +hadoop-annotations-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-auth-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-aws-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-client-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-common-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-hdfs-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-mapreduce-client-app-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-mapreduce-client-common-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-mapreduce-client-core-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-mapreduce-client-jobclient-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-mapreduce-client-shuffle-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-api-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-client-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-common-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-server-common-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-server-nodemanager-2.7.7.jar Apache V2: 
http://www.apache.org/licenses/LICENSE-2.0 +hadoop-yarn-server-web-proxy-2.7.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hammer.js MIT: http://www.opensource.org/licenses/mit-license.php +hive-beeline-1.2.1.spark2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hive-cli-1.2.1.spark2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hive-exec-1.2.1.spark2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hive-jdbc-1.2.1.spark2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hive-metastore-1.2.1.spark2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +hk2-api-2.5.0-b42.jar CDDL/GPLv2+CE: https://glassfish.java.net/nonav/public/CDDL+GPL_1_1.html +hk2-locator-2.5.0-b42.jar CDDL/GPLv2+CE: https://glassfish.java.net/nonav/public/CDDL+GPL_1_1.html +hk2-utils-2.5.0-b42.jar CDDL/GPLv2+CE: https://glassfish.java.net/nonav/public/CDDL+GPL_1_1.html +htrace-core-3.2.0-incubating.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +httpclient-4.5.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +httpcore-4.4.10.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +ivy-2.4.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-annotations-2.6.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-core-2.6.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-core-asl-1.9.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-databind-2.6.7.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-jaxrs-1.9.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-mapper-asl-1.9.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-module-paranamer-2.6.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-module-scala_2.11-2.6.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jackson-xc-1.9.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +janino-3.0.11.jar BSD: 
https://raw.githubusercontent.com/janino-compiler/janino/master/LICENSE +java-xmlbuilder-1.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +javassist-3.22.0-CR2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +javax.annotation-api-1.2.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javax.inject-1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +javax.inject-2.5.0-b42.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javax.resource-api-1.7.1.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javax.servlet-api-4.0.1.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javax.transaction-api-1.3.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javax.ws.rs-api-2.1.1.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +javolution-5.5.1.jar BSD: http://javolution.org/LICENSE.txt +jaxb-api-2.2.2.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jcl-over-slf4j-1.7.25.jar MIT: http://www.opensource.org/licenses/mit-license.php +jdo-api-3.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jersey-client-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-common-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-container-servlet-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-container-servlet-core-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-hk2-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-media-jaxb-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jersey-server-2.27.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jets3t-0.9.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jettison-1.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 
+jetty-6.1.26.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-client-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-continuation-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-http-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-io-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-jndi-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-plus-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-proxy-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-security-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-server-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-servlet-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-servlets-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-sslengine-6.1.26.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-util-6.1.26.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-util-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-webapp-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jetty-xml-9.2.26.v20180806.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jline-2.14.6.jar BSD: http://www.opensource.org/licenses/bsd-license.php +jna-4.5.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +joda-convert-2.1.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +joda-time-2.10.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jodd-core-5.0.6.jar BSD: http://jodd.org/license.html +jpam-1.1.jar Apache V2: http://jpam.sourceforge.net/LICENSE.txt +json4s-ast_2.11-3.2.11.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +json4s-core_2.11-3.2.11.jar
Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +json4s-jackson_2.11-3.2.11.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jsp-api-2.1.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +jsr305-3.0.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jta-1.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +jtransforms-2.4.0.jar MPL: http://www.mozilla.org/MPL/2.0/index.txt +jul-to-slf4j-1.7.25.jar MIT: http://www.opensource.org/licenses/mit-license.php +kafka-clients-0.10.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +kafka_2.11-0.10.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +kryo-shaded-4.0.2.jar BSD: http://www.opensource.org/licenses/bsd-license.php +leveldbjni-all-1.8.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libfb303-0.9.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libgemfirexd.dylib Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libgemfirexd.so Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libgemfirexd64.dylib Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libgemfirexd64.so Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +libthrift-0.9.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +liquidFillGauge.js BSD: http://choosealicense.com/licenses/bsd-2-clause +log4j-1.2.17.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +lz4-java-1.5.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +machinist_2.11-0.6.1.jar MIT: http://opensource.org/licenses/MIT +macro-compat_2.11-1.1.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +metrics-core-2.2.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +metrics-core-3.2.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +metrics-graphite-3.2.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +metrics-json-3.2.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +metrics-jvm-3.2.6.jar Apache V2:
http://www.apache.org/licenses/LICENSE-2.0 +mimepull-1.9.5.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +minlog-1.3.0.jar BSD: http://www.opensource.org/licenses/bsd-license.php +moment.js MIT: http://www.opensource.org/licenses/mit-license.php +mx4j-3.0.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +mx4j-remote-3.0.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +mx4j-tools-3.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +netty-3.10.6.Final.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +netty-all-4.0.56.Final.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +objenesis-3.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +opencsv-2.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +org.osgi.core-6.0.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +oro-2.0.8.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +osgi-resource-locator-1.0.1.jar CDDL/GPLv2+CE: https://glassfish.java.net/public/CDDL+GPL_1_1.html +paranamer-2.6.jar BSD: http://www.opensource.org/licenses/bsd-license.php +parboiled-core-1.1.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parboiled-scala_2.11-1.1.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parboiled_2.11-2.1.5.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-column-1.8.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-common-1.8.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-encoding-1.8.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-format-2.3.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-hadoop-1.8.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-hadoop-bundle-1.6.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +parquet-jackson-1.8.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +pmml-model-1.2.17.jar BSD-3: 
http://opensource.org/licenses/BSD-3-Clause +pmml-schema-1.2.17.jar BSD-3: http://opensource.org/licenses/BSD-3-Clause +protobuf-java-3.6.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +py4j-0.10.8.1.jar BSD: http://www.opensource.org/licenses/bsd-license.php +pyrolite-4.22.jar MIT: https://raw.githubusercontent.com/irmen/Pyrolite/master/LICENSE +scala-compiler-2.11.8.jar BSD-3: http://www.scala-lang.org/license.html +scala-library-2.11.8.jar BSD-3: http://www.scala-lang.org/license.html +scala-parser-combinators_2.11-1.0.4.jar BSD-3: http://www.scala-lang.org/license.html +scala-reflect-2.11.8.jar BSD-3: http://www.scala-lang.org/license.html +scala-xml_2.11-1.0.4.jar BSD-3: http://www.scala-lang.org/license.html +scalap-2.11.8.jar BSD-3: http://www.scala-lang.org/license.html +servlet-api-2.5-20081211.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +shapeless_2.11-2.3.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +shiro-core-1.2.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +slf4j-api-1.7.25.jar MIT: http://www.opensource.org/licenses/mit-license.php +slf4j-log4j12-1.7.25.jar MIT: http://www.opensource.org/licenses/mit-license.php +slick_2.11-2.1.0.jar BSD: http://github.com/slick/slick/blob/master/LICENSE.txt +snappy-0.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-java-1.1.7.2.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-catalyst_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-core_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-graphx_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-hive-thriftserver_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-hive_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-launcher_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 
+snappy-spark-mllib-local_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-mllib_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-network-common_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-network-shuffle_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-repl_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-sketch_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-sql_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-sql-kafka-0.10_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-streaming-kafka-0.10_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-streaming_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-tags_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-unsafe_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappy-spark-yarn_2.11-2.1.1.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-cluster_2.11-1.0.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-core_2.11-1.0.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-jdbc_2.11-1.0.2.1-only.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-launcher-1.0.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-store-client-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-store-core-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-store-hibernate-1.6.2.1.jar LGPL2.1: http://www.gnu.org/licenses/lgpl-2.1.html +snappydata-store-shared-1.6.2.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +snappydata-store-tools-1.6.2.1.jar Apache 
V2: http://www.apache.org/licenses/LICENSE-2.0 +spark-jobserver_2.11-0.6.2.8.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spire-macros_2.11-0.13.0.jar MIT: http://opensource.org/licenses/MIT +spire_2.11-0.13.0.jar MIT: http://opensource.org/licenses/MIT +spray-caching_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-can_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-client_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-http_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-httpx_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-io_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-json_2.11-1.3.5.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-routing-shapeless2_2.11-1.3.3.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +spray-util_2.11-1.3.4.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +stax-api-1.0-2.jar CDDL 1.0: https://opensource.org/licenses/CDDL-1.0 +stax-api-1.0.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +stream-2.9.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +stringtemplate-3.2.1.jar BSD: http://antlr.org/license.html +super-csv-2.2.0.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +tomcat-jdbc-8.5.37.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +tomcat-juli-8.5.37.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +twitter4j-core-4.0.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +twitter4j-stream-4.0.7.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +uncommons-maths-1.2.2a.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +univocity-parsers-2.7.6.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +validation-api-1.1.0.Final.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +xbean-asm5-shaded-4.5.jar Apache V2: 
http://www.apache.org/licenses/LICENSE-2.0 +xercesImpl-2.9.1.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +xml-apis-1.4.01.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +xmlenc-0.52.jar BSD: http://www.opensource.org/licenses/bsd-license.php +xom-1.2.10.jar LGPL2.1: http://www.gnu.org/licenses/lgpl-2.1.html +xz-1.0.jar Public Domain +zkclient-0.8.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 +zookeeper-3.4.13.jar Apache V2: http://www.apache.org/licenses/LICENSE-2.0 + + + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +======================================================================== +Common Development and Distribution License 1.0 +======================================================================== + +The following components are provided under the Common Development and Distribution License 1.0. See project link for details. + + (CDDL 1.0) Glassfish Jasper (org.mortbay.jetty:jsp-2.1:6.1.14 - http://jetty.mortbay.org/project/modules/jsp-2.1) + (CDDL 1.0) JAX-RS (https://jax-rs-spec.java.net/) + (CDDL 1.0) Servlet Specification 2.5 API (org.mortbay.jetty:servlet-api-2.5:6.1.14 - http://jetty.mortbay.org/project/modules/servlet-api-2.5) + (CDDL 1.0) (GPL2 w/ CPE) javax.annotation API (https://glassfish.java.net/nonav/public/CDDL+GPL.html) + (COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined) + (Common Development and Distribution License (CDDL) v1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:1.1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp) + +======================================================================== +Common Development and Distribution License 1.1 +======================================================================== + +The following components are provided under the Common Development and 
Distribution License 1.1. See project link for details. + + (CDDL 1.1) (GPL2 w/ CPE) org.glassfish.hk2 (https://hk2.java.net) + (CDDL 1.1) (GPL2 w/ CPE) JAXB API bundle for GlassFish V3 (javax.xml.bind:jaxb-api:2.2.2 - https://jaxb.dev.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) Jersey 2 (https://jersey.java.net) + +======================================================================== +Common Public License 1.0 +======================================================================== + +The following components are provided under the Common Public License 1.0. See project link for details. + + (Common Public License Version 1.0) JUnit (junit:junit-dep:4.10 - http://junit.org) + (Common Public License Version 1.0) JUnit (junit:junit:3.8.1 - http://junit.org) + (Common Public License Version 1.0) JUnit (junit:junit:4.8.2 - http://junit.org) + +======================================================================== +Eclipse Public License 1.0 +======================================================================== + +The following components are provided under the Eclipse Public License 1.0. See project links for details. + + (Eclipse Public License v1.0) Eclipse JDT Core (org.eclipse.jdt:core:3.1.1 - http://www.eclipse.org/jdt/) + (Eclipse Public License v1.0) Eclipse Collections (org.eclipse.collections:9.2.0 - https://github.com/eclipse/eclipse-collections/blob/9.2.0/LICENSE-EPL-1.0.txt) + +======================================================================== +Mozilla Public License 1.0 +======================================================================== + +The following components are provided under the Mozilla Public License 1.0. See project link for details. 
+ + (GPL) (LGPL) (MPL) JTransforms (com.github.rwl:jtransforms:2.4.0 - http://sourceforge.net/projects/jtransforms/) + (Mozilla Public License Version 1.1) jamon-runtime (org.jamon:jamon-runtime:2.3.1 - http://www.jamon.org/jamon-runtime/) + + + +======================================================================== +NOTICE files +======================================================================== + +The following NOTICEs pertain to software distributed with this project. + + +// ------------------------------------------------------------------ +// NOTICE file corresponding to the section 4d of The Apache License, +// Version 2.0, in this case for +// ------------------------------------------------------------------ + +Apache Avro +Copyright 2009-2013 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Apache Commons Codec +Copyright 2002-2009 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). + +-------------------------------------------------------------------------------- +src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java contains +test data from http://aspell.sourceforge.net/test/batch0.tab. + +Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying +and distribution of this entire article is permitted in any medium, +provided this notice is preserved. +-------------------------------------------------------------------------------- + +Apache HttpComponents HttpClient +Copyright 1999-2011 The Apache Software Foundation + +This project contains annotations derived from JCIP-ANNOTATIONS +Copyright (c) 2005 Brian Goetz and Tim Peierls. 
See http://www.jcip.net + +Apache HttpComponents HttpCore +Copyright 2005-2011 The Apache Software Foundation + +Curator Recipes +Copyright 2011-2014 The Apache Software Foundation + +Curator Framework +Copyright 2011-2014 The Apache Software Foundation + +Curator Client +Copyright 2011-2014 The Apache Software Foundation + +Apache Geronimo +Copyright 2003-2008 The Apache Software Foundation + +Activation 1.1.1 +Copyright 2003-2007 The Apache Software Foundation + +Apache Commons Lang +Copyright 2001-2014 The Apache Software Foundation + +This product includes software from the Spring Framework, +under the Apache License 2.0 (see: StringUtils.containsWhitespace()) + +Apache log4j +Copyright 2007 The Apache Software Foundation + +# Compress LZF + +This library contains efficient implementation of LZF compression format, +as well as additional helper classes that build on JDK-provided gzip (deflat) +codec. + +## Licensing + +Library is licensed under Apache License 2.0, as per accompanying LICENSE file. + +## Credit + +Library has been written by Tatu Saloranta (tatu.saloranta@iki.fi). +It was started at Ning, inc., as an official Open Source process used by +platform backend, but after initial versions has been developed outside of +Ning by supporting community. + +Other contributors include: + +* Jon Hartlaub (first versions of streaming reader/writer; unit tests) +* Cedrik Lime: parallel LZF implementation + +Various community members have contributed bug reports, and suggested minor +fixes; these can be found from file "VERSION.txt" in SCM. 
+ +Objenesis +Copyright 2006-2009 Joe Walnes, Henri Tremblay, Leonardo Mesquita + +Apache Commons Net +Copyright 2001-2010 The Apache Software Foundation + + The Netty Project + ================= + +Please visit the Netty web site for more information: + + * http://netty.io/ + +Copyright 2011 The Netty Project + +The Netty Project licenses this file to you under the Apache License, +version 2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. + +Also, please refer to each LICENSE.<component>.txt file, which is located in +the 'license' directory of the distribution file, for the license terms of the +components that this product depends on. + +------------------------------------------------------------------------------- +This product contains the extensions to Java Collections Framework which has +been derived from the works by JSR-166 EG, Doug Lea, and Jason T. 
Greene: + + * LICENSE: + * license/LICENSE.jsr166y.txt (Public Domain) + * HOMEPAGE: + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ + * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + +This product contains a modified version of Robert Harder's Public Domain +Base64 Encoder and Decoder, which can be obtained at: + + * LICENSE: + * license/LICENSE.base64.txt (Public Domain) + * HOMEPAGE: + * http://iharder.sourceforge.net/current/java/base64/ + +This product contains a modified version of 'JZlib', a re-implementation of +zlib in pure Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.jzlib.txt (BSD Style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + +This product optionally depends on 'Protocol Buffers', Google's data +interchange format, which can be obtained at: + + * LICENSE: + * license/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * http://code.google.com/p/protobuf/ + +This product optionally depends on 'SLF4J', a simple logging facade for Java, +which can be obtained at: + + * LICENSE: + * license/LICENSE.slf4j.txt (MIT License) + * HOMEPAGE: + * http://www.slf4j.org/ + +This product optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +This product optionally depends on 'Apache Log4J', a logging framework, +which can be obtained at: + + * LICENSE: + * license/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +This product optionally depends on 'JBoss Logging', a logging framework, +which can be obtained at: + + * LICENSE: + * license/LICENSE.jboss-logging.txt (GNU LGPL 2.1) + * HOMEPAGE: + * http://anonsvn.jboss.org/repos/common/common-logging-spi/ + +This product optionally depends on 'Apache Felix', an open source OSGi +framework implementation, which can be obtained 
at: + + * LICENSE: + * license/LICENSE.felix.txt (Apache License 2.0) + * HOMEPAGE: + * http://felix.apache.org/ + +This product optionally depends on 'Webbit', a Java event based +WebSocket and HTTP server: + + * LICENSE: + * license/LICENSE.webbit.txt (BSD License) + * HOMEPAGE: + * https://github.com/joewalnes/webbit + +# Jackson JSON processor + +Jackson is a high-performance, Free/Open Source JSON processing library. +It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +been in development since 2007. +It is currently developed by a community of developers, as well as supported +commercially by FasterXML.com. + +Jackson core and extension components may be licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +## Credits + +A list of contributors may be found from CREDITS file, which is included +in some artifacts (usually source distributions); but is always available +from the source code management (SCM) system project uses. + +Jackson core and extension components may licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +mesos +Copyright 2014 The Apache Software Foundation + +Apache Thrift +Copyright 2006-2010 The Apache Software Foundation. + + Apache Ant + Copyright 1999-2013 The Apache Software Foundation + + The <sync> task is based on code Copyright (c) 2002, Landmark + Graphics Corp that has been kindly donated to the Apache Software + Foundation. 
+ +Apache Commons IO +Copyright 2002-2012 The Apache Software Foundation + +Apache Commons Math +Copyright 2001-2013 The Apache Software Foundation + +=============================================================================== + +The inverse error function implementation in the Erf class is based on CUDA +code developed by Mike Giles, Oxford-Man Institute of Quantitative Finance, +and published in GPU Computing Gems, volume 2, 2010. +=============================================================================== + +The BracketFinder (package org.apache.commons.math3.optimization.univariate) +and PowellOptimizer (package org.apache.commons.math3.optimization.general) +classes are based on the Python code in module "optimize.py" (version 0.5) +developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/) +Copyright © 2003-2009 SciPy Developers. +=============================================================================== + +The LinearConstraint, LinearObjectiveFunction, LinearOptimizer, +RelationShip, SimplexSolver and SimplexTableau classes in package +org.apache.commons.math3.optimization.linear include software developed by +Benjamin McCann (http://www.benmccann.com) and distributed with +the following copyright: Copyright 2009 Google Inc. +=============================================================================== + +This product includes software developed by the +University of Chicago, as Operator of Argonne National +Laboratory. +The LevenbergMarquardtOptimizer class in package +org.apache.commons.math3.optimization.general includes software +translated from the lmder, lmpar and qrsolv Fortran routines +from the Minpack package +Minpack Copyright Notice (1999) University of Chicago. 
All rights reserved +=============================================================================== + +The GraggBulirschStoerIntegrator class in package +org.apache.commons.math3.ode.nonstiff includes software translated +from the odex Fortran routine developed by E. Hairer and G. Wanner. +Original source copyright: +Copyright (c) 2004, Ernst Hairer +=============================================================================== + +The EigenDecompositionImpl class in package +org.apache.commons.math3.linear includes software translated +from some LAPACK Fortran routines. Original source copyright: +Copyright (c) 1992-2008 The University of Tennessee. All rights reserved. +=============================================================================== + +The MersenneTwister class in package org.apache.commons.math3.random +includes software translated from the 2002-01-26 version of +the Mersenne-Twister generator written in C by Makoto Matsumoto and Takuji +Nishimura. Original source copyright: +Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, +All rights reserved +=============================================================================== + +The LocalizedFormatsTest class in the unit tests is an adapted version of +the OrekitMessagesTest class from the orekit library distributed under the +terms of the Apache 2 licence. Original source copyright: +Copyright 2010 CS Systèmes d'Information +=============================================================================== + +The HermiteInterpolator class and its corresponding test have been imported from +the orekit library distributed under the terms of the Apache 2 licence. Original +source copyright: +Copyright 2010-2012 CS Systèmes d'Information +=============================================================================== + +The creation of the package "o.a.c.m.analysis.integration.gauss" was inspired +by an original code donated by Sébastien Brisard. 
+=============================================================================== + +The complete text of licenses and disclaimers associated with the the original +sources enumerated above at the time of code translation are in the LICENSE.txt +file. + +This product currently only contains code developed by authors +of specific components, as identified by the source code files; +if such notes are missing files have been created by +Tatu Saloranta. + +For additional credits (generally to people who reported problems) +see CREDITS file. + +Apache Commons Lang +Copyright 2001-2011 The Apache Software Foundation + +Apache Commons Compress +Copyright 2002-2012 The Apache Software Foundation + +Apache Commons CLI +Copyright 2001-2009 The Apache Software Foundation + +Google Guice - Extensions - Servlet +Copyright 2006-2011 Google, Inc. + +Google Guice - Core Library +Copyright 2006-2011 Google, Inc. + +Apache Jakarta HttpClient +Copyright 1999-2007 The Apache Software Foundation + +Apache Hive +Copyright 2008-2013 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by The JDBM Project +(http://jdbm.sourceforge.net/). + +This product includes/uses ANTLR (http://www.antlr.org/), +Copyright (c) 2003-2011, Terrence Parr. + +This product includes/uses StringTemplate (http://www.stringtemplate.org/), +Copyright (c) 2011, Terrence Parr. + +This product includes/uses ASM (http://asm.ow2.org/), +Copyright (c) 2000-2007 INRIA, France Telecom. + +This product includes/uses org.json (http://www.json.org/java/index.html), +Copyright (c) 2002 JSON.org + +This product includes/uses JLine (http://jline.sourceforge.net/), +Copyright (c) 2002-2006, Marc Prud'hommeaux . + +This product includes/uses SQLLine (http://sqlline.sourceforge.net), +Copyright (c) 2002, 2003, 2004, 2005 Marc Prud'hommeaux . 
+ +This product includes/uses SLF4J (http://www.slf4j.org/), +Copyright (c) 2004-2010 QOS.ch + +This product includes/uses Bootstrap (http://twitter.github.com/bootstrap/), +Copyright (c) 2012 Twitter, Inc. + +This product includes/uses Glyphicons (http://glyphicons.com/), +Copyright (c) 2010 - 2012 Jan Kovarík + +This product includes DataNucleus (http://www.datanucleus.org/) +Copyright 2008-2008 DataNucleus + +This product includes Guava (http://code.google.com/p/guava-libraries/) +Copyright (C) 2006 Google Inc. + +This product includes JavaEWAH (http://code.google.com/p/javaewah/) +Copyright (C) 2011 Google Inc. + +Apache Commons Pool +Copyright 1999-2009 The Apache Software Foundation + +========================================================================= +== NOTICE file corresponding to section 4(d) of the Apache License, == +== Version 2.0, in this case for the DataNucleus distribution. == +========================================================================= + +=================================================================== +This product includes software developed by many individuals, +including the following: +=================================================================== +Erik Bengtson +Andy Jefferson + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== +Joerg von Frantzius +Thomas Marti +Barry Haddow +Marco Schulze +Ralph Ullrich +David Ezzio +Brendan de Beer +David Eaves +Martin Taal +Tony Lai +Roland Szabo +Marcus Mennemeier +Xuan Baldauf +Eric Sultan + +=================================================================== +This product 
also includes software developed by the TJDO project +(http://tjdo.sourceforge.net/). +=================================================================== + +=================================================================== +This product includes software developed by many individuals, +including the following: +=================================================================== +Andy Jefferson +Erik Bengtson +Joerg von Frantzius +Marco Schulze + +=================================================================== +This product has included contributions from some individuals, +including the following: +=================================================================== +Barry Haddow +Ralph Ullrich +David Ezzio +Brendan de Beer +David Eaves +Martin Taal +Tony Lai +Roland Szabo +Anton Troshin (Timesten) + +=================================================================== +This product also includes software developed by the Apache Commons project +(http://commons.apache.org/). +=================================================================== + +Apache Java Data Objects (JDO) +Copyright 2005-2006 The Apache Software Foundation + +========================================================================= +== NOTICE file corresponding to section 4(d) of the Apache License, == +== Version 2.0, in this case for the Apache Derby distribution. == +========================================================================= + +Apache Derby +Copyright 2004-2008 The Apache Software Foundation + +Portions of Derby were originally developed by +International Business Machines Corporation and are +licensed to the Apache Software Foundation under the +"Software Grant and Corporate Contribution License Agreement", +informally known as the "Derby CLA". +The following copyright notice(s) were affixed to portions of the code +with which this file is now or was at one time distributed +and are placed here unaltered. 
+ +(C) Copyright 1997,2004 International Business Machines Corporation. All rights reserved. + +(C) Copyright IBM Corp. 2003. + +The portion of the functionTests under 'nist' was originally +developed by the National Institute of Standards and Technology (NIST), +an agency of the United States Department of Commerce, and adapted by +International Business Machines Corporation in accordance with the NIST +Software Acknowledgment and Redistribution document at +http://www.itl.nist.gov/div897/ctg/sql_form.htm + +Apache Commons Collections +Copyright 2001-2008 The Apache Software Foundation + +Apache Commons Configuration +Copyright 2001-2008 The Apache Software Foundation + +Apache Jakarta Commons Digester +Copyright 2001-2006 The Apache Software Foundation + +Apache Commons BeanUtils +Copyright 2000-2008 The Apache Software Foundation + +Apache Avro Mapred API +Copyright 2009-2013 The Apache Software Foundation + +Apache Avro IPC +Copyright 2009-2013 The Apache Software Foundation + + +Vis.js +Copyright 2010-2015 Almende B.V. + +Vis.js is dual licensed under both + + * The Apache 2.0 License + http://www.apache.org/licenses/LICENSE-2.0 + + and + + * The MIT License + http://opensource.org/licenses/MIT + +Vis.js may be distributed under either license. + + +Vis.js uses and redistributes the following third-party libraries: + +- component-emitter + https://github.com/component/emitter + The MIT License + +- hammer.js + http://hammerjs.github.io/ + The MIT License + +- moment.js + http://momentjs.com/ + The MIT License + +- keycharm + https://github.com/AlexDM0/keycharm + The MIT License + + + +liquidFillGauge.js +/*! + * @license Open source under BSD 2-clause (http://choosealicense.com/licenses/bsd-2-clause/) + * Copyright (c) 2015, Curtis Bratton + * All rights reserved. 
+ * + * Liquid Fill Gauge v1.1 + */ + + +=============================================================================== + +The CSS style for the navigation sidebar of the documentation was originally +submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project +is distributed under the 3-Clause BSD license. +=============================================================================== + +For CSV functionality: + +/* + * Copyright 2014 Databricks + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2015 Ayasdi Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +=============================================================================== +For dev/sparktestsupport/toposort.py: + +Copyright 2014 True Blade Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + +============================================================================= +============================================================================= + +SnappyData Store +Copyright (c) 2018 SnappyData Inc. + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +The following is a listing of the open source components detailed in +this document. This list is provided for your convenience; please read +further if you wish to review the copyright notice(s) and the full text +of the license associated with each component. + +SECTION 1: BSD-STYLE, MIT-STYLE, OR SIMILAR STYLE LICENSES + + >>> dom4j-1.6.1 + >>> jopt-simple-4.4 + >>> json-none + >>> spymemcached-2.9.0 + + + +SECTION 2: Apache License, V2.0 + + >>> hadoop-common-2.7.3 + >>> swagger-ui-2.0.17 + + + +SECTION 3: GNU Lesser General Public License, V2.1 + + >>> jgroups-2.2.9 + + + +APPENDIX. Standard License Files + + >>> Apache License, V2.0 + + >>> GNU Lesser General Public License, V2.1 + + + + +--------------- SECTION 1: BSD-STYLE, MIT-STYLE, OR SIMILAR STYLE LICENSES ---------- + +BSD-STYLE, MIT-STYLE, OR SIMILAR STYLE LICENSES are applicable to the following component(s). + + +>>> dom4j-1.6.1 + +Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. + +Redistribution and use of this software and associated documentation +("Software"), with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions of source code must retain copyright + statements and notices. 
Redistributions must also contain a + copy of this document. + +2. Redistributions in binary form must reproduce the + above copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. The name "DOM4J" must not be used to endorse or promote + products derived from this Software without prior written + permission of MetaStuff, Ltd. For written permission, + please contact dom4j-info@metastuff.com. + +4. Products derived from this Software may not be called "DOM4J" + nor may "DOM4J" appear in their names without prior written + permission of MetaStuff, Ltd. DOM4J is a registered + trademark of MetaStuff, Ltd. + +5. Due credit should be given to the DOM4J Project - + http://www.dom4j.org + +THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + + +>>> jopt-simple-4.4 + +The MIT License + + Copyright (c) 2004-2011 Paul R. Holser, Jr. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +ADDITIONAL LICENSE INFORMATION: + +> Apache 2.0 + +pholser-jopt-simple-jopt-simple-4.3-11-gdf866e0.zip\pholser-jopt-simple-df866e0\src\site\resources\scripts\prettify.js + +Copyright (C) 2006 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http:www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ + +>>> json-none + +Copyright (c) 2002 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +>>> spymemcached-2.9.0 + +Copyright (C) 2006-2009 Dustin Sallings + * Copyright (C) 2009-2011 Couchbase, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALING + * IN THE SOFTWARE. + + +--------------- SECTION 2: Apache License, V2.0 ---------- + +Apache License, V2.0 is applicable to the following component(s). + + +>>> hadoop-common-2.7.3 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. + + +>>> swagger-ui-2.0.17 + +License: Apache 2.0 + + + +ADDITIONAL LICENSE INFORMATION: + + +> MIT + + +swagger-ui-2.0.17.jar/ META-INF/ resources/ webjars/ swagger-ui/ 2.0.17/ lib/ jquery.ba-bbq.min.js + + +[PLEASE NOTE: WE ELECT TO USE AND DISTRIBUTE THIS COMPONENT UNDER THE TERMS OF THE MIT LICENSE. + THE ORIGINAL LICENSE TERMS ARE REPRODUCED BELOW ONLY AS A REFERENCE.] + + + +jQuery BBQ: Back Button & Query Library - v1.2.1 - 2/17/2010 + * http://benalman.com/projects/jquery-bbq-plugin/ + * + * Copyright (c) 2010 "Cowboy" Ben Alman + * Dual licensed under the MIT and GPL licenses. 
+ * http://benalman.com/about/license/ + + +--------------- SECTION 3: GNU Lesser General Public License, V2.1 ---------- + +GNU Lesser General Public License, V2.1 is applicable to the following component(s). + + +>>> jgroups-2.2.9 + +License: LGPL 2.1 + + +ADDITIONAL LICENSE INFORMATION: + + +> Apache 1.1 + +JGroups-2.2.9.src.zip\JGroups-2.2.9.src\lib\commons-logging.jar\META-INF\LICENSE.txt + + +The Apache Software License, Version 1.1 + * + * Copyright (c) 1999-2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + + +ant-optional.jar +ant.jar + + +> MIT + +JGroups-2.2.9.src.zip/ JGroups-2.2.9.src/ lib/bcprov-jdk14-117.jar/ org/ bouncycastle/ LICENSE.class + + +Copyright (c) 2000 The Legion Of The Bouncy Castle (http://www.bouncycastle.org) GH line.separator IJ^ + + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,subject to the following conditions:The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE [LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 6a48d97288..7be870e936 100644 --- a/README.md +++ b/README.md @@ -1,131 +1,82 @@ -## Build Quickstart +### SnappyData fuses Apache Spark with an in-memory database to deliver a data engine capable of processing streams, transactions and interactive analytics in a single cluster. -As of now, only the "integrated" build seems to work. Quickstart to compile -project: +### The Challenge with Spark and Remote Data Sources +Apache Spark is a general purpose parallel computational engine for analytics at scale. At its core, it has a batch design center and is capable of working with disparate data sources. While this provides rich unified access to data, this can also be quite inefficient and expensive. Analytic processing requires massive data sets to be repeatedly copied and data to be reformatted to suit Spark. In many cases, it ultimately fails to deliver the promise of interactive analytic performance. +For instance, each time an aggregation is run on a large Cassandra table, it necessitates streaming the entire table into Spark to do the aggregation. Caching within Spark is immutable and results in stale insight. -1. git clone git@github.com:SnappyData/snappy-commons.git -2. cd snappy-commons -3. git clone git@github.com:SnappyData/snappy-spark.git -4. git clone git@github.com:gemfire/gemxd-staging.git -5. git clone git@github.com:SnappyData/snappy-aqp.git -6. mv gemxd-staging snappy-store -7. git submodule init # get the job server integrated -8. 
git submodule update # checkout the correct job server code -9. ./gradlew clean assemble +### The SnappyData Approach +At SnappyData, we take a very different approach. SnappyData fuses a low latency, highly available in-memory transactional database (GemFireXD) into Spark with shared memory management and optimizations. Data in the highly available in-memory store is laid out using the same columnar format as Spark (Tungsten). All query engine operators are significantly more optimized through better vectorization and code generation.
+The net effect is an order of magnitude performance improvement when compared to native Spark caching, and more than two orders of magnitude better Spark performance when working with external data sources. -## Repository layout +Essentially, we turn Spark into an in-memory operational database capable of transactions, point reads, writes, working with Streams (Spark) and running analytic SQL queries. Or, it is an in-memory scale out Hybrid Database that can execute Spark code, SQL or even Objects. -There were few proposals about how to manage the various repositories mentioned in [this document](https://docs.google.com/document/d/1jC8z-WPzK0B8J6p3jverumK4gcbprmFiciXYKd2JUVE/edit#). Based on few discussions, we shortlisted Proposal 4 in the document. -According to "Proposal 4" gemxd and snappy-spark repositories will be independent of any other repository. There will be a third repository that will hold the code of Snappy - snappy-commons. Snappy-Commons will have two projects: +If you are already using Spark, experience a 20x speedup in your query performance. Try out this [test](https://github.com/SnappyDataInc/snappydata/blob/master/examples/quickstart/scripts/Quickstart.scala). -(a) **snappy-core** - Any code that is an extension to Spark code and is not dependent on gemxd, job server etc. should go in here. For e.g. SnappyContext, cluster manager etc. +##### Snappy Architecture +![SnappyData Architecture](docs/Images/SnappyArchitecture.png) -(b) **snappy-tools** - This is the code that serves as the bridge between GemXD and snappy-spark. For e.g. query routing, job server initialization etc. +## Getting Started +We provide multiple options to get going with SnappyData. The easiest option, if you are already using Spark 2.1.1, +is to add SnappyData as a package dependency. You can find more information on options for running SnappyData [here](docs/quickstart.md).
-Code in snappy-tools can depend on snappy-core but it cannot happen other way round. +## Downloading and Installing SnappyData +You can download and install the latest version of SnappyData from the [SnappyData Download Page](https://www.snappydata.io/download). +Refer to the [documentation](docs/install.md) for installation steps. -The snappy-spark repository has to be copied or moved inside snappy-commons for an integrated build. +If you would like to build SnappyData from source, refer to the [documentation on building from source](docs/install/building_from_source.md). -(c) **snappy-spark** - This is the Spark code with Snappy modifcations. +## SnappyData in 5 Minutes! +Refer to the [5 minutes guide](docs/quickstart.md) which is intended for both first time and experienced SnappyData users. It provides you with references and common examples to help you get started quickly! -Similarly the GemfireXD repository can be copied or moved inside snappy-commons by name *snappy-store* for an integrated build with GemFireXD. The branch of GemFireXD to use is also *snappy-store* that has been branched from rebrand_Dec13 recently for this purpose. +## Documentation +To understand SnappyData and its features refer to the [documentation](http://snappydatainc.github.io/snappydata/). -(d) **snappy-store** - This is the GemFireXD with Snappy additions. +## Community Support -(e) **snappy-aqp** - This is the Snappy Data proprietary code (AQP error estimation) +We monitor channels listed below for comments/questions. -Note that git operations have still to be done separately on snappy-commons, snappy-spark and snappy-store(GemFireXD) repositories. 
+[Stackoverflow](http://stackoverflow.com/questions/tagged/snappydata) ![Stackoverflow](http://i.imgur.com/LPIdp12.png) [Slack](http://snappydata-slackin.herokuapp.com/)![Slack](http://i.imgur.com/h3sc6GM.png) [Gitter](https://gitter.im/SnappyDataInc/snappydata) ![Gitter](http://i.imgur.com/jNAJeOn.jpg) [Mailing List](https://groups.google.com/forum/#!forum/snappydata-user) ![Mailing List](http://i.imgur.com/YomdH4s.png) [Reddit](https://www.reddit.com/r/snappydata) ![Reddit](http://i.imgur.com/AB3cVtj.png) [JIRA](https://jira.snappydata.io/projects/SNAP/issues) ![JIRA](http://i.imgur.com/E92zntA.png) +## Link with SnappyData Distribution -## Building using gradle +**Using Maven Dependency** -Gradle builds have been arranged in a way so that all of snappy projects including snappy's spark variant can be built from the top-level. In addition snappy-spark and GemFireXD (inside snappy-store) can also be built separately. If the snappy-spark directory is not present inside snappy-commons, then it will try to use locally published snappy-spark artifacts instead. Likewise if there is no snappy-store directory, then it will use the local artifacts inside local-repo in snappy-commons: - * The full build and Intellij import has been tested with only JDK7. If you are using JDK8, then you are on your own (though it will likely work). On Ubuntu/Mint systems, best way to get Oracle JDK7 as default: +SnappyData artifacts are hosted in Maven Central. 
You can add a Maven dependency with the following coordinates: - - add webupd8 java repository: sudo add-apt-repository ppa:webupd8team/java - - install and set jdk7 as default: sudo aptitude install oracle-java7-set-default - - you can also install oracle-java7-unlimited-jce-policy package for enhanced JCE encryption - - this will set java to point to JDK7 version and also set JAVA_HOME, so start a new shell for the changes to take effect; also run "source /etc/profile.d/jdk.sh" to update JAVA_HOME (or else you will need to logoff and login again for the JAVA_HOME setting to get applied) +``` +groupId: io.snappydata +artifactId: snappydata-cluster_2.11 +version: 1.1.0 +``` - * Ensure that snappy-spark repository has been moved/cloned inside snappy-commons by "snappy-spark" name. Similarly move the GemFireXD (snappy-store branch) repository inside snappy-commons by "snappy-store" name. The integrated build depends on its name and presence inside else it will use the local artifacts as mentioned before. *DO NOT JUST SYMLINK THE DIRECTORIES* -- that is known to cause trouble with IDE though command-line build may go through. - * Update both repos (snappy-commons and snappy-spark) to latest version and the GemFireXD repository in snappy-store to latest snappy-store branch. Then test the build with: ./gradlew clean && ./gradlew assemble - * If you see an error like "Could not find hadoop-common-tests.jar", then clear maven cache artifacts: rm -rf ~/.m2/repository/org/apache/hadoop, so that gradle can download all required depedencies, then run assemble target again. 
- * Run a snappy-core test application: ./gradlew :snappy-core_2.10:run -PmainClass=io.snappydata.app.SparkSQLTest - AND/OR a GemFireXD junit test: ./gradlew :snappy-store:gemfirexd:tools:test -Dtest.single=\*\*/BugsTest +**Using SBT Dependency** +If you are using SBT, add this line to your **build.sbt** for core SnappyData artifacts: -## Setting up Intellij with gradle +`libraryDependencies += "io.snappydata" % "snappydata-core_2.11" % "1.1.0"` -If the build works fine, then import into Intellij: - * Update Intellij to the latest version, including the latest Scala plugin. Check using "Help->Check for Update". The scala plugin version in File->Settings->Plugins->Scala should be at least 1.5.4 else update the plugin from that page. - * Double check that Scala plugin is enabled in File->Settings->Plugins, as also the Gradle plugin. Note that update in previous step could have disabled either or both, so don't assume it would be enabled. - * Select import project, then point to the snappy-commons directory. Use external Gradle import. Add -XX:MaxPermSize=350m to VM options in global Gradle settings. Select defaults, next, next ... finish. Ignore "Gradle location is unknown warning". Ensure that a JDK7 installation has been selected. - * Disable the "Unindexed remote maven repositories found" warning message. - * Once import finishes, go to File->Settings->Editor->Code Style->Scala. Set the scheme as "Project". Check that the same has been set in Java's Code Style too. Then OK to close it. Next copy codeStyleSettings.xml in snappy-commons to .idea directory created by Intellij and then File->Synchronize just to be sure. Check that settings are now applied in File->Settings->Editor->Code Style->Java which should show TabSize, Indent as 2 and continuation indent as 4 (same for Scala). - * If the Gradle tab is not visible immediately, then select it from window list popup at the left-bottom corner of IDE. 
If you click on that window list icon, then the tabs will appear permanently. - * Generate avro and GemFireXD required sources by expanding :snappy-commons_2.10->Tasks->other. Right click on "generateSources" and run it. The Run item may not be available if indexing is still in progress, so wait for it to finish. The first run may take a while as it downloads jars etc. This step has to be done the first time, or if ./gradlew clean has been run, or you have made changes to javacc/avro/messages.xml source files. *IF YOU GET UNEXPECTED DATABASE NOT FOUND OR NPE ERRORS IN GemFireXD LAYER, THEN FIRST THING TO TRY IS TO RUN THE generateSources TARGET AGAIN.* - * Increase the compiler heap sizes else build can take quite long especially with integrated GemFireXD. In File->Settings->Build, Execution, Deployment->Compiler increase "Build process heap size" to say 1536 or 2048. Similarly increase JVM maximum heap size in "Languages & Frameworks->Scala Compiler Server" to 1536 or 2048. - * Test the full build. - * Open Run->Edit Configurations. Expand Defaults, and select Application. Add -XX:MaxPermSize=350m in VM options. Similarly add it to VM parameters for ScalaTest and JUnit. - * For JUnit configuration also append "/build-artifacts" to the working directory i.e. the directory should be "$MODULE_DIR$/build-artifacts". Likewise change working directory for ScalaTest to be inside build-artifacts otherwise all intermediate log and other files (especially created by GemFireXD) will pollute the source tree and may need to cleaned manually. - * Try Run->Run... on a test like SparkSQLTest. 
+For additions related to SnappyData cluster, use: +`libraryDependencies += "io.snappydata" % "snappydata-cluster_2.11" % "1.1.0"` -### Running a junit/scalatest +You can find more specific SnappyData artifacts [here](http://mvnrepository.com/artifact/io.snappydata) -Running an application like SparkSQLTest should be straightforward -- just ensure that MaxPermSize has been increased as mentioned above especially for Spark/Snappy tests. For running junit/scalatest: +**Note:** If your project fails when resolving the above dependency (that is, it fails to download javax.ws.rs#javax.ws.rs-api;2.1), it may be due to an issue with its pom file.
As a workaround, you can add the code below to your **build.sbt**: - * When selecting a run configuration for junit/scalatest, avoid selecting the gradle one (green round icon) otherwise that will launch an external gradle process that will start building the project all over again. Use the normal junit (red+green arrows icon) or scalatest (junit like with red overlay). - * For JUnit tests, ensure that working directory is "$MODULE_DIR$/build-artifacts" as mentioned before. Otherwise many GemFireXD tests will fail to find the resource files required in many tests. They will also pollute the checkouts with large number of log files etc, so this will allow those to go into build-artifacts that can also be cleaned up easily. +``` +val workaround = { + sys.props += "packaging.type" -> "jar" + () +} +``` +For more details, refer to [https://github.com/sbt/sbt/issues/3618](https://github.com/sbt/sbt/issues/3618). -### Manual sources and docs imports +## Ad Analytics using SnappyData +Here is a stream + Transactions + Analytics use case example to illustrate the SQL as well as the Spark programming approaches in SnappyData - [Ad Analytics code example](https://github.com/SnappyDataInc/snappy-poc). Here is a [screencast](https://www.youtube.com/watch?v=bXofwFtmHjE) that showcases many useful features of SnappyData. The example also goes through a benchmark comparing SnappyData to a Hybrid in-memory database and Cassandra. -If sources and docs were selected during initial import, then it can take a long time to get sources+docs for all dependencies. Instead one way could be to get the sources+docs for only scala-lang jars. The project setup after import already links sources and javadocs to appropriate locations in .m2 local cache, but since sources+docs were not selected during import so Maven may not have downloaded them yet.
Check if you already have sources in m2 cache by opening a scala-lang class like Seq (hit Shift->Ctrl->T when using eclipse bindings and type scala.collection.Seq) and check if sources+docs are correctly shown. If not, then to easily download for selected jars do this: - * Open the File->Project Structure->Libraries - * Click on the '+' sign at the top to add new library, and choose Maven. - * In the box, provide "scala-library-2.10.4" and click on the search tool. - * Select the "org.scala-lang:scala-library:2.10.4" in the drop down. Then check "Sources", "JavaDocs" options and go ahead. - * Do the same for others like "scala-reflect-2.10.4" and "scala-compiler-2.10.4" as required. - * Once this is done, don't select OK on the main Project Structure box. Instead hit "Cancel" and it should be all good since we only wanted to get Maven to download the sources and docs for these jars. +## Contributing to SnappyData +If you are interested in contributing, please visit the [community page](http://www.snappydata.io/community) for ways in which you can help. -## Git configuration to use keyring/keychain - -Snappy is currently hosting private repositories and will continue to do -so for foreseable future. One way to avoid passing credentials everytime could -have been to upload the public SSH key and use git:// URL. However, that doesn't -work at least in Pune network due to firewall issue (and proxy server not -supporting proxying ssh). However, it is possible to configure git to enable -using gnome-keyring on Linux platforms, and KeyChain on OSX to avoid it. 
-(sumedh: latter not verified by me yet, so someone who uses OSX should do it) - -On Linux Ubuntu/Mint: - -Install gnome-keyring dev files: sudo aptitude install libgnome-keyring-dev - -Build git-credential-gnome-keyring: - - cd build/git-gnome-keyring - make - -Copy to PATH (optional): - - sudo cp git-credential-gnome-keyring /usr/local/bin - make clean - -Note that if you skip this step then need to give full path in the next -step i.e. /path-to-snappy-commons/build/git-gnome-keyring/git-credential-gnome-keyring - -Configure git: git config --global credential.helper gnome-keyring - -Similarly on OSX locate git-credential-osxkeychain, build it if not present -(it is named "osxkeychain" instead of gnome-keyring), then set in git config. - -Now your git password will be stored in keyring/keychain which is normally -unlocked automatically on login (or you will be asked to unlock on first use). - -On Linux, you can install "seahorse", if not already, to see/modify all -the passwords in keyring (GUI menu "Passwords and Keys" under Preferences -or Accessories or System Tools) diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt new file mode 100644 index 0000000000..a24567b2f2 --- /dev/null +++ b/ReleaseNotes.txt @@ -0,0 +1,1409 @@ +#################################################################################################### +# PLEASE KEEP THE WIDTH OF THE LINES BELOW WITHIN 100 CHARACTERS. # +# MOST RECENT CHANGE AT THE TOP. # +# KEEP THE DESCRIPTION OF EACH OF YOUR CHANGES THAT NEEDS TO BE PUT INTO THE RELEASE NOTES TO ONE # +# TO THREE LINES. # +# KEEP A LINE BLANK BETWEEN TWO NOTES. # +# ADD THE JIRA TICKET ID, IF APPLICABLE. 
# +#################################################################################################### + +Release 1.1.0 + +- New Features/Fixed Issues + + [SNAP-2440] Support fully qualified column names in projection + + (f7ecc73) Start hive-thriftserver by default in product + + [SNAP-2982] Fix for exception while startup - "MetaException: Metastore contains multiple versions" + + [SNAP-2902] Mismatch in the expected and actual inserted rows, after inserting data from a column + table to another column table + + (5fade0b) Quote table and schema name in some commands before executing on GemFireXD connection. + This allows support for reserved keywords in GemFireXD parser like "default" as schema name. + + (1942811) Default glibc malloc settings to avoid memory defragmentation + + [SNAP-2959] Increase the weightage of lead to avoid it being thrown out of distributed system in + case network partitioning + + [SNAP-2975] Fix crash due to SEGV in putInto operations + + (ce1874e) Including SparkR library as part of distribution + + [SNAP-2962] Set default value for spark.sql.files.maxPartitionBytes to 32mb + + (41594f5) Use Spark convention to return Catalog listDatabases/listTables output in lower-case + + [SNAP-2860] PUT INTO <> SELECT v1, v2... 
throws IllegalArgumentException for Column table + + [SNAP-2474] Partition pruning support added for row table scan + + [SNAP-2956] Wrap non-fatal OOME from Spark layer in LowMemoryException + + [SNAP-2900] UI Enhancements: Provide an ability/control, on Dashboard UI, to expand or collapse + whole row per member node entry in single click + + [SNAP-2890] Exception occurs when the maximum column projection corresponds to 128th column + position + + (7607064) Spotfire Apache Spark compatibility changes: + - SHOW DATABASES as an alias for SHOW SCHEMAS + - Support in SnappySqlParser for spark.sql.variable.substitute to substitute ${var} in query string + + (568b7e1) Fix for JDBC driver Jar running with Spark 2.3 + versions (java.lang.NoSuchFieldError: + MAX_ROUNDED_ARRAY_LENGTH) + + [SNAP-2368] Handling the case when SnappyDataBaseDialect is used to determine table schema with the + table name not containing schema name + + (5911532) A "--config " option can be passed to the snappy-start-all script now to + take config files from that folder instead of default conf folder. Please note log4j.properties + file is still taken from the default conf folder. + + (1630148) Fix for filter push down to scan level when IN list has constants but in cast node. 
+ + (7a7e902) Fix backward compatibility issues with sample tables + + [SNAP-2761] Use off-heap cache(if configured) for streaming-sink + + [SNAP-2818] Trim the JOB_DESCRIPTION property in Spark jobs + + [SNAP-2790] Key_columns option does not check for the validity of column names at time of table creation + + [SNAP-2789] Enabled broadcast and exchange reuse for column and row tables + + [SNAP-2751] Cannot connect to secure SnappyData cluster via Spark's Thrift server + + [SNAP-2773] Improve GUI display of put into + + (80f2d30) Store the temporary join result in offheap for put into operation + + [SNAP-2661] Provide Snappy UI User a control over auto update + + [SNAP-2381] Global lock to serialize concurrent puts + + [SNAP-2712] Some in built functions returning wrong results due to tokenization and plan caching + + (5601184) Fixing some meta-data query inconsistencies: + - Add support for SHOW VIEWS + - Use "schemaName" for the column instead of Spark's "database" in SHOW TABLES + - Show CHAR/VARCHAR types instead of STRING for those types of columns in meta-data queries + + [SNAP-2389] NPE during lead failure/restart + + [SNAP-2462] Enable common-subexpression elimination for ParamLiterals. This improves performance + of TPC-H Q19 + + [SNAP-2602] On UI add column named "Overflown Size"/ "Disk Size" in tables + + (dd590a2) Perf improvement for limit query on external tables + + [SNAP-2985] Fixes for multiple inconsistency issues in snapshot isolation + + (91a9a63) Error in select if table(replicated) is created with backticks as + delimiter + + (c12fa66) Scalability improvements for snapshot isolation + + (72052fc) Change the default auto reconnect setting to false.
For locator it is still set to true + + (1f9d934) Fix occasional NPE in ClientService init + + (be351df) Disable load-balance by default on servers + + [SNAP-2591] Implemented failover in ODBC + + [SNAP-2654] GemFireCacheImpl.oldEntryMap causes memory leak + + (1d5e0fc4) Always route statements like "show tables/views...", "describe" to lead from + snappy-shell to give consistent results + + (d0a1002) Fix for "show tables" command. When the standard table type "TABLE" is given, + then show all of row, column, external, sample, stream and topK tables + + [SNAP-2934] Avoid double free of page that caused server crash due to SIGABRT/SIGSEGV + + [SNAP-2908] UI: Display sparklines for cpu and memory usage for individual members + + [SNAP-2926] UI: Changing default page size for all tabular lists from 10 to 50, sorting + members list tabular view on member type for ordering all nodes such that all locators + first, then all leads and then all servers. + + [SNAP-2457] Enabling plan caching for hive thrift server sessions + + (b825fd6) Property to set if hive meta-store client should use isolated ClassLoader + + [SNAP-2909] Streaming micro batch thread keeps running forever when snappy-job fails due + to failure from outside streaming query + + [SNAP-2237] SQL expression alias in projection cannot be used in GroupBy + + [SNAP-2634] Planner is not used in IncrementalExecution + + +Release 1.0.2.1 + +- New Features/Fixed Issues + + [SNAP-2646] Make a copy for non-primitive aggregations. (#1195) + + (81f3cbf) Changed scripts to work with old bash 3.x versions, for cases like MacOSX that ships + with bash 3.x by default. + + [SNAP-2659] Reset the pool at the end of collect to avoid spillover of lowlatency pool setting + to later operations that may not use the CachedDataFrame execution paths. (#1191) + + [SNAP-2503] Kill the VM on OutOfMemoryError by adding command line argument. (#1192, #1187) + + [SNAP-2657] Added caching for hive catalog lookups.
+ Meta-data queries with large number of tables take quite long because of nested loop joins between + SYSTABLES and HIVETABLES for most meta-data queries. Additionally each row in SYSTABLES looks up + hive-metastore separately to determine if the table is a column table or not. Overall this + results in close to a million hive-metastore lookups for each meta-data query when there are + tables in the range of hundreds. (#1190) + + [SNAP-2656] Check for underlying Attribute with joins on aggregate columns. (#1189) + + (0354e8b) Fix GUI plans for CACHE QUERY SQL. + + [SNAP-2630][SNAP-2625] Allow deleteFrom to work as far as the dataframe contains matching column + to key columns. Make deleteFrom API behaviour consistent for row and columns tables. (#1184) + + [SNAP-2645] Making startingOffsets property optional as Spark's Streaming API takes care of that + by using committed offset or Kafka param "auto.offset.reset" if no explicit value is specified for + startingOffsets. (#1186) + + (4390d58) Fixes to joinType to apply to a JOIN result. + + (707c55b) Fix build and run with newer IDEA releases. + + (f2bcf4f) SortMergeJoinExec extension to avoid shuffle when join key columns are a superset of + child partitioning. + + (0f7a01a) Fix plan-level query hints like joinType on RHS of join to be applied on the relations + rather than Join operator. Fix catalog inconsistency in row tables with CREATE TABLE .. SELECT ... + when the insert fails for some reason. + + (829534a) Fix for occasional failures due to hive client disconnect. + + [SNAP-2569] Support Spark's HiveServer2 in SnappyData cluster. Enable starting an embedded Spark + HiveServer2 on leads in embedded mode. The default session type has been switched to be + SnappySession though user can force Spark's hive session using a property. (#1161) + + (9ad806d) Use JNA Platform support to skip agent on non-linux platforms. Also use the same to + determine 64-bit support instead of custom code. 
Update snappy script to include JNA jar with + launcher and gemfire-shared. + + (128ad7b) Created new class for Snappy sink example instead of replacing existing structured + streaming example. (#1183) + + [SNAP-2508] Make condition for replicatedTableJoin stricter while creating HashJoinExec. (#1173) + + (736b029) Create a separate module for pooled JDBC driver. (#1170) + Example usage: + import io.snappydata.implicits._ + spark.snappyQuery("select count(*) from table1").show() + spark.snappyExecute("create table table2 (id int, data string) using column").show() + df.write.snappy("table3") + Above examples assume that spark.snappydata.connection property is already set at conf level or + can be passed explicitly to first two methods too). + + (01f800b) Disabling conflation by default in default snappy streaming callback. (#1181) + + [SNAP-2575] Default Sink Callback - Support multiple events with same key column in same streaming + batch. Conflating events with same key columns while processing streaming batch in default sink + callback. (#1176) + + [SNAP-2491] Fixed: Column added using 'ALTER TABLE ... ADD COLUMN ...' through snappy shell does + not reflect in spark-shell. (#1175) + + [SNAP-2568] Allow access to store-side system tables/virtual tables from Spark SQL. Also, expanded + the "SHOW" statements to include SCHEMAS, COLUMN, MEMBERS as well as expanded "DESCRIBE" + statements. (#1160) + + (83a4253) Structured streaming - Added default sink callback. (#1157) + + [SNAP-2582] Allow NONE as a valid policy for server-auth-provider. (#1167) + + (c1cf502) Fixing occasional failures in serialization using CachedDataFrame if node is just + starting/stopping. Also, fix a hang in shutdown for cases where hive client close is trying to + boot up the node again, waiting on the locks taken during shutdown. + + (db7d568) Fixed issue of snappy-env.sh not being loaded. + + [SNAP-2576] Update, delete on table should not get policy filter applied. Fixed. 
(#1163) + + [SNAP-2577] Simple column table creation using jdbc API fails in single VM tests. Fixed. (#1165) + + (47a8608) Fixing buffer handling in compression. + + [SNAP-2571] Add support for query hints to force a join type for cases where result is known to be + small, for example, but plan rules cannot determine so. (#1159) + + [SNAP-2566] Offset argument to Lead and Lag window functions were not being marked as foldable + causing analysis error. Added them in Foldable functions list. (#1162) + + (56420fb) Fixing some issues in startup scripts. + 1. Moved snippet to get AWS public host name inside "snappy" script since it must be done on the + actual server rather than where "snappy-start-all.sh" has been invoked. + 2. Use hostname-for-clients to set the SPARK_PUBLIC_DNS so that both JDBC/ODBC and WebUI point to + the same address if required; any explicit SPARK_PUBLIC_DNS is ignored because it will be common + for the cluster and will not work with multiple leads. + 3. Likewise, use bind-address to set SPARK_LOCAL_IP on every node instead of reading the global + environment variable. + + (f870cd7) SnappyData specific GfxdDataSerializable types were not getting registered when validate + tool was getting launched through SnappyUtilLauncher. Fixed. (#1144) + + (858436a) Mark numLeadsOnNode as negative if heap-size or memory-size has been configured + explicitly. + + (b84254d) Enable default off-heap memory-size when possible. + + [SNAP-2555] Clear non-default configs for internal hive meta-store client. (#1154) + + [SNAP-2511] Enable lazy initialization in SortMergeJoin and SnappyHashJoin. (#1146) + + [SNAP-2487] Properly fetching the current active snappy streaming context and closing it when + "streaming.batch_interval" is not passed as part of configuration. (#1143) + + (484ecab) Add CACHE, UNCACHE, RECACHE and RESET for routing.
+ + (9fd2e01) Checking if the entry causing constraint violation is destroyed or removed in a separate + thread with synch taken on region entry, to avoid dead lock. (#441) + + (b704ce2) Catch and throw any errors like OutOfMemoryError during region initialization. + + (ada81d3) Fix schema in ResultSet metadata. + + (a97b40e) Conditional listing of regions in a diskstore only when the validate disk store tool has + been invoked. + + (ece2329) Temporary workaround for SNAP-2627. If the unique constraint violation is due to removed + or destroyed AbstractRegionEntry, attempt is made to remove it from index and another try is made + to put the new value against the index key. (#438) + + (16532da) Handle CancelException in ClientTracker cleanup. + + [SNAP-2562] Fix validate-disk-store tool for co-location check. (#436) + + (33508bf) JDBC Client Pool Driver, stores the pool of connections internally. For pooling purpose + internally we have used Tomcat connection pooling library. Also, configured the jdbcInterceptor to + reset the autocommit, readOnly and isolation level to default values whenever a connection + is borrowed from the pool. Also, for the cleanup purpose we have run eviction thread to remove idle + connection from the pool. (#433) + + (a689c80) Adding jvmkill.c and also merged it into libgemfirexd64.so and libgemfirexd.so (#435) + + (1a0ec43) Fixing connection property handling in TomcatConnectionPool. + + (5eba064) JDBC Client with In-built Pool Driver. With support of connection pool in the driver + itself. (#428) + + (f34c8b3) Fixed an occasional hang due to connection pool exhaustion when reconnects happen too + quickly in the retry loop before membership VIEW can be updated; in such cases connections were + being released at the end after failure but this got stuck in the retry itself due to pool being + exhausted.
Correct a rare case of method to get colocated regions going into an infinite loop; now + explicitly skip over regions already seen in the colocation chain. + + (5587195) Update bucket stats for SerializedDiskBuffer in sync block. + + (fc79f5a) Enable default off-heap size on machines having large amount of RAM (> 14G) in + enterprise product. + 1. Adjust the default heap-size for lead/server to range from 2GB-8GB depending on the amount of + available RAM and number of cores. + 2. Check if configured memory-size is available (no swap) at startup and fail if not. System can + still fail at runtime due to lazy allocation by modern OSes. + + [SNAPPYDATA] (ab71801) Corrected the URL paths for RDDs to use /Spark Cache/ instead of /storage/. + + [SNAPPYDATA] (67596fc) Increase hive-thrift shell history file size to 50000 lines. + + [SNAPPYDATA] (12dc507) Generate spark-version-info.properties in source path + src/main/extra-resources. + + [SNAPPYDATA] (336c021) Fix default bind-address of ThriftCLIService. ThriftCLIService uses + InetAddress.getLocalHost() as default address to be shown but hive thrift server actually uses + InetAddress.anyLocalAddress(). Honour bind host property in ThriftHttpCLIService too. + + [SPARK-24950][SQL] (205c133) DateTimeUtilsSuite daysToMillis and millisToDays fails w/java 8 181-b13 + Author: Chris Martin + Closes #21901 from d80tb7/SPARK-24950_datetimeUtilsSuite_failures. + (cherry picked from commit c5b8d54c61780af6e9e157e6c855718df972efad) + Signed-off-by: Sean Owen + + +Release 1.0.2 + +- New Features/Fixed Issues + + [SNAP-2459] Adding an API to get primary key or Key Columns of a SnappyData table. (#1123) + + [SNAP-2433] Fixed incorrect server status shown in the UI by storing cluster members stats in map + against their DiskStoreUUID(as key) only and removing members ID as key. (#1126) + + [SNAP-2470] Fixed missing SQL tab on SnappyData UI in local mode. (#1122) + + [SNAP-2463] Fixed SELECT query results on ROW tables. 
Removed complex OR conditions. + + [SNAP-2451] Fixed JOIN query results on ROW tables. Handled the case of DelegateRDD where BaseRDD + itself is a DelegateRDD. (#1120) + + (be094ef) Support for displaying VIEWTEXT for views in SYS.HIVETABLES. + + [ENT-40] Fixed recursion in policy filter application. + + [SNAP-2349] Fixed the deadlock by reducing the scope of synchronized blocks in ColumnFormatValue + to only when reading/writing fields. (#1045) + + [SNAP-2364] Skip batch if the stats row is missing. This is already handled for in-memory batches + and the same has been added for on-disk batches. + + (3a6a8ae) Row Level Security support. (#1084) + + (3bc03ae) Changes to split the view definition string and store it as split properties in the Map + to fix the issue of view string size exceeding 37200 chars. (#1118) + + [SNAP-2351][SNAP-2443] Fix a problem with row tables and hash joins. Skip plan caching of Range + based DataFrames. (#1112) + + (aed173b) Fixes for Catalog Repair Procedure. (#1113) + + [ENT-21] Set security handlers to all ServletContextHandlers which are part of Dashboard, SQL + tabs and Auto-refresh feature web services. (#1115) + + [SNAP-2453] Added a fix to query with optional n arguments. In "FETCH FIRST ROW ONLY" it was + expecting to pass n number of first rows. Now made it optional, if n is not passed then it + fetches first row only. (#1117) + + [SNAP-2302][SNAP-2303][SNAP-2307] Sqlfixes (#1106) + + [SNAP-2432] Added a system property to avoid pulling jar info from snappydata cluster, if not + required, logging a warning if required jar info could not be pulled. (#1104) + + [SNAP-2400] To speed up the dashboard data auto-refresh, removing unnecessary empty checks on + table stats info and external table stats info. (#1101) + + [ENT-34] Avoid setting snappydata user name in all pool connections obtained in system. (#1094) + + [SNAP-2438] currentschema should be considered for plan caching. 
(#1096) + + [ENT-29] Make sure that snapshotTxIdForRead is reset properly. (#1095) + + [SNAP-2421] Fixed concurrent query performance issue by fixing incorrect output partition choice. + Due to numBucket check, all the partition pruned queries were converted to hash partition with + one partition. This was causing an exchange node to be introduced. (#1087) + + [SNAP-2434] Fixed syntax error when projection mentioned as schema.tablename.* (#1092) + + [SNAP-2351] Added support for PreparedStatement.getMetadata() JDBC API. (#1038) + + [SNAP-2422] HTML code changes for displaying error message if loading Google charts library fails. + + [SNAP-2144] Display only total CPU cores count and remove cores count break up. (#1083) + + [SNAP-2306] Added StreamingQuery.stop on job server session.stop() method call. This will be + called from JobManagerActor's JobKill. (#1078) + + [SNAP-2128] Support Store specific DDLs in SnappyDDLParser. (#1003) + + (984c8ce) Added check for boot time manager. If boot time UMM cannot find any memory, it is best + to return from that point. This means the system itself is configured with very low memory, which + is not capable of handling overhead for keys & region structures. (#1075) + + [SNAP-2398] Smart Connector side changes for deploy jars/packages functionality. (#1071) + + [SNAP-2144] Adding CPU cores details in the ClusterDetails and MemberDetails. (#1068) + + [SNAP-2071] Fixed SnappyData UI becoming unresponsive on LowMemoryException. (#1067) + + [SNAP-2390] Fixed NullPointerException at updateMemberStatistics when cluster was getting + restarted. (#1070) + + [SNAP-2387] Fix ParamLiteral handling for common sub-expressions. (#1059) + + (c674c94) Added Spark SQL test suites to SnappyData. (#994) + + (b3c0e56) Changes to impose limit on results fetched from External relation (GemFire) for + "select *" query. 
(#1051) + + [SNAP-2332] Fixed exception in querying caused by tokenization of constants in aggregate functions + by removing param literals from the aggregate functions in prepare phase for prepared statements. + (#1041) + + [SNAP-2347] Fix for row tables getting dropped. (#1058) + + [SNAP-2382] Fix for COLUMN table mysteriously shown as ROW table on dashboard after LME in data + server. (#1064) + + [SNAP-2341] Table shown in dashboard even after 'CREATE TABLE ...' is killed. (#1052) + + [SNAP-2388] Change to let 'lead' restart with warnings if deployed jars/coordinates not present + during restart. (#1060) + + [SNAP-2186] Fixed off-heap size for Partitioned Regions. (#1053) + + [SNAP-2363] Fixed failure when query on view does not fallback to Spark plan in case Code + Generation fails. (#1042) + + [SNAP-2348] Fix invalid decompress call on stats row. (#1044) + + [GITHUB-982] Fixed negative bucket size with eviction. (#1048) + + [SNAP-1334] Added Auto Refresh feature for Dashboard UI and other enhancements. (#1005) + + [SNAP-2356] Release compressed buffers that are skipped. (#1040) + + [SNAP-2308] The fix involves allowing query with decimal numbers suffixed by 'BD' to be parsed + correctly and recognized as numeric literals. (#1008) + + (5fbb47a) Taking credentials from globalSparkContext to shutdown the store. (#1024) + + [SNAP-2339] Do not cache UnsafeProjection instance. (#1034) + + [ENT-27] Changes to push down filter predicate to Scan Level for GemFireRelation and reverting + ParamLiterals to TokenLiteral (hence Literal) in case of LogicalRelation other than Column Tables + Scan. (#1020) + + [SNAP-2338] Added task cancellation checks at the start of new batch in ColumnTableScan. (#1033) + + [SNAP-2312] Handled int overflow case. For a large number of distinct keys ObjectHashSet might + have entries near to 1 << 30. Multiplying that to 8 causes int overflow to a negative number.
+ + [SNAP-2329] Restarting zeppelin interpreter when a deploy happens if the lead hosts a zeppelin + interpreter server. Added deploying of jar files and made them persistent too. (#1029) + + [SNAP-2297] Support deployment of packages as a DDL command. (#1021) + + [SNAP-2321] Moved the caching of join dataset after the count operation, as an action will clear + the session context object. (#1025) + + [SNAP-1529] Added support for reading maven dependencies using --packages option in our job server + scripts (snappy-job.sh). (#1004) + + [SNAP-2296] Make sure that connection is closed when task completes so that transaction is also + committed. (#1018) + + [SNAP-2241] Handled catalog database creation if it's not present during function creation. (#998) + + [SNAP-2080] Allow creation of index on VARCHAR column added through ALTER TABLE. (#996) + + [SNAP-2255] Closing the connection so as to return the pooled connection back to pool. (#995) + + (ce530a9) Eagerly clear shuffle data after a bulk insert/update/put. + + [SNAP-1932] Cleaning up tokenization handling and fixes. Main change is addition of two separate + classes for tokenization, a) ParamLiteral and b) TokenLiteral. Both classes extend a common trait + TokenizedLiteral. Basic idea being that tokenization will always happen (unless explicitly turned + off) independently of plan caching. (#989) + + [SNAP-2244] Stats for delta column batches (#980) + + [SNAP-2243][SNAP-2188] Procedure for smart connector iteration and fixes. Includes fixes for perf + issues as noted for all iterators (disk iterator, smart connector and remote iterator). (#979) + + [SNAP-2225] Fixed different results of nearly identical queries, due to join order. (#971) + + [SNAP-2220][SNAP-2157] Corrected row count updated/inserted in a column table via putInto. (#974) + + [SNAP-1931][SNAP-1932][SNAP-1906] Fixed the issue of incomplete plan info in UI due to plan + caching changes. 
(#973) + + (adf4664) Miscellaneous fixes and performance improvements. + + (794d03f) Miscellaneous fixes, added bucket count column to dashboard. (#969) + + (7069522) Corrected the logic of existence join, which was looking for null values earlier. (#966) + + [SNAP-2217] Fixed thread-unsafe paths in stats service and other cleanups. (#965) + + [SNAP-2215] Split out argument value for -log-file specified in conf/locators. (#964) + + [AQP-292] Evaluate grouping keys explicitly if aggregates are using them. (#963) + + +Release 1.0.1 + +- New Features/Fixed Issues + + [SNAP-2214][SNAP-2036] Fixed OOME after restart with heap, projection pushdown. (#960) + + Fixed putInto inner join cache perf and related issues. (#958) + + [SNAP-2212] Fixed failure in TPCH Q21, by re-evaluating check condition for all joins. (#959) + + [SNAP-2205] Fixed scala.MatchError in SnappyEmbeddedTableStatsProviderService on cluster restart. + + [SNAP-2175] Handle no GemFireCache in smart connector mode. (#956) + + Explicit Action for put innerjoin cache. Materialized cache for intermediate inner join in a put + operation. (#955) + + [SNAP-2204] Search through aliases (e.g. for VIEWs) for colocated join keys. (#954) + + [SNAP-2200] Fixed ClassCastException when reading from overflowed update deltas. + + [SNAP-2178] Increase the time to wait for servers to join. + + [SNAP-2191] Disable zeppelin interpreter from within lead process when security is enabled. (#946) + + [SNAP-2194] Add partition pruning for column tables to smart connector. (#952) + + [SNAP-2180] Fixed snappy pulse UI showing zero memory usage on data server, on active lead node + restart by explicitly initializing memoryMap on UMM start. (#951) + + [SNAP-2192] Delay rollover in column updates to pre-commit. (#950) + + [SNAP-2124] Fixed rows missing in update due to incorrect stats row read. (#945) + + [SNAP-1283] LATERAL VIEW support in SnappyParser. 
(#944) + + [SNAP-1840] Fixed TPCH Q22 in Smart Connector mode due to NPE in CollectAggregateExec. + + (5955ce7) Fixed some snappy-spark failures and miscellaneous changes. + + [SNAP-2042] Added GRANT/REVOKE support from SnappySession. + + (41ed1ca) Use power of 2 for number of buckets in tests/docs. + + [SNAP-2178] Wait for servers to join in LeadImpl start and start stats service only after some + servers have joined. Likewise for creating the global SnappyContext. + + [SNAP-2170] Reduced the scope of global lock in SnappyContext.stopSnappyContext to fix deadlock + in lead shutdown. + + [SNAP-2088] Fixes for queries with filters on columns with null values. (#937) + + Parser performance improvements to recover the regression over 0.9 release. Also, optimized and + enhanced numeric/decimal literal handling. (#936) + + [SNAP-2086] Snappy Pulse displays list of external tables. + + (1d5cfd4) Some enhancements towards snappydata security. (#930) + + [SNAP-2102] Added memory+disk optimized column batch iterators. (#933) + + [SNAP-2118] Allow reading previous variable length value again. (#929) + + [SNAP-1501] Set overflow-to-disk as the only evict-action for tables. (#924) + By default, column and row tables will have heap-based eviction enabled with overflow-to-disk as + evict action. Allow OVERFLOW=false to disable eviction if EVICTION_BY is absent in DDL. + + [SNAP-2114] Plan caching is now attempted only for snappy tables. (#922) + + [SNAP-2125] Added setter commands to disable plan caching on current session and on all sessions. + + [SNAP-2093] Support ColumnTable PutInto & DeleteFrom API. (#906) + + [SNAP-2141] Fix updates on complex types. (#925) + + [SNAP-2146] Avoid prefixing zeppelin properties with "snappydata.store". + + (b139935) Refactored ByteBufferHashMap into a generic base class.
+ + (1c9e661) Instead of an explicit property to acquire read or write locks (which is supposed to be + set by scripts), if some other server has already initialized the hive metastore, then + automatically drop to read-lock to avoid servers unnecessarily blocking each other. + + [SNAP-338] Improvements in cluster startup time. + * Reduce discovery/join timeouts for first locator. + * A faster launcher that avoids loading any other classes (other than gemfire-shared and JNA) + * Jobserver startup (and thus the global SnappySession initialization) in background + * Initialize the hive catalog in background. (#911) + * Updated SnappyData type registration. (#910) + + (fd33f31) Avoid infinite retries in Utils.mapExecutors. In case an executor goes away then + retries in Utils.mapExecutors can get stuck in infinite retry loop so break it after a few + attempts. Changed PooledKryoSerializer to use direct buffers for Output. + + (0b34233) Fixing a couple of issues seen in ODBC testing. + + [SNAP-2127] Use separate delta disk-stores for row buffer regions. (#918) + + [SNAP-2084] Handled dropStorageMemoryForObject in DefaultMemoryManager. (#892) + + [SNAP-2121] Mark delta regions to use the delta diskstore. (#916) + + [SNAP-2122] Use a canonical representation of DistributedMembers in query routing comparisons. + + [SNAP-2120] Use "spark.sql.codegen.cacheSize" for Snappy caches. (#915) + + [SNAP-338] Changes related to locator startup time improvements. (#909) + + [SNAP-1743] Compress column batches when storing to disk or sending over network. (#905) + Changes to ColumnFormatValue serialization/deserialization to deal with compression transparently + when storing to disk or sending over network. + + [SNAP-2063] Thrift servers were getting started in rowstore mode instead of DRDA server. (#907) + + [SNAP-2116] Auto-configuration for AWS and local clusters. (#908) + + [GITHUB-900] Fix case-sensitivity of columns in CREATE INDEX.
(#904) + + Fix the case of remote pull from smart connector. (#896) + + [SNAP-2101] Smart connector performance fixes and related issues. (#895) + With above changes (+ the store ones), the performance for smart connector mode in + ColumnCacheBenchmark has improved by more than 2X and now within expected range: from 12-13ns per + row to 5-6ns per row. It is now 3-4X faster than Spark caching and 2-3X faster than direct Parquet + scan having compression=none and entirely in OS buffers. + + (0b09eea) Fixing failure in QueryRoutingDUnitSecurityTest; dropTable should always throw back + SQLExceptions and not proceed with unresolved relation. + + [SNAP-2072][SNAP-2073] Fix external connectors and support VIEWs. (#887) + This commit fixes primarily two issues: + 1. External connectors not working in smart connector mode since the required libraries may not + be available in the embedded cluster. This happens because the BaseRelation is attempted to be + resolved in both "CREATE EXTERNAL TABLE" and "DROP TABLE". Now resolve all required information + (schema, inbuilt or not) at the driver connector JVM and send that in the procedure calls for + external providers. + 2. Support for VIEW, VIEW...USING (temporary, global and persistent) in the parser. + + (916cea3) Fix UDT reads/writes for row buffer. Use the "inner" sqlType for UDTs in schema mapping. + Same in the CodeGeneration row buffer/table fragments for PreparedStatement set or read. + Read underlying data as byte array directly if incoming type is SerializedRow/Map/Array. + Added efficient serialization for SerializedMap (like already done so for SerializedRow/Array). + + [SNAP-2077] Modified the parser to understand FETCH FIRST syntax also. FETCH NEXT will be taken + with OFFSET support if required. (#876) + + (d1987c7) Removed unused ExternalEmbeddedMode and "snappydata.embedded" property. 
+ + [SNAP-2044] Integrate Snappy python tests with precheckin (#879) + + [SNAP-1986] Use a global lock throughout hive client initialization which ensures no two hive + client initializations end up trying to create the hive directory. (#878) + + [SNAP-2068] Added ThreadFactory to SnappyExecutor to cleanup thread artifacts on close with + ConnectionTable.releaseThreadsSockets() as done by other pool threads. (#872) + + [SNAP-1960] Fix the RUNNING status being set prematurely by removing the override of running in + LeadImpl which is no longer required. (#871) + + [SNAP-2056] Use Spark JacksonGenerator with separate JSON generators per column to convert type + to JSON format. (#866) + +Release 1.0.0 + +- New Features/Fixed Issues + + [SNAP-953] Add RPM/DEB installer packaging targets using the Netflix Nebula ospackage gradle + plugin. + + [SNAP-2039] Correct null updates to column tables. (#861) + + Use concurrent TrieMaps in SnappySession contextObjects, and queryHints map. Reason being that + SnappySession can be read concurrently by multiple threads from same query for sub-query/broadcast + kind of plans where planning for the BroadcastExchangeExec plan happens in parallel on another + thread. + + [SNAP-2029] Added new "snappydata.preferPrimaries" option to prefer primaries for queries. (#852) + Avoid double memory at the cost of reduced scalability but still having a hot backup. + See discussion on Slack: https://snappydata-public.slack.com/archives/C0DCF0UGG/p1505460492000378 + + Fixed a parser issue where AS can be optional in namedExpression rule. This fixes Q77 of TPCDS. + + [SNAP-2030] Now routed update and delete query on row table would return number of affected rows. + + [SNAP-2028] Snappy Python APIs fixes. (#851) + A) Some of the SparkSession python APIs used to pass SQLContext to DataFrameWriter and + DataFrameReader APIs. + B) Fixed truncate table API. + + Fixed a couple of issues in parser. (#849) + 1. 
Order by and sort by clauses after partition by can be optional. + 2. INTERVAL non reserved key word was being treated as an identifier because of optional clauses + ordering. + + [SNAP-2022] Remove the check which tested if any lead is already stopped, in snappy-stop-all.sh + (#845). This was causing the script to skip shutting down of other running leads, if any. Added + a check for rowstore, so that 'sbin/snappy-stop-all.sh rowstore' doesn't see the message. + + [SNAP-2020] Track in-progress insert size to avoid data skew. (#844) + With many concurrent inserts/partitions on a node, significant data skew in inserts was still + observed (on machines with large number of cores like 32) due to same smallest bucket being + chosen by multiple partitions. This change now tracks the in-progress size for bucket and adds + that to determine smallest bucket. + + [SNAP-2012] Skip locked entries in evictor. (#839) + Fix as suggested by @rishitesh to use Unsafe API to try acquire monitor on RegionEntry. + + Hiding commands not applicable to snappydata (will be continued to be displayed for GemFireXD and + RowStore mode). (#838) + + [SNAP-2003] Fix for 'stream to big table join CQ returning incorrect result'. (#829) + HashJoinExec's streamPlan and buildPlan RDDs are computed on each CQ execution. + + [AQP-293] Changes for JNI UTF8String.contains. (#832) + Convert UTF8Strings in ParamLiteral to off-heap when snappydata's off-heap is enabled. + Changes in SnappyParser. Also, updated parboiled2 to latest release. + + [SNAP-1995] Added a python example showcasing KMeans usage. (#827) + + Fix an issue in collect-debug-artifacts script with extraction. Skip any configuration checks in + collect-debug-artifacts for extraction (-x, --extract=). + + [SNAP-1993] Fixes for data skew when no partition_by is provided. (#825) + With these changes, distribution in ColumnCacheBenchmark test, for example, is nearly equal most + of the time among the buckets. 
Other cases like those reported originally with 7M rows have only + ~50% difference between min and max (as compared to ~4X originally) + + Remove ParamLiteral for LIKE/RLIKE/REGEXP. If expression foldable is false, then LIKE family + generates very suboptimal plan (if not converted to Contains/StartsWith/EndsWith) that will + compile the Regex for every row. + + [SNAP-1984] Changes to retain UnifiedMemoryManager state across executor restart by copying the + state in a temporary memory manager, which is created when store boots up but Spark environment is + not ready. (#821) + + [SNAP-1981] For prepare phase, avoid rules that do not handle NullType since that is what is used + as placeholder for params. (#815) + + [SNAP-1851] Properly closing the connection in case when connection commit fails. (#796) + + [SNAP-1976] Changes to set isolation level. (#813) + Allow operations on row and column tables if isolation level is set to other than NONE and + autocommit is true (query routing is enabled). If autocommit is false, query routing will be + disabled and transactions on row tables will be supported. Queries on column tables will error out + when query routing is disabled. + + [SNAP-1973][SNAP-1970] Avoid clearing hive meta-store system properties. (#816) + The hive meta-store system properties are required to be set for static initialization of Hive and + should not be cleared because a concurrent hive context initialization (from some other path) can + see inconsistencies like system property found but not available when actually read. + + [SNAP-1979] Added MemoryManagerStats for capturing different stats for UnifiedMemoryManager.(#814) + Smart Connector mode will not have these stats as GemFireXD cache will not be available. + + [SNAP-1982] Change batch UUID to be a long (#812) + Now using region.newUUID to generate the batch UUID.
Use colocatedRegion of column table (the row + buffer) to generate the UUID since that is what smart connector and internal rollover uses. + + [SNAP-1611] Increased spark.memory.fraction from 92% to 97% (#808) + We want to give a little buffer to JVM before it reaches the critical heap size. + + Make SnappySession.contextObjects as transient to fix the serialization issues reported on + spark-shell when SnappySession gets captured in closures (e.g. import session.implicits._ with + toDF) + + [SNAP-1955] Fixes for issues seen in parallel test runs (#805) + + [SNAP-1660] Remove password from product logging. + + [SNAP-1948] Added an option to specify streaming batch interval during streaming job submission. + e.g. bin/snappy-job.sh submit --lead localhost:8090 --app-name appname --class appclass \ + --app-jar appjar --conf logFileName=demo.txt --stream --batch-interval 4000 + + [SNAP-1893] Changed locator status to RUNNING after stopped locator is restarted with + snappy-start-all.sh + + [SNAP-1877] GC issues with large dictionaries in decoding and other optimizations (#787) + 1. Performance issues with dictionary decoder when dictionary is large. 2. Data skew fixes. 3. + Using a consistent sort order so that generated code is identical across sessions for the same + query. 4. Reducing the size of generated code. + + Fix issues seen during concurrency testing (#782) + + [SNAP-1884] Fixed result mismatch in join between snappy table and temp table. + + Overridden two methods from Executor.scala. (#783) These methods have been added in Spark + executor to check store related errors. + + [SNAP-1917] Properly comparing datatype of complex schema. + + [SNAP-1919][SNAP-1916] Added isPartitioned flag to determine partitioned tables (#784) + + [SNAP-1904] Use same connection for rowbuffer and columnstore. + + [SNAP-1883] Parser change for range operator. + + Fixed: After new job classloader changes executors are not fetching driver files.
(#777) + + [SNAP-1894] Codegen issue for query with case in predicate expression (#772) + + [SNAP-1888][SNAP-1886] Fixed parser error in two level nested subQuery, works with Spark (#774) + + [Snap 1833] Fixed the synchronization problem with sc.addJar() (#728) + + [SNAP-1377][SNAP-902] Proper handling of exceptions in case of Lead and Server HA (#758) + + [Snap 1871] Remove custom built-in jdbc provider and instead use spark's JDBC provider (#757) + + [SNAP-1882] Changes done for routing update and delete queries on column table to lead node. + Also handled prepared statement on update and delete queries for column table. + + [SNAP-1885] Fixed Semijoin returning incorrect result (#768) + + [SNAP-1787] - Handling Array[Decimal] in both embedded and split mode (#754) + + [SNAP-1892] .show() after table creation using CreateExternalTable api gives empty/null + entries, caused due to empty UserSpecifiedSchema instead of None (#764) + + [SNAP-1734] Query plan shows 0 number of output rows at the start of the plan. (#761) + Snappy's execution happens in two phases. First phase the plan is executed to create a rdd + which is then used to create a CachedDataFrame. In second phase, the CachedDataFrame is then + used for further actions. For accumulating the metrics for first phase, + SparkListenerSQLPlanExecutionStart is fired. This keeps the current executionID in + _executionIdToData but does not add it to the active executions. This ensures that query is not + shown in the UI but the new jobs that are run while the plan is being executed are tracked + against this executionID. In the second phase, when the query is actually executed, + SparkListenerSQLPlanExecutionStart adds the execution data to the active executions. + SparkListenerSQLPlanExecutionEnd is then sent with the accumulated time of both the phases. For + consuming SparkListenerSQLPlanExecutionStart, Snappy's SQLListener has been added. 
Overridden + withNewExecutionId in CachedDataFrame so that the above explained process also happens when the + dataset APIs are used. + + [SNAP-1878] Proper handling of path option while creation of external table using API (#760) + + [SNAP-1850] Remove connection used in JDBCSourceAsColumnarStore#getPartitionID v2 (#750) + + [SNAP-1389] Update and delete support for column tables (#747) + + [SNAP-1426] Fixed the Snappy Dashboard freezing issue when loading data sets (#732) + + Making background start of multi-node cluster as default + + [SNAP-1860] Close the connection if commit/rollback is not done (#746) + Made changes to make sure to commit/rollback the snapshot tx in case of exception. e.g. Security + related while trying to iterate over the region. + + [SNAP-1656] Security support in snappydata (#731) + Enable LDAP based authentication and authorization in Snappydata cluster. + + Support for snapshot transactional insert in column table (#718) + + [SNAP-1825][SNAP-1818] DDL routing changes (#742) + Fix for ALTER TABLE ADD column does not work in case of row table when the table is altered + after inserting data and CREATE ASYNCEVENTLISTENER doesn't work with lead node. + + Removing old 2.0.x backward compatibility classes. + + Fixes the "describe table" from Spark and shows the full schema. + + [SNAP-1268] Code changes to start SnappyTableStatsProviderService service only once. (#738) + + [SNAP-1838] skip plan cache clear if there is no SparkContext + + Fixes for issues found during concurrency testing (#730) + + [SNAP-1815] Disallow configuration of Hive metastore using hive.metastore.uris property in + hive-site.xml (#714) + + [SNAP-1708] collect-debug-artifacts script won't need both way ssh now. (#723) + + [SNAP-1723] When foldable functions are there in the queries and literals are there in their + argument then identify case where Tokenization should be stopped. Added a bunch of such functions + with corresponding relevant argument numbers for that.
(#706) + + [SNAP-1806] Changed the exception handling in SnappyConnector mode. (#719) + + Support for setting scheduler pools using the set command (#700) + + [SNAP-671] Added support for DSID to work for column tables (#716) + + Added a task context listener to explicitly remove the obtained memory. (#713) + + [SNAP-1326] SnappyParser changes to support ALTER TABLE ADD/DROP COLUMN DDLs (#711) + + [SNAP-1808] Create cachedbatch tables in user's schema instead of the earlier common schema + SNAPPYSYS_INTERNAL. Changes from Sumedh @sumwale (#712) + + [SNAP-1805] Fixed Query Execution statistics are not getting displayed in SQL graph, caused + because function to withNewExecutionId was executed before it was passed as argument (#703) + + [SNAP-1777] Increasing default member-timeout for SnappyData (#704) + + [SNAP-1610] Removing the code related to split cluster mode (that was disabled for users in 0.9 + release) (#696) + + [SNAP-1363] Performance degrades because of PoolExhaustedException when run from connector mode. + Increasing max connection pool size since there is an idle timeout in the pool implementations + (default: 120s), so cleanup of unused connections will happen in any case. + + [SNAP-1794] Modified code generation of DynamicFoldableExpression such that even the + initMutableState splits into multiple init() functions, code will be generated properly. (#699) + + Changes for Apache Spark 2.1.1 merge (#695) + + [SNAP-1451] set default startup-recovery-delay to 102s for Snappy tables to avoid interfering + with initial bucket creation. + + [SNAP-1722] Test to validate support for long, short, tinyint and byte datatypes for row tables + (#689) + + Spark 2.1 Merge (#501) + + Fixing NoSuchElementException "None get" in dropTable. Using the global SparkContext directly + instead of getting from active SparkSession (which may not exist) in hive meta-store listener. 
+ + [SNAP-1688] CachedDataFrame memory allocation should be accounted with execution memory rather + than storage memory. + + [SNAP-1748] Fixed: Without persistence, data loading is unsuccessful with eviction on (#682) + + [SNAP-1721] Avoid code generation failure in WorkingWithObject.scala example (#685) + + Changes for SNAP-1678 Smart connector should emit info logs that indicate the cluster to which it is connecting (#676) + + [SNAP-1760] Correct null bitset expansion and reduce copying in inserts. (#678) Fixes + ArrayIndexOutOfBounds exception in queries with wide schema having nulls. + + Corrected the scaladoc examples in SnappySession. (#672) + + Allow for spaces at start of API parser calls + + [SNAP-1737] While passing value to GemFireXD, it should be converted from catalyst type to scala + type. (#669) + + [SNAP-1735] use single batch count in stats row (#664) + + Renamed "-b" option to "-bg" to match convention used in other POSIX commands + + [SNAP-1725] Fix start and collect-debug scripts for Mac. + + [SNAP-1714] Correcting case-sensitivity handling for API calls (#657) + + [SNAP-1792] Snappy Monitoring UI now also displays Member Details View which shows member specific + information, various statistics (like Status, CPU, Memory, Heap & Off-Heap Usages, etc) and + members logs in incremental way. + + [Snap-1890] Snappy Monitoring UI displays new Pulse logo. Also product and its build details are + shown under version in pop up window. + + [Snap-1813] Pulse (Snappy Monitoring UI) users need to provide valid user name and password if + SnappyData cluster is running in secure mode. + +Release 0.9 + +- New Features/Fixed Issues + + [Snap-1286] Thin Client Smart Connector implementation. + + [SNAP-1235] Overhaul SnappyUnifiedMemoryManager to work properly for overflow. + + [SNAP-1454] Support for Off-Heap in column store. + + [SNAP-1413] install_jar does not work for Streaming jobs. Handled classloader in case of + Streaming factory as well.
+ + [SNAP-1424] Add a "shouldStop()" call to EncoderScanExec. The "shouldStop()" check is necessary + because if the target is a RowWriter (e.g. the parent is an EXCHANGE) then the same row gets + reused. + + [SNAP-1304] Implementation of Snapshot Isolation in snappydata. + + [SNAP-990] Column wise storage in region for better perf instead of full cachedbatch. + + [SNAP-1346] Plan caching ignoring constant values. + + [SNAP-1323] Support parameterized prepared statements for routed queries. Changes for improved + execution of prepared statement on column table through JDBC route. + + JDBC CDC Streaming support. (https://github.com/SnappyDataInc/snappydata/pull/622) + + [SNAP-1655] Support for boolean in row table. + + [SNAP-1705] Support slash ('/') and special characters in column names. + + [SNAP-1698] Snappy Dashboard UI Enhancements + + Multi-grid master (https://github.com/SnappyDataInc/snappydata/pull/628) + + [SNAP-1545] Redesigned SnappyData Dashboard. Now displays detailed member description, heap and + off-heap usage along with snappy storage and execution splits. It also displays cluster level + aggregate Memory and CPU usage. + + [SNAP-1642] Avoid plan caching for queries with subqueries as the underlying changing data does + not reflect in subsequent query. + + [SNAP-1221] Unable to restart server nodes in the cluster due to + ConflictingPersistentDataException. + + [SNAP-1461] Scalar subquery is only allowed to return a single row, while executing subquery on + partitioned row table. This is fixed by routing any query with more than one table to lead node. + + [SNAP-1520] Switched to upstream Spark from snappy-spark-unsafe. Removed explicit + KryoSerializableSerializer registration for UnsafeRow and UTF8String in PooledKryoSerializer and + instead call just the .register() method which will determine the serializer to be used by + reflection. 
+ + [SNAP-1615] If a column being aggregated has a NULL value while grouping on a string column, the + grouping row itself produces a new row with Null column. As a fix, check the actual value while + scanning row table for string column to decide whether it's Null or not. + + [SNAP-1496] Wide table scan for column tables fails due to 64K limitation of JVM. As a fix, we + now chunk the different parts of the scan code if the number of columns exceeds 30. + + [SNAP-1384] Column Table Inserts can fail if generated code is big. Modified ColumnInsertExec to + handle wide schemas up to 1012 columns. + + [SNAP-892] SnappyData launch script picks localhost as the locator hostname and ignores + conf/locators, when invoked from non-locator host. + + [SNAP-1518] sbin/snappy-start-all.sh does not start lead in a large cluster. As a fix, retry if + Hive metastore initialization fails due to datastore being not yet available on servers. + + [SNAP-1400] When a server/cluster is restarted, sometimes incorrect results are observed. Added a + check to get buckets from initialized members only. + + [SNAP-1344] As streaming jobs are recurring jobs, the earlier mechanism of removing dependent jar + files was broken. Now we maintain a list of jars in the context itself and remove the jars when + the streaming context is stopped. + + [SNAP-1494]: Dashboard shows an exception stack trace when a server goes down. Exception handled + and logged it into log file. + + [SNAP-1399] Updated column stats for complex type which was causing issue while inserting JSON + data to column table. + + [SNAP-1351] After a low memory exception is encountered, the snappy server does not remain stable. + Snappy threads cannot be interrupted. + + [SNAP-1481] SQL Tab on UI, Description column now displays actual SQL Query string executed + instead of handler description text. + + [SNAP-1442] Registering row table in catalog after its creation.
+ + [SNAP-1210] Fix NullPointerException caused when writing dataframe containing timestamp column to + csv files by registering FastDatePrinter with KryoSerializer. + + [SNAP-1420] Removed the property "config.trace"->"substitutions" which is generating unnecessary + logs. + + [SNAP-1435] Added support for off-heap in SnappyMemoryManager. + + [GITHUB-534][SNAP-1480] Code generation failure for nested GROUP BY. Match against variable name + for dictionary optimization. + + [SNAP-1482] Tableau generated query fails with NumberFormatException. Fix parsing of full + engineering format double values. + + [SNAP-303] Handle non-store hive tables in meta-data queries. + + [SNAP-1459] StackOverflowError running query on Airline (narrow table) with small data set. As a + fix, registering the classes with a multimap parameter which differentiates between the hashjoin + and hashaggregate. + + [SNAP-1361] Added support for schema name in udf while querying e.g. select app.udfname(col_name) + from table. + + [SNAP-1414] ArrayIndexOutOfBoundsException when creating sample table out of large dataset. As a + fix, passing a reasonable initial size for the encoder term. + + Pruning partitions for predicates based on partitioning columns. + (https://github.com/SnappyDataInc/snappydata/pull/543) + + [SNAP-1441] Limit query on column table gives less number of rows (JDBC). + + [SNAP-1395] ElasticSearch connector gives NullPointerException when used with SnappyData. + + Enhancements in SDE + + - For an external table, the LogicalRelation was not storing the table identifier, thus + sample table replacement was not happening. Now passing table identifier in the logical relation. + + +Release 0.8 + +- Known Issues + + [SNAP-1384] Inserting into or querying a table with wide schema may fail with + StackOverflowException due to a limitation of JVM. + +- New Features/Fixed Issues + + [SNAP-1357] ODBC Driver and Installer.
You can now connect to the SnappyData cluster using the + SnappyData ODBC driver and execute SQL queries. + + [SNAP-1313] Multiple Language Binding using Thrift Protocol. SnappyData now provides support for + Apache Thrift protocol enabling users to access the cluster from other languages that are not + supported directly by SnappyData. + + [SNAP-490] Insert Performance Optimizations - Insert into tables is much more optimized and + performant now. A new insert plan has been introduced which uses code generation + and a new encoding format. + + Fixes backward compatibility with Spark 2.0.0 - The 0.8 SnappyData release is based on the Spark + 2.0.2 version. And, the SnappyData Smart Connector is now backward compatible with + Spark 2.0.0 and 2.0.1 releases. + + [SNAP-1146] Fixes RowBuffer bloating. Data was not being aged into the internal compressed + columnar format leading to unoptimized storage and hence bad query performances. + + [SNAP-1308] Incorrect number of entries displayed on UI if insert was being done from a Spark app + using Smart Connector. + + [SNAP-1293] The driver process of external Spark app using Smart connector was incorrectly being + displayed as members of SnappyData cluster. + + [SNAP-1282] The Spark web UI stopped working with SnappyData 0.7. Fixed. + + [SNAP-1243] UI incorrectly displaying multiple entries for the same lead node after restarts. + + [SNAP-1296] ResultSet obtained from PreparedStatement.executeQuery returned 0 column count + from its metadata. However SNAP-1311 needs to be fixed for complete support of Prepared + Statement jdbc api. Being worked on. + + [SNAP-1291] Queries issued with execution-engine=store hint were getting ignored in some cases + + [SNAP-1287] Query execution using indexes on row tables was sometimes throwing ClassCastException + + [SNAP-1269] Create tables using schema of other table but with different column names were using + column names of the source tables itself.
+ + [SNAP-1134] A job throwing exception remained in hung state instead of reporting failure + + [SNAP-982] Support for persistent UDFs added. Even after restart the UDF can be used now + + Enhancements in SDE + - Sample selection logic enhanced. It can now select best suited sample table even if SQL + functions are used on QCS columns while creating sample tables. + + - Poisson multiplicity generator logic for bootstrap is improved. Error estimated using + bootstrap are now more accurate. + + - Improved performance of closed-form and bootstrap error estimations. + + +Release 0.7 + + [SNAP-1260] Miscellaneous plan optimizations. + + [SNAP-1251][SNAP-1252] Avoid exchange when join columns are superset of partitioning. + + [SNAP-1112] Query hints for executionEngine doesn't work correctly. + + [SNAP-1240] SnappyData monitoring dashboard. + + [SNAP-1234] Always skip broadcast join for cases of collocated PR joins. + + [SNAP-1229] Fixed Snappy Python APIs broken after Spark 2.0 merge. + + [SNAP-1219] Unable to drop persistent column table when a server node is killed abruptly. + + [SNAP-1225] Performance improvements for hash joins (and other fixes). + + [SNAP-1218] Enable RDD-bucket de-linking for single table and replicated table joins. + + [SNAP-1217] Introduce Enable Experimental Feature property. + + [SNAP-1213] Using esoteric ExternalizableSerializable as default serializer for Externalizable + rather than FieldSerialzable. + + [AQP-259] Fixing the issue where the size of the Map was not being assigned early enough, + resulting in flush increasing the reservoir size in an unbounded manner. + + [SNAP-1209] Updated LocalJoin to cover colocated join cases as well. + + [SNAP-1205] Avoid exchange when the table is partitioned with the join key. + + [SNAP-1193] Optimized Collect aggregate plan to avoid last step exchange. + + [SNAP-1191] Basic plan caching (without constant tokenization). Add plan caching and reuse of + SparkPlan, RDD and PlanInfo. 
+ + [SNAP-1194] Optimization for single dictionary column GROUP BY and JOIN + + [SNAP-1136] Pooled version of Kryo serializer which works for closures. + New PooledKryoSerializer that does pooling of Kryo objects (else performance is bad if new + instance is created for every call which needs to register and walk tons of classes) + + [SNAP-1067] Optimized GROUP BY (HashAggregateExec) and HASH JOIN. Optimized hash table + for GROUP BY (HashAggregateExec) and for LocalJoin. + + [SNAP-1087] Maintain stats (which include lower bound, upper bound, null count, etc.) + for every column. And then uses the upper bound and lower bound values of columns to + filter out the cached batches. This will be a perf enhancement for the queries which + filters extensively. + + [SNAP-1084] Cache and return CatalogTable instead of going to hive + + [SNAP-1182] Added map/flatMap/filter/glom/mapPartition/transform APIs to SchemaDStream + + [SNAP-1180] Use ConfigEntry mechanism for SnappyData properties. Added SQLConfigEntry and + convenience methods. + + [SNAP-999] Changes to remove the install jar and instead use SparkJobServer only. + + Apache Spark 2.0.2 merge. + + [SNAP-730] Add a rule to replace column tables with indexes when the join column is indexed. + + Removing the kafka-0.10 dependencies and shipping only kafka-0.8 + + [SNAP-1075] Added a service to publish store table size that is used for query plan generation. + These stats are also published on SnappyData Dashboard. + + [SNAP-1060] [SNAP-1141] [SNAP-1115] Fixes for Streaming related issues after Apache Spark 2.0 + merge. + + [SNAP-1152] Fixing NPE in aggregation. Handling null entry in ObjectHashMapAccessor during code + generation. + + Avoid pooling of stream Input and Output objects in PooledKryoSerializer to try and fix occasional + failures in TPCHDUnitTest. + + [SNAP-1172] Changes to render StringType as VARCHAR for tables created via API. + + [SNAP-1185] Changing all internal.Logging references to public one. 
+ + [SNAP-1188] Set batch uuid to previous record if the current batchuuid is null. + + [SNAP-69] Fix SparkJobServer rootDir to point to current working directory instead of /tmp. + Redirecting rootdir from /tmp to "-dir" startup parameter via gemfirexd.system.home variable + set in the launcher. + + Fixing failure for optimized=T case in TPCETrade + + [SNAP-1147] Properly handle dropping of collocated table. + + [SNAP-1087] Removing StatsPredicateCompiler and closure; instead generate embedded predicate code + in a new function in the same context as for ColumnTableScan code. + + [SNAP-977] Allow user to specify configuration on command-line while submitting a job. + + [SNAP-1021] Added an external catalog to SnappyCatalog to ensure the Catalog API of Spark will + also work fine. This makes Snappy catalog cleaner and removes redundant code. + + [SNAP-1096] Add Lead attribute in Member MBean. + + [SNAP-1066] Modified existing tests to inferschema instead of using string and proper use of + nullValue. + + Fixed readLongDecimal for ColumnEncoding adapters. + + [SNAP-1199] Making external table visible with SnappyData Connector. + + [SNAP-1083] Fixing multiple issues in RDD de-linking. + + [SNAP-1190] Reduce per-partition task overhead. + + +Release 0.6 + + [SNAP-735] Supporting VARCHAR with size and processing STRING as VARCHAR(32762), by default. + Provided query hint (--+ columnsAsClob(*)) to force processing STRING as CLOB. Changes to render + CHAR as CHAR and VARCHAR as VARCHAR. Added a system property to stop treating STRING as max size + VARCHAR but as CLOB. 
+ + [SNAP-1049] IllegalArgumentException: requirement failed: partitions(1).partition == 5, but it + should equal 1 + + [SNAP-1050] Query execution from JDBC waits infinitely for external table if column name in query + is wrong + + [SNAP-1036] Optimize access to row store using raw region iterators + + [SNAP-1000] Perf improvement for localjoin through code generation + + [SNAP-1034] Optimized generated code iteration for Column tables + + [SNAP-1047] Fix column table row count in UI + + [SNAP-1044] Support for describe table and show table using snappycontext + + [SNAP-846] Ensuring that Spark Uncaught exceptions are handled in the Snappy side and do not cause + a system.exit + + [SNAP-1025] Stream tables return duplicate rows + + [SNAP-959] create table as select not working as expected if row table is source table + + [SNAP-845] Atomicity of DDLs across catalogs + + [SNAP-981] Support Snappy with multiple Hadoop version + + [SNAP-979] Correct table size and count shown on the Snappy UI tab + + [SNAP-936] Automatic selection of execution engine based on query type. Query hint also provided + to select a particular engine for execution + + [SNAP-653] Cleanup relation artifacts when it is altered/dropped/... from external cluster + + [SNAP-654] If the Lead is running and an application runs a program that points to the Snappy + cluster as the Master, then, the client program perpetually hangs. + + [SNAP-174] No ssh required for starting cluster through scripts if only localhost is being used + + [SNAP-910] DELETE / UPDATE FROM COLUMN TABLE throws proper exception now + + [SNAP-293] Single install/replace jar utility. User can install a jar using install jar utility + and it will be available to all executors, store and driver node the jar uploaded via the job + server also follows the same norm. + + [SNAP-824] Support for CUBE/ROLLUP/GROUPING SETS through sql. 
Support for window clauses and + partition/distribute by + + SPARK 2.0 merge + + [SNAP-861] Zeppelin interpreter for SnappyData + + [SNAP-947] Unable to restart cluster with 0.5 version with columnar persistent tables + + [SNAP-961] Fix passing of some DDL extension clauses like OFFHEAP PERSISTENT etc. + + [SNAP-734] Support for EXISTS from sql + + [SNAP-835] Drop table from default schema with fully qualified name throws "Table Not Found" Error + + [SNAP-784] Fully qualified table name access fails with "Table Not Found" Error + + [SNAP-864] Script to launch SnappyData cluster on Amazon Web Services EC2 instances. + + [AQP-77] exception "STRATIFIED_SAMPLER_WEIGHTAGE#411L missing" + + [AQP-94] Class cast exception if aggregate is on string column + + [AQP-107] scala.MatchError, while using reserved word sample_ in the query + + [AQP-143] Unexpected error for query on empty table + + [AQP-154] Actual sample count varies with varying number of columns in QCS. + + [AQP-177] Unable to drop the sample table + + [AQP-190] Relative Error estimates are wildly OFF + + [AQP-199] Use of alias in FROM clause results in Sample not being selected + + [AQP-203] COUNT(DISTINCT) queries 'with error' clause fails with No plan for ErrorDefaults + + [AQP-204] Inconsistent results, each time the same bootStrap query is executed multiple times. + + [AQP-205] Bug in abortSegment implementation of stratum cache/ concurrent segment hashmap causes + count to be incorrect + + [AQP-206] Exception while using error_functions in HAVING clause + + [AQP-207] Join query fails with error while evaluating an expression + + [AQP-210] Mathematical expression involving error estimates not working + + [AQP-212] HAC behavior 'local_omit' does not work as expected.
+ + [AQP-213] Exception when using errorFunction in HAVING clause with HAC behavior 'run_on_full_table' + and 'partial_run_on_base_table' + + [AQP-214] Need support for functions in sample creation + + [AQP-216] Cannot use float datatype for sample created on row table + + +Release 0.5 + + Rowstore quickstarts are now packaged into the SnappyData distribution. + + [AQP] Optimizations of bootstrap for sort based aggregate. + + [AQP] Minimize the query plan size for bootstrap. + + [AQP] Optimized the Declarative aggregate function. + + [SNAP-858] Added documentation for Python APIs. + + [SNAP-852] Added new fields on the Snappy Store tab in Spark UI. + + [SNAP-730] Added index creation and colocated joins diff --git a/build.gradle b/build.gradle index 1342626c0f..5caf4b1c4e 100644 --- a/build.gradle +++ b/build.gradle @@ -1,87 +1,235 @@ -apply plugin: 'wrapper' +/* + * Copyright (c) 2017-2019 TIBCO Software Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file.
+ */ -if(JavaVersion.current() != JavaVersion.VERSION_1_7){ - throw new GradleException("==== This build must be run with java 7 ====") -} +import org.gradle.api.tasks.testing.logging.* +import org.gradle.internal.logging.* buildscript { repositories { - maven { url "https://plugins.gradle.org/m2" } - jcenter() + maven { url 'https://plugins.gradle.org/m2' } + mavenCentral() } dependencies { - classpath "com.github.maiflai:gradle-scalatest:0.10" - classpath "org.github.ngbinh.scalastyle:gradle-scalastyle-plugin_2.10:0.7.2" - classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.2' + classpath 'io.snappydata:gradle-scalatest:0.23' + classpath 'org.github.ngbinh.scalastyle:gradle-scalastyle-plugin_2.11:0.9.0' + classpath 'com.github.jengelman.gradle.plugins:shadow:4.0.3' + classpath 'de.undercouch:gradle-download-task:3.4.3' + classpath 'net.rdrei.android.buildtimetracker:gradle-plugin:0.11.+' + classpath 'com.netflix.nebula:gradle-ospackage-plugin:5.2.+' } } +apply plugin: 'wrapper' +apply plugin: 'distribution' +apply plugin: 'nebula.ospackage-base' +apply plugin: "nebula.ospackage" + +def isEnterpriseProduct = rootProject.hasProperty('snappydata.enterprise') + allprojects { - // We want to see all test results. This is equivalatent to setting --continue + // We want to see all test results. This is equivalent to setting --continue // on the command line. 
gradle.startParameter.continueOnFailure = true + tasks.withType(Javadoc) { + options.addStringOption('Xdoclint:none', '-quiet') + /* + if (javax.tools.ToolProvider.getSystemDocumentationTool().isSupportedOption("--allow-script-in-comments") == 0) { + options.addBooleanOption("-allow-script-in-comments", true) + } + */ + } + repositories { - maven { url "file://" + rootDir.getAbsolutePath() + "/local-repo" } - mavenLocal() - //maven { url "http://dl.bintray.com/spark-jobserver/maven" } - jcenter() - maven { url "https://repository.apache.org/content/repositories/releases" } - maven { url "https://repository.jboss.org/nexus/content/repositories/releases" } - maven { url "https://repo.eclipse.org/content/repositories/paho-releases" } - maven { url "https://repository.cloudera.com/artifactory/cloudera-repos" } - maven { url "https://oss.sonatype.org/content/repositories/orgspark-project-1113" } - maven { url "http://repository.mapr.com/maven" } + mavenCentral() + maven { url 'https://dl.bintray.com/big-data/maven' } maven { url "https://repo.spring.io/libs-release" } - maven { url "http://maven.twttr.com" } - maven { url "http://repository.apache.org/snapshots" } + maven { url "https://oss.sonatype.org/content/repositories/snapshots" } + // maven { url 'http://repository.snappydata.io/repository/internal' } + // maven { url 'http://repository.snappydata.io/repository/snapshots' } + maven { url 'https://app.camunda.com/nexus/content/repositories/public' } } apply plugin: 'java' apply plugin: 'maven' apply plugin: 'scalaStyle' + apply plugin: 'com.github.johnrengelman.shadow' apply plugin: 'idea' - apply plugin: 'eclipse' + apply plugin: "build-time-tracker" group = 'io.snappydata' - version = '0.1.0-SNAPSHOT' + version = '1.1.0-HF-1' // apply compiler options - sourceCompatibility = 1.7 - targetCompatibility = 1.7 + tasks.withType(JavaCompile) { + options.encoding = 'UTF-8' + options.incremental = true + options.compilerArgs << 
'-Xlint:-serial,-path,-deprecation,-unchecked,-rawtypes' + options.compilerArgs << '-XDignore.symbol.file' + options.fork = true + options.forkOptions.javaHome = file(System.properties['java.home']) + options.forkOptions.jvmArgs = [ '-J-Xmx2g', '-J-Xms2g', '-J-XX:ReservedCodeCacheSize=512m', '-J-Djava.net.preferIPv4Stack=true' ] + } + tasks.withType(ScalaCompile) { + options.fork = true + options.forkOptions.jvmArgs = [ '-Xmx2g', '-Xms2g', '-XX:ReservedCodeCacheSize=512m', '-Djava.net.preferIPv4Stack=true' ] + // scalaCompileOptions.optimize = true + // scalaCompileOptions.useAnt = false + scalaCompileOptions.deprecation = false + scalaCompileOptions.additionalParameters = [ '-feature' ] + options.encoding = 'UTF-8' + } + + jar.duplicatesStrategy = DuplicatesStrategy.EXCLUDE - compileJava.options.encoding = 'UTF-8' - compileJava.options.compilerArgs << '-Xlint:all,-serial,-path' javadoc.options.charSet = 'UTF-8' ext { - scalaBinaryVersion = '2.10' - scalaVersion = scalaBinaryVersion + '.6' - sparkVersion = '1.5.0-SNAPSHOT.1' + if (isEnterpriseProduct) { + productName = 'TIBCO ComputeDB' + } else { + productName = 'SnappyData' + } + vendorName = 'TIBCO Software Inc.' 
+ scalaBinaryVersion = '2.11' + scalaVersion = scalaBinaryVersion + '.8' + sparkVersion = '2.1.1' + snappySparkVersion = '2.1.1.6' + sparkDistName = "spark-${sparkVersion}-bin-hadoop2.7" + sparkCurrentVersion = '2.3.2' + sparkCurrentDistName = "spark-${sparkCurrentVersion}-bin-hadoop2.7" log4jVersion = '1.2.17' - slf4jVersion = '1.7.12' - junitVersion = '4.11' - hadoopVersion = '2.4.1' - gemfireXDVersion = '2.0-Beta' + slf4jVersion = '1.7.25' + junitVersion = '4.12' + mockitoVersion = '1.10.19' + hadoopVersion = '2.7.7' + scalatestVersion = '2.2.6' + jettyVersion = '9.2.26.v20180806' + guavaVersion = '14.0.1' + kryoVersion = '4.0.1' + thriftVersion = '0.9.3' + metricsVersion = '4.0.3' + metrics2Version = '2.2.0' + janinoVersion = '3.0.8' + derbyVersion = '10.14.2.0' + parboiledVersion = '2.1.5' + tomcatJdbcVersion = '8.5.37' + hikariCPVersion = '2.7.9' + twitter4jVersion = '4.0.7' + objenesisVersion = '3.0.1' + rabbitMqVersion = '4.9.1' + akkaVersion = '2.3.16' + sprayVersion = '1.3.4' + sprayJsonVersion = '1.3.5' + sprayShapelessVersion = '1.3.3' + sprayTestkitVersion = '1.3.4' + jodaVersion = '2.1.2' + jodaTimeVersion = '2.10.1' + slickVersion = '2.1.0' + h2Version = '1.3.176' + commonsIoVersion = '2.6' + commonsPoolVersion = '1.6' + dbcpVersion = '1.4' + shiroVersion = '1.2.6' + flywayVersion = '3.2.1' + typesafeConfigVersion = '1.3.3' + mssqlVersion = '7.0.0.jre8' + antlr2Version = '2.7.7' + + pegdownVersion = '1.6.0' + snappyStoreVersion = '1.6.3' + snappydataVersion = version + pulseVersion = '1.5.1' + zeppelinInterpreterVersion = '0.7.3.5' + buildFlags = '' + createdBy = System.getProperty('user.name') + osArch = System.getProperty('os.arch') + osName = org.gradle.internal.os.OperatingSystem.current() + osFamilyName = osName.getFamilyName().replace(' ', '').toLowerCase() + osVersion = System.getProperty('os.version') + buildDate = new Date().format('yyyy-MM-dd HH:mm:ss Z') + buildNumber = new Date().format('MMddyy') + jdkVersion = 
System.getProperty('java.version') + sparkJobServerVersion = '0.6.2.9' + eclipseCollectionsVersion = '9.2.0' + fastutilVersion = '8.2.2' + + gitCmd = "git --git-dir=${rootDir}/.git --work-tree=${rootDir}" + gitBranch = "${gitCmd} rev-parse --abbrev-ref HEAD".execute().text.trim() + commitId = "${gitCmd} rev-parse HEAD".execute().text.trim() + sourceDate = "${gitCmd} log -n 1 --format=%ai".execute().text.trim() + buildIdPrefix = System.env.USER + ' ' + + sparkDistDir = "${project.gradle.gradleUserHomeDir}/sparkDist" + sparkProductDir = "${sparkDistDir}/${sparkDistName}" + sparkCurrentProductDir = "${sparkDistDir}/${sparkCurrentDistName}" } if (!buildRoot.isEmpty()) { buildDir = new File(buildRoot, 'scala-' + scalaBinaryVersion + '/' + project.path.replace(':', '/')) } else { - // default output directory like in sbt/maven + // default output directory suffix like in sbt/maven buildDir = 'build-artifacts/scala-' + scalaBinaryVersion } + if (rootProject.hasProperty('enablePublish')) { + buildIdPrefix = "${vendorName} " + } + if (rootProject.hasProperty('sparkDistDir')) { + sparkDistDir = rootProject.property('sparkDistDir') + sparkProductDir = "${sparkDistDir}/${sparkDistName}" + sparkCurrentProductDir = "${sparkDistDir}/${sparkCurrentDistName}" + } ext { testResultsBase = "${rootProject.buildDir}/tests/snappy" snappyProductDir = "${rootProject.buildDir}/snappy" } + + // force same output directory for IDEA and gradle + idea { + module { + def projOutDir = file("${projectDir}/src/main/scala").exists() + ? "${project.sourceSets.main.java.outputDir}/../../scala/main" + : project.sourceSets.main.java.outputDir + def projTestOutDir = file("${projectDir}/src/test/scala").exists() + ? "${project.sourceSets.test.java.outputDir}/../../scala/test" + : project.sourceSets.test.java.outputDir + outputDir file(projOutDir) + testOutputDir file(projTestOutDir) + } + } +} + + +def hasAqpProject = new File(rootDir, 'aqp/build.gradle').exists() +def aqpProject = isEnterpriseProduct ? 
project(":snappy-aqp_${scalaBinaryVersion}") : null +def hasJdbcConnectorProject = new File(rootDir, 'snappy-connectors/jdbc-stream-connector/build.gradle').exists() +def hasGemFireConnectorProject = new File(rootDir, 'snappy-connectors/gemfire-connector/build.gradle').exists() + +if (isEnterpriseProduct) { + if (!hasJdbcConnectorProject || !hasGemFireConnectorProject) { + throw new GradleException('Project repository snappy-connectors not found.') + } } def getProcessId() { - def name = java.lang.management.ManagementFactory.getRuntimeMXBean().getName() - return name[0..name.indexOf("@")-1] + String name = java.lang.management.ManagementFactory.getRuntimeMXBean().getName() + return name[0..name.indexOf('@') - 1] } def getStackTrace(def t) { @@ -91,11 +239,22 @@ def getStackTrace(def t) { return sw.toString() } -// Configure scalaStyle for only non spark related modules -configure(subprojects.findAll {!(it.name ==~ /snappy-spark.*/)}) { +// Skip snappy-spark, snappy-aqp and spark-jobserver that have their own +// scalaStyle configuration. Skip snappy-store that will not use it. 
+configure(subprojects.findAll {!(it.name ==~ /snappy-spark.*/ || + it.name ==~ /snappy-store.*/ || + it.name ==~ /snappy-aqp.*/ || + it.name ==~ /spark-jobserver.*/)}) { scalaStyle { - configLocation = "scalastyle-config.xml" - source = "src/main/scala" + configLocation = "${rootProject.projectDir}/scalastyle-config.xml" + inputEncoding = 'UTF-8' + outputEncoding = 'UTF-8' + outputFile = "${buildDir}/scalastyle-output.xml" + includeTestSourceDirectory = true + source = 'src/main/scala' + testSource = 'src/test/scala' + failOnViolation = true + failOnWarning = false } } @@ -108,21 +267,49 @@ def cleanIntermediateFiles(def projectName) { include 'BACKUPGFXD-DEFAULT-DISKSTORE**', 'locator*.dat' } } -task cleanScalaTest << { - def workingDir = "${testResultsBase}/scalatest" + +def now() { + return new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') +} + +task cleanScalaTest { doLast { + String workingDir = "${testResultsBase}/scalatest" delete workingDir file(workingDir).mkdirs() -} -task cleanJUnit << { - def workingDir = "${testResultsBase}/junit" +} } +task cleanJUnit { doLast { + String workingDir = "${testResultsBase}/junit" delete workingDir file(workingDir).mkdirs() -} -task cleanDUnit << { - def workingDir = "${testResultsBase}/dunit" +} } +task cleanDUnit { doLast { + String workingDir = "${testResultsBase}/dunit" delete workingDir file(workingDir).mkdirs() -} + // clean spark cluster directories + delete "${snappyProductDir}/work", "${snappyProductDir}/logs" + delete "${sparkProductDir}/work", "${sparkProductDir}/logs" + delete "${sparkCurrentProductDir}/work", "${sparkCurrentProductDir}/logs" +} } +task cleanSecurityDUnit { doLast { + String workingDir = "${testResultsBase}/dunit-security" + delete workingDir + file(workingDir).mkdirs() + // clean spark cluster directories + delete "${snappyProductDir}/work", "${snappyProductDir}/logs" + delete "${sparkProductDir}/work", "${sparkProductDir}/logs" + delete "${sparkCurrentProductDir}/work", 
"${sparkCurrentProductDir}/logs" +} } +task cleanAllReports { doLast { + String workingDir = "${testResultsBase}/combined-reports" + delete workingDir + file(workingDir).mkdirs() +} } +task cleanQuickstart { doLast { + String workingDir = "${testResultsBase}/quickstart" + delete workingDir + file(workingDir).mkdirs() +} } subprojects { // the run task for a selected sub-project @@ -133,25 +320,48 @@ subprojects { main = mainClass } if (project.hasProperty('params')) { - args = params.split(",") as List + args = params.split(',') as List } classpath = sourceSets.main.runtimeClasspath + sourceSets.test.runtimeClasspath - jvmArgs '-Xmx2g', '-XX:MaxPermSize=512m' + jvmArgs '-Xmx2g', '-Xms2g' } task scalaTest(type: Test) { - actions = [ new com.github.maiflai.ScalaTestAction() ] + def factory = new com.github.maiflai.BackwardsCompatibleJavaExecActionFactory(gradle.gradleVersion) + actions = [ new com.github.maiflai.ScalaTestAction(factory) ] // top-level default is single process run since scalatest does not // spawn separate JVMs maxParallelForks = 1 - maxHeapSize '1g' - jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-XX:MaxPermSize=350M', '-ea' - testLogging.exceptionFormat = 'full' + minHeapSize '4g' + maxHeapSize '4g' + jvmArgs '-ea', '-XX:+HeapDumpOnOutOfMemoryError','-XX:+UseConcMarkSweepGC', '-XX:MaxNewSize=1g', + '-XX:+UseParNewGC', '-XX:+CMSClassUnloadingEnabled' + // for benchmarking + // minHeapSize '12g' + // maxHeapSize '12g' + // jvmArgs '-XX:+HeapDumpOnOutOfMemoryError','-XX:+UseConcMarkSweepGC', '-XX:MaxNewSize=2g', + // '-XX:+UseParNewGC', '-XX:+CMSClassUnloadingEnabled' + + testLogging.exceptionFormat = TestExceptionFormat.FULL + testLogging.events = TestLogEvent.values() as Set + extensions.add(com.github.maiflai.ScalaTestAction.TAGS, new org.gradle.api.tasks.util.PatternSet()) List suites = [] extensions.add(com.github.maiflai.ScalaTestAction.SUITES, suites) - extensions.add("suite", { String name -> suites.add(name) } ) - extensions.add("suites", { 
String... name -> suites.addAll(name) } ) + extensions.add('suite', { String name -> suites.add(name) } ) + extensions.add('suites', { String... name -> suites.addAll(name) } ) + + def result = new StringBuilder() + extensions.add(com.github.maiflai.ScalaTestAction.TESTRESULT, result) + extensions.add('testResult', { String name -> result.setLength(0); result.append(name) } ) + + def output = new StringBuilder() + extensions.add(com.github.maiflai.ScalaTestAction.TESTOUTPUT, output) + extensions.add('testOutput', { String name -> output.setLength(0); output.append(name) }) + + def errorOutput = new StringBuilder() + extensions.add(com.github.maiflai.ScalaTestAction.TESTERROR, errorOutput) + extensions.add('testError', { String name -> errorOutput.setLength(0); errorOutput.append(name) }) // running a single scala suite if (rootProject.hasProperty('singleSuite')) { @@ -159,92 +369,158 @@ subprojects { } workingDir = "${testResultsBase}/scalatest" + // testResult '/dev/tty' + testOutput "${workingDir}/output.txt" + testError "${workingDir}/error.txt" binResultsDir = file("${workingDir}/binary/${project.name}") reports.html.destination = file("${workingDir}/html/${project.name}") reports.junitXml.destination = file(workingDir) } test { - maxParallelForks = (2 * Runtime.getRuntime().availableProcessors()) - maxHeapSize '1g' - jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-XX:MaxPermSize=350M', '-ea' - testLogging.exceptionFormat = 'full' + maxParallelForks = Runtime.getRuntime().availableProcessors() + maxHeapSize '2g' + jvmArgs '-ea', '-XX:+HeapDumpOnOutOfMemoryError','-XX:+UseConcMarkSweepGC', + '-XX:+UseParNewGC', '-XX:+CMSClassUnloadingEnabled' + testLogging.exceptionFormat = TestExceptionFormat.FULL + + def single = System.getProperty('junit.single') + if (single == null || single.length() == 0) { + single = rootProject.hasProperty('junit.single') ? 
+ rootProject.property('junit.single') : null + } + if (single == null || single.length() == 0) { + include '**/*.class' + exclude '**/*TestBase.class' + exclude '**/*DUnit*.class' + } else { + include single + } workingDir = "${testResultsBase}/junit" binResultsDir = file("${workingDir}/binary/${project.name}") reports.html.destination = file("${workingDir}/html/${project.name}") reports.junitXml.destination = file(workingDir) + + doFirst { + String eol = System.getProperty('line.separator') + String now = new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') + def progress = new File(workingDir, 'progress.txt') + progress << "${eol}${now} ========== STARTING JUNIT TEST SUITE FOR ${project.name} ==========${eol}${eol}" + } } + task dunitTest(type: Test) { + dependsOn ':cleanDUnit' + dependsOn ':product' + dependsOn ':copyShadowJars' + maxParallelForks = 1 + minHeapSize '1536m' + maxHeapSize '1536m' - gradle.taskGraph.whenReady({ graph -> - tasks.withType(Test).each { test -> - test.configure { - onlyIf { !Boolean.getBoolean('skip.tests') } - environment 'SNAPPY_HOME': snappyProductDir, - 'SNAPPY_DIST_CLASSPATH': "${sourceSets.test.runtimeClasspath.asPath}" + // limit netty buffer arenas to avoid occasional OOMEs with 1.5g heap + int numArenas = Math.min(8, Runtime.getRuntime().availableProcessors()) + jvmArgs = ['-XX:+HeapDumpOnOutOfMemoryError', + '-XX:+UseParNewGC', '-XX:+UseConcMarkSweepGC', + '-XX:CMSInitiatingOccupancyFraction=50', + '-XX:+CMSClassUnloadingEnabled', '-ea', + '-Dspark.sql.codegen.cacheSize=1000', + '-Dspark.ui.retainedStages=500', + '-Dspark.ui.retainedJobs=500', + '-Dspark.sql.ui.retainedExecutions=500', + '-Dio.netty.allocator.pageSize=8192', + '-Dio.netty.allocator.maxOrder=10', + "-Dio.netty.allocator.numHeapArenas=${numArenas}", + "-Dio.netty.allocator.numDirectArenas=${numArenas}"] - def eol = System.getProperty('line.separator') - beforeTest { desc -> - def now = new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') - def progress = new 
File(workingDir, "progress.txt") - def output = new File(workingDir, "output.txt") - progress << "${now} Starting test ${desc.className} ${desc.name}${eol}" - output << "${now} STARTING TEST ${desc.className} ${desc.name}${eol}${eol}" - } - onOutput { desc, event -> - def output = new File(workingDir, "output.txt") - output << event.message - } - afterTest { desc, result -> - def now = new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') - def progress = new File(workingDir, "progress.txt") - def output = new File(workingDir, "output.txt") - progress << "${now} Completed test ${desc.className} ${desc.name} with result: ${result.resultType}${eol}" - output << "${eol}${now} COMPLETED TEST ${desc.className} ${desc.name} with result: ${result.resultType}${eol}${eol}" - result.exceptions.each { t -> - progress << " EXCEPTION: ${getStackTrace(t)}${eol}" - output << "${getStackTrace(t)}${eol}" - } - } + workingDir = "${testResultsBase}/dunit" + + binResultsDir = file("${workingDir}/binary/${project.name}") + reports.html.destination = file("${workingDir}/html/${project.name}") + reports.junitXml.destination = file(workingDir) + + systemProperties 'java.net.preferIPv4Stack': 'true', + 'SNAPPY_HOME': snappyProductDir + + int numTestClasses = 0 + def testCount = new java.util.concurrent.atomic.AtomicInteger(0) + + doFirst { + String eol = System.getProperty('line.separator') + String now = new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') + def progress = new File(workingDir, 'progress.txt') + numTestClasses = getCandidateClassFiles().getFiles().size() + progress << "${eol}${now} ========== STARTING DUNIT TEST SUITE FOR ${project.name} ==========${eol}${eol}" + } + beforeSuite { desc -> + if (desc.className != null) { + def count = testCount.incrementAndGet() + println "${now()} Start ${desc.className} ($count/$numTestClasses)" } } - }) - check.dependsOn test, scalaTest + afterSuite { desc, result -> + if (desc.className != null) { + println "${now()} END ${desc.className}" + } + } + 
} + + task dunitSecurityTest(type: Test) { + dependsOn ':cleanSecurityDUnit' + dependsOn ':product' + dependsOn ':copyShadowJars' + maxParallelForks = 1 + minHeapSize '1536m' + maxHeapSize '1536m' + + // limit netty buffer arenas to avoid occasional OOMEs with 1.5g heap + int numArenas = Math.min(8, Runtime.getRuntime().availableProcessors()) + jvmArgs = ['-XX:+HeapDumpOnOutOfMemoryError', + '-XX:+UseParNewGC', '-XX:+UseConcMarkSweepGC', + '-XX:CMSInitiatingOccupancyFraction=50', + '-XX:+CMSClassUnloadingEnabled', '-ea', + '-Dspark.sql.codegen.cacheSize=1000', + '-Dspark.ui.retainedStages=500', + '-Dspark.ui.retainedJobs=500', + '-Dspark.sql.ui.retainedExecutions=500', + '-Dio.netty.allocator.pageSize=8192', + '-Dio.netty.allocator.maxOrder=10', + "-Dio.netty.allocator.numHeapArenas=${numArenas}", + "-Dio.netty.allocator.numDirectArenas=${numArenas}"] + + workingDir = "${testResultsBase}/dunit-security" + + binResultsDir = file("${workingDir}/binary/${project.name}") + reports.html.destination = file("${workingDir}/html/${project.name}") + reports.junitXml.destination = file(workingDir) + + systemProperties 'java.net.preferIPv4Stack': 'true', + 'SNAPPY_HOME': snappyProductDir + + doFirst { + String eol = System.getProperty('line.separator') + String now = new Date().format('yyyy-MM-dd HH:mm:ss.SSS Z') + def progress = new File(workingDir, 'progress.txt') + progress << "${eol}${now} ========== STARTING SECURITY DUNIT TEST SUITE FOR ${project.name} ==========${eol}${eol}" + } + } // apply default manifest + if (rootProject.hasProperty('enablePublish')) { + createdBy = 'TIBCO Software Inc.' + } jar { manifest { attributes( - "Manifest-Version" : "1.0", - "Created-By" : System.getProperty("user.name"), - "Title" : rootProject.name, - "Version" : version, - "Vendor" : "Snappy Data, Inc." 
+ 'Manifest-Version' : '1.0', + 'Created-By' : createdBy, + 'Title' : rootProject.name, + 'Version' : version, + 'Vendor' : vendorName ) } } - task packageSources(type: Jar, dependsOn: classes) { - classifier = 'sources' - from sourceSets.main.allSource - } - task packageDocs(type: Jar, dependsOn: javadoc) { - classifier = 'sources' - from javadoc.destinationDir - } - /* - artifacts { - archives packageSources - archives packageDocs - } - */ - configurations { - provided { - description 'a dependency that is provided externally at runtime' - visible true - } - testOutput { extendsFrom testCompile description 'a dependency that exposes test artifacts' @@ -260,146 +536,749 @@ subprojects { */ } - task packageTests(type: Jar) { - from sourceSets.test.output + // force versions for some dependencies that get pulled multiple times + configurations.all { + resolutionStrategy.force "com.google.guava:guava:${guavaVersion}", + "org.apache.derby:derby:${derbyVersion}", + "org.apache.hadoop:hadoop-annotations:${hadoopVersion}", + "org.apache.hadoop:hadoop-auth:${hadoopVersion}", + "org.apache.hadoop:hadoop-client:${hadoopVersion}", + "org.apache.hadoop:hadoop-common:${hadoopVersion}", + "org.apache.hadoop:hadoop-hdfs:${hadoopVersion}", + "org.apache.hadoop:hadoop-mapreduce-client-app:${hadoopVersion}", + "org.apache.hadoop:hadoop-mapreduce-client-common:${hadoopVersion}", + "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopVersion}", + "org.apache.hadoop:hadoop-mapreduce-client-jobclient:${hadoopVersion}", + "org.apache.hadoop:hadoop-mapreduce-client-shuffle:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-api:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-client:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-common:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-server-common:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-server-nodemanager:${hadoopVersion}", + "org.apache.hadoop:hadoop-yarn-server-web-proxy:${hadoopVersion}" + exclude(group: 
'org.mortbay.jetty', module: 'servlet-api') + } + configurations.testRuntime { + // below is included indirectly by hadoop deps and conflicts with embedded 1.5.7 apacheds + exclude(group: 'org.apache.directory.server', module: 'apacheds-kerberos-codec') + exclude(group: 'org.apache.directory.server', module: 'apacheds-i18n') + } + + task packageTests(type: Jar, dependsOn: testClasses) { + description 'Assembles a jar archive of test classes.' classifier = 'tests' } artifacts { testOutput packageTests } - idea { - module { - scopes.PROVIDED.plus += [ configurations.provided ] + dependencies { + compile 'log4j:log4j:' + log4jVersion + compile 'org.slf4j:slf4j-api:' + slf4jVersion + compile 'org.slf4j:slf4j-log4j12:' + slf4jVersion + + testCompile "junit:junit:${junitVersion}" + } +} + +// maven publish tasks +subprojects { + + apply plugin: 'signing' + + task packageSources(type: Jar, dependsOn: classes) { + classifier = 'sources' + from sourceSets.main.allSource + } + task packageDocs(type: Jar, dependsOn: javadoc) { + classifier = 'javadoc' + from javadoc + } + if (rootProject.hasProperty('enablePublish')) { + signing { + sign configurations.archives + } + + uploadArchives { + repositories { + mavenDeployer { + beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } + + repository(url: 'https://oss.sonatype.org/service/local/staging/deploy/maven2/') { + authentication(userName: ossrhUsername, password: ossrhPassword) + } + snapshotRepository(url: 'https://oss.sonatype.org/content/repositories/snapshots/') { + authentication(userName: ossrhUsername, password: ossrhPassword) + } + + pom.project { + if (isEnterpriseProduct) { + name 'TIBCO ComputeDB' + } else { + name 'SnappyData' + } + packaging 'jar' + // optionally artifactId can be defined here + description 'TIBCO ComputeDB distributed data store and execution engine' + url 'http://www.snappydata.io' + + scm { + connection 'scm:git:https://github.com/SnappyDataInc/snappydata.git' + 
developerConnection 'scm:git:https://github.com/SnappyDataInc/snappydata.git' + url 'https://github.com/SnappyDataInc/snappydata' + } + + licenses { + license { + name 'The Apache License, Version 2.0' + url 'http://www.apache.org/licenses/LICENSE-2.0.txt' + } + } + + developers { + developer { + id 'smenon' + name 'Sudhir Menon' + email 'sumenon@tibco.com' + } + } + } + } + } } } +} + +// apply common test and misc configuration +gradle.taskGraph.whenReady { graph -> - sourceSets { - main.compileClasspath += configurations.provided - main.runtimeClasspath -= configurations.provided - test.compileClasspath += configurations.provided - test.runtimeClasspath += configurations.provided + String dunitSingle = System.getProperty('dunit.single') + if (dunitSingle == null || dunitSingle.length() == 0) { + dunitSingle = rootProject.hasProperty('dunit.single') ? + rootProject.property('dunit.single') : null } + String dunitSecSingle = System.getProperty('dunitSecurity.single') + if (dunitSecSingle == null || dunitSecSingle.length() == 0) { + dunitSecSingle = rootProject.hasProperty('dunitSecurity.single') ? 
+ rootProject.property('dunitSecurity.single') : null + } + + def allTasks = subprojects.collect { it.tasks }.flatten() + allTasks.each { task -> + if (task instanceof Tar) { + def tar = (Tar)task + tar.compression = Compression.GZIP + tar.extension = 'tar.gz' + } else if (task instanceof Jar) { + def pack = (Jar)task + if (pack.name == 'packageTests') { + pack.from(pack.project.sourceSets.test.output.classesDirs, pack.project.sourceSets.test.resources.srcDirs) + } + } else if (task instanceof Test) { + def test = (Test)task + test.configure { - javadoc.classpath += configurations.provided + if (test.name == 'dunitTest') { + includes.clear() + excludes.clear() + if (dunitSingle == null || dunitSingle.length() == 0) { + def dunitTests = testClassesDirs.asFileTree.matching { + includes = [ '**/*DUnitTest.class', '**/*DUnit.class' ] + excludes = [ '**/*Suite.class', '**/*DUnitSecurityTest.class', '**/NCJ*DUnit.class', + '**/BackwardCompatabilityPart*DUnit.class', '**/*Perf*DUnit.class', '**/ListAggDUnit.class', + '**/SingleHop*TransactionDUnit.class', '**/*Parallel*AsyncEvent*DUnit.class', '**/pivotal/gemfirexd/wan/**/*DUnit.class' ] + } + FileTree includeTestFiles = dunitTests + int dunitFrom = rootProject.hasProperty('dunit.from') ? + getLast(includeTestFiles, rootProject.property('dunit.from')) : 0 + int dunitTo = rootProject.hasProperty('dunit.to') ? + getLast(includeTestFiles, rootProject.property('dunit.to')) : includeTestFiles.size() - dependencies { - compile 'log4j:log4j:' + log4jVersion - compile 'org.slf4j:slf4j-api:' + slf4jVersion - compile 'org.slf4j:slf4j-log4j12:' + slf4jVersion + int begin = dunitFrom != -1 ? dunitFrom : 0 + int end = dunitTo != -1 ? 
dunitTo : includeTestFiles.size() + def filteredSet = includeTestFiles.drop(begin).take(end-begin+1).collect {f -> "**/" + f.name} + if (begin != 0 || end != includeTestFiles.size()) { + println("Picking tests :") + filteredSet.each { a -> println(a) } + } + include filteredSet + } else { + include dunitSingle + } + } else if (test.name == 'dunitSecurityTest') { + includes.clear() + excludes.clear() + if (!rootProject.hasProperty('snappydata.enterprise')) { + excludes = [ '**/*Suite.class', '**/*DUnitSecurityTest.class', '**/*DUnitTest.class', '**/*DUnit.class' ] + } else if (dunitSecSingle == null || dunitSecSingle.length() == 0) { + def dunitSecurityTests = testClassesDirs.asFileTree.matching { + includes = [ '**/*DUnitSecurityTest.class' ] + excludes = [ '**/*Suite.class', '**/*DUnitTest.class', '**/*DUnit.class' ] + } + FileTree includeTestFiles = dunitSecurityTests + int dunitFrom = rootProject.hasProperty('dunitSecurity.from') ? + getLast(includeTestFiles, rootProject.property('dunitSecurity.from')) : 0 + int dunitTo = rootProject.hasProperty('dunitSecurity.to') ? + getLast(includeTestFiles, rootProject.property('dunitSecurity.to')) : includeTestFiles.size() - testCompile "junit:junit:${junitVersion}" + int begin = dunitFrom != -1 ? dunitFrom : 0 + int end = dunitTo != -1 ? 
dunitTo : includeTestFiles.size() + def filteredSet = includeTestFiles.drop(begin).take(end-begin+1).collect {f -> "**/" + f.name} + if (begin != 0 || end != includeTestFiles.size()) { + println("Picking tests :") + filteredSet.each { a -> println(a) } + } + include filteredSet + } else { + include dunitSecSingle + } + } + + String logLevel = System.getProperty('logLevel') + if (logLevel != null && logLevel.length() > 0) { + systemProperties 'gemfire.log-level' : logLevel, + 'logLevel' : logLevel + } + logLevel = System.getProperty('securityLogLevel') + if (logLevel != null && logLevel.length() > 0) { + systemProperties 'gemfire.security-log-level' : logLevel, + 'securityLogLevel' : logLevel + } + + environment 'SNAPPY_HOME': snappyProductDir, + 'APACHE_SPARK_HOME': sparkProductDir, + 'APACHE_SPARK_CURRENT_HOME': sparkCurrentProductDir, + 'SPARK_TESTING': '1', + 'SNAPPY_DIST_CLASSPATH': test.classpath.asPath + + def failureCount = new java.util.concurrent.atomic.AtomicInteger(0) + def progress = new File(workingDir, 'progress.txt') + def output = new File(workingDir, 'output.txt') + + String eol = System.getProperty('line.separator') + beforeTest { desc -> + String now = now() + progress << "${now} Starting test ${desc.className} ${desc.name}${eol}" + output << "${now} STARTING TEST ${desc.className} ${desc.name}${eol}${eol}" + } + onOutput { desc, event -> + String msg = event.message + if (event.destination.toString() == 'StdErr') { + msg = msg.replace(eol, "${eol}[error] ") + } + output << msg + } + afterTest { desc, result -> + String now = now() + progress << "${now} Completed test ${desc.className} ${desc.name} with result: ${result.resultType}${eol}" + output << "${eol}${now} COMPLETED TEST ${desc.className} ${desc.name} with result: ${result.resultType}${eol}${eol}" + def exceptions = result.exceptions + if (exceptions.size() > 0) { + exceptions.each { t -> + progress << " EXCEPTION: ${getStackTrace(t)}${eol}" + output << "${getStackTrace(t)}${eol}" + } + 
failureCount.incrementAndGet() + } + } + doLast { + def report = "${test.reports.html.destination}/index.html" + boolean hasProgress = progress.exists() + if (failureCount.get() > 0) { + println() + def failureMsg = "FAILED: There were ${failureCount.get()} failures.${eol}" + if (hasProgress) { + failureMsg += " See the progress report in: file://$progress${eol}" + } + failureMsg += " HTML report in: file://$report" + throw new GradleException(failureMsg) + } else if (hasProgress) { + println() + println("SUCCESS: See the progress report in: file://$progress") + println(" HTML report in: file://$report") + println() + } else { + println() + println("SUCCESS: See the HTML report in: file://$report") + println() + } + } + } + } } } -task generateSources { - dependsOn ':snappy-spark:snappy-spark-streaming-flume-sink_' + scalaBinaryVersion + ':generateAvroJava' - dependsOn ':snappy-store:generateSources' + +task publishLocal { + dependsOn subprojects.findAll { p -> p.name != 'snappydata-native' && + p.name != 'snappydata-store-prebuild' && p.name != 'snappydata-store' }.collect { + it.getTasksByName('install', false).collect { it.path } + } +} + +task publishMaven { + dependsOn subprojects.findAll { p -> p.name != 'snappydata-native' && + p.name != 'snappydata-store-prebuild' && p.name != 'snappy-store' && + p.name != 'snappydata-store' }.collect { + it.getTasksByName('uploadArchives', false).collect { it.path } + } } -task product { - dependsOn ":snappy-tools_${scalaBinaryVersion}:shadowJar" - dependsOn ':snappy-examples:jar' - +task product(type: Zip) { + dependsOn ":snappy-cluster_${scalaBinaryVersion}:jar" + dependsOn ":snappy-examples_${scalaBinaryVersion}:jar" + dependsOn ":snappy-spark:snappy-spark-assembly_${scalaBinaryVersion}:sparkProduct" + dependsOn ':snappy-launcher:jar' + dependsOn ':jdbcJar' + + def clusterProject = project(":snappy-cluster_${scalaBinaryVersion}") + def launcherProject = project(':snappy-launcher') + def targetProject = clusterProject + 
+ if (isEnterpriseProduct) { + if (hasAqpProject) { + dependsOn ":snappy-aqp_${scalaBinaryVersion}:jar" + targetProject = aqpProject + } + + if (hasJdbcConnectorProject){ + dependsOn ":snappy-jdbc-connector_${scalaBinaryVersion}:jar" + } + if (hasGemFireConnectorProject){ + dependsOn ":gemfire-connector:product" + } + } + + + // create snappydata+spark combined python zip + destinationDir = file("${snappyProductDir}/python/lib") + archiveName = 'pyspark.zip' + from("${project(':snappy-spark').projectDir}/python") { + include 'pyspark/**/*' + } + from("${rootDir}/python") { + include 'pyspark/**/*' + } + doFirst { - delete snappyProductDir - file("${snappyProductDir}/lib").mkdirs() + // remove the spark pyspark.zip + delete "${snappyProductDir}/python/lib/pyspark.zip" } doLast { - // copy datanucleus jars specifically since they don't work as part of fat jar - def datanucleusJars = project(":snappy-spark:snappy-spark-hive_${scalaBinaryVersion}").configurations.runtime.filter { - it.getName().contains('datanucleus') - } + def examplesProject = project(":snappy-examples_${scalaBinaryVersion}") + String exampleArchiveName = "quickstart.jar" + + // copy all runtime dependencies of snappy-cluster, itself and AQP + def targets = targetProject.configurations.runtime copy { - from datanucleusJars - into "${snappyProductDir}/lib" + from(targets) { + // exclude antlr4 explicitly (runtime is still included) + // that gets pulled by antlr gradle plugin + exclude '**antlr4-4*.jar' + // exclude scalatest included by spark-tags + exclude '**scalatest*.jar' + // exclude other test jars + exclude '**junit*.jar' + exclude '**mockito*.jar' + exclude '**hamcrest-core*.jar' + exclude '**test-interface*.jar' + exclude '**scalacheck*.jar' + if (rootProject.hasProperty('hadoop-provided')) { + exclude 'hadoop-*.jar' + } + } + from targetProject.jar.outputs + into "${snappyProductDir}/jars" } - // copy GemFireXD shared libraries for optimized JNI calls + // copy the launcher jar copy { - 
from "${project(':snappy-store:gemfirexd:core').projectDir}/lib" - into "${snappyProductDir}/lib" + from launcherProject.jar.destinationDir + into "${snappyProductDir}/jars" + include launcherProject.jar.archiveName } // create the RELEASE file - def release = file("${snappyProductDir}/RELEASE") - def gitCommitId = "git rev-parse HEAD".execute().text.trim() - release << "Snappy Spark ${project.version} ${gitCommitId} built for Hadoop $hadoopVersion\n" - release << "Build flags: ${buildFlags}\n" - - def toolsProject = project(":snappy-tools_${scalaBinaryVersion}") - def examplesProject = project(':snappy-examples') - def baseName = 'snappy-spark-assembly' - def archiveName = "${baseName}_${scalaBinaryVersion}-${version}-hadoop${hadoopVersion}.jar" - def exampleArchiveName = "quickstart-${version}.jar" + def releaseFile = file("${snappyProductDir}/RELEASE") + String buildFlags = '' + if (rootProject.hasProperty('docker')) { + buildFlags += ' -Pdocker' + } + if (rootProject.hasProperty('ganglia')) { + buildFlags += ' -Pganglia' + } + if (rootProject.hasProperty('hadoop-provided')) { + buildFlags += ' -Phadoop-provided' + } + String gitRevision = "${gitCmd} rev-parse --short HEAD".execute().text.trim() + if (gitRevision.length() > 0) { + gitRevision = " (git revision ${gitRevision})" + } + + if (rootProject.hasProperty('hadoop-provided')) { + releaseFile.append("TIBCO ComputeDB ${version}${gitRevision} " + + "built with Hadoop ${hadoopVersion} but hadoop not bundled.\n") + } else { + releaseFile.append("TIBCO ComputeDB ${version}${gitRevision} built for Hadoop ${hadoopVersion}.\n") + } + releaseFile.append("Build flags:${buildFlags}\n") + + // copy LICENSE, README.md and doc files + copy { + from projectDir + into snappyProductDir + include 'LICENSE' + if (!isEnterpriseProduct) { + include 'NOTICE' + } + include 'README.md' + } + copy { + from "${projectDir}/docs" + into "${snappyProductDir}/docs" + } + + copy { + from "${rootDir}/python/pyspark/" + include 'sql/**' + 
include 'streaming/**' + include 'test_support/**' + into "${snappyProductDir}/python/pyspark" + } + + // Next the remaining components of full product like examples etc + // Spark portions already copied in the assembly:product dependency copy { - from("${toolsProject.buildDir}/libs") - into "${snappyProductDir}/lib" - include "${toolsProject.shadowJar.archiveName}" - rename { filename -> archiveName } + from("${examplesProject.projectDir}/src/main/python") + into "${snappyProductDir}/quickstart/python" + } + if (new File(rootDir, 'store/build.gradle').exists()) { + // copy snappy-store shared libraries for JNI calls + def storeCoreProject = project(':snappy-store:snappydata-store-core') + copy { + from "${storeCoreProject.projectDir}/lib" + into "${snappyProductDir}/jars" + exclude '*_sol*.so' + } + copy { + from "${storeCoreProject.projectDir}/../quickstart" + into "${snappyProductDir}/quickstart/store" + exclude '.git*' + } + } + if (isEnterpriseProduct) { + if (hasAqpProject) { + // copy enterprise shared libraries for optimized JNI calls + copy { + from aqpProject.projectDir.path + '/lib' + into "${snappyProductDir}/jars" + } + copy { + from aqpProject.projectDir + into snappyProductDir + include 'NOTICE' + include '*EULA*' + } + } + + if (hasJdbcConnectorProject) { /* resolve the connector project only under its has* guard, matching the guarded dependsOn wiring of this task */ + def jdbcConnectorProject = project(":snappy-jdbc-connector_${scalaBinaryVersion}") + copy { + from jdbcConnectorProject.jar.destinationDir + into "${snappyProductDir}/connectors" + include "*.jar" + } + } + if (hasGemFireConnectorProject) { /* GemFire connector projects are likewise looked up only inside their guard */ + def gemfireConnectorProject = project(":gemfire-connector") + def gfeConnectorProject = project(":gemfire-connector:connector_${scalaBinaryVersion}") + def gfeFunctionProject = project(":gemfire-connector:gfeFunctions") + copy { + from gfeConnectorProject.jar.destinationDir + into "${snappyProductDir}/connectors" + include "*.jar" + } + copy { + from gfeFunctionProject.jar.destinationDir + into "${snappyProductDir}/connectors" + include "*.jar" + } + copy {
+ from "${gemfireConnectorProject.projectDir}/examples/quickstart/data" + into "${snappyProductDir}/connectors" + include "persons.jar" + } + } } copy { from "${examplesProject.buildDir}/libs" - into "${snappyProductDir}/lib" + into "${snappyProductDir}/examples/jars" include "${examplesProject.jar.archiveName}" rename { filename -> exampleArchiveName } } copy { - from "${project(':snappy-spark').projectDir}/bin" + from("${clusterProject.projectDir}/bin") into "${snappyProductDir}/bin" } copy { - from "${project(':snappy-spark').projectDir}/sbin" + from("${clusterProject.projectDir}/sbin") into "${snappyProductDir}/sbin" } copy { - from "${project(':snappy-spark').projectDir}/conf" + from("${clusterProject.projectDir}/conf") into "${snappyProductDir}/conf" } copy { - from "${project(':snappy-spark').projectDir}/python" - into "${snappyProductDir}/python" - } - copy { - from "${project(':snappy-spark').projectDir}/data" - into "${snappyProductDir}/data" - } - copy { - from("${toolsProject.projectDir}/bin") - into "${snappyProductDir}/bin" + from("${examplesProject.projectDir}/quickstart") + into "${snappyProductDir}/quickstart" } copy { - from("${toolsProject.projectDir}/sbin") - into "${snappyProductDir}/sbin" + from("${examplesProject.projectDir}/src") + into "${snappyProductDir}/quickstart/src" } copy { - from("${toolsProject.projectDir}/conf") - into "${snappyProductDir}/conf" + from("${clusterProject.projectDir}/benchmark") + into "${snappyProductDir}/benchmark" } - def sparkR = "${project(':snappy-spark').projectDir}/R/lib/SparkR" - if (file(sparkR).exists()) { + if (rootProject.hasProperty('R.enable')) { + def targetRDir = "${snappyProductDir}/R" copy { - from sparkR - into "${snappyProductDir}/R/lib" + from("${project(":snappy-spark").projectDir}/R") + into targetRDir + } + + exec { + environment "SPARK_HOME", snappyProductDir + environment "NO_TESTS", "1" + environment "CLEAN_INSTALL", "1" + workingDir targetRDir + commandLine "${targetRDir}/check-cran.sh" 
} + + fileTree(targetRDir).exclude('lib').visit { delete it.file } } } } -def copyTestsCommonResources(def bdir) { - def outdir = "${bdir}/resources/test" - file(outdir).mkdirs() +if (rootProject.hasProperty('copyToDir')) { + task copyProduct(type: Copy, dependsOn: product) { + from snappyProductDir + into copyToDir + } +} + +// TODO: right now just copying over the product contents. +// Can flip it around and let distribution do all the work. - copy { - from "${rootDir}/tests-common/src/main/resources" - into outdir +distributions { + main { + if (isEnterpriseProduct) { + baseName = 'tib-compute' + } else { + baseName = 'snappydata' + } + contents { + from { snappyProductDir } + } + } +} + +ospackage { + packageName = distributions.main.baseName + version = version + release = '1' + os = LINUX + + maintainer = vendorName + packageDescription = productName + + ', the Spark Database. Stream - Transact - Analyze - Predict all in one cluster' + summary = productName + ' Installer' + + user = 'snappydata' + permissionGroup = 'snappydata' + + from(snappyProductDir) { + into '/opt/' + packageName + } + + if (rootProject.hasProperty('enablePublish')) { + signingKeyId = rootProject.property('signing.keyId') + signingKeyPassphrase = rootProject.property('signing.password') + signingKeyRingFile = file(rootProject.property('signing.secretKeyRingFile')) + } + + link('/usr/sbin/snappy-start-all.sh', "/opt/${packageName}/sbin/snappy-start-all.sh") + link('/usr/sbin/snappy-stop-all.sh', "/opt/${packageName}/sbin/snappy-stop-all.sh") + link('/usr/sbin/snappy-status-all.sh', "/opt/${packageName}/sbin/snappy-status-all.sh") + link('/usr/bin/snappy', "/opt/${packageName}/bin/snappy") + link('/usr/bin/snappy-sql', "/opt/${packageName}/bin/snappy-sql") + link('/usr/bin/snappy-job.sh', "/opt/${packageName}/bin/snappy-job.sh") +} + +buildRpm { + dependsOn ':packageVSD' + if (rootProject.hasProperty('enablePublish')) { + dependsOn ':packageZeppelinInterpreter' + } + requires('glibc') + 
requires('bash') + requires('perl') + requires('curl') + if (rootProject.hasProperty('hadoop-provided')) { + classifier 'without_hadoop' + } + + preInstall file('release/preInstallRpm.sh') +} + +buildDeb { + dependsOn ':packageVSD' + if (rootProject.hasProperty('enablePublish')) { + dependsOn ':packageZeppelinInterpreter' + } + requires('libc6') + requires('bash') + requires('perl') + requires('curl') + recommends('java8-sdk') + if (rootProject.hasProperty('hadoop-provided')) { + classifier 'without-hadoop' + } + + preInstall file('release/preInstallDeb.sh') +} + +distTar { + // archiveName = 'TIB_compute-ce_' + version + '_' + osFamilyName + '.tar.gz' + if (isEnterpriseProduct) { + archiveName = 'TIB_compute_' + version + '_' + osFamilyName + '.tar.gz' + } else { + classifier 'bin' + } + dependsOn product + // also package VSD + dependsOn ':packageVSD' + if (rootProject.hasProperty('enablePublish')) { + dependsOn ':packageZeppelinInterpreter' + } + compression = Compression.GZIP + extension = 'tar.gz' + if (rootProject.hasProperty('hadoop-provided')) { + classifier 'without-hadoop-bin' + } +} + +distZip { + // archiveName = 'TIB_compute-ce_' + version + '_' + osFamilyName + '.zip' + if (isEnterpriseProduct) { + archiveName = 'TIB_compute_' + version + '_' + osFamilyName + '.zip' + } else { + classifier 'bin' + } + dependsOn product + // also package VSD + dependsOn ':packageVSD' + if (rootProject.hasProperty('enablePublish')) { + dependsOn ':packageZeppelinInterpreter' + } + if (rootProject.hasProperty('hadoop-provided')) { + classifier 'without-hadoop-bin' + } +} + +// disable distZip by default +assemble.dependsOn.clear() +assemble.dependsOn product, distTar + +task distRpm { + dependsOn product + dependsOn buildRpm +} + +task distDeb { + dependsOn product + dependsOn buildDeb +} + +task jdbcJar { + dependsOn ":snappy-jdbc_${scalaBinaryVersion}:shadowJar" + + doLast { + def jdbcProject = project(":snappy-jdbc_${scalaBinaryVersion}") + String jdbcName = 
"snappydata-jdbc_${scalaBinaryVersion}-${version}.jar" + if (isEnterpriseProduct) { + jdbcName = "TIB_compute-jdbc-${scalaBinaryVersion}_${version}.jar" + } + // copy the snappy-jdbc shadow jar into distributions + copy { + from jdbcProject.shadowJar.destinationDir + into "${rootProject.buildDir}/distributions" + include jdbcProject.shadowJar.archiveName + rename { filename -> jdbcName } + } + } +} + +task copyShadowJars { + dependsOn jdbcJar + dependsOn ":snappy-core_${scalaBinaryVersion}:shadowJar" + + doLast { + def coreProject = project(":snappy-core_${scalaBinaryVersion}") + String coreName = "snappydata-core_${scalaBinaryVersion}-${version}.jar" + if (isEnterpriseProduct) { + coreName = "TIB_compute-core-${scalaBinaryVersion}_${version}.jar" + } + copy { + from coreProject.shadowJar.destinationDir + into "${rootProject.buildDir}/distributions" + include coreProject.shadowJar.archiveName + rename { filename -> coreName } + } } } -task copyResourcesAll << { - copyTestsCommonResources(project(":snappy-core_${scalaBinaryVersion}").buildDir) - copyTestsCommonResources(project(":snappy-tools_${scalaBinaryVersion}").buildDir) - copyTestsCommonResources(project(':snappy-dunits').buildDir) +task distInstallers { + dependsOn product + dependsOn buildRpm + dependsOn buildDeb +} + +// use the task below to prepare final release bits +task distProduct { + dependsOn product, distTar, distZip + dependsOn distInstallers + dependsOn copyShadowJars +} + +task generateSources { + dependsOn ':snappy-spark:generateSources', ':snappy-store:generateSources' + // copy all resource files into build classes path because new versions of IDEA + // do not include separate resources path in CLASSPATH if output path has been customized + doLast { + subprojects.collect { proj -> + String resourcesDir = proj.sourceSets.main.output.resourcesDir + if (file(resourcesDir).exists()) { + def projOutDir = file("${proj.projectDir}/src/main/scala").exists() + ? 
"${proj.sourceSets.main.java.outputDir}/../../scala/main" + : proj.sourceSets.main.java.outputDir + copy { + from resourcesDir + into projOutDir + } + } + resourcesDir = proj.sourceSets.test.output.resourcesDir + if (file(resourcesDir).exists()) { + def projOutDir = file("${proj.projectDir}/src/test/scala").exists() + ? "${proj.sourceSets.test.java.outputDir}/../../scala/test" + : proj.sourceSets.test.java.outputDir + copy { + from resourcesDir + into projOutDir + } + } + } + } } task cleanAll { @@ -407,17 +1286,269 @@ task cleanAll { } task buildAll { dependsOn getTasksByName('assemble', true).collect { it.path } + dependsOn getTasksByName('product', true).collect { it.path } dependsOn getTasksByName('testClasses', true).collect { it.path } mustRunAfter cleanAll } +task buildDtests { + dependsOn "snappy-dtests_${scalaBinaryVersion}:buildDtests" +} task checkAll { - dependsOn ":snappy-core_${scalaBinaryVersion}:check", ":snappy-tools_${scalaBinaryVersion}:check", ':snappy-dunits:test' //, ":snappy-aqp:check" - if (project.hasProperty('spark')) { - dependsOn project(':snappy-spark').getTasksByName('check', true).collect { it.path } + dependsOn ':snappy-spark:scalaStyle' + if (rootProject.hasProperty('store')) { + dependsOn ':snappy-store:check' + } + dependsOn ":snappy-core_${scalaBinaryVersion}:check" + if (rootProject.hasProperty('spark')) { + dependsOn ':snappy-spark:check' + } + dependsOn ":snappy-cluster_${scalaBinaryVersion}:check" + dependsOn ":snappy-examples_${scalaBinaryVersion}:check" + if (!rootProject.hasProperty('aqp.skip') && hasAqpProject && isEnterpriseProduct) { + dependsOn ":snappy-aqp_${scalaBinaryVersion}:check" + } + if (!rootProject.hasProperty('connectors.skip') && hasGemFireConnectorProject && isEnterpriseProduct) { + dependsOn ":gemfire-connector:check" + } + if (!rootProject.hasProperty('smoke.skip')) { + dependsOn buildDtests, ":snappy-dtests_${scalaBinaryVersion}:check" + } + if (!rootProject.hasProperty('compatibility.skip')) { + 
dependsOn ":snappy-compatibility-tests_${scalaBinaryVersion}:check" } mustRunAfter buildAll - mustRunAfter product } +task allReports(type: TestReport) { + description 'Combines the test reports.' + dependsOn cleanAllReports + destinationDir = file("${testResultsBase}/combined-reports") + mustRunAfter checkAll +} +gradle.taskGraph.whenReady { graph -> + tasks.getByName('allReports').reportOn rootProject.subprojects.collect{ it.tasks.withType(Test) }.flatten() +} + +def writeProperties(def parent, def name, def comment, def propsMap) { + parent.exists() || parent.mkdirs() + def writer = new File(parent, name).newWriter() + def props = new Properties() + propsMap.each { k, v -> props.setProperty(k, v.toString()) } + try { + props.store(writer, comment.toString()) + writer.flush() + } finally { + writer.close() + } +} + +int getLast(includeTestFiles, pattern) { + includeTestFiles.findLastIndexOf { + File f -> f.name.indexOf(pattern) >= 0 + } +} + +task packageZeppelinInterpreter { doLast { + String zeppelinInterpreterJarName = "snappydata-zeppelin_${scalaBinaryVersion}-${zeppelinInterpreterVersion}.jar" + String zeppelinInterpreterDir = System.env.ZEPPELIN_INTERPRETER_DIR + + if (zeppelinInterpreterDir == null || zeppelinInterpreterDir.length() == 0) { + zeppelinInterpreterDir = "${projectDir}/../zeppelin-interpreter" + } + + String zeppelinInterpreterLibDir = "${zeppelinInterpreterDir}/build-artifacts/libs" + if (file(zeppelinInterpreterDir).canWrite()) { + exec { + executable "${zeppelinInterpreterDir}/gradlew" + workingDir = zeppelinInterpreterDir + args "clean", "product", "distTar", "-PenablePublish" + } + println '' + println "Copying Zeppelin Interpreter jar from ${zeppelinInterpreterLibDir} to ${snappyProductDir}/jars" + println '' + copy { + from "${zeppelinInterpreterLibDir}" + into "${snappyProductDir}/jars" + include "${zeppelinInterpreterJarName}" + } + } else { + println "Skipping including Zeppelin Interpreter jar due to unwritable 
${zeppelinInterpreterDir}" + } +} } + +task packagePulse { doLast { + String pulseWarName = "pulse-${pulseVersion}.war" + String pulseDir = System.env.PULSEDIR + + if (pulseDir == null || pulseDir.length() == 0) { + pulseDir = "${projectDir}/../pulse" + } + + String pulseDistDir = "${pulseDir}/build-artifacts/linux/dist" + if (file(pulseDir).canWrite()) { + exec { + executable "${pulseDir}/build.sh" + workingDir = pulseDir + args 'clean', 'build-all' + } + delete "${snappyProductDir}/jars/pulse.war" + println '' + println "Copying Pulse war from ${pulseDistDir} to ${snappyProductDir}/jars" + println '' + copy { + from "${pulseDir}/build-artifacts/linux/dist" + into "${snappyProductDir}/jars" + include "${pulseWarName}" + rename { filename -> 'pulse.war' } + } + } else { + println "Skipping Pulse due to unwritable ${pulseDir}" + } +} } + +task packageVSD { doLast { + String thirdparty = System.env.THIRDPARTYDIR + String vsdDir = '' + + if (thirdparty == null || thirdparty.length() == 0) { + vsdDir = "${projectDir}/../thirdparty/vsd" + } else { + vsdDir = "${thirdparty}/vsd" + } + + String vsdDistDir = "${vsdDir}/70/vsd" + if (file(vsdDistDir).canWrite()) { + println '' + println "Copying VSD from ${vsdDistDir} to ${snappyProductDir}/vsd" + println '' + delete "${snappyProductDir}/vsd" + copy { + from vsdDistDir + into "${snappyProductDir}/vsd" + } + } else { + println "Skipping VSD due to unwritable ${vsdDistDir}" + } +} } + +task sparkPackage { + dependsOn ":snappy-core_${scalaBinaryVersion}:sparkPackage" +} + +packagePulse.mustRunAfter product +packageVSD.mustRunAfter product +packageZeppelinInterpreter.mustRunAfter product + +distTar.mustRunAfter clean, cleanAll, product +distZip.mustRunAfter clean, cleanAll, product +distRpm.mustRunAfter clean, cleanAll, product +distDeb.mustRunAfter clean, cleanAll, product +distInstallers.mustRunAfter clean, cleanAll, product, distRpm, distDeb +distProduct.mustRunAfter clean, cleanAll, product + +task deleteDocsDir(type: 
Delete) { + delete "${rootProject.buildDir}/docs" +} + +task docs(type: ScalaDoc) { + apply plugin: 'scala' + + scalaDocOptions.additionalParameters = [ '-J-Xmx7g', '-J-XX:ReservedCodeCacheSize=512m', '-J-Djava.net.preferIPv4Stack=true' ] + + dependsOn deleteDocsDir + Set allSource = [] + def docProjects = rootProject.subprojects.collectMany { project -> + if ((project.plugins.hasPlugin('scala') || project.plugins.hasPlugin('java')) && + // skip gemfire-connector + !project.path.contains('gemfire-connector') && + // jobserver depends on Apache Spark 1.5.x which causes conflicts + !project.path.contains('snappy-store') && + !project.name.contains('jobserver') && + // the three name checks below are already covered by the snappy-store path check, + // but they are kept so that if the snappy-store path filter is ever removed + // these three sub-projects are still excluded from the docs build. + !project.name.contains('jgroups') && + !project.name.contains('gemfire-examples') && + !project.name.contains('trove') && + !project.name.contains('kafka') && + !project.name.contains('encoders') && + // exclude tests + !project.name.contains('tests')) { + allSource.addAll(project.sourceSets.main.allJava.findAll { + !it.getPath().matches('.*/internal/.*') && !it.getPath().contains('com/gemstone/gemfire/cache/operations/') + }) + + if (project.plugins.hasPlugin('scala')) { + allSource.addAll(project.sourceSets.main.allScala.findAll { + !it.getPath().matches('.*org/apache/spark/sql/execution/joins/HashedRelation.*') && + !it.getPath().matches('.*org/apache/spark/sql/execution/debug/package.*') && + !it.getPath().matches('.*org/apache/spark/sql/store/CodeGeneration.*') + }) + } + [ project ] + } else [] + } + source = allSource + classpath = files(docProjects.collect { project -> + project.sourceSets.main.compileClasspath + }) + destinationDir = file("${rootProject.buildDir}/docs") +} +task publishDocs(type:Exec) { + dependsOn docs + // on linux + commandLine './publish-site.sh' +} + +// It runs
test script from product dir. Hence if running the target individually make sure +// to run product target first +task checkPython(type:Exec) { + String wdir = "${testResultsBase}/python" + delete wdir + file(wdir).mkdirs() + workingDir wdir + copy { + from "${rootDir}/python/pyspark/test_support" /* same directory name as the 'test_support/**' include in the product task */ + into wdir + } + environment 'PYTHONPATH', "${snappyProductDir}/python/lib/py4j-0.10.4-src.zip:${snappyProductDir}/python" + environment 'SPARK_HOME', "${snappyProductDir}" + commandLine 'python', '-u', "${rootDir}/python/run-snappy-tests.py" +} + task precheckin { - dependsOn cleanAll, buildAll, product, checkAll + dependsOn cleanAll, buildAll, checkAll, allReports, docs +} + +if (rootProject.hasProperty('trackBuildTime') ) { + buildtimetracker { + reporters { + summary { + ordered false + threshold 5000 + barstyle "unicode" + } + } + } } + +// log build output to buildOutput.log in addition to console output + +def buildOutput = new File("${rootDir}/buildOutput.log") +// delete build output file if it has become large +if (buildOutput.length() > 1000000) { + delete buildOutput +} +def gradleLogger = new org.gradle.api.logging.StandardOutputListener() { + void onOutput(CharSequence output) { + buildOutput << output + } +} +def loggerService = gradle.services.get(LoggingOutputInternal) +loggerService.addStandardOutputListener(gradleLogger) +loggerService.addStandardErrorListener(gradleLogger) + +println() +println('-------------------------------------------------') +println("Starting new build on ${buildDate}") +println('-------------------------------------------------') +println() diff --git a/build/create-tree-structure.sh b/build/create-tree-structure.sh deleted file mode 100755 index ac67d5f722..0000000000 --- a/build/create-tree-structure.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh -# -# Create the snappy-spark copy in top-level snappy-commons directory -# and local-repo links in directories.
-# - -# Get the parent base directory of this script -scriptdir="`dirname "$0"`" - -realpath() { - ( cd "$1" && pwd ) -} - -basedir="`realpath "${scriptdir}/.."`" -destdir="${basedir}/snappy-spark" - -if [ ! -d "${destdir}" ]; then - [ -e "${destdir}" ] && echo "${destdir} exists but is not a directory" && exit 1 - - sspdir="${basedir}/../snappy-spark" - # Search for snappy-spark first in SPARK_HOME - if [ -n "${SPARK_HOME}" -a -d "${SPARK_HOME}" ]; then - if [ "`realpath "${SPARK_HOME}"`" != "${destdir}" ]; then - cp -a "${SPARK_HOME}" "${basedir}" - fi - # Then one level up - elif [ -d "${sspdir}" ]; then - sspdir="`realpath "${sspdir}"`" - cp -a "${sspdir}" "${destdir}" - else - echo "Failed to find ${sspdir}. Either set SPARK_HOME to its location or place it in the same directory as snappy-commons." - exit 1 - fi -fi - -for localRepo in "${basedir}/snappy-core" "${basedir}/snappy-tools"; do - rm -rf "${localRepo}/local-repo" - ln -s ../local-repo "${localRepo}/local-repo" -done - -exit 0 diff --git a/build/git-gnome-keyring/Makefile b/build/git-gnome-keyring/Makefile deleted file mode 100644 index c3c7c98aa1..0000000000 --- a/build/git-gnome-keyring/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -MAIN:=git-credential-gnome-keyring -all:: $(MAIN) - -CC = gcc -RM = rm -f -CFLAGS = -g -O2 -Wall - --include ../../../config.mak.autogen --include ../../../config.mak - -INCS:=$(shell pkg-config --cflags gnome-keyring-1 glib-2.0) -LIBS:=$(shell pkg-config --libs gnome-keyring-1 glib-2.0) - -SRCS:=$(MAIN).c -OBJS:=$(SRCS:.c=.o) - -%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(INCS) -o $@ -c $< - -$(MAIN): $(OBJS) - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -clean: - @$(RM) $(MAIN) $(OBJS) diff --git a/build/git-gnome-keyring/git-credential-gnome-keyring.c b/build/git-gnome-keyring/git-credential-gnome-keyring.c deleted file mode 100644 index 2a317fca44..0000000000 --- a/build/git-gnome-keyring/git-credential-gnome-keyring.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (C) 2011 John 
Szakmeister - * 2012 Philipp A. Hartmann - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * Credits: - * - GNOME Keyring API handling originally written by John Szakmeister - * - ported to credential helper API by Philipp A. Hartmann - */ - -#include -#include -#include -#include -#include - -#ifdef GNOME_KEYRING_DEFAULT - - /* Modern gnome-keyring */ - -#include - -#else - - /* - * Support ancient gnome-keyring, circ. RHEL 5.X. - * GNOME_KEYRING_DEFAULT seems to have been introduced with Gnome 2.22, - * and the other features roughly around Gnome 2.20, 6 months before. - * Ubuntu 8.04 used Gnome 2.22 (I think). Not sure any distro used 2.20. - * So the existence/non-existence of GNOME_KEYRING_DEFAULT seems like - * a decent thing to use as an indicator. - */ - -#define GNOME_KEYRING_DEFAULT NULL - -/* - * ancient gnome-keyring returns DENIED when an entry is not found. - * Setting NO_MATCH to DENIED will prevent us from reporting DENIED - * errors during get and erase operations, but we will still report - * DENIED errors during a store. 
- */ -#define GNOME_KEYRING_RESULT_NO_MATCH GNOME_KEYRING_RESULT_DENIED - -#define gnome_keyring_memory_alloc g_malloc -#define gnome_keyring_memory_free gnome_keyring_free_password -#define gnome_keyring_memory_strdup g_strdup - -static const char *gnome_keyring_result_to_message(GnomeKeyringResult result) -{ - switch (result) { - case GNOME_KEYRING_RESULT_OK: - return "OK"; - case GNOME_KEYRING_RESULT_DENIED: - return "Denied"; - case GNOME_KEYRING_RESULT_NO_KEYRING_DAEMON: - return "No Keyring Daemon"; - case GNOME_KEYRING_RESULT_ALREADY_UNLOCKED: - return "Already UnLocked"; - case GNOME_KEYRING_RESULT_NO_SUCH_KEYRING: - return "No Such Keyring"; - case GNOME_KEYRING_RESULT_BAD_ARGUMENTS: - return "Bad Arguments"; - case GNOME_KEYRING_RESULT_IO_ERROR: - return "IO Error"; - case GNOME_KEYRING_RESULT_CANCELLED: - return "Cancelled"; - case GNOME_KEYRING_RESULT_ALREADY_EXISTS: - return "Already Exists"; - default: - return "Unknown Error"; - } -} - -/* - * Support really ancient gnome-keyring, circ. RHEL 4.X. - * Just a guess for the Glib version. Glib 2.8 was roughly Gnome 2.12 ? - * Which was released with gnome-keyring 0.4.3 ?? 
- */ -#if GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION < 8 - -static void gnome_keyring_done_cb(GnomeKeyringResult result, gpointer user_data) -{ - gpointer *data = (gpointer *)user_data; - int *done = (int *)data[0]; - GnomeKeyringResult *r = (GnomeKeyringResult *)data[1]; - - *r = result; - *done = 1; -} - -static void wait_for_request_completion(int *done) -{ - GMainContext *mc = g_main_context_default(); - while (!*done) - g_main_context_iteration(mc, TRUE); -} - -static GnomeKeyringResult gnome_keyring_item_delete_sync(const char *keyring, guint32 id) -{ - int done = 0; - GnomeKeyringResult result; - gpointer data[] = { &done, &result }; - - gnome_keyring_item_delete(keyring, id, gnome_keyring_done_cb, data, - NULL); - - wait_for_request_completion(&done); - - return result; -} - -#endif -#endif - -/* - * This credential struct and API is simplified from git's credential.{h,c} - */ -struct credential { - char *protocol; - char *host; - unsigned short port; - char *path; - char *username; - char *password; -}; - -#define CREDENTIAL_INIT { NULL, NULL, 0, NULL, NULL, NULL } - -typedef int (*credential_op_cb)(struct credential *); - -struct credential_operation { - char *name; - credential_op_cb op; -}; - -#define CREDENTIAL_OP_END { NULL, NULL } - -/* ----------------- GNOME Keyring functions ----------------- */ - -/* create a special keyring option string, if path is given */ -static char *keyring_object(struct credential *c) -{ - if (!c->path) - return NULL; - - if (c->port) - return g_strdup_printf("%s:%hd/%s", c->host, c->port, c->path); - - return g_strdup_printf("%s/%s", c->host, c->path); -} - -static int keyring_get(struct credential *c) -{ - char *object = NULL; - GList *entries; - GnomeKeyringNetworkPasswordData *password_data; - GnomeKeyringResult result; - - if (!c->protocol || !(c->host || c->path)) - return EXIT_FAILURE; - - object = keyring_object(c); - - result = gnome_keyring_find_network_password_sync( - c->username, - NULL /* domain */, - 
c->host, - object, - c->protocol, - NULL /* authtype */, - c->port, - &entries); - - g_free(object); - - if (result == GNOME_KEYRING_RESULT_NO_MATCH) - return EXIT_SUCCESS; - - if (result == GNOME_KEYRING_RESULT_CANCELLED) - return EXIT_SUCCESS; - - if (result != GNOME_KEYRING_RESULT_OK) { - g_critical("%s", gnome_keyring_result_to_message(result)); - return EXIT_FAILURE; - } - - /* pick the first one from the list */ - password_data = (GnomeKeyringNetworkPasswordData *)entries->data; - - gnome_keyring_memory_free(c->password); - c->password = gnome_keyring_memory_strdup(password_data->password); - - if (!c->username) - c->username = g_strdup(password_data->user); - - gnome_keyring_network_password_list_free(entries); - - return EXIT_SUCCESS; -} - - -static int keyring_store(struct credential *c) -{ - guint32 item_id; - char *object = NULL; - GnomeKeyringResult result; - - /* - * Sanity check that what we are storing is actually sensible. - * In particular, we can't make a URL without a protocol field. - * Without either a host or pathname (depending on the scheme), - * we have no primary key. And without a username and password, - * we are not actually storing a credential. 
- */ - if (!c->protocol || !(c->host || c->path) || - !c->username || !c->password) - return EXIT_FAILURE; - - object = keyring_object(c); - - result = gnome_keyring_set_network_password_sync( - GNOME_KEYRING_DEFAULT, - c->username, - NULL /* domain */, - c->host, - object, - c->protocol, - NULL /* authtype */, - c->port, - c->password, - &item_id); - - g_free(object); - - if (result != GNOME_KEYRING_RESULT_OK && - result != GNOME_KEYRING_RESULT_CANCELLED) { - g_critical("%s", gnome_keyring_result_to_message(result)); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} - -static int keyring_erase(struct credential *c) -{ - char *object = NULL; - GList *entries; - GnomeKeyringNetworkPasswordData *password_data; - GnomeKeyringResult result; - - /* - * Sanity check that we actually have something to match - * against. The input we get is a restrictive pattern, - * so technically a blank credential means "erase everything". - * But it is too easy to accidentally send this, since it is equivalent - * to empty input. So explicitly disallow it, and require that the - * pattern have some actual content to match. - */ - if (!c->protocol && !c->host && !c->path && !c->username) - return EXIT_FAILURE; - - object = keyring_object(c); - - result = gnome_keyring_find_network_password_sync( - c->username, - NULL /* domain */, - c->host, - object, - c->protocol, - NULL /* authtype */, - c->port, - &entries); - - g_free(object); - - if (result == GNOME_KEYRING_RESULT_NO_MATCH) - return EXIT_SUCCESS; - - if (result == GNOME_KEYRING_RESULT_CANCELLED) - return EXIT_SUCCESS; - - if (result != GNOME_KEYRING_RESULT_OK) { - g_critical("%s", gnome_keyring_result_to_message(result)); - return EXIT_FAILURE; - } - - /* pick the first one from the list (delete all matches?) 
*/ - password_data = (GnomeKeyringNetworkPasswordData *)entries->data; - - result = gnome_keyring_item_delete_sync( - password_data->keyring, password_data->item_id); - - gnome_keyring_network_password_list_free(entries); - - if (result != GNOME_KEYRING_RESULT_OK) { - g_critical("%s", gnome_keyring_result_to_message(result)); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} - -/* - * Table with helper operation callbacks, used by generic - * credential helper main function. - */ -static struct credential_operation const credential_helper_ops[] = { - { "get", keyring_get }, - { "store", keyring_store }, - { "erase", keyring_erase }, - CREDENTIAL_OP_END -}; - -/* ------------------ credential functions ------------------ */ - -static void credential_init(struct credential *c) -{ - memset(c, 0, sizeof(*c)); -} - -static void credential_clear(struct credential *c) -{ - g_free(c->protocol); - g_free(c->host); - g_free(c->path); - g_free(c->username); - gnome_keyring_memory_free(c->password); - - credential_init(c); -} - -static int credential_read(struct credential *c) -{ - char *buf; - size_t line_len; - char *key; - char *value; - - key = buf = gnome_keyring_memory_alloc(1024); - - while (fgets(buf, 1024, stdin)) { - line_len = strlen(buf); - - if (line_len && buf[line_len-1] == '\n') - buf[--line_len] = '\0'; - - if (!line_len) - break; - - value = strchr(buf, '='); - if (!value) { - g_warning("invalid credential line: %s", key); - gnome_keyring_memory_free(buf); - return -1; - } - *value++ = '\0'; - - if (!strcmp(key, "protocol")) { - g_free(c->protocol); - c->protocol = g_strdup(value); - } else if (!strcmp(key, "host")) { - g_free(c->host); - c->host = g_strdup(value); - value = strrchr(c->host, ':'); - if (value) { - *value++ = '\0'; - c->port = atoi(value); - } - } else if (!strcmp(key, "path")) { - g_free(c->path); - c->path = g_strdup(value); - } else if (!strcmp(key, "username")) { - g_free(c->username); - c->username = g_strdup(value); - } else if 
(!strcmp(key, "password")) { - gnome_keyring_memory_free(c->password); - c->password = gnome_keyring_memory_strdup(value); - while (*value) - *value++ = '\0'; - } - /* - * Ignore other lines; we don't know what they mean, but - * this future-proofs us when later versions of git do - * learn new lines, and the helpers are updated to match. - */ - } - - gnome_keyring_memory_free(buf); - - return 0; -} - -static void credential_write_item(FILE *fp, const char *key, const char *value) -{ - if (!value) - return; - fprintf(fp, "%s=%s\n", key, value); -} - -static void credential_write(const struct credential *c) -{ - /* only write username/password, if set */ - credential_write_item(stdout, "username", c->username); - credential_write_item(stdout, "password", c->password); -} - -static void usage(const char *name) -{ - struct credential_operation const *try_op = credential_helper_ops; - const char *basename = strrchr(name, '/'); - - basename = (basename) ? basename + 1 : name; - fprintf(stderr, "usage: %s <", basename); - while (try_op->name) { - fprintf(stderr, "%s", (try_op++)->name); - if (try_op->name) - fprintf(stderr, "%s", "|"); - } - fprintf(stderr, "%s", ">\n"); -} - -int main(int argc, char *argv[]) -{ - int ret = EXIT_SUCCESS; - - struct credential_operation const *try_op = credential_helper_ops; - struct credential cred = CREDENTIAL_INIT; - - if (!argv[1]) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - g_set_application_name("Git Credential Helper"); - - /* lookup operation callback */ - while (try_op->name && strcmp(argv[1], try_op->name)) - try_op++; - - /* unsupported operation given -- ignore silently */ - if (!try_op->name || !try_op->op) - goto out; - - ret = credential_read(&cred); - if (ret) - goto out; - - /* perform credential operation */ - ret = (*try_op->op)(&cred); - - credential_write(&cred); - -out: - credential_clear(&cred); - return ret; -} diff --git a/build/gradle-idea-hack.sh b/build/gradle-idea-hack.sh deleted file mode 100755 index 
2241f67bca..0000000000 --- a/build/gradle-idea-hack.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -if [ "$1" = "-u" ]; then - find . -name build.gradle | xargs perl -pi -e "s,^//(.*[sS]cala['\.]),\$1," -else - find . -name build.gradle | xargs perl -pi -e "s,^(.*[sS]cala['\.]),//\$1," -fi diff --git a/build/mvn b/build/mvn deleted file mode 100755 index 4a9152d8a7..0000000000 --- a/build/mvn +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -# Get the parent base directory of this script -scriptdir="`dirname "$0"`" -basedir="`cd "${scriptdir}/.." && pwd`" - -# First create the snappy-spark directory -if ! "${scriptdir}/create-tree-structure.sh"; then - exit 1 -fi - -# Setup the default options -DEFAULT_OPTS="-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" -SKIP_TESTS=1 -SKIP_SPARK= -PROCESS_OPTS=1 -while [ -n "$PROCESS_OPTS" ]; do - case "$1" in - -no-defaults) DEFAULT_OPTS="" && SKIP_TESTS="" && shift ;; - -tests) SKIP_TESTS="" && shift ;; - -skip-spark) SKIP_SPARK=1 && shift ;; - *) PROCESS_OPTS="" ;; - esac -done - -if [ -n "${SKIP_TESTS}" ]; then - DEFAULT_OPTS="${DEFAULT_OPTS} -DskipTests" -fi -if [ -n "${SKIP_SPARK}" ]; then - DEFAULT_OPTS="${DEFAULT_OPTS} -pl snappy-core,snappy-tools" -fi - -# Finally execute the mvn in the snappy-spark directory for consistent builds across spark and snappy projects -"${basedir}/snappy-spark/build/mvn" ${DEFAULT_OPTS} "$@" diff --git a/cluster/README-thrift.md b/cluster/README-thrift.md new file mode 100644 index 0000000000..84cce72e01 --- /dev/null +++ b/cluster/README-thrift.md @@ -0,0 +1,268 @@ +## Introduction + +SnappyData store now has support for Thrift protocol that provides functionality equivalent to +JDBC/ODBC protocols and can be used to access the store from other languages not yet supported +directly by SnappyData. 
Compared to the Spark [thrift server](http://spark.apache.org/docs/latest/sql-programming-guide.html#running-the-thrift-jdbcodbc-server) based on Hive, this has multiple advantages: + +* Spark Hive thrift server provides a single point of entry that will spawn its own executors. + In the SnappyData embedded mode, this means that only one "lead" node can act as a thrift-server + at any point of time. The thrift server implementation in SnappyData has no such limitation + and can be started on each of the executors. +* Enables writing a driver with implicit failover, high-availability characteristics. + The SnappyData JDBC driver is based on thrift API which uses these characteristics of the API. +* Hive thrift API is quite limited and lacks a lot of functionality required for full + JDBC/ODBC compliance. The thrift API supported by SnappyData store is rich enough to write + fully compliant JDBC/ODBC drivers including mutability, cursors etc. +* There is no open source ODBC driver available for Hive thrift server so that limits + its usefulness in terms of overall tools connectivity (e.g. Tableau). + + +## Thrift server + +The _-client-bind-address_ and _-client-port_ arguments start a thrift server +by default in the SnappyData product and a DRDA server in the old "rowstore" mode. +The defaults for these are _localhost_ and _1527_ respectively. +The _run-netserver_ option controls whether to start a network server or not (default is true). + +The command-line SnappyData locators and servers accept _-thrift-server-address_ +and _-thrift-server-port_ arguments to start a Thrift server in addition +to the above. The thrift servers use the _Thrift Compact Protocol_ by default +which is not SSL enabled. When using the _snappy-start-all.sh_ script, these +properties can be specified in the conf/locators and conf/servers files in the +product directory like any other locator/server properties.
For example, copy the +conf/locators.template and conf/servers.template files to conf/locators and conf/servers +respectively and add these arguments as required. + +Add to conf/locators: +``` +host1 -client-bind-address=host1 -client-port=1530 +``` + +Provide appropriate values for _host1_ and the port 1530 above. If running SnappyData in the +_rowstore_ mode use thrift specific arguments like: + +``` +host1 -thrift-server-address=host1 -thrift-server-port=1530 -run-netserver=false +``` + +This also adds _run-netserver=false_ to inhibit starting the DRDA server or one can skip this +to also start the default DRDA server. If starting on localhost, then the +_client-bind-address_ and _thrift-server-address_ parameters can be skipped: + +``` +localhost -client-port=1530 +``` + +``` +localhost -thrift-server-port=1530 -run-netserver=false +``` + +Similarly add the above parameters to conf/servers. + + +Other optional startup Thrift properties include: + +* _thrift-binary-protocol=(true|false)_: to use the thrift binary protocol instead of the default compact protocol +* _thrift-framed-transport=(true|false)_: to use the thrift framed transport; this is not the + recommended mode since it provides no advantages over the default with SnappyData's server + implementation but has been provided for languages that only support framed transport +* _thrift-ssl=(true|false)_: enable SSL +* _thrift-ssl-properties_: comma-separated SSL properties including: + * _protocol_: default "TLS", see [JCA docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext) + * _enabled-protocols_: enabled protocols separated by ":" + * _cipher-suites_: enabled cipher suites separated by ":", see [JCA docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#ciphersuites) + * _client-auth_=(true|false): if client also needs to be authenticated, see [J2SE
docs](https://docs.oracle.com/javase/7/docs/api/javax/net/ssl/SSLServerSocket.html#setNeedClientAuth(boolean)) + * _keystore_: path to key store file + * _keystore-type_: the type of key-store (default "JKS"), see [JCA docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#KeyStore) + * _keystore-password_: password for the key store file + * _keymanager-type_: the type of key manager factory, see [JSSE docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/jsse/JSSERefGuide.html#KeyManagerFactory) + * _truststore_: path to trust store file + * _truststore-type_: the type of trust-store (default "JKS"), see [JCA docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#KeyStore) + * _truststore-password_: password for the trust store file + * _trustmanager-type_: the type of trust manager factory, see [JSSE docs](https://docs.oracle.com/javase/7/docs/technotes/guides/security/jsse/JSSERefGuide.html#TrustManagerFactory) + + +If using these properties, then the client has to setup the corresponding Thrift client socket +options as per the programming language being used. For instance if _thrift-binary-protocol_ +is set as true on server, then a java client will need to use _TBinaryProtocol_ from the +Thrift Java API for the protocol. + + +## Thrift client + +The current thrift IDL file can be found [here](https://github.com/SnappyDataInc/snappy-store/blob/snappy/master/gemfirexd/shared/src/main/java/io/snappydata/thrift/common/snappydata.thrift). +Client drivers can be generated from the IDL as described in its [documentation](https://thrift.apache.org/). +A description of the thrift types etc can also be found there and [elsewhere](https://diwakergupta.github.io/thrift-missing-guide/). + +The _SnappyDataService_ provides the full set of operations that can be performed on the servers. +Basic steps are given below. 
A complete Java Thrift client example can be found [here](https://github.com/SnappyDataInc/snappy-store/blob/snappy/master/gemfirexd/tools/src/test/java/io/snappydata/app/TestThrift.java). + + +### Open a connection + +This includes creating a Thrift socket with appropriate protocol, then invoking +the _openConnection_ API providing appropriate _OpenConnectionArgs_. +Three parameters that need to be provided are: +* _clientHostName_: the host name of the client used for mapping on the server +* _clientID_: a unique ID for the connection that can be used in logs, maps etc on the server; + this must be unique on that _clientHostName_ +* _security_: the security mechanism to use, currently only SecurityMechanism.PLAIN is supported + +No userName or password arguments have been provided in the example below assuming +no authentication has been configured by default on the locators/servers. +For the clientID, some symbolic name for the client followed by the threadId have been used. + +```java + import io.snappydata.thrift.*; + import org.apache.thrift.*; + import org.apache.thrift.protocol.*; + import org.apache.thrift.transport.*; + + + String myHostName = InetAddress.getLocalHost().getCanonicalHostName(); + TSocket socket = new TSocket("localhost", 1531); + TCompactProtocol inProtocol = new TCompactProtocol(socket); + TCompactProtocol outProtocol = new TCompactProtocol(socket); + socket.open(); + + Thread currentThread = Thread.currentThread(); + OpenConnectionArgs connArgs = new OpenConnectionArgs() + .setClientHostName(myHostName) + .setClientID("javaClient1|0x" + Long.toHexString(currentThread.getId())) + .setSecurity(SecurityMechanism.PLAIN); + SnappyDataService.Client conn = new SnappyDataService.Client( + inProtocol, outProtocol); + ConnectionProperties connProperties = conn.openConnection(connArgs); +``` + +The returned _ConnectionProperties_ contains a long connection ID and a unique token +to use for subsequent Thrift API calls. 
+ + +### Execute a statement + +The _connectionId_ and _token_ fields of the _ConnectionProperties_ provide a connection oriented +feel to the rest of the API which requires one or both of these to be passed to nearly all other +API calls. The calls can be invoked using any underlying thrift connection passing them. +A simple (un-prepared) SQL statement execution can be done using APIs like execute/executeUpdate. + +```java + long connId = conn.getId(); + ByteBuffer token = conn.getToken(); + conn.execute(connId, "create table foo (bar int primary key)", + null, null, token); +``` + +Or a statement with update count: + +```java + StatementResult result = conn.execute(connId, "insert into foo values (1), (2)", + null, null, token); + if (result.updateCount != 2) { + throw new AssertionError("Expected update count to be 2 but was " + result.updateCount); + } +``` + +### Prepared statement + +Like the JDBC/ODBC, the Thrift API allows "preparing" a statement having placeholders +that can be re-used across multiple executions with different parameters. +The _prepareStatement_ and _prepareAndExecute_ APIs allow one to prepare a statement +with latter also executing it the first time. The result of prepare will provide the +meta-data of the parameters which can be used to also fill in the _Row_ having parameters. + +```java + PrepareResult pstmt = conn.prepareStatement(connId, + "insert into foo values (?)", null, null, token); + Row params = new Row(pstmt.getParameterMetaData()); + int count; + + for (int bar = 1; bar <= 10; bar++) { + params.setInt(0, bar); + count = conn.executePreparedUpdate(pstmt.statementId, + params, null, token).updateCount; + if (count != 1) { + throw new AssertionError("Unexpected count for single insert = " + count); + } + } +``` + +### Handling result sets + +Queries returning result sets provide a _RowSet_ object that will have the meta-data, data, +warnings etc. 
The result data itself is provided as a list of _Row_ objects that can be +iterated using the normal language-specific list iteration. An example in Java can look like: + +```java + + pstmt = conn.prepareStatement(connId, "select * from foo where bar=?", + null, null, token); + params = new Row(pstmt.getParameterMetaData()); + + RowSet rs; + for (int bar = 1; bar <= 5; bar++) { + params.setInt(0, bar); + + rs = conn.executePreparedQuery(pstmt.statementId, params, null, token); + + int numResults = 0; + for (Row row : rs.getRows()) { + System.out.println("For bar=" + bar + " select result = " + row.getInt(1)); + numResults++; + } + } +``` + +### Close the connection + +Clients should invoke both the _closeConnection_ API (that will clear server-side + artifacts for the connection) as well as close the client socket. + +```java + conn.closeConnection(connId, token); + conn.getInputProtocol().getTransport().close(); +``` + +### Handling failover + +The API defines a separate _LocatorService_ for locators (the methods in that service are + also available on _SnappyDataService_ on servers) that provides a _getPreferredServer_ +method to discover the _preferred_ server as per load. In case of a _TTransportException_ +on a client connection, clients can invoke the _getPreferredServer_ again providing the +failed server(s) as argument to get a new server to use. In addition when retrying a statement +execution after failover, set the _possibleDuplicate_ flag on the _StatementAttributes_. +The Java example below only demonstrates how to use the locator connection (which can be + one connection per client) to discover the _preferred_ server to use instead of +hard-coding the server host/port.
+ +```java + // search only for servers using TCompactProtocol without SSL + Set serverType = Collections.singleton(ServerType.THRIFT_SNAPPY_CP); + TSocket socket = new TSocket("localhost", 1530); + TCompactProtocol inProtocol = new TCompactProtocol(socket); + TCompactProtocol outProtocol = new TCompactProtocol(socket); + socket.open(); + LocatorService.Client controlService = new LocatorService.Client( + inProtocol, outProtocol); + + HostAddress preferredServer = controlService.getPreferredServer( + serverType, null, null); + + System.out.println("Attempting connection to preferred server:port = " + + preferredServer.getHostName() + ':' + preferredServer.getPort()); + + TSocket socket = new TSocket(preferredServer.getHostName(), + preferredServer.getPort()); + TCompactProtocol inProtocol = new TCompactProtocol(socket); + TCompactProtocol outProtocol = new TCompactProtocol(socket); + socket.open(); + + Thread currentThread = Thread.currentThread(); + OpenConnectionArgs connArgs = new OpenConnectionArgs() + .setClientHostName(myHostName) + .setClientID("javaClient1|0x" + Long.toHexString(currentThread.getId())) + .setSecurity(SecurityMechanism.PLAIN); + SnappyDataService.Client conn = new SnappyDataService.Client( + inProtocol, outProtocol); + ConnectionProperties connProperties = conn.openConnection(connArgs); +``` diff --git a/cluster/bin/load-snappy-env.sh b/cluster/bin/load-snappy-env.sh new file mode 100644 index 0000000000..1b105c8e74 --- /dev/null +++ b/cluster/bin/load-snappy-env.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. 
You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# This script loads snappy-env.sh if it exists, and ensures it is only loaded once. +# snappy-env.sh is loaded from SPARK_CONF_DIR if set, or from the conf/ subdirectory +# of the parent of the directory this script lives in. + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +FWDIR="$(dirname "$(absPath "$0")")" + +if [ -z "$SNAPPY_ENV_LOADED" ]; then + export SNAPPY_ENV_LOADED=1 + + # Resolve the parent of the directory this script lives in. + parent_dir="`absPath "$FWDIR/.."`" + + if [ -z "$MALLOC_ARENA_MAX" ]; then + export MALLOC_ARENA_MAX=4 + fi + + if [ -z "$MALLOC_MMAP_THRESHOLD_" ]; then + export MALLOC_MMAP_THRESHOLD_=131072 + fi + + if [ -z "$MALLOC_MMAP_MAX_" ]; then + export MALLOC_MMAP_MAX_=2147483647 + fi + + user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}" + + if [ -f "${user_conf_dir}/snappy-env.sh" ]; then + # Promote all variable declarations to environment (exported) variables + set -a + . "${user_conf_dir}/snappy-env.sh" + set +a + fi + +fi + +# Setting SPARK_SCALA_VERSION if not already set. + +if [ -z "$SPARK_SCALA_VERSION" ]; then + + ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11" + ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10" + + if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then + echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2 + echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.'
1>&2 + exit 1 + fi + + if [ -d "$ASSEMBLY_DIR2" ]; then + export SPARK_SCALA_VERSION="2.11" + else + export SPARK_SCALA_VERSION="2.10" + fi +fi diff --git a/cluster/bin/snappy b/cluster/bin/snappy new file mode 100755 index 0000000000..e330103020 --- /dev/null +++ b/cluster/bin/snappy @@ -0,0 +1,112 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +if [ -z "${SPARK_HOME}" ]; then + export SPARK_HOME="$(absPath "$(dirname "$(absPath "$0")")/..")" +fi +export SNAPPY_HOME="${SPARK_HOME}" +# only scala 2.11 supported in SnappyData builds +export SPARK_SCALA_VERSION="2.11" +# disable randomized hash for string in Python 3.3+ +export PYTHONHASHSEED=0 + +# echo JAVA_ARGS when it contains "verbose"; test grep's exit status because -q prints nothing +if echo "$JAVA_ARGS" | grep -q verbose; then + echo "Using JAVA_ARGS=$JAVA_ARGS" +fi + + +function setnewargs() { newargs="$@"; } + +if [ -z "${SNAPPY_SCRIPT_NAME}" ]; then + export SNAPPY_SCRIPT_NAME=snappy +fi + +if echo $@ | grep -qw "rowstore"; then + #using rowstore launcher + newargs= + for arg in "$@"; do + if [[ "$arg" != "rowstore" ]] ; then + setnewargs "$newargs" "$arg" + fi + done + exec "$SPARK_HOME"/bin/spark-class $JAVA_ARGS com.pivotal.gemfirexd.tools.GfxdUtilLauncher $newargs +elif [ "$1" = "dataextractor" ]; then + shift + exec "$SPARK_HOME"/bin/spark-class $JAVA_ARGS com.pivotal.gemfirexd.tools.dataextractor.GemFireXDDataExtractor "$@" +elif [ "$1" = "dataextractloader" ]; then + shift + exec "$SPARK_HOME"/bin/spark-class $JAVA_ARGS com.pivotal.gemfirexd.tools.dataextractor.GemFireXDDataExtractorLoader "$@" +elif [ -z "$SNAPPY_NO_QUICK_LAUNCH" -a $# -ge 2 \ + -a '(' "$2" = "start" -o "$2" = "stop" -o "$2" = "status" ')' \ + -a '(' "$1" = "server" -o "$1" = "leader" -o "$1" = "locator" ')' ]; then + # faster route for start/stop/status + + # Find the java binary + if [ -n "${JAVA_HOME}" ]; then + RUNNER="${JAVA_HOME}/bin/java" + else + if [ "$(command -v java)" ]; then + RUNNER="java" + else + echo "JAVA_HOME is not set" >&2 + exit 1 + fi + fi + + . "${SPARK_HOME}"/bin/load-spark-env.sh + .
"${SPARK_HOME}"/bin/load-snappy-env.sh + + HOSTNAME_FOR_CLIENTS= + if [ "$2" = "start" ]; then + # set hostname-for-clients and SPARK_PUBLIC_DNS in AWS (only supported for Linux) + if [ -z "$SPARK_PUBLIC_DNS" ]; then + CHECK_AWS=1 + if [ -r /sys/hypervisor/uuid ]; then + if ! grep -q '^ec2' /sys/hypervisor/uuid; then + CHECK_AWS= + fi + elif [ -r /sys/devices/virtual/dmi/id/product_name ]; then + if ! grep -iq 'hvm' /sys/devices/virtual/dmi/id/product_name; then + CHECK_AWS= + fi + else + # not running on AWS if neither of those two files are present + CHECK_AWS= + fi + if [ -n "$CHECK_AWS" ]; then + SPARK_PUBLIC_DNS="$(curl -s --connect-timeout 3 http://169.254.169.254/latest/meta-data/public-ipv4 | head -1)" + if [ -n "$SPARK_PUBLIC_DNS" ]; then + if ! echo $"${@// /\\ }" | grep -q 'hostname-for-clients='; then + HOSTNAME_FOR_CLIENTS="-hostname-for-clients=$SPARK_PUBLIC_DNS" + fi + export SPARK_PUBLIC_DNS + fi + fi + fi + fi + + JARS="`echo "${SPARK_HOME}"/jars/snappydata-launcher* "${SPARK_HOME}"/jars/gemfire-shared* "${SPARK_HOME}"/jars/jna-4.* | sed 's/ /:/g'`" + exec $RUNNER $JAVA_ARGS -Xverify:none -cp "$JARS" io.snappydata.tools.QuickLauncher "$@" $HOSTNAME_FOR_CLIENTS +else + # use full snappy launcher + exec "$SPARK_HOME"/bin/spark-class $JAVA_ARGS io.snappydata.tools.SnappyUtilLauncher "$@" +fi diff --git a/cluster/bin/snappy-job.sh b/cluster/bin/snappy-job.sh new file mode 100755 index 0000000000..9697717c45 --- /dev/null +++ b/cluster/bin/snappy-job.sh @@ -0,0 +1,353 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. 
You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +#set -vx + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} + +if [ -z "${SNAPPY_HOME}" ]; then + export SNAPPY_HOME="$(absPath "$(dirname "$(absPath "$0")")/..")" +fi +if [ -z "${SPARK_HOME}" ]; then + export SPARK_HOME="$SNAPPY_HOME" +fi + +usage=$'Usage: + # Create a new context using the provided context factory + snappy-job.sh newcontext --factory [--lead ] + [--app-jar --app-name ] [--conf ] + [--passfile ] + # Submit a job, optionally with a provided context or create a streaming-context and use it with the job + snappy-job.sh submit --app-name --class [--lead ] + [--app-jar ] [--context | --stream] [--conf ] + [--passfile ] [--batch-interval ] + [--packages ] + [--repos [--lead ] [--passfile ] + # Stop a job with the given job-id + snappy-job.sh stop --job-id [--lead ] [--passfile ] + # List all the current contexts + snappy-job.sh listcontexts [--lead ] [--passfile ] + # Stop a context with the given name + snappy-job.sh stopcontext [--lead ] [--passfile ]' + +function showUsage { + echo "ERROR: incorrect argument specified: " "$@" + echo "$usage" + exit 1 +} + +hostnamePort= +appName= +jobClass= +appjar= +jobID= +contextName= +contextFactory= +newContext= +TOK_EMPTY="EMPTY" +APP_PROPS=$APP_PROPS +securePart="" +batchInterval= +packages= +repos= +jarcache= +alljars= + +while (( "$#" )); do + param="$1" + case $param in + submit) + cmd="jobs" + ;; + status) + cmd="status" + ;; + stop) + cmd="stop" + ;; + newcontext) + cmd="newcontext" + shift + contextName="${1:-$TOK_EMPTY}" + ;; + 
--lead) + shift + hostnamePort="${1:-$TOK_EMPTY}" + ;; + --app-name) + shift + appName="${1:-$TOK_EMPTY}" + ;; + --class) + shift + jobClass="${1:-$TOK_EMPTY}" + ;; + --app-jar) + shift + appjar="${1:-$TOK_EMPTY}" + ;; + --packages) + shift + packages="${1:-$TOK_EMPTY}" + ;; + --repos) + shift + repos="${1:-$TOK_EMPTY}" + ;; + --jarcache) + shift + jarcache="${1:-$TOK_EMPTY}" + ;; + --job-id) + shift + jobID="${1:-$TOK_EMPTY}" + ;; + --factory) + shift + contextFactory="${1:-$TOK_EMPTY}" + ;; + --context) + shift + contextName="${1:-$TOK_EMPTY}" + ;; + --conf) + shift + if [[ -z "$APP_PROPS" ]]; then + APP_PROPS="${1:-$TOK_EMPTY}" + else + APP_PROPS=$APP_PROPS",""${1:-$TOK_EMPTY}" + fi + ;; + --stream) + if [[ $contextName != "" || $cmd != "jobs" ]]; then + showUsage "--context ${contextName} AND --stream" + fi + newContext="yes" + contextName="snappyStreamingContext"$(date +%s%N) + contextFactory="org.apache.spark.sql.streaming.SnappyStreamingContextFactory" + ;; + listcontexts) + cmd="listcontexts" + ;; + stopcontext) + cmd="stopcontext" + shift + contextName="${1:-$TOK_EMPTY}" + ;; + --passfile) + shift + passwordfile="${1:-$TOK_EMPTY}" + if [[ ! -e $passwordfile ]]; then + echo "The config file $passwordfile not found." + exit 1 + fi + securePart=" --config ${passwordfile}" + ;; + --batch-interval) + shift + batchInterval="${1:-$TOK_EMPTY}" + if [[ $contextFactory != "org.apache.spark.sql.streaming.SnappyStreamingContextFactory" ]]; then + echo "Non Streaming job. Batch interval config parameter will not be used." + fi + ;; + *) + showUsage $1 + ;; + esac + shift +done + + +validateOptionalArg() { + arg=$1 + if [[ -z $arg ]]; then + return 1 # false + fi + + validateArg $arg + return $? 
+} + +validateArg() { + arg=$1 + if [[ $arg == "" || $arg == $TOK_EMPTY || + ${arg:0:2} == "--" ]]; then + return 0 # true + fi + + return 1 +} + +# command builder +cmdLine= + +function buildCommand () { +case $cmd in + status) + if validateArg $jobID ; then + showUsage "--job-id" + fi + cmdLine="jobs/${jobID}" + ;; + + jobs) + if validateArg $appName ; then + showUsage "--app-name" + elif validateArg $jobClass ; then + showUsage "--class" + elif validateOptionalArg $appjar ; then + showUsage "--app-jar" + elif validateOptionalArg $packages ; then + showUsage "--packages" + elif validateOptionalArg $repos ; then + showUsage "--repos" + elif validateOptionalArg $jarcache ; then + showUsage "--jarcache" + elif validateOptionalArg $contextName ; then + showUsage "--context" + fi + cmdLine="jobs?appName=${appName}&classPath=${jobClass}" + + if [[ -n $contextName ]]; then + cmdLine="${cmdLine}&context=${contextName}" + fi + ;; + + stop) + if validateArg $jobID ; then + showUsage "--job-id" + fi + cmdLine="jobs/${jobID}" + ;; + + newcontext) + if validateArg $contextName ; then + showUsage "newcontext " + elif validateArg $contextFactory ; then + showUsage "--factory" + elif validateOptionalArg $appjar ; then + showUsage "--app-jar" + elif [[ $appjar != "" ]] && validateArg $appName ; then + showUsage "--app-name" + fi + cmdLine="contexts/${contextName}?context-factory=${contextFactory}" + + if [[ -n $batchInterval ]]; then + cmdLine="${cmdLine}&streaming.batch_interval=${batchInterval}" + fi + ;; + + listcontexts) + cmdLine="contexts" + ;; + + stopcontext) + if validateArg $contextName ; then + showUsage "stopcontext " + fi + cmdLine="contexts/${contextName}" + ;; + + *) + showUsage +esac +} + +if [[ $cmd == "jobs" && -z $newContext && -z $contextName ]]; then + contextName="snappyContext"$(date +%s%N) + contextFactory="org.apache.spark.sql.SnappySessionFactory" + newContext="yes" +fi + +function addDependentJarsToProp () { + if [[ $packages != "" ]]; then + 
JAR_FOLDER=$SNAPPY_HOME/jars + jarclasspath=`echo $JAR_FOLDER/*.jar | tr -s ' ' ':'` + # optional --repos/--jarcache options precede the package coordinates + depargs= + if [ ! -z $repos ]; then + depargs="--repos $repos" + fi + if [ ! -z $jarcache ]; then + depargs="$depargs --jarcache $jarcache" + fi + depargs="$depargs $packages" + depjars=`${SPARK_HOME}/bin/spark-class -cp $jarclasspath org.apache.spark.deploy.GetJarsAndDependencies $depargs` + depjars=`echo $depjars | sed -e "s/,/|/g"` + if [[ -z "$APP_PROPS" ]]; then + APP_PROPS="dependent-jar-uris=$depjars" + else + APP_PROPS=$APP_PROPS",dependent-jar-uris=$depjars" + fi + fi +} + +buildCommand + +# build command for new context, if needed. +if [[ -n $newContext ]]; then + cmd="newcontext" + jobsCommand=$cmdLine + buildCommand + newContext=$cmdLine + cmdLine=$jobsCommand +fi + + +if [[ -z $hostnamePort ]]; then + hostnamePort=localhost:8090 +fi + + +# invoke command + +jobServerURL="$hostnamePort/${cmdLine}" + +addDependentJarsToProp + +case $cmd in + jobs | newcontext) + if [[ $appjar != "" ]]; then + curl --data-binary @$appjar $hostnamePort\/jars\/$appName $CURL_OPTS ${securePart} + fi + + if [[ $newContext != "" ]]; then + curl -d "${APP_PROPS}" ${hostnamePort}/${newContext} $CURL_OPTS ${securePart} + fi + + curl -d "${APP_PROPS}" ${jobServerURL} $CURL_OPTS ${securePart} + ;; + + status) + curl ${jobServerURL} $CURL_OPTS ${securePart} + ;; + + listcontexts) + curl -X GET ${jobServerURL} $CURL_OPTS ${securePart} + ;; + + stop | stopcontext) + curl -X DELETE ${jobServerURL} $CURL_OPTS ${securePart} + ;; +esac + +echo diff --git a/cluster/bin/snappy-sql b/cluster/bin/snappy-sql new file mode 100755 index 0000000000..8571d47b3a --- /dev/null +++ b/cluster/bin/snappy-sql @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License.
You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +bin="$(dirname "$(absPath "$0")")" + +export SNAPPY_SCRIPT_NAME=snappy-sql +exec "$bin/snappy" "$@" diff --git a/cluster/build.gradle b/cluster/build.gradle new file mode 100644 index 0000000000..5fd69f11df --- /dev/null +++ b/cluster/build.gradle @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +apply plugin: 'scala' + +compileScala.options.encoding = 'UTF-8' + +// fix scala+java mix to all use compileScala which uses correct dependency order +sourceSets.main.scala.srcDir 'src/main/java' +sourceSets.test.scala.srcDirs = [ 'src/test/java', 'src/test/scala', 'src/dunit/scala' ] +sourceSets.main.java.srcDirs = [] +sourceSets.test.java.srcDirs = [ ] + +dependencies { + compile 'org.scala-lang:scala-library:' + scalaVersion + compile 'org.scala-lang:scala-reflect:' + scalaVersion + + compile 'org.slf4j:slf4j-api:' + slf4jVersion + compile 'org.slf4j:slf4j-log4j12:' + slf4jVersion + compile 'org.slf4j:jcl-over-slf4j:' + slf4jVersion + compile 'org.slf4j:jul-to-slf4j:' + slf4jVersion + + if (new File(rootDir, 'spark/build.gradle').exists()) { + compile project(':snappy-spark:snappy-spark-unsafe_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-core_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-catalyst_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-sql_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-hive_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-repl_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-streaming_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-streaming-kafka-0.10_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-sql-kafka-0.10_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-mllib_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-yarn_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-graphx_' + scalaBinaryVersion) + compile project(':snappy-spark:snappy-spark-hive-thriftserver_' + scalaBinaryVersion) + if (rootProject.hasProperty('mesos')) { + compile project(':snappy-spark:snappy-spark-mesos_' + scalaBinaryVersion) + } + + testCompile project(path: ':snappy-spark:snappy-spark-sql_' + scalaBinaryVersion, + 
configuration: 'testOutput') + } else { + compile 'io.snappydata:snappy-spark-unsafe_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-core_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-catalyst_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-sql_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-hive_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-repl_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-streaming_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-streaming-kafka-0.10_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-sql-kafka-0.10_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-mllib_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-yarn_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-graphx_' + scalaBinaryVersion + ':' + snappySparkVersion + compile 'io.snappydata:snappy-spark-hive-thriftserver_' + scalaBinaryVersion + ':' + snappySparkVersion + if (rootProject.hasProperty('mesos')) { + compile 'io.snappydata:snappy-spark-mesos_' + scalaBinaryVersion + ':' + snappySparkVersion + } + + testCompile group: 'io.snappydata', name: 'snappy-spark-sql_' + scalaBinaryVersion, + version: snappySparkVersion, classifier: 'tests' + } + + compile (project(':snappy-core_' + scalaBinaryVersion)) { + exclude(group: 'org.apache.spark', module: 'spark-unsafe_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-core_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-catalyst_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-sql_' + scalaBinaryVersion) + exclude(group: 
'org.apache.spark', module: 'spark-hive_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-streaming_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-streaming-kafka-0-10_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-sql-kafka-0-10_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-mllib_' + scalaBinaryVersion) + exclude(group: 'org.eclipse.jetty', module: 'jetty-servlet') + } + testCompile (project(path: ':snappy-core_' + scalaBinaryVersion, configuration: 'testOutput')) { + exclude(group: 'org.apache.spark', module: 'spark-unsafe_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-core_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-catalyst_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-sql_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-hive_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-streaming_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-streaming-kafka-0-10_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-sql-kafka-0-10_' + scalaBinaryVersion) + exclude(group: 'org.apache.spark', module: 'spark-mllib_' + scalaBinaryVersion) + exclude(group: 'org.eclipse.jetty', module: 'jetty-servlet') + } + + if (new File(rootDir, 'store/build.gradle').exists()) { + testCompile project(path: ':snappy-store:snappydata-store-tools', configuration: 'testOutput') + } else { + testCompile group: 'io.snappydata', name: 'snappydata-store-tools', version: snappyStoreVersion, classifier: 'tests' + } + if (new File(rootDir, 'spark-jobserver/build.gradle').exists()) { + compile project(':spark-jobserver_' + scalaBinaryVersion) + } else { + compile group: 'io.snappydata', name: 'spark-jobserver_' + scalaBinaryVersion, version: sparkJobServerVersion + } + // support AWS URLs + 
compile(group: 'org.apache.hadoop', name: 'hadoop-aws', version: hadoopVersion) { + exclude(group: 'asm', module: 'asm') + exclude(group: 'org.codehaus.jackson', module: 'jackson-mapper-asl') + exclude(group: 'org.ow2.asm', module: 'asm') + exclude(group: 'org.apache.zookeeper', module: 'zookeeper') + exclude(group: 'org.jboss.netty', module: 'netty') + exclude(group: 'jline', module: 'jline') + exclude(group: 'commons-logging', module: 'commons-logging') + exclude(group: 'org.mockito', module: 'mockito-all') + exclude(group: 'org.mortbay.jetty', module: 'servlet-api-2.5') + exclude(group: 'javax.servlet', module: 'servlet-api') + exclude(group: 'junit', module: 'junit') + exclude(group: 'com.google.guava', module: 'guava') + exclude(group: 'com.sun.jersey') + exclude(group: 'com.sun.jersey.jersey-test-framework') + exclude(group: 'com.sun.jersey.contribs') + exclude(group: 'com.google.protobuf', module: 'protobuf-java') + exclude(group: 'com.jcraft', module: 'jsch') + exclude(group: 'org.apache.directory.server', module: 'apacheds-kerberos-codec') + } + + testCompile project(':dunit') + testCompile "it.unimi.dsi:fastutil:${fastutilVersion}" + testCompile "org.scalatest:scalatest_${scalaBinaryVersion}:${scalatestVersion}" + + if (new File(rootDir, 'aqp/build.gradle').exists() && rootProject.hasProperty('snappydata.enterprise')) { + testRuntime project(':snappy-aqp_' + scalaBinaryVersion) + } + testRuntime files("${projectDir}/../tests/common/src/main/resources") + testRuntime "org.pegdown:pegdown:${pegdownVersion}" +} + +// Creates the version properties file and writes it to the resources dir +task createVersionPropertiesFile(dependsOn: 'processResources') { + def propertiesDir = file("${sourceSets.main.scala.outputDir}/io/snappydata") + outputs.file "${propertiesDir}/SnappyDataVersion.properties" + inputs.file "${rootProject.projectDir}/build.gradle" + + doLast { + + def props = [ + 'Product-Name' : productName, + 'Product-Version' : version, + 'Build-Id' : 
buildIdPrefix + buildNumber, + 'Build-Date' : buildDate, + 'Build-Platform' : osName.getName() + osVersion + osArch, + 'Build-Java-Version': jdkVersion, + 'Source-Date' : sourceDate, + 'Source-Revision' : commitId, + 'Source-Repository' : gitBranch, + ] + + writeProperties(propertiesDir, 'SnappyDataVersion.properties', + "Properties that control what version ${productName} will think it is. Changing these values may cause ${productName} to no longer function.", props) + } +} + +compileJava.dependsOn createVersionPropertiesFile + +task packageScalaDocs(type: Jar, dependsOn: scaladoc) { + classifier = 'javadoc' + from scaladoc +} +if (rootProject.hasProperty('enablePublish')) { + artifacts { + archives packageScalaDocs, packageSources + } +} + +def copyDirs(def srcDir, def destDir) { + mkdir(destDir) + copy { + from srcDir + into destDir + } +} + +test.dependsOn ':cleanJUnit' +scalaTest { + dependsOn ':cleanScalaTest' + doFirst { + // cleanup files since scalatest plugin does not honour workingDir yet + cleanIntermediateFiles(project.path) + environment 'TPCDS_SUITE': rootProject.hasProperty("tpcds") + } + doLast { + // cleanup files since scalatest plugin does not honour workingDir yet + cleanIntermediateFiles(project.path) + } +} +check.dependsOn test, scalaTest, dunitTest +if (rootProject.hasProperty('snappydata.enterprise')) { + check.dependsOn dunitSecurityTest +} + +archivesBaseName = 'snappydata-cluster_' + scalaBinaryVersion diff --git a/cluster/conf/debug.conf.template b/cluster/conf/debug.conf.template new file mode 100644 index 0000000000..63945982f1 --- /dev/null +++ b/cluster/conf/debug.conf.template @@ -0,0 +1,3 @@ +MEMBERS_FILE=$SNAPPY_HOME/work/members.txt +NO_OF_STACK_DUMPS=2 +INTERVAL_BETWEEN_DUMPS=10 diff --git a/cluster/conf/fairscheduler.xml b/cluster/conf/fairscheduler.xml new file mode 100644 index 0000000000..04a6381024 --- /dev/null +++ b/cluster/conf/fairscheduler.xml @@ -0,0 +1,30 @@ + + + + + + + FAIR + 1 + + + FAIR + 2 + 2 + + diff --git 
a/cluster/conf/leads.template b/cluster/conf/leads.template new file mode 100644 index 0000000000..6ad62e6d21 --- /dev/null +++ b/cluster/conf/leads.template @@ -0,0 +1,43 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# +# +# Here are examples using common configuration properties +# I) +# Specify the host name on which a Snappy lead will be started. Also +# specify the directory where the logs and metadata files +# for that lead instance will be created. If the directory and properties +# are not specified a default directory is created inside the SNAPPY_HOME directory. +# +# lead1 -dir=/tmp/data/lead (config args) +# +# II) +# Below is an example of how you can specify multiple locators for a lead and also +# set its heap size to 8 GB. +# +# lead1 -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=8g +# +# III) +# Another example which shows how to specify Spark properties. 
+# +# lead1 -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16 +# +# IV) Start the SnappyData Zeppelin interpreter on the Lead node +# +# lead1 -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16 -zeppelin.interpreter.enable=true -classpath=<path to the SnappyData Zeppelin interpreter jar> +# +# For more options, see http://snappydatainc.github.io/snappydata/configuration/#configuration +localhost diff --git a/cluster/conf/locators.template b/cluster/conf/locators.template new file mode 100644 index 0000000000..9a9bd3c5b1 --- /dev/null +++ b/cluster/conf/locators.template @@ -0,0 +1,63 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# +# +# By default, SnappyData launch scripts will start a single locator on localhost +# and use $SNAPPY_HOME/work/localhost-locator-1/ as the directory for logs and +# statistics. +# Assuming your network is ssh enabled you can add hostnames (one line per host) to start +# locator on multiple hosts. +# +# Example configurations: +# I) Configuring the hostname/IP address for cluster members to find the locator: +# +# By default, locator binds to provided hostname on port 10334 for discovering other members of the cluster. 
+# Optionally set peer-discovery-address to a hostname/IP (usually the internal LAN IP) where other members of +# cluster can talk to locator (configured as their -locators option) which is the provided hostname by default, +# and peer-discovery-port if you want to change port from the default 10334. +# The peer-discovery-address can be a wildcard like 0.0.0.0 to listen on all interfaces. +# +# locator1 -peer-discovery-port=9988 -locators=locator2:8899 +# +# If there are multiple locators in the cluster, then specify hostname:port of other locators in the +# -locators option. +# +# locator1 -peer-discovery-port=9988 -locators=locator2:8899 +# locator2 -peer-discovery-port=8899 -locators=locator1:9988 +# +# II) Using client bind address: +# +# One can specify bind address for clients to allow clients from outside this machine to connect +# using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost). +# +# In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments), +# you should also configure the -hostname-for-clients else clients from outside the network +# will not be able to connect to the locators/servers. It should be set to the public hostname +# or public IP address that will be sent to clients to connect to. It can be skipped for cases +# where private hostname is the same as public hostname (e.g. DNS translates appropriately). +# Default is the `client-bind-address` of the locator. +# +# -client-bind-address=<private hostname/IP> -hostname-for-clients=<public hostname/IP> +# +# III) Logging to different directory +# Specify the startup directory where the logs and configuration files for that locator instance +# are managed. 
+# +# locator1 -dir=/tmp/data/locator -client-bind-address=locator1 +# +# For more configuration options, see +# http://snappydatainc.github.io/snappydata/configuration/#configuration +localhost diff --git a/cluster/conf/log4j.properties.template b/cluster/conf/log4j.properties.template new file mode 100644 index 0000000000..0df4d6bfa0 --- /dev/null +++ b/cluster/conf/log4j.properties.template @@ -0,0 +1,136 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# +# Some parts taken from Spark's log4j.properties having license below. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +log4j.rootCategory=INFO, file + +# RollingFile appender +log4j.appender.file=org.apache.log4j.RollingFileAppender +log4j.appender.file.append=true +log4j.appender.file.file=snappydata.log +log4j.appender.file.MaxFileSize=1GB +log4j.appender.file.MaxBackupIndex=10000 +log4j.appender.file.layout=io.snappydata.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Appender for code dumps of WholeStageCodegenExec, CodeGenerator etc +log4j.appender.code=org.apache.log4j.RollingFileAppender +log4j.appender.code.append=true +log4j.appender.code.file=generatedcode.log +log4j.appender.code.MaxFileSize=1GB +log4j.appender.code.MaxBackupIndex=10000 +log4j.appender.code.layout=io.snappydata.log4j.PatternLayout +log4j.appender.code.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.out +log4j.appender.console.layout=io.snappydata.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.spark-project.jetty=WARN +org.spark-project.jetty.LEVEL=WARN +log4j.logger.org.mortbay.jetty=WARN +log4j.logger.org.eclipse.jetty=WARN + +# Some packages are noisy for no good reason. 
+log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false +log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF + +log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false +log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF + +log4j.additivity.hive.log=false +log4j.logger.hive.log=OFF + +log4j.additivity.parquet.hadoop.ParquetRecordReader=false +log4j.logger.parquet.hadoop.ParquetRecordReader=OFF + +log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false +log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF + +log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false +log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF + +log4j.additivity.hive.ql.metadata.Hive=false +log4j.logger.hive.ql.metadata.Hive=OFF + +log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false +log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR + +# Other Spark classes that generate unnecessary logs at INFO level +log4j.logger.org.apache.spark.broadcast.TorrentBroadcast=WARN +log4j.logger.org.apache.spark.ContextCleaner=WARN +log4j.logger.org.apache.spark.MapOutputTracker=WARN +log4j.logger.org.apache.spark.scheduler.TaskSchedulerImpl=WARN +log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN +log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN +log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN +log4j.logger.org.apache.spark.scheduler.FairSchedulableBuilder=WARN +log4j.logger.org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint=WARN +log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN +log4j.logger.org.apache.hadoop.hive=WARN +log4j.logger.org.apache.spark.sql.execution.datasources=WARN +log4j.logger.org.apache.spark.scheduler.SnappyTaskSchedulerImpl=WARN +log4j.logger.org.apache.spark.MapOutputTrackerMasterEndpoint=WARN +log4j.logger.org.apache.spark.MapOutputTrackerMaster=WARN 
+log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN +log4j.logger.org.apache.spark.MapOutputTrackerWorker=WARN +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR +log4j.logger.org.apache.hadoop.io.compress=WARN +log4j.logger.spark.jobserver.LocalContextSupervisorActor=WARN +log4j.logger.spark.jobserver.JarManager=WARN +log4j.logger.org.datanucleus=ERROR +# Task logger created in SparkEnv +log4j.logger.org.apache.spark.Task=WARN +log4j.logger.org.apache.spark.sql.catalyst.parser.CatalystSqlParser=WARN + +# Keep log-level of some classes as INFO even if root level is higher +log4j.logger.io.snappydata.impl.LeadImpl=INFO +log4j.logger.io.snappydata.impl.ServerImpl=INFO +log4j.logger.io.snappydata.impl.LocatorImpl=INFO +log4j.logger.spray.can.server.HttpListener=INFO + +# for generated code of plans +log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenExec=INFO, code +log4j.additivity.org.apache.spark.sql.execution.WholeStageCodegenExec=false +log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenRDD=INFO, code +log4j.additivity.org.apache.spark.sql.execution.WholeStageCodegenRDD=false +# for all Spark generated code (including ad-hoc UnsafeProjection calls etc) +log4j.logger.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=WARN, code +log4j.additivity.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=false +# for SnappyData generated code used on store (ComplexTypeSerializer, JDBC inserts ...) +log4j.logger.org.apache.spark.sql.store.CodeGeneration=INFO, code +log4j.additivity.org.apache.spark.sql.store.CodeGeneration=false diff --git a/cluster/conf/servers.template b/cluster/conf/servers.template new file mode 100644 index 0000000000..01085567f8 --- /dev/null +++ b/cluster/conf/servers.template @@ -0,0 +1,44 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# +# +# Specify the host name on which a Snappy server will be started. Also +# specify the startup directory where the logs and configuration files +# for that server instance are kept. If the directory and properties +# are not specified a default directory is created inside the SNAPPY_HOME directory. +# +# server1 -dir=/tmp/data/server [config args] +# +# An example of how you can specify multiple locators for a server and +# set its heap size to 64 GB. +# +# server1 -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=64g +# +# One can specify bind address for clients to allow clients from outside this machine to connect +# using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost). +# +# In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments), +# you should also configure the -hostname-for-clients else clients from outside the network +# will not be able to connect to the servers. It should be set to the public hostname +# or public IP address that will be sent to clients to connect to. It can be skipped for cases +# where private hostname is the same as public hostname (e.g. DNS translates appropriately). +# Default is the `client-bind-address` of the server. 
+# +# -client-bind-address= -client-port=1555 -hostname-for-clients= +# +# For more configuration options, +# see http://snappydatainc.github.io/snappydata/configuration/#configuration +localhost diff --git a/snappy-tools/conf/snappy-env.sh.template b/cluster/conf/snappy-env.sh.template similarity index 100% rename from snappy-tools/conf/snappy-env.sh.template rename to cluster/conf/snappy-env.sh.template diff --git a/cluster/sbin/check-dir-option.sh b/cluster/sbin/check-dir-option.sh new file mode 100755 index 0000000000..7f5261d37b --- /dev/null +++ b/cluster/sbin/check-dir-option.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Check whether "-dir" argument has been provided or not while invoking snappy-locator.sh/snappy-server.sh/snappy-lead.sh +# + +noOfInputsArgs=$# + +if [ $noOfInputsArgs -eq 0 ];then #if no arguments passed + echo "ERROR: No arguments have been provided. 
Please provide required arguments" + exit 1 +else + + # there can be two scenarios when the argument count is non-zero + # - script gets triggered from snappy-start-all.sh -- then the arguments are provided by snappy-nodes.sh + # - user executes an individual component script directly, e.g. snappy-locator.sh + # In both cases, check whether the -dir option has been provided + isPresent=0 + for argument in "$@"; do + if [[ "$argument" == -dir=* ]]; then + isPresent=1 # present + if [ -z "${argument#-dir=}" ]; then # present but empty, i.e. "-dir=" + isPresent=0 + #else # present; whether it is a valid directory is checked later by the launcher + fi + fi + done + + if [ $isPresent -eq 0 ]; then + echo "ERROR: Please provide -dir argument" + exit 1 + fi + exit 0 +fi diff --git a/cluster/sbin/collect-debug-artifacts.sh b/cluster/sbin/collect-debug-artifacts.sh new file mode 100755 index 0000000000..1890e47787 --- /dev/null +++ b/cluster/sbin/collect-debug-artifacts.sh @@ -0,0 +1,559 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+# + +#!/usr/bin/env bash + +timestamp_format="YYYY-MM-DD HH:MM[:SS]" + +function usage { + echo + echo "Usage: collect-debug-artifacts" + echo " [ -c conffile|--conf=conffile|--config=conffile ]" + echo " [ -o resultdir|--out=resultdir|--outdir=resultdir ]" + echo " [ -h|--help ]" + echo " [ -a|--all ]" + echo " [ -d|--dump ]" + echo " [ -v|--verbose ]" + echo " [ -s starttimestamp|--start=starttimestamp ]" + echo " [ -e endtimestamp|--end=endtimestamp ]" + echo " [ -x debugtarfile|--extract=debugtarfile ]" + echo + echo " Timestamp format: ${timestamp_format}" + echo +} + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} + +if [ -z "${SNAPPY_HOME}" ]; then + export SNAPPY_HOME="$(absPath "$(dirname "$(absPath "$0")")/..")" +fi + +while [ "$1" != "" ]; do + option="$1" + + case "$option" in + -c) + CONF_FILE="$2" + shift ;; + --conf=*|--config=*) + CONF_FILE="`echo "$1" | sed 's/^[^=]*=//'`" ;; + -x) + TAR_FILE="$2" + shift ;; + --extract=*|--xtract=*) + TAR_FILE="`echo "$1" | sed 's/^[^=]*=//'`" ;; + -o) + OUTPUT_DIR="$2" + shift ;; + --out=*|--outdir=*) + OUTPUT_DIR="`echo "$1" | sed 's/^[^=]*=//'`" ;; + -s) + START_TIME="$2" + shift ;; + --start=*) + START_TIME="`echo "$1" | sed 's/^[^=]*=//'`" ;; + -e) + END_TIME="$2" + shift ;; + --end=*) + END_TIME="`echo "$1" | sed 's/^[^=]*=//'`" ;; + -h|--help) + usage + exit 0 + ;; + -a|--all) + GET_EVERYTHING=1 + ;; + -v|--verbose) + VERBOSE=1 + ;; + -d|--dump) + DUMP_STACK=1 + ;; + -m|--hprofdump) + HPROF_DUMP=1 + ;; + *) + usage + exit 1 + esac + shift # past argument or value +done + +num_regex='^[0-9]+$' + +# Check configurations and assign defaults +function check_configs { + + if [ -n "${TAR_FILE}" ]; then + if [ ! -f "${TAR_FILE}" ]; then + echo "Debug Tar file ${TAR_FILE} does not exist" + exit 1 + fi + # no need of further configuration checks for extraction + return + fi + + if [ -z "${CONF_FILE}" ]; then + CONF_FILE="${SNAPPY_HOME}/conf/debug.conf.template" + fi + + if [ ! 
-f "${CONF_FILE}" ]; then + echo "Config file ${CONF_FILE} does not exist" + exit 1 + fi + + source $CONF_FILE + + if [ ! -f "${MEMBERS_FILE}" ]; then + echo "members file ${MEMBERS_FILE} does not exist" + exit 1 + fi + + if [ -z "${NO_OF_STACK_DUMPS}" ]; then + NO_OF_STACK_DUMPS=5 + fi + + if [ -z "${INTERVAL_BETWEEN_DUMPS}" ]; then + INTERVAL_BETWEEN_DUMPS=5 + fi + + if [ -z "${GET_EVERYTHING}" ]; then + GET_EVERYTHING=0 + fi + + if [ "${VERBOSE}" = "1" ]; then + echo CONF=$CONF_FILE + echo MEMINFO=$MEMBERS_FILE + echo NUM_STACK_DUMPS=$NO_OF_STACK_DUMPS + echo INTERVAL_BETWEEN_DUMPS=$INTERVAL_BETWEEN_DUMPS + echo GET_EVERYTHING=${GET_EVERYTHING} + echo START TIME = "${START_TIME}" + echo END TIME = "${END_TIME}" + echo SNAPPY_HOME = "${SNAPPY_HOME}" + echo OUTPUT_DIR = "${OUTPUT_DIR}" + echo TAR_FILE = "${TAR_FILE}" + fi + + if [ -z "${START_TIME}" ]; then + START_EPOCH=0 + else + START_EPOCH=$(date +%s --date "${START_TIME}" 2>/dev/null) + if ! [[ "$START_EPOCH" =~ $num_regex ]] ; then + echo "Error: Not expected date format '${START_TIME}'" + echo + echo "Expected Timestamp format: ${timestamp_format}" + exit 1 + fi + fi + + if [ -z "${END_TIME}" ]; then + END_EPOCH=0 + else + END_EPOCH=`date +%s --date "${END_TIME}" 2>/dev/null` + if ! [[ $END_EPOCH =~ $num_regex ]] ; then + echo "Error: Not expected date format '${END_TIME}'" + echo + echo "Expected Timestamp format: ${timestamp_format}" + exit 1 + fi + fi + + if [ "${START_EPOCH}" = "0" -a "${END_EPOCH}" != "0" ] \ + || [ "${START_EPOCH}" != "0" -a "${END_EPOCH}" = "0" ]; then + echo + echo "Please verify start and end time both" + echo "Timestamp format: ${timestamp_format}" + usage + exit 1 + fi +} + +collector_host=`hostname` + +function extract { + debugtarzip="$1" + xtractdir=`dirname ${debugtarzip}` + tarname=`basename ${debugtarzip}` + cd $xtractdir + tar -xf $tarname + for zf in `find . 
-name '*.gz'`; do + ( cd "`dirname "$zf"`" && gunzip "`basename "$zf"`" ) + done + echo "extracted in ${xtractdir}" +} + +function collect_data { + host="$1" + wd="$2" + + if [ "${VERBOSE}" = "1" ]; then + echo "Collecting data for process running on ${host} with working_dir ${wd}" + fi + + if [ "${VERBOSE}" = "1" ]; then + echo "Args Being passed for host ${host}" + echo "arg1 working directory = ${wd}" + echo "arg2 num_stack_dumps = ${NO_OF_STACK_DUMPS}" + echo "arg3 interval_dumps = ${INTERVAL_BETWEEN_DUMPS}" + echo "arg4 get_everything = ${GET_EVERYTHING}" + echo "arg5 collector_host = ${collector_host}" + echo "arg6 verbose = ${VERBOSE}" + echo "arg7 start epoch = ${START_EPOCH}" + echo "arg8 end epoch = ${END_EPOCH}" + fi + + if [ "${DUMP_STACK}" != "1" ]; then + NO_OF_STACK_DUMPS=0 + INTERVAL_BETWEEN_DUMPS=0 + fi + + if [ "${HPROF_DUMP}" != "1" ]; then + HPROF_DUMP=0 + fi + + # Create the outdir with the same name on each remote and collect everything there. + typeset -f | ssh $host "$(cat);collect_on_remote \"${wd}\" \"${NO_OF_STACK_DUMPS}\" \\ + \"${INTERVAL_BETWEEN_DUMPS}\" \"${GET_EVERYTHING}\" \"${collector_host}\" \\ + \"${VERBOSE}\" \"${START_EPOCH}\" \"${END_EPOCH}\" \"${HPROF_DUMP}\"" +} + +function collect_on_remote { + data_dir="$1" + num_stack_dumps="$2" + int_stack_dumps="$3" + get_all="$4" + collector_host="$5" + verbose="$6" + start_epoch="$7" + end_epoch="$8" + get_hprof="$9" + + # Create a .tmpcda dir if not exists else empty it + tmp_dir="$data_dir/.tmpcda" + if [ -d ${tmp_dir} ]; then + rm -rf ${tmp_dir}/* + else + mkdir -p $tmp_dir + retval=$? + if [ ! -d ${tmp_dir} ]; then + echo "FAILED TO CREATE tmp dir on ${host} at ${data_dir} with errno ${retval}" + exit 1 + fi + if [ "${verbose}" = "1" ]; then + echo "created dir ${tmp_dir} on remote host" + fi + fi + + # first get the pid. The latest log file with the header will have the pid + host=`hostname` + if [ ! 
-d "$data_dir" ]; then + echo "${data_dir} not found on host: ${host}" + exit 1 + fi + + proc_id="" + + cd $data_dir + + if [ "${get_all}" = "1" ]; then + if [ "${verbose}" = "1" ]; then + echo "collecting everything in the working dir" + fi + for l in $( ls *.log* 2> /dev/null ) + do + files+=($l) + done + for l in $( ls *.gfs* 2> /dev/null ) + do + files+=($l) + done + for l in $( ls *.conf* 2> /dev/null ) + do + files+=($l) + done + for l in $( ls *.out* 2> /dev/null ) + do + files+=($l) + done + for l in $( ls *.bin* 2> /dev/null ) + do + files+=($l) + done + elif [ "${start_epoch}" = "0" ]; then + if [ "${verbose}" = "1" ]; then + echo "collecting latest log files and all stats file" + fi + logs_latest_first=`ls -t *.log* | grep -Ev '(^start_.+\.log|^locator.+views\.log|derby.log)'` + all_logs=($logs_latest_first) + files=() + last_restart_log="" + for l in "${all_logs[@]}" + do + # If last log is got, get the one before that as well + if [ ! -z "$last_restart_log" ]; then + if [ "${verbose}" = "1" ]; then + echo "Adding the last file ${l} to the array" + fi + files+=($l) + break + fi + copyright_headers=`grep 'Copyright [ ]*([ ]*.[ ]*)' ${l}` + if [ ! 
-z "$copyright_headers" ]; then + # also check for the pid line and get the pid + proc_id=`sed -n 's/.*Process ID: \([0-9]\+\)$/\1/p' ${l}` + if [ "${verbose}" = "1" ]; then + echo "Adding latest copyright header file ${l} to the array" + fi + files+=($l) + last_restart_log="$l" + else + if [ "${verbose}" = "1" ]; then + echo "Adding file ${l} to the array" + fi + files+=($l) + fi + done + + # get all the gfs files as well + for l in $( ls *.gfs* 2> /dev/null ) + do + files+=($l) + done + else + if [ "${verbose}" = "1" ]; then + echo "collecting files based on modified time" + fi + files=() + prev_file_mod_epoch=0 + for l in $( ls -tr *.log* | grep -Ev '(^start_.+\.log|^locator.+views\.log|derby.log)' 2>/dev/null ) + do + file_mod_epoch=`stat -c %Y $l` + if [ "${file_mod_epoch}" -ge "${start_epoch}" -a "${prev_file_mod_epoch}" -le "${end_epoch}" ]; then + if [ "${verbose}" = "1" ]; then + echo "${l} MOD TIME = ${file_mod_epoch}" + echo "Adding file ${l} to the array" + fi + files+=($l) + file_added=1 + fi + prev_file_mod_epoch=$file_mod_epoch + done + + prev_file_mod_epoch=0 + for l in $( ls -tr *.gfs* 2>/dev/null ) + do + file_mod_epoch=`stat -c %Y $l` + if [ "${file_mod_epoch}" -ge "${start_epoch}" -a "${prev_file_mod_epoch}" -le "${end_epoch}" ]; then + if [ "${verbose}" = "1" ]; then + echo "${l} MOD TIME = ${file_mod_epoch}" + echo "Adding file ${l} to the array" + fi + files+=($l) + fi + prev_file_mod_epoch=$file_mod_epoch + done + fi + + # get the stack dumps if required + if [ "$num_stack_dumps" -gt "0" ]; then + # add the latest log file and keep it. Later after taking the dump take all the log files + # which got created after this one as rollover would have taken place. 
+ + logs_latest_first=`ls -t *.log* | grep -Ev '(^start_.+\.log|^locator.+views\.log|derby.log)' 2>/dev/null` + all_logs=($logs_latest_first) + latest_log=${all_logs[0]} + all_logs=($logs_latest_first) + for l in "${all_logs[@]}" + do + copyright_headers=`grep 'Copyright [ ]*([ ]*.[ ]*)' ${l}` + if [ ! -z "$copyright_headers" ]; then + # also check for the pid line and get the pid + proc_id=`sed -n 's/.*Process ID: \([0-9]\+\)$/\1/p' ${l}` + break + fi + done + + dump_num=1 + for i in `seq 1 ${num_stack_dumps}` + do + dump_num=`expr ${dump_num} + 1` + if [ "${verbose}" = "1" ]; then + echo "Taking the dump of process ${proc_id} on ${host} -- count ${i}" + fi + kill -URG $proc_id + kill -QUIT $proc_id + # record the last modified time of this log + if [ "$i" = "1" ]; then + first_dump_file_mod_epoch=`stat -c %Y $latest_log` + fi + + if [ "$i" -lt "${num_stack_dumps}" ]; then + echo "Sleeping for ${int_stack_dumps} seconds before taking next stack dump" + fi + sleep $int_stack_dumps + done + fi + + logs_latest_first=`ls -t *.log* | grep -Ev '(^start_.+\.log|^locator.+views\.log|derby.log)' 2>/dev/null` + all_logs=($logs_latest_first) + # add all the remaining whose modified time is greater than the last recorded + if [ ! 
-z "${first_dump_file_mod_epoch}" ]; then + for l in "${all_logs[@]}" + do + mod_epoch=`stat -c %Y $l` + if [ "${mod_epoch}" -gt "${first_dump_file_mod_epoch}" ]; then + files+=($l) + else + break + fi + done + fi + + # Add other files + for l in $( ls *.jfr* 2> /dev/null ) + do + files+=($l) + done + for l in $( ls jvmkill*.log 2> /dev/null ) + do + files+=($l) + done + for l in $( ls *.jmap 2> /dev/null ) + do + files+=($l) + done + + # Add hprof files too if asked for + if [ "${get_hprof}" = "1" ]; then + if [ "${verbose}" = "1" ]; then + echo "collecting hprof files too" + fi + for l in $( ls *.hprof 2> /dev/null ) + do + files+=($l) + done + fi + + for f in "${files[@]}" + do + if [ "${verbose}" = "1" ]; then + echo "copying file ${f} in dir ${tmp_dir}/" + fi + cp $f "${tmp_dir}/" + returnval=$? + if [ "${verbose}" = "1" ]; then + echo "copied and returnval=${returnval}" + fi + done + + # gzip all the files so that rsync is fast + cd "${tmp_dir}" + for i in `ls` + do + if [ "${verbose}" = "1" ]; then + echo "zipping ${i}" + fi + gzip $i + if [ "${verbose}" = "1" ]; then + echo "zipp of ${i} done" + fi + done + + if [ "${verbose}" = "1" ]; then + echo "FILES=${files[@]} zipped and copied to ${collector_host}:$tmp_dir}" + fi +} + +check_configs +# Assuming each line in the members info file has the following format +# host pid cwd + +if [ -n "${TAR_FILE}" ]; then + ( extract "${TAR_FILE}" ) + exit 0 +fi + +# Make output directory +TS=`date +%m.%d.%H.%M.%S` +if [ -z "${OUTPUT_DIR}" ]; then + out_dir="${SNAPPY_HOME}/work/debug_data_${TS}" +else + out_dir="${OUTPUT_DIR}/debug_data_${TS}" +fi + +if [ "${VERBOSE}" = "1" ]; then + echo "Top Level output dir = ${out_dir}" +fi + +mkdir -p $out_dir + +# get the uniq lines from the members file +tmp_members_file="$(mktemp /tmp/debug_mem.XXXX)" + +sort $MEMBERS_FILE | uniq > $tmp_members_file + +all_pids=() +while read -r line || [[ -n "$line" ]]; do + if [ "${VERBOSE}" = "1" ]; then + echo "Line read from file: $line" + 
fi + + read host cwd <<< $line + if [ "${VERBOSE}" = "1" ]; then + echo "host: $host pid: $pid and cwd: $cwd" + fi + + collect_data $host $cwd & + all_pids+=($!) +done < $tmp_members_file + +# wait for all the collection to end on respective hosts +# Then rsync 1 by 1 +for p in "${all_pids[@]}" +do + if [ "${VERBOSE}" = "1" ]; then + echo "Waiting for pid ${p}" + fi + wait $p 2> /dev/null +done + +while read -r line || [[ -n "$line" ]]; do + read host cwd <<< $line + basenamedir=`basename $cwd` + hostout_dir="${out_dir}/${host}-${basenamedir}" + mkdir "${hostout_dir}" + if [ "${VERBOSE}" = "1" ]; then + rsync -av --remove-source-files "${host}:${cwd}/.tmpcda/*" "${hostout_dir}"/ + else + rsync -a --remove-source-files "${host}:${cwd}/.tmpcda/*" "${hostout_dir}"/ + fi +done < $tmp_members_file + +rm -rf $tmp_members_file + +# make tar ball +echo +echo "Collected artifacts in tar file: ${out_dir}.tar" +echo +cd "${out_dir}/.." +tar -cf "${out_dir}.tar" $(basename $out_dir) +rm -rf ${out_dir} diff --git a/cluster/sbin/common.funcs b/cluster/sbin/common.funcs new file mode 100644 index 0000000000..119a649fea --- /dev/null +++ b/cluster/sbin/common.funcs @@ -0,0 +1,28 @@ +# map functions for bash 3 that does not have associative arrays + +function keyIndex() { + key="$1" + shift + keys=("$@") + + index=0 + for k in ${keys[@]}; do + if [ "$k" = "$key" ]; then + echo $index + break + fi + ((index++)) + done +} + +function keyPutIndex() { + key="$1" + shift + keys=("$@") + + index=$(keyIndex "$key" "${keys[@]}") + if [ -z "$index" ]; then + index=${#keys[@]} + fi + echo $index +} diff --git a/cluster/sbin/snappy-config.sh b/cluster/sbin/snappy-config.sh new file mode 100755 index 0000000000..30cc4c4066 --- /dev/null +++ b/cluster/sbin/snappy-config.sh @@ -0,0 +1,33 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. 
You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# included in all the snappy scripts with source command +# should not be executable directly +# also should not be passed any arguments, since we need original $* + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} + +if [ -z "$SNAPPY_HOME" ]; then + if [ -z "$SPARK_HOME" ]; then + export SPARK_HOME="$(absPath "$(dirname "$(absPath "$0")")/..")" + fi + export SNAPPY_HOME="${SPARK_HOME}" +elif [ -z "$SPARK_HOME" ]; then + export SPARK_HOME="${SNAPPY_HOME}" +fi diff --git a/cluster/sbin/snappy-encrypt-password.sh b/cluster/sbin/snappy-encrypt-password.sh new file mode 100755 index 0000000000..5a4a42a479 --- /dev/null +++ b/cluster/sbin/snappy-encrypt-password.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} + +sbin="$(dirname "$(absPath "$0")")" + +. "$sbin/common.funcs" + +if [ -z "$1" ]; then + echo "At least one user name must be provided" + echo "Usage: `basename $0` ..." + exit 1 +fi + +trap "stty echo; exit $?" 
EXIT + +# get the plain-text passwords for all specified users +declare -a users +declare -a passwords +for user in "$@"; do + while /bin/true; do + echo -n "Enter password for $user: " + stty -echo + read passwd1 + stty echo + echo + echo -n "Re-enter password for $user: " + stty -echo + read passwd2 + stty echo + echo + if [ "${passwd1}" != "${passwd2}" ]; then + echo Passwords for $user do not match + echo + else + break + fi + done + userIndex=$(keyPutIndex "$user" "${users[@]}") + users[$userIndex]="$user" + passwords[$userIndex]="$passwd1" +done + +# get locator host:port +hostPort="$($sbin/snappy-locators.sh start -dump-server-info)" + +# check for no specified client-port which will default to 1527 as per product defaults +clientPort="" +case "${hostPort}" in + *:) clientPort="-client-port=1527"; hostPort="${hostPort}1527" ;; +esac + +tmpOut="$(mktemp)" +ENCRYPT_PASSWORD_OPTIONS="-user=app -auth-provider=NONE -J-Dgemfirexd.thrift-default=false -log-level=warning $clientPort" +export ENCRYPT_PASSWORD_OPTIONS +$sbin/snappy-locators.sh start 2>&1 | tee "${tmpOut}" + +locatorStarted="$(grep 'SnappyData Locator pid: .* status: running' "${tmpOut}")" +rm -f "${tmpOut}" + +user= +passwd= +callStr= + +for index in ${!users[@]}; do + user="${users[$index]}" + passwd="${passwords[$index]}" + callStr="${callStr} call sys.encrypt_password('$user', '$passwd', 'AES', 0);" +done + +if [ -n "${locatorStarted}" ]; then + +# connect to temporary locators using DRDA +$sbin/../bin/snappy << EOF +connect 'jdbc:snappydata:drda://$hostPort/;load-balance=false'; +$callStr +EOF + +$sbin/snappy-locators.sh stop + +else + +# connect to existing cluster using default thrift +$sbin/../bin/snappy << EOF +connect 'jdbc:snappydata://$hostPort/;user=$user;password=$passwd'; +$callStr +EOF + +fi diff --git a/cluster/sbin/snappy-lead.sh b/cluster/sbin/snappy-lead.sh new file mode 100755 index 0000000000..349fd67acd --- /dev/null +++ b/cluster/sbin/snappy-lead.sh @@ -0,0 +1,51 @@ 
+#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a lead on the machine this script is executed on. +# + +usage="Usage: snappy-lead.sh (start|stop|status) -locators=locatorhost:port[,locatorhostN:portN] -dir=directory" + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +mode=$1 + +shift + +. "$sbin/snappy-config.sh" lead +. "$sbin/spark-config.sh" + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + +# Start up the lead instance +function start_instance { + "$SNAPPY_HOME"/bin/snappy leader "$mode" "$@" +} + +#Since want to test whether the result is zero, don't need to treat it as an return value using $? . Just treat the command itself as a conditional. +if "$sbin/check-dir-option.sh" "$@"; then + start_instance "$@" +else + echo $usage +fi + diff --git a/cluster/sbin/snappy-leads.sh b/cluster/sbin/snappy-leads.sh new file mode 100755 index 0000000000..d0a36f7cfb --- /dev/null +++ b/cluster/sbin/snappy-leads.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. 
You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a lead instance on each machine specified in the conf/leads file. + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + +CONF_DIR_OPT= +# Check if --config is passed as an argument. It is an optional parameter. +if [ "$1" == "--config" ] +then + CONF_DIR=$2 + CONF_DIR_OPT="--config $CONF_DIR" + shift 2 +fi + +# Launch the slaves +if echo $@ | grep -qw start; then + "$sbin/snappy-nodes.sh" lead $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-lead.sh" "$@" $LEAD_STARTUP_OPTIONS +else + "$sbin/snappy-nodes.sh" lead $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-lead.sh" "$@" +fi diff --git a/cluster/sbin/snappy-locator.sh b/cluster/sbin/snappy-locator.sh new file mode 100755 index 0000000000..96498cfdc4 --- /dev/null +++ b/cluster/sbin/snappy-locator.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. 
See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a locator on the machine this script is executed on. +# + +usage="Usage: snappy-locator.sh (start|stop|status) -dir=directory" + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +mode=$1 + +shift + +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + +# Start up the locator instance +function start_instance { + "$SNAPPY_HOME"/bin/snappy locator "$mode" "$@" +} + +#Since want to test whether the result is zero, don't need to treat it as an return value using $? . Just treat the command itself as a conditional. +if "$sbin/check-dir-option.sh" "$@"; then + start_instance "$@" +else + echo $usage +fi + diff --git a/cluster/sbin/snappy-locators.sh b/cluster/sbin/snappy-locators.sh new file mode 100755 index 0000000000..0898928bf8 --- /dev/null +++ b/cluster/sbin/snappy-locators.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a locator instance on each machine specified in the conf/locators file. + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +. 
"$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + +CONF_DIR_OPT= +# Check if --config is passed as an argument. It is an optional parameter. +if [ "$1" == "--config" ] +then + CONF_DIR=$2 + CONF_DIR_OPT="--config $CONF_DIR" + shift 2 +fi + +# Launch the slaves +if echo $@ | grep -qw start; then + "$sbin/snappy-nodes.sh" locator $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-locator.sh" "$@" $LOCATOR_STARTUP_OPTIONS $ENCRYPT_PASSWORD_OPTIONS +else + "$sbin/snappy-nodes.sh" locator $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-locator.sh" "$@" +fi diff --git a/cluster/sbin/snappy-nodes.sh b/cluster/sbin/snappy-nodes.sh new file mode 100755 index 0000000000..394e5c0ade --- /dev/null +++ b/cluster/sbin/snappy-nodes.sh @@ -0,0 +1,451 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Run a shell command on all nodes. +# +# Environment Variables +# +# SPARK_CONF_DIR Alternate conf dir. Default is ${SNAPPY_HOME}/conf. +# SPARK_SSH_OPTS Options passed to ssh when running remote commands. +## + +usage="Usage: snappy-nodes.sh locator/server/lead [-bg|--background] [--config ] command..." 
+ +# if no args specified, show usage +if [ $# -le 0 ]; then + echo $usage + exit 1 +fi + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +. "$sbin/common.funcs" +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + + +componentType=$1 +shift + +# Whether to apply the operation in background +RUN_IN_BACKGROUND= +if [ "$1" = "-bg" -o "$1" = "--background" ]; then + RUN_IN_BACKGROUND=1 + shift +fi +export RUN_IN_BACKGROUND + +# Check if --config is passed as an argument. It is an optional parameter. +# Exit if the argument is not a directory. +if [ "$1" == "--config" ] +then + shift + conf_dir="$1" + if [ ! -d "$conf_dir" ] + then + echo "ERROR : $conf_dir is not a directory" + echo $usage + exit 1 + else + export SPARK_CONF_DIR="$conf_dir" + fi + shift +fi + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + + +FIRST_LOCATOR= +case $componentType in + + (locator) + if [ -f "${SPARK_CONF_DIR}/locators" ]; then + HOSTLIST="${SPARK_CONF_DIR}/locators" + fi + FIRST_LOCATOR=1 + ;; + + (server) + if [ -f "${SPARK_CONF_DIR}/servers" ]; then + HOSTLIST="${SPARK_CONF_DIR}/servers" + fi + if [ -f "${SPARK_CONF_DIR}/leads" ]; then + LEADHOSTLIST="${SPARK_CONF_DIR}/leads" + fi + ;; + (lead) + if [ -f "${SPARK_CONF_DIR}/leads" ]; then + HOSTLIST="${SPARK_CONF_DIR}/leads" + fi + ;; + (*) + echo $usage + exit 1 + ;; +esac +export FIRST_LOCATOR + +# By default disable strict host key checking +if [ "$SPARK_SSH_OPTS" = "" ]; then + SPARK_SSH_OPTS="-o StrictHostKeyChecking=no" +fi + +default_loc_port=10334 + +function readAllLocators() { + retVal= + while read loc || [[ -n "${loc}" ]]; do + [[ -z "$(echo $loc | grep ^[^#] | grep -v ^$ )" ]] && continue + if [ -n "$(echo $loc | grep peer-discovery-port)" ]; then + retVal="$retVal,$(echo $loc | sed "s#\([^ ]*\).*peer-discovery-port\s*=\s*\([^ ]*\).*#\1:\2#g")" + else + retVal="$retVal,$(echo $loc | sed "s#\([^ 
]*\).*#\1:$default_loc_port#g")" + fi + done < "${SPARK_CONF_DIR}/locators" + echo ${retVal#","} +} + +LOCATOR_IS_LOCAL= +if [ -f "${SPARK_CONF_DIR}/locators" ]; then + allLocators="$(readAllLocators)" + LOCATOR_ARGS="-locators=$allLocators" + if echo $allLocators | egrep -wq '(localhost|127\.0\.0\.1|::1)'; then + LOCATOR_IS_LOCAL=1 + fi +else + LOCATOR_ARGS="-locators=localhost[$default_loc_port]" + LOCATOR_IS_LOCAL=1 +fi + +MEMBERS_FILE="$SNAPPY_HOME/work/members.txt" +isStart= + +function execute() { + dirparam="$(echo $args | sed -n 's/^.*\(-dir=[^ ]*\).*$/\1/p')" + + # Set directory folder if not already set. + if [ -z "${dirparam}" ]; then + dirfolder="$SNAPPY_HOME"/work/"$host"-$componentType-$index + dirparam="-dir=${dirfolder}" + args="${args} ${dirparam}" + fi + + # For stop and status mode, don't pass any parameters other than directory + if echo $"${@// /\\ }" | grep -wq "start"; then + # Set a default locator if not already set. + if ! echo $args $"${@// /\\ }" | egrep -q '[-](locators=|peer-discovery-address=)'; then + args="${args} $LOCATOR_ARGS" + # inject start-locators argument if not present + if [ "${componentType}" = "locator" -a -z "$(echo $args $"${@// /\\ }" | grep 'start-locator=')" ]; then + port=$(echo $args | grep -wo "peer-discovery-port=[^ ]*" | sed 's#peer-discovery-port=##g') + if [ -z "$port" ]; then + port=$default_loc_port + fi + args="${args} -start-locator=$host:$port" + fi + fi + # Reduce discovery and join timeouts, retries for first locator to reduce self-wait + if [ -n "$FIRST_LOCATOR" ]; then + FIRST_LOCATOR= + if ! echo $args $"${@// /\\ }" | grep -q 'Dp2p.discoveryTimeout='; then + args="${args} -J-Dp2p.discoveryTimeout=1000" + fi + if ! echo $args $"${@// /\\ }" | grep -q 'Dp2p.joinTimeout='; then + args="${args} -J-Dp2p.joinTimeout=2000" + fi + if ! 
echo $args $"${@// /\\ }" | grep -q 'Dp2p.minJoinTries='; then + args="${args} -J-Dp2p.minJoinTries=1" + fi + fi + + bindAddress= + clientBindAddress= + clientHostName= + clientPort= + dumpServerInfo= + for arg in $args $"${@// /\\ }"; do + case "$arg" in + -bind-address=*) bindAddress="$(echo $arg | sed 's/-bind-address=//')" ;; + -client-bind-address=*) clientBindAddress="$(echo $arg | sed 's/-client-bind-address=//')" ;; + -hostname-for-clients=*) clientHostName="$(echo $arg | sed 's/-hostname-for-clients=//')" ;; + -client-port=*) clientPort="$(echo $arg | sed 's/-client-port=//')" ;; + -dump-server-info) dumpServerInfo=1 ;; + esac + done + # set the default bind-address and SPARK_LOCAL_IP + if [ -z "${bindAddress}" ]; then + args="${args} -bind-address=$host" + bindAddress="${host}" + fi + preCommand="${preCommand}export SPARK_LOCAL_IP=$bindAddress; " + + # set the default client-bind-address and locator's peer-discovery-address + if [ -z "${clientBindAddress}" -a "${componentType}" != "lead" ]; then + args="${args} -client-bind-address=${host}" + clientBindAddress="${host}" + fi + if [ -z "$(echo $args $"${@// /\\ }" | grep 'peer-discovery-address=')" -a "${componentType}" = "locator" ]; then + args="${args} -peer-discovery-address=${host}" + fi + # set the public hostname for Spark Web UI to hostname-for-clients if configured + if [ -n "${clientHostName}" ]; then + preCommand="${preCommand}export SPARK_PUBLIC_DNS=${clientHostName}; " + fi + # set host-data=false explicitly for leads + if [ "${componentType}" = "lead" ]; then + args="${args} -host-data=false" + fi + + if [ -n "${dumpServerInfo}" ]; then + if [ -n "${clientHostName}" ]; then + echo "${clientHostName}:${clientPort}" + else + echo "${clientBindAddress}:${clientPort}" + fi + exit 0 + fi + else + args="${dirparam}" + fi + + if [ ! -d "${SNAPPY_HOME}/work" ]; then + mkdir -p "${SNAPPY_HOME}/work" + ret=$? 
+ if [ "$ret" != "0" ]; then + echo "Could not create work directory ${SNAPPY_HOME}/work" + exit 1 + fi + fi + + postArgs= + for arg in "${@// /\\ }"; do + case "$arg" in + -*) postArgs="$postArgs $arg" + esac + done + #copy the conf files into other node before starting the launch processs + if [[ -n "$isStart" && $SKIP_CONF_COPY -eq 0 ]]; then + copyConf "$@" + fi + if [ "$host" != "localhost" ]; then + if [ "$dirfolder" != "" ]; then + # Create the directory for the snappy component if the folder is a default folder + (ssh $SPARK_SSH_OPTS "$host" \ + "{ if [ ! -d \"$dirfolder\" ]; then mkdir -p \"$dirfolder\"; fi; } && " $"${preCommand}${@// /\\ } ${args} ${postArgs};" \ + < /dev/null 2>&1 | sed "s/^/$host: /") & + LAST_PID="$!" + else + # ssh reads from standard input and eats all the remaining lines.Connect its standard input to nowhere: + (ssh $SPARK_SSH_OPTS "$host" $"${preCommand}${@// /\\ } ${args} ${postArgs}" < /dev/null \ + 2>&1 | sed "s/^/$host: /") & + LAST_PID="$!" + fi + else + if [ "$dirfolder" != "" ]; then + # Create the directory for the snappy component if the folder is a default folder + if [ ! -d "$dirfolder" ]; then + mkdir -p "$dirfolder" + fi + fi + launchcommand="${@// /\\ } ${args} ${postArgs} < /dev/null 2>&1" + eval $launchcommand & + LAST_PID="$!" 
+ fi + if [ -z "$RUN_IN_BACKGROUND" ]; then + wait $LAST_PID + else + sleep 1 + if [ -e "/proc/$LAST_PID/status" ]; then + sleep 1 + fi + fi + + df=${dirfolder} + if [ -z "${df}" ]; then + df=$(echo ${dirparam} | cut -d'=' -f2) + fi + + if [ -z "${df}" ]; then + echo "No run directory identified for ${host}" + exit 1 + fi + + echo "${host} ${df}" >> $MEMBERS_FILE +} + +index=1 +isServerStart= +declare -a leadHosts +declare -a leadCounts +if [ "$componentType" = "server" -a -n "$(echo $"${@// /\\ }" | grep -w start)" ]; then + isServerStart=1 +fi +# check leads on the same nodes as servers +# (and if none then memory-size can be increased) +if [ -n "$LEADHOSTLIST" -a -n "$isServerStart" ]; then + while read slave || [[ -n "$slave" ]]; do + [[ -z "$(echo $slave | grep ^[^#])" ]] && continue + host="$(echo "$slave "| tr -s ' ' | cut -d ' ' -f1)" + args="$(echo "$slave "| tr -s ' ' | cut -d ' ' -f2-)" + leadIndex=$(keyIndex "$host" "${leadHosts[@]}") + leadPutIndex="$leadIndex" + if [ -z "$leadPutIndex" ]; then + leadPutIndex=${#leadCounts[@]} + fi + leadHosts[$leadPutIndex]="$host" + # marker for the case when lead heap/memory has been configured explicitly + # in which case server side auto-configuration will also be skipped + if echo $args $"${@// /\\ }" | grep -q "heap-size=\|memory-size="; then + leadCounts[$leadPutIndex]=-1 + elif [ -z "$leadIndex" ]; then + leadCounts[$leadPutIndex]=1 + else + ((leadCounts[$leadPutIndex]++)) + fi + done < "$LEADHOSTLIST" +fi + +function getNumLeadsOnHost() { + host="$1" + numLeadsOnHost= + if [ ${#leadCounts[@]} -gt 0 ]; then + leadIndex=$(keyIndex "$host" "${leadHosts[@]}") + if [ -n "$leadIndex" ]; then + numLeadsOnHost="${leadCounts[$leadIndex]}" + fi + elif [ "$host" = "localhost" ]; then + numLeadsOnHost=1 + fi + if [ -z "$numLeadsOnHost" ]; then + numLeadsOnHost=0 + fi + echo $numLeadsOnHost +} +# function for copying all the configuration files into the other nodes/members of the cluster +function copyConf() { + 
currentNodeIpAddr=$(ip addr | grep 'state UP' -A2 | head -n3 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') + currentNodeHostName=$(uname -n) + + if [[ "$host" != "$currentNodeIpAddr" && "$host" != "localhost" && "$host" != $currentNodeHostName ]] ; then + #loop to get the all the files avaliable in Conf directory + for entry in "${SPARK_CONF_DIR}"/*; do + if [ -f "$entry" ];then + #${file%.*} to get the filename without the extension and ${file##*.} to get the extension alone + fileName=$(basename $entry) + template=".template" + #check the extension, interested in files those doesn't have template extension + if [[ ! "$fileName" = @(*.template) ]]; then + if ! ssh $host "test -e $entry"; then #"File does not exist." + scp ${SPARK_CONF_DIR}/$fileName $host:${SPARK_CONF_DIR} + else + backupDir="backup" + if [[ ! -z $(ssh $host "cat $entry" | diff - "$entry") ]] ; then + backupFileName=${fileName}_${START_ALL_TIMESTAMP} + echo "INFO: Copied $filename from this host to $host. Moved the original $filename on $host to $backupFileName." + (ssh "$host" "{ if [ ! 
-d \"${SPARK_CONF_DIR}/$backupDir\" ]; then mkdir \"${SPARK_CONF_DIR}/$backupDir\"; fi; } ") + ssh $host "mv ${SPARK_CONF_DIR}/$fileName ${SPARK_CONF_DIR}/$backupDir/$backupFileName" + scp ${SPARK_CONF_DIR}/$fileName $host:${SPARK_CONF_DIR} + fi + #fi + fi + fi # end of if, check extension + fi # end of if to get each file + done #end of for loop + fi # end of if +} + +if [ -n "${HOSTLIST}" ]; then + declare -a arr + declare -a hosts + declare -a counts + isStartOrStatus= + while read slave || [[ -n "${slave}" ]]; do + [[ -z "$(echo $slave | grep ^[^#])" ]] && continue + arr[${#arr[@]}]="$slave" + if [ -n "$isServerStart" ]; then + host="$(echo "$slave "| tr -s ' ' | cut -d ' ' -f1)" + hostIndex=$(keyIndex "$host" "${hosts[@]}") + if [ -z "$hostIndex" ]; then + hostIndex=${#hosts[@]} + counts[$hostIndex]=1 + else + ((counts[$hostIndex]++)) + fi + hosts[$hostIndex]="$host" + fi + done < "$HOSTLIST" + + numSlaves=${#arr[@]} + if [ $numSlaves -eq 0 ]; then + arr[0]=localhost + hosts[0]=localhost + counts[0]=1 + numSlaves=1 + fi + + if echo $"${@// /\\ }" | grep -wq "start\|status"; then + isStartOrStatus=1 + fi + if echo $"${@// /\\ }" | grep -wq "start"; then + isStart=1 + fi + for slave in "${arr[@]}"; do + if [ -n "$isStartOrStatus" ]; then + host="$(echo "$slave "| tr -s ' ' | cut -d ' ' -f1)" + args="$(echo "$slave "| tr -s ' ' | cut -d ' ' -f2-)" + # disable implicit off-heap for nodes having multiple servers configured + if [ -n "$isServerStart" ]; then + hostIndex=$(keyIndex "$host" "${hosts[@]}") + if [ -n "$hostIndex" -a ${counts[$hostIndex]} -gt 1 -a -z "$(echo $args $"${@// /\\ }" | grep 'memory-size=')" ]; then + args="$args -memory-size=0" + fi + # check number of leads on the same node + args="$args -J-Dsnappydata.numLeadsOnHost=$(getNumLeadsOnHost "$host")" + fi + execute "$@" + fi + ((index++)) + done + + # stop nodes in reverse order + if echo $"${@// /\\ }" | grep -wq "stop"; then + line=$numSlaves + if [ $((index-1)) -eq $line ]; then + for (( 
i=$numSlaves-1 ; i>=0 ; i-- )) ; do + ((index--)) + CONF_ARG=${arr[$i]} + host="$(echo "$CONF_ARG "| tr -s ' ' | cut -d ' ' -f1)" + args="$(echo "$CONF_ARG "| tr -s ' ' | cut -d ' ' -f2-)" + execute "$@" + done + fi + fi +else + host="localhost" + args="" + if [ -n "$isServerStart" ]; then + args="$args -J-Dsnappydata.numLeadsOnHost=$(getNumLeadsOnHost "$host")" + fi + execute "$@" +fi +wait diff --git a/cluster/sbin/snappy-server.sh b/cluster/sbin/snappy-server.sh new file mode 100755 index 0000000000..96efbd12b6 --- /dev/null +++ b/cluster/sbin/snappy-server.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a server on the machine this script is executed on. +# + +usage="Usage: snappy-server.sh (start|stop|status) -locators=locatorhost:port[,locatorhostN:portN] -dir=directory" + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +mode=$1 + +shift + +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. "$SNAPPY_HOME/bin/load-snappy-env.sh" + +# Start up the server instance +function start_instance { + "$SNAPPY_HOME"/bin/snappy server "$mode" "$@" +} + +#Since want to test whether the result is zero, don't need to treat it as an return value using $? . Just treat the command itself as a conditional. 
+if "$sbin/check-dir-option.sh" "$@"; then + start_instance "$@" +else + echo $usage +fi + diff --git a/cluster/sbin/snappy-servers.sh b/cluster/sbin/snappy-servers.sh new file mode 100755 index 0000000000..3f4278ded4 --- /dev/null +++ b/cluster/sbin/snappy-servers.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Starts a server instance on each machine specified in the conf/servers file. + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + +. "$SNAPPY_HOME/bin/load-spark-env.sh" +. 
"$SNAPPY_HOME/bin/load-snappy-env.sh" + + +# Check for background specification +BACKGROUND=-bg +if [ "$1" = "-bg" -o "$1" = "--background" ]; then + BACKGROUND=-bg + shift +elif [ "$1" = "-fg" -o "$1" = "--foreground" ]; then + BACKGROUND="" + shift +fi + +# Check for conf dir specification +CONF_DIR_OPT= +if [ "$1" = "--config" ]; then + CONF_DIR_OPT="--config $2" + shift 2 +fi + +# Launch the slaves +if echo $@ | grep -qw start; then + "$sbin/snappy-nodes.sh" server $BACKGROUND $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-server.sh" "$@" $SERVER_STARTUP_OPTIONS +else + "$sbin/snappy-nodes.sh" server $BACKGROUND $CONF_DIR_OPT cd "$SNAPPY_HOME" \; "$sbin/snappy-server.sh" "$@" +fi diff --git a/cluster/sbin/snappy-start-all.sh b/cluster/sbin/snappy-start-all.sh new file mode 100755 index 0000000000..caeef84d0d --- /dev/null +++ b/cluster/sbin/snappy-start-all.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Start all snappy daemons - locator, lead and server on the nodes specified in the +# conf/locators, conf/leads and conf/servers files respectively + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +# Load the Spark configuration +. "$sbin/snappy-config.sh" +. 
"$sbin/spark-config.sh" + +MEMBERS_FILE="$SNAPPY_HOME/work/members.txt" +if [ -f "${MEMBERS_FILE}" ]; then + rm $MEMBERS_FILE +fi + +BACKGROUND=-bg +clustermode= +CONF_DIR_ARG= +SKIP_CONF_COPY=0 + +while (( "$#" )); do + param="$1" + case $param in + # Check for background/foreground start + -bg | --background) + BACKGROUND=-bg + ;; + -fg | --foreground) + BACKGROUND=-fg + ;; + -conf | --config) + conf_dir="$2" + if [ ! -d $conf_dir ] ; then + echo "Conf directory $conf_dir does not exist" + exit 1 + fi + CONF_DIR_ARG="--config $conf_dir" + shift ;; + rowstore) + clustermode="rowstore" + ;; + --skipconfcopy) + SKIP_CONF_COPY=1 + ;; + *) + ;; + esac + shift +done + +export START_ALL_TIMESTAMP="$(date +"%Y_%m_%d_%H_%M_%S")" +export SKIP_CONF_COPY + +# Start Locators +"$sbin"/snappy-locators.sh $CONF_DIR_ARG start $clustermode "$@" + +# Start Servers +"$sbin"/snappy-servers.sh $BACKGROUND $CONF_DIR_ARG start $clustermode "$@" + +# Start Leads +if [ "$clustermode" != "rowstore" ]; then + "$sbin"/snappy-leads.sh $CONF_DIR_ARG start +fi diff --git a/cluster/sbin/snappy-status-all.sh b/cluster/sbin/snappy-status-all.sh new file mode 100755 index 0000000000..c63c541b06 --- /dev/null +++ b/cluster/sbin/snappy-status-all.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+# + +# Report the status of all snappy daemons - locator, lead and server on the nodes specified in the +# conf/locators, conf/leads and conf/servers files respectively + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +# Load the Spark configuration +. "$sbin/snappy-config.sh" +. "$sbin/spark-config.sh" + + +# Status of Locators +"$sbin"/snappy-locators.sh status "$@" + +# Status of Servers +"$sbin"/snappy-servers.sh -fg status "$@" + +# Status of Leads +if [ "$1" != "rowstore" ]; then + "$sbin"/snappy-leads.sh status "$@" +fi diff --git a/cluster/sbin/snappy-stop-all.sh b/cluster/sbin/snappy-stop-all.sh new file mode 100755 index 0000000000..dcca58f772 --- /dev/null +++ b/cluster/sbin/snappy-stop-all.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +# Stops all snappy daemons - locator, lead and server on the nodes specified in the +# conf/locators, conf/leads and conf/servers files respectively + +function absPath() { + perl -MCwd -le 'print Cwd::abs_path(shift)' "$1" +} +sbin="$(dirname "$(absPath "$0")")" + +# Load the Spark configuration + +. "$sbin/snappy-config.sh" +. 
"$sbin/spark-config.sh" + +BACKGROUND=-fg +clustermode= +CONF_DIR_ARG= + +while (( "$#" )); do + param="$1" + case $param in + # Check for background/foreground stop + -bg | --background) + BACKGROUND="$param" + ;; + -fg | --foreground) + BACKGROUND=-fg + ;; + -conf | --config) + conf_dir="$2" + if [ ! -d $conf_dir ] ; then + echo "Conf directory $conf_dir does not exists" + exit 1 + fi + CONF_DIR_ARG="--config $conf_dir" + shift ;; + rowstore) + clustermode="rowstore" + ;; + *) + ;; + esac + shift +done + +# Stop Leads +if [ "$clustermode" != "rowstore" ]; then + "$sbin"/snappy-leads.sh $CONF_DIR_ARG stop +fi + +# Stop Servers +"$sbin"/snappy-servers.sh $BACKGROUND $CONF_DIR_ARG stop + +# Stop locators +"$sbin"/snappy-locators.sh $CONF_DIR_ARG stop diff --git a/snappy-dunits/src/test/scala/io/snappydata/dunit/HiveMetastoreClientAccessDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/HiveMetastoreClientAccessDUnitTest.scala similarity index 82% rename from snappy-dunits/src/test/scala/io/snappydata/dunit/HiveMetastoreClientAccessDUnitTest.scala rename to cluster/src/dunit/scala/io/snappydata/HiveMetastoreClientAccessDUnitTest.scala index b06b5064d5..06937d62e5 100644 --- a/snappy-dunits/src/test/scala/io/snappydata/dunit/HiveMetastoreClientAccessDUnitTest.scala +++ b/cluster/src/dunit/scala/io/snappydata/HiveMetastoreClientAccessDUnitTest.scala @@ -1,13 +1,29 @@ -package io.snappydata.dunit +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata import java.util.Properties import com.gemstone.gemfire.distributed.internal.InternalDistributedSystem import com.pivotal.gemfirexd.internal.engine.store.GemFireStore -import dunit.AvailablePortHelper -import io.snappydata.dunit.cluster.ClusterManagerTestBase -import io.snappydata.{Property, ServiceManager} +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.AvailablePortHelper +import org.apache.spark.Logging import org.apache.spark.sql.collection.ReusableRow import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SaveMode} @@ -31,7 +47,7 @@ class HiveMetastoreClientAccessDUnitTest(val s: String) def _testOne(): Unit = { val serverNetPort = AvailablePortHelper.getRandomAvailableTCPPort - val locStr = "localhost[" + locatorPort + ']' + val locStr = "localhost[" + ClusterManagerTestBase.locatorPort + ']' vm2.invoke(this.getClass, "startDriverApp", Array(locStr.asInstanceOf[AnyRef])) @@ -47,26 +63,29 @@ class HiveMetastoreClientAccessDUnitTest(val s: String) val bootProperties = new Properties() bootProperties.setProperty("locators", locatorStr) dataStoreService.start(bootProperties) - println("Gfxd peer node vm type = " + GemFireStore.getBootedInstance.getMyVMKind) + getLogWriter.info("Gfxd peer node vm type = " + + GemFireStore.getBootedInstance.getMyVMKind) } } -object HiveMetastoreClientAccessDUnitTest { +object HiveMetastoreClientAccessDUnitTest extends Logging { def helloWorld(): Unit = { hello("Hello World! 
" + this.getClass) } def hello(s: String): Unit = { + // scalastyle:off println println(s) + // scalastyle:on println } def startDriverApp(locatorStr: String): Unit = { startSnappyLocalModeAndCreateARowAndAColumnTable(locatorStr) val dsys = InternalDistributedSystem.getConnectedInstance assert(dsys != null) - println("Driver vm type = " + GemFireStore.getBootedInstance.getMyVMKind) - println("locator prop in driver app = " + InternalDistributedSystem + logInfo("Driver vm type = " + GemFireStore.getBootedInstance.getMyVMKind) + logInfo("locator prop in driver app = " + InternalDistributedSystem .getConnectedInstance.getConfig.getLocators) } @@ -146,10 +165,10 @@ object HiveMetastoreClientAccessDUnitTest { val conf = new org.apache.spark.SparkConf().setAppName("HiveMetastoreTest") .set("spark.logConf", "true") - .set(Property.locators, locStr) + .set(Property.Locators.name, locStr) if (setMaster != null) { - conf.setMaster(setMaster).set(Property.embedded, "true") + conf.setMaster(setMaster) } val sc = new org.apache.spark.SparkContext(conf) @@ -206,7 +225,7 @@ object HiveMetastoreClientAccessDUnitTest { (1 to 1000).map(i => TestData(i, s"$i"))) val dataDF = snContext.createDataFrame(rdd) - snContext.createExternalTable("row_table", "row", dataDF.schema, + snContext.createTable("row_table", "row", dataDF.schema, Map.empty[String, String]) } } diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerLDAPTestBase.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerLDAPTestBase.scala new file mode 100644 index 0000000000..d6c1588289 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerLDAPTestBase.scala @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.util.Properties + +import scala.language.postfixOps + +import com.gemstone.org.jgroups.protocols.AUTH +import com.pivotal.gemfirexd.Attribute +import com.pivotal.gemfirexd.security.{LdapTestServer, SecurityTestUtils} +import io.snappydata.test.dunit.{AvailablePortHelper, SerializableRunnable} + +/** + * Base class for start and stop of LDAP Server + */ +object ClusterManagerLDAPTestBase { + val securityProperties: Properties = new Properties() + val thriftPort = AvailablePortHelper.getRandomAvailableUDPPort + var admin: String = "" +} + +abstract class ClusterManagerLDAPTestBase(s: String, val adminUser: String = "gemfire10") + extends ClusterManagerTestBase(s) with Serializable { + + ClusterManagerLDAPTestBase.admin = adminUser + // start embedded thrift server on lead + bootProps.setProperty("snappydata.hiveServer.enabled", "true") + bootProps.setProperty("hive.server2.thrift.bind.host", "localhost") + bootProps.setProperty("hive.server2.thrift.port", ClusterManagerLDAPTestBase.thriftPort.toString) + + override def beforeClass(): Unit = { + val ldapProperties = SecurityTestUtils.startLdapServerAndGetBootProperties(0, 0, adminUser, + getClass.getResource("/auth.ldif").getPath, true) + setSecurityProps(ldapProperties) + super.beforeClass() + SplitClusterDUnitSecurityTest.bootExistingAuthModule(ldapProperties) + + // check that server-auth-provider has disabled the GFE JGroups authenticator + val serverAuth = 
ldapProperties.getProperty(Attribute.SERVER_AUTH_PROVIDER) + assert(serverAuth == "NONE") + + val checkServerAuth = new SerializableRunnable() { + override def run(): Unit = { + val authInit = AUTH.getAuthInit + val authenticator = AUTH.getAuthenticator + assert((authInit eq null) || authInit.isEmpty) + assert((authenticator eq null) || authenticator.isEmpty) + } + } + Seq(vm0, vm1, vm2).foreach(_.invoke(checkServerAuth)) + } + + override def afterClass(): Unit = { + try { + super.afterClass() + } finally { + val ldapServer = LdapTestServer.getInstance() + if (ldapServer.isServerStarted) { + ldapServer.stopService() + } + ClusterManagerLDAPTestBase.securityProperties.clear() + } + } + + override def setUp(): Unit = { + ClusterManagerLDAPTestBase.securityProperties.keySet().toArray.foreach(k => + bootProps.put(k, ClusterManagerLDAPTestBase.securityProperties.get(k))) + super.setUp() + } + + def setSecurityProps(ldapProperties: Properties): Unit = { + import com.pivotal.gemfirexd.Property.{AUTH_LDAP_SEARCH_BASE, AUTH_LDAP_SERVER} + for (k <- List(Attribute.AUTH_PROVIDER, AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE)) { + System.setProperty(k, ldapProperties.getProperty(k)) + } + for (k <- List(Attribute.AUTH_PROVIDER, Attribute.SERVER_AUTH_PROVIDER, AUTH_LDAP_SERVER, + AUTH_LDAP_SEARCH_BASE, Attribute.USERNAME_ATTR, Attribute.PASSWORD_ATTR)) { + val propValue = ldapProperties.getProperty(k) + if (propValue ne null) { + locatorNetProps.setProperty(k, propValue) + bootProps.setProperty(k, propValue) + ClusterManagerLDAPTestBase.securityProperties.setProperty(k, propValue) + } else { + locatorNetProps.remove(k) + bootProps.remove(k) + ClusterManagerLDAPTestBase.securityProperties.remove(k) + } + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerTestBase.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerTestBase.scala new file mode 100644 index 0000000000..364e0623ad --- /dev/null +++ 
b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerTestBase.scala @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.sql.{Connection, DriverManager} +import java.util.Properties + +import scala.language.postfixOps +import scala.sys.process._ +import scala.util.Random + +import com.gemstone.gemfire.internal.shared.NativeCalls +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.{FabricService, TestUtil} +import io.snappydata._ +import io.snappydata.test.dunit.DistributedTestBase.WaitCriterion +import io.snappydata.test.dunit._ +import io.snappydata.util.TestUtils +import org.slf4j.LoggerFactory + +import org.apache.spark.sql.SnappyContext +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.execution.ConnectionPool +import org.apache.spark.{Logging, SparkContext} +/** + * Base class for tests using Snappy ClusterManager. New utility methods + * would need to be added as and when corresponding snappy code gets added. 
+ * + * @author hemant + */ +abstract class ClusterManagerTestBase(s: String) + extends DistributedTestBase(s) with Serializable { + + import ClusterManagerTestBase._ + + val bootProps: Properties = new Properties() + val sysProps: Properties = new Properties() + bootProps.setProperty("log-file", "snappyStore.log") + val logLevel: String = System.getProperty("logLevel", "config") + bootProps.setProperty("log-level", logLevel) + // set DistributionManager.VERBOSE for log-level fine or higher + if (logLevel.startsWith("fine") || logLevel == "all") { + sysProps.setProperty("DistributionManager.VERBOSE", "true") + } + bootProps.setProperty("security-log-level", + System.getProperty("securityLogLevel", "config")) + // Kept here (commented out) so that traces can easily be switched ON when debugging. +// bootProps.setProperty("gemfirexd.debug.true", +// "QueryDistribution,TraceExecution,TraceActivation,TraceTran") + bootProps.setProperty("statistic-archive-file", "snappyStore.gfs") + bootProps.setProperty("bind-address", "localhost") + bootProps.setProperty("spark.executor.cores", + TestUtils.defaultCores.toString) + bootProps.setProperty("spark.memory.manager", + "org.apache.spark.memory.SnappyUnifiedMemoryManager") + bootProps.setProperty("critical-heap-percentage", "95") + bootProps.setProperty("gemfirexd.max-lock-wait", "60000") + bootProps.setProperty("member-timeout", "5000") + bootProps.setProperty("snappydata.sql.planCaching", random.nextBoolean().toString) + + // reduce startup time + // sysProps.setProperty("p2p.discoveryTimeout", "1000") + // sysProps.setProperty("p2p.joinTimeout", "2000") + sysProps.setProperty("p2p.minJoinTries", "1") + + // spark memory fill to detect any uninitialized memory accesses + sysProps.setProperty("spark.memory.debugFill", "true") + // reduce minimum compression size so that it happens for all the values for testing + sysProps.setProperty(Constant.COMPRESSION_MIN_SIZE, "128") + + sysProps.setProperty("gemfire.DISALLOW_CLUSTER_RESTART_CHECK", "true") + + var host: 
Host = _ + var vm0: VM = _ + var vm1: VM = _ + var vm2: VM = _ + var vm3: VM = _ + + if (Host.getHostCount > 0) { + host = Host.getHost(0) + vm0 = host.getVM(0) + vm1 = host.getVM(1) + vm2 = host.getVM(2) + vm3 = host.getVM(3) + } + + protected final def startArgs = + Array(locatorPort, bootProps).asInstanceOf[Array[AnyRef]] + + val locatorNetPort: Int = 0 + val locatorNetProps = new Properties() + val stopNetServersInTearDown = true + + locatorNetProps.setProperty("bind-address", "localhost") + + // SparkContext is initialized on the lead node and hence, + // this can be used only by jobs running on Lead node + def sc: SparkContext = SnappyContext.globalSparkContext + + override def beforeClass(): Unit = { + super.beforeClass() + val logger = LoggerFactory.getLogger(getClass) + logger.info("Boot properties:" + bootProps) + + doSetUp() + val locNetPort = locatorNetPort + val locNetProps = locatorNetProps + val locPort = ClusterManagerTestBase.locPort + val sysProps = this.sysProps + DistributedTestBase.invokeInLocator(new SerializableRunnable() { + override def run(): Unit = { + ClusterManagerTestBase.setSystemProperties(sysProps) + val loc: Locator = ServiceManager.getLocatorInstance + + if (loc.status != FabricService.State.RUNNING) { + loc.start("localhost", locPort, locNetProps) + } + if (locNetPort > 0) { + loc.startNetworkServer("localhost", locNetPort, locNetProps) + } + assert(loc.status == FabricService.State.RUNNING) + + val logger = LoggerFactory.getLogger(getClass) + logger.info("\n\n\n STARTING TESTS IN " + getClass.getName + "\n\n") + } + }) + val nodeProps = bootProps + val startNode = new SerializableRunnable() { + override def run(): Unit = { + ClusterManagerTestBase.setSystemProperties(sysProps) + val node = ServiceManager.currentFabricServiceInstance + if (node == null || node.status != FabricService.State.RUNNING) { + startSnappyServer(locPort, nodeProps) + } + assert(ServiceManager.currentFabricServiceInstance.status == + 
FabricService.State.RUNNING) + + val logger = LoggerFactory.getLogger(getClass) + logger.info("\n\n\n STARTING TESTS IN " + getClass.getName + "\n\n") + } + } + + Array(vm0, vm1, vm2).map(_.invokeAsync(startNode)).foreach(_.getResult) + vm3.invoke(new SerializableRunnable() { + override def run(): Unit = { + ClusterManagerTestBase.setSystemProperties(sysProps) + } + }) + // start lead node in this VM + val sc = SnappyContext.globalSparkContext + if (sc == null || sc.isStopped) { + startSnappyLead(locatorPort, bootProps.clone().asInstanceOf[java.util.Properties]) + } + assert(ServiceManager.currentFabricServiceInstance.status == + FabricService.State.RUNNING) + } + + override def setUp(): Unit = { + super.setUp() + doSetUp() + } + + private def doSetUp() : Unit = { + val testName = getName + val testClass = getClass + // bootProps.setProperty(Attribute.SYS_PERSISTENT_DIR, s) + TestUtil.currentTest = testName + TestUtil.currentTestClass = getTestClass + TestUtil.skipDefaultPartitioned = true + TestUtil.doCommonSetup(bootProps) + ClusterManagerTestBase.setSystemProperties(sysProps) + GemFireXDUtils.IS_TEST_MODE = true + + getLogWriter.info("\n\n\n STARTING TEST " + testClass.getName + '.' 
+ + testName + "\n\n") + } + + override def tearDown2(): Unit = { + super.tearDown2() + GemFireXDUtils.IS_TEST_MODE = false + cleanupTestData(getClass.getName, getName) + Array(vm3, vm2, vm1, vm0).foreach(_.invoke(getClass, "cleanupTestData", + Array[AnyRef](getClass.getName, getName))) + if (stopNetServersInTearDown) { + Array(vm3, vm2, vm1, vm0).foreach(_.invoke(getClass, "stopNetworkServers")) + stopNetworkServers() + } + + bootProps.clear() + } + + override def afterClass(): Unit = { + super.afterClass() + val locNetPort = locatorNetPort + DistributedTestBase.invokeInLocator(new SerializableRunnable() { + override def run(): Unit = { + if (locNetPort > 0) { + val loc = ServiceManager.getLocatorInstance + if (loc != null) { + loc.stopAllNetworkServers() + } + } + } + }) + } + + def getANetConnection(netPort: Int, + useGemXDURL: Boolean = false, + disableQueryRouting: Boolean = false): Connection = { + val driver = "io.snappydata.jdbc.ClientDriver" + Utils.classForName(driver).newInstance + var url: String = null + if (useGemXDURL) { + url = "jdbc:gemfirexd:thrift://localhost:" + netPort + "/" + } else if (disableQueryRouting) { + url = "jdbc:snappydata://localhost:" + netPort + "/route-query=false" + } else { + url = "jdbc:snappydata://localhost:" + netPort + "/" + } + + DriverManager.getConnection(url) + } + + def startNetworkServersOnAllVMs(): Unit = { + vm0.invoke(classOf[ClusterManagerTestBase], "startNetServer", + AvailablePortHelper.getRandomAvailableTCPPort) + vm1.invoke(classOf[ClusterManagerTestBase], "startNetServer", + AvailablePortHelper.getRandomAvailableTCPPort) + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", + AvailablePortHelper.getRandomAvailableTCPPort) + } + + + +} + +/** + * New utility methods would need to be added as and when corresponding + * snappy code gets added. 
+ */ +object ClusterManagerTestBase extends Logging { + final def locatorPort: Int = DistributedTestBase.getDUnitLocatorPort + final lazy val locPort: Int = locatorPort + private val random = new Random() + + /* SparkContext is initialized on the lead node and hence, + this can be used only by jobs running on Lead node */ + def sc: SparkContext = SnappyContext.globalSparkContext + + def setSystemProperties(props: Properties): Unit = { + val sysPropNames = props.stringPropertyNames().iterator() + while (sysPropNames.hasNext) { + val propName = sysPropNames.next() + System.setProperty(propName, props.getProperty(propName)) + } + } + + /** + * Start a snappy lead. This code starts a Spark server and at the same time + * also starts a SparkContext and hence it kind of becomes lead. We will use + * LeadImpl once the code for that is ready. + * + * Only a single instance of SnappyLead should be started. + */ + def startSnappyLead(locatorPort: Int, props: Properties): Unit = { + NativeCalls.getInstance().setEnvironment("SPARK_LOCAL_IP", "localhost") + NativeCalls.getInstance().setEnvironment("SPARK_PUBLIC_DNS", "localhost") + props.setProperty("locators", "localhost[" + locatorPort + ']') + props.setProperty(Property.JobServerEnabled.name, "false") + props.setProperty("isTest", "true") + val server: Lead = ServiceManager.getLeadInstance + server.start(props) + assert(server.status == FabricService.State.RUNNING) + } + + /** + * Start a snappy server. Any number of snappy servers can be started. 
+ */ + def startSnappyServer(locatorPort: Int, props: Properties): Unit = { + NativeCalls.getInstance().setEnvironment("SPARK_LOCAL_IP", "localhost") + props.setProperty("locators", "localhost[" + locatorPort + ']') + // bootProps.setProperty("log-level", "info") + val server: Server = ServiceManager.getServerInstance + server.start(props) + assert(server.status == FabricService.State.RUNNING) + } + + def startNetServer(netPort: Int): Unit = { + ServiceManager.getServerInstance.startNetworkServer("localhost", + netPort, null) + } + + def cleanupTestData(testClass: String, testName: String): Unit = { + // cleanup metastore + if (Misc.getMemStoreBootingNoThrow eq null) return + val snc = SnappyContext() + if (snc != null) { + TestUtils.resetAllFunctions(snc.snappySession) + TestUtils.dropAllSchemas(snc.snappySession) + } + if (testName != null) { + logInfo("\n\n\n ENDING TEST " + testClass + '.' + testName + "\n\n") + } + } + + def stopSpark(): Unit = { + // cleanup metastore + cleanupTestData(null, null) + val service = ServiceManager.currentFabricServiceInstance + if (service != null) { + service.stop(null) + } + } + + def stopNetworkServers(): Unit = { + val service = ServiceManager.currentFabricServiceInstance + if (service != null) { + service.stopAllNetworkServers() + // clear stale connection pool + ConnectionPool.clear() + } + } + + def stopAny(): Unit = { + val service = ServiceManager.currentFabricServiceInstance + if (service != null) { + service.stop(null) + } + } + + /** + * Wait until given criterion is met + * + * @param check Function criterion to wait on + * @param ms total time to wait, in milliseconds + * @param interval pause interval between waits + * @param throwOnTimeout if false, don't generate an error + */ + def waitForCriterion(check: => Boolean, desc: String, ms: Long, + interval: Long, throwOnTimeout: Boolean): Unit = { + val criterion = new WaitCriterion { + + override def done: Boolean = { + check + } + + override def description(): 
String = desc + } + DistributedTestBase.waitForCriterion(criterion, ms, interval, + throwOnTimeout) + } + + def startSparkCluster(productDir: String): Unit = { + logInfo(s"Starting spark cluster in $productDir/work") + (productDir + "/sbin/start-all.sh") !! + } + + def stopSparkCluster(productDir: String): Unit = { + val sparkContext = SnappyContext.globalSparkContext + logInfo(s"Stopping spark cluster in $productDir/work") + if (sparkContext != null) sparkContext.stop() + (productDir + "/sbin/stop-all.sh") !! + } + + def validateNoActiveSnapshotTX(): Unit = { + val cache = Misc.getGemFireCache + val txMgr = cache.getCacheTransactionManager + if (txMgr != null) { + val itr = txMgr.getHostedTransactionsInProgress.iterator() + while (itr.hasNext) { + val tx = itr.next() + if (tx.isSnapshot) assert(tx.isClosed, s"$tx is not closed. ") + } + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ClusterMgrDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterMgrDUnitTest.scala new file mode 100644 index 0000000000..92be8cf3a5 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/ClusterMgrDUnitTest.scala @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+package io.snappydata.cluster
+
+import java.net.InetAddress
+
+import scala.math._
+import scala.util.Random
+
+import com.gemstone.gemfire.cache.LowMemoryException
+
+import org.apache.spark.sql.{Row, SnappyContext}
+import org.apache.spark.{Logging, SparkConf, SparkContext}
+
+/**
+ * Cluster-manager tests: they run a Spark job and a SQL job through the
+ * global SparkContext, restart the lead in another VM, and check that jobs
+ * still run after errors are thrown inside executors.
+ *
+ * The job bodies live in the companion object because they are also invoked
+ * by name (as strings) on other VMs through `vm.invoke(getClass, "...")`.
+ */
+class ClusterMgrDUnitTest(s: String) extends ClusterManagerTestBase(s) with Logging {
+
+  import ClusterMgrDUnitTest._
+
+  /**
+   * This test starts a lead node and two server nodes. Executes a job.
+   * Then stops the lead node and starts lead in another node and then executes
+   * the same job.
+   */
+  def testMultipleDriver(): Unit = {
+    // Execute the job
+    startSparkJob()
+    startGemJob()
+    // Stop the lead node
+    ClusterManagerTestBase.stopSpark()
+
+    // Start the lead node in another JVM. The executors should
+    // connect with this new lead.
+    // In this case servers are already running and a lead comes
+    // and join
+    try {
+      vm3.invoke(getClass, "stopAny")
+      vm3.invoke(getClass, "startSnappyLead", startArgs)
+      vm3.invoke(getClass, "startSparkJob")
+      vm3.invoke(getClass, "startGemJob")
+    } finally {
+      // always give the lead back to this JVM, even if the jobs on vm3 failed
+      vm3.invoke(getClass, "stopSpark")
+      ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps)
+    }
+  }
+
+  /** Runs a job whose tasks throw OutOfMemoryError, then checks a normal job still works. */
+  def testUncaughtExceptionInExecutor(): Unit = {
+    try {
+      failTheExecutors
+    } catch {
+      // Deliberately broad: whatever the failing job raises is ignored here,
+      // the test only cares that the next job runs.
+      case _ : Throwable =>
+    }
+    // The executors should have started automatically, so this should not hang
+    startSparkJob()
+  }
+
+  /**
+   * Runs a job whose tasks throw an OutOfMemoryError with the message
+   * "Unable to acquire" and expects the resulting SparkException to carry a
+   * LowMemoryException somewhere in its cause chain.
+   */
+  def testNonFatalOOMException(): Unit = {
+    try {
+      throwNonFatalOOMException
+    } catch {
+      case e: org.apache.spark.SparkException =>
+        var t: Throwable = e
+        var foundExpectedError = false
+        // walk the cause chain until a LowMemoryException is found
+        while (t != null && !foundExpectedError) {
+          t match {
+            case _: LowMemoryException =>
+              foundExpectedError = true
+              logInfo("Received expected LowMemoryException exception")
+            case _ => t = t.getCause
+          }
+        }
+        // throw if this is not an expected exception
+        if (!foundExpectedError) throw e
+    }
+    // run a spark job to make sure that cluster is available
+    startSparkJob()
+  }
+
+  /** Starts a thread that throws InternalError on vm2, vm1 and vm0, then runs a job. */
+  def testUncaughtExceptionInExecutorthread(): Unit = {
+    vm2.invoke(getClass, "failAThread")
+    vm1.invoke(getClass, "failAThread")
+    vm0.invoke(getClass, "failAThread")
+    // The executors should have started automatically, so this should not hang
+    startSparkJob()
+  }
+
+  /** Runs both jobs, then checks that vm3 cannot create a plain SparkContext. */
+  def testSnap684(): Unit = {
+    startSparkJob()
+    startGemJob()
+    vm3.invoke(getClass, "stopAny")
+    vm3.invoke(getClass, "startExternalSparkApp", ClusterManagerTestBase.locatorPort)
+  }
+}
+
+object ClusterMgrDUnitTest {
+
+  private def sc = SnappyContext.globalSparkContext
+
+  /** Monte-Carlo estimate of pi over 5 slices; asserts 3.04 <= pi < 3.25. */
+  def startSparkJob(): Unit = {
+    val slices = 5
+    val n = math.min(1000000L * slices, Int.MaxValue).toInt // avoid overflow
+    val count = sc.parallelize(1 until n, slices).map { i =>
+      val x = random * 2 - 1
+      val y = random * 2 - 1
+      if (x * x + y * y < 1) 1 else 0
+    }.reduce(_ + _)
+    val pi = 4.0 * count / n
+    assert(3.04 <= pi)
+    assert(3.25 > pi)
+  }
+
+  /** Runs a job in which every task throws OutOfMemoryError("Some message"). */
+  def failTheExecutors: Unit = {
+    sc.parallelize(1 until 100, 5).map { i =>
+      throw new OutOfMemoryError("Some message")
+    }.collect()
+  }
+
+  /** Runs a job in which every task throws OutOfMemoryError("Unable to acquire"). */
+  def throwNonFatalOOMException: Unit = {
+    sc.parallelize(1 until 100, 5).map { i =>
+      // the message in exception should match one of
+      // the ignored messages in SystemFailure.isJVMFailureError
+      throw new OutOfMemoryError("Unable to acquire")
+    }.collect()
+  }
+
+  /** Starts (and does not join) a thread whose body throws InternalError. */
+  def failAThread: Unit = {
+    new Thread() {
+      override def run(): Unit = {
+        throw new InternalError()
+      }
+    }.start()
+  }
+
+  /**
+   * Creates table `airline`, exposes it as external JDBC table `airline1`
+   * (the driver class is picked at random between the two embedded drivers),
+   * inserts two rows and checks they are read back through `airline1`.
+   *
+   * Both tables are dropped before the run and again in `finally`, so a
+   * failed assertion cannot leave `airline1` behind and break the
+   * unconditional `create external table airline1` of the next invocation.
+   */
+  def startGemJob(): Unit = {
+
+    val snContext = SnappyContext(sc)
+    val externalUrl = "jdbc:snappydata:;"
+    val ddlStr = "YearI INT NOT NULL," +
+        "MonthI INT NOT NULL," +
+        "DayOfMonth INT NOT NULL," +
+        "DepTime INT," +
+        "ArrTime INT," +
+        "UniqueCarrier CHAR(6) NOT NULL"
+    snContext.sql("drop table if exists airline")
+    snContext.sql("drop table if exists airline1")
+    try {
+      snContext.sql(s"create table airline ($ddlStr)")
+      if (new Random().nextBoolean()) {
+
+        snContext.sql(s"create external table airline1 " +
+            s" using jdbc options (URL '$externalUrl'," +
+            " Driver 'io.snappydata.jdbc.EmbeddedDriver', dbtable 'APP.AIRLINE')").collect()
+      } else {
+        snContext.sql(s"create external table if not exists airline1 " +
+            s" using jdbc options (URL '$externalUrl'," +
+            " Driver 'com.pivotal.gemfirexd.jdbc.EmbeddedDriver',dbtable 'APP.AIRLINE')").collect()
+      }
+
+      snContext.sql("insert into airline values(2015, 2, 15, 1002, 1803, 'AA')")
+      snContext.sql("insert into airline values(2014, 4, 15, 1324, 1500, 'UT')")
+
+      val result = snContext.sql("select * from airline1")
+      // NOTE(review): UniqueCarrier is CHAR(6); confirm these literals carry
+      // exactly the padding the store returns.
+      val expected = Set[Row](Row(2015, 2, 15, 1002, 1803, "AA "),
+        Row(2014, 4, 15, 1324, 1500, "UT "))
+      val returnedRows = result.collect()
+      // scalastyle:off
+      println(s"Returned rows: ${returnedRows.mkString(",")} ")
+      println(s"Expected rows: ${expected.mkString(",")}")
+      // scalastyle:on
+      assert(returnedRows.toSet == expected)
+    } finally {
+      snContext.sql("drop table if exists airline")
+      snContext.sql("drop table if exists airline1")
+    }
+  }
+
+  /**
+   * Tries to create a SparkContext against `snappydata://host:locatorPort`
+   * and expects a SparkException whose message contains
+   * "only supported from ServiceManager"; any other SparkException is rethrown.
+   */
+  def startExternalSparkApp(locatorPort: Int): Unit = {
+    // println("locatorPort =" + locatorPort)
+    val hostName = InetAddress.getLocalHost.getHostName
+    val conf: SparkConf = new SparkConf()
+        .setMaster(s"snappydata://$hostName:$locatorPort")
+        .setAppName("externalApp").set("spark.testing.reservedMemory", "0")
+
+    try {
+      new SparkContext(conf)
+      assert(assertion = false,
+        "Expected SparkContext creation to fail without launcher")
+    } catch {
+      case e: org.apache.spark.SparkException =>
+        if (!e.getMessage.contains("only supported from ServiceManager")) {
+          throw e
+        } // else ok
+    }
+  }
+}
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitSecurityTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitSecurityTest.scala
new file mode 100644
index 0000000000..b9b63a8129
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitSecurityTest.scala
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package io.snappydata.cluster
+
+import java.util.concurrent.atomic.AtomicInteger
+
+import io.snappydata.test.dunit.AvailablePortHelper
+
+import org.apache.spark.Logging
+
+/**
+ * Runs the query-routing scenario of QueryRoutingDUnitSecurityTest from four
+ * threads at once, each thread with its own pair of JDBC users and its own
+ * table names, against one network server started on vm2.
+ */
+class ConcurrentQueryRoutingDUnitSecurityTest(val s: String)
+    extends ClusterManagerLDAPTestBase(s) with Logging {
+
+  /**
+   * Runs one column-table routing iteration on table
+   * `order_line_col_<thr>_<iter>`.
+   *
+   * @return 1, so callers can sum the number of completed iterations
+   */
+  def columnTableRouting(thr: Int, iter: Int, jdbcUser1: String, jdbcUser2: String,
+      serverHostPort: Int): Int = {
+    val tableName = s"order_line_col_${thr}_${iter}"
+    QueryRoutingDUnitSecurityTest.columnTableRouting(jdbcUser1, jdbcUser2, tableName,
+      serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.columnTableRouting-${thr}-${iter} done")
+    // scalastyle:on println
+    1
+  }
+
+  /**
+   * Runs one row-table routing iteration on table
+   * `order_line_row_<thr>_<iter>`.
+   *
+   * @return 1, so callers can sum the number of completed iterations
+   */
+  def rowTableRouting(thr: Int, iter: Int, jdbcUser1: String, jdbcUser2: String,
+      serverHostPort: Int): Int = {
+    val tableName = s"order_line_row_${thr}_${iter}"
+    QueryRoutingDUnitSecurityTest.rowTableRouting(jdbcUser1, jdbcUser2, tableName, serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.rowTableRouting-${thr}-${iter} done")
+    // scalastyle:on println
+    1
+  }
+
+  /**
+   * Starts a thread that runs five `columnTableRouting` iterations for the
+   * given thread number and users.
+   *
+   * @return the started thread and the counter of iterations it has completed
+   */
+  private def startRoutingThread(thr: Int, jdbcUser1: String, jdbcUser2: String,
+      serverHostPort: Int): (Thread, AtomicInteger) = {
+    val completed = new AtomicInteger(0)
+    val worker = new Thread(new Runnable {
+      override def run(): Unit = {
+        (1 to 5).foreach { i =>
+          completed.addAndGet(columnTableRouting(thr, i, jdbcUser1, jdbcUser2, serverHostPort))
+        }
+      }
+    })
+    worker.start()
+    (worker, completed)
+  }
+
+  /**
+   * Starts four routing threads, waits up to five minutes for each, and
+   * asserts that every thread completed all five iterations. An iteration
+   * that throws ends its thread early, which shows up as a count below 5.
+   */
+  def testConcurrency(): Unit = {
+
+    val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency: " +
+        s"network server started on $serverHostPort")
+    // scalastyle:on println
+
+    val (colThread1, colCount1) = startRoutingThread(1, "gemfire1", "gemfire2", serverHostPort)
+    val (colThread2, colCount2) = startRoutingThread(2, "gemfire3", "gemfire4", serverHostPort)
+    // NOTE(review): the "row" threads also run columnTableRouting; the
+    // rowTableRouting method of this class is not called from this test.
+    // Confirm whether that is intended.
+    val (rowThread1, rowCount1) = startRoutingThread(3, "gemfire5", "gemfire6", serverHostPort)
+    val (rowThread2, rowCount2) = startRoutingThread(4, "gemfire7", "gemfire8", serverHostPort)
+
+    colThread1.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+        s" columnTableRouting-1 thread done")
+    // scalastyle:on println
+    rowThread1.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+        s"rowTableRouting-1 thread done")
+    // scalastyle:on println
+    colThread2.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+        s" columnTableRouting-2 thread done")
+    // scalastyle:on println
+    rowThread2.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+        s"rowTableRouting-2 thread done")
+    // scalastyle:on println
+
+    // each message names the thread whose counter is being checked
+    assert(colCount1.get() == 5,
+      s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+          s" columnTableRoutingCompleted-1=$colCount1")
+    assert(colCount2.get() == 5,
+      s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+          s" columnTableRoutingCompleted-2=$colCount2")
+    assert(rowCount1.get() == 5,
+      s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+          s" rowTableRoutingCompleted-1=$rowCount1")
+    assert(rowCount2.get() == 5,
+      s"ConcurrentQueryRoutingDUnitSecureTest.testConcurrency:" +
+          s" rowTableRoutingCompleted-2=$rowCount2")
+  }
+}
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitTest.scala
new file mode 100644
index 0000000000..6e8c1b1540
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/ConcurrentQueryRoutingDUnitTest.scala
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package io.snappydata.cluster
+
+import java.sql.{Connection, DriverManager, ResultSet}
+
+import io.snappydata.test.dunit.AvailablePortHelper
+
+import org.apache.spark.Logging
+import org.apache.spark.sql.collection.Utils
+
+/**
+ * Runs the create / insert / query / drop scenario of the companion object
+ * from four threads at once, each thread on its own table names, against one
+ * network server started on vm2.
+ */
+class ConcurrentQueryRoutingDUnitTest(val s: String)
+    extends ClusterManagerTestBase(s) with Logging {
+
+  /**
+   * Runs one column-table routing iteration on table
+   * `order_line_col_<thr>_<iter>`.
+   *
+   * @return 1, so callers can sum the number of completed iterations
+   */
+  def columnTableRouting(thr: Int, iter: Int, serverHostPort: Int): Int = {
+    val tableName = s"order_line_col_${thr}_${iter}"
+
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.columnTableRouting-${thr}-${iter}:" +
+        s"network server started at $serverHostPort")
+    // scalastyle:on println
+    ConcurrentQueryRoutingDUnitTest.columnTableRouting(tableName, serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.columnTableRouting-${thr}-${iter} done")
+    // scalastyle:on println
+    1
+  }
+
+/*  def rowTableRouting(thr: Int, iter: Int): Int = {
+    val tableName = s"order_line_row_${thr}_${iter}"
+    val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.rowTableRouting-${thr}-${iter}:" +
+        s"network server started at $serverHostPort")
+    // scalastyle:on println
+    ConcurrentQueryRoutingDUnitTest.rowTableRouting(tableName, serverHostPort)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.rowTableRouting-${thr}-${iter} done")
+    // scalastyle:on println
+    1
+  }*/
+
+  /**
+   * Starts a thread that runs five `columnTableRouting` iterations for the
+   * given thread number.
+   *
+   * The counter is an AtomicInteger because it is written by the worker and
+   * read by the test thread after a timed `join` that may return while the
+   * worker is still running.
+   *
+   * @return the started thread and the counter of iterations it has completed
+   */
+  private def startRoutingThread(thr: Int,
+      serverHostPort: Int): (Thread, java.util.concurrent.atomic.AtomicInteger) = {
+    val completed = new java.util.concurrent.atomic.AtomicInteger(0)
+    val worker = new Thread(new Runnable {
+      override def run(): Unit = {
+        (1 to 5).foreach { i =>
+          completed.addAndGet(columnTableRouting(thr, i, serverHostPort))
+        }
+      }
+    })
+    worker.start()
+    (worker, completed)
+  }
+
+  /**
+   * Starts four routing threads, waits up to five minutes for each, asserts
+   * that every thread completed all five iterations, and finally runs
+   * `validateNoActiveSnapshotTX` on vm0, vm1 and vm2.
+   */
+  def testConcurrency(): Unit = {
+    val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
+
+    val (colThread1, colCount1) = startRoutingThread(1, serverHostPort)
+    val (colThread2, colCount2) = startRoutingThread(2, serverHostPort)
+    // the "row" threads also run columnTableRouting; the row variant of this
+    // class is commented out above
+    val (rowThread1, rowCount1) = startRoutingThread(3, serverHostPort)
+    val (rowThread2, rowCount2) = startRoutingThread(4, serverHostPort)
+
+    colThread1.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+        s" columnTableRouting-1 thread done")
+    // scalastyle:on println
+    rowThread1.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+        s"rowTableRouting-1 thread done")
+    // scalastyle:on println
+    colThread2.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+        s" columnTableRouting-2 thread done")
+    // scalastyle:on println
+    rowThread2.join(5 * 60 * 1000)
+    // scalastyle:off println
+    println(s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+        s"rowTableRouting-2 thread done")
+    // scalastyle:on println
+
+    // each message names the thread whose counter is being checked
+    assert(colCount1.get() == 5,
+      s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+          s" columnTableRoutingCompleted-1=$colCount1")
+    assert(colCount2.get() == 5,
+      s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+          s" columnTableRoutingCompleted-2=$colCount2")
+    assert(rowCount1.get() == 5,
+      s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+          s" rowTableRoutingCompleted-1=$rowCount1")
+    assert(rowCount2.get() == 5,
+      s"ConcurrentQueryRoutingDUnitTest.testConcurrency:" +
+          s" rowTableRoutingCompleted-2=$rowCount2")
+
+    Array(vm0, vm1, vm2).foreach(
+      _.invoke(classOf[ClusterManagerTestBase], "validateNoActiveSnapshotTX"))
+  }
+}
+
+
+object ConcurrentQueryRoutingDUnitTest {
+
+  /** Create, fill (200 batched + 200 single inserts), query and drop a column table. */
+  def columnTableRouting(tableName: String, serverHostPort: Int): Unit = {
+    createColumnTable("testColumnTableRouting-2", serverHostPort, tableName)
+    batchInsert("testColumnTableRouting-2", 200, 100, serverHostPort, tableName)
+    singleInsert("testColumnTableRouting-2", 200, serverHostPort, tableName)
+    query("testColumnTableRouting-2", serverHostPort, tableName, 400, 40)
+    dropTable("testColumnTableRouting-2", serverHostPort, tableName)
+  }
+
+  /** Create, fill (20 batched + 20 single inserts), query and drop a row table. */
+  def rowTableRouting(tableName: String, serverHostPort: Int): Unit = {
+    createRowTable("testRowTableRouting-2", serverHostPort, tableName)
+    batchInsert("testRowTableRouting-2", 20, 20, serverHostPort, tableName)
+    singleInsert("testRowTableRouting-2", 20, serverHostPort, tableName)
+    query("testRowTableRouting-2", serverHostPort, tableName, 40, 4)
+    dropTable("testRowTableRouting-2", serverHostPort, tableName)
+  }
+
+  /** Opens a client-driver JDBC connection to `localhost:netPort`. */
+  def netConnection(netPort: Int): Connection = {
+    val driver = "io.snappydata.jdbc.ClientDriver"
+    Utils.classForName(driver).newInstance
+    val url: String = "jdbc:snappydata://localhost:" + netPort + "/"
+    DriverManager.getConnection(url)
+  }
+
+  /**
+   * Opens a connection to the server, prints
+   * `<action>-<testName>: Connected to <port>`, runs `body` with a fresh
+   * statement, and closes the statement and then the connection. The
+   * connection is closed even when creating or closing the statement throws.
+   */
+  private def withStatement(action: String, testName: String, serverHostPort: Int)(
+      body: java.sql.Statement => Unit): Unit = {
+    val conn = netConnection(serverHostPort)
+    try {
+      // scalastyle:off println
+      println(s"$action-$testName: Connected to $serverHostPort")
+      // scalastyle:on println
+      val stmt = conn.createStatement()
+      try {
+        body(stmt)
+      } finally {
+        stmt.close()
+      }
+    } finally {
+      conn.close()
+    }
+  }
+
+  def createColumnTable(testName: String, serverHostPort: Int, tableName: String): Unit = {
+    withStatement("createColumnTable", testName, serverHostPort) { stmt =>
+      stmt.execute(s"create table $tableName (ol_int_id integer," +
+          s" ol_int2_id integer, ol_str_id STRING) using column " +
+          "options( partition_by 'ol_int_id, ol_int2_id', buckets '8', COLUMN_BATCH_SIZE '200')")
+    }
+  }
+
+  def createRowTable(testName: String, serverHostPort: Int, tableName: String): Unit = {
+    withStatement("createRowTable", testName, serverHostPort) { stmt =>
+      stmt.execute(s"create table $tableName (ol_int_id integer," +
+          s" ol_int2_id integer, ol_str_id STRING) using row " +
+          "options( partition_by 'ol_int_id, ol_int2_id', buckets '8')")
+    }
+  }
+
+  def dropTable(testName: String, serverHostPort: Int, tableName: String): Unit = {
+    withStatement("dropTable", testName, serverHostPort) { stmt =>
+      stmt.execute(s"drop table $tableName")
+    }
+  }
+
+  /**
+   * Inserts `numRows` rows through JDBC batches.
+   *
+   * The inserted value is a counter that starts at 1 and is reset to 0 each
+   * time it reaches a multiple of `batchSize` (at which point the batch is
+   * flushed), so values repeat across batches; callers in this file only
+   * check the resulting row count. `batchSize` must not be 0.
+   */
+  def batchInsert(testName: String, numRows: Int, batchSize: Int, serverHostPort: Int,
+      tableName: String): Unit = {
+    withStatement("batchInsert", testName, serverHostPort) { stmt =>
+      var i = 1
+      (1 to numRows).foreach(_ => {
+        stmt.addBatch(s"insert into $tableName values($i, $i, '$i')")
+        i += 1
+        if (i % batchSize == 0) {
+          stmt.executeBatch()
+          i = 0
+        }
+      })
+      // flush whatever is left in the last, partial batch
+      stmt.executeBatch()
+
+      // scalastyle:off println
+      println(s"batchInsert-$testName: committed $numRows rows")
+      // scalastyle:on println
+    }
+  }
+
+  /** Inserts rows (i, i, 'i') for i in 1..numRows, one statement per row. */
+  def singleInsert(testName: String, numRows: Int, serverHostPort: Int, tableName: String): Unit = {
+    withStatement("singleInsert", testName, serverHostPort) { stmt =>
+      (1 to numRows).foreach(i => {
+        stmt.executeUpdate(s"insert into $tableName values($i, $i, '$i')")
+      })
+
+      // scalastyle:off println
+      println(s"singleInsert-$testName: committed $numRows rows")
+      // scalastyle:on println
+    }
+  }
+
+  /**
+   * Reads every row of `stmt_rs`, prints every `debugNumRows`-th row plus the
+   * total, and asserts that exactly `numRows` rows were returned.
+   */
+  def verifyQuery(testName: String, qryTest: String, stmt_rs: ResultSet, numRows: Int,
+      debugNumRows: Int): Unit = {
+    val builder = StringBuilder.newBuilder
+
+    var index = 0
+    while (stmt_rs.next()) {
+      index += 1
+      val stmt_i = stmt_rs.getInt(1)
+      val stmt_j = stmt_rs.getInt(2)
+      val stmt_s = stmt_rs.getString(3)
+      if (index % debugNumRows == 0) {
+        builder.append(s"verifyQuery-$testName: " +
+            s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n")
+      }
+    }
+    builder.append(s"verifyQuery-$testName: " +
+        s"$qryTest Stmt: Total number of rows = $index").append("\n")
+    // scalastyle:off println
+    println(builder.toString())
+    // scalastyle:on println
+    assert(index == numRows)
+  }
+
+  /** Selects all rows with `ol_int_id < 5000000` and verifies the row count. */
+  def query(testName: String, serverHostPort: Int, tableName: String,
+      numRows: Int, debugNumRows: Int): Unit = {
+    withStatement("query", testName, serverHostPort) { stmt =>
+      val qry1 = s"select ol_int_id, ol_int2_id, ol_str_id " +
+          s" from $tableName " +
+          s" where ol_int_id < 5000000 " +
+          s""
+      val rs1 = stmt.executeQuery(qry1)
+      verifyQuery(testName, qry1, rs1, numRows, debugNumRows)
+      rs1.close()
+      // Thread.sleep(1000000)
+    }
+  }
+}
\ No newline at end of file
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/DDLRoutingDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/DDLRoutingDUnitTest.scala
new file mode 100644
index 0000000000..8d789bfec7
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/DDLRoutingDUnitTest.scala
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.cluster
+
+import java.sql.{Connection, DriverManager, SQLException}
+
+import com.pivotal.gemfirexd.internal.engine.{GfxdConstants, Misc}
+import io.snappydata.SnappyFunSuite.resultSetToDataset
+import io.snappydata.test.dunit.{AvailablePortHelper, SerializableRunnable}
+
+import org.apache.spark.sql.collection.Utils
+import org.apache.spark.sql.store.ViewTest
+import org.apache.spark.sql.{Dataset, Row, SnappyContext, SnappySession}
+
+/**
+ * Tests that DDL (create / alter / truncate / drop) issued over a JDBC client
+ * connection to a network server on vm2 is visible from a SnappyContext on
+ * this JVM, and the other way round.
+ */
+class DDLRoutingDUnitTest(val s: String) extends ClusterManagerTestBase(s) {
+
+  /** Opens a client-driver JDBC connection to `localhost:netPort`. */
+  private def getANetConnection(netPort: Int): Connection = {
+    val driver = "io.snappydata.jdbc.ClientDriver"
+    Utils.classForName(driver).newInstance
+    val url = "jdbc:snappydata://localhost:" + netPort + "/"
+    DriverManager.getConnection(url)
+  }
+
+  /** Runs `body` with a fresh statement on `conn` and always closes the statement. */
+  private def withStatement(conn: Connection)(body: java.sql.Statement => Unit): Unit = {
+    val stmt = conn.createStatement()
+    try {
+      body(stmt)
+    } finally {
+      stmt.close()
+    }
+  }
+
+  def testColumnTableRouting(): Unit = {
+    val tableName: String = "TEST.ColumnTableQR"
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn = getANetConnection(netPort1)
+
+    // first fail a statement
+    failCreateTableXD(conn, tableName, doFail = true, " column ")
+
+    createTableXD(conn, tableName, " column ")
+    tableMetadataAssertColumnTable("TEST", "ColumnTableQR")
+    // Test create table - error for recreate
+    failCreateTableXD(conn, tableName, doFail = false, " column ")
+
+    // Drop Table and Recreate
+    dropTableXD(conn, tableName)
+    createTableXD(conn, tableName, " column ")
+
+    insertDataXD(conn, tableName)
+    queryData(tableName)
+
+    truncateTableXD(conn, tableName)
+    insertDataXD(conn, tableName)
+    queryData(tableName)
+
+    createTempTableXD(conn)
+
+    queryDataXD(conn, tableName)
+    dropTableXD(conn, tableName)
+  }
+
+  def testRowTableRouting(): Unit = {
+    val tableName: String = "RowTableQR"
+
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn = getANetConnection(netPort1)
+
+    // first fail a statement
+    failCreateTableXD(conn, tableName, doFail = true, " row ")
+
+    createTableXD(conn, tableName, " row ")
+    tableMetadataAssertRowTable("APP", tableName)
+    // Test create table - error for recreate
+    failCreateTableXD(conn, tableName, doFail = false, " row ")
+
+    // Drop Table and Recreate
+    dropTableXD(conn, tableName)
+    createTableXD(conn, tableName, " row ")
+
+    insertDataXD(conn, tableName)
+    queryData(tableName)
+
+    truncateTableXD(conn, tableName)
+    insertDataXD(conn, tableName)
+    queryData(tableName)
+
+    createTempTableXD(conn)
+
+    queryDataXD(conn, tableName)
+    dropTableXD(conn, tableName)
+  }
+
+  def testRowTableByDefaultRouting(): Unit = {
+    val tableName: String = "TEST.DefaultRowTableQR"
+
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn = getANetConnection(netPort1)
+
+    createTableByDefaultXD(conn, tableName)
+    tableMetadataAssertRowTable("TEST", "DefaultRowTableQR")
+
+    // Drop Table and Recreate
+    dropTableXD(conn, tableName)
+    createTableByDefaultXD(conn, tableName)
+
+    insertDataXD(conn, tableName)
+    queryData(tableName)
+
+    createTempTableXD(conn)
+
+    queryDataXD(conn, tableName)
+    dropTableXD(conn, tableName)
+
+    Snap319(conn)
+  }
+
+  def testHang_SNAP_961(): Unit = {
+    val tableName: String = "TEST.ColumnTableQR"
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn = getANetConnection(netPort1)
+
+    val s = conn.createStatement()
+    var options = "OPTIONS(PERSISTENT 'async', DISKSTORE 'd1')"
+    try {
+      s.execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 INT) " +
+          s"USING column $options")
+    } catch {
+      // only the "disk store not found" failure is acceptable here
+      case sqle: SQLException => if (sqle.getSQLState != "38000" ||
+          (!sqle.getMessage.contains("Disk store D1 not found") && !sqle.getMessage.contains(
+            s"Disk store D1${GfxdConstants.SNAPPY_DELTA_DISKSTORE_SUFFIX} not found"))) {
+        throw sqle
+      }
+    }
+
+    // should succeed after creating diskstore
+    s.execute("CREATE DISKSTORE d1")
+    s.execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 INT) " +
+        s"USING column $options")
+
+    dropTableXD(conn, tableName)
+
+    // offheap has been removed
+    options = "OPTIONS(OFFHEAP 'true')"
+    try {
+      s.execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 INT) " +
+          s"USING column $options")
+    } catch {
+      case sqle: SQLException => if (sqle.getSQLState != "42000" ||
+          !sqle.getMessage.contains("Unknown option")) {
+        throw sqle
+      }
+    }
+
+    s.execute("DROP DISKSTORE d1")
+  }
+
+  // The leading underscore keeps this method from matching the `test*` naming
+  // used by the other tests in this class.
+  def _testAlterRowTableRoutingFromXD(): Unit = {
+    val tableName: String = "rowTableDDLRouting"
+
+    vm2.invoke(classOf[ClusterManagerTestBase], "stopAny")
+    val props = bootProps.clone().asInstanceOf[java.util.Properties]
+    props.put("distributed-system-id", "1")
+    props.put("server-groups", "sg1")
+
+    val restartServer = new SerializableRunnable() {
+      override def run(): Unit = {
+        ClusterManagerTestBase.startSnappyServer(
+          ClusterManagerTestBase.locatorPort, props)
+      }
+    }
+
+    vm2.invoke(restartServer)
+    var netPort = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort)
+
+    var conn = getANetConnection(netPort)
+    var s = conn.createStatement()
+    s.execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 STRING)")
+    insertDataXD(conn, tableName)
+    val snc = org.apache.spark.sql.SnappyContext(sc)
+    verifyResultAndSchema(snc, tableName, 3)
+
+    s.execute(s"ALTER TABLE $tableName ADD Col4 INT")
+    verifyResultAndSchema(snc, tableName, 4)
+
+    s.execute(s"ALTER TABLE $tableName DROP Col3")
+    verifyResultAndSchema(snc, tableName, 3)
+
+    s.execute(s"ALTER TABLE $tableName DROP COLUMN Col4")
+    verifyResultAndSchema(snc, tableName, 2)
+
+    s.execute(s"ALTER TABLE $tableName ADD COLUMN Col4 INT")
+    verifyResultAndSchema(snc, tableName, 3)
+
+    // execute at store level
+
+    // add constraints
+    s.execute(s"insert into $tableName values (1,1)")
+    s.execute(s"insert into $tableName values (1,1)")
+    s.execute(s"ALTER TABLE $tableName add constraint emp_uk unique (Col1)")
+    try {
+      s.execute(s"insert into $tableName values (1,1)")
+    } catch {
+      case sqle: SQLException =>
+        if (sqle.getSQLState != "23505" ||
+            !sqle.getMessage.contains("duplicate key value in a unique or" +
+                " primary key constraint or unique index")) {
+          throw sqle
+        }
+    }
+
+    // asynceventlistener
+    s.execute("CREATE ASYNCEVENTLISTENER myListener (" +
+        " listenerclass 'com.pivotal.gemfirexd.callbacks.DBSynchronizer'" +
+        " initparams 'org.apache.derby.jdbc.EmbeddedDriver,jdbc:derby:newDB;create=true')" +
+        " server groups(sg1)")
+
+    s.execute(s"ALTER TABLE $tableName SET ASYNCEVENTLISTENER (myListener) ")
+    var rs = s.executeQuery(s"select * from SYS.SYSTABLES where tablename='$tableName'")
+    while (rs.next) {
+      assert("MYLISTENER".equalsIgnoreCase(rs.getString(17)))
+    }
+
+    // gatewaysenders/receivers
+    s.execute("CREATE GATEWAYSENDER gwSender ( REMOTEDSID 2) SERVER GROUPS (sg1)")
+    s.execute("CREATE GATEWAYRECEIVER gwRcvr (bindaddress 'localhost' " +
+        "startport 1111 endport 9999) SERVER GROUPS (sg1)")
+    s.execute(s"ALTER TABLE $tableName SET GATEWAYSENDER (gwSender) ")
+    rs = s.executeQuery(s"select * from SYS.SYSTABLES where tablename='$tableName'")
+    while (rs.next) {
+      assert("gwSender".equalsIgnoreCase(rs.getString(19)))
+    }
+
+    vm2.invoke(classOf[ClusterManagerTestBase], "stopAny")
+    vm2.invoke(restartServer)
+    netPort = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort)
+
+    conn = getANetConnection(netPort)
+    s = conn.createStatement()
+
+    s.execute(s"ALTER TABLE $tableName SET ASYNCEVENTLISTENER () ")
+    rs = s.executeQuery(s"select * from SYS.SYSTABLES where tablename='$tableName'")
+    while (rs.next) {
+      assert(rs.getString(17) == null)
+    }
+    s.execute(s"drop ASYNCEVENTLISTENER myListener")
+
+    s.execute(s"ALTER TABLE $tableName SET GATEWAYSENDER () ")
+    rs = s.executeQuery(s"select * from SYS.SYSTABLES where tablename='$tableName'")
+    while (rs.next) {
+      assert(rs.getString(19) == null)
+    }
+    s.execute(s"drop GATEWAYSENDER gwSender")
+
+    dropTableXD(conn, tableName)
+  }
+
+  /** Asserts that `tableName` has 5 rows and `expectedColumns` columns. */
+  def verifyResultAndSchema(snc: SnappyContext, tableName: String, expectedColumns: Int): Unit = {
+    val dataDF = snc.sql("Select * from " + tableName)
+    assert(dataDF.count() == 5)
+    assert(dataDF.schema.fields.length == expectedColumns,
+      " Number of columns -> " + dataDF.schema.fields.length)
+  }
+
+  def testAlterRowTableFromXD_DifferentConnections(): Unit = {
+    val tableName: String = "RowTableQR"
+
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn1 = getANetConnection(netPort1)
+    val conn2 = getANetConnection(netPort1)
+
+    conn1.createStatement().execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 STRING)")
+    insertDataXD(conn1, tableName)
+    conn2.createStatement().execute(s"ALTER TABLE $tableName ADD COLUMN Col4 INT")
+
+    // the column added on conn2 must be selectable on conn1
+    val rs = conn1.createStatement().executeQuery(s"select Col1, Col4 from $tableName")
+    var cnt = 0
+    while (rs.next()) {
+      cnt += 1
+      rs.getInt(1)
+      rs.getInt(2)
+    }
+    assert(cnt == 5, cnt)
+
+    conn1.createStatement().execute(s"ALTER TABLE $tableName DROP COLUMN Col3")
+    val rs2 = conn2.createStatement().executeQuery(s"select Col1, Col2, Col4 from $tableName")
+    cnt = 0
+    while (rs2.next()) {
+      cnt += 1
+      rs2.getInt(1)
+      rs2.getInt(2)
+      rs2.getInt(3)
+    }
+    assert(cnt == 5, cnt)
+
+    dropTableXD(conn2, tableName)
+  }
+
+  def testAlterRowTableFromSnappy(): Unit = {
+    val tableName: String = "RowTableQR"
+
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    val conn = getANetConnection(netPort1)
+
+    val snc = org.apache.spark.sql.SnappyContext(sc)
+    snc.sql(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 STRING)")
+    insertDataXD(conn, tableName)
+    queryDataXD(conn, tableName)
+
+    snc.sql(s"ALTER TABLE $tableName ADD COLUMN Col4 INT")
+    queryDataXD(conn, tableName)
+
+    snc.sql(s"ALTER TABLE $tableName DROP COLUMN Col3")
+    queryDataXD(conn, tableName)
+
+    dropTableXD(conn, tableName)
+  }
+
+  def testViews(): Unit = {
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+
+    val session = new SnappySession(sc)
+    ViewTest.createTables(session)
+
+    // every call opens a new session, connection and statement
+    def newExecution(): String => Dataset[Row] = {
+      val session = new SnappySession(sc)
+      val conn = getANetConnection(netPort1)
+      val stmt = conn.createStatement()
+      resultSetToDataset(session, stmt)
+    }
+
+    val conn = getANetConnection(netPort1)
+    val stmt = conn.createStatement()
+    ViewTest.testTemporaryView(resultSetToDataset(session, stmt), newExecution)
+    ViewTest.testGlobalTemporaryView(resultSetToDataset(session, stmt), newExecution)
+    ViewTest.testTemporaryViewUsing(resultSetToDataset(session, stmt), newExecution)
+    ViewTest.testGlobalTemporaryViewUsing(resultSetToDataset(session, stmt), newExecution)
+    ViewTest.testPersistentView(resultSetToDataset(session, stmt), checkPlans = false,
+      newExecution, restartSpark)
+    ViewTest.dropTables(new SnappySession(sc))
+  }
+
+  private def restartSpark(): Unit = {
+    ClusterManagerTestBase.stopAny()
+    ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps)
+  }
+
+  /** Creates `tableName (Col1 INT, Col2 INT, Col3 STRING)` with `USING usingStr`. */
+  def createTableXD(conn: Connection, tableName: String,
+      usingStr: String): Unit = {
+    val options = ""
+    withStatement(conn) { s =>
+      s.execute(s"CREATE TABLE $tableName (Col1 INT, Col2 INT, Col3 STRING) " +
+          s"USING $usingStr $options")
+    }
+  }
+
+  /** Creates `tableName` without a USING clause, after setting shuffle partitions to 5. */
+  def createTableByDefaultXD(conn: Connection, tableName: String): Unit = {
+    withStatement(conn) { s =>
+      s.execute("set spark.sql.shuffle.partitions=5")
+      s.execute("CREATE TABLE " + tableName + " (Col1 INT, Col2 INT, Col3 STRING) ")
+    }
+  }
+
+  def Snap319(conn: Connection): Unit = {
+    {
+      val snc = org.apache.spark.sql.SnappyContext(sc)
+      snc.sql("set spark.sql.shuffle.partitions=10")
+      val val1 = snc.getAllConfs.getOrElse("spark.sql.shuffle.partitions", "0")
+      assert(val1.equals("10"), "Expect 10 but got " + val1)
+
+      {
+        // Change by DRDA has no effects
+        val s = conn.createStatement()
+        s.execute("set spark.sql.shuffle.partitions=5")
+        val val2 = snc.getAllConfs.getOrElse("spark.sql.shuffle.partitions", "0")
+        assert(val2.equals("10"), "Expect 10 but got " + val2)
+      }
+    }
+
+    {
+      // This setting has no effect in other Snappy Context
+      val snc3 = org.apache.spark.sql.SnappyContext(sc)
+      val val3 = snc3.getAllConfs.getOrElse("spark.sql.shuffle.partitions", "0")
+      assert(val3.equals("0"), "Expect 0 but got " + val3)
+    }
+  }
+
+  /**
+   * Issues a CREATE TABLE that is expected to fail and logs the failure.
+   *
+   * With `doFail = true` the word "fail" is inserted before USING, making the
+   * statement invalid. With `doFail = false` the statement is well formed and
+   * the callers in this class use it on a table that already exists.
+   *
+   * NOTE(review): the method only logs; it does not assert that the
+   * statement actually failed.
+   */
+  def failCreateTableXD(conn: Connection, tableName: String, doFail: Boolean,
+      usingStr: String): Unit = {
+    try {
+      val options = ""
+      withStatement(conn) { s =>
+        s.execute("CREATE TABLE " + tableName + " (Col1 INT, Col2 INT, " +
+            "Col3 INT) " + (if (doFail) "fail" else "") + " USING " +
+            usingStr + " " + options)
+      }
+      // println("Successfully Created ColumnTable = " + tableName)
+    } catch {
+      case e: Exception => getLogWriter.error("create: Caught exception " +
+          e.getMessage + " for ColumnTable = " + tableName, e)
+      // println("Exception stack. create. ex=" + e.getMessage + " ,stack=" +
+      // ExceptionUtils.getFullStackTrace(e))
+    }
+  }
+
+  /** Asserts on vm0 that the external catalog reports the table as a column table. */
+  def tableMetadataAssertColumnTable(schemaName: String,
+      tableName: String): Unit = {
+    vm0.invoke(new SerializableRunnable() {
+      override def run(): Unit = {
+        val catalog = Misc.getMemStore.getExternalCatalog
+        assert(catalog.isColumnTable(schemaName, tableName, false))
+      }
+    })
+  }
+
+  /** Asserts on vm0 that the external catalog does not report the table as a column table. */
+  def tableMetadataAssertRowTable(schemaName: String, tableName: String): Unit = {
+    vm0.invoke(new SerializableRunnable() {
+      override def run(): Unit = {
+        val catalog = Misc.getMemStore.getExternalCatalog
+        assert(!catalog.isColumnTable(schemaName, tableName, false))
+      }
+    })
+  }
+
+  /** Inserts the five rows whose first column is 10, 70, 90, 40 and 50. */
+  def insertDataXD(conn: Connection, tableName: String): Unit = {
+    withStatement(conn) { s =>
+      s.execute("insert into " + tableName + " values(10, 200, '3') ")
+      s.execute("insert into " + tableName +
+          " values(70, 800, '9'),(90, 200, '3'),(40, 200, '3'),(50, 600, '7') ")
+    }
+  }
+
+  def dropTableXD(conn: Connection, tableName: String): Unit = {
+    withStatement(conn) { s =>
+      s.execute("drop table " + tableName)
+    }
+  }
+
+  def truncateTableXD(conn: Connection, tableName: String): Unit = {
+    withStatement(conn) { s =>
+      s.execute("truncate table " + tableName)
+    }
+  }
+
+  /** Tries to create external parquet table `airlineRef_temp`; an SQLException is ignored. */
+  def createTempTableXD(conn: Connection): Unit = {
+    try {
+      withStatement(conn) { s =>
+        s.execute("CREATE EXTERNAL TABLE airlineRef_temp(Code VARCHAR(25), " +
+            "Description VARCHAR(25)) USING parquet OPTIONS()")
+      }
+    } catch {
+      case _: java.sql.SQLException =>
+      // println("Exception stack. create. ex=" + e.getMessage +
+      // " ,stack=" + ExceptionUtils.getFullStackTrace(e))
+    }
+    // println("Created ColumnTable = " + tableName)
+  }
+
+  /** Reads the table through a SnappyContext and checks 5 rows with the expected Col1 values. */
+  def queryData(tableName: String): Unit = {
+    val snc = org.apache.spark.sql.SnappyContext(sc)
+    // println("Firing select on ColumnTable = " + tableName)
+    val dataDF = snc.sql("Select * from " + tableName)
+    // dataDF.map(t => "Select Query: Col1: " + t(0) + " Col2: " + t(1) +
+    // " Col3: " + t(2)).collect().foreach(println)
+
+    assert(dataDF.rdd.map(t => t(0)).count() == 5)
+    dataDF.rdd.map(t => t(0)).collect().foreach(verifyData)
+  }
+
+  def verifyData(v: Any): Unit = {
+    assert(Seq(10, 70, 90, 40, 50).contains(v))
+  }
+
+  /** Reads col1 over JDBC and checks 5 rows with the expected values. */
+  def queryDataXD(conn: Connection, tableName: String): Unit = {
+    withStatement(conn) { s =>
+      val rs = s.executeQuery("Select col1 from " + tableName)
+      var cnt = 0
+      while (rs.next()) {
+        cnt += 1
+        assert(Seq(10, 70, 90, 40, 50).contains(rs.getInt(1)))
+      }
+      assert(cnt == 5, cnt)
+    }
+  }
+}
+
+case class insertData(col1: Int, col2: Int, col3: Int)
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/DistributedIndexDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/DistributedIndexDUnitTest.scala
new file mode 100644
index 0000000000..088165761f
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/DistributedIndexDUnitTest.scala
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.sql.{Connection, DriverManager} + +import scala.collection.mutable.ListBuffer + +import com.gemstone.gemfire.cache.CacheException +import com.pivotal.gemfirexd.internal.engine.access.index.{OpenMemIndex, SortedMap2IndexScanController} +import com.pivotal.gemfirexd.internal.engine.store.GemFireContainer +import com.pivotal.gemfirexd.internal.engine.{GemFireXDQueryObserver, GemFireXDQueryObserverAdapter, GemFireXDQueryObserverHolder} +import com.pivotal.gemfirexd.internal.iapi.sql.conn.LanguageConnectionContext +import com.pivotal.gemfirexd.internal.iapi.store.access.conglomerate.Conglomerate +import com.pivotal.gemfirexd.internal.impl.sql.compile.StatementNode +import io.snappydata.benchmark.TPCHColumnPartitionedTable +import io.snappydata.test.dunit.{AvailablePortHelper, SerializableRunnable} + +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.store.CreateIndexTest +import org.apache.spark.sql.{SaveMode, SnappyContext} +import org.apache.spark.sql.collection.Utils + +/** + * Tests various distributed index related tests. 
+ */ +class DistributedIndexDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + // SNAP-1800 Disabled all tests in this dunit + private val disabled = true + + val tablesToDrop = new ListBuffer[String] + val indexesToDrop = new ListBuffer[String] + override def tearDown2(): Unit = { + if (disabled) { + super.tearDown2() + return + } + try { + val snContext = SnappyContext(sc) + if (snContext != null) { + snContext.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, + io.snappydata.Property.EnableExperimentalFeatures.configEntry.defaultValueString) + snContext.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.defaultValue.get.toString) + indexesToDrop.reverse.foreach(i => snContext.sql(s"DROP INDEX if exists $i ")) + tablesToDrop.reverse.foreach(t => snContext.sql(s"DROP TABLE if exists $t ")) + indexesToDrop.clear() + tablesToDrop.clear() + } + } finally { + super.tearDown2() + } + } + + def createBaseTable(snContext: SnappyContext, tableName: String): Unit = { + val props = Map( + "PARTITION_BY" -> "col1") + snContext.sql("drop table if exists " + tableName) + + val data = Seq(Seq(111, "aaa", "hello"), + Seq(222, "bbb", "halo"), + Seq(333, "aaa", "hello"), + Seq(444, "bbb", "halo"), + Seq(555, "ccc", "halo"), + Seq(666, "ccc", "halo") + ) + + val rdd = sc.parallelize(data, data.length).map(s => + new Data2(s(0).asInstanceOf[Int], s(1).asInstanceOf[String], s(2).asInstanceOf[String])) + val dataDF = snContext.createDataFrame(rdd) + + dataDF.write.format("column").mode(SaveMode.Append).options(props).saveAsTable(tableName) + tablesToDrop += tableName + } + + def testPartitionedSingleColumnTable(): Unit = { + if (disabled) return + + val tableName = "tabOne" + + val snContext = SnappyContext(sc) + snContext.setConf(io.snappydata.Property.EnableExperimentalFeatures.configEntry.key, "true") + snContext.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + createBaseTable(snContext, tableName) + 
getLogWriter.info("Creating indexes") + val indexOne = s"${tableName}_IdxOne" + val indexTwo = s"${tableName}_IdxTwo" + val indexThree = s"${tableName}_IdxThree" +// snContext.sql(s"create index $indexOne on $tableName (COL1)") +// indexesToDrop += indexOne + snContext.sql(s"create index $indexTwo on $tableName (COL2, COL3)") + indexesToDrop += indexTwo + snContext.sql(s"create index $indexThree on $tableName (COL1, COL3)") + indexesToDrop += indexThree + + val executeQ = CreateIndexTest.QueryExecutor(snContext) +// executeQ(s"select * from $tableName where col1 = 111") { +// CreateIndexTest.validateIndex(Seq(indexOne))(_) +// } + +// executeQ(s"select * from $tableName where col2 = 'aaa' ") { +// CreateIndexTest.validateIndex(Nil, tableName)(_) +// } + + executeQ(s"select * from $tableName where col2 = 'bbb' and col3 = 'halo' ") { + CreateIndexTest.validateIndex(Seq(indexTwo))(_) + } + + executeQ(s"select * from $tableName where col1 = 111 and col3 = 'halo' ") { + CreateIndexTest.validateIndex(Seq(indexThree))(_) + } + } + + private def getANetConnection(netPort: Int): Connection = { + val driver = "io.snappydata.jdbc.ClientDriver" + Utils.classForName(driver).newInstance + val url = "jdbc:snappydata://localhost:" + netPort + "/" + DriverManager.getConnection(url) + } + + def testCreateDropColumnTable(): Unit = { + if (disabled) return + + val tableName = "tabOne" + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + + val snContext = SnappyContext(sc) + snContext.setConf(io.snappydata.Property.EnableExperimentalFeatures.configEntry.key, "true") + snContext.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + createBaseTable(snContext, tableName) + getLogWriter.info("Creating indexes") + val indexOne = s"${tableName}_IdxOne" + val indexTwo = s"${tableName}_IdxTwo" + val indexThree = s"${tableName}_IdxThree" + // snContext.sql(s"create 
index $indexOne on $tableName (COL1)") + // indexesToDrop += indexOne + val s1 = conn.createStatement() + s1.execute(s"create index $indexTwo on $tableName (COL2, COL3)") + indexesToDrop += indexTwo + val s2 = conn.createStatement() + s2.execute(s"create index $indexThree on $tableName (COL1, COL3)") + indexesToDrop += indexThree + + val executeQ = CreateIndexTest.QueryExecutor(snContext) + // executeQ(s"select * from $tableName where col1 = 111") { + // CreateIndexTest.validateIndex(Seq(indexOne))(_) + // } + + // executeQ(s"select * from $tableName where col2 = 'aaa' ") { + // CreateIndexTest.validateIndex(Nil, tableName)(_) + // } + + System.setProperty("LOG-NOW", "xxx") + getLogWriter.info("SB: About to execute queries") + executeQ(s"select * from $tableName where col2 = 'bbb' and col3 = 'halo' ") { + CreateIndexTest.validateIndex(Seq(indexTwo))(_) + } + + executeQ(s"select * from $tableName where col1 = 111 and col3 = 'halo' ") { + CreateIndexTest.validateIndex(Seq(indexThree))(_) + } + + val d1 = conn.createStatement() + d1.execute(s"drop index $indexTwo") + val d2 = conn.createStatement() + d2.execute(s"drop index $indexThree") + + getLogWriter.info("SB: Done executing the queries") + System.clearProperty("LOG-NOW") + } + + // Part of fix to SNAP-1461 + // This is being commented out. This is because now even the replicated + // table queries which are not pkbased or convertible to getAll are being routed + // and the test below asserts on an index being used assuming store execution. 
+ def testCreateDropRowTable(): Unit = { + if (disabled) return + + val tableName = "tabTwo" + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + + val snContext = SnappyContext(sc) + snContext.setConf(io.snappydata.Property.EnableExperimentalFeatures.configEntry.key, "true") + snContext.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + + val s = conn.createStatement() + s.executeUpdate(s"create table $tableName (COL1 Int, COL2 Int, COL3 Int) using row") + s.executeUpdate(s"insert into $tableName values (111, 11, 81)") + s.executeUpdate(s"insert into $tableName values (222, 22, 91)") + s.executeUpdate(s"insert into $tableName values (333, 11, 81)") + s.executeUpdate(s"insert into $tableName values (444, 22, 91)") + s.executeUpdate(s"insert into $tableName values (555, 33, 91)") + s.executeUpdate(s"insert into $tableName values (666, 33, 91)") + + getLogWriter.info("Creating indexes") + val indexOne = s"${tableName}_IdxOne" + val indexTwo = s"${tableName}_IdxTwo" + val indexThree = s"${tableName}_IdxThree" + // snContext.sql(s"create index $indexOne on $tableName (COL1)") + // indexesToDrop += indexOne + s.executeUpdate(s"create index $indexTwo on $tableName (COL2, COL3)") + indexesToDrop += indexTwo + s.executeUpdate(s"create index $indexThree on $tableName (COL1, COL3)") + indexesToDrop += indexThree + + // val executeQ = CreateIndexTest.QueryExecutor(snContext) + // executeQ(s"select * from $tableName where col1 = 111") { + // CreateIndexTest.validateIndex(Seq(indexOne))(_) + // } + + // executeQ(s"select * from $tableName where col2 = 'aaa' ") { + // CreateIndexTest.validateIndex(Nil, tableName)(_) + // } + + System.setProperty("LOG-NOW", "xxx") + getLogWriter.info("SB: About to execute queries") + + val query1 = s"select * from $tableName where col2 = 22 and col3 = 91" + val query2 = s"select * from $tableName where col1 =111 
and col3 = 81" + setIndexObserver(s"$indexTwo", s"$query1", s"$indexThree", s"$query2") + + val rs1 = s.executeQuery(s"$query1") + while(rs1.next()) { + getLogWriter.info("q1= " + rs1.getInt(1)) + } + + val rs2 = s.executeQuery(s"$query2") + while(rs2.next()) { + getLogWriter.info("q2= " + rs2.getInt(1)) + } + + unsetObserver() + s.execute(s"drop index $indexTwo") + s.execute(s"drop index $indexThree") + + getLogWriter.info("SB: Done executing the queries") + System.clearProperty("LOG-NOW") + } + + def setIndexObserver(indexTwo: String, queryTwo: String, indexThree: String, queryThree: String): + Unit = { + val hook = new SerializableRunnable { + override def run() { + val executionEngineObserver: GemFireXDQueryObserver = new GemFireXDQueryObserverAdapter() { + var indexTwoPicked: Boolean = false + var caseOfIndexTwo: Boolean = false + var indexThreePicked: Boolean = false + var caseOfIndexThree: Boolean = false + + override def afterQueryParsing(query: String, qt: StatementNode, lcc: + LanguageConnectionContext): Unit = { + if (query != null) { + if (!caseOfIndexTwo) { + caseOfIndexTwo = query.equalsIgnoreCase(queryTwo) + } + + if (!caseOfIndexThree) { + caseOfIndexThree = query.equalsIgnoreCase(queryThree) + } + } + } + + override def overrideDerbyOptimizerIndexUsageCostForHash1IndexScan(memIndex: OpenMemIndex, + optimzerEvalutatedCost: Double): Double = Double.MaxValue + + override def overrideDerbyOptimizerCostForMemHeapScan(gfContainer: GemFireContainer, + optimzerEvalutatedCost: Double): Double = Double.MaxValue + + override def overrideDerbyOptimizerIndexUsageCostForSortedIndexScan(memIndex: + OpenMemIndex, optimzerEvalutatedCost: Double): Double = 1 + + override def scanControllerOpened(sc: AnyRef, conglom: Conglomerate) { + if (caseOfIndexTwo && !indexTwoPicked) { + indexTwoPicked = sc match { + case smisc: SortedMap2IndexScanController => + smisc.getQualifiedIndexName.split(":base-table:")(0).equalsIgnoreCase(s"APP" + + s".$indexTwo") + case _ => false + 
} + } + if (caseOfIndexThree && !indexThreePicked) { + indexThreePicked = sc match { + case smisc: SortedMap2IndexScanController => + smisc.getQualifiedIndexName.split(":base-table:")(0).equalsIgnoreCase(s"APP" + + s".$indexThree") + case _ => false + } + } + } + + override def close(): Unit = { + if (caseOfIndexTwo) { + assert(indexTwoPicked) + } + + if (caseOfIndexThree) { + assert(indexThreePicked) + } + } + } + + GemFireXDQueryObserverHolder.setInstance(executionEngineObserver) + } + } + + hook.run() + vm0.invoke(hook) + vm1.invoke(hook) + vm2.invoke(hook) + vm3.invoke(hook) + } + + def unsetObserver(): Unit = { + val hook = new SerializableRunnable { + override def run() { + try { + GemFireXDQueryObserverHolder.clearInstance() + } + catch { + case e: Exception => throw new CacheException(e){} + } + } + } + + hook.run() + vm0.invoke(hook) + vm1.invoke(hook) + vm2.invoke(hook) + vm3.invoke(hook) + } + +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ExecutionEngineArbiterDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ExecutionEngineArbiterDUnitTest.scala new file mode 100644 index 0000000000..ae1f70aeda --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/ExecutionEngineArbiterDUnitTest.scala @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.cluster + +import java.sql.{Connection, DriverManager, SQLException} + +import scala.util.Random + +import com.pivotal.gemfirexd.internal.engine.distributed.metadata.QueryInfo +import com.pivotal.gemfirexd.internal.engine.{GemFireXDQueryObserver, GemFireXDQueryObserverAdapter, GemFireXDQueryObserverHolder} +import com.pivotal.gemfirexd.internal.impl.sql.rules.ExecutionEngineRule.ExecutionEngine +import com.pivotal.gemfirexd.internal.shared.common.reference.SQLState +import io.snappydata.test.dunit.{AvailablePortHelper, DistributedTestBase, SerializableRunnable} +import io.snappydata.test.util.TestException + +import org.apache.spark.Logging +import org.apache.spark.sql.types.Decimal +import org.apache.spark.sql.{SaveMode, SnappyContext} + +/** + * Tests for query routing from JDBC client driver. + */ +class ExecutionEngineArbiterDUnitTest(val s: String) + extends ClusterManagerTestBase(s) with Logging with ExecutionEngineArbiterTestBase { + + override def tearDown2(): Unit = { + // reset the chunk size on lead node + // setDMLMaxChunkSize(default_chunk_size) + super.tearDown2() + } + + def testExecutionEngineForDistinctQueries(): Unit = { + distinctExecutionEngineRule(SnappyContext()) + } + + + def testExecutionEngineForSpecialOuterJoinQueries(): Unit = { + outerJoinExecutionEngineRule(SnappyContext()) + } + + + def testExecutionEngineForGroupByQueries(): Unit = { + groupByExecutionEngineRule(SnappyContext()) + } + + + def testExecutionEngineForReplicatedTableQueries(): Unit = { + replicatedTableExecutionEngineRule(SnappyContext()) + } + + + def testExecutionEngineForUnionAndDistinct(): Unit = { + distinctExecutionEngineRule(SnappyContext()) + } + + def testExecutionEngineQueryHint(): Unit = { + queryHint(SnappyContext()) + } + + def testExecutionEngineQueryHintWithException(): Unit = { + queryHintWithException(SnappyContext()) + } + + def testExecutionEngineTableWithGetAllConvertible(): Unit = { + 
queryGetAllConvertibleEngineRule(SnappyContext()) + } + + def testExecutionEngineTableWithIndex(): Unit = { + queryIndexEngineRule(SnappyContext()) + } + + def testPrimaryKeyWithIndex(): Unit = { + queryPrimaryWithIndex(SnappyContext()) + } + + def testMultipleQueryHint(): Unit = { + queryWithMultipleHint(SnappyContext()) + } + + // make sure that the query with more one level of + // nested subquery is routed to to spark engine + def test_SNAP1507(): Unit = { + nestedSubQuery(SnappyContext()) + } + + override def startNetServer: String = { + val port = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", + port) + s"localhost:$port" + } + + override def stopNetServer(): Unit = { + vm2.invoke(classOf[ClusterManagerTestBase], "stopNetworkServers") + } + + override def setObserver(executeOnSpark: Boolean, query: String): Unit = { + val hook = new SerializableRunnable { + override def run() { + val executionEngineObserver: GemFireXDQueryObserver = new GemFireXDQueryObserverAdapter() { + + override def testExecutionEngineDecision(queryInfo: QueryInfo, engine: + ExecutionEngine, queryText: String): Unit = { + + // scalastyle:off println + if (queryText.equals(query)) { + println("callback getting invoked for following query" + + query + " queryText :" + queryText) + if (executeOnSpark) { + println("callback getting invoked for following query : asserting spark") + assert(engine == ExecutionEngine.SPARK) + } + else { + println("callback getting invoked for following query : asserting store") + assert(engine == ExecutionEngine.STORE) + } + } + // scalastyle:on println + } + } + + GemFireXDQueryObserverHolder.setInstance(executionEngineObserver) + + } + } + + hook.run() + vm0.invoke(hook) + vm1.invoke(hook) + vm2.invoke(hook) + vm3.invoke(hook) + } + +// override def setTestHook(): Unit = { +// val hook = new SerializableRunnable { +// override def run() { +// ExecutionEngineArbiter.setTestHookCostThreshold(100) +// 
} +// } +// hook.run() +// vm0.invoke(hook) +// vm1.invoke(hook) +// vm2.invoke(hook) +// vm3.invoke(hook) +// } + + +} + +trait ExecutionEngineArbiterTestBase { + def setObserver(executeOnSpark: Boolean, query: String); + + def startNetServer: String + + def stopNetServer(): Unit + + // def setTestHook: Unit + + def createRowTableAndInsertData(snc: SnappyContext, tableName: String, + props: Map[String, String] = Map.empty): Unit = { + val sc = snc.sparkContext + val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), + Seq(4, 2, 3), Seq(5, 6, 7)) + val rdd = sc.parallelize(data, data.length).map(s => + Data(s.head, s(1).toString, Decimal(s(1).toString + '.' + s(2)))) + val dataDF = snc.createDataFrame(rdd) + snc.createTable(tableName, "row", dataDF.schema, props) + dataDF.write.format("row").mode(SaveMode.Append) + .saveAsTable(tableName) + } + + + def outerJoinExecutionEngineRule(snc: SnappyContext): Unit = { + val testTable = "testTable1" + val testTable1 = "testTable2" + val testsubQueryTable1 = "testTable3" + + createRowTableAndInsertData(snc, testTable) + createRowTableAndInsertData(snc, testTable1, Map("PARTITION_BY" -> "COL1")) + createRowTableAndInsertData(snc, testsubQueryTable1, Map("PARTITION_BY" -> "COL1")) + + val serverHostPort = startNetServer + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + runAndValidateQuery(conn, true, s"select t.col1 from $testTable t " + + s"LEFT OUTER JOIN $testTable1 t1 on t.col1 = t1.col1 ") + + val s = conn.createStatement() + s.execute(s"drop table $testTable") + s.execute(s"drop table $testTable1") + s.execute(s"drop table $testsubQueryTable1") + + s.close() + conn.close() + + stopNetServer + } + + def runAndValidateQuery(conn: Connection, isSparkExecution: Boolean, query: + String, isUpdate: Boolean = false): Unit = { + setObserver(isSparkExecution, query) + val s = conn.createStatement() + if (isUpdate) s.executeUpdate(query) + else { + s.execute(query) + } + s.close() + } + +// 
def runAndValidateQueryForCostBasedRouting(conn: Connection, isSparkExecution: Boolean, query: +// String, isUpdate: Boolean = false): Unit = { +// setTestHook +// runAndValidateQuery(conn, isSparkExecution, query, isUpdate) +// } +

  /**
   * Expects DISTINCT queries, including DISTINCT inside or around an IN
   * subquery, to be executed on Spark.
   */
  def distinctExecutionEngineRule(snc: SnappyContext): Unit = {
    val testTable = "testTable1"
    val testTable2 = "testTable2"

    createRowTableAndInsertData(snc, testTable)
    createRowTableAndInsertData(snc, testTable2, Map("PARTITION_BY" -> "COL1"))

    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    runAndValidateQuery(conn, true, s"select distinct col2 from $testTable2")
    runAndValidateQuery(conn, true, s" select col2 from $testTable where col1 in " +
        s"(select distinct col1 from $testTable2)")
    runAndValidateQuery(conn, true, s"select distinct(col2) from $testTable2 where col1 in" +
        s" (select col1 from $testTable)")

    val s = conn.createStatement()
    s.execute(s"drop table $testTable")
    s.execute(s"drop table $testTable2")

    s.close()
    conn.close()

    stopNetServer()
  }

  /**
   * Runs a LIMIT query without a hint (store expected), then forces it onto
   * the store with a hint and expects SQLState 42X01.
   */
  def queryHintWithException(snc: SnappyContext): Unit = {
    val testTable = "testTable1"

    createRowTableAndInsertData(snc, testTable, Map("PARTITION_BY" -> "COL1"))
    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    // The connection used to be left open; close it on every path.
    try {
      runAndValidateQuery(conn, false,
        s"select * from $testTable limit 1")

      // Execute a query on Store side with wrong syntax that works on spark engine
      try {
        runAndValidateQuery(conn, false,
          s"select * from $testTable -- GEMFIREXD-PROPERTIES executionEngine=Store\n limit 1")
        DistributedTestBase.fail("Expected syntax error as query was supposed " +
            "to be executed on store with limit clause",
          new TestException("Expected Exception"))
      }
      catch {
        case sqe: SQLException =>
          if ("42X01" != sqe.getSQLState) {
            throw sqe
          }
      }
    } finally {
      conn.close()
    }
  }

  /**
   * Expects an IN-list predicate on an indexed column to be executed on the store.
   * The generated rows only supply the schema; they are never written to the table.
   */
  def queryGetAllConvertibleEngineRule(snc: SnappyContext): Unit = {
    val testTable = "testTable1"

    val sc = snc.sparkContext
    // Eight fixed rows plus 1000 random ones, built in one pass instead of
    // appending to a var inside a loop.
    val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3),
      Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0), Seq(3, 9, 3)) ++
        Seq.fill(1000)(Seq.fill(3)(Random.nextInt(10)))
    val rdd = sc.parallelize(data, data.length).map(s =>
      IndexData(s.head, s(1), Decimal(s(1).toString + '.' + s(2))))

    val dataDF = snc.createDataFrame(rdd)
    snc.createTable(testTable, "row", dataDF.schema, Map("PARTITION_BY" -> "COL1"))
    snc.sql(s"create index col2index on $testTable(col2)")

    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    try {
      val query: String = s"select col1, col3 from $testTable where col2 IN (2,8,6)"
      runAndValidateQuery(conn, false, query)
    } finally {
      conn.close()
    }
  }

  /**
   * Expects equality lookups on the primary key and on a secondary indexed
   * column, with and without store/index hints, to be executed on the store.
   */
  def queryPrimaryWithIndex(snc: SnappyContext): Unit = {
    snc.sql("create table tabOne(id1 int not null primary key, id2 int not null, " +
        "name String, address String) USING row OPTIONS(partition_by 'id1')")
    snc.sql("insert into tabOne values(111, 123, 'aaa', 'hello')")
    snc.sql("insert into tabOne values(222, 234, 'bbb', 'halo')")

    snc.sql("insert into tabOne values(333, 123, 'aaa', 'hello')")
    snc.sql("insert into tabOne values(444, 234, 'bbb', 'halo')")
    snc.sql("insert into tabOne values(555, 234, 'ccc', 'halo')")
    snc.sql("insert into tabOne values(666, 234, 'ccc', 'halo')")

    snc.sql("create index indexOne on tabOne (id1)")
    snc.sql("create index indexTwo on tabOne (id2)")

    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    try {
      var query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexOne\n where id1 = 111"
      runAndValidateQuery(conn, false, query)

      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexTwo\n where id2 = 111"
      runAndValidateQuery(conn, false, query)

      query = "select * from tabOne where id2 = 111"
      runAndValidateQuery(conn, false, query)

      query = "select * from tabOne where id1 = 111"
      runAndValidateQuery(conn, false, query)


      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexOne\n where id1 = 111"
      runAndValidateQuery(conn, false, query)

      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexTwo\n where id2 = 111"
      runAndValidateQuery(conn, false, query)
    } finally {
      conn.close()
    }
  }

  /**
   * Combines executionEngine and index hints. A Spark hint is expected to route
   * to Spark even with an unknown index name; the final block expects SQLState
   * LANG_INVALID_FORCED_INDEX1 for the unknown index `indexThree`.
   */
  def queryWithMultipleHint(snc: SnappyContext): Unit = {
    snc.sql("create table tabOne(id1 int not null primary key, id2 int not null, " +
        "name String, address String) USING row OPTIONS(partition_by 'id1')")
    snc.sql("insert into tabOne values(111, 123, 'aaa', 'hello')")
    snc.sql("insert into tabOne values(222, 234, 'bbb', 'halo')")

    snc.sql("insert into tabOne values(333, 123, 'aaa', 'hello')")
    snc.sql("insert into tabOne values(444, 234, 'bbb', 'halo')")
    snc.sql("insert into tabOne values(555, 234, 'ccc', 'halo')")
    snc.sql("insert into tabOne values(666, 234, 'ccc', 'halo')")

    snc.sql("create index indexOne on tabOne (id1)")
    snc.sql("create index indexTwo on tabOne (id2)")

    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    try {
      var query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexOne\n where id1 = 111"
      runAndValidateQuery(conn, false, query)

      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Store,index=indexTwo\n where id2 = 111"
      runAndValidateQuery(conn, false, query)
      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Spark,index=indexTwo\n where id2 = 111"
      runAndValidateQuery(conn, true, query)

      // TODO: We may throw exception in future.
      query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
          "executionEngine=Spark,index=indexThree\n where id2 = 111"
      runAndValidateQuery(conn, true, query)

      query = "select * from tabOne --GEMFIREXD-PROPERTIES index=indexTwo\n where id2 = 111"
      runAndValidateQuery(conn, false, query)

      try {
        query = "select * from tabOne --GEMFIREXD-PROPERTIES index=indexThree\n where id2 = 111"
        runAndValidateQuery(conn, true, query)

        // store query hint
        query = "select * from tabOne --GEMFIREXD-PROPERTIES " +
            "executionEngine=Store, index=indexThree\n where id2 = 111"
        // this should not route but throw exception
        runAndValidateQuery(conn, true, query)
        DistributedTestBase.fail("Expected syntax error as query has wrong index hint",
          new TestException("Expected Exception"))
      }
      catch {
        case sqe: SQLException =>
          if (sqe.getSQLState != SQLState.LANG_INVALID_FORCED_INDEX1) throw sqe
      }
    } finally {
      conn.close()
    }
  }

  /**
   * Expects range predicates to route to Spark and equality predicates on an
   * indexed column to the store, unless a hint overrides it. Repeated for col2
   * and, after creating a second index, for col1.
   */
  def queryIndexEngineRule(snc: SnappyContext): Unit = {
    val testTable = "testTable1"

    val sc = snc.sparkContext
    val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3),
      Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0), Seq(3, 9, 3))

    val rdd = sc.parallelize(data, data.length).map(s =>
      IndexData(s.head, s(1), Decimal(s(1).toString + '.' + s(2))))

    val dataDF = snc.createDataFrame(rdd)
    snc.createTable(testTable, "row", dataDF.schema, Map("PARTITION_BY" -> "COL1"))
    snc.sql(s"create index col2index on $testTable(col2)")

    val conn = DriverManager.getConnection(
      "jdbc:snappydata://" + startNetServer)

    try {
      var query = s"select col1, col3 from $testTable where col2 > 1"
      runAndValidateQuery(conn, true, query)

      query = s"select col1, col3 from $testTable where col2 = 2"
      runAndValidateQuery(conn, false, query)

      query = s"select col1, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Spark\n where col2 = 2"
      runAndValidateQuery(conn, true, query)

      query = s"select col1, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Store\n where col2 = 2"
      runAndValidateQuery(conn, false, query)

      query = s"select col1, col3 from $testTable where col2 > 1"
      runAndValidateQuery(conn, true, query)

      query = s"select col1, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Store\n where col2 > 1"
      runAndValidateQuery(conn, false, query)

      query = s"select col1, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Spark\n where col2 > 1"
      runAndValidateQuery(conn, true, query)

      snc.sql(s"create index col1index on $testTable(col1)")

      query = s"select col2, col3 from $testTable where col1 = 3"
      runAndValidateQuery(conn, false, query)

      query = s"select col2, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Spark\n where col1 = 3"
      runAndValidateQuery(conn, true, query)

      query = s"select col2, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Store\n where col1 = 3"
      runAndValidateQuery(conn, false, query)

      query = s"select col2, col3 from $testTable where col1 > 1"
      runAndValidateQuery(conn, true, query)

      query = s"select col2, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Store\n where col1 > 1"
      runAndValidateQuery(conn, false, query)

      query = s"select col2, col3 from $testTable -- GEMFIREXD-PROPERTIES " +
          s"executionEngine=Spark\n where col1 > 1"
      runAndValidateQuery(conn, true, query)
    } finally {
      conn.close()
    }

  }

/* + def indexSelectivityEngineRule(snc: SnappyContext): Unit = { + val testTable = "testTable1" + + val sc = snc.sparkContext + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), + Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0), Seq(3, 9, 3)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt(10)) + } + val rdd = sc.parallelize(data, data.length).map(s => + IndexData(s.head, s(1), Decimal(s(1).toString + '.' + s(2)))) + + val dataDF = snc.createDataFrame(rdd) + snc.createTable(testTable, "row", dataDF.schema, Map("PARTITION_BY" -> "COL1")) + snc.sql(s"create index col1index on $testTable(col1)") + snc.sql(s"create index col2index on $testTable(col2)") + + dataDF.write.format("row").mode(SaveMode.Append).saveAsTable(testTable) + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + startNetServer) + + runAndValidateQueryForCostBasedRouting(conn, false, + s"select col1, count(*) from $testTable WHERE col2 = 2 group by col1") + + runAndValidateQueryForCostBasedRouting(conn, false, + s"select col1, col3 from $testTable WHERE col2 > 3") + + 1 to 10000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt(10)) + } + val rdd2 = sc.parallelize(data, data.length).map(s => + IndexData(s.head, s(1), Decimal(s(1).toString + '.'
+ s(2)))) + val dataDF2 = snc.createDataFrame(rdd2) + + dataDF2.write.format("row").mode(SaveMode.Overwrite) + .saveAsTable(testTable) + + runAndValidateQueryForCostBasedRouting(conn, true, + s"select col1, count(*) from $testTable WHERE col2 = 2 group by col1") + + runAndValidateQueryForCostBasedRouting(conn, true, + s"select col1, col3 from $testTable WHERE col2 > 3") + + } +*/ + + def queryHint(snc: SnappyContext): Unit = { + val testTable = "testTable1" + val testTable1 = "testTable2" + + createRowTableAndInsertData(snc, testTable) + createRowTableAndInsertData(snc, testTable1, Map("PARTITION_BY" -> "COL1")) + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + startNetServer) + + // Execute a replicate table query on Spark engine + runAndValidateQuery(conn, true, + s"select * from $testTable -- GEMFIREXD-PROPERTIES executionEngine=Spark") + + // execute distinct query on partitioned table on store + runAndValidateQuery(conn, false, + s"select distinct col1 from $testTable1" + + s" -- GEMFIREXD-PROPERTIES executionEngine=Store") + + + } + + def queryMultipleQueryHint(snc: SnappyContext): Unit = { + val testTable = "testTable1" + val testTable1 = "testTable2" + + createRowTableAndInsertData(snc, testTable) + createRowTableAndInsertData(snc, testTable1, Map("PARTITION_BY" -> "COL1")) + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + startNetServer) + + runAndValidateQuery(conn, true, + s"select * from $testTable -- GEMFIREXD-PROPERTIES executionEngine=Spark") + + + // Execute a replicate table query on Spark engine + runAndValidateQuery(conn, true, + s"select * from $testTable -- GEMFIREXD-PROPERTIES executionEngine=Spark") + + // execute distinct query on partitioned table on store + runAndValidateQuery(conn, false, + s"select distinct col1 from $testTable1" + + s" -- GEMFIREXD-PROPERTIES executionEngine=Store") + + } + + + def replicatedTableExecutionEngineRule(snc: SnappyContext): Unit = { + val testTable = "testTable" + val 
testTable1 = "testTable1" + val testTable2 = "testTable2" + + val serverHostPort = startNetServer + + createRowTableAndInsertData(snc, testTable) + createRowTableAndInsertData(snc, testTable1) + createRowTableAndInsertData(snc, testTable2) + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + + // test for distinct queries + runAndValidateQuery(conn, true, s"select distinct col1 from $testTable") + runAndValidateQuery(conn, true, s"select col1 from $testTable where col2 in " + + s"(select distinct col2 from $testTable2)") + runAndValidateQuery(conn, true, s"select sum(col1) from $testTable group by col2") + + runAndValidateQuery(conn, true, s"select col1 from $testTable where col1 in" + + s" (select avg(col1) from $testTable2 group by col2)") + + // test for union queries + runAndValidateQuery(conn, true, s"select col1 " + + s"from $testTable union select col1 from $testTable1") + runAndValidateQuery(conn, true, s"select * from $testTable2 where col1 in " + + s"( select col1 from $testTable union select col1 from $testTable1)") + + + // test intersect queries + runAndValidateQuery(conn, true, s"select col1 from $testTable " + + s"intersect select col1 from $testTable1") + + runAndValidateQuery(conn, true, s"select * from $testTable2 where col1 in " + + s"( select col1 from $testTable intersect select col1 from $testTable1)") + + val s = conn.createStatement() + s.execute(s"drop table $testTable") + s.execute(s"drop table $testTable1") + s.execute(s"drop table $testTable2") + + } + + def groupByExecutionEngineRule(snc: SnappyContext): Unit = { + val testTable = "testTable" + val testTable1 = "testTable1" + val testTable2 = "testTable2" + + createRowTableAndInsertData(snc, testTable) + createRowTableAndInsertData(snc, testTable1, Map("PARTITION_BY" -> "COL2")) + createRowTableAndInsertData(snc, testTable2, Map("PARTITION_By" -> "COL2")) + + + val serverHostPort = startNetServer + + val conn = DriverManager.getConnection( + 
"jdbc:snappydata://" + serverHostPort) + + runAndValidateQuery(conn, true, s"select count(*) from $testTable1 group by col1") + + + runAndValidateQuery(conn, true, s"select count(col1) from $testTable1 " + + s" where col1 in ( 5, 1, 2, 4, 5, 6,7,8,9,10) group by col3 ") + + runAndValidateQuery(conn, true, s" select sum(t1.col1) from $testTable1 t1 , $testTable t2 " + + s"where t1.col1 = t2.col1 group by t1.col2") + + runAndValidateQuery(conn, true, s" select * from $testTable1 t1 where col1 in " + + s"(select avg(col1) from $testTable group by col2)") + + runAndValidateQuery(conn, true, s"create index testIndex on $testTable1(col1)", true) + + runAndValidateQuery(conn, true, s"select count(col1) from $testTable1 " + + s" where col1 in ( 5, 1, 2, 4, 5, 6,7,8,9,10) group by col3 ") + + runAndValidateQuery(conn, true, s" select sum(t1.col1) from $testTable1 t1 , $testTable t2 " + + s"where t1.col1 = t2.col1 group by t1.col2") + + runAndValidateQuery(conn, true, s"select sum(col1) from" + + s" $testTable2 where col2 in (select col1 from $testTable1 " + + s"where col1 in (1,2,3) group by col1)") + + runAndValidateQuery(conn, true, s"drop table $testTable1") + + runAndValidateQuery(conn, true, + s" create table $testTable1 (col1 int primary key , col2 int , col3 int ) " + + s"using row options (" + "PARTITION_BY 'PRIMARY KEY'" + ")", true) + + runAndValidateQuery(conn, true, s"select sum(col1) from" + + s" $testTable2 where col2 in (select col1 from $testTable1 " + + s"where col1 in (1,2,3) group by col1)") + + // create index on col2 and then check for query execution again + + val s = conn.createStatement() + s.execute(s"drop table $testTable") + s.execute(s"drop table $testTable1") + s.execute(s"drop table $testTable2") + + s.close() + conn.close() + + stopNetServer + } + + def createTables_SNAP1507(snc: SnappyContext, tableType: String): Unit = { + snc.sql("drop table if exists TABLE1") + snc.sql("create table TABLE1 (" + + "T1_COL1 varchar(5)" + + ", T1_COL2 
varchar(18)" + + ", T1_COL3 varchar(20)" + + ", T1_COL4 timestamp" + + ", T1_COL5 timestamp" + + ", T1_COL6 numeric(20,10)" + + ", T1_COL7 numeric(20,10)" + + " , T1_COL8 varchar(20)" + + s") USING $tableType OPTIONS(PARTITION_BY 'T1_COL1', PERSISTENT 'ASYNCHRONOUS')") + + snc.sql("drop table if exists TABLE2") + snc.sql("create table TABLE2 (" + + "T2_COL1 varchar(5)" + + ", T2_COL2 varchar(18)" + + ", T2_COL3 varchar(20)" + + ", T2_COL4 timestamp" + + ", T2_COL5 varchar(20)" + + ", T2_COL6 timestamp" + + s") USING $tableType OPTIONS(PARTITION_BY 'T2_COL1', PERSISTENT 'ASYNCHRONOUS')") + + snc.sql("drop table if exists TABLE3") + snc.sql("create table TABLE3 (" + + "T3_COL1 varchar(5)" + + ", T3_COL2 varchar(100)" + + ", T3_COL3 varchar(40)" + + ", T3_COL4 varchar(50)" + + ", T3_COL5 timestamp" + + ", T3_COL6 timestamp" + + ", T3_COL7 varchar(20)" + + ", T3_COL8 varchar(100)" + + s") USING $tableType OPTIONS(PARTITION_BY 'T3_COL1', PERSISTENT 'ASYNCHRONOUS')") + } + + def nestedSubQuery(snc: SnappyContext): Unit = { + createTables_SNAP1507(snc, "COLUMN") + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + startNetServer) + + val query = "select T1_COL1, T1_COL2, T1_COL3, T1_COL4, T1_COL5, T1_COL6, T1_COL7," + + " T1_COL8 from TABLE1 as tab1 where exists (select * from " + + "TABLE2 as tab2 where exists (select * from " + + "TABLE3 as tab3 where T3_COL1 = 'HMC01'))" + + runAndValidateQuery(conn, true, query) + + createTables_SNAP1507(snc, "ROW") + runAndValidateQuery(conn, true, query) + + stopNetServer() + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/PreparedQueryRoutingDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/PreparedQueryRoutingDUnitTest.scala new file mode 100644 index 0000000000..79b233110a --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/PreparedQueryRoutingDUnitTest.scala @@ -0,0 +1,911 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.cluster + +import java.sql._ +import java.time.LocalDate +import java.util.concurrent.atomic.AtomicBoolean + +import com.pivotal.gemfirexd.TestUtil +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import io.snappydata.test.dunit.AvailablePortHelper +import org.apache.spark.Logging +import org.apache.spark.sql.SnappyContext + +import scala.util.Random + +/** + * Tests for query routing from JDBC client driver. 
+ */ +class PreparedQueryRoutingDUnitTest(val s: String) + extends ClusterManagerTestBase(s) with Logging { + + // set default batch size for this test + private val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE + var serverHostPort = -1 + + override def tearDown2(): Unit = { + // reset the chunk size on lead node + setDMLMaxChunkSize(default_chunk_size) + super.tearDown2() + } + + def setDMLMaxChunkSize(size: Long): Unit = { + GemFireXDUtils.DML_MAX_CHUNK_SIZE = size + } + + def test1_PrepStatementRouting(): Unit = { + val tableName = "order_line_col_test1" + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test1: network server started at $serverHostPort") + // scalastyle:on println + + val snc = SnappyContext(sc) + snc.sql(s"create table $tableName (ol_int_id decimal(6,2)," + + s" ol_int2_id integer, ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')") + + insertRows_test1(1000, tableName) + + // (1 to 5).foreach(d => query()) + query_test1(tableName) + // fire queries on views + snc.sql(s"create view $tableName" + s"_view as select * from $tableName") + query_test1(tableName + "_view") + } + + def insertRows_test1(numRows: Int, tableName: String): Unit = { + + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + val rows = (1 to numRows) + val stmt = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + stmt.addBatch(s"insert into $tableName values($i.11, $i, '$i')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + // scalastyle:off println + println(s"committed $numRows rows") + // scalastyle:on println + } finally { + stmt.close() + conn.close() + } + } + + def verifyQuery_test1(qryTest: String, prep_rs: ResultSet, stmt_rs: ResultSet, + expectedNoRows: Int): Unit = { + val 
builder = StringBuilder.newBuilder + var index = 0 + var assertionFailed = false + while (prep_rs.next() && stmt_rs.next()) { + val prep_i = prep_rs.getBigDecimal(1) + val prep_j = prep_rs.getInt(2) + val prep_s = prep_rs.getString(3) + + val stmt_i = stmt_rs.getBigDecimal(1) + val stmt_j = stmt_rs.getInt(2) + val stmt_s = stmt_rs.getString(3) + + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + + if (prep_i != stmt_i && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_i stmt=$stmt_i").append("\n") + assertionFailed = true + } + + if (prep_j != stmt_j && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_j stmt=$stmt_j").append("\n") + assertionFailed = true + } + + if (prep_s != stmt_s && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_s stmt=$stmt_s").append("\n") + assertionFailed = true + } + + index += 1 + } + + while (prep_rs.next()) { + if (!assertionFailed) { + builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed = true + } + + val prep_i = prep_rs.getBigDecimal(1) + val prep_j = prep_rs.getInt(2) + val prep_s = prep_rs.getString(3) + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + index += 1 + } + + while (stmt_rs.next()) { + if (!assertionFailed) { + builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed = true + } + + val stmt_i = stmt_rs.getBigDecimal(1) + val stmt_j = stmt_rs.getInt(2) + val stmt_s = stmt_rs.getString(3) + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + index += 1 + } + + if (index != expectedNoRows) { + if (!assertionFailed) { + builder.append(s"Assertion failed: got number of rows=$index " + + s"expected=$expectedNoRows").append("\n") + assertionFailed = true + } + } + + if 
(assertionFailed) { + // scalastyle:off println + println(builder.toString()) + // scalastyle:on println + } + + assert(!assertionFailed) + + prep_rs.close() + stmt_rs.close() + } + + def query1_like_clause_test1(limit: Boolean, tableName: String): Unit = { + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"query1_like_clause: Connected to $serverHostPort") + // scalastyle:on println + val limitClause = if (limit) "limit 20" else "" + val stmt = conn.createStatement() + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id > 100 " + + s" and ol_str_id like ? " + + s" $limitClause" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setBigDecimal(1, new java.math.BigDecimal("500.11")) + prepStatement.setString(2, "%0") + val qry2 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < 500.11 " + + s" and ol_int2_id > 100 " + + s" and ol_str_id LIKE '%0' " + + s" $limitClause" + + s"" + verifyQuery_test1("query1_like_clause_test1.1", prepStatement.executeQuery, + stmt.executeQuery(qry2), if (limit) 20 else 39) + + prepStatement.setBigDecimal(1, new java.math.BigDecimal("800.11")) + prepStatement.setString(2, "%0") + val qry3 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < 800.11 " + + s" and ol_int2_id > 100 " + + s" and ol_str_id LIKE '%0' " + + s" $limitClause" + + s"" + verifyQuery_test1("query1_like_clause_test1.2", prepStatement.executeQuery, + stmt.executeQuery(qry3), if (limit) 20 else 69) + + // Thread.sleep(1000000) + + } finally { + stmt.close() + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query2_in_clause_test1(limit: Boolean, tableName: String): Unit = { + val conn = DriverManager.getConnection( 
+ "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"query2_in_clause: Connected to $serverHostPort") + // scalastyle:on println + val limitClause = if (limit) "limit 20" else "" + val stmt = conn.createStatement() + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id in (?, ?, ?) " + + s" and ol_str_id like ? " + + s" $limitClause" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setBigDecimal(1, new java.math.BigDecimal("500.11")) + prepStatement.setInt(2, 100) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 300) + prepStatement.setString(5, "%0") + val qry2 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < 500.11 " + + s" and ol_int2_id in (100, 200, 300) " + + s" and ol_str_id LIKE '%0' " + + s" $limitClause" + + s"" + verifyQuery_test1("query2_in_clause_test1.1", prepStatement.executeQuery, + stmt.executeQuery(qry2), 3) + + prepStatement = conn.prepareStatement(qry) + prepStatement.setBigDecimal(1, new java.math.BigDecimal("300.11")) + prepStatement.setInt(2, 110) + prepStatement.setInt(3, 120) + prepStatement.setInt(4, 130) + prepStatement.setString(5, "1%") + val qry3 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < 300.11 " + + s" and ol_int2_id in (110, 120, 130) " + + s" and ol_str_id LIKE '1%' " + + s" $limitClause" + + s"" + verifyQuery_test1("query2_in_clause_test1.2", prepStatement.executeQuery, + stmt.executeQuery(qry3), 3) + + // Thread.sleep(1000000) + + } finally { + stmt.close() + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query_test1(tableName: String): Unit = { + query1_like_clause_test1(true, tableName) + query1_like_clause_test1(false, tableName) + query2_in_clause_test1(true, tableName) + 
query2_in_clause_test1(false, tableName) + } + + def insertRows_test2(numRows: Int, tableName: String): Unit = { + + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + val rows = (1 to numRows) + val stmt = conn.createStatement() + try { + var i = 1 + val newDate = LocalDate.parse("2017-01-01") + rows.foreach(d => { + val e = newDate.plusDays(i) + stmt.addBatch(s"insert into $tableName values('$e', '$e', '$e')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + // scalastyle:off println + println(s"committed $numRows rows") + // scalastyle:on println + } finally { + stmt.close() + conn.close() + } + } + + def verifyQuery_test2(qryTest: String, prep_rs: ResultSet, stmt_rs: ResultSet, + expectedNoRows: Int): Unit = { + val builder = StringBuilder.newBuilder + var index = 0 + var assertionFailed = false + while (prep_rs.next() && stmt_rs.next()) { + val prep_i = prep_rs.getDate(1) + val prep_j = prep_rs.getDate(2) + val prep_s = prep_rs.getString(3) + + val stmt_i = stmt_rs.getDate(1) + val stmt_j = stmt_rs.getDate(2) + val stmt_s = stmt_rs.getString(3) + + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + + if (prep_i != stmt_i && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_i stmt=$stmt_i").append("\n") + assertionFailed = true + } + + if (prep_j != stmt_j && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_j stmt=$stmt_j").append("\n") + assertionFailed = true + } + + if (prep_s != stmt_s && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_s stmt=$stmt_s").append("\n") + assertionFailed = true + } + + index += 1 + } + + while (prep_rs.next()) { + if (!assertionFailed) { + builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed 
= true + } + + val prep_i = prep_rs.getDate(1) + val prep_j = prep_rs.getDate(2) + val prep_s = prep_rs.getString(3) + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + index += 1 + } + + while (stmt_rs.next()) { + if (!assertionFailed) { + builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed = true + } + + val stmt_i = stmt_rs.getDate(1) + val stmt_j = stmt_rs.getDate(2) + val stmt_s = stmt_rs.getString(3) + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + index += 1 + } + + if (index != expectedNoRows) { + if (!assertionFailed) { + builder.append(s"Assertion failed: got number of rows=$index " + + s"expected=$expectedNoRows").append("\n") + assertionFailed = true + } + } + + if (assertionFailed) { + // scalastyle:off println + println(builder.toString()) + // scalastyle:on println + } + + assert(!assertionFailed) + + prep_rs.close() + stmt_rs.close() + } + + def query1_test2(limit: Boolean, tableName: String): Unit = { + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"query1_test2: Connected to $serverHostPort") + // scalastyle:on println + val limitClause = if (limit) "limit 20" else "" + val stmt = conn.createStatement() + var prepStatement: java.sql.PreparedStatement = null + val oneDate = Date.valueOf("2017-03-15") + val twoDate = Date.valueOf("2017-02-28") + val threeDate = Date.valueOf("2017-03-31") + try { + val qry = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id > ? " + + s" and ol_str_id like ? 
" + + s" $limitClause" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setString(1, oneDate.toString) + prepStatement.setDate(2, twoDate) + prepStatement.setString(3, "%-%") + val qry2 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < '${oneDate.toString}' " + + s" and ol_int2_id > '${twoDate.toString}' " + + s" and ol_str_id LIKE '%-%' " + + s" $limitClause" + + s"" + verifyQuery_test2("query1_test2.1", prepStatement.executeQuery, stmt.executeQuery(qry2), 14) + + prepStatement.setString(1, threeDate.toString) + prepStatement.setDate(2, twoDate) + prepStatement.setString(3, "%-%") + val qry3 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < '${threeDate.toString}' " + + s" and ol_int2_id > '${twoDate.toString}' " + + s" and ol_str_id LIKE '%-%' " + + s" $limitClause" + + s"" + verifyQuery_test2("query1_test2.2", prepStatement.executeQuery, stmt.executeQuery(qry3), + if (limit) 20 else 30) + + // Thread.sleep(1000000) + + } finally { + stmt.close() + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query_test2(tableName: String): Unit = { + query1_test2(true, tableName: String) + query1_test2(false, tableName: String) + } + + def test2_date(): Unit = { + val tableName = "order_line_col_test2" + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test2: network server started at $serverHostPort") + // scalastyle:on println + + val snc = SnappyContext(sc) + snc.sql(s"create table $tableName (ol_int_id date," + + s" ol_int2_id date, ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')") + + insertRows_test2(1000, tableName) + query_test2(tableName) + } + + def insertRows_test3(numRows: Int, tableName: String): Unit = { + + val conn = 
DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + val rows = (1 to numRows) + val stmt = conn.createStatement() + try { + var i = 1 + import java.time.format.DateTimeFormatter + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + val newTimestamp = java.time.LocalDateTime.parse("2017-01-01 10:00:00", formatter) + rows.foreach(d => { + val e = newTimestamp.plusDays(i).format(formatter) + stmt.addBatch(s"insert into $tableName values('$e', '$e', '$e')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + // scalastyle:off println + println(s"committed $numRows rows") + // scalastyle:on println + } finally { + stmt.close() + conn.close() + } + } + + def verifyQuery_test3(qryTest: String, prep_rs: ResultSet, stmt_rs: ResultSet, + expectedNoRows: Int): Unit = { + val builder = StringBuilder.newBuilder + var index = 0 + var assertionFailed = false + while (prep_rs.next() && stmt_rs.next()) { + val prep_i = prep_rs.getTimestamp(1) + val prep_j = prep_rs.getTimestamp(2) + val prep_s = prep_rs.getString(3) + + val stmt_i = stmt_rs.getTimestamp(1) + val stmt_j = stmt_rs.getTimestamp(2) + val stmt_s = stmt_rs.getString(3) + + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + + if (prep_i != stmt_i && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_i stmt=$stmt_i").append("\n") + assertionFailed = true + } + + if (prep_j != stmt_j && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_j stmt=$stmt_j").append("\n") + assertionFailed = true + } + + if (prep_s != stmt_s && !assertionFailed) { + builder.append(s"Assertion failed at index=$index prep=$prep_s stmt=$stmt_s").append("\n") + assertionFailed = true + } + + index += 1 + } + + while (prep_rs.next()) { + if (!assertionFailed) { + 
builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed = true + } + + val prep_i = prep_rs.getTimestamp(1) + val prep_j = prep_rs.getTimestamp(2) + val prep_s = prep_rs.getString(3) + builder.append(s"$qryTest Prep: row($index) $prep_i $prep_j $prep_s ").append("\n") + index += 1 + } + + while (stmt_rs.next()) { + if (!assertionFailed) { + builder.append(s"Assertion failed at index=$index").append("\n") + assertionFailed = true + } + + val stmt_i = stmt_rs.getTimestamp(1) + val stmt_j = stmt_rs.getTimestamp(2) + val stmt_s = stmt_rs.getString(3) + builder.append(s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + index += 1 + } + + if (index != expectedNoRows) { + if (!assertionFailed) { + builder.append(s"Assertion failed: got number of rows=$index " + + s"expected=$expectedNoRows").append("\n") + assertionFailed = true + } + } + + if (assertionFailed) { + // scalastyle:off println + println(builder.toString()) + // scalastyle:on println + } + + assert(!assertionFailed) + + prep_rs.close() + stmt_rs.close() + } + + def query1_test3(limit: Boolean, tableName: String): Unit = { + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"query1_test2: Connected to $serverHostPort") + // scalastyle:on println + val limitClause = if (limit) "limit 20" else "" + val stmt = conn.createStatement() + var prepStatement: java.sql.PreparedStatement = null + val oneTs = java.sql.Timestamp.valueOf("2017-03-15 12:02:03") + val twoTs = java.sql.Timestamp.valueOf("2017-02-28 12:02:04") + val threeTs = java.sql.Timestamp.valueOf("2017-03-31 12:02:04") + try { + val qry = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id > ? " + + s" and ol_str_id like ? 
" + + s" $limitClause" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setString(1, oneTs.toString) + prepStatement.setTimestamp(2, twoTs) + prepStatement.setString(3, "%-%") + val qry2 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < '${oneTs.toString}' " + + s" and ol_int2_id > '${twoTs.toString}' " + + s" and ol_str_id LIKE '%-%' " + + s" $limitClause" + + s"" + verifyQuery_test3("query1_test3.1", prepStatement.executeQuery, stmt.executeQuery(qry2), 15) + + prepStatement.setString(1, threeTs.toString) + prepStatement.setTimestamp(2, twoTs) + prepStatement.setString(3, "%-%") + val qry3 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < '${threeTs.toString}' " + + s" and ol_int2_id > '${twoTs.toString}' " + + s" and ol_str_id LIKE '%-%' " + + s" $limitClause" + + s"" + verifyQuery_test3("query1_test3.1", prepStatement.executeQuery, stmt.executeQuery(qry3), + if (limit) 20 else 31) + + // Thread.sleep(1000000) + + } finally { + stmt.close() + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query_test3(tableName: String): Unit = { + query1_test3(true, tableName: String) + query1_test3(false, tableName: String) + } + + def test3_timestamp(): Unit = { + val tableName = "order_line_col_test3" + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test3: network server started at $serverHostPort") + // scalastyle:on println + + val snc = SnappyContext(sc) + snc.sql(s"create table $tableName (ol_int_id timestamp," + + s" ol_int2_id timestamp, ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')") + + insertRows_test3(1000, tableName) + query_test3(tableName) + } + + def test4_update_delete_on_column_table(): Unit = { + serverHostPort = 
AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test4_update_delete_on_column_table: network server started at $serverHostPort") + // scalastyle:on println + val snc = SnappyContext(sc) + PreparedQueryRoutingSingleNodeSuite.updateDeleteOnColumnTable(snc, s"localhost:$serverHostPort") + } + + def test5_equalityOnStringColumn(): Unit = { + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test5_equalityOnStringColumn: network server started at $serverHostPort") + // scalastyle:on println + val snc = SnappyContext(sc) + PreparedQueryRoutingSingleNodeSuite.equalityOnStringColumn(snc, s"localhost:$serverHostPort") + } + + def test_prepStmntManyThreads(): Unit = { + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"test2: network server started at $serverHostPort") + // scalastyle:on println + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"query1_test2: Connected to $serverHostPort") + val stmt = conn.createStatement() + createTableAndPopulateSomeData(conn) + val keepRunning = new AtomicBoolean(true) + createNThreadsAndPrepExecute(10, keepRunning) + assert(keepRunning.get()) + } + + def createTableAndPopulateSomeData(conn: Connection): Boolean = { + val stmnt = conn.createStatement() + stmnt.execute("create table test(col1 int, col2 int not null, col3 string, " + + "col4 long, col5 date, col6 timestamp not null, col7 decimal(4, 2))" + + " using column options()") + val prepStmntInsert = conn.prepareCall("insert into test values(?, ?, ?, ?, ?, ?, ?)") + for ( i <- 0 until 10000) { + prepStmntInsert.setInt(1, i) 
+ prepStmntInsert.setInt(2, i*2) + prepStmntInsert.setString(3, s"aaa$i") + prepStmntInsert.setLong(4, i*100L) + prepStmntInsert.setString(5, "2019-05-23") + prepStmntInsert.setString(6, "2019-05-23 00:01:10") + prepStmntInsert.setDouble(7, 10.22) + prepStmntInsert.executeUpdate() + } + true + } + + def createNThreadsAndPrepExecute(i: Int, keepRunning: AtomicBoolean): Unit = { + val queries = scala.Array("select * from test where col1 < ? and col3 = ?", + "select col1 from test where col2 in (?, ?, ?)", + "select col1 from test where col1 = ? and (col2 > ? or col2 < ?)", + "select * from test where col7 = ?", + "select avg(col1) from test") + + class Runner extends Runnable { + override def run(): Unit = { + var cnt = 0; + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + try { + while ((cnt < 20) && keepRunning.get()) { + cnt += 1 + val qNum = Random.nextInt(5) + val prepquery = conn.prepareCall(queries(qNum)) + qNum match { + case 0 => + prepquery.setInt(1, 10) + prepquery.setString(2, "aaa10") + prepquery.execute() + val rs = prepquery.getResultSet + assert(rs.next()) + rs.close() + case 1 => + prepquery.setInt(1, 100) + prepquery.setInt(2, 10) + prepquery.setInt(3, 1000) + prepquery.execute() + val rs = prepquery.getResultSet + assert(rs != null) + assert(rs.next()) + assert(rs.next()) + assert(rs.next()) + rs.close() + case 2 => + prepquery.setInt(1, 100) + prepquery.setInt(2, 10) + prepquery.setInt(3, 1000) + prepquery.execute() + val rs = prepquery.getResultSet + assert(rs != null) + rs.close() + case 3 => + prepquery.setInt(1, 1000) + prepquery.execute() + val rs = prepquery.getResultSet + assert(rs != null) + assert(rs.next()) + rs.close() + case 4 => + prepquery.execute() + val rs = prepquery.getResultSet + assert(rs.next()) + rs.close() + } + } + } catch { + case se: SQLException => + logInfo(s"exception got = $se with state ${se.getSQLState}", se) + keepRunning.set(false) + } + } + } + val allThreads: 
scala.Array[Thread] = scala.Array.ofDim(i) + for ( t <- 0 until i) allThreads(t) = new Thread(new Runner()) + for ( t <- 0 until i) allThreads(t).start() + for ( t <- 0 until i) allThreads(t).join(180000) + assert(keepRunning.get()) + } + + + def testSNAP2254(): Unit = { + serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"testSNAP2254: network server started at $serverHostPort") + // scalastyle:on println + val conn = DriverManager.getConnection( + "jdbc:snappydata://localhost:" + serverHostPort) + + // scalastyle:off println + println(s"testSNAP2254: Connected to $serverHostPort") + val stmt = conn.createStatement() + + viewQueryTest1(conn, stmt) + viewQueryTest2(conn, stmt) + } + + private def viewQueryTest1(conn: Connection, stmt: Statement): Unit = { + stmt.execute("create table t1 (col1 int, col2 string, col3 date) using row") + val ps1 = conn.prepareStatement("insert into t1 values (?, ?, ?)") + for (i <- 1 to 100) { + ps1.setInt(1, i) + ps1.setString(2, s"$i") + ps1.setString(3, "2019-06-09") + ps1.addBatch() + } + ps1.executeBatch() + ps1.close() + + stmt.execute("create view t1view as select col1 as c1, col2 as c2 from t1") + + val ps2 = conn.prepareStatement("select c1, c2 from t1view where c1 = ? 
and c2 = ?") + val parameterMetaData = ps2.getParameterMetaData + assert(parameterMetaData.getParameterCount == 2) + assert(parameterMetaData.getParameterType(1) == java.sql.Types.INTEGER) + assert(parameterMetaData.getParameterType(2) == java.sql.Types.CLOB) + ps2.setInt(1, 5) + ps2.setString(2, "5") + + val rs1 = ps2.executeQuery() + val resultSetMetaData = rs1.getMetaData + assert(resultSetMetaData.getColumnCount == 2) + assert(resultSetMetaData.getColumnName(1).equalsIgnoreCase("c1")) + assert(resultSetMetaData.getColumnName(2).equalsIgnoreCase("c2")) + assert(resultSetMetaData.getColumnType(1) == java.sql.Types.INTEGER) + assert(resultSetMetaData.getColumnType(2) == java.sql.Types.CLOB) + assert(rs1.next()) + assert(rs1.getInt(1) == 5) + assert(rs1.getString(2).equals("5")) + rs1.close() + ps2.close() + } + + private def viewQueryTest2(conn: Connection, stmt: Statement): Unit = { + stmt.execute("create table t2 (col21 int, col22 string, col23 timestamp, col24 boolean)") + stmt.execute("create table t3 (col31 int, col32 string, col33 timestamp, col34 boolean)") + stmt.execute("create view view2 as select t2.col21 as c1, t2.col22 as c2, t2.col23 as" + + " c3 from t2 join t3 on t2.col21 = t3.col31") + + def insertData(table: String): Unit = { + val ps1 = conn.prepareStatement(s"insert into $table values (?, ?, ?)") + for (i <- 1 to 100) { + ps1.setInt(1, i) + ps1.setString(2, s"$i") + ps1.setString(3, "2019-06-09 04:04:10") + ps1.addBatch() + } + ps1.executeBatch() + ps1.close() + } + + insertData("t2") + insertData("t3") + + + val ps3 = conn.prepareStatement("select * from view2 where c1 = ? 
and c3 = ?") + val parameterMetaData = ps3.getParameterMetaData + assert(parameterMetaData.getParameterCount == 2) + assert(parameterMetaData.getParameterType(1) == java.sql.Types.INTEGER) + assert(parameterMetaData.getParameterType(2) == java.sql.Types.TIMESTAMP) + ps3.setInt(1, 5) + ps3.setString(2, "2019-06-09 04:04:10.0") + + val rs1 = ps3.executeQuery() + val resultSetMetaData = rs1.getMetaData + assert(resultSetMetaData.getColumnCount == 3) + assert(resultSetMetaData.getColumnName(1).equalsIgnoreCase("c1")) + assert(resultSetMetaData.getColumnName(2).equalsIgnoreCase("c2")) + assert(resultSetMetaData.getColumnName(3).equalsIgnoreCase("c3")) + assert(resultSetMetaData.getColumnType(1) == java.sql.Types.INTEGER) + assert(resultSetMetaData.getColumnType(2) == java.sql.Types.CLOB) + assert(resultSetMetaData.getColumnType(3) == java.sql.Types.TIMESTAMP) + assert(rs1.next()) + assert(rs1.getInt(1) == 5) + assert(rs1.getString(2).equals("5")) + assert(rs1.getString(3).equals("2019-06-09 04:04:10.0")) + rs1.close() + ps3.close() + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitSecurityTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitSecurityTest.scala new file mode 100644 index 0000000000..48afbac91e --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitSecurityTest.scala @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.cluster + +import java.sql.{BatchUpdateException, Connection, DriverManager, ResultSet, SQLException} + +import io.snappydata.cluster.ClusterManagerLDAPTestBase.thriftPort +import io.snappydata.test.dunit.AvailablePortHelper + +import org.apache.spark.Logging +import org.apache.spark.sql.collection.Utils + +class QueryRoutingDUnitSecurityTest(val s: String) + extends ClusterManagerLDAPTestBase(s) with Logging { + + def testColumnTableRouting(): Unit = { + val jdbcUser1 = "gemfire1" + val jdbcUser2 = "gemfire2" + val tableName = "order_line_col" + + val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"QueryRoutingDUnitSecureTest.testColumnTableRouting:" + + s" network server started at $serverHostPort") + // scalastyle:on println + + QueryRoutingDUnitSecurityTest.columnTableRouting(jdbcUser1, jdbcUser2, tableName, + serverHostPort) + } + + def testRowTableRouting(): Unit = { + val jdbcUser1 = "gemfire3" + val jdbcUser2 = "gemfire4" + val tableName = "order_line_row" + val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"QueryRoutingDUnitSecureTest.testRowTableRouting:" + + s" network server started at $serverHostPort") + // scalastyle:on println + + QueryRoutingDUnitSecurityTest.rowTableRouting(jdbcUser1, jdbcUser2, tableName, serverHostPort) + } + + /** Test some queries on the embedded thrift server */ + def testEmbeddedThriftServer(): Unit = { + val jdbcUser1 = "gemfire1" + val jdbcUser2 = "gemfire2" + + try { + DriverManager.getConnection(s"jdbc:hive2://localhost:$thriftPort/app") + } catch { + case sqle: SQLException if 
sqle.getSQLState == "08004" => // expected + } + try { + DriverManager.getConnection(s"jdbc:hive2://localhost:$thriftPort/app", + "app", "app") + } catch { + case sqle: SQLException if sqle.getSQLState == "08004" => // expected + } + try { + DriverManager.getConnection(s"jdbc:hive2://localhost:$thriftPort/$jdbcUser1", + jdbcUser1, jdbcUser2) + } catch { + case sqle: SQLException if sqle.getSQLState == "08004" => // expected + } + try { + DriverManager.getConnection(s"jdbc:hive2://localhost:$thriftPort/$jdbcUser1", + null, null) + } catch { + case sqle: SQLException if sqle.getSQLState == "08004" => // expected + } + + val conn = DriverManager.getConnection( + s"jdbc:hive2://localhost:$thriftPort/$jdbcUser1", jdbcUser1, jdbcUser1) + val stmt = conn.createStatement() + + stmt.execute("create table testTable100 (id int)") + var rs = stmt.executeQuery("show tables") + assert(rs.next()) + assert(rs.getString(1) == jdbcUser1) + assert(rs.getString(2) == "testtable100") + assert(!rs.getBoolean(3)) // isTemporary + assert(!rs.next()) + rs.close() + + rs = stmt.executeQuery(s"show tables in $jdbcUser1") + assert(rs.next()) + assert(rs.getString(1) == jdbcUser1) + assert(rs.getString(2) == "testtable100") + assert(!rs.getBoolean(3)) // isTemporary + assert(!rs.next()) + rs.close() + + rs = stmt.executeQuery("select count(*) from testTable100") + assert(rs.next()) + assert(rs.getLong(1) == 0) + assert(!rs.next()) + rs.close() + stmt.execute("insert into testTable100 select id from range(10000)") + rs = stmt.executeQuery("select count(*) from testTable100") + assert(rs.next()) + assert(rs.getLong(1) == 10000) + assert(!rs.next()) + rs.close() + + stmt.execute("drop table testTable100") + rs = stmt.executeQuery(s"show tables in $jdbcUser1") + assert(!rs.next()) + rs.close() + + stmt.close() + conn.close() + } + + // Test if SNAPPY_HIVE_METASTORE tables can be accessed by admin user only. 
+ def testMetastoreAccessAdminOnly: Unit = { + val adminUser = ClusterManagerLDAPTestBase.admin + val jdbcUser4 = "gemfire3" + + val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort) + // scalastyle:off println + println(s"QueryRoutingDUnitSecureTest.testMetastoreAccessAdminOnly:" + + s" network server started at $serverHostPort") + // scalastyle:on println + QueryRoutingDUnitSecurityTest.checkMetastoreAccess(adminUser, jdbcUser4, serverHostPort) + } +} + +object QueryRoutingDUnitSecurityTest { + + def columnTableRouting(jdbcUser1: String, jdbcUser2: String, tableName: String, + serverHostPort: Int): Unit = { + try { + createColumnTable("testColumnTableRouting-1", serverHostPort, + jdbcUser2 + "." + tableName, jdbcUser1, jdbcUser1) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42507") || + x.getSQLState.equals("42508") => // ignore + case t: Throwable => throw t + } + createColumnTable("testColumnTableRouting-2", serverHostPort, + tableName, jdbcUser2, jdbcUser2) + + try { + batchInsert("testColumnTableRouting-1", 200, 100, + serverHostPort, jdbcUser2 + "." + tableName, jdbcUser1, jdbcUser1) + assert(false) // fail + } catch { + case _: BatchUpdateException => // ignore + // case x: SQLException if x.getSQLState.equals("42500") => // ignore + case t: Throwable => throw t + } + batchInsert("testColumnTableRouting-2", 200, 100, + serverHostPort, tableName, jdbcUser2, jdbcUser2) + + try { + singleInsert("testColumnTableRouting-1", 200, serverHostPort, + jdbcUser2 + "." 
+ tableName, jdbcUser1, jdbcUser1) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42500") => // ignore + case t: Throwable => throw t + } + singleInsert("testColumnTableRouting-2", 200, serverHostPort, + tableName, jdbcUser2, jdbcUser2) + + // (1 to 5).foreach(d => query()) + try { + query("testColumnTableRouting-1", serverHostPort, + jdbcUser2 + "." + tableName, jdbcUser1, jdbcUser1, 400, 40) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42500") => // ignore + case t: Throwable => throw t + } + query("testColumnTableRouting-2", serverHostPort, + tableName, jdbcUser2, jdbcUser2, 400, 40) + + try { + dropTable("testColumnTableRouting-1", serverHostPort, + jdbcUser2 + "." + tableName, jdbcUser1, jdbcUser1) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42507") => // ignore + case t: Throwable => throw t + } + dropTable("testColumnTableRouting-2", serverHostPort, + tableName, jdbcUser2, jdbcUser2) + } + + def rowTableRouting(jdbcUser1: String, jdbcUser2: String, tableName: String, + serverHostPort: Int): Unit = { + try { + createRowTable("testRowTableRouting-1", serverHostPort, + jdbcUser1 + "." + tableName, jdbcUser2, jdbcUser2) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42507") || + x.getSQLState.equals("42508") => // ignore + case t: Throwable => throw t + } + createRowTable("testRowTableRouting-2", + serverHostPort, tableName, jdbcUser1, jdbcUser1) + + try { + batchInsert("testRowTableRouting-1", 20, 20, + serverHostPort, jdbcUser1 + "." 
+ tableName, jdbcUser2, jdbcUser2) + assert(false) // fail + } catch { + case _: BatchUpdateException => // ignore + // case x: SQLException if x.getSQLState.equals("42500") => // ignore + case t: Throwable => throw t + } + batchInsert("testRowTableRouting-2", 20, 20, + serverHostPort, tableName, jdbcUser1, jdbcUser1) + + try { + singleInsert("testRowTableRouting-1", 20, + serverHostPort, jdbcUser1 + "." + tableName, jdbcUser2, jdbcUser2) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42500") => // ignore + case t: Throwable => throw t + } + singleInsert("testRowTableRouting-2", 20, + serverHostPort, tableName, jdbcUser1, jdbcUser1) + + // (1 to 5).foreach(d => query()) + try { + query("testRowTableRouting-1", serverHostPort, + jdbcUser1 + "." + tableName, jdbcUser2, jdbcUser2, 40, 4) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42502") => // ignore + case t: Throwable => throw t + } + query("testRowTableRouting-2", serverHostPort, tableName, + jdbcUser1, jdbcUser1, 40, 4) + + try { + dropTable("testRowTableRouting-1", serverHostPort, + jdbcUser1 + "." 
+ tableName, jdbcUser2, jdbcUser2) + assert(false) // fail + } catch { + case x: SQLException if x.getSQLState.equals("42507") => // ignore + case t: Throwable => throw t + } + dropTable("testRowTableRouting-2", serverHostPort, + tableName, jdbcUser1, jdbcUser1) + } + + def checkMetastoreAccess(adminUser: String, nonAdminUser: String, netPort: Int): Unit = { + val schema = "SNAPPY_HIVE_METASTORE" + val adminConn = netConnection(netPort, adminUser, adminUser, false) + val adminStmt = adminConn.createStatement() + import org.scalatest.Assertions._ + try { + adminStmt.execute(s"insert into $schema.version values (2, '1.2.1', 'dummy comment v2')") + adminStmt.execute(s"update $schema.version set version_comment =" + + s" 'comment changed' where ver_id = 2") + var res = adminStmt.executeQuery(s"select * from $schema.version order by ver_id") + res.next() + assert(res.getInt(1) === 1) + res.next() + assert(res.getInt(1) === 2 && res.getString(3) === "comment changed") + + adminStmt.execute(s"delete from $schema.version where ver_id = 2") + res = adminStmt.executeQuery(s"select * from $schema.version") + while(res.next()){ + assert(res.getInt(1) === 1) + } + } + finally { + adminStmt.close() + adminConn.close() + } + + val conn = netConnection(netPort, nonAdminUser, nonAdminUser, false) + val stmt = conn.createStatement() + + try { + var thrown = intercept[SQLException] { + stmt.executeQuery(s"select * from $schema.version") + } + assert(thrown.getMessage.contains("User 'GEMFIRE3' does not have SELECT permission on" + + " column 'VER_ID' of table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + + thrown = intercept[SQLException] { + stmt.execute(s"insert into $schema.version values (2, '1.2.1', 'dummy comm v2')") + } + assert(thrown.getMessage.contains("User 'GEMFIRE3' does not have INSERT permission on" + + " table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + + val thrown2 = intercept[SQLException] { + stmt.execute(s"update $schema.version set version_comment =" + + s" 'comment changed 
' where ver_id = 2") + } + println(s"Error msg: ${thrown2.getMessage}") + assert(thrown2.getMessage.contains("User 'GEMFIRE3' does not have UPDATE permission on column 'VERSION_COMMENT' of table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + + thrown = intercept[SQLException] { + stmt.execute(s"delete from $schema.version where ver_id = 2") + } + assert(thrown.getMessage.contains("User 'GEMFIRE3' does not have DELETE permission on" + + " table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + } + finally { + stmt.close() + conn.close() + } + + val conn2 = netConnection(netPort, nonAdminUser, nonAdminUser, true) + val stmt2 = conn2.createStatement() + try { + var thrown = intercept[SQLException] { + stmt2.executeQuery(s"select * from $schema.version") + } + assert(thrown.getMessage.contains("Invalid input \"SNAPPY_HIVE_METASTORE.v\"," + + " expected ws, test or relations")) + + thrown = intercept[SQLException] { + stmt2.execute(s"insert into $schema.version values (2, '1.2.1', 'dummy comm v2')") + } + assert(thrown.getMessage.contains("User 'GEMFIRE3' does not have INSERT permission on" + + " table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + + thrown = intercept[SQLException] { + stmt2.execute(s"update $schema.version set version_comment =" + + s" 'comment changed ' where ver_id = 2") + } + assert(thrown.getMessage.contains("GEMFIRE3' does not have UPDATE permission on column 'VERSION_COMMENT' of table 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + + thrown = intercept[SQLException] { + stmt2.execute(s"delete from $schema.version where ver_id = 2") + } + assert(thrown.getMessage.contains("User 'GEMFIRE3' does not have DELETE permission on table" + + " 'SNAPPY_HIVE_METASTORE'.'VERSION'")) + } + finally { + stmt2.close() + conn2.close() + } + } + + def netConnection(netPort: Int, user: String, pass: String, + routeQuery: Boolean = true): Connection = { + val driver = "io.snappydata.jdbc.ClientDriver" + Utils.classForName(driver).newInstance + var url: String = null + if(routeQuery) { + url = 
"jdbc:snappydata://localhost:" + netPort + "/" + } + else { + url = "jdbc:snappydata://localhost:" + netPort + "/route-query=false" + } + DriverManager.getConnection(url, user, pass) + } + + def createColumnTable(testName: String, serverHostPort: Int, tableName: String, + user: String, pass: String): Unit = { + val conn = netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"createColumnTable-$testName: Connected to $serverHostPort") + // scalastyle:on println + + val stmt1 = conn.createStatement() + try { + stmt1.execute(s"create table $tableName (ol_int_id integer," + + s" ol_int2_id integer, ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '8', COLUMN_BATCH_SIZE '200')") + } finally { + stmt1.close() + conn.close() + } + } + + def createRowTable(testName: String, serverHostPort: Int, tableName: String, + user: String, pass: String): Unit = { + val conn = netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"createRowTable-$testName: Connected to $serverHostPort") + // scalastyle:on println + + val stmt1 = conn.createStatement() + try { + stmt1.execute(s"create table $tableName (ol_int_id integer," + + s" ol_int2_id integer, ol_str_id STRING) using row " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '8')") + } finally { + stmt1.close() + conn.close() + } + } + + def dropTable(testName: String, serverHostPort: Int, tableName: String, + user: String, pass: String): Unit = { + val conn = netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"dropTable-$testName: Connected to $serverHostPort") + // scalastyle:on println + + val stmt1 = conn.createStatement() + try { + stmt1.execute(s"drop table $tableName") + } finally { + stmt1.close() + conn.close() + } + } + + def batchInsert(testName: String, numRows: Int, batchSize: Int, serverHostPort: Int, + tableName: String, user: String, pass: String): Unit = { + val conn = 
netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"batchInsert-$testName: Connected to $serverHostPort") + // scalastyle:on println + + val stmt1 = conn.createStatement() + try { + var i = 1 + (1 to numRows).foreach(_ => { + stmt1.addBatch(s"insert into $tableName values($i, $i, '$i')") + i += 1 + if (i % batchSize == 0) { + stmt1.executeBatch() + i = 0 + } + }) + stmt1.executeBatch() + + // scalastyle:off println + println(s"batchInsert-$testName: committed $numRows rows") + // scalastyle:on println + } finally { + stmt1.close() + conn.close() + } + } + + def singleInsert(testName: String, numRows: Int, serverHostPort: Int, tableName: String, + user: String, pass: String): Unit = { + val conn = netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"singleInsert-$testName: Connected to $serverHostPort") + // scalastyle:on println + + val stmt1 = conn.createStatement() + try { + (1 to numRows).foreach(i => { + stmt1.executeUpdate(s"insert into $tableName values($i, $i, '$i')") + }) + + // scalastyle:off println + println(s"singleInsert-$testName: committed $numRows rows") + // scalastyle:on println + } finally { + stmt1.close() + conn.close() + } + } + + def verifyQuery(testName: String, qryTest: String, stmt_rs: ResultSet, numRows: Int, + debugNumRows: Int): Unit = { + val builder = StringBuilder.newBuilder + + var index = 0 + while (stmt_rs.next()) { + index += 1 + val stmt_i = stmt_rs.getInt(1) + val stmt_j = stmt_rs.getInt(2) + val stmt_s = stmt_rs.getString(3) + if (index % debugNumRows == 0) { + builder.append(s"verifyQuery-$testName: " + + s"$qryTest Stmt: row($index) $stmt_i $stmt_j $stmt_s ").append("\n") + } + } + builder.append(s"verifyQuery-$testName: " + + s"$qryTest Stmt: Total number of rows = $index").append("\n") + // scalastyle:off println + println(builder.toString()) + // scalastyle:on println + assert(index == numRows) + } + + def query(testName: String, serverHostPort: Int, tableName: 
String, + user: String, pass: String, numRows: Int, debugNumRows: Int): Unit = { + val conn = netConnection(serverHostPort, user, pass) + // scalastyle:off println + println(s"query-$testName: Connected to $serverHostPort") + // scalastyle:off println + + val stmt1 = conn.createStatement() + try { + val qry1 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < 5000000 " + + s"" + val rs1 = stmt1.executeQuery(qry1) + verifyQuery(testName, qry1, rs1, numRows, debugNumRows) + rs1.close() + // Thread.sleep(1000000) + } finally { + stmt1.close() + conn.close() + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitTest.scala new file mode 100644 index 0000000000..3c36ec68ba --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/QueryRoutingDUnitTest.scala @@ -0,0 +1,1287 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.cluster + +import java.io.File +import java.math.BigDecimal +import java.sql.{Connection, DatabaseMetaData, DriverManager, PreparedStatement, ResultSet, SQLException, Statement} + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import com.gemstone.gemfire.distributed.DistributedMember +import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember +import com.gemstone.gemfire.internal.cache.PartitionedRegion +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import io.snappydata.Property +import io.snappydata.test.dunit.{AvailablePortHelper, SerializableRunnable} +import org.apache.commons.io.FileUtils +import org.junit.Assert +import org.junit.Assert.assertEquals + +import org.apache.spark.Logging +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation +import org.apache.spark.sql.types.Decimal +import org.apache.spark.sql.{IndexTest, SaveMode, SingleNodeTest, SnappyContext, SnappySession, TPCHUtils} + +/** + * Tests for query routing from JDBC client driver. 
+ */ +class QueryRoutingDUnitTest(val s: String) + extends ClusterManagerTestBase(s) with Logging { + + private val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE + + override def tearDown2(): Unit = { + // reset the chunk size on lead node + setDMLMaxChunkSize(default_chunk_size) + super.tearDown2() + } + + def testQueryRouting(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + + createTableAndInsertData() + val conn = getANetConnection(netPort1) + val s = conn.createStatement() + s.execute("select col1 from TEST.ColumnTableQR") + var rs = s.getResultSet + var cnt = 0 + while (rs.next()) { + cnt += 1 + } + assert(cnt == 5) + + var md = rs.getMetaData + logInfo("metadata col cnt = " + md.getColumnCount + " col name = " + + md.getColumnName(1) + " col table name = " + md.getTableName(1)) + assert(md.getColumnCount == 1) + assert(md.getColumnName(1).equalsIgnoreCase("COL1")) + assert(md.getTableName(1).equalsIgnoreCase("COLUMNTABLEQR")) + + // 2nd query which compiles in gemxd too but needs to be routed + s.execute("select * from TEST.ColumnTableQR") + rs = s.getResultSet + cnt = 0 + while (rs.next()) { + cnt += 1 + } + assert(cnt == 5) + md = rs.getMetaData + logInfo("2nd metadata col cnt = " + md.getColumnCount + " col name = " + + md.getColumnName(1) + " col table name = " + md.getTableName(1)) + assert(md.getColumnCount == 3) + assert(md.getColumnName(1).equalsIgnoreCase("COL1")) + assert(md.getColumnName(2).equalsIgnoreCase("COL2")) + assert(md.getColumnName(3).equalsIgnoreCase("COL3")) + assert(md.getTableName(1).equalsIgnoreCase("COLUMNTABLEQR")) + assert(md.getTableName(2).equalsIgnoreCase("COLUMNTABLEQR")) + assert(md.getTableName(3).equalsIgnoreCase("COLUMNTABLEQR")) + + vm1.invoke(new SerializableRunnable() { + override def run(): Unit = { + val catalog = Misc.getMemStore.getExternalCatalog + assert(catalog.isColumnTable("TEST", "ColumnTableQR", 
false)) + } + }) + + // Now give a syntax error which will give parse error on spark sql side as well + try { + s.execute("select ** from sometable") + } catch { + case sqe: SQLException => + if ("42X01" != sqe.getSQLState && "42000" != sqe.getSQLState) { + throw sqe + } + } + s.execute("select col1, col2 from TEST.ColumnTableQR") + rs = s.getResultSet + cnt = 0 + while (rs.next()) { + cnt += 1 + } + assert(cnt == 5) + md = rs.getMetaData + logInfo("3rd metadata col cnt = " + md.getColumnCount + " col name = " + + md.getColumnName(1) + " col table name = " + md.getTableName(1)) + assert(md.getColumnCount == 2) + + s.execute("select * from TEST.ColumnTableQR where col1 > 4") + rs = s.getResultSet + cnt = 0 + while (rs.next()) { + cnt += 1 + } + assert(cnt == 3) + + s.execute( + "select col1 from TEST.ColumnTableQR where col1 > 0 order by col1 desc") + rs = s.getResultSet + cnt = 0 + // 1, 7, 9, 4, 5 + while (rs.next()) { + cnt += 1 + cnt match { + case 1 => assert(9 == rs.getInt(1), s"Expected 9 but found ${rs.getInt(1)}") + case 2 => assert(7 == rs.getInt(1), s"Expected 7 but found ${rs.getInt(1)}") + case 3 => assert(5 == rs.getInt(1), s"Expected 5 but found ${rs.getInt(1)}") + case 4 => assert(4 == rs.getInt(1), s"Expected 4 but found ${rs.getInt(1)}") + case 5 => assert(1 == rs.getInt(1), s"Expected 1 but found ${rs.getInt(1)}") + } + } + assert(cnt == 5) + + // reducing DML chunk size size to force lead node to send + // results in multiple batches + setDMLMaxChunkSize(50L) + val expectedResult: Array[Int] = Array(1, 7, 9, 4, 5) + val actualResult: Array[Int] = new Array[Int](5) + s.execute("select col1 from TEST.ColumnTableQR order by col1") + rs = s.getResultSet + cnt = 0 + while (rs.next()) { + actualResult(cnt) = rs.getInt(1) + logInfo("----" + rs.getInt(1)) + cnt += 1 + } + assert(cnt == 5) + // actualResult.foreach(println) + assert(expectedResult.sorted.sameElements(actualResult)) + setDMLMaxChunkSize(default_chunk_size) + + // Check that update and 
delete on column table works + val updated = s.executeUpdate("update TEST.ColumnTableQR set col1 = 10") + assert(updated == 5) + s.execute("select col1 from TEST.ColumnTableQR order by col1") + val rs2 = s.getResultSet + cnt = 0 + while (rs2.next()) { + val row = rs2.getInt(1) + assert(row == 10) + cnt += 1 + } + assert(cnt == 5) + + val deleted = s.executeUpdate("delete from TEST.ColumnTableQR where spark_partition_id() > -1") + assert(deleted == 5) + s.execute("select col1 from TEST.ColumnTableQR order by col1") + assert(!s.getResultSet.next()) + createTableAndInsertData() + val deleted2 = s.executeUpdate("delete from TEST.ColumnTableQR") + assert(deleted2 == 5) + s.execute("select * from TEST.ColumnTableQR") + assert(!s.getResultSet.next()) + conn.close() + } + + def testQueryRoutingWithSchema(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + + val conn1 = getANetConnection(netPort1) + val conn2 = getANetConnection(netPort1) + val conn3 = getANetConnection(netPort1) + val columnTable = "columnTable" + val rowTable = "rowTable" + conn1.createStatement().executeUpdate("create schema test1") + conn1.createStatement().executeUpdate("set schema test1") + + conn2.createStatement().executeUpdate("create schema test2") + conn2.createStatement().executeUpdate("set schema test2") + + // tables are created under schema test1 + conn1.createStatement().executeUpdate(s"create table $columnTable ( x int) using column") + conn1.createStatement().executeUpdate(s"create table $rowTable ( x int) using row") + + // tables are created under schema test2 + conn2.createStatement().executeUpdate(s"create table $columnTable ( x int) using column") + conn2.createStatement().executeUpdate(s"create table $rowTable ( x int) using row") + + // tables are created under schema APP + conn3.createStatement().executeUpdate(s"create table $columnTable ( x int) using column") + 
conn3.createStatement().executeUpdate(s"create table $rowTable ( x int) using row") + + // insert data under schema test1 + conn1.createStatement().executeUpdate(s" insert into $columnTable values (1)") + conn1.createStatement().executeUpdate(s" insert into $rowTable values (2)") + + // insert data under schema test2 + conn2.createStatement().executeUpdate(s" insert into $columnTable values (1)") + conn2.createStatement().executeUpdate(s" insert into $rowTable values (2)") + + // insert data under schema APP + conn3.createStatement().executeUpdate(s" insert into $columnTable values (1)") + conn3.createStatement().executeUpdate(s" insert into $rowTable values (2)") + + // verify data under each column table + var rs = conn1.createStatement().executeQuery(s"select count(*) from APP.$columnTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + rs = conn1.createStatement().executeQuery(s"select count(*) from TEST1.$columnTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + rs = conn1.createStatement().executeQuery(s"select count(*) from TEST2.$columnTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + + // verify data under each row table + rs = conn1.createStatement().executeQuery(s"select count(*) from APP.$rowTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + rs = conn1.createStatement().executeQuery(s"select count(*) from TEST1.$rowTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + rs = conn1.createStatement().executeQuery(s"select count(*) from TEST2.$rowTable") + assert(rs.next()) + assert(rs.getInt(1) == 1) + + // Unit test for DSID function + val membersList = mutable.MutableList[String]() + val members: java.util.Set[DistributedMember] = GemFireXDUtils. 
/**
 * Checks the JDBC result-set metadata and row counts reported for the column table
 * TEST.ColumnTableQR, first through a plain prepared `select *` and then through a
 * parameterised one (the ticket ids SNAP-1296/SNAP-1297 come from the test name).
 *
 * A network server is started on vm2 and the table is loaded through
 * `createTableAndInsertData()`. The client connection and both prepared statements
 * are closed even when an assertion fails.
 */
def testSnap1296_1297(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
  createTableAndInsertData()

  // Each of the three columns must report its own name and the base table name.
  def checkMetaData(md: java.sql.ResultSetMetaData): Unit = {
    assert(md.getColumnCount == 3, "column count is = " + md.getColumnCount)
    for (pos <- 1 to 3) {
      assert(md.getColumnName(pos).equalsIgnoreCase("COL" + pos))
      assert(md.getTableName(pos).equalsIgnoreCase("COLUMNTABLEQR"))
    }
  }

  val conn = getANetConnection(netPort1)
  try {
    // unfiltered scan: the loader inserts five rows
    val ps = conn.prepareStatement("select * from TEST.ColumnTableQR")
    try {
      val rs = ps.executeQuery
      checkMetaData(rs.getMetaData)

      var cnt = 0
      while (rs.next()) {
        val col1 = rs.getString(1)
        val col2 = rs.getString(2)
        val col3 = rs.getString(3)
        logInfo(s"col1 = $col1, col2 = $col2, col3 = $col3")
        cnt += 1
      }
      assert(cnt == 5)
    } finally {
      ps.close()
    }

    // parameterised lookup: exactly one row has col1 = 1
    val ps2 = conn.prepareStatement("select * from TEST.ColumnTableQR where col1 = ?")
    try {
      ps2.setInt(1, 1)
      ps2.execute
      val rs2 = ps2.getResultSet
      checkMetaData(rs2.getMetaData)

      var cnt2 = 0
      while (rs2.next()) {
        val col1 = rs2.getInt(1)
        val col2 = rs2.getString(2)
        val col3 = rs2.getString(3)
        logInfo(s"col1 = $col1, col2 = $col2, col3 = $col3")
        assert(col1 == 1)
        cnt2 += 1
      }
      assert(cnt2 == 1)
    } finally {
      ps2.close()
    }
  } finally {
    conn.close()
  }
}
/**
 * Regression checks on the Airline column table over a client connection; the SNAP
 * ticket numbers are taken from the original inline comments and the test name:
 *  - count query with metadata check,
 *  - a projection of more than 8 columns (SNAP-607),
 *  - 20 repeated short queries that must not hang (SNAP-608),
 *  - CREATE TABLE ... AS SELECT (SNAP-609) followed by a `select *`.
 *
 * Every result set is closed after use and the connection is closed in a `finally`,
 * so a failing assertion no longer leaks the connection.
 */
def testSNAP193_607_8_9(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)

  createTableAndInsertData2(netPort1)
  val conn = getANetConnection(netPort1)
  try {
    val stmt = conn.createStatement()

    // Runs the query, walks the whole result set, closes it and returns the row count.
    def countRows(query: String): Int = {
      val rows = stmt.executeQuery(query)
      try {
        var n = 0
        while (rows.next()) {
          n += 1
        }
        n
      } finally {
        rows.close()
      }
    }

    val numExpectedRows = 188894
    val countRs = stmt.executeQuery("select count(UniqueCarrier) from Airline")
    try {
      assert(countRs.next())
      val numRows = countRs.getInt(1)
      assert(numRows == numExpectedRows, s"got rows=$numRows")
      assert(!countRs.next())

      val md = countRs.getMetaData
      logInfo("metadata colCount=" + md.getColumnCount + " colName=" +
          md.getColumnName(1) + " tableName=" + md.getTableName(1))
      assert(md.getColumnCount == 1)
      assert(md.getColumnName(1).equalsIgnoreCase("count(UNIQUECARRIER)"),
        "columnName=" + md.getColumnName(1))
    } finally {
      countRs.close()
    }

    // check successful run with larger number (>8) of columns (SNAP-607)
    Assert.assertEquals(10, countRows("select YEARI, MONTHI, DAYOFMONTH, DAYOFWEEK, " +
        "DEPTIME, CRSDEPTIME, ARRTIME, CRSARRTIME, UNIQUECARRIER " +
        "from AIRLINE limit 10"))

    // check no hang with decent number of runs (SNAP-608)
    for (_ <- 0 until 20) {
      Assert.assertEquals(2, countRows("select YEARI, MONTHI, DAYOFMONTH, DAYOFWEEK, " +
          "DEPTIME, CRSDEPTIME, UNIQUECARRIER " +
          "from AIRLINE limit 2"))
    }

    // below hangs in CREATE TABLE (SNAP-609)
    stmt.execute("CREATE TABLE airline2 USING column AS " +
        "(select * from airline limit 10000)")
    val copiedRs = stmt.executeQuery("select count(*) from Airline2")
    try {
      assert(copiedRs.next())
      assert(copiedRs.getInt(1) == 10000, "got rows=" + copiedRs.getInt(1))
      assert(!copiedRs.next())
    } finally {
      copiedRs.close()
    }

    // now check for ClassCastException with a "select *"
    Assert.assertEquals(10000, countRows("select * from Airline2"))
  } finally {
    // closing the connection also releases the statement created on it
    conn.close()
  }
}
assert("APP".equalsIgnoreCase(schemaname)) + + val dbmd = conn.getMetaData + val rSet = dbmd.getTables(null, "APP", null, + Array[String]("ROW TABLE", "SYSTEM TABLE", "COLUMN TABLE", + "EXTERNAL TABLE", "STREAM TABLE", "VTI")) + assert(rSet.next()) + + s.execute(s"drop table $rowTable") + + // Ensure systables, members can be queried (SNAP-215) + doQueries(s, dbmd, colTable) + + // Ensure systables, members can be queried (SNAP-215) on a new connection too. + newConn = getANetConnection(netPort1) + doQueries(newConn.createStatement(), newConn.getMetaData, colTable) + + // Ensure parquet table can be dropped (SNAP-215) + val parquetTable = "PARQUETTABLE" + dataDir.mkdir() + s.execute(s"CREATE EXTERNAL TABLE APP_PARQUET.$parquetTable " + + s"(Col1 INT, Col2 INT, Col3 INT) USING parquet OPTIONS (path '$filePath')") + + // check meta-data + val schemaMd = dbmd.getSchemas + val results = new mutable.HashSet[String]() + while (schemaMd.next()) { + results += schemaMd.getString(1) + } + assert(results.contains("APP")) + assert(results.contains("APP_PARQUET")) + results.clear() + + val tableMd = dbmd.getTables(null, "APP%", null, + Array[String]("ROW TABLE", "SYSTEM TABLE", "COLUMN TABLE", + "EXTERNAL TABLE", "STREAM TABLE", "VTI")) + while (tableMd.next()) { + results += tableMd.getString(2) + '.' 
/**
 * Asserts that no internal column-store table (name containing
 * "SNAPPYSYS_INTERNAL____") is exposed through the JDBC metadata calls
 * `getTables`, `getColumns` and `getTablePrivileges`.
 *
 * Column 3 of each of these result sets is read as the table name, as the original
 * comments state. Each row is checked once and every result set is closed, including
 * the last one, which was previously left open.
 *
 * @param conn open connection whose `DatabaseMetaData` is inspected; not closed here
 */
def checkDBAPIsForNonInclusionOfInternalColTable(conn: Connection): Unit = {
  // Walks one metadata result set and fails on the first internal table name.
  def assertNoInternalTable(rs: ResultSet): Unit = {
    try {
      while (rs.next()) {
        // 3rd index the table name
        val tableName = rs.getString(3)
        assert(!tableName.contains("SNAPPYSYS_INTERNAL____"),
          s"internal table exposed in metadata: $tableName")
      }
    } finally {
      rs.close()
    }
  }

  assertNoInternalTable(conn.getMetaData.getTables(null, null, "%", null))
  assertNoInternalTable(conn.getMetaData.getColumns(null, null, "%", "%"))
  assertNoInternalTable(conn.getMetaData.getTablePrivileges(null, null, "%"))
}
/**
 * Runs the system-catalog checks shared by `testSystablesQueries`: the SYS.MEMBERS
 * and SYS.SYSTABLES queries must return rows, table `t` must be listed in schema
 * APP as a "COLUMN TABLE", and its internal column-batch table must not be listed.
 *
 * @param s    statement used for the SQL queries
 * @param dbmd metadata handle used for the `getTables` calls
 * @param t    unqualified name of a column table in schema APP
 */
private def doQueries(s: Statement, dbmd: DatabaseMetaData, t: String): Unit = {
  // each catalog query only has to produce at least one row
  Seq("select * from sys.members",
    "select * from sys.systables",
    "select * from sys.systables where tableschemaname='APP'").foreach { query =>
    s.execute(query)
    assert(s.getResultSet.next())
  }

  val tableTypes = Array[String]("ROW TABLE", "SYSTEM TABLE", "COLUMN TABLE",
    "EXTERNAL TABLE", "STREAM TABLE", "VTI")

  // Simulates 'SHOW TABLES' of ij
  val listed = dbmd.getTables(null, "APP", null, tableTypes)
  var foundTable = false
  while (listed.next()) {
    if (t.equalsIgnoreCase(listed.getString("TABLE_NAME"))) {
      foundTable = true
      assert(listed.getString("TABLE_TYPE").equalsIgnoreCase("COLUMN TABLE"))
    }
  }
  assert(foundTable)

  val internalName = ColumnFormatRelation.columnBatchTableName("APP." + t)
  val listedAgain = dbmd.getTables(null, "APP", null, tableTypes)
  var foundInternal = false
  while (listedAgain.next()) {
    if (internalName.equalsIgnoreCase(listedAgain.getString("TABLE_NAME"))) {
      foundInternal = true
      assert(listedAgain.getString("TABLE_TYPE").equalsIgnoreCase("TABLE"))
    }
  }
  // internal column tables are no longer visible in getTables
  assert(!foundInternal)

  // Simulates 'SHOW MEMBERS' of ij
  val members = s.executeQuery("SELECT * FROM SYS.MEMBERS ORDER BY ID ASC")
  assert(members.next())
}

/**
 * (Re)creates the column table TEST.ColumnTableQR and appends five rows built from
 * fixed integer triples; col1 takes the values 1, 7, 9, 4 and 5.
 */
def createTableAndInsertData(): Unit = {
  val snc = SnappyContext(sc)
  val tableName: String = "TEST.ColumnTableQR"
  snc.sql(s" drop table if exists $tableName")

  val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3),
    Seq(4, 2, 3), Seq(5, 6, 7))
  // one partition per row; third column is the decimal "<second>.<third>"
  val rdd = sc.parallelize(data, data.length).map { row =>
    Data(row.head, row(1).toString, Decimal(row(1).toString + '.' + row(2)))
  }
  val dataDF = snc.createDataFrame(rdd)
  snc.createTable(tableName, "column", dataDF.schema,
    Map.empty[String, String])
  dataDF.write.format("column").mode(SaveMode.Append)
      .saveAsTable(tableName)
}

/**
 * Creates the column table Airline with the schema of the bundled
 * `/2015-trimmed.parquet` resource, verifies over JDBC that it starts empty and then
 * appends the parquet data.
 *
 * The JDBC connection is needed only for the emptiness check and is now closed
 * right after it; previously it was never closed.
 *
 * @param netPort1 port of an already started network server
 */
def createTableAndInsertData2(netPort1: Int): Unit = {
  val snc = SnappyContext(sc)
  val tableName: String = "Airline"

  val hfile = getClass.getResource("/2015-trimmed.parquet").getPath
  val dataDF = snc.read.load(hfile)
  snc.createTable(tableName, "column", dataDF.schema,
    Map.empty[String, String])

  val conn = getANetConnection(netPort1)
  try {
    val stmt = conn.createStatement()
    val numExpectedRows = 0
    val rs = stmt.executeQuery("select count(UniqueCarrier) from Airline")
    assert(rs.next())
    val numRows = rs.getInt(1)
    assert(numRows == numExpectedRows, s"got rows=$numRows")
    assert(!rs.next())
  } finally {
    // closing the connection releases its statement and result set as well
    conn.close()
  }

  dataDF.write.format("column").mode(SaveMode.Append)
      .saveAsTable(tableName)
}

/** Overwrites the JVM-wide `GemFireXDUtils.DML_MAX_CHUNK_SIZE` with `size`. */
def setDMLMaxChunkSize(size: Long): Unit = {
  GemFireXDUtils.DML_MAX_CHUNK_SIZE = size
}
/**
 * Disabled variant (the name does not start with `test`): starts a network server
 * and delegates to `SingleNodeTest.testNodesPruning`.
 */
def DISABLED_SNAP_1597_testNodesPruning(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
  SingleNodeTest.testNodesPruning(SnappyContext(sc))
}

/**
 * Loads the TPCH tables, creates two colocated indexes and runs four point lookups
 * on `orders` through `IndexTest.runBenchmark`.
 *
 * Only indexes that were actually created are dropped on exit. Previously both
 * DROP INDEX statements always ran, so a failure during load or index creation was
 * masked by a secondary "index does not exist" error from the cleanup.
 */
def testTPCHNodesPruning(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
  val snc = SnappyContext(sc)

  // names of the indexes created so far, in creation order
  var createdIndexes = Vector.empty[String]
  try {
    TPCHUtils.createAndLoadTables(snc, true)

    snc.setConf(Property.EnableExperimentalFeatures.name, "true")
    snc.sql("CREATE INDEX idx_orders_cust ON orders(o_custkey) " +
        "options (COLOCATE_WITH 'customer')")
    createdIndexes :+= "idx_orders_cust"

    snc.sql("CREATE INDEX idx_lineitem_part ON lineitem(l_partkey) " +
        "options (COLOCATE_WITH 'part')")
    createdIndexes :+= "idx_lineitem_part"

    val tables = Seq("nation", "region", "supplier", "customer", "orders", "lineitem",
      "part", "partsupp")
    val tableSizes = tables.map(name => (name, snc.table(name).count())).toMap
    tableSizes.foreach(entry => logInfo(entry.toString()))

    val i = new IndexTest
    i.runBenchmark("select o_orderkey from orders where o_orderkey = 1", tableSizes, 2)
    i.runBenchmark("select o_orderkey from orders where o_orderkey = 32", tableSizes)
    i.runBenchmark("select o_orderkey from orders where o_orderkey = 801", tableSizes)
    i.runBenchmark("select o_orderkey from orders where o_orderkey = 1409", tableSizes)
  } finally {
    createdIndexes.foreach(idx => snc.sql(s"DROP INDEX $idx"))
  }
}

/**
 * Creates the two-bucket column table `order_line_col`, loads 1000 rows through
 * `limitInsertRows` and compares a LIMIT query against its unlimited form through
 * `limitQuery`.
 */
def testLimitStatementRouting(): Unit = {
  val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
  logInfo(s"network server started at $serverHostPort")

  val tableName = "order_line_col"
  SnappyContext(sc).sql(s"create table $tableName (ol_int_id integer," +
      s" ol_int2_id integer, ol_str_id STRING) using column " +
      "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')")

  limitInsertRows(1000, serverHostPort, tableName)
  limitQuery(serverHostPort, tableName)
}
/**
 * Prepares and executes a query with three parameters and a multi-branch
 * `ORDER BY CASE` on an empty `trade.securities` table and expects an empty result
 * (ticket id SNAP-2247 is taken from the test name).
 *
 * Fixes over the previous version:
 *  - the statement is bound with three values (two decimals, one int) but the text
 *    had only two placeholders, so `setInt(3, ...)` addressed a non-existent
 *    parameter; the predicate is now `(price<? or price >=?) and tid =?`.
 *    NOTE(review): the two-sided price predicate is inferred from the bound
 *    values -- confirm against the ticket;
 *  - a second, shadowing connection and statement were opened inside the `try` and
 *    never closed; a single connection is used now;
 *  - cleanup uses `drop table if exists`, so a failed CREATE is not masked.
 */
def testSNAP2247(): Unit = {
  val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
  val conn = DriverManager.getConnection(
    "jdbc:snappydata://localhost:" + serverHostPort)
  try {
    val st = conn.createStatement()
    try {
      st.execute(s"create table trade.securities " +
          s"(sec_id int not null, symbol varchar(10) not null, " +
          s"price decimal (30, 20), exchange varchar(10) not null, " +
          s"tid int, constraint sec_pk primary key (sec_id), " +
          s"constraint sec_uq unique (symbol, exchange), constraint exc_ch check " +
          s"(exchange in ('nasdaq', 'nye', 'amex', 'lse', 'fse', 'hkse', 'tse'))) " +
          s"ENABLE CONCURRENCY CHECKS")

      val ps = conn.prepareStatement(s"select price, symbol, exchange from trade.securities" +
          s" where (price<? or price >=?) and tid =? order by CASE when exchange ='nasdaq'" +
          s" then symbol END desc, CASE when exchange in('nye', 'amex') then sec_id END desc," +
          s" CASE when exchange ='lse' then symbol END asc, CASE when exchange ='fse' then" +
          s" sec_id END desc, CASE when exchange ='hkse' then symbol END asc," +
          s" CASE when exchange ='tse' then symbol END desc")
      try {
        ps.setBigDecimal(1, new BigDecimal("0.02"))
        ps.setBigDecimal(2, new BigDecimal("20.02"))
        ps.setInt(3, 3)

        ps.execute()
        assert(!ps.getResultSet.next())
      } finally {
        ps.close()
      }
    } finally {
      st.execute(s"drop table if exists trade.securities")
      st.close()
    }
  } finally {
    conn.close()
  }
}

/**
 * Inserts rows `(i, i, 'i')` for `i` in `1 to numRows` into `tableName` over a new
 * JDBC connection, sending them in batches of at most 1000 statements.
 *
 * The previous loop reused one counter both as the row value and as the batch
 * trigger and reset it to 0 after each flush, so the value sequence restarted at 0
 * (for 1000 rows the last row was 0 instead of 1000). Row values and batching are
 * now independent.
 *
 * @param numRows        number of rows to insert
 * @param serverHostPort port of the network server on localhost
 * @param tableName      target table with an (int, int, string) layout
 */
def limitInsertRows(numRows: Int, serverHostPort: Int, tableName: String): Unit = {
  val conn = DriverManager.getConnection(
    "jdbc:snappydata://localhost:" + serverHostPort)
  try {
    val stmt = conn.createStatement()
    try {
      (1 to numRows).grouped(1000).foreach { batch =>
        batch.foreach(i => stmt.addBatch(s"insert into $tableName values($i, $i, '$i')"))
        stmt.executeBatch()
      }
      logInfo(s"committed $numRows rows")
    } finally {
      stmt.close()
    }
  } finally {
    conn.close()
  }
}

/**
 * Compares two result sets of shape (int, int, string) row by row and fails if any
 * value differs or one side has more rows than the other. On failure all rows read,
 * plus the first mismatch, are written to the log before the assertion fires.
 *
 * Both cursors are now advanced independently. The former condition
 * `prep_rs.next() && stmt_rs.next()` consumed a surplus row of `prep_rs` when
 * `stmt_rs` was exhausted, so a `prep_rs` exactly one row longer went undetected.
 *
 * @param qryTest label prefixed to every logged row
 * @param prep_rs first result set (logged as "Prep")
 * @param stmt_rs second result set (logged as "Stmt")
 */
def verifyQuery(qryTest: String, prep_rs: ResultSet, stmt_rs: ResultSet): Unit = {
  val builder = StringBuilder.newBuilder
  var index = 0
  var assertionFailed = false

  def readRow(rs: ResultSet): (Int, Int, String) =
    (rs.getInt(1), rs.getInt(2), rs.getString(3))

  def logRow(side: String, row: (Int, Int, String)): Unit =
    builder.append(s"$qryTest $side: row($index) ${row._1} ${row._2} ${row._3} ").append("\n")

  // only the first mismatch is recorded
  def noteFailure(detail: String): Unit = {
    if (!assertionFailed) {
      builder.append(s"Assertion failed at index=$index$detail").append("\n")
      assertionFailed = true
    }
  }

  var prepHasRow = prep_rs.next()
  var stmtHasRow = stmt_rs.next()
  while (prepHasRow && stmtHasRow) {
    val prep = readRow(prep_rs)
    val stmt = readRow(stmt_rs)
    logRow("Prep", prep)
    logRow("Stmt", stmt)

    if (prep._1 != stmt._1) noteFailure(s" prep=${prep._1} stmt=${stmt._1}")
    if (prep._2 != stmt._2) noteFailure(s" prep=${prep._2} stmt=${stmt._2}")
    if (prep._3 != stmt._3) noteFailure(s" prep=${prep._3} stmt=${stmt._3}")

    index += 1
    prepHasRow = prep_rs.next()
    stmtHasRow = stmt_rs.next()
  }

  // at most one of the two loops below runs: surplus rows on either side are a failure
  while (prepHasRow) {
    noteFailure("")
    logRow("Prep", readRow(prep_rs))
    index += 1
    prepHasRow = prep_rs.next()
  }
  while (stmtHasRow) {
    noteFailure("")
    logRow("Stmt", readRow(stmt_rs))
    index += 1
    stmtHasRow = stmt_rs.next()
  }

  if (assertionFailed) {
    logInfo(builder.toString())
  }
  assert(!assertionFailed)
}

/**
 * Runs the same filtered query on `tableName` once with `limit 20` and once
 * without, on two statements of one connection, and compares the results through
 * `verifyQuery`.
 *
 * @param serverHostPort port of the network server on localhost
 * @param tableName      table with columns ol_int_id, ol_int2_id, ol_str_id
 */
def limitQuery(serverHostPort: Int, tableName: String): Unit = {
  val conn = DriverManager.getConnection(
    "jdbc:snappydata://localhost:" + serverHostPort)

  logInfo(s"Connected to $serverHostPort")

  val stmt1 = conn.createStatement()
  val stmt2 = conn.createStatement()
  try {
    val limited = s"select ol_int_id, ol_int2_id, ol_str_id " +
        s" from $tableName " +
        s" where ol_int_id < 500 " +
        s" and ol_int2_id in (100, 200, 300) " +
        " and ol_str_id LIKE '%0' " +
        s" limit 20" +
        s""
    val unlimited = s"select ol_int_id, ol_int2_id, ol_str_id " +
        s" from $tableName " +
        s" where ol_int_id < 500 " +
        s" and ol_int2_id in (100, 200, 300) " +
        s" and ol_str_id LIKE '%0' " +
        s""

    val rs1 = stmt1.executeQuery(limited)
    val rs2 = stmt2.executeQuery(unlimited)
    verifyQuery("query", rs1, rs2)
    rs1.close()
    rs2.close()
  } finally {
    stmt1.close()
    stmt2.close()
    conn.close()
  }
}
/**
 * Creates a parent row table and a child row table with a foreign key to it, then
 * drops that foreign key with ALTER TABLE.
 *
 * Cleanup uses `drop table if exists`, so a failure while creating the tables is
 * reported as such and not replaced by a "table does not exist" error from the
 * `finally` block. The connection is closed even if cleanup fails.
 */
def testAlterTableRowTable(): Unit = {
  val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
  val conn = DriverManager.getConnection("jdbc:snappydata://localhost:" + serverHostPort)
  logInfo(s"Connected to $serverHostPort")

  try {
    val stmt = conn.createStatement()
    try {
      val createParentTable: String =
        "create table parentT (cid int not null, sid int not null, qty int not null, " +
            " constraint parent_pk primary key (cid, sid)) " +
            "USING ROW OPTIONS ( PERSISTENT 'SYNCHRONOUS');"
      val createChildTable: String =
        "create table childT (oid int not null constraint child_pk primary key, cid int, " +
            "sid int, qty int, constraint parent_fk foreign key (cid, sid)" +
            "references parentT (cid, sid) on delete restrict) " +
            "USING ROW OPTIONS ( PERSISTENT 'SYNCHRONOUS');"
      val alterTableStmt: String = "alter table childT drop FOREIGN KEY parent_fk"
      stmt.execute(createParentTable)
      stmt.execute(createChildTable)
      stmt.execute(alterTableStmt)
    } finally {
      // child first: it references the parent
      stmt.execute("drop table if exists childT")
      stmt.execute("drop table if exists parentT")
      stmt.close()
    }
  } finally {
    conn.close()
  }
}

/**
 * Shared scenario of the SNAP-2707 and SNAP-3038 tests: exercises `PUT INTO` through
 * prepared statements against a column table with `key_columns 'id'`.
 *
 * Steps, each followed by a row-count check and a content check:
 *  1. ten rows inserted through `snc.sql`,
 *  2. single-row puts for ids 1..20,
 *  3. batched puts for ids 1..30,
 *  4. batched puts with an explicit column list for ids 11..20,
 *  5. batched puts of only the id column for ids 31..40 (str must be null).
 *
 * The content check after step 2 used to iterate the already exhausted result set of
 * step 1, starting from id 0, so it never executed; it now reads its own result set.
 * Each content check also verifies that the expected number of rows was visited.
 *
 * @param conn  open client connection; not closed here
 * @param snc   context used for the initial inserts
 * @param table table name, optionally schema-qualified; dropped and recreated
 */
private def verifyPutIntoVariants(conn: Connection, snc: SnappyContext,
    table: String): Unit = {
  val stmt = conn.createStatement()

  def assertRowCount(expected: Int): Unit = {
    val rs = stmt.executeQuery(s"select count(*) from $table")
    try {
      assert(rs.next())
      assertEquals(expected, rs.getInt(1))
    } finally {
      rs.close()
    }
  }

  // Rows must come back as firstId, firstId + 1, ..., lastId with the expected string.
  def assertRows(where: String, firstId: Int, lastId: Int)(expectedStr: Int => String): Unit = {
    val rs = stmt.executeQuery(s"select * from $table ${where}order by id")
    try {
      var id = firstId
      while (rs.next()) {
        assertEquals(id, rs.getInt(1))
        assertEquals(expectedStr(id), rs.getString(2))
        id += 1
      }
      assertEquals(lastId + 1, id)
    } finally {
      rs.close()
    }
  }

  // Executes `sql` once per id; batched runs flush on every id divisible by 10 and at the end.
  def put(sql: String, ids: Range, batched: Boolean)(bind: (PreparedStatement, Int) => Unit)
  : Unit = {
    val ps = conn.prepareStatement(sql)
    try {
      ids.foreach { id =>
        bind(ps, id)
        if (batched) {
          ps.addBatch()
          if (id % 10 == 0) {
            ps.executeBatch()
          }
        } else {
          ps.executeUpdate()
        }
      }
      if (batched) {
        ps.executeBatch()
      }
    } finally {
      ps.close()
    }
  }

  try {
    stmt.execute(s"drop table if exists $table")
    stmt.execute(s"create table $table(id integer, str string) using column options" +
        "(key_columns 'id', COLUMN_MAX_DELTA_ROWS '7', BUCKETS '2')")

    for (i <- 1 to 10) {
      snc.sql(s"insert into $table values(" + i + ",'str" + i + "')")
    }
    assertRowCount(10)
    assertRows("", 1, 10)(id => "str" + id)

    put(s"put into $table values(?,?)", 1 to 20, batched = false) { (ps, id) =>
      ps.setInt(1, id)
      ps.setString(2, "str_" + id)
    }
    assertRowCount(20)
    assertRows("", 1, 20)(id => "str_" + id)

    put(s"put into $table values(?,?)", 1 to 30, batched = true) { (ps, id) =>
      ps.setInt(1, id)
      ps.setString(2, "strings_" + id)
    }
    assertRowCount(30)
    assertRows("", 1, 30)(id => "strings_" + id)

    put(s"put into $table(id,str) values(?,?)", 11 to 20, batched = true) { (ps, id) =>
      ps.setInt(1, id)
      ps.setString(2, "str123_" + id)
    }
    assertRowCount(30)
    assertRows("where id >= 11 and id <= 20 ", 11, 20)(id => "str123_" + id)

    put(s"put into $table(id) values(?)", 31 to 40, batched = true) { (ps, id) =>
      ps.setInt(1, id)
    }
    assertRowCount(40)
    assertRows("where id >= 31 and id <= 40 ", 31, 40)(_ => null)
  } finally {
    stmt.close()
  }
}

/** Runs the prepared-statement PUT INTO scenario on table `t1` in the default schema. */
def testSNAP2707withPreparedStatement(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
  val conn = getANetConnection(netPort1)
  try {
    verifyPutIntoVariants(conn, SnappyContext(sc), "t1")
  } finally {
    conn.close()
  }
}

/**
 * Runs the prepared-statement PUT INTO scenario on the schema-qualified table
 * `std1.t1`, after recreating schema `std1`.
 */
def testSNAP3038withPreparedStatement(): Unit = {
  val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
  val conn = getANetConnection(netPort1)
  try {
    val stmt = conn.createStatement()
    try {
      stmt.execute("drop schema if exists std1")
      stmt.execute("create schema std1")
    } finally {
      stmt.close()
    }
    verifyPutIntoVariants(conn, SnappyContext(sc), "std1.t1")
  } finally {
    conn.close()
  }
}
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.cluster + +import java.sql.{Connection, SQLException, Statement} +import java.util + +import com.pivotal.gemfirexd.internal.shared.common.reference.SQLState +import io.snappydata.test.dunit.AvailablePortHelper + +import org.apache.spark.Logging + +class SetIsolationDUnitTest (val s: String) + extends ClusterManagerTestBase(s) with Logging { + + private def createTables(conn: Connection): Unit = { + val stmt = conn.createStatement() + stmt.execute("create table rowtable(col1 int, col2 int, col3 int)" + + " using row options (partition_by 'col1')") + stmt.execute("create table coltable(col1 int, col2 int, col3 int)" + + " using column options (partition_by 'col1')") + + for (i <- 1 to 100) { + stmt.execute(s"insert into rowtable values ($i, $i, $i)") + stmt.execute(s"insert into coltable values ($i, $i, $i)") + } + } + + private def validateTableData(conn: Connection) = { + val stmt1 = conn.createStatement() + var rs1 = stmt1.executeQuery("select count(*) from rowtable") + assert(rs1.next()) + assert(rs1.getInt(1) == 100, "result mismatch") + + rs1 = stmt1.executeQuery("select count(*) from coltable") + assert(rs1.next()) + assert(rs1.getInt(1) == 100, "result mismatch") + } + + // queries not allowed on a column table inside a transaction + def checkUnsupportedQueries(stmt: Statement, query: String, + expectedSqlState: String = 
SQLState.SNAPPY_OP_DISALLOWED_ON_COLUMN_TABLES): Unit = { + try { + // tx not allowed as on column tables + stmt.execute(query) + assert(false, "query should have failed as tx on column table is not allowed") + } catch { + case sq: SQLException if expectedSqlState. + startsWith(sq.getSQLState) => // expected + } + } + + def performOperationsOnTable(conn: Connection, tableName: String): Unit = { + val stmt1 = conn.createStatement() + var rs1 = stmt1.executeQuery(s"select count(*) from $tableName") + assert(rs1.next()) + assert(rs1.getInt(1) == 100, "result mismatch") + // insert data + logInfo(s"inserting a row in $tableName") + stmt1.execute(s"insert into $tableName values(101, 101, 101)") + logInfo(s"select count from $tableName") + rs1 = stmt1.executeQuery(s"select count(*) from $tableName") + assert(rs1.next()) + var cnt = rs1.getInt(1) + assert(cnt == 101, s"result mismatch. Actual numRows = $cnt. Expect numRows = 101") + // delete + stmt1.execute(s"delete from $tableName where col1 = 101") + rs1 = stmt1.executeQuery(s"select count(*) from $tableName") + assert(rs1.next()) + cnt = rs1.getInt(1) + assert(cnt == 100, s"Expected 100 but got $cnt") + stmt1.close() + } + + def testSetIsolationLevel(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + var conn = getANetConnection(netPort1) + + logInfo("Creating tables for the test") + createTables(conn) + + // with autocommit true transactions on row and column table are allowed + logInfo("setting autocommit true") + conn.setAutoCommit(true) + conn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED) + validateTableData(conn) + performOperationsOnTable(conn, "rowtable") + performOperationsOnTable(conn, "coltable") + conn.close() + + // with autocommit false transactions allowed on row tables only + logInfo("setting autocommit false") + conn = getANetConnection(netPort1) + conn.setAutoCommit(false) + 
conn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED) + performOperationsOnTable(conn, "rowtable") + conn.commit() + + val stmt1 = conn.createStatement() + logInfo("checking unsupported queries on column tables") + // queries involving column tables + checkUnsupportedQueries(stmt1, "select count(*) from coltable") + checkUnsupportedQueries(stmt1, "insert into coltable values(101, 101, 101)") + checkUnsupportedQueries(stmt1, "insert into rowtable select col1, col2, col3 from coltable") + checkUnsupportedQueries(stmt1, "put into coltable values(101, 101, 101)") + checkUnsupportedQueries(stmt1, "put into rowtable select col1, col2, col3 from coltable") + checkUnsupportedQueries(stmt1, "delete from coltable where col1 = 101") + checkUnsupportedQueries(stmt1, "delete from rowtable where col1 in " + + "(select col1 from coltable)") + checkUnsupportedQueries(stmt1, "update coltable set col2 = 101") + checkUnsupportedQueries(stmt1, "update coltable set col2 = 101 where col2 in " + + "(select col1 from coltable)") + + // queries involving row tables that should not get routed when + // autocommit is false (for example even if there is a syntax error) + logInfo("checking unsupported queries on row tables") + checkUnsupportedQueries(stmt1, "select * from rowtable limit 1", SQLState.LANG_SYNTAX_ERROR) + checkUnsupportedQueries(stmt1, "select rowtable.col1 as rc1, coltable.col1 as cc1" + + " from rowtable, coltable where rowtable.col1 = coltable.col1", SQLState.NOT_COLOCATED_WITH) + stmt1.close() + conn.close() + + // user sets route-query=false, query involving column table should error out + val queryRoutingDisabledConn = getANetConnection(netPort1, disableQueryRouting = true) + val stmt2 = queryRoutingDisabledConn.createStatement() + checkUnsupportedQueries(stmt2, "select count(*) from coltable") + queryRoutingDisabledConn.close() + } + + var gotConflict = false + + /** + * Test conflicts. 
Copied part of + * rowstore test TransactionDUnit#testCommitWithConflicts() + */ + def testCommitWithConflicts() { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + + val conn = getANetConnection(netPort1) + logInfo(s"testCommitWithConflicts: test with isolation level TRANSACTION_READ_COMMITTED") + doTestCommitWithConflicts(netPort1, conn, Connection.TRANSACTION_READ_COMMITTED) + conn.close() + + val conn2 = getANetConnection(netPort1) + logInfo(s"testCommitWithConflicts: test with isolation level TRANSACTION_REPEATABLE_READ") + doTestCommitWithConflicts(netPort1, conn2, Connection.TRANSACTION_REPEATABLE_READ) + conn2.close() + } + + /** + * Test put into on replicated table - SNAP2082 + */ + def testPutIntoOnReplicatedTables() { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + + val conn = getANetConnection(netPort1) + logInfo(s"testPutIntoOnReplicatedTables: test with isolation level TRANSACTION_READ_COMMITTED") + conn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED) + val st = conn.createStatement() + st.execute("CREATE TABLE APP.TABLE2 ( FIELD1 VARCHAR(36) NOT NULL " + + "PRIMARY KEY, FIELD2 INT, FIELD3 VARCHAR(36) NOT NULL)") + st.executeUpdate("PUT INTO APP.TABLE2(FIELD1, FIELD2, FIELD3) VALUES ('key1',1,'value1')") + conn.commit() + val rs1 = st.executeQuery("select * from table2") + assert(rs1.next()) + assert(rs1.getString(3).equals("value1")) + st.executeUpdate("PUT INTO APP.TABLE2(FIELD1, FIELD2, FIELD3) VALUES ('key1',1,'value1')") + conn.commit() + st.executeUpdate("PUT INTO APP.TABLE2(FIELD1, FIELD2, FIELD3) VALUES ('key1',1,'value11')") + conn.commit() + val rs2 = st.executeQuery("select * from table2") + assert(rs2.next()) + assert(rs2.getString(3).equals("value11")) + conn.close() + } + + private def doTestCommitWithConflicts(netPort1: Int, conn: Connection, 
isolationLevel: Int) = { + conn.setAutoCommit(false) + val st = conn.createStatement +// st.execute("create schema tran") + st.execute("Create table tran.t1 (c1 int not null primary key, c2 int not null) using row") + conn.commit() + this.gotConflict = false + logInfo(s"doTestCommitWithConflicts: setting isolation level $isolationLevel") + + conn.setTransactionIsolation(isolationLevel) + st.execute("insert into tran.t1 values (10, 10)") + st.execute("insert into tran.t1 values (20, 10)") + st.execute("insert into tran.t1 values (30, 10)") + val otherTxOk = Array[Boolean](false) + val otherTx = new Thread(new Runnable() { + def run() { + try { + val otherConn = getANetConnection(netPort1) + otherConn.setTransactionIsolation(isolationLevel) + otherConn.setAutoCommit(false) + val otherSt = otherConn.createStatement + try { + otherSt.execute("insert into tran.t1 values (10, 20)") + otherTxOk(0) = true + } + catch { + case sqle: SQLException => { + if ("X0Z02" == sqle.getSQLState) { + gotConflict = true + otherConn.rollback() + } + else throw sqle + } + } finally { + otherConn.close() + } + } + catch { + case se: SQLException => { + gotConflict = false + assert(false, s"unexpected exception $se") + } + } + } + }) + otherTx.start() + otherTx.join() + assert(!otherTxOk(0)) + assert(this.gotConflict, "expected conflict") + this.gotConflict = false + + + val ps = conn.prepareStatement("select * from tran.t1") + conn.commit() + // check that the value should be that of first transaction + var rs = ps.executeQuery + var expectedKeys = Array[Int](10, 20, 30) + var numRows = 0 + while (rs.next) { + numRows += 1 + val key = rs.getInt(1) + val index = util.Arrays.binarySearch(expectedKeys, key) + assert(index >= 0, "Expected to find the key: ") + expectedKeys(index) = Integer.MIN_VALUE + 1 + util.Arrays.sort(expectedKeys) + val v = rs.getInt(2) + assert(v == 10, s"Second column should be 10. 
Actual is $v") + } + assert(numRows == 3, "ResultSet should have three rows") + rs.close() + st.close() + conn.commit() + conn.setTransactionIsolation(Connection.TRANSACTION_NONE) + conn.createStatement().execute("drop table tran.t1") + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/SnappyResourceEventsDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyResourceEventsDUnitTest.scala new file mode 100644 index 0000000000..7a16853801 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyResourceEventsDUnitTest.scala @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.cluster + +import scala.Predef._ + +import com.gemstone.gemfire.internal.cache.GemFireCacheImpl +import com.gemstone.gemfire.internal.cache.control.{HeapMemoryMonitor, InternalResourceManager} +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.ServiceManager + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.SnappyContext +import org.apache.spark.storage.{RDDInfo, StorageLevel} + +class SnappyResourceEventsDUnitTest (s: String) extends ClusterManagerTestBase(s) { + + import SnappyResourceEventsDUnitTest._ + + override def tearDown2(): Unit = { + resetGFResourceManager() + Array(vm3, vm2, vm1, vm0).foreach(_.invoke(this.getClass, + "resetGFResourceManager")) + super.tearDown2() + } + + def _testEvictionUp(): Unit = { + // Execute the job + runSparkJob() + vm0.invoke(this.getClass, "raiseEvictionUpMemoryEvent") + vm1.invoke(this.getClass, "raiseEvictionUpMemoryEvent") + vm2.invoke(this.getClass, "raiseEvictionUpMemoryEvent") + runSparkJobAfterThresholdBreach() + } + + def _testCriticalUp(): Unit = { + // Execute the job + runSparkJob() + vm0.invoke(this.getClass, "raiseCriticalUpMemoryEvent") + vm1.invoke(this.getClass, "raiseCriticalUpMemoryEvent") + vm2.invoke(this.getClass, "raiseCriticalUpMemoryEvent") + runSparkJobAfterThresholdBreach() + } + + def testDummy(): Unit = { + // Dummy test. Does not want to delete this class as some test codes can be reused later. 
+ } + + } + +object SnappyResourceEventsDUnitTest { + + private def sc = SnappyContext.globalSparkContext + + def runSparkJob(): Unit = { + val rdd1 = sc.makeRDD(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)).cache() + println(rdd1.count()) + assert(!sc.getRDDStorageInfo.isEmpty) + } + + def getInMemorySizeForCachedRDDs: Long = { + val rddInfo: Array[RDDInfo] = sc.getRDDStorageInfo + var sum = 0L + for (i <- rddInfo.indices) { + sum = sum + rddInfo(i).memSize + } + sum + } + + def runSparkJobAfterThresholdBreach(): Unit = { + val sum1: Long = getInMemorySizeForCachedRDDs + println("1. cached rdd mem size before caching rdd when critical or eviction up = " + sum1) + + val rdd2 = sc.makeRDD(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)).cache() + println(rdd2.count()) + val sum2: Long = getInMemorySizeForCachedRDDs + println("2. cached rdd mem size after caching first rdd when critical or eviction up = " + sum2) + // make sure that after eviction up new rdd being cached does not result in + // increased memory usage + assert(!(sum2 > sum1), s"sum1 = $sum1, sum2 = $sum2") + + val rdd3 = sc.makeRDD(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)).cache() + println(rdd3.count()) + val sum3: Long = getInMemorySizeForCachedRDDs + println("3. 
cached rdd mem size after caching second rdd when critical or eviction up = " + sum3) + // make sure that after eviction up new rdd being cached does not result in + // increased memory usage + assert(!(sum3 > sum2), s"sum2 = $sum2, sum3 = $sum3") + } + + def raiseCriticalUpMemoryEvent(): Unit = { + println("About to raise CRITICAL UP event") + val gfCache: GemFireCacheImpl = Misc.getGemFireCache + val resMgr: InternalResourceManager = gfCache.getResourceManager + HeapMemoryMonitor.setTestDisableMemoryUpdates(true) + resMgr.getHeapMonitor.setTestMaxMemoryBytes(100) + HeapMemoryMonitor.setTestBytesUsedForThresholdSet(92) + resMgr.setCriticalHeapPercentage(90F) + + resMgr.getHeapMonitor.updateStateAndSendEvent(92) + println("CRITICAL UP event sent") + } + + def raiseEvictionUpMemoryEvent(): Unit = { + println("About to raise EVICTION UP event") + val gfCache: GemFireCacheImpl = Misc.getGemFireCache + val resMgr: InternalResourceManager = gfCache.getResourceManager + HeapMemoryMonitor.setTestDisableMemoryUpdates(true) + resMgr.getHeapMonitor.setTestMaxMemoryBytes(100) + HeapMemoryMonitor.setTestBytesUsedForThresholdSet(90) + resMgr.setEvictionHeapPercentage(40F) + resMgr.getHeapMonitor.updateStateAndSendEvent(85) + println("EVICTION UP event sent") + } + + def resetGFResourceManager(): Unit = { + val service = ServiceManager.currentFabricServiceInstance + if (service != null) { + val gfCache: GemFireCacheImpl = Misc.getGemFireCacheNoThrow + if (gfCache != null) { + val resMgr: InternalResourceManager = gfCache.getResourceManager + resMgr.getHeapMonitor.setTestMaxMemoryBytes(0) + resMgr.getHeapMonitor.updateStateAndSendEvent(10) + } + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/SnappyRowStoreModeDUnit.scala b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyRowStoreModeDUnit.scala new file mode 100644 index 0000000000..8de3ccf4e0 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyRowStoreModeDUnit.scala @@ -0,0 +1,132 @@ 
+/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.io.PrintWriter +import java.nio.file.{Files, Paths} +import java.sql.{Connection, DriverManager, SQLException} + +import io.snappydata.test.dunit.{AvailablePortHelper, DistributedTestBase} +import io.snappydata.test.util.TestException +import scala.sys.process._ + +import com.pivotal.gemfirexd.TestUtil +import org.junit.Assert + +import org.apache.spark.Logging +import org.apache.spark.sql.collection.Utils + +class SnappyRowStoreModeDUnit (s: String) extends DistributedTestBase(s) with Logging { + + private val snappyProductDir = getEnvironmentVariable("SNAPPY_HOME") + + val port: Int = AvailablePortHelper.getRandomAvailableTCPPort + val netPort1: Int = AvailablePortHelper.getRandomAvailableTCPPort + val netPort2: Int = AvailablePortHelper.getRandomAvailableTCPPort + val netPort3: Int = AvailablePortHelper.getRandomAvailableTCPPort + + override def beforeClass(): Unit = { + super.beforeClass() + logInfo(s"Starting snappy rowstore cluster" + + s" in $snappyProductDir/work with locator client port $netPort1") + + // delete any old work directory + val workDir = new java.io.File(s"$snappyProductDir/work") + if (workDir.exists()) { + TestUtil.deleteDir(workDir) + } + // create locators and servers files + val confDir = s"$snappyProductDir/conf" + 
writeToFile(s"localhost -peer-discovery-port=$port -client-port=$netPort1", + s"$confDir/locators") + writeToFile( + s"""localhost -locators=localhost[$port] -client-port=$netPort2 + |localhost -locators=localhost[$port] -client-port=$netPort3 + |""".stripMargin, s"$confDir/servers") + (snappyProductDir + "/sbin/snappy-start-all.sh rowstore").!! + } + + override def afterClass(): Unit = { + super.afterClass() + + logInfo(s"Stopping snappy rowstore cluster in $snappyProductDir/work") + (snappyProductDir + "/sbin/snappy-stop-all.sh").!! + Files.deleteIfExists(Paths.get(snappyProductDir, "conf", "locators")) + Files.deleteIfExists(Paths.get(snappyProductDir, "conf", "leads")) + Files.deleteIfExists(Paths.get(snappyProductDir, "conf", "servers")) + } + + def getEnvironmentVariable(env: String): String = { + val value = scala.util.Properties.envOrElse(env, null) + if (value == null) { + throw new TestException(s"Environment variable $env is not defined") + } + value + } + + private def writeToFile(str: String, fileName: String): Unit = { + val pw = new PrintWriter(fileName) + try { + pw.write(str) + } finally { + pw.close() + } + } + + def getANetConnection(netPort: Int): Connection = { + val driver = "io.snappydata.jdbc.ClientDriver" + Utils.classForName(driver).newInstance + DriverManager.getConnection("jdbc:gemfirexd://localhost:" + netPort + "/") + } + + /* + * Basic test to make sure that SnappyData rowstore mode works + */ + def testRowStoreCluster(): Unit = { + val conn = getANetConnection(netPort1) + val s = conn.createStatement() + try { + s.execute("CREATE TABLE T1(COL1 INT, COL2 INT) PERSISTENT REPLICATE") + s.execute("INSERT INTO T1 VALUES(1, 1), (2, 2), (3, 3),(4, 4), (5, 5)") + s.execute("SELECT * FROM T1") + val rs = s.getResultSet + var cnt = 0 + while (rs.next()) { + cnt += 1 + } + assert(cnt == 5) + + try { + s.execute("CREATE TABLE colTable(Col1 INT ,Col2 INT, Col3 INT)" + + "USING column " + + "options " + + "(" + + "BUCKETS '1'," + + "REDUNDANCY 
'0')") + Assert.fail( + "Should have thrown an exception as rowstore does not support column tables") + } catch { + case sqe: SQLException => + if ("42X01" != sqe.getSQLState) { + throw sqe + } + } + } finally { + s.execute("DROP TABLE IF EXISTS T1") + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/SnappyTableStatsProviderDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyTableStatsProviderDUnitTest.scala new file mode 100644 index 0000000000..b6a00b7548 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/SnappyTableStatsProviderDUnitTest.scala @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.cluster + +import java.util.Properties + +import scala.collection.JavaConverters._ + +import com.gemstone.gemfire.internal.cache.{CachedDeserializableFactory, DiskEntry, DistributedRegion, PartitionedRegion, RegionEntry} +import com.gemstone.gemfire.management.ManagementService +import com.gemstone.gemfire.management.internal.SystemManagementService +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.ui.SnappyRegionStats +import com.pivotal.gemfirexd.tools.sizer.GemFireXDInstrumentation +import io.snappydata.test.dunit.SerializableRunnable +import io.snappydata.{SnappyEmbeddedTableStatsProviderService, SnappyTableStatsProviderService} + +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation +import org.apache.spark.sql.{SaveMode, SnappyContext} + +class SnappyTableStatsProviderDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + val table = "test.test_table" + + override def afterClass(): Unit = { + ClusterManagerTestBase.stopSpark() + super.afterClass() + } + def nodeShutDown(): Unit = { + ClusterManagerTestBase.stopSpark() + vm2.invoke(classOf[ClusterManagerTestBase], "stopAny") + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + vm0.invoke(classOf[ClusterManagerTestBase], "stopAny") + } + + def newContext(): SnappyContext = { + val snc = SnappyContext(sc).newSession() + io.snappydata.Property.ColumnBatchSize.set(snc.sessionState.conf, "5120") + snc + } + + def testVerifyTableStats(): Unit = { + val snc = newContext() + + createTable(snc, table, "row") + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "R") + snc.dropTable(table) + + createTable(snc, table, "row", Map("PERSISTENCE" -> "none")) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "R") + snc.dropTable(table) + + + createTable(snc, table, "row", Map("PARTITION_BY" -> "col1")) + 
SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "P") + snc.dropTable(table) + + + createTable(snc, table, "row", Map("PARTITION_BY" -> "col1", "PERSISTENCE" -> "sync")) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "P") + snc.dropTable(table) + + createTable(snc, table, "column") + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + snc.dropTable(table) + + + createTable(snc, table, "column", Map("BUCKETS" -> "2", "PARTITION_BY" -> "col1")) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + snc.dropTable(table) + + createTable(snc, table, "column", Map("PARTITION_BY" -> "col1", "PERSISTENCE" -> "sync")) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + snc.dropTable(table) + + createTable(snc, table, "column", Map("BUCKETS" -> "2", + "PARTITION_BY" -> "col1", "PERSISTENT" -> "sync")) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + snc.dropTable(table) + } + + def testVerifyTableStatsEvictionAndHA(): Unit = { + val props = bootProps + val port = ClusterManagerTestBase.locPort + + val restartServer = new SerializableRunnable() { + override def run(): Unit = ClusterManagerTestBase.startSnappyServer(port, props) + } + + val snc = newContext() + + createTable(snc, table, "column", Map("BUCKETS" -> "6" + , "PERSISTENT" -> "sync")) + + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + vm1.invoke(restartServer) + + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table) + + snc.dropTable(table) + } + + def testHeapEvictionHA(): Unit = { + + var props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + props.setProperty("eviction-heap-percentage", "20") + + def restartServer(props: Properties): SerializableRunnable = new SerializableRunnable() { + override def run(): Unit = ClusterManagerTestBase.startSnappyServer(port, props) + } + + val snc = newContext() + val 
expectedRowCount = 1888622 + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + vm2.invoke(classOf[ClusterManagerTestBase], "stopAny") + + vm1.invoke(restartServer(props)) + vm2.invoke(restartServer(props)) + + val airlineDataFrame = snc.read.load(getClass.getResource("/2015.parquet").getPath) + snc.createTable(table, "column", airlineDataFrame.schema, Map("PERSISTENT" -> "async")) + airlineDataFrame.write.format("column").mode(SaveMode.Append).saveAsTable(table) + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "C", expectedRowCount) + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + vm2.invoke(classOf[ClusterManagerTestBase], "stopAny") + + props = bootProps + + vm1.invoke(restartServer(props)) + vm2.invoke(restartServer(props)) + + SnappyTableStatsProviderDUnitTest.verifyResults(snc, table, "C", expectedRowCount) + snc.dropTable(table, true) + } + + + def createTable(snc: SnappyContext, tableName: String, + tableType: String, props: Map[String, String] = Map.empty): Unit = { + val data = for (i <- 1 to 7000) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data.toSeq, 8).map(s => + new io.snappydata.externalstore.Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + snc.createTable(tableName, tableType, dataDF.schema, props) + dataDF.write.format(tableType).mode(SaveMode.Append).saveAsTable(tableName) + } +} + + +object SnappyTableStatsProviderDUnitTest { + + def getPartitionedRegionStats(tableName: String, isColumnTable: Boolean): + SnappyRegionStats = { + var result = new SnappyRegionStats(tableName) + if (isColumnTable) { + result.setColumnTable(true) + val columnBatchTableName = ColumnFormatRelation.columnBatchTableName(tableName) + result = getDetailsForPR(columnBatchTableName, true, result) + } + getDetailsForPR(tableName, false, result) + } + + def getDetailsForPR(table: String, isColumnBatchTable: Boolean, + stats: SnappyRegionStats): SnappyRegionStats = { + val region = 
Misc.getRegionForTable(table.toUpperCase, true).asInstanceOf[PartitionedRegion] + val managementService = ManagementService.getManagementService(Misc.getGemFireCache). + asInstanceOf[SystemManagementService] + val regionBean = managementService.getLocalRegionMBean(region.getFullPath) + val sizer = GemFireXDInstrumentation.getInstance() + var entryOverhead = 0L + var entryCount = 0L + val (memSize, totalSize) = region.getDataStore.getAllLocalBucketRegions.asScala + .foldLeft(0L -> 0L) { case ((msize, tsize), br) => + val overhead = br.estimateMemoryOverhead(sizer) + if (entryOverhead == 0) { + val iter = br.entries.regionEntries().iterator() + if (iter.hasNext) { + val re = iter.next() + entryOverhead = sizer.sizeof(re) + (re match { + case de: DiskEntry => sizer.sizeof(de.getDiskId) + case _ => 0 + }) + } + } + entryCount += br.entryCount() + (msize + br.getSizeInMemory + overhead, tsize + br.getTotalBytes + overhead) + } + stats.setReplicatedTable(false) + stats.setBucketCount(region.getTotalNumberOfBuckets) + val size = if (isColumnBatchTable) regionBean.getRowsInColumnBatches + else regionBean.getEntryCount + stats.setRowCount(stats.getRowCount + size) + entryOverhead *= entryCount + stats.setSizeInMemory(stats.getSizeInMemory + memSize + entryOverhead) + stats.setTotalSize(stats.getTotalSize + totalSize + entryOverhead) + stats.setSizeSpillToDisk(stats.getTotalSize - stats.getSizeInMemory) + stats + } + + def getReplicatedRegionStats(tableName: String): SnappyRegionStats = { + val region = Misc.getRegionForTable(tableName.toUpperCase, true) + .asInstanceOf[DistributedRegion] + val result = new SnappyRegionStats(tableName) + val managementService = + ManagementService.getManagementService(Misc.getGemFireCache) + .asInstanceOf[SystemManagementService] + val sizer = GemFireXDInstrumentation.getInstance() + + def getReplicatedEntrySize(re: RegionEntry): Long = { + var size = 0L + val key = re.getRawKey + if (key ne null) { + size = 
CachedDeserializableFactory.calcMemSize(key) + } + size + CachedDeserializableFactory.calcMemSize(re._getValue()) + } + + var totalSize = region.estimateMemoryOverhead(sizer) + + region.getBestLocalIterator(true).asScala + .foldLeft(0L)(_ + getReplicatedEntrySize(_)) + val regionBean = managementService.getLocalRegionMBean(region.getFullPath) + result.setReplicatedTable(true) + result.setColumnTable(false) + result.setBucketCount(1) + result.setRowCount(regionBean.getEntryCount) + val overhead = region.getBestLocalIterator(true).next() match { + case de: DiskEntry => sizer.sizeof(de) + sizer.sizeof(de.getDiskId) + case re => sizer.sizeof(re) + } + totalSize += overhead * result.getRowCount + result.setSizeInMemory(totalSize) + result.setTotalSize(totalSize) + result.setSizeSpillToDisk(0) + result + } + + def getExpectedResult(snc: SnappyContext, tableName: String, + isReplicatedTable: Boolean = false, isColumnTable: Boolean = false): + SnappyRegionStats = { + def aggregateResults(left: SnappyRegionStats, + right: SnappyRegionStats): + SnappyRegionStats = { + left.getCombinedStats(right) + } + + val expected = Utils.mapExecutors[RegionStat](snc.sparkContext, () => { + val result = if (isReplicatedTable) getReplicatedRegionStats(tableName) + else getPartitionedRegionStats(tableName, isColumnTable) + Iterator[RegionStat](convertToSerializableForm(result)) + }) + + expected.map(getRegionStat).reduce(aggregateResults) + + } + + def convertToSerializableForm(stat: SnappyRegionStats): RegionStat = { + RegionStat(stat.getTableName, stat.getTotalSize, stat.getSizeInMemory, + stat.getRowCount, stat.isColumnTable, stat.isReplicatedTable, stat.getBucketCount) + } + + def getRegionStat(stat: RegionStat): SnappyRegionStats = { + new SnappyRegionStats(stat.tableName, stat.totalSize, + stat.memSize, stat.rowCount, stat.isColumnType, stat.isReplicated, stat.bucketCount) + } + + + def verifyResults(snc: SnappyContext, table: String, + tableType: String = "C", expectedRowCount: Int = 
7000): Unit = { + SnappyEmbeddedTableStatsProviderService.publishColumnTableRowCountStats() + val isColumnTable = tableType.equals("C") + val isReplicatedTable = tableType.equals("R") + def expected = SnappyTableStatsProviderDUnitTest.getExpectedResult(snc, table, + isReplicatedTable, isColumnTable) + def actual = SnappyTableStatsProviderService.getService. + getAggregatedStatsOnDemand._1(table.toUpperCase) + + assert(actual.getTableName.toLowerCase == expected.getTableName) + assert(actual.isColumnTable == expected.isColumnTable, + s"Actual=${actual.isColumnTable} expected=${expected.isColumnTable} for $table") + + ClusterManagerTestBase.waitForCriterion(actual.getSizeInMemory == expected.getSizeInMemory + && actual.getTotalSize == expected.getTotalSize + && actual.getRowCount == expected.getRowCount, + s"Expected Size ${expected.getSizeInMemory} Size ${actual.getSizeInMemory} \n" + + s"Expected Total Size ${expected.getTotalSize} Total Size ${actual.getTotalSize} \n" + + s"Expected Count ${expected.getRowCount} Count ${actual.getRowCount} \n", + 20000, 1000, true) + } +} + +case class RegionStat(tableName: String, totalSize: Long, + memSize: Long, rowCount: Long, isColumnType: Boolean, + isReplicated: Boolean, bucketCount: Int) diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/SplitSnappyClusterDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/SplitSnappyClusterDUnitTest.scala new file mode 100644 index 0000000000..d30d0f1715 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/SplitSnappyClusterDUnitTest.scala @@ -0,0 +1,1453 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.io.PrintWriter +import java.net.InetAddress +import java.nio.file.{Files, Paths} +import java.util.Properties + +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, Future} +import scala.language.postfixOps +import scala.reflect.io.Path +import scala.util.{Failure, Success, Try} + +import com.gemstone.gemfire.internal.cache.PartitionedRegion +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.core.{TestData, TestData2} +import io.snappydata.test.dunit.{AvailablePortHelper, SerializableRunnable} +import io.snappydata.util.TestUtils +import io.snappydata.{ColumnUpdateDeleteTests, Property, SnappyTableStatsProviderService} +import org.junit.Assert + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.execution.CatalogStaleException +import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation +import org.apache.spark.sql.kafka010.KafkaTestUtils +import org.apache.spark.sql.store.{SnappyJoinSuite, StoreUtils} +import org.apache.spark.sql.streaming.ProcessingTime +import org.apache.spark.sql.types.{DateType, StringType, StructField, StructType} +import org.apache.spark.sql.udf.UserDefinedFunctionsDUnitTest +import org.apache.spark.{Logging, SparkConf, SparkContext} + +/** + * Basic tests for non-embedded mode connections to an embedded cluster. 
+ */ +class SplitSnappyClusterDUnitTest(s: String) + extends ClusterManagerTestBase(s) with SplitClusterDUnitTestBase with Serializable { + + override val locatorNetPort: Int = testObject.locatorNetPort + + override val stopNetServersInTearDown = false + + val currentLocatorPort: Int = ClusterManagerTestBase.locPort + + override protected val sparkProductDir: String = + testObject.getEnvironmentVariable("SNAPPY_HOME") + + override def beforeClass(): Unit = { + // stop any existing SnappyContext to enable applying thrift-server properties + val sc = SnappyContext.globalSparkContext + if ((sc ne null) && !sc.isStopped) { + ClusterManagerTestBase.stopSpark() + } + super.beforeClass() + startNetworkServers() + vm3.invoke(classOf[ClusterManagerTestBase], "startSparkCluster", sparkProductDir) + } + + override def afterClass(): Unit = { + Array(vm2, vm1, vm0).foreach(_.invoke(getClass, "stopNetworkServers")) + ClusterManagerTestBase.stopNetworkServers() + vm3.invoke(classOf[ClusterManagerTestBase], "stopSparkCluster", sparkProductDir) + super.afterClass() + } + + def testCreateTablesFromOtherTables(): Unit = { + // stop a network server to test remote fetch + vm0.invoke(classOf[ClusterManagerTestBase], "stopNetworkServers") + vm3.invoke(getClass, "createTablesFromOtherTablesTest", + startArgs :+ + Int.box(locatorClientPort)) + } + + override protected def locatorClientPort: Int = locatorNetPort + + override protected def startNetworkServers(): Unit = { + startNetworkServersOnAllVMs() + } + + override protected def testObject = SplitSnappyClusterDUnitTest + + def testCollocatedJoinInSplitModeRowTable(): Unit = { + testObject.createRowTableForCollocatedJoin() + vm3.invoke(getClass, "checkCollocatedJoins", startArgs :+ + "PR_TABLE1" :+ "PR_TABLE2" :+ Int.box(locatorClientPort)) + } + + def testCollocatedJoinInSplitModeColumnTable(): Unit = { + testObject.createColumnTableForCollocatedJoin() + vm3.invoke(getClass, "checkCollocatedJoins", startArgs :+ + "PR_TABLE3" :+ 
"PR_TABLE4" :+ + Int.box(locatorClientPort)) + } + + def testColumnTableStatsInSplitMode(): Unit = { + vm3.invoke(getClass, "checkStatsForSplitMode", startArgs :+ + "1" :+ Int.box(locatorClientPort)) + vm3.invoke(getClass, "checkStatsForSplitMode", startArgs :+ + "8" :+ Int.box(locatorClientPort)) + } + + def testBatchSize(): Unit = { + val snc = SnappyContext(sc) + val tblBatchSizeSmall = "APP.tblBatchSizeSmall_embedded" + val tblSizeBig = "APP.tblBatchSizeBig_embedded" + val tblBatchSizeBig_split = "APP.tblBatchSizeBig_split" + val tblBatchSizeSmall_split = "APP.tblBatchSizeSmall_split" + + snc.sql(s"drop table if exists $tblBatchSizeSmall") + snc.sql(s"drop table if exists $tblSizeBig") + snc.sql(s"drop table if exists $tblBatchSizeBig_split") + snc.sql(s"drop table if exists $tblBatchSizeSmall_split") + + snc.sql(s"CREATE TABLE $tblBatchSizeSmall(Key1 INT ,Value STRING) " + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1'," + + "BUCKETS '8', COLUMN_BATCH_SIZE '200')") + + snc.sql(s"CREATE TABLE $tblSizeBig (Key1 INT ,Value STRING) " + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1'," + + "BUCKETS '8', COLUMN_BATCH_SIZE '200000')") + + val rdd = sc.parallelize( + (1 to 100000).map(i => TestData(i, i.toString))) + + implicit val encoder: Encoder[TestData] = Encoders.product[TestData] + val dataDF = snc.createDataset(rdd) + + dataDF.write.insertInto(tblBatchSizeSmall) + dataDF.write.insertInto(tblSizeBig) + + // StandAlone Spark Cluster Operations + vm3.invoke(getClass, "splitModeTableCreate", + startArgs :+ + Int.box(locatorClientPort)) + + assert(getShadowRegionSize(tblBatchSizeSmall) > 10, + s"Expected batches should be greater than " + + s"10 but are ${getShadowRegionSize(tblBatchSizeSmall)}") + assert(getShadowRegionSize(tblSizeBig) > 0, s"Expected batches should be greater than " + + s"0 but are ${getShadowRegionSize(tblSizeBig)}") + assert(getShadowRegionSize(tblSizeBig) < 10, s"Expected batches should be less than " + + 
s"10 but are ${getShadowRegionSize(tblSizeBig)}") + + assert(getShadowRegionSize(tblBatchSizeSmall_split) > 10, + s"Expected batches should be greater than " + + s"10 but are ${getShadowRegionSize(tblBatchSizeSmall_split)}") + + assert(getShadowRegionSize(tblBatchSizeBig_split) > 0, + s"Expected batches should be greater than " + + s"0 but are ${getShadowRegionSize(tblBatchSizeBig_split)}") + + assert(getShadowRegionSize(tblBatchSizeBig_split) < 10, + s"Expected batches should be less than " + + s"10 but are ${getShadowRegionSize(tblBatchSizeBig_split)}") + + logInfo("Test Completed Successfully") + } + + def getRegionSize(tbl: String): Long = { + Misc.getRegionForTable(tbl.toUpperCase, + true).asInstanceOf[PartitionedRegion].size() + + } + + def getShadowRegionSize(tbl: String): Long = { + // divide by three as 2 entries are for column and one is a base entry + Misc.getRegionForTable(ColumnFormatRelation. + columnBatchTableName(tbl).toUpperCase, + true).asInstanceOf[PartitionedRegion].size() / 3 + } + + def testColumnTableStatsInSplitModeWithHA(): Unit = { + vm3.invoke(getClass, "checkStatsForSplitMode", startArgs :+ + "1" :+ Int.box(locatorClientPort)) + val props = bootProps + val port = currentLocatorPort + + val restartServer = new SerializableRunnable() { + override def run(): Unit = ClusterManagerTestBase.startSnappyServer(port, props) + } + + vm0.invoke(classOf[ClusterManagerTestBase], "stopAny") + val stats = SnappyTableStatsProviderService.getService. + getAggregatedStatsOnDemand._1("APP.SNAPPYTABLE") + + Assert.assertEquals(10000100, stats.getRowCount) + vm0.invoke(restartServer) + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + val stats1 = SnappyTableStatsProviderService.getService. 
+ getAggregatedStatsOnDemand._1("APP.SNAPPYTABLE") + Assert.assertEquals(10000100, stats1.getRowCount) + vm1.invoke(restartServer) + + // Repeat the check with a table created using 8 buckets + vm3.invoke(getClass, "checkStatsForSplitMode", startArgs :+ + "8" :+ Int.box(locatorClientPort)) + vm0.invoke(classOf[ClusterManagerTestBase], "stopAny") + val stats2 = SnappyTableStatsProviderService.getService. + getAggregatedStatsOnDemand._1("APP.SNAPPYTABLE") + Assert.assertEquals(10000100, stats2.getRowCount) + val snc = SnappyContext(sc) + snc.sql("insert into snappyTable values(1,'Test')") + SnappyTableStatsProviderService.getService. + getAggregatedStatsOnDemand._1("APP.SNAPPYTABLE") + vm0.invoke(restartServer) + } + + def testCTAS(): Unit = { + val snc = SnappyContext(sc) + // StandAlone Spark Cluster Operations + vm3.invoke(getClass, "splitModeCreateTableUsingCTAS", + startArgs :+ + Int.box(locatorClientPort)) + + val count = snc.sql("select * from customer").count() + assert(count == 750, s"Expected 750 rows. Actual rows = $count") + + snc.sql("DROP TABLE CUSTOMER_STAGING") + snc.sql("DROP TABLE CUSTOMER") + + val count2 = snc.sql("select * from customer_2").count() + assert(count2 == 750, s"Expected 750 rows. 
Actual rows = $count2") + snc.sql("DROP TABLE CUSTOMER_2") + } + + def testUDF(): Unit = { + doTestUDF(skewNetworkServers) + } + + def doTestUDF(skewServerDistribution: Boolean): Unit = { + testObject.createUDFInEmbeddedMode() + + // StandAlone Spark Cluster Operations + vm3.invoke(getClass, "createUDFInSplitMode", + startArgs :+ Int.box(locatorClientPort)) + + testObject.verifyUDFInEmbeddedMode() + + // StandAlone Spark Cluster Operations + vm3.invoke(getClass, "verifyUDFInSplitMode", + startArgs :+ Int.box(locatorClientPort)) + } + + def testDeployPackageNameFormat(): Unit = { + val sns = new SnappySession(sc) + try { + val jarPath = s"$sparkProductDir/jars/hadoop-client-2.7.7.jar" + sns.sql("deploy package mongo_spark 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.2'") + sns.sql("deploy package mongo-spark_v1.0 'org.mongodb.spark:mongo-spark-" + + "connector_2.11:2.2.2'") + sns.sql("deploy package app.mongo-spark_v1.1 'org.mongodb.spark:mongo-spark-" + + "connector_2.11:2.2.2'") + sns.sql("deploy package testsch.mongo-spark_v1.2 'org.mongodb.spark:mongo-spark" + + "-connector_2.11:2.2.2'") + sns.sql( + s"""deploy package "testsch"."mongo-spark_v1.3" 'org.mongodb.spark:mongo""" + + "-spark-connector_2.11:2.2.2'") + sns.sql( + s"""deploy package testsch."mongo-spark_v1.4" 'org.mongodb.spark:mongo""" + + "-spark-connector_2.11:2.2.2'") + sns.sql( + s"""deploy package "testsch".mongo-spark_v1.5 'org.mongodb.spark:mongo""" + + "-spark-connector_2.11:2.2.2'") + assert(sns.sql("list packages").count() == 7) + + sns.sql(s"""deploy jar avro-v_1.0 '$jarPath'""") + sns.sql(s"""deploy jar app.avro-v_1.1 '$jarPath'""") + sns.sql(s"""deploy jar testsch.avro-v_1.2 '$jarPath'""") + sns.sql(s"""deploy jar "app".avro-v_1.3 '$jarPath'""") + sns.sql(s"""deploy jar "testsch"."avro-v_1.4" '$jarPath'""") + sns.sql(s"""deploy jar testsch."avro-v_1.5" '$jarPath'""") + assert(sns.sql("list packages").count() == 13) + } + finally { + sns.sql("undeploy mongo_spark") + sns.sql("undeploy 
mongo-spark_v1.0") + sns.sql("undeploy app.mongo-spark_v1.1") + sns.sql("undeploy testsch.mongo-spark_v1.2") + sns.sql(s"""undeploy "testsch"."mongo-spark_v1.3" """) + sns.sql(s"""undeploy testsch."mongo-spark_v1.4" """) + sns.sql(s"""undeploy "testsch".mongo-spark_v1.5 """) + + sns.sql("undeploy avro-v_1.0 ") + sns.sql("undeploy app.avro-v_1.1") + sns.sql("undeploy testsch.avro-v_1.2") + sns.sql(s"""undeploy "app".avro-v_1.3 """) + sns.sql(s"""undeploy "testsch"."avro-v_1.4" """) + sns.sql(s"""undeploy testsch."avro-v_1.5" """) + assert(sns.sql("list packages").count() == 0) + } + import org.scalatest.Assertions._ + val thrown = intercept[Exception] { + sns.sql("deploy package \"testsch\".mongo-###park_v1.5" + + " 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.2") + } + // scalastyle:off + assert(thrown.getMessage === s"""Invalid input \"mongo-#\", expected packageIdentifierPart or stringLiteral (line 1, column 26):\ndeploy package \"testsch\".mongo-###park_v1.5 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.2\n ^;""") + // scalastyle:on + } + + def testDeployPackageDuplicateName(): Unit = { + val sns = new SnappySession(sc) + try { + sns.sql("deploy package mongo-spark_v.1.5" + + " 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.2'") + + sns.sql("deploy package mongo-spark_v.1.5_dup" + + " 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.2'") + + assert(sns.sql("list packages").count() == 2) + + sns.sql("deploy package akka-v1 'com.typesafe.akka:akka-actor_2.11:2.5.8'") + + Try(sns.sql("deploy package akka-v1 'com.datastax.spark:" + + "spark-cassandra-connector_2.11:2.3.2'")) match { + case Success(_) => throw new AssertionError( + "Deploy command should have failed because of the duplicate alias.") + case Failure(error) => assert(error.getMessage == "Name 'akka-v1' specified in context" + + " 'of deploying jars/packages' is not unique.") + } + assert(sns.sql("list packages").count() == 3) + } + finally { + sns.sql("undeploy mongo-spark_v.1.5") + 
sns.sql("undeploy mongo-spark_v.1.5_dup") + sns.sql("undeploy akka-v1") + assert(sns.sql("list packages").count() == 0) + } + } + + override def testUpdateDeleteOnColumnTables(): Unit = { + // check in embedded mode (connector mode tested in SplitClusterDUnitTest) + val session = new SnappySession(sc) + // using random bucket assignment for this test to check remote iteration + // added in SNAP-2012 + StoreUtils.TEST_RANDOM_BUCKETID_ASSIGNMENT = true + try { + ColumnUpdateDeleteTests.testBasicUpdate(session) + ColumnUpdateDeleteTests.testDeltaStats(session) + ColumnUpdateDeleteTests.testBasicDelete(session) + ColumnUpdateDeleteTests.testSNAP1925(session) + ColumnUpdateDeleteTests.testSNAP1926(session) + ColumnUpdateDeleteTests.testConcurrentOps(session) + ColumnUpdateDeleteTests.testSNAP2124(session) + } finally { + StoreUtils.TEST_RANDOM_BUCKETID_ASSIGNMENT = false + } + } + + def testStaleCatalog(): Unit = { + + val snc = SnappyContext(sc) + snc.sql(s"CREATE TABLE T5(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" (key_columns 'col1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, "doTestStaleCatalog", startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T5").count() == 0 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing putInto as expected.") + + // perform DDL + snc.sql(s"CREATE TABLE T6(COL1 STRING, COL2 STRING) " + + s"USING column OPTIONS (PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + + Await.result(future, scala.concurrent.duration.Duration.apply(3, "min")) + } finally { + snc.sql("drop table if exists T6") + snc.sql("drop table if exists T5") + } + } + + def testStaleCatalogRetryForStreamingSink(): Unit = { + val snc = SnappyContext(sc) + import 
scala.concurrent.ExecutionContext.Implicits.global + val testTempDirectory = "/tmp/SplitSnappyClusterDUnitTest" + + def cleanUp(): Unit = { + snc.sql("drop table if exists SYNC_TABLE") + snc.sql("drop table if exists USERS") + Path(testTempDirectory).deleteRecursively() + } + + cleanUp() + val future = Future { + vm3.invoke(getClass, "doTestStaleCatalogRetryForStreamingSink", + startArgs :+ Int.box(locatorClientPort) :+ testTempDirectory) + } + try { + // poll (at most 15 times, 4 seconds apart) until the file "file0" shows up in the temp directory + var attempts = 0 + while (!Files.exists(Paths.get(testTempDirectory, "file0")) && attempts < 15) { + Thread.sleep(4000) + attempts += 1 + } + // assert on the awaited condition itself; the earlier counter bound (14) disagreed with the loop limit (15) + assert(Files.exists(Paths.get(testTempDirectory, "file0")), "No data ingested by streaming application.") + + // perform DDL leading to stale catalog in smart connector application + snc.sql(s"CREATE TABLE SYNC_TABLE(COL1 STRING) " + s"USING column") + + new PrintWriter(s"$testTempDirectory/file1") { + write("dummydata") + close() + } + Await.result(future, Duration(2, "min")) + } finally { + cleanUp() + } + } + + def testSNAP3024(): Unit = { + val snc = SnappyContext(sc) + snc.sql(s"CREATE TABLE T5(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" (key_columns 'col1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + snc.sql("insert into t5 values('1', '1')") + snc.sql("insert into t5 values('2', '2')") + snc.sql("insert into t5 values('3', '3')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, "doTestStaleCatalogForSNAP3024", startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T5").count() == 3 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing putInto as expected.") + + // perform DDL + snc.sql(s"CREATE TABLE T6(COL1 STRING, COL2 STRING) " + + s"USING column OPTIONS (PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + + Await.result(future, 
scala.concurrent.duration.Duration.apply(3, "min")) + } finally { + snc.sql("drop table if exists T6") + snc.sql("drop table if exists T5") + } + } + + def testSmartConnectorAfterBucketRebalance(): Unit = { + val snc = SnappyContext(sc) + snc.sql(s"CREATE TABLE T5(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" (key_columns 'col1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + snc.sql("insert into t5 values('1', '1')") + snc.sql("insert into t5 values('2', '2')") + snc.sql("insert into t5 values('3', '3')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, + "doTestSmartConnectorForBucketRebalance", startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T5").count() == 3 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing putInto as expected.") + + // rebalance the buckets + snc.sql(s"CALL SYS.REBALANCE_ALL_BUCKETS()") + + Await.result(future, scala.concurrent.duration.Duration.apply(3, "min")) + } finally { + snc.sql("drop table if exists T6") + snc.sql("drop table if exists T5") + } + } + + def testInsertIntoRowTableAfterStaleCatalog(): Unit = { + insertDataAfterStaleCatalog("ROW") + } + + def testInsertIntoColumnTableAfterStaleCatalog(): Unit = { + insertDataAfterStaleCatalog("COLUMN") + } + + private def insertDataAfterStaleCatalog(tableType: String) = { + val snc = SnappyContext(sc) + + logInfo(s"insertDataAfterStaleCatalog: invoked for $tableType table") + if (tableType == "COLUMN") { + snc.sql(s"CREATE TABLE T5(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" ( PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + } else { + snc.sql(s"CREATE TABLE T5(COL1 STRING, COL2 STRING) USING row OPTIONS (partition_by 'col1')") + } + snc.sql("insert into t5 values('1', '1')") + snc.sql("insert into t5 values('2', 
'2')") + snc.sql("insert into t5 values('3', '3')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, "doTestInsertAfterStaleCatalog", + startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T5").count() == 3 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing insert as expected.") + + logInfo("testInsertQueryAfterStaleCatalog dropping table t5") + // drop the table and create a table with same name and different schema + // create a table with different schema + snc.sql("drop table t5") + if (tableType == "COLUMN") { + snc.sql(s"CREATE TABLE T5(COL1 DATE, COL2 DATE) USING column OPTIONS" + + s" ( PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + } else { + snc.sql(s"CREATE TABLE T5(COL1 DATE, COL2 DATE) USING row OPTIONS (partition_by 'col1')") + } + + Await.result(future, scala.concurrent.duration.Duration.apply(5, "min")) + } finally { + snc.sql("drop table if exists T5") + } + } + + def testDeleteAfterStaleCatalog(): Unit = { + val snc = SnappyContext(sc) + + snc.sql(s"CREATE TABLE T6(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" (key_columns 'COL1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + snc.sql("insert into t6 values('1', '1')") + snc.sql("insert into t6 values('2', '2')") + snc.sql("insert into t6 values('3', '3')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, "doTestDeleteAfterStaleCatalog", + startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T6").count() == 3 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing delete as expected.") + + 
logInfo("testDeleteAfterStaleCatalog dropping table t6") + snc.sql("drop table t6") + // create a table with different schema + snc.sql(s"CREATE TABLE T6(COL1 DATE, COL2 DATE) USING column OPTIONS" + + s" (key_columns 'COL1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + + Await.result(future, scala.concurrent.duration.Duration.apply(5, "min")) + } finally { + snc.sql("drop table if exists T6") + } + } + + def testUpdateAfterStaleCatalog(): Unit = { + val snc = SnappyContext(sc) + + snc.sql(s"CREATE TABLE T7(COL1 STRING, COL2 STRING) USING column OPTIONS" + + s" (key_columns 'COL1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + snc.sql("insert into t7 values('1', '1')") + snc.sql("insert into t7 values('2', '2')") + snc.sql("insert into t7 values('3', '3')") + + import scala.concurrent.ExecutionContext.Implicits.global + val future = Future { + vm3.invoke(getClass, "doTestUpdateAfterStaleCatalog", + startArgs :+ Int.box(locatorClientPort)) + } + + try { + // wait till the smart connector job perform at-least one putInto operation + var count = 0 + while (snc.table("T7").count() == 3 && count < 10) { + Thread.sleep(4000) + count += 1 + } + assert(count != 10, "Smart connector application not performing delete as expected.") + + snc.sql(s"CREATE TABLE T8(COL1 DATE, COL2 DATE) USING column OPTIONS" + + s" (key_columns 'COL1', PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + + Await.result(future, scala.concurrent.duration.Duration.apply(5, "min")) + } finally { + snc.sql("drop table if exists T7") + snc.sql("drop table if exists T8") + } + } +} + +object SplitSnappyClusterDUnitTest + extends SplitClusterDUnitTestObject with Logging { + + private val locatorNetPort = AvailablePortHelper.getRandomAvailableTCPPort + + def sc: SparkContext = { + val context = ClusterManagerTestBase.sc + context + } + + def assertTableNotCachedInHiveCatalog(tableName: String): Unit = { + val session = new SnappySession(SnappyContext.globalSparkContext) + val catalog = 
session.sessionCatalog + try { + catalog.lookupRelation(session.tableIdentifier(tableName)) + assert(assertion = false, s"Table $tableName should not exist in the " + + s"cached Hive catalog") + } catch { + // expected exception + case _: org.apache.spark.sql.TableNotFoundException => + } + } + + override def createTablesAndInsertData(tableType: String): Unit = { + val snc = SnappyContext(sc) + + createTableUsingDataSourceAPI(snc, "embeddedModeTable1", tableType) + selectFromTable(snc, "embeddedModeTable1", 1005) + + createTableUsingDataSourceAPI(snc, "embeddedModeTable2", tableType) + selectFromTable(snc, "embeddedModeTable2", 1005) + + logInfo("Successful") + } + + override def createComplexTablesAndInsertData( + props: Map[String, String]): Unit = { + val snc = SnappyContext(sc) + + createComplexTableUsingDataSourceAPI(snc, "embeddedModeTable1", + "column", props) + selectFromTable(snc, "embeddedModeTable1", 1005) + + createComplexTableUsingDataSourceAPI(snc, "embeddedModeTable2", + "column", props) + selectFromTable(snc, "embeddedModeTable2", 1005) + + logInfo("Successful") + } + + def createUDFInEmbeddedMode(): Unit = { + val snc = SnappyContext(sc) + val rdd = sc.parallelize((1 to 5).map(i => OrderData(i, s"some $i", i))) + val refDf = snc.createDataFrame(rdd) + snc.sql("DROP TABLE IF EXISTS RR_TABLE") + snc.sql("DROP TABLE IF EXISTS COL_TABLE") + + snc.sql("CREATE TABLE RR_TABLE(OrderRef INT NOT NULL, description String, price BIGINT)") + snc.sql("CREATE TABLE COL_TABLE(OrderRef INT NOT NULL, " + + "description String, price LONG) using column options()") + + refDf.write.insertInto("RR_TABLE") + refDf.write.insertInto("COL_TABLE") + + // create a udf in embedded mode + val udfText: String = "public class IntegerUDF implements " + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + val file = UserDefinedFunctionsDUnitTest.createUDFClass("IntegerUDF", udfText) + val jar = 
UserDefinedFunctionsDUnitTest.createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.intudf_embeddedmode AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + val row = snc.sql("select intudf_embeddedmode(description) from col_table").collect() + row.foreach(r => assert(r(0) == 6)) + } + + def createUDFInSplitMode(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + + // create a udf in split mode + val udfText = "public class IntegerUDF2 implements org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 8; " + + "}" + + "}" + + val file2 = UserDefinedFunctionsDUnitTest.createUDFClass("IntegerUDF2", udfText) + val jar = UserDefinedFunctionsDUnitTest.createJarFile(Seq(file2)) + snc.sql(s"CREATE FUNCTION APP.intudf_splitmode AS IntegerUDF2 " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + val row2 = snc.sql("select intudf_splitmode(description) from col_table").collect() + row2.foreach(r => assert(r(0) == 8)) + + // use function created in embedded mode + val row = snc.sql("select intudf_embeddedmode(description) from col_table").collect() + row.foreach(r => assert(r(0) == 6)) + snc.sql("drop function APP.intudf_embeddedmode") + assert(snc.snappySession.sql(s"SHOW FUNCTIONS APP.intudf_embeddedmode").collect().length == 0) + } + + def verifyUDFInEmbeddedMode(): Unit = { + val snc = SnappyContext(sc) + // use function created in splitmode + val row2 = snc.sql("select intudf_splitmode(description) from col_table").collect() + row2.foreach(r => assert(r(0) == 8)) + snc.sql("drop function APP.intudf_splitmode") + assert(snc.snappySession.sql(s"SHOW FUNCTIONS APP.intudf_splitmode").collect().length == 0) + } + + def verifyUDFInSplitMode(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + + // function that was 
dropped in embedded mode + try { + snc.sql("select intudf_splitmode(description) from col_table").collect() + } catch { + case e: AnalysisException if e.getMessage.contains("Undefined function") => // do nothing + } + assert(snc.snappySession.sql(s"SHOW FUNCTIONS APP.intudf_splitmode").collect().length == 0) + } + + override def verifySplitModeOperations(tableType: String, isComplex: Boolean, + props: Map[String, String]): Unit = { + // embeddedModeTable1 is dropped in split mode. recreate it + val snc = SnappyContext(sc) + if (isComplex) { + createComplexTableUsingDataSourceAPI(snc, "embeddedModeTable1", + tableType, props) + } else { + createTableUsingDataSourceAPI(snc, "embeddedModeTable1", + tableType, props) + } + selectFromTable(snc, "embeddedModeTable1", 1005) + + snc.dropTable("embeddedModeTable1", ifExists = true) + + // embeddedModeTable2 still exists drop it + snc.dropTable("embeddedModeTable2", ifExists = true) + + // read data from splitModeTable1 + selectFromTable(snc, "splitModeTable1", 1005) + + // drop table created in split mode + snc.dropTable("splitModeTable1", ifExists = true) + + // recreate the dropped table + var expected: Seq[ComplexData] = Nil + if (isComplex) { + expected = createComplexTableUsingDataSourceAPI(snc, "splitModeTable1", + tableType, props) + } else { + createTableUsingDataSourceAPI(snc, "splitModeTable1", + tableType, props) + } + selectFromTable(snc, "splitModeTable1", 1005, expected) + snc.dropTable("splitModeTable1", ifExists = true) + + logInfo("Successful") + } + + def createRowTableForCollocatedJoin(): Unit = { + + val snc = SnappyContext(sc) + val dimension1 = sc.parallelize( + (1 to 1000).map(i => TestData2(i, i.toString, i % 10 + 1))) + val refDf = snc.createDataFrame(dimension1) + snc.sql("DROP TABLE IF EXISTS PR_TABLE1") + + snc.sql("CREATE TABLE PR_TABLE1(OrderId INT NOT NULL,description String, " + + "OrderRef INT) USING row " + + "options (" + + "PARTITION_BY 'OrderId, OrderRef')") + + 
refDf.write.insertInto("PR_TABLE1") + + snc.sql("DROP TABLE IF EXISTS PR_TABLE2") + + snc.sql("CREATE TABLE PR_TABLE2(OrderId INT NOT NULL,description String, " + + "OrderRef INT) USING row options (" + + "PARTITION_BY 'OrderId,OrderRef'," + + "COLOCATE_WITH 'PR_TABLE1')") + + val dimension2 = sc.parallelize( + (1 to 1000).map(i => TestData2(i, i.toString, i % 5 + 1))) + + val dimensionDf = snc.createDataFrame(dimension2) + dimensionDf.write.insertInto("PR_TABLE2") + + // force the stats to be populated + SnappyTableStatsProviderService.getService.getTableStatsFromService("APP.PR_TABLE1") + SnappyTableStatsProviderService.getService.getTableStatsFromService("APP.PR_TABLE2") + } + + def createColumnTableForCollocatedJoin(): Unit = { + + val snc = SnappyContext(sc) + val dimension1 = sc.parallelize( + (1 to 1000).map(i => TestData2(i, i.toString, i % 10 + 1))) + val refDf = snc.createDataFrame(dimension1) + snc.sql("DROP TABLE IF EXISTS PR_TABLE3") + + snc.sql("CREATE TABLE PR_TABLE3(OrderId INT, description String, " + + "OrderRef INT) USING column " + + "options (" + + "PARTITION_BY 'OrderId,OrderRef')") + + refDf.write.format("column").mode(SaveMode.Append).options(props) + .saveAsTable("PR_TABLE3") + + val countdf = snc.sql("select * from PR_TABLE3") + var count = countdf.count() + assert(count == 1000, s"Unexpected count = $count, expected 1000") + + snc.sql("DROP TABLE IF EXISTS PR_TABLE4") + + snc.sql("CREATE TABLE PR_TABLE4(OrderId INT ,description String, " + + "OrderRef INT) USING column options (" + + "PARTITION_BY 'OrderId,OrderRef'," + + "COLOCATE_WITH 'PR_TABLE3')") + + val dimension2 = sc.parallelize( + (1 to 1000).map(i => TestData2(i, i.toString, i % 5 + 1))) + + val dimensionDf = snc.createDataFrame(dimension2) + dimensionDf.write.insertInto("PR_TABLE4") + val countdf1 = snc.sql("select * from PR_TABLE4") + count = countdf1.count() + assert(count == 1000, s"Unexpected count = $count, expected 1000") + + // force the stats to be populated + 
SnappyTableStatsProviderService.getService.getTableStatsFromService("APP.PR_TABLE3") + SnappyTableStatsProviderService.getService.getTableStatsFromService("APP.PR_TABLE4") + } + + + def checkCollocatedJoins(locatorPort: Int, prop: Properties, + table1: String, table2: String, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + + val testJoins = new SnappyJoinSuite() + testJoins.partitionToPartitionJoinAssertions(snc, table1, table2) + + logInfo("Successful") + } + + /** + * Returns the SnappyContext for external(compute) Spark cluster connected to + * SnappyData cluster using the locator property. + * + * The plan-caching flag is set from a random boolean on each call; the `properties` + * argument is not read by this implementation. + */ + override def getSnappyContextForConnector(locatorClientPort: Int, properties: Properties = null) + : SnappyContext = { + val hostName = InetAddress.getLocalHost.getHostName + // val connectionURL = "jdbc:snappydata://localhost:" + locatorClientPort + "/" + val connectionURL = s"localhost:$locatorClientPort" + logInfo(s"URL for connector is $connectionURL") + val conf = new SparkConf() + .setAppName("test Application") + .setMaster(s"spark://$hostName:7077") + .set("spark.executor.cores", TestUtils.defaultCores.toString) + .set("spark.executor.extraClassPath", + getEnvironmentVariable("SNAPPY_DIST_CLASSPATH")) + .set("spark.testing.reservedMemory", "0") + .set("spark.sql.autoBroadcastJoinThreshold", "-1") + .set("snappydata.connection", connectionURL) + .set("snappydata.sql.planCaching", random.nextBoolean().toString) + + // log the actual key/value pairs; Array.toString would only print the array identity + logInfo("Spark conf:" + conf.getAll.mkString(", ")) + + val sc = SparkContext.getOrCreate(conf) + // sc.setLogLevel("DEBUG") + // Logger.getRootLogger.setLevel(Level.ALL) + // Logger.getLogger("org").setLevel(Level.DEBUG) + // Logger.getLogger("akka").setLevel(Level.DEBUG) + // val snc = SnappySession.getOrCreate(sc).sqlContext + val snc = SnappyContext(sc) + + val mode = SnappyContext.getClusterMode(snc.sparkContext) + mode match { + case ThinClientConnectorMode(_, _) => // expected + case _ => 
assert(assertion = false, "cluster mode is " + mode) + } + + snc + } + + def splitModeTableCreate(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val tblBatchSize200K = "tblBatchSizeBig_split" + + val tblBatchSize200 = "tblBatchSizeSmall_split" + + val snc = getSnappyContextForConnector(locatorClientPort) + snc.sql(s"CREATE TABLE $tblBatchSize200(Key1 INT ,Value STRING) " + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1'," + + "BUCKETS '8', COLUMN_BATCH_SIZE '200')") + + snc.sql(s"CREATE TABLE $tblBatchSize200K (Key1 INT ,Value STRING) " + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1'," + + "BUCKETS '8', COLUMN_BATCH_SIZE '200000')") + + val rdd = sc.parallelize( + (1 to 100000).map(i => TestData(i, i.toString))) + + implicit val encoder: Encoder[TestData] = Encoders.product[TestData] + val dataDF = snc.createDataset(rdd) + + dataDF.write.insertInto(tblBatchSize200) + dataDF.write.insertInto(tblBatchSize200K) + } + + def checkStatsForSplitMode(locatorPort: Int, prop: Properties, + buckets: String, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + snc.sql("drop table if exists snappyTable") + snc.sql(s"create table snappyTable (id bigint not null, sym varchar(10) not null) using " + + s"column options(redundancy '1', buckets '$buckets')") + val testDF = snc.range(10000000).selectExpr("id", "concat('sym', cast((id % 100) as varchar" + + "(10))) as sym") + testDF.write.insertInto("snappyTable") + // TODO: Fix this. wait added to make sure that stats are + // generated on the embedded cluster and the smart connector + // mode is able to get those. Ideally if table stats are not + // present connector should send the table name and + // get those from embedded side + var expectedRowCount = 10000000 + + def waitForStats: Boolean = { + SnappyTableStatsProviderService.getService. 
+ getAggregatedStatsOnDemand._1.get("APP.SNAPPYTABLE") match { + case Some(stats) => stats.getRowCount == expectedRowCount + case _ => false + } + } + + ClusterManagerTestBase.waitForCriterion(waitForStats, + s"Expected stats row count to be $expectedRowCount", 30000, 500, throwOnTimeout = true) + for (i <- 1 to 100) { + snc.sql(s"insert into snappyTable values($i,'Test$i')") + } + expectedRowCount = 10000100 + ClusterManagerTestBase.waitForCriterion(waitForStats, + s"Expected stats row count to be $expectedRowCount", 30000, 500, throwOnTimeout = true) + logInfo("Successful") + } + + def splitModeCreateTableUsingCTAS(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc = getSnappyContextForConnector(locatorClientPort) + val customerFile: String = getClass.getResource("/customer.csv").getPath + + snc.sql(s"CREATE EXTERNAL TABLE CUSTOMER_STAGING ( " + + "C_CUSTKEY INTEGER NOT NULL," + + "C_NAME VARCHAR(25) NOT NULL," + + "C_ADDRESS VARCHAR(40) NOT NULL," + + "C_NATIONKEY INTEGER NOT NULL," + + "C_PHONE VARCHAR(15) NOT NULL," + + "C_ACCTBAL DECIMAL(15,2) NOT NULL," + + "C_MKTSEGMENT VARCHAR(10) NOT NULL," + + "C_COMMENT VARCHAR(117) NOT NULL)" + + s"USING csv OPTIONS (path '$customerFile', maxCharsPerColumn '4096')") + + snc.sql(s"CREATE TABLE CUSTOMER AS SELECT * FROM CUSTOMER_STAGING") + val count = snc.sql("select * from customer").count() + assert(count == 750, s"Expected 750 rows. Actual rows = $count") + + val customerWithHeadersFile: String = getClass.getResource("/customer_with_headers.csv").getPath + val customer_csv_DF = snc.read.option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096").csv(customerWithHeadersFile) + val props1 = Map("PARTITION_BY" -> "C_CUSTKEY") + customer_csv_DF.write.format("column").mode("append").options(props1).saveAsTable("CUSTOMER_2") + val count2 = snc.sql("select * from customer_2").count() + assert(count2 == 750, s"Expected 750 rows. 
Actual rows = $count2") + + // also test temp table + snc.sql(s"CREATE TEMPORARY TABLE CUSTOMER_TEMP AS SELECT * FROM CUSTOMER_STAGING") + val count3 = snc.sql("select * from CUSTOMER_TEMP").count() + assert(count3 == 750, s"Expected 750 rows. Actual rows = $count3") + val catalog = snc.snappySession.sessionCatalog + assert(catalog.isTemporaryTable(snc.snappySession.tableIdentifier("CUSTOMER_TEMP"))) + snc.sql("DROP TABLE CUSTOMER_TEMP") + } + + override def dropAndCreateTablesInEmbeddedMode( + tableType: String): Unit = { + val snc = SnappyContext(sc) + val df = snc.table("APP.T1") + assert(df.schema.fields.length == 3) + snc.dropTable("APP.T1") + + snc.sql(s"CREATE TABLE T1(COL1 STRING, COL2 STRING) " + + s"USING $tableType OPTIONS (PARTITION_BY 'COL1', COLUMN_MAX_DELTA_ROWS '1')") + snc.sql("INSERT INTO T1 VALUES('AA', 'AA')") + snc.sql("INSERT INTO T1 VALUES('BB', 'BB')") + snc.sql("INSERT INTO T1 VALUES('CC', 'CC')") + snc.sql("INSERT INTO T1 VALUES('DD', 'DD')") + snc.sql("INSERT INTO T1 VALUES('EE', 'EE')") + + val rs = snc.sql("select * from t1").collect() + assert(rs.length == 5) + } + + var connectorSnc: SnappyContext = _ + + override def createTablesInSplitMode(locatorPort: Int, + prop: Properties, + locatorClientPort: Int, + tableType: String): Unit = { + if (connectorSnc == null || connectorSnc.sparkContext.isStopped) { + connectorSnc = getSnappyContextForConnector(locatorClientPort) + } + // row table + connectorSnc.sql(s"CREATE TABLE T1(C1 INT, C2 INT, C3 INT) " + + s"USING $tableType OPTIONS (PARTITION_BY 'C1', COLUMN_MAX_DELTA_ROWS '1')") + connectorSnc.sql("INSERT INTO T1 VALUES(1, 1, 1)") + connectorSnc.sql("INSERT INTO T1 VALUES(2, 2, 2)") + connectorSnc.sql("INSERT INTO T1 VALUES(3, 3, 3)") + connectorSnc.sql("INSERT INTO T1 VALUES(4, 4, 4)") + connectorSnc.sql("INSERT INTO T1 VALUES(5, 5, 5)") + + val rs = connectorSnc.sql("select * from t1 order by c1").collect() + + assert(rs.length == 5) + assert(rs(0).getAs[Int]("c1") == 1) + 
assert(rs(0).getAs[Int]("c2") == 1) + assert(rs(0).getAs[Int]("c3") == 1) + } + + override def verifyTableFormInSplitMOde(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + var resultDF: org.apache.spark.sql.DataFrame = null + try { + resultDF = connectorSnc.sql("select * from t1 order by col1") + } catch { + case _: org.apache.spark.sql.AnalysisException => + resultDF = connectorSnc.sql("select * from t1 order by col1") + } + + val rs = resultDF.collect() + assert(rs.length == 5, s"Expected 5 but got ${rs.length}") + assert(rs(0).getAs[String]("col1").equals("AA")) + assert(rs(0).getAs[String]("col2").equals("AA")) + + connectorSnc.dropTable("APP.T1") + } + + def createTablesFromOtherTablesTest(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val tempRowTableProps = "BUCKETS '16', PARTITION_BY 'COL2'" + + executeTestWithOptions(locatorPort, locatorClientPort, Map("BUCKETS" -> "8", + "PARTITION_BY" -> "COL1", "REDUNDANCY" -> "1"), Map.empty, tempRowTableProps) + executeTestWithOptions(locatorPort, locatorClientPort, Map.empty, Map("BUCKETS" -> "16"), + tempRowTableProps, "BUCKETS '8', PARTITION_BY 'COL1', REDUNDANCY '1'") + } + + def executeTestWithOptions(locatorPort: Int, locatorClientPort: Int, + rowTableOptios: Map[String, String] = Map.empty[String, String], + colTableOptions: Map[String, String] = Map.empty[String, String], + tempRowTableOptions: String = "", + tempColTableOptions: String = ""): Unit = { + + val snc = getSnappyContextForConnector(locatorClientPort) + val rowTable = "rowTable" + val colTable = "colTable" + + + snc.sql("DROP TABLE IF EXISTS " + rowTable) + snc.sql("DROP TABLE IF EXISTS " + colTable) + Property.ColumnBatchSize.set(snc.sessionState.conf, "30k") + val rdd = sc.parallelize( + (1 to 113999).map(i => TestRecord(i, i + 1, i + 2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(rowTable, "row", dataDF.schema, rowTableOptios) + 
dataDF.write.format("row").mode(SaveMode.Append).options(rowTableOptios).saveAsTable(rowTable) + + snc.createTable(colTable, "column", dataDF.schema, colTableOptions) + dataDF.write.insertInto(colTable) + + val tempRowTableName = "testRowTable1" + val tempColTableName = "testcolTable1" + + + snc.sql("DROP TABLE IF EXISTS " + tempRowTableName) + snc.sql(s"CREATE TABLE " + tempRowTableName + s" using row options($tempRowTableOptions) AS" + + s" (SELECT col1 ,col2 FROM " + rowTable + ")") + val testResults1 = snc.sql("SELECT * FROM " + tempRowTableName).collect() + assert(testResults1.length == 113999, s"Expected row count is 113999 while actual count is " + + s"${testResults1.length}") + + + snc.sql("DROP TABLE IF EXISTS " + tempRowTableName) + snc.sql("CREATE TABLE " + tempRowTableName + s" using row options($tempRowTableOptions) AS " + + s"(SELECT col1 ,col2 FROM " + colTable + ")") + val testResults2 = snc.sql("SELECT * FROM " + tempRowTableName).collect() + assert(testResults2.length == 113999, s"Expected row count is 113999 while actual count is " + + s"${testResults2.length}") + + snc.sql("DROP TABLE IF EXISTS " + tempColTableName) + snc.sql("CREATE TABLE " + tempColTableName + s" USING COLUMN OPTIONS($tempColTableOptions) " + + s"AS (SELECT col1 ,col2 FROM " + tempRowTableName + ")") + + val testResults3 = snc.sql("SELECT * FROM " + tempColTableName).collect() + assert(testResults3.length == 113999, s"Expected row count is 113999 while actual count is " + + s"${testResults3.length}") + + snc.sql("DROP TABLE IF EXISTS " + tempColTableName) + snc.sql("CREATE TABLE " + tempColTableName + s" USING COLUMN OPTIONS($tempColTableOptions) " + + s"AS (SELECT col1 ,col2 FROM " + colTable + ")") + + val testResults4 = snc.sql("SELECT * FROM " + tempColTableName).collect() + assert(testResults4.length == 113999, s"Expected row count is 113999 while actual count is" + + s"${testResults4.length}") + + snc.sql("DROP TABLE IF EXISTS " + tempColTableName) + + snc.sql("CREATE 
TABLE " + tempColTableName + s" USING COLUMN OPTIONS($tempColTableOptions) " + + s"AS (SELECT t1.col1 ,t1.col2 FROM " + colTable + " t1," + rowTable + + " t2 where t1.col1=t2.col2)") + // Expected count will be 113998 as first row will not match + val testResults5 = snc.sql("SELECT * FROM " + tempColTableName).collect() + + assert(testResults5.length == 113998, s"Expected row count is 113998 while actual count is" + + s"${testResults5.length}") + + snc.sql("DROP TABLE IF EXISTS " + tempColTableName) + snc.sql("DROP TABLE IF EXISTS " + tempRowTableName) + + snc.sql("DROP TABLE IF EXISTS " + rowTable) + snc.sql("DROP TABLE IF EXISTS " + colTable) + } + + def doTestStaleCatalog(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + + val rdd: RDD[Row] = sc.parallelize( + Seq( + Row("val1", "val3"), + Row("val2", "val4") + ) + ) + val schema = new StructType() + .add(StructField("col1", StringType)) + .add(StructField("col2", StringType)) + val dataFrame = snc.createDataFrame(rdd, schema) + import org.apache.spark.sql.snappy._ + try { + Thread.sleep(2000) + for (_ <- 1 to 10) { + dataFrame.write.putInto("T5") + } + Assert.fail("Should have thrown CatalogStaleException.") + } catch { + case _: CatalogStaleException => + // retrying putInto operation and it should pass + dataFrame.write.putInto("T5") + } + } + + def doTestStaleCatalogForSNAP3024(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + performSmartConnectorOps(locatorClientPort) + } + + private def performSmartConnectorOps(locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + + snc.sql("select * from t5").collect() + + val rdd: RDD[Row] = sc.parallelize( + Seq( + Row("4", "4"), + Row("5", "5") + ) + ) + val schema = new StructType() + .add(StructField("col1", StringType)) + .add(StructField("col2", StringType)) + val dataFrame = 
snc.createDataFrame(rdd, schema) + + dataFrame.write.insertInto("T5") + // wait for the embedded mode to change the catalog or rebalance buckets + Thread.sleep(6000) + // should not throw an exception + for (_ <- 1 to 5) { + snc.sql("select * from t5").collect() + } + } + + def doTestSmartConnectorForBucketRebalance(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + performSmartConnectorOps(locatorClientPort) + } + + def doTestInsertAfterStaleCatalog(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + snc.sql("insert into t5 values('4', '4')") + logInfo("1. schema is = " + snc.table("T5").schema) + + val schema2 = new StructType() + .add(StructField("col1", DateType)) + .add(StructField("col2", DateType)) + val rdd2: RDD[Row] = sc.parallelize( + Seq( + Row(java.sql.Date.valueOf("2019-01-01"), java.sql.Date.valueOf("2019-01-01")), + Row(java.sql.Date.valueOf("2019-02-02"), java.sql.Date.valueOf("2019-02-02")) + ) + ) + val dataFrame2 = snc.createDataFrame(rdd2, schema2) + + logInfo("doTestInsertAfterStaleCatalog: Waiting 6 seconds to allow schema change") + Thread.sleep(6000) + try { + for (_ <- 1 to 20) { + Thread.sleep(500) + logInfo("calling dataFrame.write.insertInto(\"T5\")") + logInfo("2. schema is = " + snc.table("T5").schema) + dataFrame2.write.insertInto("T5") + } + Assert.fail("Should have thrown CatalogStaleException.") + } catch { + case _: CatalogStaleException => + logInfo("doTestInsertAfterStaleCatalog: Caught expected CatalogStaleException") + // retrying insertInto operation and it should pass + retryOperation(5) { + dataFrame2.write.insertInto("T5") + } + } + logInfo("3. 
schema is = " + snc.table("T5").schema) + } + + def retryOperation[T](maxRetryAttempts: Int)(f: => T): Unit = { + var retryCount = 0 + var success = false + while (!success) { + try { + f + success = true + } catch { + // if table is not created yet on embedded cluster, + // TableNotFoundException can be seen; retry in + // such a case, giving up after maxRetryAttempts failures + case t: TableNotFoundException => + retryCount = retryCount + 1 + if (retryCount >= maxRetryAttempts) { + throw t + } else { + Thread.sleep(200) + } + } + } + } + + def doTestDeleteAfterStaleCatalog(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + snc.sql("delete from t6 where col1 like '1%'") + + logInfo("doTestDeleteAfterStaleCatalog: Waiting 6 seconds to allow schema change") + Thread.sleep(6000) + try { + for (_ <- 1 to 20) { + Thread.sleep(500) + snc.sql("delete from t6 where col1 like '2%'") + } + Assert.fail("Should have thrown CatalogStaleException.") + } catch { + case _: CatalogStaleException => + logInfo("doTestDeleteAfterStaleCatalog: Caught expected CatalogStaleException") + // retrying delete from operation and it should pass + retryOperation(5) { + snc.sql("delete from t6 where col1 like '2%'") + } + } + } + + def doTestUpdateAfterStaleCatalog(locatorPort: Int, + prop: Properties, + locatorClientPort: Int): Unit = { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + snc.sql("insert into t7 values('4', '4')") + + logInfo("doTestUpdateAfterStaleCatalog: Waiting 6 seconds to allow schema change") + Thread.sleep(6000) + try { + for (_ <- 1 to 20) { + Thread.sleep(500) + snc.sql("update t7 set col2 = '22' where col1 = '2'") + } + Assert.fail("Should have thrown CatalogStaleException.") + } catch { + case _: CatalogStaleException => + logInfo("doTestUpdateAfterStaleCatalog: Caught expected CatalogStaleException") + // retrying update operation and it should pass + retryOperation(5) { +
snc.sql("update t7 set col2 = '22' where col1 = '2'") + } + } + } + + def doTestStaleCatalogRetryForStreamingSink(locatorPort: Int, + prop: Properties, locatorClientPort: Int, testTempDir: String): Unit = { + val tableName = "users" + val kafkaTestUtils = new KafkaTestUtils + kafkaTestUtils.setup() + kafkaTestUtils.createTopic(tableName, partitions = 3) + try { + val snc: SnappyContext = getSnappyContextForConnector(locatorClientPort) + snc.sql(s"drop table if exists $tableName") + snc.sql( + s"""create table $tableName (id long , name varchar(40), age int) + | using column options(key_columns 'id')""".stripMargin) + + val streamingDF = snc + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress) + .option("subscribe", tableName) + .option("startingOffsets", "earliest") + .load() + + implicit val encoder: ExpressionEncoder[Row] = RowEncoder(snc.table(tableName).schema) + val session = snc.sparkSession + import session.implicits._ + val streamingQuery = streamingDF.selectExpr("CAST(value AS STRING)") + .as[String] + .map(_.split(",")) + .map(r => Row(r(0).toLong, r(1), r(2).toInt)) + .writeStream + .format("snappysink") + .queryName(tableName) + .trigger(ProcessingTime("1 seconds")) + .option("tableName", tableName) + .option("checkpointLocation", s"$testTempDir/checkpoint") + .start() + + // produce first batch of data + val dataBatch1 = Seq(Seq(1, "name1", 20), Seq(2, "name2", 20)) + kafkaTestUtils.sendMessages(tableName, dataBatch1.map(r => r.mkString(",")).toArray) + + waitTillTheBatchIsPickedForProcessing(snc, 0, tableName) + + new PrintWriter(s"$testTempDir/file0") { + write("dummyData") + close() + } + + // wait till DDL is fired on snappy cluster which will lead to stale smart-connector catalog + var attempts = 0 + while (!Files.exists(Paths.get(testTempDir, "file1")) && attempts < 15) { + Thread.sleep(4000) + attempts += 1 + } + + assert(Files.exists(Paths.get(testTempDir, "file1")), "Waiting for stale catalog timed out") + + // produce second
batch of data + val dataBatch2 = Seq(Seq(3, "name3", 20)) + kafkaTestUtils.sendMessages(tableName, dataBatch2.map(r => r.mkString(",")).toArray) + + streamingQuery.processAllAvailable() + + assertData(Array(Row(1, "name1", 20), Row(2, "name2", 20), Row(3, "name3", 20))) + + def assertData(expectedData: Array[Row]): Unit = { + val actualData = snc.sql(s"select * from $tableName" + + s" order by id, name, age") + .collect() + + assert(expectedData sameElements actualData, "actual data:" + + actualData.map(a => a.toString()).mkString(",")) + } + } finally { + kafkaTestUtils.teardown() + } + } + + private def waitTillTheBatchIsPickedForProcessing(snc: SnappyContext, batchId: Int, + queryName: String, retries: Int = 15): Unit = { + if (retries == 0) { + throw new RuntimeException(s"Batch id $batchId not found in sink status table") + } + val sql = s"select batch_id from snappysys_internal____sink_state_table " + + s"where stream_query_id = '$queryName'" + val batchIdFromTable = snc.sql(sql).collect() + if (batchIdFromTable.isEmpty || batchIdFromTable(0)(0) != batchId) { + Thread.sleep(1000) + waitTillTheBatchIsPickedForProcessing(snc, batchId, queryName, retries - 1) + } + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/StringAsVarcharDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/StringAsVarcharDUnitTest.scala new file mode 100644 index 0000000000..db04858ced --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/StringAsVarcharDUnitTest.scala @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.cluster + +import java.sql.{Connection, Statement} + +import io.snappydata.Constant +import io.snappydata.test.dunit.AvailablePortHelper + +import org.apache.spark.Logging +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} +import org.apache.spark.sql.{SaveMode, SnappyContext} + +/** + * Tests for verifying rendering of STRING happens as VARCHAR or CLOB, + * depending upon the query hint. + */ +class StringAsVarcharDUnitTest(val s: String) + extends ClusterManagerTestBase(s) with Logging { + + val colTab1 = "colTab1" + val rowTab1 = "rowTab1" + val rowTab2 = "rowTab2" + val extTab1 = "extTab1" + val extTab2 = "extTab2" + + val varcharSize = 20 + val charSize = 10 + + /** + * Test 'select *' on column, row and external tables and 'select cast(* as)' on a column/row + * tables, with different possible query hints. The tables are created via DDLs. + */ + def testQueries(): Unit = { + executeAndVerify() + } + + /** + * Test 'select *' on column, row and external tables and 'select cast(* as)' on a column/row + * tables, with different possible query hints. The tables are created via APIs. 
+ */ + def testQueriesOnTablesCreatedViaAPI(): Unit = { + executeAndVerify(false) + validateUtilsFunctions() + } + + def executeAndVerify(useDDL: Boolean = true, join: Boolean = false): Unit = { + val netPort = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort) + val conn = getANetConnection(netPort) + + if (useDDL) { + createTablesViaDDLAndInsertData(conn) + } else { + createTablesViaAPIAndInsertData(conn) + } + + val s = conn.createStatement() + + Seq( // all possible hint values + "FALSE", "*", "col_string,col_varchar", "inv,lid" + ).foreach(str => runQueriesAndVerify(s, useDDL, str)) + + conn.close() + } + + def runQueriesAndVerify(s: Statement, useDDL: Boolean, hint: String): Unit = { + var stringType = "VARCHAR" + var affix = "" + hint match { + case "FALSE" => + case _ => + affix = s" --+ columnsAsClob($hint)" + logInfo(s"affix: '$affix'") + stringType = hint match { + case "*" => "CLOB" + case s: String => if (s.contains("col_string")) "CLOB" else "VARCHAR" + } + } + + def eNv(stmt: Statement, t: String, s: String, checkCount: Boolean = false, expectedCount: + Int = 0): Unit = { + stmt.executeQuery(s"select * from $t $affix") + val rs = stmt.getResultSet + verify(rs, 5, s, t, hint) + if (checkCount) { + var count = 0 + while (rs.next()) { + count += 1 + } + assert(count == expectedCount, + s"Expected count = $expectedCount but got $count") + } + } + + eNv(s, colTab1, stringType) + // row table metadata is from store so rs metadata shows CLOB + eNv(s, rowTab1, "CLOB") + eNv(s, extTab1, stringType, true, 0) + if (!useDDL) { + eNv(s, rowTab2, "CLOB") + eNv(s, extTab2, stringType, true, 5) + } + + def testCastOperator(s: Statement, t: String, expectedCount: Int): Unit = { + s.executeQuery(s"select cast(col_int as string), cast(col_string as clob), " + + s"cast(col_char as varchar(100)) from $t $affix") + val rSet = s.getResultSet + var count = 0 + while (rSet.next()) { + count += 1 + } + 
assert(count == expectedCount) + } + + testCastOperator(s, colTab1, 2) + testCastOperator(s, rowTab1, 5) + } + + /** + * Verify the metadata of the result set. + */ + private def verify(rs: java.sql.ResultSet, cols: Int, + stringType: String, tName: String, hint: String = "FALSE"): Unit = { + val md = rs.getMetaData + assert(md.getColumnCount == cols) + logInfo(s"$tName metadata column count = ${md.getColumnCount}, " + + s"hint = $hint expectedStringType = $stringType") + for (i <- 1 to cols) { + logInfo(s"col name = ${md.getColumnName(i)}, col type ${md.getColumnTypeName(i)}, table " + + s"name = ${md.getTableName(i)}") + } + assertMetaData(md, stringType, tName) + } + + private def assertMetaData(md: java.sql.ResultSetMetaData, + stringType: String, tName: String): Unit = { + assert(md.getColumnName(1).equalsIgnoreCase("COL_INT")) + assert(md.getColumnTypeName(1).equals("INTEGER")) + + assert(md.getColumnName(2).equalsIgnoreCase("COL_STRING")) + assert(md.getColumnTypeName(2).equals(stringType), + s"Expected type to be $stringType but got ${md.getColumnTypeName(2)}") + if (stringType.equals("VARCHAR")) { + assert(md.getPrecision(2) == Constant.MAX_VARCHAR_SIZE) + } + + assert(md.getColumnName(3).equalsIgnoreCase("COL_VARCHAR")) + assert(md.getColumnTypeName(3).equals("VARCHAR"), + s"Expected type to be VARCHAR but got ${md.getColumnTypeName(3)}") + assert(md.getPrecision(3) == varcharSize) + + assert(md.getColumnName(4).equalsIgnoreCase("COL_CLOB")) + assert(md.getColumnTypeName(4).equals("CLOB"), + s"Expected type to be CLOB but got ${md.getColumnTypeName(4)}") + + assert(md.getColumnName(5).equalsIgnoreCase("COL_CHAR")) + assert(md.getColumnTypeName(5).equals("CHAR"), + s"Expected type to be CHAR but got ${md.getColumnTypeName(5)}") + assert(md.getPrecision(5) == charSize) + + assert(md.getTableName(1).equalsIgnoreCase(tName), + s"Expected $tName but got ${md.getTableName(1)}") + } + + /** + * Create a row table and a column table with five columns each. 
Row table has five entries while + * the column table has just two entries. + */ + def createTablesViaDDLAndInsertData(conn: Connection): Unit = { + val snc = SnappyContext(sc) + + snc.sql(s"create table $rowTab1 (col_int int, col_string string, " + + s"col_varchar varchar($varcharSize), col_clob clob, col_char char($charSize)) using row") + + snc.sql(s"create table $colTab1 (col_int int, col_string string, " + + s"col_varchar varchar($varcharSize), col_clob clob, col_char char($charSize)) " + + "using column options(buckets '8')") + + snc.sql(s"create external table $extTab1 (col_int int, col_string string, " + + s"col_varchar varchar($varcharSize), col_clob clob, col_char char($charSize)) " + + s"USING csv OPTIONS(path '${getClass.getResource("/empty.csv").getPath}')") + + insertData(snc) + } + + /** + * Create a row, column and external tables with five columns each via APIs. Column table + * has two records while others have five records. + * + * @param conn + */ + def createTablesViaAPIAndInsertData(conn: Connection): Unit = { + val snc = SnappyContext(sc) + + val schema = StructType(Array( + StructField("col_int", IntegerType, false), + StructField("col_string", StringType, false), + StructField("col_varchar", StringType, false, Utils.varcharMetadata(varcharSize)), + StructField("col_clob", StringType, false, Utils.stringMetadata()), + StructField("col_char", StringType, false, Utils.charMetadata(charSize)) + )) + + snc.createTable(rowTab1, "row", schema, Map.empty[String, String]) + + snc.createTable(rowTab2, "row", s"(col_int int, col_string string, col_varchar varchar" + + s"($varcharSize), col_clob clob, col_char char($charSize))", + Map.empty[String, String], false) + + snc.createTable(colTab1, "column", schema, Map("buckets" -> "8")) + + snc.createExternalTable(extTab1, "csv", schema, + Map("path" -> getClass.getResource("/empty.csv").getPath)) + + val df = snc.read + .format("com.databricks.spark.csv") + .option("header", "false") + 
.option("maxCharsPerColumn", "4096") + .schema(schema) + .load(getClass.getResource("/allstringtypes.csv").getPath) + + df.write.format("column").saveAsTable(extTab2) + + insertData(snc) + } + + def validateUtilsFunctions(): Unit = { + try { + Utils.varcharMetadata(Constant.MAX_VARCHAR_SIZE + 1) + assert(false, "Validation for Utils.varcharMetadata() failed") + } catch { + case iae: IllegalArgumentException => // ignore + case t: Throwable => throw t + } + var md = Utils.varcharMetadata() + assert(md.getString(Constant.CHAR_TYPE_BASE_PROP).equals("VARCHAR")) + assert(md.getLong(Constant.CHAR_TYPE_SIZE_PROP) == Constant.MAX_VARCHAR_SIZE) + + try { + Utils.charMetadata(Constant.MAX_CHAR_SIZE + 1) + assert(false, "Validation for Utils.charMetadata() failed") + } catch { + case iae: IllegalArgumentException => // ignore + case t: Throwable => throw t + } + md = Utils.charMetadata() + assert(md.getString(Constant.CHAR_TYPE_BASE_PROP).equals("CHAR")) + assert(md.getLong(Constant.CHAR_TYPE_SIZE_PROP) == Constant.MAX_CHAR_SIZE) + + md = Utils.stringMetadata() + assert(md.getString(Constant.CHAR_TYPE_BASE_PROP).equals("CLOB"), + "Validation for Utils.stringMetadata() failed") + } + + def insertData(snc: SnappyContext): Unit = { + // Insert into row table + val data = Seq(Seq(1, "t1.1.string", "t1.1.varchar", "t1.1.clob", "t1.1.char"), + Seq(7, "t1.7.string", "t1.7.varchar", "t1.7.clob", "t1.7.char"), + Seq(9, "t1.9.string", "t1.9.varchar", "t1.9.clob", "t1.9.char"), + Seq(4, "t1.4.string", "t1.4.varchar", "t1.4.clob", "t1.4.char"), + Seq(5, "t1.5.string", "t1.5.varchar", "t1.5.clob", "t1.5.char")) + + val rdd = sc.parallelize(data, data.length).map(s => + Data9(s(0).asInstanceOf[Int], s(1).toString, s(2).toString, s(3).toString, s(4).toString)) + val dataDF = snc.createDataFrame(rdd) + dataDF.write.format("row").mode(SaveMode.Append) + .saveAsTable(rowTab1) + + // Insert into column table + snc.sql(s"insert into $colTab1 values (1, 't2.1.string', " + + s"'t2.1.varchar', 
't2.1.clob', 't2.1.char')") + snc.sql(s"insert into $colTab1 values (4, 't2.4.string', " + + s"'t2.4.varchar', 't2.4.clob', 't2.4.char')") + } +} + +case class Data9(col1: Int, col2: String, col3: String, col4: String, col5: String) diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/ValidateMVCCDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/cluster/ValidateMVCCDUnitTest.scala new file mode 100644 index 0000000000..9bc078d9d0 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/ValidateMVCCDUnitTest.scala @@ -0,0 +1,743 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+
+package io.snappydata.cluster
+
+import java.sql.DriverManager
+import java.util
+
+import com.gemstone.gemfire.cache.IsolationLevel
+import com.gemstone.gemfire.internal.cache.{TXStateProxy, GemFireCacheImpl}
+import com.gemstone.gemfire.internal.cache.GemFireCacheImpl.RvvSnapshotTestHook
+import com.pivotal.gemfirexd.internal.engine.Misc
+import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils
+import com.pivotal.gemfirexd.{FabricService, TestUtil}
+import io.snappydata.test.dunit.DistributedTestBase.WaitCriterion
+import io.snappydata.test.dunit.{AvailablePortHelper, DistributedTestBase, SerializableRunnable, VM}
+import io.snappydata.{Locator, ServiceManager}
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.Logging
+import org.apache.spark.sql.{SaveMode, SnappyContext}
+import org.apache.spark.sql.collection.Utils
+import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation
+
+/**
+ * DUnit test exercising snapshot reads on column and row tables.
+ *
+ * `beforeClass` starts the locator, one server in `vm0` and the lead in the
+ * test VM. The MVCC tests install an RVV snapshot test hook on `vm0` and run a
+ * validator (see the companion object) in a background thread while this VM
+ * performs the inserts.
+ */
+class ValidateMVCCDUnitTest(val s: String) extends ClusterManagerTestBase(s) with Logging {
+
+  // set default batch size for this test
+  bootProps.setProperty(io.snappydata.Property.ColumnBatchSize.name, "100")
+
+  // Written by the thread created in invokeMethodInVm and read by the test
+  // thread, hence volatile.
+  @volatile var errorInThread: Throwable = null
+
+  private val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE
+
+  override def tearDown2(): Unit = {
+    // reset the chunk size on lead node
+    setDMLMaxChunkSize(default_chunk_size)
+    super.tearDown2()
+  }
+
+  /** Boots locator, a single server (vm0) and the lead node for this test class. */
+  override def beforeClass(): Unit = {
+    val testName = getName
+    val testClass = getClass
+    // bootProps.setProperty(Attribute.SYS_PERSISTENT_DIR, s)
+    TestUtil.currentTest = testName
+    TestUtil.currentTestClass = getTestClass
+    TestUtil.skipDefaultPartitioned = true
+    TestUtil.doCommonSetup(bootProps)
+    GemFireXDUtils.IS_TEST_MODE = true
+
+    getLogWriter.info("\n\n\n STARTING TEST " + testClass.getName + '.' +
+        testName + "\n\n")
+
+    // copy everything the remote runnables need into locals
+    val locNetPort = locatorNetPort
+    val locNetProps = locatorNetProps
+    val locPort = ClusterManagerTestBase.locPort
+    val sysProps = this.sysProps
+    DistributedTestBase.invokeInLocator(new SerializableRunnable() {
+      override def run(): Unit = {
+        ClusterManagerTestBase.setSystemProperties(sysProps)
+        val loc: Locator = ServiceManager.getLocatorInstance
+
+        if (loc.status != FabricService.State.RUNNING) {
+          loc.start("localhost", locPort, locNetProps)
+        }
+        if (locNetPort > 0) {
+          loc.startNetworkServer("localhost", locNetPort, locNetProps)
+        }
+        assert(loc.status == FabricService.State.RUNNING)
+
+        val logger = LoggerFactory.getLogger(getClass)
+        logger.info("\n\n\n STARTING TESTS IN " + getClass.getName + "\n\n")
+      }
+    })
+    val nodeProps = bootProps
+    val startNode = new SerializableRunnable() {
+      override def run(): Unit = {
+        ClusterManagerTestBase.setSystemProperties(sysProps)
+        val node = ServiceManager.currentFabricServiceInstance
+        if (node == null || node.status != FabricService.State.RUNNING) {
+          ClusterManagerTestBase.startSnappyServer(locPort, nodeProps)
+        }
+        assert(ServiceManager.currentFabricServiceInstance.status ==
+            FabricService.State.RUNNING)
+
+        val logger = LoggerFactory.getLogger(getClass)
+        logger.info("\n\n\n STARTING TESTS IN " + getClass.getName + "\n\n")
+      }
+    }
+
+    vm0.invoke(startNode)
+    // vm1.invoke(startNode)
+    // vm2.invoke(startNode)
+
+    // start lead node in this VM
+    val sc = SnappyContext.globalSparkContext
+    if (sc == null || sc.isStopped) {
+      ClusterManagerTestBase.startSnappyLead(locPort, bootProps)
+    }
+    assert(ServiceManager.currentFabricServiceInstance.status ==
+        FabricService.State.RUNNING)
+  }
+
+  override def tearDownAfter(): Unit = {
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "clearTestHook", 0)
+  }
+
+  def setDMLMaxChunkSize(size: Long): Unit = {
+    GemFireXDUtils.DML_MAX_CHUNK_SIZE = size
+  }
+
+  /** Starts a network server on vm0 on a random free port and returns that port. */
+  private def startNetServerOnVm0(): Int = {
+    val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort
+    vm0.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1)
+    netPort1
+  }
+
+  /** Creates the three-column column table used by the column-table tests. */
+  private def createColumnTable(snc: SnappyContext, tableName: String, buckets: Int): Unit = {
+    snc.sql(s"create table $tableName(col1 integer, col2 String, col3 integer) using column " +
+        s"OPTIONS (PARTITION_BY 'col1', buckets '$buckets',MAXPARTSIZE '200'," +
+        s"COLUMN_MAX_DELTA_ROWS '10',COLUMN_BATCH_SIZE " +
+        s"'5000')")
+  }
+
+  /** Appends 100 generated rows to `tableName` through the DataFrame writer. */
+  private def appendRowsViaDataFrame(snc: SnappyContext, tableName: String,
+      numSlices: Int): Unit = {
+    val rows = for (i <- 1 to 100) yield Seq(i, i + 1, i + 2)
+    val rdd = sc.parallelize(rows, numSlices).map(
+      r => new Data2(r(0), r(1).toString, r(2).toString))
+    val dataDF = snc.createDataFrame(rdd)
+    dataDF.write.mode(SaveMode.Append).saveAsTable(tableName)
+  }
+
+  /** Runs the remote "no active snapshot TX" validation on vm0 and reports success. */
+  private def verifyNoActiveSnapshotTX(): Unit = {
+    vm0.invoke(classOf[ClusterManagerTestBase], "validateNoActiveSnapshotTX")
+    // scalastyle:off
+    println("Successful")
+    // scalastyle:on
+  }
+
+  /** Rethrows a failure recorded by the background validator thread, if any. */
+  private def rethrowThreadError(): Unit = {
+    val failure = errorInThread
+    if (failure != null) {
+      throw failure
+    }
+  }
+
+  /**
+   * Shared body of the row-table tests: recreates TESTTABLE as a row table and
+   * runs the named companion-object method on vm0 with the net server port.
+   */
+  private def runRowTableScenario(remoteMethod: String): Unit = {
+    errorInThread = null
+    val netPort1 = startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    // snc.sql("set spark.sql.inMemoryColumnarStorage.batchSize = 5")
+
+    snc.sql(s"drop table if exists $tableName")
+    snc.sql(s"create table $tableName(col1 integer, col2 String, col3 integer) using row " +
+        s"OPTIONS (REDUNDANCY '1',PARTITION_BY 'col1')")
+
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], remoteMethod,
+      netPort1)
+    verifyNoActiveSnapshotTX()
+  }
+
+  def testSnapshotInsertionForColumnTable(): Unit = {
+    errorInThread = null
+    startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    createColumnTable(snc, tableName, buckets = 1)
+
+    for (i <- 1 to 10) {
+      snc.sql(s"insert into $tableName values($i,'${i + 1}',${i + 2})")
+      println(s"Inserting $i")
+    }
+
+    val cnt = snc.sql(s"select * from $tableName").count()
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "printRegionSize")
+    assert(cnt >= 9, s"Expected row count is 10 while actual row count is $cnt")
+    snc.sql(s"drop table $tableName")
+
+    verifyNoActiveSnapshotTX()
+  }
+
+  def testSnapshotInsertionForColumnTableDFInsert(): Unit = {
+    errorInThread = null
+    startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    createColumnTable(snc, tableName, buckets = 1)
+
+    // one RDD partition per generated row
+    appendRowsViaDataFrame(snc, tableName, numSlices = 100)
+
+    val cnt = snc.sql(s"select * from $tableName").count()
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "printRegionSize")
+    assert(cnt == 100, s"Expected row count is 100 while actual row count is $cnt")
+    snc.sql(s"drop table $tableName")
+
+    verifyNoActiveSnapshotTX()
+  }
+
+  def testSnapshotInsertionForColumnTableDFInsertMultiThreaded(): Unit = {
+    errorInThread = null
+    startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    createColumnTable(snc, tableName, buckets = 10)
+
+    appendRowsViaDataFrame(snc, tableName, numSlices = 10)
+
+    val cnt = snc.sql(s"select * from $tableName").count()
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "printRegionSize")
+    // message now states the value the condition actually checks
+    assert(cnt == 100, s"Expected row count is 100 while actual row count is $cnt")
+    snc.sql(s"drop table $tableName")
+
+    verifyNoActiveSnapshotTX()
+  }
+
+  def testMVCCForColumnTable(): Unit = {
+    errorInThread = null
+    val netPort1 = startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    // snc.sql("set spark.sql.inMemoryColumnarStorage.batchSize = 5")
+    createColumnTable(snc, tableName, buckets = 1)
+
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "setTestHook")
+    // Invoking validate result in each VM as a separate thread inorder to resume the code for
+    // insertion of records
+    val validator = invokeMethodInVm(vm0, classOf[ValidateMVCCDUnitTest],
+      "validateResults", netPort1)
+
+    for (i <- 1 to 10) {
+      snc.sql(s"insert into $tableName values($i,'${i + 1}',${i + 2})")
+      println(s"Inserting $i")
+    }
+
+    val cnt = snc.sql(s"select * from $tableName").count()
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "printRegionSize")
+    assert(cnt >= 9, s"Expected row count is 10 while actual row count is $cnt")
+
+    // Give the validator a bounded amount of time to finish before the table is
+    // dropped and before errorInThread is inspected (same as the rollback test).
+    validator.join(30000)
+    snc.sql(s"drop table $tableName")
+
+    rethrowThreadError()
+
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "clearTestHook", 0)
+    verifyNoActiveSnapshotTX()
+  }
+
+  /**
+   * Runs `methodName` of `classType` on `vM` in a new thread, passing
+   * `netPort1`. Any failure is stored in `errorInThread`.
+   *
+   * @return the started thread
+   */
+  def invokeMethodInVm(vM: VM,
+      classType: Class[ValidateMVCCDUnitTest],
+      methodName: String, netPort1: Int): Thread = {
+
+    val t = new Thread {
+
+      override def run: Unit = {
+        try {
+          vM.invoke(classType, methodName, netPort1)
+        } catch {
+          // recorded (not swallowed): the test thread rethrows it later
+          case e: Throwable =>
+            errorInThread = e
+        }
+      }
+    }
+
+    t.start()
+    t
+  }
+
+
+  def testMVCCForColumnTableWithRollback(): Unit = {
+    errorInThread = null
+    val netPort1 = startNetServerOnVm0()
+
+    val snc = SnappyContext(sc)
+    val tableName: String = "TESTTABLE"
+    // snc.sql("set spark.sql.inMemoryColumnarStorage.batchSize = 5")
+    createColumnTable(snc, tableName, buckets = 1)
+
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "setTestHook")
+    // Invoking validate result in each VM as a separate thread inorder to resume the code for
+    // insertion of records
+    val t = invokeMethodInVm(vm0, classOf[ValidateMVCCDUnitTest],
+      "validateResultsWithRollback", netPort1)
+
+    var cnt = snc.sql(s"select * from $tableName").count()
+
+    assert(cnt == 0, s"Expected row count is 0 while actual row count is $cnt")
+
+    try {
+      for (i <- 1 to 10) {
+        snc.sql(s"insert into $tableName values($i, '${i + 1}', ${i + 2})")
+        println(s"From: testMVCCForColumnTableWithRollback Inserting $i")
+      }
+    } catch {
+      // As expected; fatal JVM errors are no longer swallowed here
+      case scala.util.control.NonFatal(_) =>
+    }
+
+
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "printRegionSize")
+    cnt = snc.sql(s"select * from $tableName").count()
+
+    assert(cnt >= 9, s"Expected row count is 10 while actual row count is $cnt")
+    t.join(30000)
+
+    snc.sql(s"drop table $tableName")
+    rethrowThreadError()
+    vm0.invoke(classOf[ValidateMVCCDUnitTest], "clearTestHook", 0)
+    verifyNoActiveSnapshotTX()
+  }
+
+
+  def testMixOperationsOnRowTables(): Unit = {
+    runRowTableScenario("performMixOperationsOnRowTable")
+  }
+
+
+  def testBatchInsertUsingPreparedStatement(): Unit = {
+    runRowTableScenario("performBatchInsert")
+  }
+
+}
+
+/**
+ * Methods invoked by name on vm0 from the test class above. They query
+ * APP.TESTTABLE over a JDBC client connection to the given net server port.
+ */
+object ValidateMVCCDUnitTest {
+
+  /** Test hook backed by two monitors; each wait gives up after 60 seconds. */
+  class MyTestHook extends RvvSnapshotTestHook {
+    val lockForTest: AnyRef = new AnyRef
+    val operationLock: AnyRef = new AnyRef
+
+    override def notifyOperationLock(): Unit = {
+      operationLock.synchronized {
+        operationLock.notify()
+      }
+    }
+
+    override def notifyTestLock(): Unit = {
+      lockForTest.synchronized {
+        lockForTest.notify()
+      }
+    }
+
+    override def waitOnTestLock(): Unit = {
+      lockForTest.synchronized {
+        lockForTest.wait(60000)
+      }
+    }
+
+    override def waitOnOperationLock(): Unit = {
+      operationLock.synchronized {
+        operationLock.wait(60000)
+      }
+    }
+  }
+
+  private val TestTable = "APP.TESTTABLE"
+
+  // query hint appended to the statements that read the row buffer / column store
+  private val StoreHint = " -- GEMFIREXD-PROPERTIES executionEngine=Store\n"
+
+  private def tableQuery: String = s"select * from $TestTable"
+
+  private def rowBufferQuery: String = s"select * from $TestTable" + StoreHint
+
+  private def columnStoreQuery: String = s"select * from " +
+      ColumnFormatRelation.columnBatchTableName("APP.TESTTABLE") + StoreHint
+
+  /**
+   * Loads the client driver, opens a connection to the net server on
+   * `netPort`, runs `body` and always closes the connection afterwards.
+   */
+  private def withConnection[T](netPort: Int)(body: java.sql.Connection => T): T = {
+    val driver = "io.snappydata.jdbc.ClientDriver"
+    Utils.classForName(driver).newInstance
+    val url = "jdbc:snappydata://localhost:" + netPort + "/"
+    val conn = DriverManager.getConnection(url)
+    try {
+      body(conn)
+    } finally {
+      conn.close()
+    }
+  }
+
+  /**
+   * Executes `sql` on `stmt` and returns the number of rows in its result set.
+   * When `rowLabel` is defined, the first column of every row is printed
+   * prefixed with that label.
+   */
+  private def countRows(stmt: java.sql.Statement, sql: String,
+      rowLabel: Option[String] = None): Int = {
+    stmt.execute(sql)
+    val rs = stmt.getResultSet
+    var count = 0
+    while (rs.next) {
+      count += 1
+      // scalastyle:off
+      rowLabel.foreach(label => println(label + rs.getInt(1)))
+      // scalastyle:on
+    }
+    count
+  }
+
+  def setTestHook(): Unit = {
+    val cache = GemFireCacheImpl.getInstance()
+    cache.setRvvSnapshotTestHook(new MyTestHook)
+    println("Setting testhook")
+
+  }
+
+
+  def printRegionSize(): Unit = {
+    val cache = GemFireCacheImpl.getInstance()
+    val cbName = ColumnFormatRelation.columnBatchTableName("APP.TESTTABLE")
+    println("APP.TESTTABLE Region size : " + cache.getRegion("/APP/TESTTABLE").size())
+    println(s"APP.$cbName Region size : " + Misc.getRegionForTable(cbName, true).size())
+  }
+
+  /**
+   * Validator for testMVCCForColumnTable. Alternates between waiting on /
+   * notifying the RVV snapshot test hook and counting rows in the table, its
+   * row buffer and its column batch table, asserting the expected counts at
+   * each stage.
+   *
+   * @param netPort port of the net server to connect to
+   */
+  def validateResults(netPort: Int): Unit = {
+    val ctmp = Misc.getGemFireCacheNoThrow
+    if (ctmp != null) {
+      println("Validate results invoked in: " + ctmp.getDistributedSystem.getMemberId)
+    }
+    val cache = GemFireCacheImpl.getInstance()
+
+    // started waiting on rvv test hook
+    cache.waitOnRvvTestHook()
+    // scalastyle:off
+    println("Got notification from test hook")
+
+    withConnection(netPort) { conn =>
+      val s = conn.createStatement()
+
+      val cnt = countRows(s, tableQuery, Some("Resultset: "))
+      println("Row count before creating the cachebatch: " + cnt)
+      assert(cnt >= 9, s"Expected row count is 10 while actual row count is $cnt")
+
+      val cnt1 = countRows(s, rowBufferQuery, Some("Resultset from row buffer: "))
+      println("Row count before creating the cachebatch in row buffer: " + cnt1)
+      assert(cnt1 >= 9, s"Expected row count is 10 while actual row count is $cnt1")
+
+      val cnt2 = countRows(s, columnStoreQuery)
+      println("Row count before creating the cachebatch in column store: " + cnt2)
+      assert(cnt2 == 0, s"Expected row count is 0 while actual row count is $cnt2")
+
+      cache.notifyRvvSnapshotTestHook()
+      cache.waitOnRvvTestHook()
+
+      val cnt3 = countRows(s, rowBufferQuery)
+      println("Row count in row buffer after destroy all entries from row buffer but no commit : " + cnt3)
+      assert(cnt3 >= 9, s"Expected row count is 10 while actual row count is $cnt3")
+
+      cache.notifyRvvSnapshotTestHook()
+      cache.waitOnRvvTestHook()
+      cache.setRvvSnapshotTestHook(null)
+
+      val cnt4 = countRows(s, columnStoreQuery)
+      println("Row count in column store after destroy all entries from row buffer " +
+          "and reinitialize snapshot : " + cnt4)
+      // The number of entries in column store is 4
+      // as after columnwise storage 3 rows will be created one for each
+      // column and 4th row is for stats
+      assert(cnt4 == 4, s"Expected Count is 4 but actual count is $cnt4")
+
+      val cnt5 = countRows(s, rowBufferQuery)
+      println("Row count in row buffer after destroy all entries from row buffer " +
+          "and reinitialize snapshot : " + cnt5)
+      assert(cnt5 == 0, s"Expected row count is 0 while actual row count is $cnt5")
+
+      val cnt6 = countRows(s, tableQuery)
+      println("Row count in column table : " + cnt6)
+      assert(cnt6 >= 9, s"Expected row count is 10 while actual row count is $cnt6")
+    }
+    // scalastyle:on
+  }
+
+
+  /**
+   * Validator for testMVCCForColumnTableWithRollback. Enables `testRollBack`
+   * on the cache transaction manager, checks the counts seen before the
+   * column batch exists and then polls until the column batch table is empty.
+   *
+   * @param netPort port of the net server to connect to
+   */
+  def validateResultsWithRollback(netPort: Int): Unit = {
+
+    val cache = GemFireCacheImpl.getInstance()
+    cache.getCacheTransactionManager.testRollBack = true
+    cache.waitOnRvvTestHook()
+
+    // scalastyle:off
+    withConnection(netPort) { conn =>
+      val s = conn.createStatement()
+
+      val cnt = countRows(s, tableQuery, Some("Resultset: "))
+      println("Row count before creating the cachebatch: " + cnt)
+      assert(cnt >= 9, s"Expected row count is 10 while actual row count is $cnt")
+
+      val cnt1 = countRows(s, rowBufferQuery, Some("Resultset from row buffer: "))
+      println("Row count before creating the cachebatch in row buffer: " + cnt1)
+      assert(cnt1 >= 9, s"Expected row count is 10 while actual row count is $cnt1")
+
+      val cnt2 = countRows(s, columnStoreQuery)
+      println("Row count before creating the cachebatch in column store: " + cnt2)
+      assert(cnt2 == 0, s"Expected row count is 0 while actual row count is $cnt2")
+
+      cache.notifyRvvSnapshotTestHook()
+
+      val cnt3 = countRows(s, rowBufferQuery)
+      println("Row count in row buffer after destroy all entries from row buffer but no commit : " + cnt3)
+      assert(cnt3 >= 9, s"Expected row count is 10 while actual row count is $cnt3")
+
+      // The number of entries in column store is 4 as after
+      // columnwise storage 3 rows will be created one for each
+      // column and 4th row is for stats
+
+      ClusterManagerTestBase.waitForCriterion({
+        val cnt4 = countRows(s, columnStoreQuery)
+        println("Row count in column store after destroy all entries from row buffer " +
+            "and reinitialize snapshot : " + cnt4)
+        cnt4 == 0
+      }, "Row count not 0 even after rollback ", 30000, 500, true)
+    }
+    // scalastyle:on
+  }
+
+  /**
+   * Inserts 100 rows through one JDBC batch, asserts that all of them are
+   * visible and drops the table.
+   */
+  def performBatchInsert(netPort: Int): Unit = {
+    withConnection(netPort) { conn =>
+      val prepareStatement = conn.prepareStatement(s"insert into $TestTable values(?,?,?)")
+
+      val s = conn.createStatement()
+      for (i <- 1 to 100) {
+        prepareStatement.setInt(1, i)
+        prepareStatement.setInt(2, i + 1)
+        prepareStatement.setInt(3, i + 2)
+        prepareStatement.addBatch()
+      }
+      prepareStatement.executeBatch()
+
+      val cnt = countRows(s, tableQuery)
+      assert(cnt == 100, s"Expected row count is 100 while actual row count is $cnt")
+
+      s.execute(s"drop table if exists $TestTable")
+    }
+  }
+
+  /**
+   * Inserts five rows, updates three of them, deletes the updated rows and
+   * asserts the visible row count after every step; finally drops the table.
+   */
+  def performMixOperationsOnRowTable(netPort: Int): Unit = {
+    withConnection(netPort) { conn =>
+      val s = conn.createStatement()
+
+      for (i <- 1 to 5) {
+        s.executeUpdate(s"insert into $TestTable values($i,'${i + 1}',${i + 2})")
+      }
+
+      var cnt = countRows(s, tableQuery)
+      assert(cnt == 5, s"Expected row count is 5 while actual row count is $cnt")
+
+      s.executeUpdate(s"update $TestTable set col3=1 where col1>2")
+
+      cnt = countRows(s, s"select * from $TestTable where col3=1")
+      assert(cnt == 3, s"Expected row count is 3 while actual row count is $cnt")
+
+      s.executeUpdate(s"delete from $TestTable where col3=1")
+
+      cnt = countRows(s, tableQuery)
+      // message now states the value the condition actually checks
+      assert(cnt == 2, s"Expected row count is 2 while actual row count is $cnt")
+
+      s.execute(s"drop table if exists $TestTable")
+    }
+  }
+
+  /**
+   * Removes the RVV snapshot test hook and switches `testRollBack` off again.
+   * `netPort` is not used; the method is invoked by name with one argument.
+   */
+  def clearTestHook(netPort: Int): Unit = {
+
+    val cache = GemFireCacheImpl.getInstance()
+    if (null != cache) {
+      cache.setRvvSnapshotTestHook(null)
+      cache.getCacheTransactionManager.testRollBack = false;
+    }
+  }
+
+}
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/jobs/CassandraSnappyConnectionJob.scala b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/CassandraSnappyConnectionJob.scala
new file mode 100644
index 0000000000..d2879c3369
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/CassandraSnappyConnectionJob.scala
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.cluster.jobs
+
+import com.typesafe.config.Config
+
+import org.apache.spark.sql.{SaveMode, SnappyJobValid, SnappyJobValidation, SnappySQLJob, SnappySession}
+
+/**
+ * Job that loads the Cassandra table `test.customer`, overwrites the column
+ * table CUSTOMER with it and asserts the resulting shape (3 rows, 4 columns).
+ */
+object CassandraSnappyConnectionJob extends SnappySQLJob {
+
+  /** Every configuration is accepted. */
+  override def isValidJob(sc: SnappySession, config: Config): SnappyJobValidation = SnappyJobValid()
+
+  override def runSnappyJob(sc: SnappySession, jobConfig: Config): Any = {
+
+    // scalastyle:off println
+    val cassandraOptions = Map("table" -> "customer", "keyspace" -> "test")
+    val source = sc.read.format("org.apache.spark.sql.cassandra").options(cassandraOptions).load
+    source.write.format("column").mode(SaveMode.Overwrite).saveAsTable("CUSTOMER")
+
+    // read the data back through SQL and verify its shape
+    val customers = sc.sql("select * from CUSTOMER")
+    assert(customers.count == 3, "Number of rows = " + customers.count())
+    val fields = customers.schema.fields
+    assert(fields.length == 4, "Number of columns = " + fields.length)
+  }
+}
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SNAP3028TestJob.scala b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SNAP3028TestJob.scala
new file mode 100644
index 0000000000..032991c9b9
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SNAP3028TestJob.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.snappydata.cluster.jobs
+
+import com.typesafe.config.Config
+
+import org.apache.spark.sql.{SnappyJobValid, SnappyJobValidation, SnappySQLJob, SnappySession}
+
+/**
+ * Job that creates a one-row column table `users`, collects it four times as
+ * a typed `Dataset[User]` while printing the elapsed nanoseconds of each
+ * pass, and always drops the table at the end.
+ */
+object SNAP3028TestJob extends SnappySQLJob {
+  /** Every configuration is accepted. */
+  override def isValidJob(sc: SnappySession, config: Config): SnappyJobValidation =
+    SnappyJobValid()
+
+  override def runSnappyJob(snappy: SnappySession, jobConfig: Config): Any = {
+    // encoders needed by `as[User]` below
+    import snappy.implicits._
+
+    snappy.sql("create table users (id long, name string) using column")
+    try {
+      snappy.sql("insert into users values (1, 'name1')")
+      println("Job strted:")
+      (0 to 3).foreach { _ =>
+        val startNanos = System.nanoTime()
+        snappy.table("users").as[User].collectAsList()
+        println("Time taken:" + (System.nanoTime() - startNanos))
+      }
+    } finally {
+      snappy.sql("drop table users")
+    }
+  }
+}
+
+/** Row type of the `users` table read by [[SNAP3028TestJob]]. */
+case class User(id: Long, name: String)
\ No newline at end of file
diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaSecureJob.java b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaSecureJob.java
new file mode 100644
index 0000000000..7f7fee7bcd
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaSecureJob.java
@@ -0,0 +1,22 @@
+package io.snappydata.cluster.jobs;
+
+import com.typesafe.config.Config;
+import org.apache.spark.sql.JavaSnappySQLJob;
+import org.apache.spark.sql.SnappyJobValid;
+import org.apache.spark.sql.SnappyJobValidation;
+import org.apache.spark.sql.SnappySession;
+
+public class SnappyJavaSecureJob extends JavaSnappySQLJob {
+
+  @Override
+  public SnappyJobValidation isValidJob(SnappySession sc, Config config) {
+    SnappyStreamingSecureJob.verifySessionAndConfig(sc, config);
+    return new SnappyJobValid();
+  }
+
+  @Override
+  public Object runSnappyJob(SnappySession sc, Config jobConfig) {
+    SnappyStreamingSecureJob.verifySessionAndConfig(sc, jobConfig);
return "done"; + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaStreamingSecureJob.java b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaStreamingSecureJob.java new file mode 100644 index 0000000000..37a43272e2 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappyJavaStreamingSecureJob.java @@ -0,0 +1,21 @@ +package io.snappydata.cluster.jobs; + +import com.typesafe.config.Config; +import org.apache.spark.sql.SnappyJobValid; +import org.apache.spark.sql.SnappyJobValidation; +import org.apache.spark.streaming.JavaSnappyStreamingJob; +import org.apache.spark.streaming.api.java.JavaSnappyStreamingContext; + +public class SnappyJavaStreamingSecureJob extends JavaSnappyStreamingJob { + @Override + public Object runSnappyJob(JavaSnappyStreamingContext snc, Config jobConfig) { + SnappyStreamingSecureJob.verifySessionAndConfig(snc.snappySession(), jobConfig); + return "done"; + } + + @Override + public SnappyJobValidation isValidJob(JavaSnappyStreamingContext snc, Config jobConfig) { + SnappyStreamingSecureJob.verifySessionAndConfig(snc.snappySession(), jobConfig); + return new SnappyJobValid(); + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappySecureJob.scala b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappySecureJob.scala new file mode 100644 index 0000000000..8dab562fb3 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/cluster/jobs/SnappySecureJob.scala @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package io.snappydata.cluster.jobs
+
+import java.io.{FileOutputStream, PrintWriter}
+
+import com.pivotal.gemfirexd.Attribute
+import com.typesafe.config.{Config, ConfigException}
+import io.snappydata.{Constant, ServiceManager}
+import io.snappydata.impl.LeadImpl
+import org.apache.spark.SparkCallbacks
+import org.apache.spark.sql.types.{DecimalType, IntegerType, StructField, StructType}
+import org.apache.spark.sql._
+import org.apache.spark.sql.streaming.SnappyStreamingJob
+import org.apache.spark.streaming.SnappyStreamingContext
+import org.apache.spark.ui.SnappyBasicAuthenticator
+
+// scalastyle:off println
+/**
+ * Job run against a secured cluster. Depending on the `op.code` job setting it
+ * either creates and modifies its own tables ("sqlOps") or checks which
+ * statements succeed on tables owned by another user. All progress output goes
+ * to the file named by the `output.file` job setting.
+ */
+class SnappySecureJob extends SnappySQLJob {
+  private val colTable = "JOB_COLTABLE"
+  private val rowTable = "JOB_ROWTABLE"
+  // opened and closed by runSnappyJob; the helpers below write through it
+  private var pw: PrintWriter = _
+
+  // Job config names
+  val outputFile = "output.file"
+  val opCode = "op.code"
+  val otherColTabName = "other.columntable"
+  val otherRowTabName = "other.rowtable"
+
+  case class Data(PS_PARTKEY: Int, PS_SUPPKEY: Int,
+      PS_AVAILQTY: Int, PS_SUPPLYCOST: BigDecimal)
+
+  def getCurrentDirectory: String = new java.io.File(".").getCanonicalPath
+
+  /**
+   * Runs the scenario selected by `op.code` and the two cluster-level
+   * assertions (interpreter class is null, Jetty authenticator is a
+   * SnappyBasicAuthenticator).
+   *
+   * @return a message pointing at the output file
+   */
+  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val file = jobConfig.getString(outputFile)
+    val msg = s"\nCheck ${getCurrentDirectory}/$file file for output of this job"
+    pw = new PrintWriter(new FileOutputStream(file), true)
+    try {
+      SnappyStreamingSecureJob.verifySessionAndConfig(snSession, jobConfig)
+      if (jobConfig.getString(opCode).equalsIgnoreCase("sqlOps")) {
+        createPartitionedRowTableUsingSQL(snSession)
+        createPartitionedRowTableUsingAPI(snSession)
+      } else {
+        accessAndModifyTablesOwnedByOthers(snSession, jobConfig)
+      }
+      // Confirm that our zeppelin interpreter is not initialized.
+      assert(ServiceManager.getLeadInstance.asInstanceOf[LeadImpl].getInterpreterServerClass ==
+          null, "Zeppelin interpreter must not be initialized in secure cluster")
+      // Check SnappyData Pulse UI is secured by our custom authenticator.
+      assert(SparkCallbacks.getAuthenticatorForJettyServer().get
+          .isInstanceOf[SnappyBasicAuthenticator], "SnappyData Pulse UI not secured")
+      pw.println(msg)
+    } finally {
+      pw.close()
+    }
+    msg
+  }
+
+  override def isValidJob(sc: SnappySession, config: Config): SnappyJobValidation = {
+    SnappyStreamingSecureJob.verifySessionAndConfig(sc, config)
+    SnappyJobValid()
+  }
+
+  /** Writes `heading` followed by every row of `table` to the output file. */
+  private def printTable(snSession: SnappySession, table: String, heading: String): Unit = {
+    pw.println(heading)
+    snSession.sql(s"SELECT * FROM $table").collect().foreach(pw.println)
+  }
+
+  /**
+   * Recreates the column and row table through the SnappySession API, inserts
+   * four rows into each and then updates and deletes rows of the row table,
+   * printing the table contents after every step.
+   */
+  def createPartitionedRowTableUsingAPI(snSession: SnappySession): Unit = {
+    pw.println()
+    pw.println(s"Creating tables $colTable and $rowTable using API")
+
+    // drop the table if it exists
+    snSession.dropTable(colTable, ifExists = true)
+    snSession.dropTable(rowTable, ifExists = true)
+
+    val schema = StructType(Array(StructField("PS_PARTKEY", IntegerType, false),
+      StructField("S_SUPPKEY", IntegerType, false),
+      StructField("PS_AVAILQTY", IntegerType, false),
+      StructField("PS_SUPPLYCOST", DecimalType(15, 2), false)
+    ))
+
+    val props1 = Map("PARTITION_BY" -> "PS_PARTKEY")
+    snSession.createTable(colTable, "column", schema, props1)
+    snSession.createTable(rowTable, "row", schema, props1)
+
+    val data = Seq(Seq(100, 1, 5000, BigDecimal(100)),
+      Seq(200, 2, 50, BigDecimal(10)),
+      Seq(300, 3, 1000, BigDecimal(20)),
+      Seq(400, 4, 200, BigDecimal(30))
+    )
+    val rdd = snSession.sparkContext.parallelize(data,
+      data.length).map(s => new Data(s(0).asInstanceOf[Int],
+      s(1).asInstanceOf[Int],
+      s(2).asInstanceOf[Int],
+      s(3).asInstanceOf[BigDecimal]))
+
+    val dataDF = snSession.createDataFrame(rdd)
+    pw.println(s"Inserting data in $colTable table")
+    dataDF.write.insertInto(colTable)
+    pw.println(s"Inserting data in $rowTable table")
+    dataDF.write.insertInto(rowTable)
+
+    printTable(snSession, colTable, s"Printing the contents of the $colTable table")
+    printTable(snSession, rowTable, s"Printing the contents of the $rowTable table")
+
+    pw.println()
+    pw.println("Update the available quantity for PARTKEY 100")
+    snSession.update(rowTable, "PS_PARTKEY = 100", Row(50000), "PS_AVAILQTY")
+
+    printTable(snSession, rowTable,
+      s"Printing the contents of the $rowTable table after update")
+
+    pw.println()
+    pw.println("Delete the records for PARTKEY 400")
+    snSession.delete(rowTable, "PS_PARTKEY = 400")
+
+    printTable(snSession, rowTable,
+      s"Printing the contents of the $rowTable table after delete")
+
+    pw.println("****Done****")
+  }
+
+  /** Runs [[createPartitionedTableUsingSQL]] for the column and the row table. */
+  def createPartitionedRowTableUsingSQL(snSession: SnappySession): Unit = {
+    Map(colTable -> "column", rowTable -> "row").foreach(e => createPartitionedTableUsingSQL
+    (snSession, e._1, e._2))
+  }
+
+  /**
+   * Recreates `table` with SQL as a `tableType` table partitioned by
+   * PS_PARTKEY (primary key only for row tables), then inserts, updates and
+   * deletes rows, printing the table contents after every step.
+   */
+  def createPartitionedTableUsingSQL(snSession: SnappySession, table: String, tableType: String):
+  Unit = {
+    pw.println()
+    pw.println(s"****Creating a partitioned table($table) using SQL****")
+
+    snSession.sql(s"DROP TABLE IF EXISTS $table")
+
+    val pk = if (tableType.equalsIgnoreCase("row")) "PRIMARY KEY" else ""
+    snSession.sql(s"CREATE TABLE $table ( " +
+        s"PS_PARTKEY INTEGER NOT NULL $pk," +
+        "PS_SUPPKEY INTEGER NOT NULL," +
+        "PS_AVAILQTY INTEGER NOT NULL," +
+        "PS_SUPPLYCOST DECIMAL(15,2) NOT NULL)" +
+        s"USING $tableType OPTIONS (PARTITION_BY 'PS_PARTKEY' )")
+
+    // insert some data in it
+    pw.println()
+    pw.println(s"Inserting data in $table table")
+    snSession.sql(s"INSERT INTO $table VALUES(100, 1, 5000, 100)")
+    snSession.sql(s"INSERT INTO $table VALUES(200, 2, 50, 10)")
+    snSession.sql(s"INSERT INTO $table VALUES(300, 3, 1000, 20)")
+    snSession.sql(s"INSERT INTO $table VALUES(400, 4, 200, 30)")
+
+    printTable(snSession, table, s"Printing the contents of the $table table")
+
+    pw.println()
+    pw.println("Update the available quantity for PARTKEY 100")
+    snSession.sql(s"UPDATE $table SET PS_AVAILQTY = 50000 WHERE PS_PARTKEY = 100")
+
+    printTable(snSession, table, s"Printing the contents of the $table table after update")
+
+    pw.println()
+    pw.println("Delete the records for PARTKEY 400")
+    snSession.sql(s"DELETE FROM $table WHERE PS_PARTKEY = 400")
+
+    printTable(snSession, table, s"Printing the contents of the $table table after delete")
+
+    pw.println("****Done****")
+  }
+
+  /**
+   * Runs statements against the tables named by `other.columntable` and
+   * `other.rowtable`. With `op.code` = "nogrant" every statement must fail
+   * with a permission error; otherwise the statements of the given operation
+   * must succeed.
+   */
+  def accessAndModifyTablesOwnedByOthers(sns: SnappySession, config: Config): Unit = {
+    pw.println()
+    pw.println("****Accessing other user's tables****")
+
+    val otherColTab = config.getString(otherColTabName)
+    val otherRowTab = config.getString(otherRowTabName)
+    val grantedOp = config.getString(opCode)
+
+    val opCodeToSQLs = Map("select" -> Seq(s"SELECT * FROM $otherColTab",
+      s"SELECT * FROM $otherRowTab"),
+      "insert" -> Seq(s"INSERT INTO $otherColTab VALUES (1, 'ONE', 1.1)",
+        s"INSERT INTO $otherRowTab VALUES (1, 'ONE', 1.1)"),
+      "update" -> Seq(s"UPDATE $otherColTab SET COL1 = 100 WHERE COL1 = 1",
+        s"UPDATE $otherRowTab SET COL1 = 100 WHERE COL1 = 1"),
+      "delete" -> Seq(s"DELETE FROM $otherColTab WHERE COL1 = 100",
+        s"DELETE FROM $otherRowTab WHERE COL1 = 100"))
+
+    // True when the failure message is one of the accepted permission errors.
+    // A null message counts as "no match" instead of raising an NPE that would
+    // hide the original failure.
+    def isPermissionDenied(t: Throwable, op: String): Boolean = {
+      val message = Option(t.getMessage).getOrElse("")
+      message.contains(s"does not have ${op.toUpperCase} " +
+          s"permission on") ||
+          message.contains(s"does not have SELECT permission on") ||
+          message.contains("42502")
+    }
+
+    def assertGrantRevoke(s: String, op: String, granted: Boolean = false): Unit = {
+      if (granted) {
+        sns.sql(s).collect()
+        pw.println(s"Success for $s")
+      } else {
+        try {
+          sns.sql(s).collect()
+          assert(false, s"Should have failed $s")
+        } catch {
+          case t: Throwable if isPermissionDenied(t, op) =>
+            pw.println(s"Found expected exception for $s")
+          // t.getStackTrace.foreach(s => pw.println(s"${t.getMessage}\n ${s.toString}"))
+          case t: Throwable => pw.println(s"UNEXPECTED ERROR FOR $s:\n[${t.getMessage}]")
+            t.getStackTrace.foreach(s => pw.println(s" ${s.toString}"))
+            throw t
+        }
+      }
+    }
+
+    grantedOp match {
+      case "nogrant" => opCodeToSQLs.keys.foreach(k => opCodeToSQLs(k).foreach(s =>
+        assertGrantRevoke(s, k, granted = false)))
+      case op: String => opCodeToSQLs(op).foreach(s => assertGrantRevoke(s, op, granted = true))
+    }
+
+    pw.println("****Done****")
+  }
+
+}
+
+/**
+ * Streaming variant of the secure job; also hosts the verification shared by
+ * the other secure jobs.
+ */
+object SnappyStreamingSecureJob extends SnappyStreamingJob {
+
+  /**
+   * Asserts that user name and password are present in the session conf and
+   * that none of the sensitive keys listed below is present in the job config.
+   */
+  def verifySessionAndConfig(snSession: SnappySession, jobConfig: Config): Unit = {
+    assert(snSession.conf.getOption(Attribute.USERNAME_ATTR).isDefined, "Username not set in conf")
+    assert(snSession.conf.getOption(Attribute.PASSWORD_ATTR).isDefined, "Password not set in conf")
+
+    def checkConfig(configKey: String): Unit = {
+      try {
+        jobConfig.getString(configKey)
+        assert(false, s"Sensitive config $configKey found in job config!")
+      } catch {
+        case _: ConfigException.Missing => // expected
+      }
+    }
+
+    Seq(Constant.STORE_PROPERTY_PREFIX + com.pivotal.gemfirexd.Attribute.USERNAME_ATTR,
+      Constant.STORE_PROPERTY_PREFIX + com.pivotal.gemfirexd.Attribute.PASSWORD_ATTR,
+      com.pivotal.gemfirexd.Attribute.USERNAME_ATTR,
+      com.pivotal.gemfirexd.Attribute.PASSWORD_ATTR,
+      "gemfire.sys.security-password",
+      "javax.jdo.option.ConnectionURL").foreach(checkConfig(_))
+  }
+
+  override def isValidJob(sc: SnappyStreamingContext, config: Config): SnappyJobValidation = {
+    verifySessionAndConfig(sc.snappySession, config)
+    new SnappyJobValid
+  }
+
+  override def runSnappyJob(sc: SnappyStreamingContext, jobConfig: Config): Any = {
+    verifySessionAndConfig(sc.snappySession, jobConfig)
+  }
+}
\ No newline at end of file
diff --git a/cluster/src/dunit/scala/io/snappydata/externalstore/CatalogConsistencyDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/externalstore/CatalogConsistencyDUnitTest.scala
new file mode 100644
index 0000000000..c2b1d566de
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/externalstore/CatalogConsistencyDUnitTest.scala
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.externalstore
+
+import java.sql.{Connection, DriverManager, SQLException}
+
+import io.snappydata.cluster.ClusterManagerTestBase
+import io.snappydata.test.dunit.AvailablePortHelper
+
+import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation
+import org.apache.spark.sql.sources.JdbcExtendedUtils
+import org.apache.spark.sql.{AnalysisException, SaveMode, SnappyContext, TableNotFoundException}
+
+/**
+ * Some basic tests to detect catalog inconsistency and repair it.
+ *
+ * Each test makes the Hive catalog and the store data dictionary (DD) disagree
+ * about `column_table1`, calls `SYS.REPAIR_CATALOG` over a JDBC client
+ * connection and then checks that the table is gone from both places while
+ * `column_table2` and `tweetsTable` are still usable.
+ */
+class CatalogConsistencyDUnitTest(s: String) extends ClusterManagerTestBase(s) {
+
+  /**
+   * Opens a JDBC client connection to the network server on `netPort`.
+   *
+   * @param netPort    port of a network server started through `startNetServer`
+   * @param routeQuery when false, `route-query=false` is appended to the URL
+   * @return an open connection; the caller is responsible for closing it
+   */
+  private def getClientConnection(netPort: Int,
+      routeQuery: Boolean = true): Connection = {
+    val driver = "io.snappydata.jdbc.ClientDriver"
+    Class.forName(driver).newInstance //scalastyle:ignore
+    val urlOptions = if (routeQuery) "" else "route-query=false"
+    DriverManager.getConnection("jdbc:snappydata://localhost:" + netPort + "/" + urlOptions)
+  }
+
+  /** Runs `body` with a fresh client connection and always closes it afterwards. */
+  private def withConnection[T](netPort: Int, routeQuery: Boolean = true)(
+      body: Connection => T): T = {
+    val conn = getClientConnection(netPort, routeQuery)
+    try {
+      body(conn)
+    } finally {
+      conn.close()
+    }
+  }
+
+  /** Executes one SQL statement on `conn`, closing the statement afterwards. */
+  private def execute(conn: Connection, sql: String): Unit = {
+    val stmt = conn.createStatement()
+    try {
+      stmt.execute(sql)
+    } finally {
+      stmt.close()
+    }
+  }
+
+  /** Starts a network server on vm2 on a random free port and returns that port. */
+  private def startNetServerOnVm2(): Int = {
+    val port = AvailablePortHelper.getRandomAvailableTCPPort
+    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", port)
+    port
+  }
+
+  /** Starts network servers on vm1 and vm0, each on its own random free port. */
+  private def startNetServersOnVm1AndVm0(): Unit = {
+    vm1.invoke(classOf[ClusterManagerTestBase], "startNetServer",
+      AvailablePortHelper.getRandomAvailableTCPPort)
+    vm0.invoke(classOf[ClusterManagerTestBase], "startNetServer",
+      AvailablePortHelper.getRandomAvailableTCPPort)
+  }
+
+  /**
+   * Creates `column_table1` (empty), `column_table2` (five rows) and the
+   * `tweetsTable` stream table used by all tests in this class.
+   */
+  private def createTables(snc: SnappyContext): Unit = {
+    val props = Map("PERSISTENT" -> "sync")
+
+    val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7))
+    val rdd = sc.parallelize(data, data.length).map(s => new Data(s(0), s(1), s(2)))
+    val dataDF = snc.createDataFrame(rdd)
+
+    snc.createTable("column_table1", "column", dataDF.schema, props)
+    snc.createTable("column_table2", "column", dataDF.schema, props)
+    dataDF.write.format("column").mode(SaveMode.Append).options(props)
+        .saveAsTable("column_table2")
+
+    // NOTE(review): the Twitter credentials below are committed in clear text.
+    // If they were ever live they should be revoked and supplied through test
+    // configuration instead. The literals are kept verbatim here so that the
+    // statement sent to the cluster is unchanged.
+    snc.sql("create stream table tweetsTable (id long, text string, fullName string, " +
+        "country string, retweets int, hashtag string) using twitter_stream options (" +
+        "consumerKey '0Xo8rg3W0SOiqu14HZYeyFPZi', " +
+        "consumerSecret 'gieTDrdzFS4b1g9mcvyyyadOkKoHqbVQALoxfZ19eHJzV9CpLR', " +
+        "accessToken '43324358-0KiFugPFlZNfYfib5b6Ah7c2NdHs1524v7LM2qaUq', " +
+        "accessTokenSecret 'aB1AXHaRiE3g2d7tLgyASdgIg9J7CzbPKBkNfvK8Y88bu', " +
+        "rowConverter 'io.snappydata.streaming.TweetToRowsConverter')")
+  }
+
+  /** Removes the `column_table1` entry from the Hive catalog but not from the store DD. */
+  private def dropColumnTable1FromHiveOnly(snc: SnappyContext): Unit = {
+    snc.snappySession.sessionCatalog.dropTable(
+      snc.snappySession.tableIdentifier("column_table1"),
+      ignoreIfNotExists = false, purge = false)
+  }
+
+  /**
+   * Drops `column_table1` and its column batch table through a
+   * `route-query=false` connection, then checks that
+   * `JdbcExtendedUtils.tableExistsInMetaData` still reports the table.
+   */
+  private def dropColumnTable1FromStoreOnly(netPort: Int): Unit = {
+    withConnection(netPort, routeQuery = false) { conn =>
+      execute(conn, "drop table " +
+          ColumnFormatRelation.columnBatchTableName("app.column_table1"))
+      execute(conn, "drop table column_table1")
+
+      // make sure that the table exists in Hive metastore
+      assert(JdbcExtendedUtils.tableExistsInMetaData("APP", "COLUMN_TABLE1", conn),
+        "APP.COLUMN_TABLE1 should still be present in the metadata")
+    }
+  }
+
+  /**
+   * Calls `SYS.REPAIR_CATALOG('true', 'true')` on a routed client connection.
+   *
+   * @param dryRunFirst when true, `SYS.REPAIR_CATALOG('false', 'false')` is
+   *                    called first on the same connection
+   */
+  private def repairCatalog(netPort: Int, dryRunFirst: Boolean = false): Unit = {
+    withConnection(netPort) { conn =>
+      if (dryRunFirst) {
+        // does not actually repair, just adds warning to log file
+        execute(conn, "CALL SYS.REPAIR_CATALOG('false', 'false')")
+      }
+      // actually repair the catalog
+      execute(conn, "CALL SYS.REPAIR_CATALOG('true', 'true')")
+    }
+  }
+
+  /** Stops the global SparkContext (when one exists) and then calls `stopAny`. */
+  private def stopLead(): Unit = {
+    Option(SnappyContext.globalSparkContext).foreach(_.stop())
+    ClusterManagerTestBase.stopAny()
+  }
+
+  /** Starts the lead again and returns a SnappyContext built from `sc`. */
+  private def startLead(): SnappyContext = {
+    ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps)
+    SnappyContext(sc)
+  }
+
+  /**
+   * Fails unless looking up `table` in the session catalog throws
+   * `TableNotFoundException`. Any other exception propagates unchanged.
+   */
+  private def assertNotInHiveCatalog(snc: SnappyContext, table: String): Unit = {
+    val session = snc.snappySession
+    val missing = try {
+      session.sessionCatalog.lookupRelation(session.tableIdentifier(table))
+      false
+    } catch {
+      case _: TableNotFoundException => true // expected exception
+    }
+    assert(missing, s"$table is still present in the Hive catalog")
+  }
+
+  /**
+   * Asserts that `column_table1` is in neither the Hive catalog nor the store
+   * DD. A successful lookup or query now fails the test instead of being
+   * silently accepted.
+   */
+  private def assertTableDoesNotExist(netPort1: Int, snc: SnappyContext): Unit = {
+    // table should not exist in the Hive catalog
+    assertNotInHiveCatalog(snc, "column_table1")
+
+    // make sure that the column buffer does not exist
+    val batchTable = ColumnFormatRelation.columnBatchTableName("app.column_table1")
+    withConnection(netPort1, routeQuery = false) { conn =>
+      val stmt = conn.createStatement()
+      val missing = try {
+        stmt.executeQuery("select * from " + batchTable)
+        false
+      } catch {
+        // literal on the left keeps the guard safe when the SQL state is null
+        case se: SQLException if "42X05" == se.getSQLState => true
+      } finally {
+        stmt.close()
+      }
+      assert(missing, s"$batchTable is still present in the store DD")
+    }
+  }
+
+  /** Checks that `column_table2` has its five rows and `tweetsTable` resolves. */
+  def verifyTables(snc: SnappyContext): Unit = {
+    val rows = snc.sql("SELECT * FROM column_table2").collect()
+    assert(rows.length == 5, s"Unexpected elements ${rows.length}, expected=5")
+    // below call should not throw an exception
+    snc.snappySession.sessionCatalog.lookupRelation(
+      snc.snappySession.tableIdentifier("tweetsTable"))
+  }
+
+  /**
+   * Asserts that dropping `table` is rejected with an `AnalysisException`
+   * whose message contains `expectedMessage`.
+   */
+  private def assertDropRejected(snc: SnappyContext, table: String,
+      expectedMessage: String): Unit = {
+    try {
+      // This should throw an exception
+      snc.sql(s"drop table $table")
+      assert(assertion = false, "expected the drop to fail")
+    } catch {
+      case ae: AnalysisException =>
+        // Expected Exception and assert message
+        assert(ae.getMessage.contains(expectedMessage),
+          s"Unexpected failure message: ${ae.getMessage}")
+    }
+  }
+
+  // Hive entry missing but DD entry exists, lead running
+  def testHiveStoreEntryMissingForTable(): Unit = {
+    val snc = SnappyContext(sc)
+
+    val netPort1 = startNetServerOnVm2()
+    startNetServersOnVm1AndVm0()
+
+    createTables(snc)
+
+    // remove column_table1 entry from Hive store but not from store DD
+    dropColumnTable1FromHiveOnly(snc)
+    assertNotInHiveCatalog(snc, "column_table1")
+
+    // repair the catalog
+    repairCatalog(netPort1)
+    // column_table1 should not be found in either catalog after repair
+    assertTableDoesNotExist(netPort1, snc)
+    // other tables should exist
+    verifyTables(snc)
+  }
+
+  // Hive entry exists but DD entry missing, lead running
+  def testStoreDDEntryMissingForTable(): Unit = {
+    val snc = SnappyContext(sc)
+
+    val netPort1 = startNetServerOnVm2()
+    startNetServersOnVm1AndVm0()
+
+    createTables(snc)
+
+    // drop column_table1 from store DD
+    dropColumnTable1FromStoreOnly(netPort1)
+
+    // repair the catalog
+    repairCatalog(netPort1)
+    // column_table1 should not be found in either catalog after repair
+    assertTableDoesNotExist(netPort1, snc)
+    // other tables should exist
+    verifyTables(snc)
+  }
+
+  // Hive entry missing but DD entry exists
+  def testCatalogRepairedWhenLeadStopped1(): Unit = {
+    val netPort1 = startNetServerOnVm2()
+
+    val snc = SnappyContext(sc)
+
+    createTables(snc)
+    // remove column_table1 entry from Hive store but not from store DD
+    dropColumnTable1FromHiveOnly(snc)
+
+    // stop spark, repair while the lead is down, then bring the lead back
+    stopLead()
+    repairCatalog(netPort1, dryRunFirst = true)
+    val restartedSnc = startLead()
+
+    // column_table1 should not be found in either catalog after repair
+    assertTableDoesNotExist(netPort1, restartedSnc)
+    // other tables should exist
+    verifyTables(restartedSnc)
+  }
+
+  // Hive entry exists but DD entry missing
+  def testCatalogRepairedWhenLeadStopped2(): Unit = {
+    val netPort1 = startNetServerOnVm2()
+
+    val snc = SnappyContext(sc)
+
+    createTables(snc)
+
+    // drop column_table1 from store DD
+    dropColumnTable1FromStoreOnly(netPort1)
+
+    // stop spark, repair while the lead is down, then bring the lead back
+    stopLead()
+    repairCatalog(netPort1, dryRunFirst = true)
+    val restartedSnc = startLead()
+
+    // column_table1 should not be found in either catalog after repair
+    assertTableDoesNotExist(netPort1, restartedSnc)
+    // other tables should exist
+    verifyTables(restartedSnc)
+  }
+
+  /**
+   * A base table with a colocated dependent must not be droppable, both before
+   * and after the lead is restarted; dropping dependents first must work.
+   */
+  def testConsistencyWithCollocatedTables(): Unit = {
+    startNetServerOnVm2()
+
+    val snc = SnappyContext(sc)
+
+    val baseRowTable = "ORDER_DETAILS_ROW"
+    val colocatedRowTable = "EXEC_DETAILS_ROW"
+
+    val baseColumnTable = "ORDER_DETAILS_COL"
+    val colocatedColumnTable = "EXEC_DETAILS_COL"
+
+    val dropRejectedMessage = "app.order_details_row cannot be dropped because of " +
+        "dependent objects: app.exec_details_row"
+
+    snc.sql(s"create table $baseRowTable(SINGLE_ORDER_DID BIGINT ,SYS_ORDER_ID VARCHAR(64) , " +
+        "SYS_ORDER_VER INTEGER ,DATA_SNDG_SYS_NM VARCHAR(128)) USING row OPTIONS(BUCKETS '16', " +
+        "REDUNDANCY '1', EVICTION_BY 'LRUHEAPPERCENT', PERSISTENT 'ASYNCHRONOUS',PARTITION_BY " +
+        "'SINGLE_ORDER_DID')")
+
+    snc.sql(s"create table $colocatedRowTable(EXEC_DID BIGINT,SYS_EXEC_VER INTEGER,SYS_EXEC_ID " +
+        "VARCHAR (64),TRD_DATE VARCHAR(20),ALT_EXEC_ID VARCHAR(64)) USING row OPTIONS" +
+        s"(COLOCATE_WITH '$baseRowTable', BUCKETS '16', REDUNDANCY '1', EVICTION_BY " +
+        "'LRUHEAPPERCENT', PERSISTENT 'ASYNCHRONOUS',PARTITION_BY 'EXEC_DID')")
+
+    snc.sql(s"create table $baseColumnTable(SINGLE_ORDER_DID BIGINT ,SYS_ORDER_ID VARCHAR(64) ," +
+        s"SYS_ORDER_VER INTEGER ,DATA_SNDG_SYS_NM VARCHAR(128)) USING column OPTIONS(BUCKETS " +
+        s"'16', REDUNDANCY '1', EVICTION_BY 'LRUHEAPPERCENT', PERSISTENT 'ASYNCHRONOUS'," +
+        s"PARTITION_BY 'SINGLE_ORDER_DID')")
+
+    snc.sql(s"create table $colocatedColumnTable(EXEC_DID BIGINT,SYS_EXEC_VER INTEGER," +
+        s"SYS_EXEC_ID VARCHAR(64),TRD_DATE VARCHAR(20),ALT_EXEC_ID VARCHAR(64)) USING column " +
+        s"OPTIONS (COLOCATE_WITH '$baseColumnTable', BUCKETS '16', REDUNDANCY '1', EVICTION_BY " +
+        s"'LRUHEAPPERCENT', PERSISTENT 'ASYNCHRONOUS',PARTITION_BY 'EXEC_DID')")
+
+    assertDropRejected(snc, baseRowTable, dropRejectedMessage)
+
+    // stop spark and restart the lead; the dependency must survive the restart
+    stopLead()
+    val restartedSnc = startLead()
+    assertDropRejected(restartedSnc, baseRowTable, dropRejectedMessage)
+
+    restartedSnc.sql(s"drop table $colocatedColumnTable")
+    restartedSnc.sql(s"drop table $baseColumnTable")
+
+    restartedSnc.sql(s"drop table $colocatedRowTable")
+    restartedSnc.sql(s"drop table $baseRowTable")
+  }
+}
diff --git a/cluster/src/dunit/scala/io/snappydata/externalstore/ColumnTableDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/externalstore/ColumnTableDUnitTest.scala
new file mode 100644
index 0000000000..95abc273c4
--- /dev/null
+++ b/cluster/src/dunit/scala/io/snappydata/externalstore/ColumnTableDUnitTest.scala
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.externalstore + +import java.io.File + +import scala.collection.JavaConverters._ +import scala.util.Random + +import com.gemstone.gemfire.internal.cache.PartitionedRegion +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.{SerializableCallable, SerializableRunnable} +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation +import org.apache.spark.sql.{Row, SaveMode, SnappyContext} + +// scalastyle:off println +/** + * Some basic column table tests. 
+ */ +// noinspection ZeroIndexToHead +class ColumnTableDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + private val currentLocatorPort = ClusterManagerTestBase.locPort + + def testTableCreation(): Unit = { + startSparkJob() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testTableCreationWithHA(): Unit = { + val tableName = "TestTable" + val snc = SnappyContext(sc) + + createTable(snc, tableName, Map("BUCKETS" -> "1", "PERSISTENT" -> "async")) + verifyTableData(snc, tableName) + + vm2.invoke(classOf[ClusterManagerTestBase], "stopAny") + + val props = bootProps + val port = currentLocatorPort + + val restartServer = new SerializableRunnable() { + override def run(): Unit = ClusterManagerTestBase.startSnappyServer(port, props) + } + + vm2.invoke(restartServer) + + verifyTableData(snc, tableName) + dropTable(snc, tableName) + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateInsertAndDropOfTable(): Unit = { + startSparkJob2() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateInsertAndDropOfTableProjectionQuery(): Unit = { + startSparkJob3() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateInsertAndDropOfTableWithPartition(): Unit = { + startSparkJob4() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateInsertAPI(): Unit = { + startSparkJob5() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateAndSingleInsertAPI(): Unit = { + startSparkJob6() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + def testCreateAndInsertCLOB(): Unit = { + 
startSparkJob7() + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + } + + + // changing the test to such that batches are created + // and looking for column table stats + // Disabled for now. See SNAP-1353. + def disabled_testSNAP205_InsertLocalBuckets(): Unit = { + val snc = SnappyContext(sc) + + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), + Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt) + } + val rdd = sc.parallelize(data, data.length).map( + s => Data(s(0), s(1), s(2))) + + val dataDF = snc.createDataFrame(rdd) + + // Now column table with partition only can expect + // local insertion. After Suranjan's change we can expect + // column batches to inserted locally if no partitioning is given. + // TDOD : Merge and validate test after SNAP-105 + val p = Map[String, String]("PARTITION_BY" -> "col1") + snc.createTable(tableName, "column", dataDF.schema, p) + + // we don't expect any increase in put distribution stats + val columnTableRegionName = ColumnFormatRelation. + columnBatchTableName("APP." + tableName).toUpperCase + val getPRMessageCount = new SerializableCallable[AnyRef] { + override def call(): AnyRef = { + Int.box(Misc.getRegionForTable(columnTableRegionName, true). 
+ asInstanceOf[PartitionedRegion].getPrStats.getPartitionMessagesSent) + } + } + val counts = Array(vm0, vm1, vm2).map(_.invoke(getPRMessageCount)) + dataDF.write.mode(SaveMode.Append).saveAsTable(tableName) + val newCounts = Array(vm0, vm1, vm2).map(_.invoke(getPRMessageCount)) + newCounts.zip(counts).foreach { case (c1, c2) => + assert(c1 == c2, s"newCount=$c1 count=$c2") + } + + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 1007, s"Unexpected elements ${r.length}, expected=1007") + + snc.dropTable(tableName, ifExists = true) + getLogWriter.info("Successful") + } + + // changing the test to such that batches are created + // and looking for column table stats + // Disabled for now. See SNAP-1353. + def disabled_testSNAP205_InsertLocalBucketsNonPartitioning(): Unit = { + val snc = SnappyContext(sc) + + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), + Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0), Seq(3, 9, 3)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt) + } + val rdd = sc.parallelize(data, 3).map( + s => Data(s(0), s(1), s(2))) + + val dataDF = snc.createDataFrame(rdd) + + // Now column table with partition only can expect + // local insertion. After Suranjan's change we can expect + // column batches to inserted locally if no partitioning is given. + + // For COLUMNTABLE, there will be distribution for the messages beyond + // column batches. + + // TDOD : Merge and validate test after SNAP-105 + val p = Map.empty[String, String] + snc.createTable(tableName, "column", dataDF.schema, p) + val columnTableRegionName = ColumnFormatRelation. + columnBatchTableName("APP." + tableName).toUpperCase + // we don't expect any increase in put distribution stats + val getPRMessageCount = new SerializableCallable[AnyRef] { + override def call(): AnyRef = { + Int.box(Misc.getRegionForTable(columnTableRegionName, true). 
+ asInstanceOf[PartitionedRegion].getPrStats.getPartitionMessagesSent) + } + } + val counts = Array(vm0, vm1, vm2).map(_.invoke(getPRMessageCount)) + dataDF.write.mode(SaveMode.Append).saveAsTable(tableName) + val newCounts = Array(vm0, vm1, vm2).map(_.invoke(getPRMessageCount)) + newCounts.zip(counts).foreach { case (c1, c2) => + assert(c1 == c2, s"newCount=$c1 count=$c2") + } + + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 1008, s"Unexpected elements ${r.length}, expected=1008") + + snc.dropTable(tableName, ifExists = true) + getLogWriter.info("Successful") + } + + // changing the test to such that batches are created + // and looking for column table stats + def testSNAP365_FetchRemoteBucketEntries(): Unit = { + val snc = SnappyContext(sc) + + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), + Seq(4, 2, 3), Seq(5, 6, 7), Seq(2, 8, 3), Seq(3, 9, 0), Seq(3, 9, 3)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt) + } + val rdd = sc.parallelize(data, 3).map( + s => Data(s(0), s(1), s(2))) + + val dataDF = snc.createDataFrame(rdd) + + val p = Map.empty[String, String] + snc.createTable(tableName, "column", dataDF.schema, p) + + val tName = ColumnFormatRelation.columnBatchTableName("APP." 
+ tableName.toUpperCase()) + // we don't expect any increase in put distribution stats + val getTotalEntriesCount = new SerializableCallable[AnyRef] { + override def call(): AnyRef = { + val pr: PartitionedRegion = + Misc.getRegionForTable(tName, true).asInstanceOf[PartitionedRegion] + var buckets = Set.empty[Integer] + 0 until pr.getTotalNumberOfBuckets foreach { x => + buckets = buckets + x + } + val iter = pr.getAppropriateLocalEntriesIterator( + buckets.asJava, false, false, true, pr, true) + var count = 0 + while (iter.hasNext) { + iter.next + count = count + 1 + } + println("The total count is " + count) + Int.box(count) + } + } + + val getLocalEntriesCount = new SerializableCallable[AnyRef] { + override def call(): AnyRef = { + val pr: PartitionedRegion = + Misc.getRegionForTable(tName, true).asInstanceOf[PartitionedRegion] + val iter = pr.getAppropriateLocalEntriesIterator( + pr.getDataStore.getAllLocalBucketIds, false, false, true, pr, false) + var count = 0 + while (iter.hasNext) { + iter.next + count = count + 1 + } + Int.box(count) + } + } + + dataDF.write.mode(SaveMode.Append).saveAsTable(tableName) + val totalCounts = Array(vm0, vm1, vm2).map(_.invoke(getTotalEntriesCount).asInstanceOf[Int]) + assert(totalCounts(0) == totalCounts(1)) + assert(totalCounts(0) == totalCounts(2)) + + val localCounts = Array(vm0, vm1, vm2).map(_.invoke(getLocalEntriesCount).asInstanceOf[Int]) + + assert(totalCounts(0) == localCounts.sum) + + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 1008, s"Unexpected elements ${r.length}, expected=1008") + + snc.dropTable(tableName, ifExists = true) + Array(vm0, vm1, vm2).foreach(_.invoke(classOf[ClusterManagerTestBase], + "validateNoActiveSnapshotTX")) + getLogWriter.info("Successful") + } + + private val tableName: String = "ColumnTable" + private val tableNameWithPartition: String = "ColumnTablePartition" + + val props = Map.empty[String, String] + + def startSparkJob(): Unit = { 
+ val snc = SnappyContext(sc) + createTable(snc) + verifyTableData(snc) + dropTable(snc) + getLogWriter.info("Successful") + } + + def createTable(snc: SnappyContext, + tableName: String = tableName, + props: Map[String, String] = props): Unit = { + val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + val rdd = sc.parallelize(data, data.length).map(s => Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + snc.createTable(tableName, "column", dataDF.schema, props) + dataDF.write.format("column").mode(SaveMode.Append).saveAsTable(tableName) + } + + def verifyTableData(snc: SnappyContext, tableName: String = tableName): Unit = { + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 5, s"Unexpected elements ${r.length}, expected=5") + } + + def dropTable(snc: SnappyContext, tableName: String = tableName): Unit = { + snc.dropTable(tableName, ifExists = true) + } + + def startSparkJob2(): Unit = { + val snc = SnappyContext(sc) + + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt) + } + + val rdd = sc.parallelize(data, data.length).map(s => Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(tableName, "column", dataDF.schema, props) + + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable(tableName) + + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + + assert(r.length == 1005, s"Unexpected elements ${r.length}, expected=1005") + + val region = Misc.getRegionForTable(s"APP.${tableName.toUpperCase()}", + true).asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP." 
+ tableName).toUpperCase(), true).asInstanceOf[PartitionedRegion] + + println("startSparkJob2 " + region.size()) + + println("startSparkJob2 " + shadowRegion.size()) + + assert(shadowRegion.size() == 0) + + snc.dropTable(tableName, ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob3(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"CREATE TABLE $tableNameWithPartition(Col1 INT ,Col2 INT, Col3 INT)" + + "USING column " + + "options " + + "(" + + "BUCKETS '1'," + + "REDUNDANCY '0')") + + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(3)(Random.nextInt) + } + + val rdd = sc.parallelize(data, data.length).map(s => Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable(tableNameWithPartition) + + val result = snc.sql("SELECT Col2 FROM " + tableNameWithPartition) + + + val r = result.collect() + + assert(r.length == 1005, s"Unexpected elements ${r.length}, expected=1005") + val region = Misc.getRegionForTable(s"APP.${tableNameWithPartition.toUpperCase()}", + true).asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP." 
+ tableNameWithPartition).toUpperCase(), true).asInstanceOf[PartitionedRegion] + + println("startSparkJob3 " + region.size()) + println("startSparkJob3 " + shadowRegion.size()) + + assert(shadowRegion.size() == 0) + + snc.dropTable(tableNameWithPartition, ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob4(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"CREATE TABLE $tableNameWithPartition" + + s"(Key1 INT ,Value STRING, other1 STRING, other2 STRING )" + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1', buckets '2', " + + "REDUNDANCY '2', COLUMN_BATCH_SIZE '100')") + + var data = Seq(Seq(1, 2, 3, 4), Seq(7, 8, 9, 4), Seq(9, 2, 3, 4), + Seq(4, 2, 3, 4), Seq(5, 6, 7, 4)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(4)(Random.nextInt) + } + + val rdd = sc.parallelize(data, data.length).map(s => PartitionData(s(0), + s(1).toString, s(2).toString, s(3).toString)) + val dataDF = snc.createDataFrame(rdd) + + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable(tableNameWithPartition) + + var result = snc.sql("SELECT Value FROM " + tableNameWithPartition) + var r = result.collect() + assert(r.length == 1005, s"Unexpected size = ${r.length}, expected = 1005") + + result = snc.sql("SELECT other1 FROM " + tableNameWithPartition) + r = result.collect() + val colValues = Seq(3, 9, 3, 3, 7) + val resultValues = r map { row => + row.getString(0).toInt + } + assert(resultValues.length == 1005, + s"Unexpected size = ${resultValues.length}, expected = 1005") + colValues.foreach(v => assert(resultValues.contains(v))) + + val region = Misc.getRegionForTable(s"APP.${tableNameWithPartition.toUpperCase()}", + true).asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP." 
+ tableNameWithPartition).toUpperCase(), true).asInstanceOf[PartitionedRegion] + + println("startSparkJob4 " + region.size()) + println("startSparkJob4 " + shadowRegion.size()) + + assert(shadowRegion.size() > 0) + + snc.dropTable(tableNameWithPartition, ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob5(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + var data = Seq(Seq(1, 2, 3, 4), Seq(7, 8, 9, 4), Seq(9, 2, 3, 4), + Seq(4, 2, 3, 4), Seq(5, 6, 7, 4)) + 1 to 1000 foreach { _ => + data = data :+ Seq.fill(4)(Random.nextInt) + } + val rdd = sc.parallelize(data, 10).map(s => PartitionDataInt(s(0), + s(1), s(2), s(3))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(tableNameWithPartition, "column", dataDF.schema, + props + ("COLUMN_BATCH_SIZE" -> "100") + ("buckets" -> "2")) + + data.map { r => + snc.insert(tableNameWithPartition, Row.fromSeq(r)) + } + + var result = snc.sql("SELECT Value FROM " + tableNameWithPartition) + var r = result.collect() + + assert(r.length == 1005, s"Unexpected size = ${r.length}, expected = 1005") + + result = snc.sql("SELECT other1 FROM " + tableNameWithPartition) + r = result.collect() + + val colValues = Seq(3, 9, 3, 3, 7) + val resultValues = r map { row => + row.getInt(0) + } + assert(resultValues.length == 1005, + s"Unexpected size = ${resultValues.length}, expected = 1005") + colValues.foreach(v => assert(resultValues.contains(v))) + + val region = Misc.getRegionForTable(s"APP.${tableNameWithPartition.toUpperCase()}", + true).asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP." 
+ tableNameWithPartition).toUpperCase(), true).asInstanceOf[PartitionedRegion] + + println("startSparkJob5 " + region.size()) + println("startSparkJob5 " + shadowRegion.size()) + + val regionSize = region.size() + (shadowRegion.size() / 5) * 3 + assert(1005 == regionSize, s"Unexpected size = $regionSize, expected = 1005") + assert(shadowRegion.size() > 0) + + snc.dropTable(tableNameWithPartition, ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob6(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"CREATE TABLE COLUMNTABLE4(Key1 INT ,Value INT)" + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1'," + + "BUCKETS '1'," + + "REDUNDANCY '2')") + + snc.sql("insert into COLUMNTABLE4 VALUES(1,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(2,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(3,11)") + + snc.sql("insert into COLUMNTABLE4 VALUES(4,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(5,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(6,11)") + + snc.sql("insert into COLUMNTABLE4 VALUES(7,11)") + + var data = Seq(Seq(1, 2), Seq(7, 8), Seq(9, 2), Seq(4, 2), Seq(5, 6)) + 1 to 10000 foreach { _ => + data = data :+ Seq.fill(2)(Random.nextInt) + } + val rdd = sc.parallelize(data, 50).map(s => TData(s(0), s(1))) + + val dataDF = snc.createDataFrame(rdd) + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable("COLUMNTABLE4") + + val result = snc.sql("SELECT Value FROM COLUMNTABLE4") + val r = result.collect() + println("total region.size() " + r.length) + + + val region = Misc.getRegionForTable("APP.COLUMNTABLE4", true). 
+ asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP.COLUMNTABLE4"), true).asInstanceOf[PartitionedRegion] + + println("region.size() " + region.size()) + println("shadowRegion.size()" + shadowRegion.size()) + + assert(r.length == 10012, s"Unexpected elements ${r.length}, expected=10012") + + println("startSparkJob6 " + region.size()) + println("startSparkJob6 " + shadowRegion.size()) + + // assert(0 == region.size()) + assert(shadowRegion.size() > 0) + + snc.dropTable("COLUMNTABLE4", ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob7(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"CREATE TABLE COLUMNTABLE4(Key1 INT ,Value INT, other1 VARCHAR(20), other2 STRING)" + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'Key1, Value '," + + "BUCKETS '1'," + + "REDUNDANCY '2')") + + snc.sql("insert into COLUMNTABLE4 VALUES(1,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(2,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(3,11)") + + snc.sql("insert into COLUMNTABLE4 VALUES(4,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(5,11)") + snc.sql("insert into COLUMNTABLE4 VALUES(6,11)") + + snc.sql("insert into COLUMNTABLE4 VALUES(7,11)") + + var data = + Seq(Seq(1, 2, 3, 4), Seq(7, 8, 9, 10), Seq(9, 2, 3, 4), Seq(4, 2, 5, 7), Seq(5, 6, 2, 3)) + + 1 to 10000 foreach { _ => + data = data :+ Seq.fill(4)(Random.nextInt) + } + val rdd = sc.parallelize(data, 50).map(s => PartitionData(s(0), + s(1).toString, s(2).toString, s(3).toString)) + + val dataDF = snc.createDataFrame(rdd) + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable("COLUMNTABLE4") + + val result = snc.sql("SELECT Value,other1 FROM COLUMNTABLE4") + val r = result.collect() + println("total region.size() " + r.length) + + + val region = Misc.getRegionForTable("APP.COLUMNTABLE4", true). 
+ asInstanceOf[PartitionedRegion] + val shadowRegion = Misc.getRegionForTable(ColumnFormatRelation.columnBatchTableName( + "APP.COLUMNTABLE4"), true).asInstanceOf[PartitionedRegion] + + println("region.size() " + region.size()) + println("shadowRegion.size()" + shadowRegion.size()) + + assert(r.length == 10012, s"Unexpected elements ${r.length}, expected=10012") + + println("startSparkJob7 " + region.size()) + println("startSparkJob7 " + shadowRegion.size()) + + // assert(0 == region.size()) + assert(shadowRegion.size() > 0) + + snc.dropTable("COLUMNTABLE4", ifExists = true) + getLogWriter.info("Successful") + } + + def testColumnTableRedundancyTestSNAP1188(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"create table if not exists airline (YearI INT," + // NOT NULL + "MonthI INT," + // NOT NULL + "DayOfMonth INT," + // NOT NULL + "DayOfWeek INT," + // NOT NULL + "DepTime INT," + + "CRSDepTime INT," + + "ArrTime INT," + + "CRSArrTime INT," + + "UniqueCarrier VARCHAR(20)," + // NOT NULL + "FlightNum INT," + + "TailNum VARCHAR(20)," + + "ActualElapsedTime INT," + + "CRSElapsedTime INT," + + "AirTime INT," + + "ArrDelay INT," + + "DepDelay INT," + + "Origin VARCHAR(20)," + + "Dest VARCHAR(20)," + + "Distance INT," + + "TaxiIn INT," + + "TaxiOut INT," + + "Cancelled INT," + + "CancellationCode VARCHAR(20)," + + "Diverted INT," + + "CarrierDelay INT," + + "WeatherDelay INT," + + "NASDelay INT," + + "SecurityDelay INT," + + "LateAircraftDelay INT," + + "ArrDelaySlot INT) using column options (partition_by 'DayOfMonth', Buckets '8', " + + "Redundancy '2')") + + val hfile: String = getClass.getResource("/2015.parquet").getPath + val airlineDataFrame = snc.read.load(hfile) + airlineDataFrame.write.insertInto(s"airline") + assert(snc.sql("select count(*) from airline").count() > 0) + snc.sql("drop table airline") + } + + def testSNAP1210(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"create external table t1 using csv 
options(path " + + s"'${getClass.getResource("/northwind/orders.csv").getPath}', header 'true', " + + s"inferschema 'true', maxCharsPerColumn '4096')") + snc.sql("select * from t1").printSchema() + snc.sql("select * from t1").show + val tempPath = "/tmp/" + System.currentTimeMillis() + + snc.sql("select * from t1").write.csv(tempPath) + assert(snc.sql("select count(*) from t1").count() > 0) + FileUtils.deleteDirectory(new File(tempPath)) + } + + def testSNAP1878(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + snc.sql(s"create table t1 (c1 integer,c2 string)") + snc.sql(s"insert into t1 values(1,'test1')") + snc.sql(s"insert into t1 values(2,'test2')") + snc.sql(s"insert into t1 values(3,'test3')") + val df = snc.sql("select * from t1") + df.show + val tempPath = "/tmp/" + System.currentTimeMillis() + + assert(df.count() == 3) + df.write.option("header", "true").csv(tempPath) + snc.createExternalTable("TEST_EXTERNAL", "csv", Map("path" -> tempPath, "header" -> "true")) + val dataDF = snc.sql("select * from TEST_EXTERNAL") + assert(dataDF.count == 3) + snc.sql("drop table if exists TEST_EXTERNAL") + FileUtils.deleteDirectory(new File(tempPath)) + } + + def testSNAP2088_1(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + val t1 = "snap2088" + val t2 = "snap2088_2" + + snc.sql(s"create table $t1 (airport_id int, name string, city string, country string) " + + s"using column options (COLUMN_BATCH_SIZE '50')") + snc.sql(s"create table $t2 (airport_id int, name string, city string, country string) " + + s"using column options (COLUMN_BATCH_SIZE '5000')") + + 1 to 500 foreach { i => + if (i % 2 == 0) { + snc.sql(s"insert into $t1 values (${Random.nextInt}, 'name_$i', null, 'country_$i')") + snc.sql(s"insert into $t2 values (${Random.nextInt}, 'name_$i', null, 'country_$i')") + } else { + snc.sql(s"insert into $t1 values (${Random.nextInt}, 'name_$i', 'city_$i', 'country_$i')") + snc.sql(s"insert into $t2 values (${Random.nextInt}, 
'name_$i', 'city_$i', 'country_$i')") + } + } + snc.sql(s"select distinct city from $t1").show + snc.sql(s"select distinct city from $t2 order by city").show + var df = snc.sql(s"select count(*) from $t1 where city is null") + var cnt = df.collect()(0).getLong(0) + assert(cnt == 250, s"$cnt records found in $t1 with null city, expected 250") + + df = snc.sql(s"select count(*) from $t2 where city is null") + cnt = df.collect()(0).getLong(0) + assert(cnt == 250, s"$cnt records found in $t2 with null city, expected 250") + + snc.sql(s"select distinct city from $t1 where country like 'country_1%'").show + snc.sql(s"select distinct city from $t2 where country like 'country_1%'").show + } +} + +case class TData(Key1: Int, Value: Int) + +case class Data(col1: Int, col2: Int, col3: Int) + +case class PartitionData(col1: Int, Value: String, other1: String, other2: String) + +case class PartitionDataInt(col1: Int, Value: Int, other1: Int, other2: Int) diff --git a/cluster/src/dunit/scala/io/snappydata/externalstore/ExecutorMessageDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/externalstore/ExecutorMessageDUnitTest.scala new file mode 100644 index 0000000000..30bd9b36b2 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/externalstore/ExecutorMessageDUnitTest.scala @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.externalstore + +import scala.util.Random + +import io.snappydata.cluster.{ClusterManagerTestBase, ExecutorInitiator} +import io.snappydata.test.dunit.DistributedTestBase +import io.snappydata.test.dunit.DistributedTestBase.WaitCriterion + +import org.apache.spark.Logging +import org.apache.spark.sql.{SaveMode, SnappyContext} + +class ExecutorMessageDUnitTest(s: String) extends ClusterManagerTestBase(s) with Logging { + + val tableName = "ExecutorMessageDUnitTest_table" + + def test01StoreBlockMapUpdatesWithExecutorDown(): Unit = { + val snc = SnappyContext(sc) + var props = Map.empty[String, String] + props += ("BUCKETS" -> "7") + executeSomething(snc, props) + verifyMap(snc, "stopExecutor") + restartSpark() + getLogWriter.info("test01StoreBlockMapUpdatesWithExecutorDown() Successful") + } + + def test02StoreBlockMapUpdatesWithNodeDown(): Unit = { + val snc = SnappyContext(sc) + var props = Map.empty[String, String] + props += ("BUCKETS" -> "7") + props += ("REDUNDANCY" -> "2") + executeSomething(snc, props) + verifyMap(snc, "stopProcess") + getLogWriter.info("test02StoreBlockMapUpdatesWithNodeDown() Successful") + } + + def executeSomething(snc: SnappyContext, + props: Map[String, String] = Map.empty[String, String]): Unit = { + createAndPopulateTable(snc, props) + + val wc: WaitCriterion = new WaitCriterion { + override def done(): Boolean = { + SnappyContext.getAllBlockIds.size == 4 // 3 servers + 1 lead/driver + } + override def description(): String = { + s"Expected SnappyContext.storeToBlockMap.size: 4, actual: " + + s"${SnappyContext.getAllBlockIds.size}" + } + } + DistributedTestBase.waitForCriterion(wc, 10000, 500, true) + for ((dm, blockId) <- SnappyContext.getAllBlockIds) { + assert(blockId != null) + } + } + + def createAndPopulateTable(snc: SnappyContext, props: Map[String, String]): Unit = { + var data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + 1 to 1000 foreach { _ => + data = data :+ 
Seq.fill(3)(Random.nextInt) + } + + val rdd = sc.parallelize(data, data.length).map(s => Data(s.head, s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(tableName, "column", dataDF.schema, props) + + dataDF.write.format("column").mode(SaveMode.Append) + .options(props).saveAsTable(tableName) + } + + def verifyMap(snc: SnappyContext, m: String): Unit = { + vm0.invoke(getClass, m) + assert(SnappyContext.getAllBlockIds.size == 3) + for ((dm, blockId) <- SnappyContext.getAllBlockIds) { + assert(blockId != null) + } + verifyTable(snc) + + vm1.invoke(getClass, m) + assert(SnappyContext.getAllBlockIds.size == 2) + for ((dm, blockId) <- SnappyContext.getAllBlockIds) { + assert(blockId != null) + } + verifyTable(snc) + +// vm2.invoke(getClass, m) // Don't shutdown the last executor, else cleanup will fail. +// assert(SnappyContext.storeToBlockMap.size == 1) + } + + def restartSpark(): Unit = { + ClusterManagerTestBase.stopSpark() + ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps) + } + + def verifyTable (snc: SnappyContext): Unit = { + val count = snc.sql("SELECT * FROM " + tableName).collect().length + assert(count == 1005, s"unexpected count $count") + } +} + +object ExecutorMessageDUnitTest { + + def stopExecutor(): Unit = { + ExecutorInitiator.stop() + Thread.sleep(1000) + } + + def stopProcess(): Unit = { + ClusterManagerTestBase.stopAny() + // Thread.sleep(2000) + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/externalstore/JDBCPreparedStatementDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/externalstore/JDBCPreparedStatementDUnitTest.scala new file mode 100644 index 0000000000..1c8e356a71 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/externalstore/JDBCPreparedStatementDUnitTest.scala @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.externalstore + +import java.sql.PreparedStatement +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.AvailablePortHelper +import org.junit.Assert.assertEquals +import org.apache.spark.Logging + + +// scalastyle:off println + +class JDBCPreparedStatementDUnitTest(s: String) extends ClusterManagerTestBase(s) + with Logging { + + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + + def testPreparedStatementToExecuteSingleInsertUpdateDeleteQuery(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + val stmt = conn.createStatement() + stmt.execute("drop table if exists t1") + stmt.execute("create table t1(id integer, fs integer) using column options" + + "(key_columns 'id', COLUMN_MAX_DELTA_ROWS '7', BUCKETS '2')") + var ps: PreparedStatement = null + + val query = "insert into t1 values(?,?)" + ps = conn.prepareStatement(query) + for (i <- 0 until 20) { + ps.setInt(1, i) + ps.setInt(2, i + 10) + ps.executeUpdate() + } + var rscnt = stmt.executeQuery("select count(*) from t1") + rscnt.next() + assertEquals(20, rscnt.getInt(1)) + + val rs = stmt.executeQuery("select * from t1 order by id") + var i = 0 + while (rs.next()) { + assertEquals(i, rs.getInt(1)) + assertEquals(i 
+ 10, rs.getInt(2)) + i = i + 1 + } + + val query1 = "update t1 set fs = ? where fs = ?" + ps = conn.prepareStatement(query1) + var no = 0 + for (i <- 0 until 20) { + if (i % 2 == 0) { + no = i + 100 + } else { + no = i + 1000 + } + ps.setInt(1, no) + ps.setInt(2, i + 10) + ps.executeUpdate() + } + + rscnt = stmt.executeQuery("select count(*) from t1") + rscnt.next() + assertEquals(20, rscnt.getInt(1)) + + val rs1 = stmt.executeQuery("select * from t1 order by id") + var i2 = 0 + while (rs1.next()) { + if (i2 % 2 == 0) { + no = i2 + 100 + } else { + no = i2 + 1000 + } + assertEquals(i2, rs1.getInt(1)) + assertEquals(no, rs1.getInt(2)) + i2 = i2 + 1 + } + + val query2 = "delete from t1 where id = ?" + ps = conn.prepareStatement(query2) + for (i2 <- 0 until 20) { + ps.setInt(1, i2) + ps.executeUpdate() + } + + rscnt = stmt.executeQuery("select count(*) from t1") + rscnt.next() + assertEquals(0, rscnt.getInt(1)) + } + + def testPreparedStatementToExecuteInsertUpdateDeleteBulkQueries(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + val stmt = conn.createStatement() + stmt.execute("drop table if exists t3") + stmt.execute("create table t3(id integer, fs integer) using column options" + + "(key_columns 'id', COLUMN_MAX_DELTA_ROWS '7', BUCKETS '2')") + var ps: PreparedStatement = null + + val query = "insert into t3 values(?,?)" + ps = conn.prepareStatement(query) + for (i <- 1 to 20) { + ps.setInt(1, i) + ps.setInt(2, i + 10) + ps.addBatch() + if (i % 10 == 0) { + ps.executeBatch() + } + } + ps.executeBatch() + + var rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(20, rscnt.getInt(1)) + + val rs = stmt.executeQuery("select * from t3 order by id") + var i = 1 + while (rs.next()) { + assertEquals(i, rs.getInt(1)) + assertEquals(i + 10, rs.getInt(2)) + i = i + 1 + } + + val query1 = "update t3 set 
fs = ? where fs = ?" + ps = conn.prepareStatement(query1) + var fs1 = 0 + for (i <- 1 to 20) { + if (i % 2 == 0) { + fs1 = i + 100 + } else { + fs1 = i + 1000 + } + ps.setInt(1, fs1) + ps.setInt(2, i + 10) + ps.addBatch() + if (i % 10 == 0) { + ps.executeBatch() + } + } + ps.executeBatch() + + rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(20, rscnt.getInt(1)) + + val rs1 = stmt.executeQuery("select * from t3 order by id") + var i2 = 1 + var no = 0 + while (rs1.next()) { + if (i2 % 2 == 0) { + no = i2 + 100 + } else { + no = i2 + 1000 + } + assertEquals(i2, rs1.getInt(1)) + assertEquals(no, rs1.getInt(2)) + i2 = i2 + 1 + } + + val query2 = "delete from t3 where id = ?" + ps = conn.prepareStatement(query2) + for (i2 <- 1 to 20) { + ps.setInt(1, i2) + ps.addBatch() + if (i2 % 10 == 0) { + ps.executeBatch() + } + } + ps.executeBatch() + + rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(0, rscnt.getInt(1)) + } + + def insertRecords(s: Int, e: Int): (Int, Int) = { + var numRows = 0 + var ps: PreparedStatement = null + val conn = getANetConnection(netPort1) + val query = "insert into t3 values(?,?)" + ps = conn.prepareStatement(query) + for (i <- s to e) { + ps.setInt(1, i) + ps.setString(2, "str" + i) + ps.addBatch() + if (i % 10 == 0) { + var records = ps.executeBatch() + records.foreach(r => numRows += r) + } + } + var records = ps.executeBatch() + records.foreach(r => numRows += r) + (1, numRows) + } + + def updateRecords(val1: Int, val2: Int): (Int, Int) = { + var numRows = 0 + var ps: PreparedStatement = null + val conn = getANetConnection(netPort1) + val query1 = "update t3 set fs = ? where fs = ?" 
+ ps = conn.prepareStatement(query1) + var fs1 = 1 + for (i <- val1 to val2) { + ps.setString(1, "temp" + i) + ps.setString(2, "str" + i) + ps.addBatch() + if (i % 10 == 0) { + var records = ps.executeBatch() + records.foreach(r => numRows += r) + } + } + var records = ps.executeBatch() + records.foreach(r => numRows += r) + (1, numRows) + } + + def deleteRecords(val1: Int, val2: Int): (Int, Int) = { + var numRows = 0 + var ps: PreparedStatement = null + val conn = getANetConnection(netPort1) + val query2 = "delete from t3 where fs = ?" + ps = conn.prepareStatement(query2) + for (i2 <- val1 to val2) { + ps.setString(1, "temp" + i2) + ps.addBatch() + if (i2 % 10 == 0) { + var records = ps.executeBatch() + records.foreach(r => numRows += r) + } + } + var records = ps.executeBatch() + records.foreach(r => numRows += r) + (1, numRows) + } + + def testConcurrentBatchDmlQueriesUsingPreparedStatement(): Unit = { + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + val stmt = conn.createStatement() + stmt.execute("drop table if exists t3") + stmt.execute("create table t3(id integer, fs string) using column options" + + "(key_columns 'id', COLUMN_MAX_DELTA_ROWS '7', BUCKETS '2')") + + var thrCount1: Integer = 0 + var insertedRecords = 0 + val colThread1 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = insertRecords(1, 10) + thrCount1 += result._1 + insertedRecords += result._2 + }) + } + }) + colThread1.start() + + var thrCount2: Integer = 0 + val colThread2 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = insertRecords(11, 20) + thrCount2 += result._1 + insertedRecords += result._2 + }) + } + }) + colThread2.start() + + colThread1.join() + colThread2.join() + + var rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(100, rscnt.getInt(1)) + assertEquals(100, insertedRecords) + + val rs = stmt.executeQuery("select * 
from t3 order by id") + + + var i = 1 + var cnt = 0 + + while (rs.next()) { + if (cnt == 5) { + i = i + 1 + cnt = 0 + } + assertEquals(i, rs.getInt(1)) + assertEquals("str" + i, rs.getString(2)) + cnt = cnt + 1 + } + + var thrCount3: Integer = 0 + var updatedRecords = 0 + val colThread3 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = updateRecords(1, 20) + thrCount3 += result._1 + updatedRecords += result._2 + }) + } + }) + colThread3.start() + + var thrCount4: Integer = 0 + val colThread4 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = updateRecords(11, 20) + thrCount4 += result._1 + updatedRecords += result._2 + }) + } + }) + colThread4.start() + + var thrCount5: Integer = 0 + val colThread5 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = updateRecords(21, 30) + thrCount5 += result._1 + updatedRecords += result._2 + }) + } + }) + colThread5.start() + + colThread3.join() + colThread4.join() + colThread5.join() + + + rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(100, rscnt.getInt(1)) + assertEquals(100, updatedRecords) + + var rs1 = stmt.executeQuery("select * from t3 order by id") + var i2 = 1 + cnt = 0 + while (rs1.next()) { + if (cnt == 5) { + i2 = i2 + 1 + cnt = 0 + } + assertEquals(i2, rs1.getInt(1)) + assertEquals("temp" + i2, rs1.getString(2)) + cnt = cnt + 1 + } + + var thrCount6: Integer = 0 + var deletedRecords = 0 + val colThread6 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = deleteRecords(1, 20) + thrCount6 += result._1 + deletedRecords += result._2 + }) + } + }) + colThread6.start() + + var thrCount7: Integer = 0 + val colThread7 = new Thread(new Runnable {def run() { + (1 to 5) foreach (i => { + var result = deleteRecords(11, 20) + thrCount7 += result._1 + deletedRecords += result._2 + }) + } + }) + colThread7.start() + + var thrCount8: Integer = 0 + val colThread8 = new Thread(new 
Runnable {def run() { + (1 to 5) foreach (i => { + var result = deleteRecords(21, 30) + thrCount8 += result._1 + deletedRecords += result._2 + }) + } + }) + colThread8.start() + + colThread6.join() + colThread7.join() + colThread8.join() + + rscnt = stmt.executeQuery("select count(*) from t3") + rscnt.next() + assertEquals(0, rscnt.getInt(1)) + assertEquals(100, deletedRecords) + } +} \ No newline at end of file diff --git a/cluster/src/dunit/scala/io/snappydata/externalstore/RowTableDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/externalstore/RowTableDUnitTest.scala new file mode 100644 index 0000000000..42fa54ea0f --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/externalstore/RowTableDUnitTest.scala @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.externalstore + +import io.snappydata.cluster.ClusterManagerTestBase + +import org.apache.spark.sql.SaveMode + +class RowTableDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + def testTableCreation(): Unit = { + startSparkJob() + } + + def testCreateInsertAndDropOfTable(): Unit = { + startSparkJob2() + } + + + private val tableName: String = "RowTable" + + val props = Map.empty[String, String] + + def startSparkJob(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + val rdd = sc.parallelize(data, data.length).map(s => new RowData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(tableName, "row", dataDF.schema, props) + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 0) + + snc.dropTable(tableName, ifExists = true) + getLogWriter.info("Successful") + } + + def startSparkJob2(): Unit = { + val snc = org.apache.spark.sql.SnappyContext(sc) + + val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + val rdd = sc.parallelize(data, data.length).map(s => new RowData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(tableName, "row", dataDF.schema, props) + + dataDF.write.format("row").mode(SaveMode.Append) + .options(props).saveAsTable(tableName) + + val result = snc.sql("SELECT * FROM " + tableName) + val r = result.collect() + assert(r.length == 5) + + snc.dropTable(tableName, ifExists = true) + getLogWriter.info("Successful") + } + + def testJobSNAP1224(): Unit = { + + + val query: String = "select '5-CTFIX_ORDER' as SrcFl, * from ORDER_DETAILS " + + "where trd_date>='20160413' and glb_root_order_id in " + + "( select glb_root_order_id from ORDER_DETAILS where trd_date>='20160413' and src_sys='CRIO' ) " + + "order by glb_root_order_id, trd_datE" + val order_details_create_ddl = + "create table 
order_details" + + "(SINGLE_ORDER_DID BIGINT ,SYS_ORDER_ID VARCHAR(64) ,SYS_ORDER_VER INTEGER , " + + "DATA_SNDG_SYS_NM VARCHAR(128) ,SRC_SYS VARCHAR(20) ,SYS_PARENT_ORDER_ID VARCHAR(64) ," + + "SYS_PARENT_ORDER_VER SMALLINT ,PARENT_ORDER_TRD_DATE VARCHAR(20),PARENT_ORDER_SYS_NM " + + "VARCHAR(128) ,SYS_ALT_ORDER_ID VARCHAR(64) ,TRD_DATE VARCHAR(20),GIVE_UP_BROKER " + + "VARCHAR(20) ,EVENT_RCV_TS TIMESTAMP ,SYS_ROOT_ORDER_ID VARCHAR(64) ,GLB_ROOT_ORDER_ID " + + " VARCHAR(64) ,GLB_ROOT_ORDER_SYS_NM VARCHAR(128) ,GLB_ROOT_ORDER_RCV_TS TIMESTAMP , " + + "SYS_ORDER_STAT_CD VARCHAR(20) ,SYS_ORDER_STAT_DESC_TXT VARCHAR(120) ,DW_STAT_CD " + + "VARCHAR(20) ,EVENT_TS TIMESTAMP,ORDER_OWNER_FIRM_ID VARCHAR(20),RCVD_ORDER_ID VARCHAR" + + "(64) ,EVENT_INITIATOR_ID VARCHAR(64),TRDR_SYS_LOGON_ID VARCHAR(64),SOLICITED_FG " + + "VARCHAR(1),RCVD_FROM_FIRMID_CD VARCHAR(20),RCV_DESK VARCHAR(20),SYS_ACCT_ID_SRC " + + "VARCHAR(64) ,CUST_ACCT_MNEMONIC VARCHAR(128),CUST_SLANG VARCHAR(20) ,SYS_ACCT_TYPE " + + "VARCHAR(20) ,CUST_EXCH_ACCT_ID VARCHAR(64) ,SYS_SECURITY_ALT_ID VARCHAR(64) ," + + "TICKER_SYMBOL VARCHAR(32) ,TICKER_SYMBOL_SUFFIX VARCHAR(20) ,PRODUCT_CAT_CD VARCHAR" + + "(20) ,SIDE VARCHAR(20) ,LIMIT_PRICE DECIMAL(38, 18),STOP_PRICE DECIMAL(38, 18), " + + "ORDER_QTY DECIMAL(18, 4) ,TOTAL_EXECUTED_QTY DECIMAL(18, 4) , AVG_PRICE DECIMAL(38, " + + "18) ,DAY_EXECUTED_QTY DECIMAL(18, 4) ,DAY_AVG_PRICE DECIMAL(38, 18) ,REMNG_QTY DECIMAL(18, 4) ," + + "CNCL_QTY DECIMAL(18, 4) ,CNCL_BY_FG VARCHAR(1) ,EXPIRE_TS TIMESTAMP ,EXEC_INSTR VARCHAR(64) ,TIME_IN_FORCE VARCHAR(20) ," + + "RULE80AF VARCHAR(1) ,DEST_FIRMID_CD VARCHAR(20) ,SENT_TO_CONDUIT VARCHAR(20) ,SENT_TO_MPID VARCHAR(20) ," + + "RCV_METHOD_CD VARCHAR(20) ,LIMIT_ORDER_DISP_IND VARCHAR(1) ,MERGED_ORDER_FG VARCHAR(1) ,MERGED_TO_ORDER_ID VARCHAR(64) ," + + "RCV_DEPT_ID VARCHAR(20) ,ROUTE_METHOD_CD VARCHAR(20) ,LOCATE_ID VARCHAR(256) ,LOCATE_TS TIMESTAMP ,LOCATE_OVERRIDE_REASON VARCHAR(2000) ," + + "LOCATE_BROKER VARCHAR(256) 
,ORDER_BRCH_SEQ_TXT VARCHAR(20) ,IGNORE_CD VARCHAR(20) ,CLIENT_ORDER_REFID VARCHAR(64) ," + + "CLIENT_ORDER_ORIG_REFID VARCHAR(64) ,ORDER_TYPE_CD VARCHAR(20) ,SENT_TO_ORDER_ID VARCHAR(64) ,ASK_PRICE DECIMAL(38, 18) ," + + "ASK_QTY DECIMAL(18, 4) ,BID_PRICE DECIMAL(38, 18) ,BID_QTY DECIMAL(18, 4) ,REG_NMS_EXCEP_CD VARCHAR(20) ," + + "REG_NMS_EXCEP_TXT VARCHAR(2000) ,REG_NMS_LINK_ID VARCHAR(64) ,REG_NMS_PRINTS VARCHAR(1) ,REG_NMS_STOP_TIME TIMESTAMP ," + + "SENT_TS TIMESTAMP ,RULE92 VARCHAR(1) ,RULE92_OVERRIDE_TXT VARCHAR(2000) ,RULE92_RATIO DECIMAL(25, 10) ," + + "EXMPT_STGY_BEGIN_TIME TIMESTAMP ,EXMPT_STGY_END_TIME TIMESTAMP ,EXMPT_STGY_PRICE_INST VARCHAR(2000) ," + + "EXMPT_STGY_QTY DECIMAL(18, 4) ,CAPACITY VARCHAR(20) ,DISCRETION_QTY DECIMAL(18, 4) ,DISCRETION_PRICE VARCHAR(64) ," + + "BRCHID_CD VARCHAR(20) ,BASKET_ORDER_ID VARCHAR(64) ,PT_STRTGY_CD VARCHAR(20) ,SETL_DATE VARCHAR(20),SETL_TYPE VARCHAR(20) ," + + "SETL_CURR_CD VARCHAR(20) ,SETL_INSTRS VARCHAR(2000) ,COMMENT_TXT VARCHAR(2000) ,CHANNEL_NM VARCHAR(128) ," + + "FLOW_CAT VARCHAR(20) ,FLOW_CLASS VARCHAR(20) ,FLOW_TGT VARCHAR(20) ,ORDER_FLOW_ENTRY VARCHAR(20) ,ORDER_FLOW_CHANNEL VARCHAR(20) ," + + "ORDER_FLOW_DESK VARCHAR(20) ,FLOW_SUB_CAT VARCHAR(20) ,STRTGY_CD VARCHAR(20) ,RCVD_FROM_VENDOR VARCHAR(20) ," + + "RCVD_FROM_CONDUIT VARCHAR(20) ,SLS_PERSON_ID VARCHAR(64) ,SYNTHETIC_FG VARCHAR(1) ,SYNTHETIC_TYPE VARCHAR(20) ," + + "FXRT DECIMAL(25, 8) ,PARENT_CLREFID VARCHAR(64) ,REF_TIME_ID INTEGER ,OPT_CONTRACT_QTY DECIMAL(18, 4) ," + + "OCEAN_PRODUCT_ID BIGINT ,CREATED_BY VARCHAR(64) ,CREATED_DATE TIMESTAMP ,FIRM_ACCT_ID BIGINT ,DEST VARCHAR(20) ," + + "CNTRY_CD VARCHAR(20) ,DW_SINGLE_ORDER_CAT VARCHAR(20) ,CLIENT_ACCT_ID BIGINT ," + + "EXTERNAL_TRDR_ID VARCHAR(64) ,ANONYMOUS_ORDER_FG VARCHAR(1) ,SYS_SECURITY_ALT_SRC VARCHAR(20) ,CURR_CD VARCHAR(20) ," + + "EVENT_TYPE_CD VARCHAR(20) ,SYS_CLIENT_ACCT_ID VARCHAR(64) ,SYS_FIRM_ACCT_ID VARCHAR(20) ,SYS_TRDR_ID VARCHAR(64) ," + + "DEST_ID INTEGER 
,OPT_PUT_OR_CALL VARCHAR(20) ,SRC_FEED_REF_CD VARCHAR(64) ,DIGEST_KEY VARCHAR(128) ,EFF_TS TIMESTAMP ," + + "ENTRY_TS TIMESTAMP ,OPT_STRIKE_PRICE DECIMAL(38, 18) ,OPT_MATURITY_DATE VARCHAR(20) ,ORDER_RESTR VARCHAR(4) ," + + "SHORT_SELL_EXEMPT_CD VARCHAR(4) ,QUOTE_TIME TIMESTAMP ,SLS_CREDIT VARCHAR(20) ,SYS_SECURITY_ID VARCHAR(64) ," + + "SYS_SECURITY_ID_SRC VARCHAR(20) ,SYS_SRC_SYS_ID VARCHAR(20) ,SYS_ORDER_ID_UNIQUE_SUFFIX VARCHAR(20) ," + + "DEST_ID_SRC VARCHAR(4) ,GLB_ROOT_SRC_SYS_ID VARCHAR(20) ,GLB_ROOT_ORDER_ID_SUFFIX VARCHAR(64) ,SYS_ROOT_ORDER_ID_SUFFIX VARCHAR(20) ," + + "SYS_PARENT_ORDER_ID_SUFFIX VARCHAR(20) ,CREDIT_BREACH_PERCENT DECIMAL(25, 10) ,CREDIT_BREACH_OVERRIDE VARCHAR(256) ," + + "INFO_BARRIER_ID VARCHAR(256) ,EXCH_PARTICIPANT_ID VARCHAR(64) ,REJECT_REASON_CD VARCHAR(4) ,DIRECTED_DEST VARCHAR(20) ," + + "REG_NMS_LINK_TYPE VARCHAR(20) ,CONVER_RATIO DECIMAL(18, 9) ,STOCK_REF_PRICE DECIMAL(38, 18) ,CB_SWAP_ORDER_FG VARCHAR(1) ," + + "EV DECIMAL(38, 18) ,SYS_DATA_MODIFIED_TS TIMESTAMP ,CMSN_TYPE VARCHAR(20), " + + "SYS_CREDIT_TRDR_ID VARCHAR(20) ,SYS_ENTRY_USER_ID VARCHAR(20) ,OPEN_CLOSE_CD VARCHAR" + + "(20) ,AS_OF_TRD_FG VARCHAR(1),HANDLING_INSTR VARCHAR(20),SECURITY_DESC VARCHAR(512) ," + + "MINIMUM_QTY DECIMAL(21, 6) ,CUST_OR_FIRM VARCHAR(20) ,MAXIMUM_SHOW DECIMAL(21, 6) ,SECURITY_SUB_TYPE VARCHAR(20) ," + + "MULTILEG_RPT_TYPE VARCHAR(4) ,ORDER_ACTION_TYPE VARCHAR(4) ,BARRIER_STYLE VARCHAR(4) ," + + " AUTO_IOI_REF_TYPE VARCHAR(4) ,PEG_OFFSET_VAL DECIMAL(10, 2) , AUTO_IOI_OFFSET DECIMAL" + + "(28, 12) ,IOI_PRICE DECIMAL(28, 12) ,TGT_PRICE DECIMAL(28, 12) ,IOI_QTY VARCHAR(64) , " + + "IOI_ORDER_QTY DECIMAL(18, 4) ,CMSN VARCHAR(64) ,SYS_LEG_REF_ID VARCHAR(64) , " + + "TRADING_TYPE VARCHAR(4) ,EXCH_ORDER_ID VARCHAR(64) ,DEAL_ID VARCHAR(64) , " + + "ORDER_TRD_TYPE VARCHAR(4) ,CXL_REASON VARCHAR(64))" + + val snc = org.apache.spark.sql.SnappyContext(sc) + snc.sql("drop table if exists order_details") + snc.sql(order_details_create_ddl) + // This test 
is only added for functional test hence no data and assertion is added + snc.sql(query).show + snc.sql("drop table order_details") + + } +} + +case class RowData(col1: Int, col2: Int, col3: Int) diff --git a/cluster/src/dunit/scala/io/snappydata/streaming/StreamingDUnitTest.scala b/cluster/src/dunit/scala/io/snappydata/streaming/StreamingDUnitTest.scala new file mode 100644 index 0000000000..93783078a9 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/streaming/StreamingDUnitTest.scala @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.streaming + +import java.sql.{Connection, DriverManager} + +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.AvailablePortHelper + +class StreamingDUnitTest(val s: String) extends ClusterManagerTestBase(s) { + + override def tearDown2(): Unit = { + super.tearDown2() + } + + private def getANetConnection(netPort: Int): Connection = { + val driver = "io.snappydata.jdbc.ClientDriver" + Class.forName(driver).newInstance //scalastyle:ignore + val url = "jdbc:snappydata://localhost:" + netPort + "/" + DriverManager.getConnection(url) + } + + def testSnappyStreamingContextStartStop(): Unit = { + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm1.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + val s = conn.createStatement() + s.execute("streaming stop") + s.execute("streaming init 2secs") + s.execute("streaming init 4secs") + s.execute("create stream table tweetsTable " + + "(id long, text string, fullName string, " + + "country string, retweets int, hashtag string) " + + "using twitter_stream options (" + + "consumerKey '0Xo8rg3W0SOiqu14HZYeyFPZi', " + + "consumerSecret 'gieTDrdzFS4b1g9mcvyyyadOkKoHqbVQALoxfZ19eHJzV9CpLR', " + + "accessToken '43324358-0KiFugPFlZNfYfib5b6Ah7c2NdHs1524v7LM2qaUq', " + + "accessTokenSecret 'aB1AXHaRiE3g2d7tLgyASdgIg9J7CzbPKBkNfvK8Y88bu', " + + "rowConverter 'io.snappydata.streaming.TweetToRowsConverter')") + s.execute("streaming start") + s.execute("streaming start") + s.execute("streaming stop") + s.execute("streaming stop") + s.execute("drop table tweetsTable") + conn.close() + } +} diff --git a/cluster/src/dunit/scala/io/snappydata/streaming/TweetToRowsConverter.scala b/cluster/src/dunit/scala/io/snappydata/streaming/TweetToRowsConverter.scala new file mode 100644 index 0000000000..ee24432899 --- /dev/null +++ b/cluster/src/dunit/scala/io/snappydata/streaming/TweetToRowsConverter.scala @@ -0,0 +1,19 
@@
+package io.snappydata.streaming
+
+import twitter4j.Status
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.streaming.StreamToRowsConverter
+
+/**
+ * Converts a single twitter4j `Status` message into one six-column `Row`:
+ * id, text, user name, user language, retweet count and the hashtag texts
+ * joined with commas.
+ */
+class TweetToRowsConverter extends StreamToRowsConverter with Serializable {
+
+  /**
+   * @param message the incoming stream element; it is cast to `Status`
+   * @return a one-element sequence holding the row built from the status
+   */
+  override def toRows(message: Any): Seq[Row] = {
+    val tweet = message.asInstanceOf[Status]
+    val author = tweet.getUser
+    val hashtags = tweet.getHashtagEntities.map(_.getText).mkString(",")
+    // NOTE(review): the fourth value is the user's language, while StreamingDUnitTest
+    // declares the fourth column of its stream table as `country` - confirm intent.
+    val columns = Seq(
+      tweet.getId,
+      tweet.getText,
+      author.getName,
+      author.getLang,
+      tweet.getRetweetCount,
+      hashtags)
+    Seq(Row.fromSeq(columns))
+  }
+}
diff --git a/cluster/src/dunit/scala/org/apache/spark/DynamicJarInstallationDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/DynamicJarInstallationDUnitTest.scala
new file mode 100644
index 0000000000..debcad9efa
--- /dev/null
+++ b/cluster/src/dunit/scala/org/apache/spark/DynamicJarInstallationDUnitTest.scala
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+
+package org.apache.spark
+
+import java.io.File
+import java.net.URL
+import java.sql.{Connection, DriverManager}
+
+import _root_.io.snappydata.cluster.ClusterManagerTestBase
+import org.joda.time.DateTime
+
+import org.apache.spark.sql.SnappyContext
+import org.apache.spark.sql.collection.{Utils => Utility}
+import org.apache.spark.util.Utils
+
+/**
+ * Distributed test checking that classes from a jar announced through the
+ * `SNAPPY_CHANGEABLE_JAR_NAME` local property can be loaded inside tasks, stop
+ * being loadable once the property is cleared, and are replaced when a new jar
+ * is announced.
+ *
+ * @param s test name handed to `ClusterManagerTestBase`
+ */
+class DynamicJarInstallationDUnitTest(val s: String)
+    extends ClusterManagerTestBase(s) {
+
+  val currentLocatorPort = ClusterManagerTestBase.locPort
+
+  /**
+   * Clears the jar property, stops the network servers on all four VMs and
+   * empties the boot properties.
+   */
+  // NOTE(review): unlike sibling suites this override does not call
+  // super.tearDown2() - confirm that is deliberate.
+  override def tearDown2(): Unit = {
+    sc.setLocalProperty("SNAPPY_CHANGEABLE_JAR_NAME", null)
+    Array(vm3, vm2, vm1, vm0).foreach(_.invoke(getClass, "stopNetworkServers"))
+    bootProps.clear()
+  }
+
+  /** Opens a client connection to `localhost:netPort`; the caller must close it. */
+  private def getANetConnection(netPort: Int): Connection = {
+    val driver = "io.snappydata.jdbc.ClientDriver"
+    // scalastyle:off classforname
+    Class.forName(driver).newInstance
+    // scalastyle:on classforname
+    val url = "jdbc:snappydata://localhost:" + netPort + "/"
+    DriverManager.getConnection(url)
+  }
+
+
+  /**
+   * Runs `loadClass(className, version)` through `mapExecutors` and asserts that
+   * the number of successful loads equals `count`.
+   */
+  def verifyClassOnExecutors(snc: SnappyContext, className: String,
+      version: String, count: Int): Unit = {
+    val countInstances = Utility.mapExecutors[Int](snc.sparkContext,
+      () => {
+        if (DynamicJarInstallationDUnitTest.loadClass(className, version)) {
+          Seq(1).iterator
+        } else Iterator.empty
+      }).length
+
+    assert(countInstances == count,
+      s"Assertion failed as countInstances=$countInstances and count=$count did not match")
+  }
+
+
+  /**
+   * Three phases, each verified by a job over the same two-partition RDD:
+   *  1. announce a jar whose classes report "1" from `toString` and load `FakeJobClass`;
+   *  2. clear the property and expect `ClassNotFoundException` for `FakeJobClass`;
+   *  3. announce a new jar whose classes report "2" and load `FakeJobClass` again.
+   */
+  def testJarDeployedWithSparkContext(): Unit = {
+    // scalastyle:off println classforname
+    val jarV1 = DynamicJarInstallationDUnitTest.createJarWithClasses(
+      classNames = Seq("FakeJobClass", "FakeJobClass1"),
+      toStringValue = "1",
+      Nil, Nil,
+      "testJar_SNAPPY_JOB_SERVER_JAR_%s.jar".format(System.currentTimeMillis()))
+
+    // property value is "<app name>,<timestamp>,<jar url>"
+    val propertyV1 = (Seq("app1", DateTime.now) ++ Array[URL](jarV1)).mkString(",")
+    sc.setLocalProperty("SNAPPY_CHANGEABLE_JAR_NAME", propertyV1)
+    // verify that jar is loaded at executors
+    val rdd = sc.parallelize(1 to 10, 2)
+
+    sc.runJob(rdd, { iter: Iterator[Int] => {
+      val currentLoader = Thread.currentThread().getContextClassLoader
+      println("Current classLoader is" + currentLoader)
+      val fakeClass =
+        Class.forName("FakeJobClass", false, currentLoader).newInstance()
+      assert(fakeClass.toString == "1")
+      1
+    }
+    })
+
+    // removeJar
+    sc.setLocalProperty("SNAPPY_CHANGEABLE_JAR_NAME", null)
+
+    sc.runJob(rdd, { iter: Iterator[Int] => {
+      org.scalatest.Assertions.intercept[ClassNotFoundException] {
+        val currentLoader = Thread.currentThread().getContextClassLoader
+        println("Current classLoader is" + currentLoader)
+        Class.forName("FakeJobClass", false, currentLoader).newInstance()
+      }
+      1
+    }
+    })
+
+    // Again add the same jar with a different name
+    val jarV2 = DynamicJarInstallationDUnitTest.createJarWithClasses(
+      classNames = Seq("FakeJobClass", "FakeJobClass1"),
+      toStringValue = "2",
+      Nil, Nil,
+      "testJar_SNAPPY_JOB_SERVER_JAR_%s.jar".format(System.currentTimeMillis()))
+
+    val propertyV2 = (Seq("app1", DateTime.now) ++ Array[URL](jarV2)).mkString(",")
+    sc.setLocalProperty("SNAPPY_CHANGEABLE_JAR_NAME", propertyV2)
+    // verify that jar is loaded at executors
+
+    sc.runJob(rdd, { iter: Iterator[Int] => {
+      val currentLoader = Thread.currentThread().getContextClassLoader
+      println("Current classLoader is" + currentLoader)
+      val fakeClass =
+        Class.forName("FakeJobClass", false, currentLoader).newInstance()
+      assert(fakeClass.toString == "2")
+      1
+    }
+    })
+    // scalastyle:on println classforname
+  }
+}
+
+
+object DynamicJarInstallationDUnitTest {
+
+  /**
+   * Compiles the requested classes into a fresh temp directory and packs them
+   * into a jar.
+   *
+   * @param classNames         classes to generate
+   * @param toStringValue      value passed to `TestUtils.createCompiledClass` for each class
+   * @param classNamesWithBase (child, base) pairs for classes generated with a base class
+   * @param classpathUrls      extra classpath entries used during compilation
+   * @param jarName            jar file name, passed through `String.format` with the current
+   *                           time; when empty a `testJar-<millis>.jar` name is used
+   * @return URL of the created jar
+   */
+  def createJarWithClasses(
+      classNames: Seq[String],
+      toStringValue: String = "",
+      classNamesWithBase: Seq[(String, String)] = Seq(),
+      classpathUrls: Seq[URL] = Seq(),
+      jarName: String = ""
+  ): URL = {
+    val tempDir = Utils.createTempDir()
+    val files1 = for (name <- classNames) yield {
+      TestUtils.createCompiledClass(name, tempDir, toStringValue, classpathUrls = classpathUrls)
+    }
+    val files2 = for ((childName, baseName) <- classNamesWithBase) yield {
+      TestUtils.createCompiledClass(childName, tempDir, toStringValue, baseName, classpathUrls)
+    }
+    val jarFile = if (jarName.isEmpty) {
+      new File(tempDir, "testJar-%s.jar".format(System.currentTimeMillis()))
+    }
+    else new File(tempDir, jarName.format(System.currentTimeMillis()))
+    TestUtils.createJar(files1 ++ files2, jarFile)
+  }
+
+
+  /**
+   * Loads and instantiates `className` through the thread context class loader
+   * and asserts that the instance's `toString` equals `version`.
+   *
+   * @return `true` when the class was loaded; `false` when it was not found and
+   *         `version` is empty
+   * @throws ClassNotFoundException when the class is not found and `version` is non-empty
+   */
+  @throws[ClassNotFoundException]
+  def loadClass(className: String,
+      version: String = ""): Boolean = {
+    // an empty version means the caller expects the class to be absent
+    val catchExpectedException: Boolean = version.isEmpty
+    val loader = Thread.currentThread().getContextClassLoader
+    assert(loader != null)
+    try {
+      // scalastyle:off classforname
+      val fakeClass = Class.forName(className, false, loader).newInstance()
+      // scalastyle:on classforname
+      assert(fakeClass != null)
+      assert(fakeClass.toString.equals(version))
+      true
+    } catch {
+      case cnfe: ClassNotFoundException =>
+        if (!catchExpectedException) throw cnfe
+        else false
+    }
+  }
+}
diff --git a/cluster/src/dunit/scala/org/apache/spark/jdbc/ConnectionConfDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/jdbc/ConnectionConfDUnitTest.scala
new file mode 100644
index 0000000000..f96809ec93
--- /dev/null
+++ b/cluster/src/dunit/scala/org/apache/spark/jdbc/ConnectionConfDUnitTest.scala
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.jdbc
+
+import io.snappydata.cluster.ClusterManagerTestBase
+import io.snappydata.core.Data
+
+import org.apache.spark.TaskContext
+import org.apache.spark.sql.SnappyContext
+
+
+/**
+ * Distributed test that obtains JDBC connections inside Spark tasks from a
+ * `ConnectionConf` built on the driver and uses them to update a row table.
+ */
+class ConnectionConfDUnitTest(s: String) extends ClusterManagerTestBase(s) {
+
+  /**
+   * Saves five rows into `MY_SCHEMA.MY_TABLE`, runs `update ... set col1 = 9`
+   * from every partition through `ConnectionUtil.getConnection`, and asserts
+   * that every `col1` read back equals 9.
+   *
+   * The table and the schema are dropped in `finally` blocks so that a failed
+   * assertion does not leave them behind.
+   */
+  def testSimpleConnection(): Unit = {
+    val snc = SnappyContext(sc)
+    val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7))
+    // one partition per input row
+    val rdd = sc.parallelize(data, data.length).map(row => new Data(row(0), row(1), row(2)))
+    val dataDF = snc.createDataFrame(rdd)
+
+    snc.sql("create schema MY_SCHEMA")
+    try {
+      dataDF.write.format("row").saveAsTable("MY_SCHEMA.MY_TABLE")
+      try {
+        val conf = new ConnectionConfBuilder(snc.snappySession).build
+
+        rdd.foreachPartition { _ =>
+          val conn = ConnectionUtil.getConnection(conf)
+          // the connection is closed when the task completes
+          TaskContext.get().addTaskCompletionListener(_ => conn.close())
+          val stmt = conn.prepareStatement("update MY_SCHEMA.MY_TABLE set col1 = 9")
+          try {
+            stmt.executeUpdate()
+          } finally {
+            stmt.close()
+          }
+          ()
+        }
+
+        val result = snc.sql("SELECT col1 FROM MY_SCHEMA.MY_TABLE" )
+        result.collect().foreach(v => assert(v(0) == 9))
+      } finally {
+        snc.sql("drop table MY_SCHEMA.MY_TABLE" )
+      }
+    } finally {
+      snc.sql("drop schema my_schema")
+    }
+
+    println("Successful")
+  }
+
+
+}
diff --git a/cluster/src/dunit/scala/org/apache/spark/memory/MemoryManagerRestartDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/memory/MemoryManagerRestartDUnitTest.scala
new file mode 100644
index 0000000000..46aa9f48b6
--- /dev/null
+++ b/cluster/src/dunit/scala/org/apache/spark/memory/MemoryManagerRestartDUnitTest.scala
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.memory
+
+import java.util.Properties
+
+import io.snappydata.cluster.{ClusterManagerTestBase, ExecutorInitiator}
+import io.snappydata.test.dunit.{DistributedTestBase, SerializableRunnable}
+import org.eclipse.collections.api.block.procedure.primitive.ObjectLongProcedure
+
+import org.apache.spark.SparkEnv
+import org.apache.spark.sql.SnappyContext
+import org.apache.spark.sql.collection.Utils
+
+
+/**
+ * Distributed tests observing the `SnappyUnifiedMemoryManager` on `vm1` across
+ * executor failure, cache close/restart and driver restart. Each test records
+ * the identity hash of the memory manager and the bytes accounted to a named
+ * owner, and compares them before and after the event.
+ */
+class MemoryManagerRestartDUnitTest(s: String) extends ClusterManagerTestBase(s) {
+
+  self =>
+
+  import MemoryManagerRestartDUnitTest._
+
+  /**
+   * Allocates 1000 bytes for owner `testExecutorRestart` on vm1, fails the
+   * executors, waits until vm1 reports a memory manager with a different
+   * identity, and asserts the 1000 bytes are still accounted to that owner.
+   */
+  def testExecutorRestart(): Unit = {
+    vm1.invoke(getClass, "waitForExecutor")
+
+    val oldID = vm1.invoke(getClass, "getMemoryManagerIdentity").asInstanceOf[Int]
+
+    assert(vm1.invoke(getClass, "allocateStorage",
+      Array("testExecutorRestart", false, 1000L).asInstanceOf[Array[Object]]).asInstanceOf[Boolean])
+
+    val t1 = new Thread(new Runnable {
+      override def run(): Unit = try {
+        failTheExecutors()
+      } catch {
+        // the job is expected to fail; any error it raises is deliberately ignored
+        case _: Throwable =>
+      }
+    })
+    t1.start()
+    t1.join()
+
+    DistributedTestBase.waitForCriterion(new DistributedTestBase.WaitCriterion {
+      override def done(): Boolean = {
+        vm1.invoke(self.getClass, "waitForExecutor")
+        try {
+          vm1.invoke(self.getClass, "getMemoryManagerIdentity").asInstanceOf[Int] != oldID
+        } catch {
+          case _: AssertionError => false // ignore and retry till timeout
+        }
+      }
+
+      override def description(): String =
+        "waiting for executor to restart with changed memory manager"
+    }, 30000, 500, true)
+
+    val value1 = vm1.invoke(getClass, "getMemoryForTable", "testExecutorRestart").asInstanceOf[Long]
+    assert(value1 == 1000L, s"The storage for object should be 1000 rather than $value1")
+  }
+
+  /**
+   * Allocates 1000 bytes for owner `testCacheCloseRestart` on vm1, stops and
+   * restarts the server there, and asserts that the memory manager instance
+   * changed and that no bytes remain accounted to that owner.
+   */
+  def testCacheCloseRestart(): Unit = {
+    vm1.invoke(getClass, "waitForExecutor")
+
+    val props = bootProps.clone().asInstanceOf[java.util.Properties]
+    val port = ClusterManagerTestBase.locPort
+
+    val oldID = vm1.invoke(getClass, "getMemoryManagerIdentity").asInstanceOf[Int]
+
+    assert(vm1.invoke(getClass, "allocateStorage",
+      Array("testCacheCloseRestart", false, 1000L).
+          asInstanceOf[Array[Object]]).asInstanceOf[Boolean])
+
+    vm1.invoke(classOf[ClusterManagerTestBase], "stopAny")
+    vm1.invoke(restartServerRunnable(props, port))
+
+    val newID = vm1.invoke(getClass, "getMemoryManagerIdentity").asInstanceOf[Int]
+    assert(newID != oldID, "The MemoryManager instance has not changed as expected")
+
+    // query the owner this test allocated for; the previous lookup of
+    // "testExecutorRestart" made the zero check independent of the allocation above
+    val value1 = vm1.invoke(getClass, "getMemoryForTable",
+      "testCacheCloseRestart").asInstanceOf[Long]
+    assert(value1 == 0L, s"The storage for object should be 0L rather than $value1")
+  }
+
+  /**
+   * Builds a runnable that starts a server with the given locator port and
+   * properties and waits for `SparkEnv.get` to become non-null.
+   */
+  private def restartServerRunnable(props: Properties, port: Int): SerializableRunnable = {
+    new SerializableRunnable() {
+      override def run(): Unit = {
+        ClusterManagerTestBase.startSnappyServer(port, props)
+        ClusterManagerTestBase.waitForCriterion(SparkEnv.get != null,
+          "Executor Service did not start in specified time ", 20000, 5000, true)
+      }
+    }
+  }
+
+  /**
+   * Allocates storage on vm1, stops the server there and asserts that the boot
+   * memory manager tracks no objects afterwards. The server is restarted in
+   * `finally` so later tests find vm1 running.
+   */
+  def testCacheClose(): Unit = {
+    vm1.invoke(getClass, "waitForExecutor")
+    val props = bootProps.clone().asInstanceOf[java.util.Properties]
+    val port = ClusterManagerTestBase.locPort
+
+    assert(vm1.invoke(getClass, "allocateStorage",
+      Array("testCacheCloseRestart", false, 1000L).
+          asInstanceOf[Array[Object]]).asInstanceOf[Boolean])
+
+    vm1.invoke(classOf[ClusterManagerTestBase], "stopAny")
+
+    try {
+      val bootMemorySize = vm1.invoke(getClass, "getBootMemoryManagerSize").asInstanceOf[Long]
+      assert(bootMemorySize == 0L, "After cache close bootMemory map size is greater than 0L")
+    } finally {
+      vm1.invoke(restartServerRunnable(props, port))
+    }
+
+  }
+
+  /**
+   * Allocates 1000 bytes for owner `testDriverRestart` on vm1, stops Spark on
+   * the driver, restarts the lead, and asserts that vm1 has a new memory manager
+   * instance which still accounts the 1000 bytes to that owner.
+   */
+  def testDriverRestart(): Unit = {
+    var stopped = false
+    var oldID = -1
+    try {
+      vm1.invoke(getClass, "waitForExecutor")
+      oldID = vm1.invoke(getClass, "getMemoryManagerIdentity").asInstanceOf[Int]
+      assert(vm1.invoke(getClass, "allocateStorage",
+        Array("testDriverRestart", false, 1000L).asInstanceOf[Array[Object]]).asInstanceOf[Boolean])
+
+      ClusterManagerTestBase.stopSpark()
+      stopped = true
+    } finally {
+      // Restart and verify only when the driver was really stopped. If the setup above
+      // failed, an identity assertion here would replace the original exception.
+      if (stopped) {
+        val t1 = new Thread(new Runnable {
+          override def run(): Unit =
+            ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps)
+        })
+
+        t1.start()
+        vm1.invoke(getClass, "waitForExecutor")
+        t1.join(30000)
+        val newID = vm1.invoke(getClass, "getMemoryManagerIdentity").asInstanceOf[Int]
+        assert(newID != oldID, "The MemoryManager instance has not changed as expected")
+      }
+    }
+    val value1 = vm1.invoke(getClass, "getMemoryForTable", "testDriverRestart").asInstanceOf[Long]
+    assert(value1 == 1000L, s"The storage for object should be 1000 rather than $value1")
+  }
+}
+
+/** Helpers invoked by name on remote VMs through `vm.invoke(getClass, ...)`. */
+object MemoryManagerRestartDUnitTest {
+
+  /** Number of entries in the boot memory manager's `memoryForObject` map. */
+  def getBootMemoryManagerSize(): Long = {
+    MemoryManagerCallback.bootMemoryManager.
+        asInstanceOf[SnappyUnifiedMemoryManager].memoryForObject.size()
+  }
+
+  /**
+   * Polls every 500 ms until `SparkEnv.get` is non-null, then delegates to
+   * `ExecutorInitiator.testWaitForExecutor()`.
+   *
+   * @throws Exception when `SparkEnv.get` is still null after more than 30 seconds
+   */
+  def waitForExecutor(): Unit = {
+    var l = 0L
+    while (SparkEnv.get eq null) {
+      if (l > 30000) throw new Exception(s"Executors did not start in 30 seconds")
+      Thread.sleep(500)
+      l += 500
+    }
+    ExecutorInitiator.testWaitForExecutor()
+  }
+
+  /** Throws `OutOfMemoryError` from a function run through `mapExecutors`. */
+  def failTheExecutors(): Unit = {
+    Utils.mapExecutors[Unit](sc, () => {
+      throw new OutOfMemoryError("Some Random message") // See SystemFailure.isJVMFailureError
+    })
+  }
+
+  private def sc = SnappyContext.globalSparkContext
+
+  /** Identity hash code of the current `SnappyUnifiedMemoryManager` in this JVM. */
+  def getMemoryManagerIdentity(): Int = {
+    assert(SparkEnv.get != null, "Executor is still not initialized")
+    assert(SparkEnv.get.memoryManager.isInstanceOf[SnappyUnifiedMemoryManager])
+    val memoryManager = SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager]
+    System.identityHashCode(memoryManager)
+  }
+
+  /**
+   * Sums the bytes recorded in `memoryForObject` for every owner whose name
+   * contains `tableName`, ignoring case.
+   */
+  def getMemoryForTable(tableName: String): Long = {
+    assert(SparkEnv.get != null, "Executor is still not initialized")
+    assert(SparkEnv.get.memoryManager.isInstanceOf[SnappyUnifiedMemoryManager])
+    val memoryManager = SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager]
+    val mMap = memoryManager.memoryForObject
+    memoryManager.logStats()
+    var sum = 0L
+    mMap.forEachKeyValue(new ObjectLongProcedure[MemoryOwner] {
+      override def value(key: MemoryOwner, value: Long): Unit = {
+        if (key.owner.toLowerCase().contains(tableName.toLowerCase())) {
+          sum += value
+        }
+      }
+    })
+    sum
+  }
+
+  /**
+   * Requests `numBytes` of storage memory for `tableName` from the current
+   * memory manager, without eviction.
+   *
+   * @param offHeap selects `MemoryMode.OFF_HEAP` when true, `MemoryMode.ON_HEAP` otherwise
+   * @return the result of `acquireStorageMemoryForObject`
+   */
+  def allocateStorage(tableName: String, offHeap: Boolean, numBytes: Long): Boolean = {
+    assert(SparkEnv.get != null, "Executor is still not initialized")
+    assert(SparkEnv.get.memoryManager.isInstanceOf[SnappyUnifiedMemoryManager])
+    val success = SparkEnv.get.memoryManager
+        .asInstanceOf[SnappyUnifiedMemoryManager]
+        .acquireStorageMemoryForObject(objectName = tableName,
+          blockId = MemoryManagerCallback.storageBlockId,
+          numBytes = numBytes,
+          memoryMode = if (offHeap) MemoryMode.OFF_HEAP else MemoryMode.ON_HEAP,
+          buffer = null,
+          shouldEvict = false)
+
+    success
+  }
+}
diff --git a/cluster/src/dunit/scala/org/apache/spark/memory/SnappyUnifiedMemoryManagerDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/memory/SnappyUnifiedMemoryManagerDUnitTest.scala
new file mode 100644
index 0000000000..abf126018c
--- /dev/null
+++ b/cluster/src/dunit/scala/org/apache/spark/memory/SnappyUnifiedMemoryManagerDUnitTest.scala
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ +package org.apache.spark.memory + + +import java.sql.DriverManager +import java.util.Properties + +import com.gemstone.gemfire.internal.cache.{BucketRegion, GemFireCacheImpl, LocalRegion, PartitionedRegion} +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.internal.engine.store.GemFireStore +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.{SerializableRunnable, VM} +import org.eclipse.collections.api.block.procedure.primitive.ObjectLongProcedure + +import org.apache.spark.SparkEnv +import org.apache.spark.jdbc.{ConnectionConf, ConnectionConfBuilder, ConnectionUtil} +import org.apache.spark.memory.SnappyUnifiedMemoryManagerDUnitTest._ +import org.apache.spark.sql.SnappyContext +import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation + +case class DummyData(col1: Int, col2: Int, col3: Int) + +class WaitAssert(val error: Int, clazz: Class[_]) { + + var value1 = 0L + var value2 = 0L + var excString = "" + + def assertStorageUsed(vm1: VM, vm2: VM, ignoreByteCount: Int = 0): Boolean = { + value1 = vm1.invoke(clazz, "getStorageMemory").asInstanceOf[Long] + value2 = vm2.invoke(clazz, "getStorageMemory").asInstanceOf[Long] + // println(s"vm1_memoryUsed $value1 vm2_memoryUsed $value2") + excString = s"failed $value1 & $value2 are not within permissable limit \n" + + if (value1 == value2) return true + if (value1 < value2) { + value1 += ignoreByteCount + } else { + value2 -= ignoreByteCount + } + if (Math.abs(value1 - value2) < ((value2 * error) / 100)) return true else false + + } + + def assertTableMemory(vm1: VM, vm2: VM, tableName: String): Boolean = { + value1 = vm1.invoke(clazz, "getMemoryForTable", tableName).asInstanceOf[Long] + value2 = vm2.invoke(clazz, "getMemoryForTable", tableName).asInstanceOf[Long] + // println(s"vm1_memoryUsed $value1 vm2_memoryUsed $value2") + excString = s"failed $value1 & 
$value2 are not within permissable limit \n" + + if (value1 == value2) return true + if (Math.abs(value1 - value2) < ((value2 * error) / 100)) return true else false + } + + def exceptionString(): String = excString +} + +class SnappyUnifiedMemoryManagerDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + val col_table = "app.col_table" + val rr_table = "app.rr_table" + val memoryMode = MemoryMode.ON_HEAP + + bootProps.setProperty("default-startup-recovery-delay", "0"); + + override def beforeClass(): Unit = { + super.beforeClass() + val zeroStartupRecoveryDelay = new SerializableRunnable() { + override def run(): Unit = GemFireXDUtils.setDefaultStartupRecoveryDelay(0) + } + zeroStartupRecoveryDelay.run() + Array(vm0, vm1, vm2, vm3).foreach(_.invoke(zeroStartupRecoveryDelay)) + } + + override def afterClass(): Unit = { + super.afterClass() + val resetStartupRecoveryDelay = new SerializableRunnable() { + override def run(): Unit = GemFireXDUtils.setDefaultStartupRecoveryDelay(120000) + } + resetStartupRecoveryDelay.run() + Array(vm0, vm1, vm2, vm3).foreach(_.invoke(resetStartupRecoveryDelay)) + } + + override def setUp(): Unit = { + super.setUp() + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + cleanTestResources + } + + override def tearDown2(): Unit = { + cleanTestResources + super.tearDown2() + } + + private def cleanTestResources(): Unit = { + val snc = SnappyContext(sc).newSession() + snc.dropTable(col_table, ifExists = true) + snc.dropTable(rr_table, ifExists = true) + resetMemoryManagers + } + + def resetMemoryManagers(): Unit = { + vm0.invoke(getClass, "resetStorageMemory") + vm1.invoke(getClass, "resetStorageMemory") + vm2.invoke(getClass, "resetStorageMemory") + } + + def assertForWait(value1: Long, value2: Long, error: Int = 5): Boolean = { + if (value1 == value2) return true + if (Math.abs(value1 - value2) < ((value2 * error) / 100)) return true else false + } + + def testMemoryUsedInReplication(): Unit = { + val snc = newContext() + val data = for 
(i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data.toSeq, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + snc.createTable(rr_table, "row", dataDF.schema, Map.empty[String, String]) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(rr_table) + val vm1_memoryUsed = vm1.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + val vm2_memoryUsed = vm2.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + assertApproximate(vm1_memoryUsed, vm2_memoryUsed) + } + + def testMemoryUsedInBucketRegions_RowTables(): Unit = { + val snc = newContext() + val data = for (i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data.toSeq, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + val options = "OPTIONS (BUCKETS '64', PARTITION_BY 'Col1', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + rr_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(rr_table) + + val vm1_memoryUsed = vm1.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + val vm2_memoryUsed = vm2.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + assertApproximate(vm1_memoryUsed, vm2_memoryUsed) + } + + def testMemoryUsedInBucketRegions_ColumnTables(): Unit = { + val snc = newContext() + val data = for (i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data.toSeq, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + val options = "OPTIONS (BUCKETS '64', PARTITION_BY 'Col1', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + col_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING column " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(col_table) + runOldEntriesCleanerThreadInAll + + val vm1_memoryUsed = vm1.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + val vm2_memoryUsed = 
vm2.invoke(getClass, "getStorageMemory").asInstanceOf[Long] + assertApproximate(vm1_memoryUsed, vm2_memoryUsed) + } + + // Approximate because we include hash map size also, which can vary across VMs + def assertApproximate(value1: Long, value2: Long, error: Int = 5): Unit = { + if (value1 == value2) return + if (Math.abs(value1 - value2) > ((value2 * error) / 100)) { + // Error target should be 1 + throw new java.lang.AssertionError(s"assertion " + + s"failed $value1 & $value2 are not within permissable limit") + } + } + + /** + * This test checks column table memory usage when GII is done in a node. + * It checks memory usage with reference to the node which was alive at the time + * of GII. + */ + def testMemoryUsedInColumnTableWithGII(): Unit = { + + var props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + val snc = newContext() + val data = for (i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + val options = "OPTIONS (BUCKETS '1', PARTITION_BY 'Col1', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + col_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING column " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(col_table) + + vm1.invoke(restartServerRunnable(props, port)) + // val externalTableName = ColumnFormatRelation.columnBatchTableName(col_table) + vm1.invoke(waitForRegionInit(col_table)) + + runOldEntriesCleanerThreadInAll + val waitAssert = new WaitAssert(10, getClass) + ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "col__table"), + waitAssert.exceptionString(), + 20000, 5000, true) + } + + /** + * This test checks replicated table memory usage when GII is done in a node. 
+ * It checks memory usage with reference to the node which was alive at the time + * of GII. + */ + def testMemoryUsedInReplicatedTableWithGII(): Unit = { + + var props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + val snc = newContext() + val data = for (i <- 1 to 50) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + snc.createTable(rr_table, "row", dataDF.schema, Map.empty[String, String]) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(rr_table) + vm1.invoke(restartServerRunnable(props, port)) + + val waitAssert = new WaitAssert(10, getClass) + ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "rr__table"), + waitAssert.exceptionString(), + 20000, 5000, true) + } + + /** + * This test checks row partitioned table memory usage when GII is done in a node. + * It checks memory usage with reference to the node which was alive at the time + * of GII. 
+ */ + def testMemoryUsedInRowPartitionedTableWithGII(): Unit = { + + val props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + val snc = newContext() + val data = for (i <- 1 to 50) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + val options = "OPTIONS (BUCKETS '1', PARTITION_BY 'Col1', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + rr_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(rr_table) + vm1.invoke(restartServerRunnable(props, port)) + vm1.invoke(waitForRegionInit(rr_table)) + + val waitAssert = new WaitAssert(10, getClass) + ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "rr__table"), + waitAssert.exceptionString(), + 20000, 5000, true) + } + + @throws[Exception] + protected def waitForRegionInit(tableName: String): SerializableRunnable = { + new SerializableRunnable() { + def run() { + val regionName = Misc.getRegionPath(tableName).toUpperCase + while (!Misc.initialDDLReplayDone()) Thread.sleep(100) + val cache = GemFireCacheImpl.getInstance + val pr = cache.getRegion(regionName).asInstanceOf[PartitionedRegion] + while (!pr.getRegionAdvisor.areBucketsInitialized) Thread.sleep(100) + while (!pr.getRegionAdvisor.getBucket(0).isInstanceOf[BucketRegion]) Thread.sleep(100) + val lr = pr.getRegionAdvisor.getBucket(0).asInstanceOf[LocalRegion] + lr.waitOnInitialization() + } + } + } + + @throws[Exception] + protected def readData(tableName: String, numColumns: Int, + numBuckets: Int): SerializableRunnable = { + new SerializableRunnable() { + def run() { + assert(GemFireStore.getBootedInstance ne null) + val conn = DriverManager.getConnection("jdbc:snappydata:") + val stmt = conn.createStatement() + val columnTable = 
ColumnFormatRelation.columnBatchTableName(tableName.toUpperCase) + stmt.execute(s"CALL SYS.SET_BUCKETS_FOR_LOCAL_EXECUTION('$columnTable', " + + s"'${(0 until numBuckets).mkString(",")}', -1)") + val rs = stmt.executeQuery(s"CALL SYS.COLUMN_TABLE_SCAN('$columnTable', " + + s"'${(1 to numColumns).mkString(",")}', null)") + var n = 0 + while (rs.next()) { + n += 1 + } + rs.close() + assert(n > 0, s"expected non-zero batches") + } + } + } + + /** + * This test checks row partitioned table memory usage when GII is done in a node. + * It checks memory usage with reference to the node which was alive at the time + * of GII. At the same time we fire deletes on the region. + */ + def testMemoryUsedInReplicationParTableGIIWithDeletes(): Unit = { + + val props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + val snc = newContext() + val data = for (i <- 1 to 50) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data.toSeq, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + + val options = "OPTIONS (BUCKETS '1', PARTITION_BY 'Col1', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + rr_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(rr_table) + + val otherExecutorThread = new Thread(new Runnable { + + def run() { + (1 to 10).map(i => snc.delete(rr_table, s"col1=$i")) + } + }) + otherExecutorThread.start() + + vm1.invoke(restartServerRunnable(props, port)) + vm1.invoke(waitForRegionInit(rr_table)) + + val waitAssert = new WaitAssert(10, getClass) + // The delete operation takes time to propagate + ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "rr__table"), + waitAssert.exceptionString(), + 60000, 5000, true) + } + + def testMemoryAfterRecovery_ColumnTable(): Unit = { + + val props = 
bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + val snc = newContext() + + val data = for (i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data, 2).map(s => + DummyData(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + val options = "OPTIONS (BUCKETS '4', PARTITION_BY 'Col1'," + + " PERSISTENT 'SYNCHRONOUS', REDUNDANCY '2')" + snc.sql("CREATE TABLE " + col_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING column " + + options + ) + setLocalRegionMaxTempMemory + dataDF.write.insertInto(col_table) + + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + + vm1.invoke(restartServerRunnable(props, port)) + vm1.invoke(waitForRegionInit(col_table)) + runOldEntriesCleanerThreadInAll + vm1.invoke(readData(col_table, 3, 4)) + val waitAssert = new WaitAssert(10, getClass) + ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "col__table"), + waitAssert.exceptionString(), + 30000, 5000, true) + + } + + def newContext(): SnappyContext = { + val snc = SnappyContext(sc).newSession() + snc.setConf(io.snappydata.Property.ColumnBatchSize.name, "500") + snc + } + + def runOldEntriesCleanerThreadInAll(): Unit = { + val runOldEntriesCleanerThread = new SerializableRunnable() { + override def run(): Unit = Misc.getGemFireCache.runOldEntriesCleanerThread() + } + Array(vm1, vm2).foreach(_.invoke(runOldEntriesCleanerThread)) + } + + private def restartServerRunnable(props: Properties, port: Int): SerializableRunnable = { + new SerializableRunnable() { + override def run(): Unit = { + ClusterManagerTestBase.startSnappyServer(port, props) + ClusterManagerTestBase.waitForCriterion(SparkEnv.get != null, + "Executor Service did not start in specified time ", 20000, 5000, true) + } + } + } + + def testMemoryAfterRecovery_RowTable(): Unit = { + + val props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + 
/**
 * Checks memory accounting of a persistent column table after buckets are rebalanced.
 *
 * Steps: stop the server on vm1, create `col_table` (4 buckets, redundancy 2,
 * synchronous persistence) and insert 500 rows, restart the server on vm1, run the
 * old-entries cleaner, call `sys.rebalance_all_buckets()` from vm1, then poll
 * `assertTableMemory(vm1, vm2, "col__table")` until it holds.
 */
def testMemoryAfterRebalance_ColumnTable(): Unit = {
  val props = bootProps.clone().asInstanceOf[java.util.Properties]
  val port = ClusterManagerTestBase.locPort

  // Runs the rebalance procedure over a connection obtained from `conf`; the statement
  // and connection are always closed, even when the call fails.
  def rebalance(conf: ConnectionConf): SerializableRunnable = new SerializableRunnable() {
    override def run(): Unit = {
      val conn = ConnectionUtil.getConnection(conf)
      try {
        val stmt = conn.createStatement
        try {
          stmt.execute("call sys.rebalance_all_buckets()")
        } finally {
          stmt.close()
        }
      } finally {
        conn.close()
      }
    }
  }

  val snc = newContext()
  val conf = new ConnectionConfBuilder(snc.snappySession).build()

  vm1.invoke(classOf[ClusterManagerTestBase], "stopAny")
  val data = for (i <- 1 to 500) yield (Seq(i, (i + 1), (i + 2)))
  val rdd = snc.sparkContext.parallelize(data.toSeq, 2).map(s =>
    DummyData(s(0), s(1), s(2)))
  val dataDF = snc.createDataFrame(rdd)

  val options = "OPTIONS (BUCKETS '4', PARTITION_BY 'Col1'," +
      " PERSISTENT 'SYNCHRONOUS', REDUNDANCY '2')"
  snc.sql("CREATE TABLE " + col_table + " (Col1 INT, Col2 INT, Col3 INT) " + " USING column " +
      options
  )

  setLocalRegionMaxTempMemory
  dataDF.write.insertInto(col_table)

  vm1.invoke(restartServerRunnable(props, port))
  runOldEntriesCleanerThreadInAll
  vm1.invoke(rebalance(conf))

  val waitAssert = new WaitAssert(10, getClass)
  // Poll: the memory figures on the two VMs are compared repeatedly until they agree
  ClusterManagerTestBase.waitForCriterion(waitAssert.assertTableMemory(vm1, vm2, "col__table"),
    waitAssert.exceptionString(),
    30000, 5000, true)

}
System.setProperty("snappydata.umm.memtrace", "true") + }.collect() + } + + private def sc = SnappyContext.globalSparkContext +} diff --git a/cluster/src/dunit/scala/org/apache/spark/sql/ColumnBatchScanDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/sql/ColumnBatchScanDUnitTest.scala new file mode 100644 index 0000000000..884cca0672 --- /dev/null +++ b/cluster/src/dunit/scala/org/apache/spark/sql/ColumnBatchScanDUnitTest.scala @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
/**
 * DUnit tests for column batch scanning: batch skipping driven by predicates, and
 * creation of row/column tables from the contents of other tables.
 */
class ColumnBatchScanDUnitTest(s: String) extends ClusterManagerTestBase(s) {

  /**
   * Verifies the "column batches seen" / "column batches skipped by the predicate"
   * metrics for numeric and LIKE predicates that select all, none, or some of the
   * batches of the `airline` table.
   *
   * NOTE(review): the leading underscore presumably keeps the test runner from picking
   * this method up -- confirm before relying on it.
   */
  def _testColumnBatchSkipping(): Unit = {

    val snc = SnappyContext(sc)
    val ddlStr = "YearI INT NOT NULL," +
        "MonthI INT NOT NULL," +
        "DayOfMonth INT NOT NULL," +
        "DepTime INT," +
        "ArrDelay INT," +
        "UniqueCarrier CHAR(6) NOT NULL"

    // reduce the batch size to ensure that multiple are created

    snc.sql(s"create table if not exists airline ($ddlStr) " +
        s" using column options (Buckets '2', COLUMN_BATCH_SIZE '400')")

    import snc.implicits._

    val ds = snc.createDataset(sc.range(1, 101).map(i =>
      AirlineData(2015, 2, 15, 1002, i.toInt, "AA" + i)))
    ds.write.insertInto("airline")

    // All six queries differ only in their WHERE predicate.
    def airlineQuery(predicate: String): String =
      "select AVG(ArrDelay) arrivalDelay, UniqueCarrier carrier " +
          s"from AIRLINE where $predicate " +
          "group by UniqueCarrier order by arrivalDelay"

    // ***Check for the case when all the column batches are scanned ****
    val (_, scanned1, skipped1) = countWithBatchStats(snc, airlineQuery("ArrDelay < 101"))
    assert(skipped1 == 0, "All Column batches should have been scanned")
    assert(scanned1 > 0, "All Column batches should have been scanned")

    // ***Check for the case when all the column batches are skipped****
    val (_, scanned2, skipped2) = countWithBatchStats(snc, airlineQuery("ArrDelay > 101"))
    assert(scanned2 == skipped2, "No Column batches should have been scanned")
    assert(skipped2 > 0, "No Column batches should have been scanned")

    // ***Check for the case when some of the column batches are scanned ****
    val (_, scanned3, skipped3) = countWithBatchStats(snc, airlineQuery("ArrDelay < 20"))
    assert(skipped3 > 0, "Some Column batches should have been skipped")
    assert(scanned3 != skipped3, "Some Column batches should have been skipped - comparison")

    // check for StartsWith predicate with MAX/MIN handling

    // first all batches chosen
    val (count4, scanned4, skipped4) =
      countWithBatchStats(snc, airlineQuery("UniqueCarrier like 'AA%'"))
    assert(count4 == 100, s"Unexpected count = $count4, expected 100")
    assert(skipped4 == 0, "No Column batches should have been skipped")
    assert(scanned4 > 0, "All Column batches should have been scanned")

    // next some batches skipped
    val (count5, scanned5, skipped5) =
      countWithBatchStats(snc, airlineQuery("UniqueCarrier like 'AA1%'"))
    assert(count5 == 12, s"Unexpected count = $count5, expected 12")
    assert(skipped5 > 0, "Some Column batches should have been skipped")
    assert(scanned5 != skipped5, "Some Column batches should have been skipped - comparison")

    // last all batches skipped
    val (count6, scanned6, skipped6) =
      countWithBatchStats(snc, airlineQuery("UniqueCarrier like 'AA0%'"))
    assert(count6 == 0, s"Unexpected count = $count6, expected 0")
    assert(scanned6 == skipped6, "No Column batches should have been returned")
    assert(skipped6 > 0, "No Column batches should have been returned")
  }

  /**
   * Runs `query`, counts its rows and returns the batch metrics of the SQL execution
   * that the count produced.
   *
   * The execution is identified as the id that appears in the listener's
   * `executionIdToData` after the count but was not there before it.
   *
   * @return (row count, batches seen, batches skipped)
   */
  private def countWithBatchStats(snc: SnappyContext, query: String): (Long, Long, Long) = {
    val previousExecutionIds = snc.sharedState.listener.executionIdToData.keySet
    val df = snc.sql(query)
    val count = df.count()
    val executionIds =
      snc.sharedState.listener.executionIdToData.keySet.diff(previousExecutionIds)
    val executionId = executionIds.headOption.getOrElse(
      throw new AssertionError(s"No new SQL execution was recorded for query: $query"))
    val (seen, skipped) = findColumnBatchStats(df, snc.snappySession, executionId)
    (count, seen, skipped)
  }

  /**
   * Looks up the "column batches seen" and "column batches skipped by the predicate"
   * metric values of one SQL execution.
   *
   * @param df          the DataFrame whose execution is inspected (not read here; the
   *                    lookup is done purely by `executionId`)
   * @param sc          session whose SQL listener holds the execution data
   * @param executionId id of the SQL execution to inspect
   * @return (batches seen, batches skipped)
   */
  private def findColumnBatchStats(df: DataFrame,
      sc: SnappySession, executionId: Long): (Long, Long) = {

    val metricValues = sc.sharedState.listener.getExecutionMetrics(executionId)
    val execution = (sc.sharedState.listener.getRunningExecutions ++
        sc.sharedState.listener.getCompletedExecutions)
        .find(_.executionId == executionId)
        .getOrElse(throw new AssertionError(
          s"No running or completed SQL execution with id $executionId"))

    // Resolve a metric by its display name, failing with a message that names what is missing.
    def metricValue(metricName: String): Long = {
      val accumulatorId = execution.accumulatorMetrics.collectFirst {
        case (id, metric) if metric.name == metricName => id
      }.getOrElse(throw new AssertionError(
        s"Metric '$metricName' not found for execution $executionId"))
      metricValues.getOrElse(accumulatorId, throw new AssertionError(
        s"No value recorded for metric '$metricName' of execution $executionId")).toLong
    }

    (metricValue("column batches seen"),
        metricValue("column batches skipped by the predicate"))
  }


  /**
   * Runs [[executeTestWithOptions]] twice: once with partitioning options on the source
   * row table, and once with bucket options on the source column table plus options on
   * the derived column table.
   */
  def testCreateColumnTablesFromOtherTables(): Unit = {
    val tempRowTableProps = "BUCKETS '16', PARTITION_BY 'COL2'"
    executeTestWithOptions(Map("BUCKETS" -> "8", "PARTITION_BY" -> "COL1", "REDUNDANCY" -> "1"),
      Map.empty, tempRowTableProps)
    executeTestWithOptions(Map.empty, Map("BUCKETS" -> "16"), tempRowTableProps,
      "BUCKETS '8', PARTITION_BY 'COL1', REDUNDANCY '1'")
  }

  /**
   * Creates a row table and a column table with 113999 rows each, then creates
   * temporary row/column tables from them via `CREATE TABLE ... AS SELECT` and checks
   * the resulting row counts. All tables are dropped at the end.
   *
   * @param rowTableOptions     options for the source row table
   * @param colTableOptions     options for the source column table
   * @param tempRowTableOptions OPTIONS clause contents for the derived row table
   * @param tempColTableOptions OPTIONS clause contents for the derived column table
   */
  def executeTestWithOptions(rowTableOptions: Map[String, String] = Map.empty[String, String],
      colTableOptions: Map[String, String] = Map.empty[String, String],
      tempRowTableOptions: String = "", tempColTableOptions: String = ""): Unit = {

    val snc = SnappyContext(sc)
    val rowTable = "rowTable"
    val colTable = "colTable"
    val numRows = 113999

    // Collects the table and compares the number of rows with the expectation.
    def assertRowCount(table: String, expected: Int): Unit = {
      val actual = snc.sql("SELECT * FROM " + table).collect().length
      assert(actual == expected,
        s"Expected row count is $expected while actual count is $actual")
    }

    snc.sql("DROP TABLE IF EXISTS " + rowTable)
    snc.sql("DROP TABLE IF EXISTS " + colTable)
    Property.ColumnBatchSize.set(snc.sessionState.conf, "30k")
    val rdd = sc.parallelize(
      (1 to numRows).map(i => TestRecord(i, i + 1, i + 2)))
    val dataDF = snc.createDataFrame(rdd)

    snc.createTable(rowTable, "row", dataDF.schema, rowTableOptions)
    dataDF.write.insertInto(rowTable)

    snc.createTable(colTable, "column", dataDF.schema, colTableOptions)
    dataDF.write.format("column").mode(SaveMode.Append).options(colTableOptions)
        .saveAsTable(colTable)

    val tempRowTableName = "testRowTable"
    val tempColTableName = "testcolTable"

    // row table from row table
    snc.sql("DROP TABLE IF EXISTS " + tempRowTableName)
    snc.sql(s"CREATE TABLE $tempRowTableName using row options($tempRowTableOptions) AS" +
        s" (SELECT col1 ,col2 FROM $rowTable)")
    assertRowCount(tempRowTableName, numRows)

    // row table from column table
    snc.sql("DROP TABLE IF EXISTS " + tempRowTableName)
    snc.sql(s"CREATE TABLE $tempRowTableName using row options($tempRowTableOptions) AS " +
        s"(SELECT col1 ,col2 FROM $colTable)")
    assertRowCount(tempRowTableName, numRows)

    // column table from row table
    snc.sql("DROP TABLE IF EXISTS " + tempColTableName)
    snc.sql(s"CREATE TABLE $tempColTableName USING COLUMN OPTIONS($tempColTableOptions) " +
        s"AS (SELECT col1 ,col2 FROM $tempRowTableName)")
    assertRowCount(tempColTableName, numRows)

    // column table from column table
    snc.sql("DROP TABLE IF EXISTS " + tempColTableName)
    snc.sql(s"CREATE TABLE $tempColTableName USING COLUMN OPTIONS($tempColTableOptions) " +
        s"AS (SELECT col1 ,col2 FROM $colTable)")
    assertRowCount(tempColTableName, numRows)

    // column table from a join of the column and the row table
    snc.sql("DROP TABLE IF EXISTS " + tempColTableName)
    snc.sql(s"CREATE TABLE $tempColTableName USING COLUMN OPTIONS($tempColTableOptions) " +
        s"AS (SELECT t1.col1 ,t1.col2 FROM $colTable t1,$rowTable" +
        " t2 where t1.col1=t2.col2)")

    // Expected count will be 113998 as first row will not match
    assertRowCount(tempColTableName, numRows - 1)

    snc.sql("DROP TABLE IF EXISTS " + tempColTableName)
    snc.sql("DROP TABLE IF EXISTS " + tempRowTableName)

    snc.sql("DROP TABLE IF EXISTS " + rowTable)
    snc.sql("DROP TABLE IF EXISTS " + colTable)
  }
}
/**
 * Class-level teardown: stops the network servers on all four VMs and in this JVM,
 * runs the parent teardown, validates on vm0..vm2 that no snapshot transaction is
 * still active, and finally stops the Spark cluster through vm3.
 */
override def afterClass(): Unit = {
  for (vm <- Array(vm3, vm2, vm1, vm0)) {
    vm.invoke(getClass, "stopNetworkServers")
  }
  ClusterManagerTestBase.stopNetworkServers()
  super.afterClass()
  for (vm <- Array(vm0, vm1, vm2)) {
    vm.invoke(classOf[ClusterManagerTestBase], "validateNoActiveSnapshotTX")
  }
  vm3.invoke(classOf[ClusterManagerTestBase], "stopSparkCluster", productDir)
}
/**
 * Loads the NorthWind data into partitioned row tables and into plain Spark tables,
 * then validates the full result sets of the NorthWind queries. Output is appended to
 * `ValidateNWQueries_PartitionedRowTable.out`.
 */
def testPartitionedRowTableQueries(): Unit = {
  val snappy = SnappyContext(sc)
  val sparkSqlContext = new SparkSession(sc).sqlContext
  val outputFile = new File("ValidateNWQueries_PartitionedRowTable.out")
  // second argument `true` opens the file in append mode
  val writer = new PrintWriter(new FileOutputStream(outputFile, true))
  try {
    createAndLoadPartitionedTables(snappy)
    NorthWindDUnitTest.createAndLoadSparkTables(sparkSqlContext)
    // validatePartitionedRowTableQueries(snappy)
    NorthWindDUnitTest.validateQueriesFullResultSet(
      snappy, "PartitionedRowTable", writer, sparkSqlContext)
  } finally {
    writer.close()
  }
}
/**
 * Starts a network server on vm2, creates and loads a column table over a JDBC
 * connection to it, and checks that `products` returns at least one row.
 *
 * The result set, statement and connection are closed in `finally` blocks so that
 * they are released even when loading or the assertion fails.
 */
def testInsertionOfRecordInColumnTable(): Unit = {
  val snc = SnappyContext(sc)
  val netPort = AvailablePortHelper.getRandomAvailableTCPPort
  vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort)
  val conn = getANetConnection(netPort)
  try {
    val s = conn.createStatement()
    try {
      createAndLoadColumnTableUsingJDBC(s, snc)
      val rs: ResultSet = s.executeQuery("SELECT * from products")
      try {
        assert(rs.next(), "Expected at least one row in products")
      } finally {
        rs.close()
      }
    } finally {
      s.close()
    }
  } finally {
    conn.close()
  }
}
NWQueries.Q13, "Q13", 2, 1, classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 1, classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 1, classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 1, classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 1, classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 1, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, 1, classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 1, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 1, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 1, + classOf[SortMergeJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 1, + classOf[SortMergeJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, 1, classOf[RowTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 1, + classOf[SortMergeJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 1, + classOf[SortMergeJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, 1, classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 1, classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 1, classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, 
"Q34", 5, 1, classOf[HashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, 1, classOf[HashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, totalProcessors, + classOf[HashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, 1, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 1, classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 1, classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 1, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 1, classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 1, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 1, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, + totalProcessors * 2 + 1, classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, + totalProcessors * 2 + 1, classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 1, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, 
/**
 * Creates the NorthWind tables and fills each one from its `NWQueries` data set.
 *
 * regions, categories, shippers, employees and customers are created from their DDL
 * as is. The remaining tables get an explicit `using row options (...)` clause that
 * partitions them; `order_details` is colocated with `orders`.
 */
private def createAndLoadPartitionedTables(snc: SnappyContext): Unit = {

  // Executes the DDL with an optional trailing options clause appended.
  def create(ddl: String, optionsClause: String = ""): Unit = {
    snc.sql(ddl + optionsClause)
  }

  create(NWQueries.regions_table)
  NWQueries.regions(snc).write.insertInto("regions")

  create(NWQueries.categories_table)
  NWQueries.categories(snc).write.insertInto("categories")

  create(NWQueries.shippers_table)
  NWQueries.shippers(snc).write.insertInto("shippers")

  create(NWQueries.employees_table)
  NWQueries.employees(snc).write.insertInto("employees")

  create(NWQueries.customers_table)
  NWQueries.customers(snc).write.insertInto("customers")

  create(NWQueries.orders_table,
    " using row options (partition_by 'OrderId', buckets '8', redundancy '1')")
  NWQueries.orders(snc).write.insertInto("orders")

  create(NWQueries.order_details_table,
    " using row options (partition_by 'OrderId', buckets '8', " +
        "COLOCATE_WITH 'orders', redundancy '1')")
  NWQueries.order_details(snc).write.insertInto("order_details")

  create(NWQueries.products_table,
    " using row options ( partition_by 'ProductID', buckets '16')")
  NWQueries.products(snc).write.insertInto("products")

  create(NWQueries.suppliers_table,
    " USING row options (PARTITION_BY 'SupplierID', buckets '12' )")
  NWQueries.suppliers(snc).write.insertInto("suppliers")

  create(NWQueries.territories_table,
    " using row options (partition_by 'TerritoryID', buckets '4')")
  NWQueries.territories(snc).write.insertInto("territories")

  create(NWQueries.employee_territories_table,
    " using row options(partition_by 'EmployeeID', buckets '1')")
  NWQueries.employee_territories(snc).write.insertInto("employee_territories")

}
+ classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 1, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 12, + classOf[BroadcastHashJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, totalProcessors, + classOf[RowTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 12, + classOf[BroadcastHashJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 12, + classOf[BroadcastHashJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, totalProcessors, + classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 8, classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 8, classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, totalProcessors, + classOf[SortMergeJoinExec]) + case 
"Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 12, + classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, totalProcessors, + classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 8, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, totalProcessors, + classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 8, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 8, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 8, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 8, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 8, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, totalProcessors, + classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 1, classOf[HashJoinExec]) + } + } + } + + def validatePartitionedColumnTableQueries(snc: SnappyContext): Unit = { 
+ val numDefaultPartitions = ((totalProcessors - 4) to (totalProcessors + 4)).toArray + for (q <- NWQueries.queries) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, 1, classOf[RowTableScan]) + case "Q3" => NWQueries.assertQuery(snc, NWQueries.Q3, "Q3", 830, numDefaultPartitions, + classOf[ColumnTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, totalProcessors, + classOf[ColumnTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 10, classOf[ColumnTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 10, classOf[ColumnTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 10, classOf[ColumnTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, totalProcessors, + classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, totalProcessors, + classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, totalProcessors, + classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, totalProcessors, + classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 4, classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, numDefaultPartitions, + classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 1, classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, totalProcessors, + classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, totalProcessors, + classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, totalProcessors, + classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, totalProcessors, + classOf[ProjectExec]) + case 
"Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, numDefaultPartitions, + classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, + classOf[ColumnTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 2, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, + classOf[ColumnTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 12, + classOf[BroadcastHashJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, totalProcessors, + classOf[ColumnTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 12, + classOf[SortMergeJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 12, + classOf[SortMergeJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, totalProcessors, + classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 8, classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, totalProcessors, + classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, totalProcessors, + classOf[HashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, totalProcessors, + classOf[HashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, totalProcessors, + classOf[HashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, 
NWQueries.Q38, "Q38", 2155, totalProcessors, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 12, + classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, totalProcessors, + classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 8, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, totalProcessors, + classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 8, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 8, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 8, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 8, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 8, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 8, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 8, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, totalProcessors, + classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 1, classOf[HashJoinExec]) + } + } + } 
+ + private def validateColocatedTableQueries(snc: SnappyContext): Unit = { + + val numDefaultPartitions = ((totalProcessors - 4) to (totalProcessors + 4)).toArray + for (q <- NWQueries.queries) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, numDefaultPartitions, + classOf[ColumnTableScan]) + case "Q3" => NWQueries.assertQuery(snc, NWQueries.Q3, "Q3", 830, numDefaultPartitions, + classOf[RowTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, 4, classOf[RowTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 8, classOf[RowTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 8, classOf[RowTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 8, classOf[RowTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, 4, classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, 4, classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, 4, classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, 4, classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 4, classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, numDefaultPartitions, + classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, totalProcessors, + classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 4, classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 4, classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 4, classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 4, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, 
NWQueries.Q19, "Q19", 13, numDefaultPartitions, + classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 2, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 8, + classOf[ColumnTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 16, + classOf[BroadcastHashJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 12, + classOf[SortMergeJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, totalProcessors, + classOf[ColumnTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 12, + classOf[BroadcastHashJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 12, + classOf[BroadcastHashJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 8, + classOf[BroadcastHashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 8, + classOf[BroadcastHashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, + classOf[BroadcastHashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, 
NWQueries.Q38, "Q38", 2155, totalProcessors, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 12, + classOf[BroadcastHashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 16, + classOf[BroadcastHashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, totalProcessors, + classOf[BroadcastHashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 16, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 32, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, totalProcessors, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, totalProcessors, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, totalProcessors, + 
classOf[HashJoinExec])
        case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 1, classOf[HashJoinExec])
      }
    }
  }

  /**
   * Creates the `products` column table through the given JDBC statement and inserts
   * every row of `NWQueries.products(snc)` with one INSERT statement per row.
   *
   * Single quotes inside columns 1 and 4 are removed (not escaped) before the value is
   * embedded in the SQL text.
   */
  private def createAndLoadColumnTableUsingJDBC(stmt: Statement, snc: SnappyContext): Unit = {

    stmt.executeUpdate(NWQueries.products_table + " USING column options (" +
        "partition_by 'ProductID,SupplierID', buckets '4', redundancy '2')")
    NWQueries.products(snc).collect().foreach(row => {
      val colValues = row.toSeq
      val sqlQuery: String = s"INSERT INTO products VALUES(${colValues.head}, " +
          s"'${colValues(1).toString.replace("'", "")}',${colValues(2)}, ${colValues(3)}, " +
          s"'${colValues(4).toString.replace("'", "")}',${colValues(5)}, ${colValues(6)}, " +
          s"${colValues(7)}, ${colValues(8)}, ${colValues(9)})"
      stmt.executeUpdate(sqlQuery)
    })
  }
}

/**
 * Helpers shared by the NorthWind tests: creation and loading of the NorthWind tables in
 * several layouts, and comparison of complete query results produced through a
 * `SnappyContext` against the results produced through a plain `SQLContext`.
 */
object NorthWindDUnitTest {

  /** Registers every NorthWind data set as a temporary view in `sqlContext`. */
  def createAndLoadSparkTables(sqlContext: SQLContext): Unit = {
    NWQueries.regions(sqlContext).createOrReplaceTempView("regions")
    NWQueries.categories(sqlContext).createOrReplaceTempView("categories")
    NWQueries.shippers(sqlContext).createOrReplaceTempView("shippers")
    NWQueries.employees(sqlContext).createOrReplaceTempView("employees")
    NWQueries.customers(sqlContext).createOrReplaceTempView("customers")
    NWQueries.orders(sqlContext).createOrReplaceTempView("orders")
    NWQueries.order_details(sqlContext).createOrReplaceTempView("order_details")
    NWQueries.products(sqlContext).createOrReplaceTempView("products")
    NWQueries.suppliers(sqlContext).createOrReplaceTempView("suppliers")
    NWQueries.territories(sqlContext).createOrReplaceTempView("territories")
    NWQueries.employee_territories(sqlContext).createOrReplaceTempView("employee_territories")
  }

  /**
   * Runs `ddl` and then inserts `data` into `table`.
   *
   * `data` is by-name so that it is only evaluated after the DDL has run, which is the
   * order the individual statements were written in before they were factored out.
   */
  private def createAndLoad(snc: SnappyContext, ddl: String, table: String)(
      data: => DataFrame): Unit = {
    snc.sql(ddl)
    data.write.insertInto(table)
  }

  /** Creates every NorthWind table from its bare DDL (no USING/OPTIONS clause) and loads it. */
  def createAndLoadReplicatedTables(snc: SnappyContext): Unit = {
    createAndLoad(snc, NWQueries.regions_table, "regions")(NWQueries.regions(snc))
    createAndLoad(snc, NWQueries.categories_table, "categories")(NWQueries.categories(snc))
    createAndLoad(snc, NWQueries.shippers_table, "shippers")(NWQueries.shippers(snc))
    createAndLoad(snc, NWQueries.employees_table, "employees")(NWQueries.employees(snc))
    createAndLoad(snc, NWQueries.customers_table, "customers")(NWQueries.customers(snc))
    createAndLoad(snc, NWQueries.orders_table, "orders")(NWQueries.orders(snc))
    createAndLoad(snc, NWQueries.order_details_table, "order_details")(
      NWQueries.order_details(snc))
    createAndLoad(snc, NWQueries.products_table, "products")(NWQueries.products(snc))
    createAndLoad(snc, NWQueries.suppliers_table, "suppliers")(NWQueries.suppliers(snc))
    createAndLoad(snc, NWQueries.territories_table, "territories")(NWQueries.territories(snc))
    createAndLoad(snc, NWQueries.employee_territories_table, "employee_territories")(
      NWQueries.employee_territories(snc))
  }

  /**
   * Creates and loads the NorthWind tables with orders, order_details, products,
   * suppliers, territories and employee_territories declared as partitioned column
   * tables; the remaining tables use their bare DDL.
   */
  def createAndLoadColumnTables(snc: SnappyContext): Unit = {
    createAndLoad(snc, NWQueries.regions_table, "regions")(NWQueries.regions(snc))
    createAndLoad(snc, NWQueries.categories_table, "categories")(NWQueries.categories(snc))
    createAndLoad(snc, NWQueries.shippers_table, "shippers")(NWQueries.shippers(snc))
    createAndLoad(snc, NWQueries.employees_table, "employees")(NWQueries.employees(snc))
    createAndLoad(snc, NWQueries.customers_table, "customers")(NWQueries.customers(snc))

    createAndLoad(snc, NWQueries.orders_table + " using column options (" +
        "partition_by 'OrderId', buckets '8', redundancy '1')", "orders")(
      NWQueries.orders(snc))

    createAndLoad(snc, NWQueries.order_details_table + " using column options (" +
        "partition_by 'OrderId', buckets '8', COLOCATE_WITH 'orders', " +
        "redundancy '1')", "order_details")(NWQueries.order_details(snc))

    createAndLoad(snc, NWQueries.products_table +
        " using column options ( partition_by 'ProductID', buckets '16')", "products")(
      NWQueries.products(snc))

    createAndLoad(snc, NWQueries.suppliers_table +
        " USING column options (PARTITION_BY 'SupplierID', buckets '12' )", "suppliers")(
      NWQueries.suppliers(snc))

    createAndLoad(snc, NWQueries.territories_table +
        " using column options (partition_by 'TerritoryID', buckets '4')", "territories")(
      NWQueries.territories(snc))

    createAndLoad(snc, NWQueries.employee_territories_table +
        " using column options(partition_by 'EmployeeID', buckets '1')",
      "employee_territories")(NWQueries.employee_territories(snc))
  }

  /**
   * Creates and loads the NorthWind tables with a mix of row and column tables, several
   * of which are declared with `colocate_with` (orders with customers, products with
   * order_details, employee_territories with territories).
   */
  def createAndLoadColocatedTables(snc: SnappyContext): Unit = {
    createAndLoad(snc, NWQueries.regions_table, "regions")(NWQueries.regions(snc))
    createAndLoad(snc, NWQueries.categories_table, "categories")(NWQueries.categories(snc))
    createAndLoad(snc, NWQueries.shippers_table, "shippers")(NWQueries.shippers(snc))
    createAndLoad(snc, NWQueries.employees_table, "employees")(NWQueries.employees(snc))

    createAndLoad(snc, NWQueries.customers_table + " using column options(" +
        "partition_by 'CustomerID', buckets '8', redundancy '1')", "customers")(
      NWQueries.customers(snc))

    createAndLoad(snc, NWQueries.orders_table + " using row options (" +
        "partition_by 'CustomerID', buckets '8', " +
        "colocate_with 'customers', redundancy '1')", "orders")(NWQueries.orders(snc))

    createAndLoad(snc, NWQueries.order_details_table + " using row options (" +
        "partition_by 'ProductID', buckets '16', redundancy '1')", "order_details")(
      NWQueries.order_details(snc))

    createAndLoad(snc, NWQueries.products_table +
        " USING column options ( partition_by 'ProductID', buckets '16'," +
        " colocate_with 'order_details', redundancy '1')", "products")(
      NWQueries.products(snc))

    createAndLoad(snc, NWQueries.suppliers_table +
        " USING column options (PARTITION_BY 'SupplierID', buckets '12')", "suppliers")(
      NWQueries.suppliers(snc))

    createAndLoad(snc, NWQueries.territories_table +
        " using column options (partition_by 'TerritoryID', buckets '4')", "territories")(
      NWQueries.territories(snc))

    createAndLoad(snc, NWQueries.employee_territories_table + " using row options(" +
        "partition_by 'TerritoryID', buckets '4', colocate_with 'territories')",
      "employee_territories")(NWQueries.employee_territories(snc))
  }

  /**
   * Returns the absolute path of directory `dirName`, creating it when missing.
   *
   * The directory is placed under the current working directory. When `onlyOnce` is set,
   * the parent of the working directory and then that parent's parent are probed for a
   * file named `output.txt`; the first one that contains it is used as the base instead.
   *
   * @param dirName  name of the directory to create
   * @param onlyOnce whether to look for an `output.txt` marker in the ancestor directories
   * @return absolute path of the (possibly newly created) directory
   */
  protected def getTempDir(dirName: String, onlyOnce: Boolean): String = {
    val cwd = new File(".")

    // File.list() yields null for a non-directory or on an I/O error, so wrap it.
    def holdsOutputTxt(dir: File): Boolean =
      dir != null && Option(dir.list()).exists(_.contains("output.txt"))

    val base =
      if (!onlyOnce) cwd
      else {
        // The absolute form of "." ends in "/.", so two getParentFile calls give the
        // parent of the working directory; it is null when there is no such parent.
        val logParent = cwd.getAbsoluteFile.getParentFile.getParentFile
        val candidates = Option(logParent).toSeq.flatMap(p => Seq(p, p.getParentFile))
        candidates.find(holdsOutputTxt).getOrElse(cwd)
      }

    val tempDir = new File(base.getCanonicalPath + File.separator + dirName)
    if (!tempDir.exists) tempDir.mkdirs()
    tempDir.getAbsolutePath
  }

  /**
   * Lists the per-partition files written for `file`, i.e. the siblings named
   * `<file name>.<number>`, ordered by that number.
   *
   * Siblings that merely share the prefix but do not end in a purely numeric suffix are
   * ignored instead of failing the number conversion; a missing or unreadable parent
   * directory yields an empty array.
   */
  private def getSortedFiles(file: File): Array[File] = {
    val prefix = file.getName + "."
    val siblings = Option(file.getParentFile)
      .flatMap(parent => Option(parent.listFiles)) // listFiles is null on I/O error
      .getOrElse(Array.empty[File])
    val numbered = siblings.flatMap { f =>
      val name = f.getName
      if (name.startsWith(prefix)) {
        val suffix = name.substring(prefix.length)
        if (suffix.nonEmpty && suffix.forall(_.isDigit)) {
          // Try guards against a digit run too long for an Int.
          scala.util.Try(suffix.toInt).toOption.map(n => (n, f))
        } else None
      } else None
    }
    numbered.sortBy(_._1).map(_._2)
  }

  /** Sorts `df` by all of its columns, in schema order. */
  private def sortByAllColumns(df: DataFrame): DataFrame = {
    val names = df.schema.fieldNames
    df.sort(names.head, names.tail: _*)
  }

  /**
   * Iterates over the lines of `files` in order, closing each underlying source as soon
   * as its last line has been consumed.
   */
  private def lineIterator(files: Array[File]): Iterator[String] =
    files.iterator.flatMap { f =>
      val source = Source.fromFile(f)
      val lines = source.getLines()
      new Iterator[String] {
        // Once closed the underlying reader must not be polled again.
        private var finished = false

        override def hasNext: Boolean = !finished && {
          if (lines.hasNext) true
          else {
            finished = true
            source.close()
            false
          }
        }

        override def next(): String = lines.next()
      }
    }

  /**
   * Runs `sqlString` through `snc`, writes the sorted result to per-partition files and
   * compares it line by line with the result of the same query run through `sqlContext`.
   *
   * The Spark result is only computed when its partition-0 file does not exist yet.
   * Fails with an assertion error on the first differing line, when one side has more
   * lines than the other, or when the number of compared lines differs from `numRows`.
   * Diagnostics are written to `pw`, which is flushed even when an assertion fails.
   *
   * @param snc        context used to produce the result under test
   * @param sqlString  query text
   * @param numRows    expected number of result rows
   * @param queryNum   query label used in file names and messages
   * @param tableType  label appended to the Snappy output directory name
   * @param pw         writer receiving progress and mismatch messages
   * @param sqlContext context used to produce the reference result
   */
  def assertQueryFullResultSet(snc: SnappyContext, sqlString: String, numRows: Int,
      queryNum: String, tableType: String, pw: PrintWriter, sqlContext: SQLContext): Any = {
    val snappyDF = snc.sql(sqlString)
    val snappyDest = getTempDir("snappyQueryFiles_" + tableType, onlyOnce = false)
    val sparkDest = getTempDir("sparkQueryFiles", onlyOnce = true)
    val sparkFile = new File(sparkDest, s"Spark_$queryNum.out")
    val snappyFile = new File(snappyDest, s"Snappy_$queryNum.out")
    // scalastyle:off println
    try {
      writeToFile(sortByAllColumns(snappyDF), snappyFile, snc)
      pw.println(s"$queryNum Result Collected in files with prefix $snappyFile")
      if (!new File(s"$sparkFile.0").exists()) {
        writeToFile(sortByAllColumns(sqlContext.sql(sqlString)), sparkFile, snc)
        pw.println(s"$queryNum Result Collected in files with prefix $sparkFile")
      }
      val expectedLineSet = lineIterator(getSortedFiles(sparkFile))
      val actualLineSet = lineIterator(getSortedFiles(snappyFile))
      var numLines = 0
      while (expectedLineSet.hasNext && actualLineSet.hasNext) {
        val expectedLine = expectedLineSet.next()
        val actualLine = actualLineSet.next()
        if (!actualLine.equals(expectedLine)) {
          pw.println(s"\n** For $queryNum result mismatch observed**")
          pw.println(s"\nExpected Result \n: $expectedLine")
          pw.println(s"\nActual Result \n: $actualLine")
          pw.println(s"\nQuery =" + sqlString + " Table Type : " + tableType)
          assert(assertion = false, s"\n** For $queryNum result mismatch observed** \n" +
              s"Expected Result \n: $expectedLine \n" +
              s"Actual Result \n: $actualLine \n" +
              s"Query =" + sqlString + " Table Type : " + tableType)
        }
        numLines += 1
      }
      if (actualLineSet.hasNext || expectedLineSet.hasNext) {
        pw.println(s"\nFor $queryNum result count mismatch observed")
        assert(assertion = false, s"\nFor $queryNum result count mismatch observed")
      }
      assert(numLines == numRows, s"\nFor $queryNum result count mismatch " +
          s"observed: Expected=$numRows, Got=$numLines")
    } finally {
      // Flush on every exit path so mismatch diagnostics survive a failed assertion.
      pw.flush()
    }
    // scalastyle:on println
  }

  /**
   * Same as [[assertQueryFullResultSet]], after enabling `spark.sql.crossJoin.enabled`
   * on both contexts.
   */
  def assertJoinFullResultSet(snc: SnappyContext, sqlString: String, numRows: Int,
      queryNum: String, tableType: String, pw: PrintWriter, sqlContext: SQLContext): Any = {
    snc.sql("set spark.sql.crossJoin.enabled = true")
    sqlContext.sql("set spark.sql.crossJoin.enabled = true")
    assertQueryFullResultSet(snc, sqlString, numRows, queryNum, tableType, pw, sqlContext)
  }

  /**
   * Writes `df` as comma-separated text, one file per partition, named
   * `<dest>.<partition id>`. The parent directory of `dest` is created when missing.
   *
   * Double and `java.math.BigDecimal` values are coarsened before being written (see the
   * comments in the body); all other values are written with their string form.
   * `snc` is not used by this method.
   */
  def writeToFile(df: DataFrame, dest: File, snc: SnappyContext): Unit = {
    val parent = dest.getParentFile
    if (!parent.exists()) {
      parent.mkdirs()
    }
    val destFile = dest.getAbsolutePath
    implicit val encoder = RowEncoder(df.schema)
    df.mapPartitions { iter =>
      val sb = new StringBuilder
      val partitionId = TaskContext.getPartitionId()
      val pw = new PrintWriter(s"$destFile.$partitionId")
      try {
        iter.foreach { row =>
          row.toSeq.foreach {
            case d: Double =>
              // Snap to a multiple of 0.2 (coarser than one decimal digit), presumably to
              // absorb small floating-point differences between the two engines.
              sb.append(math.floor(d * 5.0 + 0.25) / 5.0).append(',')
            case bd: java.math.BigDecimal =>
              // Two fractional digits, rounding half up.
              sb.append(bd.setScale(2, java.math.RoundingMode.HALF_UP)).append(',')
            case v => sb.append(v).append(',')
          }
          // Drop the trailing comma of the row just appended.
          val len = sb.length
          if (len > 0) sb.setLength(len - 1)
          sb.append('\n')
          // Spill the buffer to the file once it reaches 1 MiB of characters.
          if (sb.length >= 1048576) {
            pw.append(sb)
            pw.flush()
            sb.clear()
          }
        }
        if (sb.nonEmpty) {
          pw.append(sb)
          pw.flush()
        }
      } finally {
        pw.close()
      }
      // Nothing is returned to the driver; collect() only forces execution.
      Iterator.empty
    }.collect()
  }

  /**
   * Checks the full result set of every query listed in `NWQueries.queries`.
   *
   * Q1-Q24 go through [[assertQueryFullResultSet]] and Q25-Q56 through
   * [[assertJoinFullResultSet]], each with its expected row count. Q37 is deliberately
   * not checked, and any other query name only prints "OK".
   *
   * Sub-variants that are currently disabled (name -> expected rows):
   * Q25_1 1, Q25_2 1, Q26_1 54, Q26_2 60, Q27_1 5, Q27_2 8, Q27_3 3, Q27_4 6, Q28_1 12,
   * Q28_2 5, Q29_1 5, Q29_2 6, Q30_1 8, Q30_2 6, Q31_1 502, Q31_2 286, Q31_3 219,
   * Q31_4 484, Q32_1 282, Q33_1 769, Q34_1 1, Q34_2 4, Q35_1 2, Q35_2 3, Q36_1 232,
   * Q36_2 61, Q38_1 2080, Q38_2 2041, Q40_1 12, Q40_2 9, Q42_1 22, Q42_2 7, Q43_1 10,
   * Q43_2 2, Q49_1 1713225, Q49_2 1741240, Q51_1 2080, Q51_2 2041, Q55_1 7, Q55_2 6,
   * Q56_1 8, Q56_2 8, Q56_3 8 (the three Q56 variants used the text of NWQueries.Q56).
   */
  def validateQueriesFullResultSet(snc: SnappyContext, tableType: String,
      pw: PrintWriter, sqlContext: SQLContext): Unit = {
    // (name, query text, expected row count) for single-table style checks
    val plainChecks: Map[String, (String, Int)] = Seq(
      ("Q1", NWQueries.Q1, 8), ("Q2", NWQueries.Q2, 91), ("Q3", NWQueries.Q3, 830),
      ("Q4", NWQueries.Q4, 9), ("Q5", NWQueries.Q5, 9), ("Q6", NWQueries.Q6, 9),
      ("Q7", NWQueries.Q7, 9), ("Q8", NWQueries.Q8, 6), ("Q9", NWQueries.Q9, 3),
      ("Q10", NWQueries.Q10, 2), ("Q11", NWQueries.Q11, 4), ("Q12", NWQueries.Q12, 2),
      ("Q13", NWQueries.Q13, 2), ("Q14", NWQueries.Q14, 69), ("Q15", NWQueries.Q15, 5),
      ("Q16", NWQueries.Q16, 8), ("Q17", NWQueries.Q17, 3), ("Q18", NWQueries.Q18, 9),
      ("Q19", NWQueries.Q19, 13), ("Q20", NWQueries.Q20, 1), ("Q21", NWQueries.Q21, 1),
      ("Q22", NWQueries.Q22, 1), ("Q23", NWQueries.Q23, 1), ("Q24", NWQueries.Q24, 4)
    ).map { case (name, sql, rows) => name -> ((sql, rows)) }.toMap

    // (name, query text, expected row count) for checks that enable cross joins first
    val joinChecks: Map[String, (String, Int)] = Seq(
      ("Q25", NWQueries.Q25, 1), ("Q26", NWQueries.Q26, 86), ("Q27", NWQueries.Q27, 9),
      ("Q28", NWQueries.Q28, 12), ("Q29", NWQueries.Q29, 8), ("Q30", NWQueries.Q30, 8),
      ("Q31", NWQueries.Q31, 830), ("Q32", NWQueries.Q32, 8), ("Q33", NWQueries.Q33, 37),
      ("Q34", NWQueries.Q34, 5), ("Q35", NWQueries.Q35, 3), ("Q36", NWQueries.Q36, 290),
      ("Q38", NWQueries.Q38, 2155), ("Q39", NWQueries.Q39, 9), ("Q40", NWQueries.Q40, 830),
      ("Q41", NWQueries.Q41, 2155), ("Q42", NWQueries.Q42, 22), ("Q43", NWQueries.Q43, 830),
      ("Q44", NWQueries.Q44, 830), ("Q45", NWQueries.Q45, 1788650),
      ("Q46", NWQueries.Q46, 1788650), ("Q47", NWQueries.Q47, 1788650),
      ("Q48", NWQueries.Q48, 1788650), ("Q49", NWQueries.Q49, 1788650),
      ("Q50", NWQueries.Q50, 2155), ("Q51", NWQueries.Q51, 2155),
      ("Q52", NWQueries.Q52, 2155), ("Q53", NWQueries.Q53, 2155),
      ("Q54", NWQueries.Q54, 2155), ("Q55", NWQueries.Q55, 21), ("Q56", NWQueries.Q56, 8)
    ).map { case (name, sql, rows) => name -> ((sql, rows)) }.toMap

    for (q <- NWQueries.queries) {
      q._1 match {
        case "Q37" => // check disabled (expected 77 rows when enabled)
        case name =>
          (plainChecks.get(name), joinChecks.get(name)) match {
            case (Some((sql, rows)), _) =>
              assertQueryFullResultSet(snc, sql, rows, name, tableType, pw, sqlContext)
            case (_, Some((sql, rows))) =>
              assertJoinFullResultSet(snc, sql, rows, name, tableType, pw, sqlContext)
            case _ =>
              // scalastyle:off println
              println("OK")
              // scalastyle:on println
          }
      }
    }
  }
}
diff --git a/cluster/src/dunit/scala/org/apache/spark/sql/SmartConnectorFunctions.scala b/cluster/src/dunit/scala/org/apache/spark/sql/SmartConnectorFunctions.scala new file mode
/*
 * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License. See accompanying
 * LICENSE file.
 */
package org.apache.spark.sql

import java.io.{File, FileOutputStream, PrintWriter}
import java.net.InetAddress

import io.snappydata.benchmark.TPCHColumnPartitionedTable
import io.snappydata.test.util.TestException
import org.apache.spark.rdd.ZippedPartitionsPartition
import org.apache.spark.sql.catalyst.plans.physical.SinglePartition
import org.apache.spark.sql.collection.MultiBucketExecutorPartition
import org.apache.spark.sql.execution.row.RowTableScan
import org.apache.spark.{SparkConf, SparkContext}


/**
 * Intentionally empty. The dunit tests pass `classOf[SmartConnectorFunctions]`
 * to `vm.invoke(...)` together with the name of a method defined on the
 * companion object below.
 */
class SmartConnectorFunctions {

}

/**
 * Helpers that are executed from a dunit VM against a standalone Spark
 * cluster connected to the SnappyData locator ("smart connector" mode).
 */
object SmartConnectorFunctions {

  /**
   * Builds the SparkConf shared by every smart-connector entry point: a
   * standalone master on the local host (port 7077), the executor class path
   * taken from SNAPPY_DIST_CLASSPATH, and the locator connection string.
   */
  private def connectorConf(locatorNetPort: Int): SparkConf = {
    val hostName = InetAddress.getLocalHost.getHostName
    new SparkConf()
        .setAppName("test Application")
        .setMaster(s"spark://$hostName:7077")
        .set("spark.executor.extraClassPath",
          getEnvironmentVariable("SNAPPY_DIST_CLASSPATH"))
        .set("snappydata.connection", s"localhost:$locatorNetPort")
  }

  /** Runs the TPCH queries through the connector and validates the results. */
  def queryValidationOnConnector(locatorNetPort: Int): Unit = {
    val sc = SparkContext.getOrCreate(connectorConf(locatorNetPort))
    val snc = SnappyContext(sc)

    TPCHUtils.queryExecution(snc, true)
    TPCHUtils.validateResult(snc, true)
  }

  /** Creates and loads the TPCH tables through the connector. */
  def createTablesUsingConnector(locatorNetPort: Int): Unit = {
    val sc = SparkContext.getOrCreate(connectorConf(locatorNetPort))
    val snc = SnappyContext(sc)

    TPCHUtils.createAndLoadTables(snc, isSnappy = true)
  }

  /**
   * Returns the value of the environment variable `env`.
   *
   * The previous implementation compared `env` (the variable *name*) with
   * null instead of the looked-up value, so an undefined variable was
   * silently returned as null.
   *
   * @throws TestException if the variable is not defined
   */
  def getEnvironmentVariable(env: String): String =
    sys.env.getOrElse(env,
      throw new TestException(s"Environment variable $env is not defined"))

  /**
   * Loads the NorthWind tables into a plain Spark session and validates the
   * NorthWind queries for `tableType` against the connector, appending the
   * report to `ValidateNWQueries_<tableType>.out`.
   */
  def nwQueryValidationOnConnector(locatorNetPort: Int, tableType: String): Unit = {
    val sc = SparkContext.getOrCreate(connectorConf(locatorNetPort))
    val snc = SnappyContext(sc)
    snc.snappySession.externalCatalog.invalidateAll()
    val sqlContext = new SparkSession(sc).sqlContext
    // second argument 'true' => append to an existing report file
    val pw = new PrintWriter(new FileOutputStream(
      new File(s"ValidateNWQueries_$tableType.out"), true))
    try {
      NorthWindDUnitTest.createAndLoadSparkTables(sqlContext)
      // validateReplicatedTableQueries(snc)
      NorthWindDUnitTest.validateQueriesFullResultSet(snc, tableType, pw, sqlContext)
    } finally {
      pw.close()
    }
  }

  /** Entry point invoked from the dunit VM for the row-table pruning check. */
  def verifyRowTablePartitionPruning(locatorNetPort: Int): Unit = {
    val snc = getSmartConnectorModeSnappyContext(locatorNetPort)
    verifyRowTablePruning(snc)
  }

  /** Creates (or reuses) a SnappyContext running in smart connector mode. */
  def getSmartConnectorModeSnappyContext(locatorNetPort: Int): SnappyContext = {
    // NOTE(review): "driver-memory"/"executor-memory" look like spark-submit
    // flag names; the documented conf keys are spark.driver.memory and
    // spark.executor.memory. Left unchanged - confirm whether these settings
    // are expected to have any effect.
    val conf = connectorConf(locatorNetPort)
        .set("driver-memory", "2G")
        .set("executor-memory", "2G")

    SnappyContext(SparkContext.getOrCreate(conf))
  }

  // point lookup; the order key literal is appended by the caller
  val query1 = "select * from orders where o_orderkey = "

  val query2 = "select * from orders where o_orderkey = {fn substring('d1xxd2', 2, 1)} "

  val query3 = "select * from orders where o_orderkey = substring('acbc801xx', 5, 3) "

  val query4 = "select * from orders where o_orderkey = {fn trim(" +
      "substring(' acbc801xx', length(' 12345'), length('801'))) }"

  val query5 = "select * from orders where o_orderkey = trim(" +
      "substring(' acbc1410xx', length(' 12345'), length('1410'))) "

  val query6 = "select O_ORDERDATE, {fn TIMESTAMPADD(SQL_TSI_DAY," +
      " {fn FLOOR((-1 * {fn DAYOFYEAR(O_ORDERDATE)} - 1))}, O_ORDERDATE)}" +
      " from orders where O_ORDERKEY = 32"

  /**
   * Creates the ORDERS table as a 5-bucket row table and checks that point
   * lookups on the order key scan exactly one partition holding the expected
   * bucket.
   */
  def verifyRowTablePruning(snc: SnappyContext): Unit = {

    val tpchDataPath = TPCHColumnPartitionedTable.getClass.getResource("/TPCH").getPath
    val buckets_Order_Lineitem = "5"
    TPCHColumnPartitionedTable.createPopulateOrderTable(snc, tpchDataPath,
      isSnappy = true, buckets_Order_Lineitem, null, provider = "row")

    def validateSinglePartition(df: DataFrame, bucketId: Int): Unit = {
      val scan = df.queryExecution.executedPlan.collectFirst {
        case c: RowTableScan => c
      }.getOrElse(throw new AssertionError("Expecting RowTable Scan"))

      val partitions = scan.dataRDD.partitions
      assert(scan.outputPartitioning == SinglePartition)
      assert(partitions.length == 1, {
        val sb = new StringBuilder("Pruning not in effect ? partitions found ")
        partitions.foreach(p => sb.append(p.index).append(","))
        sb.toString
      })
      val bstr = partitions(0) match {
        case zp: ZippedPartitionsPartition => zp.partitionValues.map {
          case mb: MultiBucketExecutorPartition => mb.bucketsString
        }
        case _ => Nil
      }

      // each BucketExecutor must have only one bucket.
      // there are 2 BucketExecutor entries due to ZipPartition of RowBuffer.
      assert(bstr.forall(_.toInt == bucketId), s"Expected $bucketId, found $bstr")
    }

    validateSinglePartition(executeQuery(snc, query1 + 1, 1), 4)
    validateSinglePartition(executeQuery(snc, query1 + 32, 32), 0)
    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
    // repeating the query deliberately
    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
    validateSinglePartition(executeQuery(snc, query1 + 1408, 1408), 0)
    validateSinglePartition(executeQuery(snc, query1 + 1409, 1409), 2)
    validateSinglePartition(executeQuery(snc, query1 + 1410, 1410), 0)
    validateSinglePartition(executeQuery(snc, query1 + 1796, 1796), 4)
    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
    executeQuery(snc, query1 + "'1'", 1)
    executeQuery(snc, query1 + "'32'", 32)
    executeQuery(snc, query2, 1)
    executeQuery(snc, query3, 801)
    executeQuery(snc, query4, 801)
    executeQuery(snc, query5, 1410)

    val df = executeQuery(snc, query6, 32, false)
    val r = df.collect()(0)
    assert(r.getDate(0).toString.equals("1995-07-16"))
    assert(r.getDate(1).toString.equals("1994-12-30"))
  }

  /**
   * Runs `sql` and, unless `doAssert` is false, checks that the first column
   * of the first returned row equals `orderKey`.
   */
  private def executeQuery(snc: SnappyContext, sql: String, orderKey: Int,
      doAssert: Boolean = true): DataFrame = {
    val df = snc.sql(sql)
    if (doAssert) assert(df.collect()(0).getLong(0) == orderKey)
    df
  }
}
/*
 * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License. See accompanying
 * LICENSE file.
 */
package org.apache.spark.sql

import java.io.{File, FileOutputStream, PrintStream, PrintWriter}
import java.sql.{Connection, Date, DriverManager, PreparedStatement, ResultSet}

import scala.collection.mutable.{ArrayBuffer, ListBuffer}

import io.snappydata.benchmark.TPCH_Queries.createQuery
import io.snappydata.benchmark.snappy.tpch.QueryExecutor
import io.snappydata.benchmark.{TPCHColumnPartitionedTable, TPCHReplicatedTable, TPCH_Queries}
import io.snappydata.cluster.ClusterManagerTestBase
import io.snappydata.test.dunit.AvailablePortHelper

import org.apache.spark.{Logging, SparkContext}

/**
 * Distributed unit test that loads the small TPCH data set and runs the 22
 * TPCH queries in embedded mode, through the smart connector (on vm3) and
 * through JDBC prepared statements.
 */
class TPCHDUnitTest(s: String) extends ClusterManagerTestBase(s)
    with Logging {

  override val locatorNetPort: Int = TPCHUtils.locatorNetPort
  val queries = Array("1", "2", "3", "4", "5", "6", "7", "8", "9",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
    "20", "21", "22")
  override val stopNetServersInTearDown = false

  protected val productDir =
    SmartConnectorFunctions.getEnvironmentVariable("SNAPPY_HOME")

  override def beforeClass(): Unit = {
    vm3.invoke(classOf[ClusterManagerTestBase], "startSparkCluster", productDir)
    super.beforeClass()
    startNetworkServersOnAllVMs()
  }

  override def afterClass(): Unit = {
    try {
      vm3.invoke(classOf[ClusterManagerTestBase], "stopSparkCluster", productDir)
      Array(vm2, vm1, vm0).foreach(_.invoke(getClass, "stopNetworkServers"))
      ClusterManagerTestBase.stopNetworkServers()
    } finally {
      super.afterClass()
    }
  }

  /** Creates the tables (connector or embedded, chosen by clock parity) and validates. */
  def testSnappy(): Unit = {
    val snc = SnappyContext(sc)

    // create table randomly either using smart connector or
    // from embedded mode
    if ((System.currentTimeMillis() % 2) == 0) {
      logInfo("CREATING TABLE USING SMART CONNECTOR")

      vm3.invoke(classOf[SmartConnectorFunctions],
        "createTablesUsingConnector", locatorNetPort)
    } else {
      logInfo("CREATING TABLE IN EMBEDDED MODE")
      TPCHUtils.createAndLoadTables(snc, isSnappy = true)
    }
    TPCHUtils.queryExecution(snc, isSnappy = true)
    TPCHUtils.validateResult(snc, isSnappy = true)

    vm3.invoke(classOf[SmartConnectorFunctions],
      "queryValidationOnConnector", locatorNetPort)
  }

  /** Runs all TPCH queries as JDBC prepared statements against a net server on vm2. */
  def testSnappy_PrepStatement(): Unit = {
    val serverHostPort = AvailablePortHelper.getRandomAvailableTCPPort
    vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", serverHostPort)
    // scalastyle:off println
    println(s"testSnappy_PrepStatement: network server started at $serverHostPort")
    // scalastyle:on println

    val snc = SnappyContext(sc)
    logInfo("CREATING TABLES ")
    TPCHUtils.createAndLoadTables(snc, isSnappy = true)
    val conn = DriverManager.getConnection(
      "jdbc:snappydata://localhost:" + serverHostPort)
    // the connection used to be leaked; always release it
    try {
      runQueriesUsingPrepStatement(conn, snc)
    } finally {
      conn.close()
    }
  }

  /**
   * Prepares, binds, executes and verifies each of the 22 TPCH queries on
   * `conn`. `snc` is only needed to register the temp view used by Q15.
   */
  def runQueriesUsingPrepStatement(conn: Connection, snc: SnappyContext): Unit = {
    val tpchQueries: Array[Int] = (1 to 22).toArray
    val isDynamic: Boolean = false

    // scalastyle:off println
    for (query <- tpchQueries) {
      println(s"Executing query#$query")
      val prepStatement = prepareTPCHStatement(conn, snc, query, isDynamic)
      try {
        val rs = prepStatement.executeQuery()
        try {
          verifyTPCHQueryResult(rs, query)
        } finally {
          rs.close()
        }
      } finally {
        prepStatement.close()
      }
    }
    // scalastyle:on println
  }

  /**
   * Returns the prepared statement for TPCH query `query` (1 to 22) with all
   * of its parameters bound. The statement is not executed here; Q5
   * previously called executeQuery() while binding, which ran the query
   * twice and leaked the first ResultSet.
   */
  private def prepareTPCHStatement(conn: Connection, snc: SnappyContext,
      query: Int, isDynamic: Boolean): PreparedStatement = query match {
    case 1 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery1)
      val parameters = TPCH_Queries.getQ1Parameter(isDynamic)
      ps.setInt(1, parameters(0).toInt)
      ps
    case 2 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery2ForPrepStatement)
      val parameters = TPCH_Queries.getQ2Parameter(isDynamic)
      ps.setString(1, parameters(0))
      ps.setInt(2, parameters(1).toInt)
      ps.setString(3, "%" + parameters(2))
      ps.setString(4, parameters(3))
      ps
    case 3 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery3ForPrepStatement)
      val parameters = TPCH_Queries.getQ3Parameter(isDynamic)
      ps.setString(1, parameters(0))
      ps.setDate(2, Date.valueOf(parameters(1)))
      ps.setDate(3, Date.valueOf(parameters(2)))
      ps
    case 4 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery4ForPrepStatement)
      val parameters = TPCH_Queries.getQ4Parameter(isDynamic)
      ps.setDate(1, Date.valueOf(parameters(0)))
      ps.setDate(2, Date.valueOf(parameters(1)))
      ps
    case 5 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery5ForPrepStatement)
      val parameters = TPCH_Queries.getQ5Parameter(isDynamic)
      for (i <- 0 until 3) ps.setString(i + 1, parameters(i))
      ps
    case 6 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery6ForPrepStatement)
      val parameters = TPCH_Queries.getQ6Parameter(isDynamic)
      for (i <- 0 until 5) ps.setString(i + 1, parameters(i))
      ps
    case 7 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery7ForPrepStatement)
      val parameters = TPCH_Queries.getQ7Parameter(isDynamic)
      for (i <- 0 until 4) ps.setString(i + 1, parameters(i))
      ps
    case 8 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery8ForPrepStatement)
      val parameters = TPCH_Queries.getQ8Parameter(isDynamic)
      for (i <- 0 until 3) ps.setString(i + 1, parameters(i))
      ps
    case 9 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery9ForPrepStatement)
      val parameters = TPCH_Queries.getQ9Parameter(isDynamic)
      ps.setString(1, "%" + parameters(0) + "%")
      ps
    case 10 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery10ForPrepStatement)
      val parameters = TPCH_Queries.getQ10Parameter(isDynamic)
      ps.setDate(1, Date.valueOf(parameters(0)))
      ps.setDate(2, Date.valueOf(parameters(1)))
      ps
    case 11 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery11ForPrepStatement)
      val parameters = TPCH_Queries.getQ11Parameter(isDynamic)
      for (i <- 0 until 2) ps.setString(i + 1, parameters(i))
      ps
    case 12 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery12ForPrepStatement)
      val parameters = TPCH_Queries.getQ12Parameter(isDynamic)
      for (i <- 0 until 4) ps.setString(i + 1, parameters(i))
      ps
    case 13 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery13ForPrepStatement)
      val parameters = TPCH_Queries.getQ13Parameter(isDynamic)
      ps.setString(1, "%" + parameters(0) + "%" + parameters(1) + "%")
      ps
    case 14 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery14ForPrepStatement)
      val parameters = TPCH_Queries.getQ14Parameter(isDynamic)
      for (i <- 0 until 2) ps.setString(i + 1, parameters(i))
      ps
    case 15 =>
      // create a temp view required for Q15
      val queryToBeExecuted =
        createQuery(TPCH_Queries.getQuery15_Temp, TPCH_Queries.getQ15TempParameter(isDynamic))
      val result = snc.sql(queryToBeExecuted)
      result.createGlobalTempView("revenue")
      // prepare Q15
      conn.prepareStatement(TPCH_Queries.getQuery15)
    case 16 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery16ForPrepStatement)
      val parameters = TPCH_Queries.getQ16Parameter(isDynamic)
      ps.setString(1, "Brand#" + parameters(0) + parameters(1))
      ps.setString(2, parameters(2) + "%")
      // JDBC positions 3..10 take parameters(3)..parameters(10)
      for (i <- 3 to 10) ps.setInt(i, parameters(i).toInt)
      ps
    case 17 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery17ForPrepStatement)
      val parameters = TPCH_Queries.getQ17Parameter(isDynamic)
      ps.setString(1, "Brand#" + parameters(0) + parameters(1))
      ps.setString(2, parameters(2))
      ps
    case 18 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery18ForPrepStatement)
      val parameters = TPCH_Queries.getQ18Parameter(isDynamic)
      ps.setString(1, parameters(0))
      ps
    case 19 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery19ForPrepStatement)
      val parameters = TPCH_Queries.getQ19Parameter(isDynamic)
      ps.setString(1, "Brand#" + parameters(0))
      ps.setInt(2, parameters(1).toInt)
      ps.setInt(3, parameters(2).toInt)
      ps.setString(4, "Brand#" + parameters(3))
      ps.setInt(5, parameters(4).toInt)
      ps.setInt(6, parameters(5).toInt)
      ps.setString(7, "Brand#" + parameters(6))
      ps.setInt(8, parameters(7).toInt)
      ps.setInt(9, parameters(8).toInt)
      ps
    case 20 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery20ForPrepStatement)
      val parameters = TPCH_Queries.getQ20Parameter(isDynamic)
      ps.setString(1, parameters(0) + "%")
      ps.setString(2, parameters(1))
      ps.setString(3, parameters(2))
      ps.setString(4, parameters(3))
      ps
    case 21 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery21ForPrepStatement)
      val parameters = TPCH_Queries.getQ21Parameter(isDynamic)
      ps.setString(1, parameters(0))
      ps
    case 22 =>
      val ps = conn.prepareStatement(TPCH_Queries.getQuery22ForPrepStatement)
      val parameters = TPCH_Queries.getQ22Parameter(isDynamic)
      for (i <- 0 until 14) ps.setInt(i + 1, parameters(i).toInt)
      ps
  }

  /**
   * Dumps `rs` to `JDBCPrepStmtResult_query<N>.txt` (one comma separated line
   * per row) and compares the sorted lines with the expected result shipped
   * under /TPCH/RESULT/JDBC.
   */
  private def verifyTPCHQueryResult(rs: ResultSet, queryNumber: Int): Unit = {
    val rsmd = rs.getMetaData
    val columnsNumber = rsmd.getColumnCount
    var count = 0
    val queryResultsFileName = s"JDBCPrepStmtResult_query$queryNumber.txt"
    val writer = new PrintWriter(new File(queryResultsFileName))
    try {
      while (rs.next()) {
        count += 1
        val row = (1 to columnsNumber).map { i =>
          if (rsmd.getColumnType(i) == java.sql.Types.DOUBLE) {
            // eliminating mismatch due to minor difference in fractional parts
            rs.getDouble(i).ceil.formatted("%.0f")
          } else {
            // SQL NULL is written as the text "null", as before
            String.valueOf(rs.getString(i))
          }
        }.mkString(",")
        // scalastyle:off println
        writer.println(row)
        // scalastyle:on println
      }
    } finally {
      writer.close()
    }
    // scalastyle:off println
    println(s"Number of rows : $count")
    // scalastyle:on println

    val actualFile = sc.textFile(queryResultsFileName)
    val expectedFile = sc.textFile(getClass.getResource(
      s"/TPCH/RESULT/JDBC/ExpectedJDBCPrepStmtResult_query$queryNumber.txt").getPath)

    // collect each RDD once and reuse the result
    val expectedLineSet = expectedFile.collect().toList.sorted
    val actualLineSet = actualFile.collect().toList.sorted

    assert(count == expectedLineSet.size, s"For query $queryNumber " +
        s"result count mismatch observed with " +
        s"expected number of rows: ${expectedLineSet.size}" +
        s" and actual number of rows: $count")

    var resultMismatchFound = false
    for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
      if (!expectedLine.equals(actualLine)) {
        resultMismatchFound = true
        // scalastyle:off println
        println(s"For query $queryNumber result mismatch observed")
        println(s"Expected : $expectedLine")
        println(s"Found : $actualLine")
        println(s"-------------------------------------")
        // scalastyle:on println
      }
    }
    assert(!resultMismatchFound, s"For query $queryNumber result mismatch observed")
  }


  /*
    TODO : Kishor
    This test is disabled as of now. For the dunit test we are using very small TPCH data
    i.e. 5.5MB. With so little data, it is quite possible that for some queries the result
    will be the same with dynamic parameters.
    This needs to be made foolproof.
   */
  def _testSnappy_Tokenization(): Unit = {
    val snc = SnappyContext(sc)

    // create table randomly either using smart connector or
    // from embedded mode
    if ((System.currentTimeMillis() % 2) == 0) {
      logInfo("CREATING TABLE USING SMART CONNECTOR")
      vm3.invoke(classOf[SmartConnectorFunctions],
        "createTablesUsingConnector", locatorNetPort)
    } else {
      logInfo("CREATING TABLE IN EMBEDDED MODE")
      TPCHUtils.createAndLoadTables(snc, isSnappy = true)
    }
    TPCHUtils.queryExecution(snc, isSnappy = true, isDynamic = true, fileName = "_FirstRun")
    TPCHUtils.queryExecution(snc, isSnappy = true, isDynamic = true, fileName = "_SecondRun")

    TPCHUtils.validateResult(snc, isSnappy = true, isTokenization = true)

    vm3.invoke(classOf[SmartConnectorFunctions],
      "queryValidationOnConnector", locatorNetPort)
  }

  /**
   * Converts each row to a comma terminated string with coarse numeric
   * rounding and returns the strings sorted, so two result sets can be
   * compared irrespective of row order and small floating point noise.
   */
  private def normalizeRow(rows: Array[Row]): Array[String] = {
    val newBuffer: ArrayBuffer[String] = new ArrayBuffer
    val sb = new StringBuilder
    rows.foreach { r =>
      r.toSeq.foreach {
        case d: Double =>
          // round to nearest integer if large enough else
          // round to the nearest multiple of 0.2
          if (math.abs(d) >= 1000) {
            sb.append(math.floor(d + 0.5)).append(',')
          } else {
            sb.append(math.floor(d * 5.0 + 0.25) / 5.0).append(',')
          }
        case bd: java.math.BigDecimal =>
          sb.append(bd.setScale(2, java.math.RoundingMode.HALF_UP)).append(',')
        case v => sb.append(v).append(',')
      }
      newBuffer += sb.toString()
      sb.clear()
    }
    newBuffer.sortWith(_ < _).toArray
  }

  def testTokenization_embedded(): Unit = {
    startNetworkServersOnAllVMs()
    val snc = SnappyContext(sc)

    logInfo("CREATING TABLE IN EMBEDDED MODE")
    TPCHUtils.createAndLoadTables(snc, isSnappy = true)
    Thread.sleep(20000)
    runtpchMultipleTimes(snc)
  }

  def testTokenization_split(): Unit = {
    startNetworkServersOnAllVMs()
    val snc = SnappyContext(sc)

    logInfo("CREATING TABLE USING SMART CONNECTOR")
    vm3.invoke(classOf[SmartConnectorFunctions],
      "createTablesUsingConnector", locatorNetPort)
    Thread.sleep(20000)
    runtpchMultipleTimes(snc)
  }

  /**
   * Drops everything from the first "limit " onwards. A query that starts
   * with "limit " (index 0) or has no such clause is returned unchanged.
   */
  private def removeLimitClause(query: String): String = {
    val idxstart = query.indexOf("limit ")
    if (idxstart > 0) query.substring(0, idxstart) else query
  }

  /**
   * Runs every TPCH query except Q15 three times uncached (each text is
   * requested separately from TPCH_Queries), then re-runs the same texts
   * through the plan cache and asserts that the cached path returns the same
   * normalized rows.
   */
  private def runtpchMultipleTimes(snc: SnappyContext): Unit = {
    snc.sql(s"set spark.sql.autoBroadcastJoinThreshold=1")
    val results: ListBuffer[(String, String, String, Array[String],
        Array[String], Array[String], String)] = new ListBuffer()

    // runs a query bypassing the plan cache and clears the cache afterwards
    def runUncached(query: String): Array[String] = {
      val rows = normalizeRow(snc.sqlUncached(query).collect())
      snc.snappySession.clearPlanCache()
      rows
    }

    queries.foreach { qNum =>
      val queryToBeExecuted1 = removeLimitClause(TPCH_Queries.getQuery(qNum, true, true))
      val queryToBeExecuted2 = removeLimitClause(TPCH_Queries.getQuery(qNum, true, true))
      val queryToBeExecuted3 = removeLimitClause(TPCH_Queries.getQuery(qNum, true, true))
      if (!qNum.equals("15")) {
        val r1 = runUncached(queryToBeExecuted1)
        val r2 = runUncached(queryToBeExecuted2)
        val r3 = runUncached(queryToBeExecuted3)
        results += ((queryToBeExecuted1, queryToBeExecuted2,
            queryToBeExecuted3, r1, r2, r3, qNum))
      }
    }

    var cached = 0
    results.foreach { case (q1, q2, q3, r1, r2, r3, _) =>
      assert(normalizeRow(snc.sql(q1).collect()).sameElements(r1))
      assert(normalizeRow(snc.sql(q2).collect()).sameElements(r2))
      assert(normalizeRow(snc.sql(q3).collect()).sameElements(r3))

      // a single cache entry means the three variants shared one plan
      val size = SnappySession.getPlanCache.size()
      snc.snappySession.clearPlanCache()
      if (size == 1) {
        cached = cached + 1
      }
    }
    logInfo(s"Number of queries cached = ${cached}")
    logInfo(s"Size of plan cache = ${SnappySession.getPlanCache.size()}")
  }


  def _testSpark(): Unit = {
    val snc = new SQLContext(sc)
    TPCHUtils.createAndLoadTables(snc, isSnappy = false)
    TPCHUtils.queryExecution(snc, isSnappy = false)
    TPCHUtils.validateResult(snc, isSnappy = false)
  }

  def testSnap1296_1297(): Unit = {
    val snc = SnappyContext(sc)
    TPCHUtils.createAndLoadTables(snc, isSnappy = true)
    val conn = getANetConnection(locatorNetPort)
    // the connection and statement used to be leaked on failure
    try {
      val prepStatement = conn.prepareStatement(TPCH_Queries.getQuery10_ForPrepareStatement)
      try {
        verifyResultSnap1296_1297(prepStatement)
      } finally {
        prepStatement.close()
      }

//    TODO: Enable the test below after fixing SNAP-1323
//    val prepStatement2 = conn.prepareStatement(getTPCHQuery10Parameterized)
//    val pmd = prepStatement2.getParameterMetaData
//    println("pmd = " + pmd + " pmd.getParameterCount =" + pmd.getParameterCount)
//    prepStatement2.setString(1, "1993-10-01")
//    prepStatement2.setString(2, "1993-10-01")
//
//    verifyResultSnap1296_1297(prepStatement2)
//    prepStatement2.close()
    } finally {
      conn.close()
    }
  }

  def testRowTablePruning(): Unit = {

    logInfo("Started the Row Table Partition Pruning In SmartConnector")

    vm3.invoke(classOf[SmartConnectorFunctions],
      "verifyRowTablePartitionPruning", locatorNetPort)

    logInfo("Finished the Row Table Partition Pruning In SmartConnector")
  }

  /**
   * Executes `prepStatement` and checks that the number of returned rows
   * equals the number of lines in the expected Q10 result file.
   */
  private def verifyResultSnap1296_1297(prepStatement: PreparedStatement): Unit = {
    val rs = prepStatement.executeQuery
    var count = 0
    try {
      val columnsNumber = rs.getMetaData.getColumnCount
      while (rs.next()) {
        count += 1
        // read every column so that row materialization is exercised too
        for (i <- 1 to columnsNumber) rs.getString(i)
      }
    } finally {
      rs.close()
    }
    // scalastyle:off println
    println(s"Number of rows : $count")
    // scalastyle:on println

    val expectedFile = sc.textFile(getClass.getResource(
      s"/TPCH/RESULT/Snappy_10.out").getPath)
    val expectedNoOfLines = expectedFile.collect().length
    assert(count == expectedNoOfLines)
  }

  // only referenced by the disabled SNAP-1323 section of testSnap1296_1297
  private def getTPCHQuery10Parameterized: String = {
    "select" +
        " C_CUSTKEY," +
        " C_NAME," +
        " sum(l_extendedprice * (1 - l_discount)) as revenue," +
        " C_ACCTBAL," +
        " n_name," +
        " C_ADDRESS," +
        " C_PHONE," +
        " C_COMMENT" +
        " from" +
        " ORDERS," +
        " LINEITEM," +
        " CUSTOMER," +
        " NATION" +
        " where" +
        " C_CUSTKEY = o_custkey" +
        " and l_orderkey = o_orderkey" +
        " and o_orderdate >= ?" +
        " and o_orderdate < add_months(?, 3)" +
        " and l_returnflag = 'R'" +
        " and C_NATIONKEY = n_nationkey" +
        " group by" +
        " C_CUSTKEY," +
        " C_NAME," +
        " C_ACCTBAL," +
        " C_PHONE," +
        " n_name," +
        " C_ADDRESS," +
        " C_COMMENT" +
        " order by" +
        " revenue desc" +
        " limit 20"
  }

}

/** TPCH table creation, query execution and result validation shared by the dunit tests. */
object TPCHUtils extends Logging {

  val locatorNetPort = AvailablePortHelper.getRandomAvailableTCPPort

  val queries = Array("1", "2", "3", "4", "5", "6", "7", "8", "9",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
    "20", "21", "22")

  /**
   * Creates and populates the eight TPCH tables from the bundled /TPCH data:
   * REGION, NATION and SUPPLIER through TPCHReplicatedTable ("USING row"),
   * the remaining five through TPCHColumnPartitionedTable with 5 buckets.
   */
  def createAndLoadTables(snc: SQLContext, isSnappy: Boolean): Unit = {
    val tpchDataPath = getClass.getResource("/TPCH").getPath // "/data/wrk/w/TPCH/1GB"

    val usingOptionString =
      s"""
           USING row
           OPTIONS ()"""

    TPCHReplicatedTable.createPopulateRegionTable(usingOptionString, snc,
      tpchDataPath, isSnappy, null)
    TPCHReplicatedTable.createPopulateNationTable(usingOptionString, snc,
      tpchDataPath, isSnappy, null)
    TPCHReplicatedTable.createPopulateSupplierTable(usingOptionString, snc,
      tpchDataPath, isSnappy, null)

    val buckets_Order_Lineitem = "5"
    val buckets_Cust_Part_PartSupp = "5"
    TPCHColumnPartitionedTable.createPopulateOrderTable(snc, tpchDataPath,
      isSnappy, buckets_Order_Lineitem, null)
    TPCHColumnPartitionedTable.createPopulateLineItemTable(snc, tpchDataPath,
      isSnappy, buckets_Order_Lineitem, null)
    TPCHColumnPartitionedTable.createPopulateCustomerTable(snc, tpchDataPath,
      isSnappy, buckets_Cust_Part_PartSupp, null)
    TPCHColumnPartitionedTable.createPopulatePartTable(snc, tpchDataPath,
      isSnappy, buckets_Cust_Part_PartSupp, null)
    TPCHColumnPartitionedTable.createPopulatePartSuppTable(snc, tpchDataPath,
      isSnappy, buckets_Cust_Part_PartSupp, null)
  }

  /**
   * Compares the per-query result files with the expected ones and writes
   * every discrepancy to a report file; the test fails if the report is not
   * empty.
   *
   * In tokenization mode the first and second run of each query are compared
   * instead, and a *matching* pair is what gets reported.
   */
  def validateResult(snc: SQLContext, isSnappy: Boolean, isTokenization: Boolean = false): Unit = {
    val sc: SparkContext = snc.sparkContext

    val fileName =
      if (isTokenization) "Result_Snappy_Tokenization.out"
      else if (isSnappy) "Result_Snappy.out"
      else "Result_Spark.out"

    // closing the PrintStream also closes the wrapped FileOutputStream
    val resultsLogStream: PrintStream =
      new PrintStream(new FileOutputStream(new File(fileName)))

    // scalastyle:off
    try {
      for (query <- queries) {
        println(s"For Query $query")

        if (!isTokenization) {
          val expectedFile = sc.textFile(getClass.getResource(
            s"/TPCH/RESULT/Snappy_$query.out").getPath)

          val queryResultsFileName =
            if (isSnappy) s"1_Snappy_Q${query}_Results.out"
            else s"1_Spark_Q${query}_Results.out"
          val actualFile = sc.textFile(queryResultsFileName)

          val expectedLineSet = expectedFile.collect().toList.sorted
          val actualLineSet = actualFile.collect().toList.sorted

          if (!actualLineSet.equals(expectedLineSet)) {
            if (!(expectedLineSet.size == actualLineSet.size)) {
              resultsLogStream.println(s"For $query " +
                  s"result count mismatched observed with " +
                  s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
            } else {
              for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
                if (!expectedLine.equals(actualLine)) {
                  resultsLogStream.println(s"For $query result mismatched observed")
                  resultsLogStream.println(s"Expected : $expectedLine")
                  resultsLogStream.println(s"Found : $actualLine")
                  resultsLogStream.println(s"-------------------------------------")
                }
              }
            }
          }
        } else {
          val firstRunFile = sc.textFile(s"Snappy_${query}_FirstRun.out")
          val secondRunFile = sc.textFile(s"Snappy_${query}_SecondRun.out")

          val expectedLineSet = firstRunFile.collect().toList.sorted
          val actualLineSet = secondRunFile.collect().toList.sorted

          if (actualLineSet.equals(expectedLineSet)) {
            resultsLogStream.println(s"For $query result matched observed")
            resultsLogStream.println(s"-------------------------------------")
          }
        }
      }
    } finally {
      resultsLogStream.close()
    }
    // scalastyle:on

    val reportedLines = sc.textFile(fileName).count()
    val outcome = if (isTokenization) "match" else "mismatch"

    // warn before asserting: the warning used to sit after the assert and
    // could therefore never be reached when the check failed
    if (reportedLines != 0) {
      logWarning(s"QUERY RESULT ${outcome.toUpperCase} OBSERVED. " +
          s"Look at $fileName for detailed failure")
    }
    assert(reportedLines == 0,
      s"Query result $outcome Observed. Look at $fileName for detailed failure")
  }

  /**
   * Executes all TPCH queries through QueryExecutor, printing average times
   * to System.out.
   *
   * NOTE(review): `fileName` is accepted but never forwarded, yet
   * _testSnappy_Tokenization passes "_FirstRun"/"_SecondRun" and
   * validateResult later reads Snappy_<q>_FirstRun.out - confirm whether
   * QueryExecutor.execute should receive it.
   */
  def queryExecution(snc: SQLContext, isSnappy: Boolean, isDynamic: Boolean = false,
      warmup: Int = 0, runsForAverage: Int = 1, isResultCollection: Boolean = true,
      fileName: String = ""): Unit = {
    snc.sql(s"set spark.sql.crossJoin.enabled = true")

    queries.foreach(query => QueryExecutor.execute(query, snc, isResultCollection,
      isSnappy, isDynamic = isDynamic, warmup = warmup, runsForAverage = runsForAverage,
      avgTimePrintStream = System.out))
  }
}
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.sql + +import io.snappydata.{Property, SnappyFunSuite} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.internal.SQLConf + +/** + * Suite to run TPCH in a single VM. Disabled, by default, + * as TPCH is run with the TPCHDunitTest. This is primarily + * for debugging. + * + * The only test is registered with `ignore`. Its body sets the broadcast-join + * threshold and the hash-join size on the context, then loads the TPCH tables, + * executes the queries and validates the results, all through `TPCHUtils`. + */ +class TPCHSuite extends SnappyFunSuite with BeforeAndAfterAll { + + ignore("Test TPCH") { + val snc = SnappyContext(sc) + /* 104857600 = 100 * 1024 * 1024 */ + snc.conf.setConfString(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "104857600") + Property.HashJoinSize.set(snc.conf, "1g") + TPCHUtils.createAndLoadTables(snc, isSnappy = true) + TPCHUtils.queryExecution(snc, isSnappy = true) + // TPCHUtils.queryExecution(snc, isSnappy = true, warmup = 6, runsForAverage = 10, + // isResultCollection = false) + TPCHUtils.validateResult(snc, isSnappy = true) + } +} diff --git a/cluster/src/dunit/scala/org/apache/spark/sql/udf/UserDefinedFunctionsDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/sql/udf/UserDefinedFunctionsDUnitTest.scala new file mode 100644 index 0000000000..6d35902582 --- /dev/null +++ b/cluster/src/dunit/scala/org/apache/spark/sql/udf/UserDefinedFunctionsDUnitTest.scala @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.udf + +import java.io.File + +import scala.language.{implicitConversions, postfixOps} +import scala.sys.process._ +import scala.util.control.NonFatal +import scala.util.{Failure, Success, Try} + +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.{AvailablePortHelper, DistributedTestBase} + +import org.apache.spark.{SparkUtilsAccess, TestUtils} +import org.apache.spark.TestUtils.JavaSourceFromString +import org.apache.spark.sql.udf.UserDefinedFunctionsDUnitTest._ +import org.apache.spark.sql.{SnappyContext, SnappySession} + +/** Row shape that `createTables` in the companion object parallelizes and inserts into RR_TABLE and COL_TABLE. */ +case class OrderData(ref: Int, description: String, amount: Long) + +/** + * Cluster tests for Java UDFs. The tests generate a small Java UDF class from a source + * string, package it in a jar, register it with CREATE FUNCTION ... USING JAR and assert + * on the value returned for every row of col_table. + * + * NOTE(review): the embedded Java sources implement `UDF1` without type arguments; with a + * raw `UDF1` the `@Override` on `call(String)` would presumably not compile, so type + * arguments may have been lost from this text. Confirm against the repository. + */ +class UserDefinedFunctionsDUnitTest(val s: String) + extends ClusterManagerTestBase(s) { + + /** + * Stops the running lead, then twice starts a lead on vm3 and runs `simpleUDFTest` there, + * creating the UDF only on the first run. The `finally` block stops the vm3 lead, starts a + * lead again via `ClusterManagerTestBase.startSnappyLead` and drops APP.intudf if present. + */ + def testDriverHA(): Unit = { + // Stop the lead node + ClusterManagerTestBase.stopAny() + + // Start the lead node in another JVM. The executors should + // connect with this new lead. + // In this case servers are already running and a lead comes + // and join + try { + vm3.invoke(getClass, "startSnappyLead", startArgs) + vm3.invoke(getClass, "createTables") + vm3.invoke(getClass, "simpleUDFTest", true) + vm3.invoke(getClass, "stopAny") + // Again start the lead node + vm3.invoke(getClass, "startSnappyLead", startArgs) + vm3.invoke(getClass, "createTables") // as stop Spark deletes tables. 
+ + vm3.invoke(getClass, "simpleUDFTest", false) + } catch { + case e: Throwable => throw new Exception(e) + } finally { + vm3.invoke(getClass, "stopAny") + ClusterManagerTestBase.startSnappyLead(ClusterManagerTestBase.locatorPort, bootProps) + val snSession = new SnappySession(sc) + snSession.sql("drop function if exists APP.intudf") + } + } + + /** + * Creates the UDF, calls `failTheExecutors` (ignoring whatever it throws) and then polls + * through `DistributedTestBase.waitForCriterion` until `simpleUDFTest` passes again without + * re-creating the UDF; APP.intudf is dropped on the passing attempt. + * The arguments 30000 and 500 are presumably timeout and poll interval in milliseconds; + * confirm against `waitForCriterion`. + */ + def testExecutorHA(): Unit = { + var snSession = new SnappySession(sc) + createTables() + + simpleUDFTest(createUDF = true) + + try { + failTheExecutors() + } catch { + case _: Throwable => + } + DistributedTestBase.waitForCriterion(new DistributedTestBase.WaitCriterion { + override def done(): Boolean = { + // The executors should have started automatically, so this should not hang + try { + snSession = new SnappySession(sc) + simpleUDFTest(createUDF = false) + snSession.sql("drop function APP.intudf") + true + } catch { + case NonFatal(e) => + getLogWriter.warn(s"Failed in executor restart due to ${e.toString}") + false // ignore and retry till timeout + } + } + + override def description(): String = + "waiting for executor to restart after forced failure" + }, 30000, 500, true) + } + + /** + * Creates APP.intudf through a statement obtained from `getANetConnection`, checks the + * result from a SnappySession, drops the function through the same statement and then + * expects the query to fail in a new session. + */ + def testUDFWithConnection(): Unit = { + var snSession = new SnappySession(sc) + createTables() + + val udfText: String = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + val file = createUDFClass("IntegerUDF", udfText) + val jar = createJarFile(Seq(file)) + + val netPort1 = AvailablePortHelper.getRandomAvailableTCPPort + vm2.invoke(classOf[ClusterManagerTestBase], "startNetServer", netPort1) + val conn = getANetConnection(netPort1) + val s = conn.createStatement() + + s.execute(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + + val row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + 
+ + s.execute("drop function intudf") + + snSession = new SnappySession(sc) + + Try(snSession.sql("select intudf(description) from col_table ")) match { + case Success(_) => throw new AssertionError( + "Should not have succedded with dropped udf") + case Failure(_) => // Do nothing + } + + conn.close() + } + + /** + * Registers IntegerUDF returning 6, drops the function, registers a regenerated IntegerUDF + * returning 7 under the same function name and checks that the query now yields 7. + */ + def testSameUDFWithCodeChange(): Unit = { + val snSession = new SnappySession(sc) + createTables() + + var udfText: String = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + var file = createUDFClass("IntegerUDF", udfText) + var jar = createJarFile(Seq(file)) + snSession.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + var row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + + udfText = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 7; " + + "}" + + "}" + + snSession.sql("drop function intudf") + file = createUDFClass("IntegerUDF", udfText) + jar = createJarFile(Seq(file)) + + snSession.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + + row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 7)) + snSession.sql("drop function APP.intudf") + } + + /** + * Like the code-change test, but the first IntegerUDF returns a private field initialised + * to 6 and the replacement has no field and returns the literal 6; both runs expect 6. + */ + def testSameUDFWithFieldChange(): Unit = { + val snSession = new SnappySession(sc) + createTables() + + var udfText: String = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + "\n " + + " private int value = 6 ;" + + " @Override public Integer call(String s){ " + + " return value; " + + "}" + + "}" + var file = createUDFClass("IntegerUDF", udfText) + var jar = createJarFile(Seq(file)) + 
snSession.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + var row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + + udfText = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + + snSession.sql("drop function intudf") + file = createUDFClass("IntegerUDF", udfText) + jar = createJarFile(Seq(file)) + + snSession.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + + row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + snSession.sql("drop function APP.intudf") + } + + /** + * Packs IntegerUDF1 (returns 6) and IntegerUDF2 (returns 8) into one jar, registers both + * functions from it, drops intudf1 and checks that intudf2 still returns 8. + */ + def testTwoUDFsDroppingOne(): Unit = { + val snSession = new SnappySession(sc) + createTables() + + var udfText: String = "public class IntegerUDF1 implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + val file1 = createUDFClass("IntegerUDF1", udfText) + + udfText = "public class IntegerUDF2 implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 8; " + + "}" + + "}" + + val file2 = createUDFClass("IntegerUDF2", udfText) + + val jar = createJarFile(Seq(file1, file2)) + snSession.sql(s"CREATE FUNCTION APP.intudf1 AS IntegerUDF1 " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + var row = snSession.sql("select intudf1(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + + snSession.sql(s"CREATE FUNCTION APP.intudf2 AS IntegerUDF2 " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + row = snSession.sql("select intudf2(description) from col_table").collect() + // row.foreach(r => println(r)) + 
row.foreach(r => assert(r(0) == 8)) + + snSession.sql("drop function intudf1") + + row = snSession.sql("select intudf2(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 8)) + } +} + +/** + * Helpers used by the test class; `testDriverHA` also runs `createTables` and + * `simpleUDFTest` by name on vm3 through `vm3.invoke(getClass, ...)`. + */ +object UserDefinedFunctionsDUnitTest { + + private def sc = SnappyContext.globalSparkContext + + /** Delegates to `SparkUtilsAccess.createUDFClass` with the class name and its source text. */ + def createUDFClass(name: String, code: String): File = { + SparkUtilsAccess.createUDFClass(name, code) + } + + /** Delegates to `SparkUtilsAccess.createJarFile`; the returned string is spliced into USING JAR clauses. */ + def createJarFile(files: Seq[File]): String = { + SparkUtilsAccess.createJarFile(files) + } + + /** Runs a 5-partition job over 1 until 100 whose map function always throws `InternalError`. */ + def failTheExecutors(): Unit = { + sc.parallelize(1 until 100, 5).map { _ => + throw new InternalError() + }.collect() + } + + /** + * When `createUDF` is true, registers APP.intudf from a generated IntegerUDF that returns 6; + * in either case asserts that intudf(description) is 6 for every row of col_table. + */ + def simpleUDFTest(createUDF: Boolean): Unit = { + val snSession = new SnappySession(sc) + if (createUDF) { + val udfText: String = "public class IntegerUDF implements " + + "org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return 6; " + + "}" + + "}" + val file = createUDFClass("IntegerUDF", udfText) + val jar = createJarFile(Seq(file)) + + snSession.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + } + + val row = snSession.sql("select intudf(description) from col_table").collect() + // row.foreach(r => println(r)) + row.foreach(r => assert(r(0) == 6)) + } + + /** + * Drops and recreates RR_TABLE and COL_TABLE (the latter `using column`) and inserts five + * `OrderData` rows (i, "some i", i) for i in 1 to 5 into each. + */ + def createTables() { + val snSession = new SnappySession(sc) + val rdd = sc.parallelize((1 to 5).map(i => OrderData(i, s"some $i", i))) + val refDf = snSession.createDataFrame(rdd) + snSession.sql("DROP TABLE IF EXISTS RR_TABLE") + snSession.sql("DROP TABLE IF EXISTS COL_TABLE") + + snSession.sql("CREATE TABLE RR_TABLE(OrderRef INT NOT NULL, description String, price BIGINT)") + snSession.sql("CREATE TABLE COL_TABLE(OrderRef INT NOT NULL, description String, " + + "price LONG) using column options()") + + refDf.write.insertInto("RR_TABLE") + refDf.write.insertInto("COL_TABLE") + } +} diff --git 
a/cluster/src/dunit/scala/org/apache/spark/transaction/SnapshotGIIDUnitTest.scala b/cluster/src/dunit/scala/org/apache/spark/transaction/SnapshotGIIDUnitTest.scala new file mode 100644 index 0000000000..ec5b52ba93 --- /dev/null +++ b/cluster/src/dunit/scala/org/apache/spark/transaction/SnapshotGIIDUnitTest.scala @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.transaction + +import java.util.Properties + +import com.gemstone.gemfire.internal.cache.{BucketRegion, GemFireCacheImpl, LocalRegion, PartitionedRegion} +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.test.dunit.SerializableRunnable + +import org.apache.spark.sql.{SaveMode, SnappyContext} + +/** Three-integer row type used to build the DataFrames this test inserts. */ +case class Data(col1: Int, col2: Int, col3: Int) + +/** + * Checks the row count of a column table (BUCKETS 1, REDUNDANCY 2) after vm1 is stopped, + * a server is started on it again and the table's region has finished initializing, with + * inserts happening before, during and after the outage. + */ +class SnapshotGIIDUnitTest(s: String) extends ClusterManagerTestBase(s) { + + /** + * `createTable` loads 50 rows, 50 more are inserted while vm1 is stopped and another 50 + * after vm1 is back and `waitForRegionInit` has returned, so 150 rows are expected. + */ + def testColumnTableGII(): Unit = { + + val snc = SnappyContext(sc) + val tableName = "app.test_table" + createTable(snc, tableName, Map("BUCKETS" -> "1", "REDUNDANCY" -> "2")) + + var props = bootProps.clone().asInstanceOf[java.util.Properties] + val port = ClusterManagerTestBase.locPort + + def restartServer(props: Properties): SerializableRunnable = new SerializableRunnable() { + override def run(): Unit = ClusterManagerTestBase.startSnappyServer(port, props) + } 
+ + vm1.invoke(classOf[ClusterManagerTestBase], "stopAny") + + val data = for (i <- 1 to 50) yield (Seq(i, (i + 1), (i + 2))) + val rdd = snc.sparkContext.parallelize(data, 2).map(s => + Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + dataDF.write.insertInto(tableName) + vm1.invoke(restartServer(props)) + vm1.invoke(waitForRegionInit(tableName)) + dataDF.write.insertInto(tableName) + val numRows = snc.sql(s"select * from $tableName").collect().length + assert(numRows == 150) + } + + /** + * Creates `tableName` as a column table with the given `props` and appends 50 `Data` + * rows of the form (i, i + 1, i + 2) for i in 1 to 50. + */ + def createTable(snc: SnappyContext, + tableName: String, + props: Map[String, String]): Unit = { + val data = for (i <- 1 to 50) yield (Seq(i, (i + 1), (i + 2))) + val rdd = sc.parallelize(data, data.length).map(s => new Data(s(0), s(1), s(2))) + val dataDF = snc.createDataFrame(rdd) + snc.createTable(tableName, "column", dataDF.schema, props) + dataDF.write.format("column").mode(SaveMode.Append).saveAsTable(tableName) + } + + /** + * Builds a runnable that sleeps in 100 ms steps until DDL replay is done, the table's + * buckets are initialized and bucket 0 is a `BucketRegion`, and then waits on that + * bucket's initialization. + */ + @throws[Exception] + protected def waitForRegionInit(tableName: String): SerializableRunnable = { + new SerializableRunnable() { + def run() { + val regionName = Misc.getRegionPath(tableName).toUpperCase + while (!Misc.initialDDLReplayDone()) Thread.sleep(100) + val cache = GemFireCacheImpl.getInstance + val pr = cache.getRegion(regionName).asInstanceOf[PartitionedRegion] + while (!pr.getRegionAdvisor.areBucketsInitialized) Thread.sleep(100) + while (!pr.getRegionAdvisor.getBucket(0).isInstanceOf[BucketRegion]) Thread.sleep(100) + val lr = pr.getRegionAdvisor.getBucket(0).asInstanceOf[LocalRegion] + lr.waitOnInitialization() + } + } + } + +} diff --git a/cluster/src/main/java/io/snappydata/gemxd/SnappySystemAdmin.java b/cluster/src/main/java/io/snappydata/gemxd/SnappySystemAdmin.java new file mode 100644 index 0000000000..be595f1541 --- /dev/null +++ b/cluster/src/main/java/io/snappydata/gemxd/SnappySystemAdmin.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.gemxd; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; + +import com.gemstone.gemfire.SystemFailure; +import com.gemstone.gemfire.internal.GemFireTerminateError; +import com.gemstone.gemfire.internal.GemFireUtilLauncher; +import com.gemstone.gemfire.internal.i18n.LocalizedStrings; +import com.pivotal.gemfirexd.internal.iapi.tools.i18n.LocalizedResource; +import com.pivotal.gemfirexd.internal.impl.tools.ij.utilMain; +import com.pivotal.gemfirexd.tools.GfxdSystemAdmin; + +/** + * Command-line admin entry point built on {@code GfxdSystemAdmin}. It swaps in Snappy-specific + * message keys, adjusts the inherited usage and help maps, and handles the {@code version} + * command itself before delegating everything else to the superclass. + * + * NOTE(review): {@code ArrayList}, {@code Iterator} and {@code Map.Entry} appear without type + * arguments in this text although their elements are assigned to {@code String}; the type + * arguments were presumably lost. Confirm against the repository. + */ +public class SnappySystemAdmin extends GfxdSystemAdmin { + + /** Calls the superclass constructor and then overrides two message-key fields with Snappy keys. */ + private SnappySystemAdmin() { + super(); + UTIL_Tools_DSProps = "UTIL_Snappy_Tools_DSProps"; + UTIL_DSProps_HelpPost = "UTIL_Snappy_Tools_DSProps_HelpPost"; + } + + /** + * Loads version properties, initializes the help and usage maps, removes every entry named in + * {@code removedCommands}, applies the {@code modifiedUsageInfo} and {@code modifiedHelpInfo} + * overrides and invokes the command. A {@code GemFireTerminateError} becomes the process exit code. + */ + public static void main(String[] args) { + try { + SnappyDataVersion.loadProperties(); + + final SnappySystemAdmin admin = new SnappySystemAdmin(); + admin.initHelpMap(); + admin.initUsageMap(); + admin.initMapsForGFXD(); + for (String removedCmd : removedCommands) { + admin.usageMap.remove(removedCmd); + admin.helpMap.remove(removedCmd); + } + for (Map.Entry overrideUse : modifiedUsageInfo + .entrySet()) { + admin.usageMap.put(overrideUse.getKey(), overrideUse.getValue()); + } + for (Map.Entry overrideHelp : modifiedHelpInfo + .entrySet()) { 
+ admin.helpMap.put(overrideHelp.getKey(), overrideHelp.getValue()); + } + + admin.invoke(args); + } catch (GemFireTerminateError term) { + System.exit(term.getExitCode()); + } + } + + /** Prints the localized product-directory message after passing it through {@code convertGfxdMessageToSnappy}. */ + @Override + protected void printProductDirectory() { + String productDirMessage = LocalizedResource.getMessage( + "UTIL_version_ProductDirectory", getProductDir()); + System.out.println(utilMain.convertGfxdMessageToSnappy(productDirMessage)); + } + + /** Returns the launcher script name, a space, and the usage text stored for the lower-cased command. */ + @Override + protected String getUsageString(String cmd) { + return GemFireUtilLauncher.SCRIPT_NAME + ' ' + + this.usageMap.get(cmd.toLowerCase()); + } + + /** + * Clears the default log file name, gives {@code handleVersion} the first chance when there + * are one or two arguments, and otherwise delegates to the superclass. In all cases a + * zero-length {@code generatedcode.log} (relative path) is deleted afterwards, ignoring errors. + */ + @Override + public void invoke(String[] args) { + this.defaultLogFileName = null; + try { + if (args.length == 1 || args.length == 2) { + boolean isVersion = handleVersion(args); + if (isVersion) { + return; + } + } + + super.invoke(args); + } finally { + // remove zero-sized generatedcode.log file + try { + File codeLogFile = new File("generatedcode.log"); + if (codeLogFile.exists() && codeLogFile.isFile() && codeLogFile.length() == 0) { + codeLogFile.delete(); + } + } catch (Throwable t) { + // ignore at this point + } + } + } + + /** + * Parses the arguments and, when the command is {@code version}, prints version information + * (optionally with a CREATE or FULL option) and returns true; returns false for any other command. + * Illegal dash arguments are reported on stderr and rethrown as {@code GemFireTerminateError}. + * + * NOTE(review): {@code cmdLine.remove(0)} runs after the empty-command branch, so that branch + * presumably relies on {@code printHelp}/{@code usage} not returning normally. Confirm. + */ + public boolean handleVersion(String[] args) { + String cmd; + final ArrayList cmdLine = new ArrayList<>(Arrays.asList(args)); + try { + Iterator it = cmdLine.iterator(); + while (it.hasNext()) { + String arg = it.next(); + if (arg.startsWith("-")) { + checkDashArg(null, arg, it); + } else { + break; + } + } + } catch (IllegalArgumentException ex) { + System.err.println(LocalizedStrings.SystemAdmin_ERROR.toLocalizedString() + + ": " + getExceptionMessage(ex)); + // fix for bug 28351 + throw new GemFireTerminateError("exiting due to illegal arguments", 1); + } + if (cmdLine.size() == 0) { + if (help) { + printHelp("gemfire"); + } else { + System.err.println(LocalizedStrings.SystemAdmin_ERROR_WRONG_NUMBER_OF_COMMAND_LINE_ARGS.toLocalizedString()); + usage(); + } + } + cmd = cmdLine.remove(0); + cmd = checkCmd(cmd); + try { + Iterator it = cmdLine.iterator(); 
+ while (it.hasNext()) { + String arg = it.next(); + if (arg.startsWith("-")) { + checkDashArg(cmd, arg, it); + } + } + } catch (IllegalArgumentException ex) { + System.err.println(LocalizedStrings.SystemAdmin_ERROR.toLocalizedString() + + ": " + getExceptionMessage(ex)); + if (debug) { + ex.printStackTrace(System.err); + } + // fix for bug 28351 + throw new GemFireTerminateError("exiting due to illegal arguments", 1); + } + + SystemFailure.loadEmergencyClasses(); + if (help) { + printHelp(cmd); + } + + /* the CREATE option is matched case-sensitively, the FULL option case-insensitively */ + if (cmd.equalsIgnoreCase("version")) { + boolean optionOK = (cmdLine.size() == 0); + if (cmdLine.size() == 1) { + String option = cmdLine.get(0); + if ("CREATE".equals(option) || "FULL".equalsIgnoreCase(option)) { + optionOK = true; + } + } + + if (!optionOK) { + System.err.println(LocalizedStrings.SystemAdmin_ERROR_UNEXPECTED_COMMAND_LINE_ARGUMENTS_0. + toLocalizedString(join(cmdLine))); + usage(cmd); + } + if (cmdLine.size() == 1 && ("CREATE".equals(cmdLine.get(0)))) { + printProductDirectory(); + SnappyDataVersion.createVersionFile(); + } else if (cmdLine.size() == 1 && "FULL".equalsIgnoreCase(cmdLine.get(0))) { + printProductDirectory(); + SnappyDataVersion.print(System.out, true); + } else { + SnappyDataVersion.print(System.out); + } + return true; + } + + return false; + } +} diff --git a/cluster/src/main/java/org/apache/spark/streaming/JavaSnappyStreamingJob.java b/cluster/src/main/java/org/apache/spark/streaming/JavaSnappyStreamingJob.java new file mode 100644 index 0000000000..4285d0eb20 --- /dev/null +++ b/cluster/src/main/java/org/apache/spark/streaming/JavaSnappyStreamingJob.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.streaming; + + +import com.typesafe.config.Config; +import org.apache.spark.sql.SnappyJobValidate; +import org.apache.spark.sql.SnappyJobValidation; +import org.apache.spark.sql.SnappySessionFactory; +import org.apache.spark.streaming.api.java.JavaSnappyStreamingContext; + +import org.apache.spark.util.SnappyUtils; +import spark.jobserver.SparkJobBase; +import spark.jobserver.SparkJobValidation; + +/** + * Base class for streaming jobs written in Java. Subclasses supply the job body and its + * validation; the final {@code validate} and {@code runJob} methods wrap the incoming + * {@code SnappyStreamingContext} in a {@code JavaSnappyStreamingContext} and pass the + * configuration through {@code SnappySessionFactory.updateCredentials} first. + */ +public abstract class JavaSnappyStreamingJob implements SparkJobBase { + + /** Job body supplied by the subclass; whatever it returns is returned from {@code runJob}. */ + abstract public Object runSnappyJob(JavaSnappyStreamingContext snc, Config jobConfig); + + /** Validation supplied by the subclass; {@code validate} converts its result. */ + abstract public SnappyJobValidation isValidJob(JavaSnappyStreamingContext snc, + Config jobConfig); + + /** Wraps the context, updates credentials in the config, asks the subclass and converts the answer. */ + @Override + final public SparkJobValidation validate(Object sc, Config config) { + final SnappyStreamingContext streamingContext = (SnappyStreamingContext)sc; + final JavaSnappyStreamingContext javaContext = + new JavaSnappyStreamingContext(streamingContext); + final Config effectiveConfig = SnappySessionFactory.updateCredentials( + streamingContext.snappySession(), config, true); + final SnappyJobValidation verdict = isValidJob(javaContext, effectiveConfig); + return SnappyJobValidate.validate(verdict); + } + + /** + * Wraps the context, registers session dependencies for this job class with the current + * thread's context class loader, then runs the subclass job with the credential-updated config. + */ + @Override + final public Object runJob(Object sc, Config jobConfig) { + final JavaSnappyStreamingContext context = + new JavaSnappyStreamingContext((SnappyStreamingContext)sc); + SnappyUtils.setSessionDependencies(context.snappySession().sparkContext(), + this.getClass().getCanonicalName(), + Thread.currentThread().getContextClassLoader()); + final Config effectiveConfig = SnappySessionFactory.updateCredentials( + context.snappySession(), jobConfig, true); + return runSnappyJob(context, effectiveConfig); + } +} \ No newline at end of file diff --git 
a/cluster/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/cluster/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager new file mode 100644 index 0000000000..ef49c9f5a8 --- /dev/null +++ b/cluster/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -0,0 +1 @@ +org.apache.spark.scheduler.cluster.SnappyEmbeddedModeClusterManager \ No newline at end of file diff --git a/snappy-tools/src/main/resources/i18n/snappymessages_de.properties b/cluster/src/main/resources/i18n/snappymessages_de.properties similarity index 100% rename from snappy-tools/src/main/resources/i18n/snappymessages_de.properties rename to cluster/src/main/resources/i18n/snappymessages_de.properties diff --git a/cluster/src/main/resources/i18n/snappymessages_en.properties b/cluster/src/main/resources/i18n/snappymessages_en.properties new file mode 100644 index 0000000000..1d0762a4c5 --- /dev/null +++ b/cluster/src/main/resources/i18n/snappymessages_en.properties @@ -0,0 +1,41 @@ +# +# Default english messages in TIBCO ComputeDB if locale specific messages files aren't found. +# +# +# Created by soubhikc on 6/10/15. +# + + +# Utility messages +UTIL_Lead_Usage=Starts/stops a TIBCO ComputeDB Lead JVM, or provides status of a running one. +UTIL_Server_Usage=Starts/stops a TIBCO ComputeDB Server JVM, or provides status of a running one. +UTIL_Locator_Usage=Starts/stops a TIBCO ComputeDB Locator JVM, or provides status of a running one. +UTIL_SnappyShell_Usage=With no arguments, starts the TIBCO ComputeDB shell. +UTIL_version_ShortDesc=Prints TIBCO ComputeDB product version information. 
+UTIL_version_ProductDirectory=TIBCO ComputeDB product directory: {0} + +SD_SERVER_NAME=TIBCO ComputeDB Server +SD_SERVER_SCRIPT=server +SD_LEAD_NAME=TIBCO ComputeDB Lead +SD_LEAD_SCRIPT=leader +SD_LOC_NAME=TIBCO ComputeDB Locator +SD_LOC_SCRIPT=locator + +FS_PRODUCT=TIBCO ComputeDB + +JARTOOLS_INSTALL_JAR_DESC=Install a jar file in a live TIBCO ComputeDB system shipping\ +\ the bytes to the servers without requiring it to be accessible on them +JARTOOLS_REPLACE_JAR_DESC=Replace a jar file in a live TIBCO ComputeDB system with\ +\ a new one for an existing name, shipping the bytes to the servers +JARTOOLS_JAR_FILE_MESSAGE=Path or URL of the jar file to be shipped to\ +\ TIBCO ComputeDB servers +JARTOOLS_REMOVE_JAR_DESC=Remove an installed jar file providing the name used\ +\ during install + +MISCTOOLS_RUN_DESC=Execute SQL commands in given SQL command script.\ +\ The format is the same as expected by TIBCO ComputeDB shell. + +# TIBCO ComputeDB product messages +SD_ZERO_ARGS=Zero arguments provided. + +SD_LEADER_NOT_READY="Leader state is not ready. 
Status is {0}" diff --git a/snappy-tools/src/main/resources/jobserver-defaults.conf b/cluster/src/main/resources/jobserver-defaults.conf similarity index 97% rename from snappy-tools/src/main/resources/jobserver-defaults.conf rename to cluster/src/main/resources/jobserver-defaults.conf index e718589956..b073f0d046 100644 --- a/snappy-tools/src/main/resources/jobserver-defaults.conf +++ b/cluster/src/main/resources/jobserver-defaults.conf @@ -30,20 +30,20 @@ spark { # } filedao { - rootdir = /tmp/spark-jobserver/filedao/data + rootdir = /spark-jobserver/filedao/data } datadao { # storage directory for files that are uploaded to the server # via POST/data commands - rootdir = /tmp/spark-jobserver/upload + rootdir = /spark-jobserver/upload } # To load up job jars on startup, place them here, # with the app name as the key and the path to the jar as the value # job-jar-paths { - # test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.5.3-SNAPSHOT.jar + # test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.6.0.jar # } sqldao { diff --git a/snappy-tools/src/main/resources/jobserver-overrides.conf b/cluster/src/main/resources/jobserver-overrides.conf similarity index 76% rename from snappy-tools/src/main/resources/jobserver-overrides.conf rename to cluster/src/main/resources/jobserver-overrides.conf index af1f218638..80cf66f8df 100644 --- a/snappy-tools/src/main/resources/jobserver-overrides.conf +++ b/cluster/src/main/resources/jobserver-overrides.conf @@ -15,7 +15,7 @@ spark { # To load up job jars on startup, place them here, # with the app name as the key and the path to the jar as the value # job-jar-paths { - # test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.5.3-SNAPSHOT.jar + # test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.6.0.jar # } sqldao { @@ -28,7 +28,7 @@ spark { jdbc-driver = org.h2.Driver # Directory where default H2 driver stores its data. Only needed for H2. 
- rootdir = /tmp/spark-jobserver/sqldao/data + rootdir = /spark-jobserver/sqldao/data # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass @@ -54,7 +54,7 @@ spark { # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext - context-factory = org.apache.spark.sql.SnappyContextFactory + context-factory = org.apache.spark.sql.SnappySessionFactory streaming { @@ -69,4 +69,22 @@ spark { } } + + contexts { + + snappyStreamingContext { + # A zero-arg class implementing spark.jobserver.context.SparkContextFactory + # Determines the type of jobs that can run in a SparkContext + context-factory = org.apache.spark.sql.streaming.SnappyStreamingContextFactory + + streaming { + # if true, stops gracefully by waiting for the processing of all received data to be completed + stopGracefully = true + + # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be + # stopped regardless of whether the StreamingContext has been started. + stopSparkContext = false + } + } + } } diff --git a/cluster/src/main/scala/io/snappydata/ServiceManager.scala b/cluster/src/main/scala/io/snappydata/ServiceManager.scala new file mode 100644 index 0000000000..89910d4eb3 --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/ServiceManager.scala @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata + +import com.pivotal.gemfirexd.FabricService +import com.pivotal.gemfirexd.internal.engine.fabricservice.FabricServiceImpl +import io.snappydata.gemxd.ClusterCallbacksImpl +import io.snappydata.impl.{LeadImpl, LocatorImpl} + +object ServiceManager { + + ClusterCallbacksImpl.initialize() + + private def contextLock = ServerManager.contextLock + + /** + * Get the singleton instance of `Server`. + */ + def getServerInstance: Server = ServerManager.getServerInstance + + /** + * Get the singleton instance of `Locator`. + */ + def getLocatorInstance: Locator = { + var instance: FabricService = FabricServiceImpl.getInstance + if (instance != null) { + return checkLocatorInstance(instance) + } + contextLock.synchronized { + instance = FabricServiceImpl.getInstance + if (instance == null) { + val locator: Locator = new LocatorImpl + FabricServiceImpl.setInstance(locator) + return locator + } + return checkLocatorInstance(instance) + } + } + + /** + * Get the singleton instance of `Lead`. + */ + def getLeadInstance: Lead = { + var instance: FabricService = FabricServiceImpl.getInstance + if (instance != null) { + return checkLeadInstance(instance) + } + contextLock.synchronized { + instance = FabricServiceImpl.getInstance + if (instance == null) { + val lead: Lead = new LeadImpl + FabricServiceImpl.setInstance(lead) + return lead + } + return checkLeadInstance(instance) + } + } + + /** + * Get the current instance of either `Server` or `Locator` or `Lead`. 
+ * This can be null if neither of `getServerInstance` or `getLeadInstance` or + * `getLocatorInstance` have been invoked, or the instance has been stopped. + */ + def currentFabricServiceInstance: FabricService = FabricServiceImpl.getInstance + + private def checkLocatorInstance(instance: FabricService): Locator = { + instance match { + case locator: Locator => locator + case _ => throw new IllegalStateException( + s"Found an instance of another snappy component $instance.") + } + } + + private def checkLeadInstance(instance: FabricService): Lead = { + instance match { + case lead: Lead => lead + case _ => throw new IllegalStateException( + s"Found an instance of another snappy component $instance.") + } + } +} diff --git a/cluster/src/main/scala/io/snappydata/ToolsCallbackImpl.scala b/cluster/src/main/scala/io/snappydata/ToolsCallbackImpl.scala new file mode 100644 index 0000000000..673bdffae8 --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/ToolsCallbackImpl.scala @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata + +import java.io.{File, RandomAccessFile} +import java.lang.reflect.InvocationTargetException +import java.net.{URI, URLClassLoader} + +import com.gemstone.gemfire.cache.EntryExistsException +import scala.collection.JavaConverters._ +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.internal.iapi.error.StandardException +import com.pivotal.gemfirexd.internal.shared.common.reference.SQLState +import io.snappydata.cluster.ExecutorInitiator +import io.snappydata.impl.{ExtendibleURLClassLoader, LeadImpl} +import org.apache.spark.executor.SnappyExecutor +import org.apache.spark.sql.execution.columnar.ExternalStoreUtils +import org.apache.spark.sql.execution.columnar.impl.StoreCallbacksImpl +import org.apache.spark.sql.execution.ui.SQLTab +import org.apache.spark.sql.hive.thriftserver.SnappyHiveThriftServer2 +import org.apache.spark.sql.internal.ContextJarUtils +import org.apache.spark.ui.{JettyUtils, SnappyDashboardTab} +import org.apache.spark.util.SnappyUtils +import org.apache.spark.{Logging, SparkCallbacks, SparkContext, SparkFiles} + +object ToolsCallbackImpl extends ToolsCallback with Logging { + + override def updateUI(sc: SparkContext): Unit = { + + SnappyUtils.getSparkUI(sc).foreach(ui => { + // Create Snappy Dashboard and SQL tabs. + // Set SnappyData authenticator SecurityHandler. 
+ SparkCallbacks.getAuthenticatorForJettyServer() match { + case Some(_) => + logInfo("Setting auth handler") + // Set JettyUtils.skipHandlerStart for adding dashboard and sql security handlers + JettyUtils.skipHandlerStart.set(true) + // Creating SQL and Dashboard UI tabs + if (!sc.isLocal) { + new SQLTab(ExternalStoreUtils.getSQLListener.get(), ui) + } + SnappyHiveThriftServer2.attachUI() + new SnappyDashboardTab(ui) + // Set security handlers + ui.getHandlers.foreach { h => + if (!h.isStarted) { + h.setSecurityHandler(JettyUtils.basicAuthenticationHandler()) + h.start() + } + } + // Unset JettyUtils.skipHandlerStart + JettyUtils.skipHandlerStart.set(false) + case None => logDebug("Not setting auth handler") + // Creating SQL and Dashboard UI tabs + if (!sc.isLocal) { + new SQLTab(ExternalStoreUtils.getSQLListener.get(), ui) + } + SnappyHiveThriftServer2.attachUI() + new SnappyDashboardTab(ui) + } + }) + } + + override def removeAddedJar(sc: SparkContext, jarName: String): Unit = + sc.removeAddedJar(jarName) + + /** + * Callback to spark Utils to fetch file + */ + override def doFetchFile( + url: String, + targetDir: File, + filename: String): File = { + SnappyUtils.doFetchFile(url, targetDir, filename) + } + + override def setSessionDependencies(sparkContext: SparkContext, appName: String, + classLoader: ClassLoader): Unit = { + SnappyUtils.setSessionDependencies(sparkContext, appName, classLoader) + } + + override def addURIs(alias: String, jars: Array[String], + deploySql: String, isPackage: Boolean = true): Unit = { + if (alias != null) { + try { + Misc.getMemStore.getGlobalCmdRgn.create(alias, deploySql) + } catch { + case eee: EntryExistsException => throw StandardException.newException( + SQLState.LANG_DB2_DUPLICATE_NAMES, alias , "of deploying jars/packages") + } + } + val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl] + val loader = lead.urlclassloader + jars.foreach(j => { + val url = new File(j).toURI.toURL + loader.addURL(url) + }) + // 
Close and reopen interpreter + if (alias != null) { + try { + lead.closeAndReopenInterpreterServer() + } catch { + case ite: InvocationTargetException => assert(ite.getCause.isInstanceOf[SecurityException]) + } + } + } + + override def addURIsToExecutorClassLoader(jars: Array[String]): Unit = { + if (ExecutorInitiator.snappyExecBackend != null) { + val snappyexecutor = ExecutorInitiator.snappyExecBackend.executor.asInstanceOf[SnappyExecutor] + snappyexecutor.updateMainLoader(jars) + } + } + + override def removeFunctionJars(args: Array[String]): Unit = { + if (ExecutorInitiator.snappyExecBackend != null) { + // Remove the file from work directory + val jarFile = new File(SparkFiles.getRootDirectory(), args(0)) + if (jarFile.exists()) { + jarFile.delete() + logDebug(s"Deleted jarFile $jarFile for UDF ${args(0)}") + } + + // Remove the file from spark directory + if (!args(0).isEmpty) { // args(0) = appname-filename + val appName = args(0).split('-')(0) + val url = Misc.getMemStore.getGlobalCmdRgn.get(ContextJarUtils.functionKeyPrefix + appName) + if (url != null && !url.isEmpty) { + val executor = ExecutorInitiator.snappyExecBackend.executor.asInstanceOf[SnappyExecutor] + val cachedFileName = s"${url.hashCode}-1_cache" + val lockFileName = s"${url.hashCode}-1_lock" + val localDir = new File(executor.getLocalDir()) + val lockFile = new File(localDir, lockFileName) + val lockFileChannel = new RandomAccessFile(lockFile, "rw").getChannel() + val lock = lockFileChannel.lock() + val cachedFile = new File(localDir, cachedFileName) + try { + if (cachedFile.exists()) { + cachedFile.delete() + logDebug(s"Deleted $cachedFile for UDF ${args(0)}") + } + } finally { + lock.release() + lockFileChannel.close() + } + } + } + } + } + + override def removeURIsFromExecutorClassLoader(jars: Array[String]): Unit = { + if (ExecutorInitiator.snappyExecBackend != null) { + val snappyexecutor = ExecutorInitiator.snappyExecBackend.executor.asInstanceOf[SnappyExecutor] + 
snappyexecutor.removeJarsFromExecutorLoader(jars) + } + } + + override def getAllGlobalCmnds: Array[String] = { + GemFireXDUtils.waitForNodeInitialization() + val r = Misc.getMemStore.getGlobalCmdRgn + val keys = r.keySet().asScala.filter(p => !p.startsWith(ContextJarUtils.functionKeyPrefix)) + r.getAll(keys.asJava).values().toArray.map(_.asInstanceOf[String]) + } + + override def getGlobalCmndsSet: java.util.Set[java.util.Map.Entry[String, String]] = { + GemFireXDUtils.waitForNodeInitialization() + Misc.getMemStore.getGlobalCmdRgn.entrySet() + } + + override def removePackage(alias: String): Unit = { + GemFireXDUtils.waitForNodeInitialization() + Misc.getMemStore.getGlobalCmdRgn.destroy(alias) + } + + override def setLeadClassLoader(): Unit = { + val instance = ServiceManager.currentFabricServiceInstance + instance match { + case li: LeadImpl => + val loader = li.urlclassloader + if (loader != null) { + Thread.currentThread().setContextClassLoader(loader) + } + case _ => + } + } + + override def getLeadClassLoader: URLClassLoader = { + var ret: URLClassLoader = null + val instance = ServiceManager.currentFabricServiceInstance + instance match { + case li: LeadImpl => + val loader = li.urlclassloader + if (loader != null) { + ret = loader + } + case _ => + } + ret + } + + override def checkSchemaPermission(schema: String, currentUser: String): String = + StoreCallbacksImpl.checkSchemaPermission(schema, currentUser) + + override def removeURIs(uris: Array[String], isPackage: Boolean): Unit = { + val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl] + val allURLs = lead.urlclassloader.getURLs + val updatedURLs = allURLs.toBuffer + uris.foreach(uri => { + val newUri = new URI("file:" + uri) + if (updatedURLs.contains(newUri.toURL)) { + updatedURLs.remove(updatedURLs.indexOf(newUri.toURL)) + } + }) + lead.urlclassloader = new ExtendibleURLClassLoader(lead.urlclassloader.getParent) + updatedURLs.foreach(url => lead.urlclassloader.addURL(url)) + 
Thread.currentThread().setContextClassLoader(lead.urlclassloader) + } +} diff --git a/cluster/src/main/scala/io/snappydata/cluster/ExecutorInitiator.scala b/cluster/src/main/scala/io/snappydata/cluster/ExecutorInitiator.scala new file mode 100644 index 0000000000..98a6c96fb0 --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/cluster/ExecutorInitiator.scala @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.cluster + +import java.net.URL +import java.util + +import scala.collection.mutable +import scala.util.control.NonFatal + +import com.gemstone.gemfire.CancelException +import com.gemstone.gemfire.distributed.internal.MembershipListener +import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember +import com.gemstone.gemfire.internal.cache.GemFireCacheImpl +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.internal.engine.store.ServerGroupUtils +import io.snappydata.gemxd.ClusterCallbacksImpl + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.executor.SnappyCoarseGrainedExecutorBackend +import org.apache.spark.memory.SnappyUnifiedMemoryManager +import org.apache.spark.sql.SnappyContext +import org.apache.spark.sql.collection.Utils +import org.apache.spark.{Logging, SparkCallbacks, SparkEnv} + +/** + * This class is responsible for initiating the executor process inside + * the jvm. Also, if an executor has to be stopped, driverURL can be set as None + * and it will take care of stopping the executor. 
+ */ +object ExecutorInitiator extends Logging { + + val SNAPPY_MEMORY_MANAGER: String = classOf[SnappyUnifiedMemoryManager].getName + + private var executorRunnable: ExecutorRunnable = new ExecutorRunnable + + var executorThread: Thread = new Thread(executorRunnable) + + @volatile var snappyExecBackend: SnappyCoarseGrainedExecutorBackend = _ + + class ExecutorRunnable() extends Runnable { + private var driverURL: Option[String] = None + private var driverDM: InternalDistributedMember = _ + @volatile private[cluster] var stopTask = false + @volatile private[cluster] var stopped = true + private[cluster] var retryTask: Boolean = false + private[cluster] val lock = new Object() + private[cluster] val testLock = new Object() + @volatile private[cluster] var testStartDone = false + + val membershipListener: MembershipListener = new MembershipListener { + override def quorumLost(failures: util.Set[InternalDistributedMember], + remaining: util.List[InternalDistributedMember]): Unit = {} + + override def memberJoined(id: InternalDistributedMember): Unit = {} + + override def memberSuspect(id: InternalDistributedMember, + whoSuspected: InternalDistributedMember): Unit = {} + + override def memberDeparted(id: InternalDistributedMember, crashed: Boolean): Unit = { + executorRunnable.memberDeparted(id) + } + } + + def memberDeparted(departedDM: InternalDistributedMember): Unit = lock.synchronized { + if (departedDM.equals(driverDM)) { + setDriverDetails(None, null) + } + } + + def setRetryFlag(retry: Boolean = true): Unit = lock.synchronized { + retryTask = retry + lock.notifyAll() + } + + def getRetryFlag: Boolean = lock.synchronized { + retryTask + } + + def getDriverURL: Option[String] = lock.synchronized { + driverURL + } + + def setDriverDetails(url: Option[String], + dm: InternalDistributedMember): Unit = lock.synchronized { + driverURL = url + driverDM = dm + SnappyContext.clearStaticArtifacts() + lock.notifyAll() + } + + override def run(): Unit = { + stopped = false 
+ var prevDriverURL = "" + var env: SparkEnv = null + var numTries = 0 + try { + GemFireXDUtils.getGfxdAdvisor.getDistributionManager + .addMembershipListener(membershipListener) + while (!stopTask) { + try { + + Misc.checkIfCacheClosing(null) + if (prevDriverURL == getDriverURLString && !getRetryFlag) { + lock.synchronized { + while (!stopTask && prevDriverURL == getDriverURLString && !getRetryFlag) { + lock.wait(1000) + } + } + } else { + if (getRetryFlag) { + if (numTries >= 50) { + logError("Exhausted number of retries to connect to the driver. Exiting.") + return + } + // if it's a retry, wait for sometime before we retry. + // This is a measure to ensure that some unforeseen circumstance + // does not lead to continous retries and the thread hogs the CPU. + numTries += 1 + Thread.sleep(3000) + } + // kill if an executor is already running. + SparkCallbacks.stopExecutor(env) + env = null + + getDriverURL match { + case Some(url) => + + /** + * The executor initialization code has been picked from + * CoarseGrainedExecutorBackend. + * We need to track the changes there and merge them here on a regular basis. + */ + val myId = GemFireCacheImpl.getExisting.getMyId + val executorHost = myId.getHost + val memberId = myId.canonicalString() + SparkHadoopUtil.get.runAsSparkUser { () => + + // Fetch the driver's Spark properties. 
+ val executorConf = Utils.newClusterSparkConf() + Utils.setDefaultSerializerAndCodec(executorConf) + + val port = executorConf.getInt("spark.executor.port", 0) + val (ioEncryptionKey, props) = + SparkCallbacks.fetchDriverProperty(memberId, executorHost, + executorConf, port, url) + + val driverConf = Utils.newClusterSparkConf() + Utils.setDefaultSerializerAndCodec(driverConf) + + for ((key, value) <- props) { + // this is required for SSL in standalone mode + if (SparkCallbacks.isExecutorStartupConf(key)) { + driverConf.setIfMissing(key, value) + } else { + driverConf.set(key, value) + } + } + // TODO: Hemant: add executor specific properties from local + // TODO: conf to this conf that was received from driver. + + // If memory manager is not set, use Snappy unified memory manager + driverConf.setIfMissing("spark.memory.manager", + SNAPPY_MEMORY_MANAGER) + + val cores = driverConf.getInt("spark.executor.cores", + Runtime.getRuntime.availableProcessors() * 2) + + env = SparkCallbacks.createExecutorEnv(driverConf, + memberId, executorHost, port, cores, ioEncryptionKey, isLocal = false) + + // This is not required with snappy + val userClassPath = new mutable.ListBuffer[URL]() + + val rpcenv = SparkCallbacks.getRpcEnv(env) + + val executor = new SnappyCoarseGrainedExecutorBackend( + rpcenv, url, memberId, executorHost, + cores, userClassPath, env) + snappyExecBackend = executor + rpcenv.setupEndpoint("Executor", executor) + } + prevDriverURL = url + testStartDone = true + testLock.synchronized(testLock.notifyAll()) + case None => + // If driver url is none, already running executor is stopped. 
+ prevDriverURL = "" + } + setRetryFlag(false) + } + } catch { + case e@(NonFatal(_) | _: InterruptedException) => + try { + Misc.checkIfCacheClosing(e) + // log any exception other than those due to cache closing + logWarning("Unexpected exception in ExecutorInitiator", e) + } catch { + case NonFatal(_) => stopTask = true // just stop the task + } + } + } // end of while(true) + } catch { + case e: Throwable => + logWarning("ExecutorInitiator failing with exception: ", e) + } finally { + testStartDone = false + // kill if an executor is already running. + SparkCallbacks.stopExecutor(env) + lock.synchronized { + stopped = true + lock.notifyAll() + } + try { + Misc.checkIfCacheClosing(null) + GemFireXDUtils.getGfxdAdvisor.getDistributionManager + .removeMembershipListener(membershipListener) + } catch { + case _: CancelException => // do nothing + } + } + } + + def getDriverURLString: String = getDriverURL match { + case Some(x) => x + case None => "" + } + } + + /** + * This should be called only when the process is terminating. + * If a process ceases to be an executor, only startOrTransmuteExecutor should be called + * with None. + */ + def stop(): Unit = { + executorRunnable.stopTask = true + executorRunnable.setDriverDetails(None, null) + Utils.clearDefaultSerializerAndCodec() + val lock = executorRunnable.lock + lock.synchronized { + lock.notifyAll() + var maxTries = 50 + while (maxTries > 0 && !executorRunnable.stopped) { + maxTries -= 1 + lock.wait(1000) + } + } + } + + def restartExecutor(): Unit = { + executorRunnable.setRetryFlag() + } + + // Test hook. 
Not to be used in other situations + def testWaitForExecutor(): Unit = { + var maxTries = 100 + while (maxTries > 0) { + val runnable = executorRunnable + if (!runnable.testStartDone) { + maxTries -= 1 + runnable.testLock.synchronized { + runnable.testLock.wait(500) // wait on the monitor that is held and that run() notifies + } + } else { + // break the loop + maxTries = 0 + } + } + } + + /** + * Set the new driver url and start the thread if not already started + */ + def startOrTransmuteExecutor(driverURL: Option[String], + driverDM: InternalDistributedMember): Unit = { + // Avoid creation of executor inside the Gem accessor + // that is a Spark driver but has joined the gem system + // in the non embedded mode + if (SparkCallbacks.isDriver) { + logInfo("Executor cannot be instantiated in this " + + "VM as a Spark driver is already running. ") + return + } + + if (ServerGroupUtils.isGroupMember(ClusterCallbacksImpl.getLeaderGroup())) { + logInfo("Executor cannot be instantiated in a lead vm.") + return + } + + executorRunnable.setDriverDetails(driverURL, driverDM) + // start the executor thread if driver URL is set and the thread + // is not already started. + driverURL match { + case Some(_) => + if (executorThread.getState == Thread.State.NEW) { + logInfo("About to start thread " + executorThread.getName) + executorThread.setDaemon(true) + executorThread.start() + } else if (executorThread.getState == Thread.State.TERMINATED) { + // Restart a thread after it has been stopped + // This is required for dunit case mainly.
+ executorRunnable = new ExecutorRunnable + executorThread = new Thread(executorRunnable) + logInfo("Spawning new thread " + executorThread.getName + " and starting") + executorRunnable.setDriverDetails(driverURL, driverDM) + executorThread.setDaemon(true) + executorThread.start() + } + case None => + } + } +} diff --git a/cluster/src/main/scala/io/snappydata/gemxd/ClusterCallbacksImpl.scala b/cluster/src/main/scala/io/snappydata/gemxd/ClusterCallbacksImpl.scala new file mode 100644 index 0000000000..b5186a1be8 --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/gemxd/ClusterCallbacksImpl.scala @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.gemxd + +import java.io.InputStream +import java.util.{Iterator => JIterator} + +import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember +import com.gemstone.gemfire.internal.cache.GemFireCacheImpl +import com.gemstone.gemfire.internal.shared.Version +import com.gemstone.gemfire.internal.{ByteArrayDataInput, ClassPathLoader, GemFireVersion} +import com.pivotal.gemfirexd.internal.iapi.sql.ParameterValueSet +import com.pivotal.gemfirexd.internal.iapi.types.DataValueDescriptor +import com.pivotal.gemfirexd.internal.impl.sql.execute.ValueRow +import com.pivotal.gemfirexd.internal.snappy.{CallbackFactoryProvider, ClusterCallbacks, LeadNodeExecutionContext, SparkSQLExecute} +import io.snappydata.cluster.ExecutorInitiator +import io.snappydata.impl.LeadImpl +import io.snappydata.{ServiceManager, SnappyEmbeddedTableStatsProviderService} + +import org.apache.spark.Logging +import org.apache.spark.scheduler.cluster.SnappyClusterManager +import org.apache.spark.serializer.{KryoSerializerPool, StructTypeSerializer} + +/** + * Callbacks that are sent by GemXD to Snappy for cluster management + */ +object ClusterCallbacksImpl extends ClusterCallbacks with Logging { + + CallbackFactoryProvider.setClusterCallbacks(this) + + private[snappydata] def initialize(): Unit = { + // nothing to be done; singleton constructor does all + } + + override def getLeaderGroup: java.util.HashSet[String] = { + val leaderServerGroup = new java.util.HashSet[String] + leaderServerGroup.add(LeadImpl.LEADER_SERVERGROUP) + leaderServerGroup + } + + override def launchExecutor(driverUrl: String, + driverDM: InternalDistributedMember): Unit = { + val url = if (driverUrl == null || driverUrl == "") { + logInfo(s"call to launchExecutor but driverUrl is invalid. 
$driverUrl") + None + } + else { + Some(driverUrl) + } + logInfo(s"invoking startOrTransmute with $url") + ExecutorInitiator.startOrTransmuteExecutor(url, driverDM) + } + + override def getDriverURL: String = { + SnappyClusterManager.cm.map(_.schedulerBackend) match { + case Some(backend) if backend ne null => + val driverUrl = backend.driverUrl + if ((driverUrl ne null) && !driverUrl.isEmpty) { + logInfo(s"returning driverUrl=$driverUrl") + } + driverUrl + case _ => null + } + } + + override def stopExecutor(): Unit = { + ExecutorInitiator.stop() + } + + override def getSQLExecute(sql: String, schema: String, ctx: LeadNodeExecutionContext, + v: Version, isPreparedStatement: Boolean, isPreparedPhase: Boolean, + pvs: ParameterValueSet): SparkSQLExecute = { + if (isPreparedStatement && isPreparedPhase) { + new SparkSQLPrepareImpl(sql, schema, ctx, v) + } else { + new SparkSQLExecuteImpl(sql, schema, ctx, v, Option(pvs)) + } + } + + override def readDataType(in: ByteArrayDataInput): AnyRef = { + // read the DataType + KryoSerializerPool.deserialize(in.array(), in.position(), in.available(), (kryo, input) => { + val result = StructTypeSerializer.readType(kryo, input) + // move the cursor to the new position + in.setPosition(input.position()) + result + }) + } + + override def getRowIterator(dvds: Array[DataValueDescriptor], + types: Array[Int], precisions: Array[Int], scales: Array[Int], + dataTypes: Array[AnyRef], in: ByteArrayDataInput): JIterator[ValueRow] = { + SparkSQLExecuteImpl.getRowIterator(dvds, types, precisions, scales, + dataTypes, in) + } + + override def clearSnappySessionForConnection( + connectionId: java.lang.Long): Unit = { + SnappySessionPerConnection.removeSnappySession(connectionId) + } + + override def publishColumnTableStats(): Unit = { + SnappyEmbeddedTableStatsProviderService.publishColumnTableRowCountStats() + } + + override def getClusterType: String = { + GemFireCacheImpl.setGFXDSystem(true) + // AQP version if available + val is: 
InputStream = ClassPathLoader.getLatest.getResourceAsStream( + classOf[SnappyDataVersion], SnappyDataVersion.AQP_VERSION_PROPERTIES) + if (is ne null) try { + GemFireVersion.getInstance(classOf[SnappyDataVersion], SnappyDataVersion + .AQP_VERSION_PROPERTIES) + } finally { + is.close() + } + GemFireVersion.getClusterType + } + + override def setLeadClassLoader(): Unit = { + val instance = ServiceManager.currentFabricServiceInstance + instance match { + case li: LeadImpl => + val loader = li.urlclassloader + if (loader != null) { + Thread.currentThread().setContextClassLoader(loader) + } + case _ => + } + } +} diff --git a/cluster/src/main/scala/io/snappydata/gemxd/SnappyDataVersion.scala b/cluster/src/main/scala/io/snappydata/gemxd/SnappyDataVersion.scala new file mode 100644 index 0000000000..0928cd9405 --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/gemxd/SnappyDataVersion.scala @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.gemxd + +import java.io.{InputStream, PrintStream, PrintWriter} + +import scala.collection.mutable + +import com.gemstone.gemfire.internal.cache.GemFireCacheImpl +import com.gemstone.gemfire.internal.shared.NativeCalls +import com.gemstone.gemfire.internal.{ClassPathLoader, GemFireVersion, SharedLibrary} +import com.pivotal.gemfirexd.internal.GemFireXDVersion +import com.pivotal.gemfirexd.internal.shared.common.SharedUtils + +class SnappyDataVersion { +} + +object SnappyDataVersion { + + // currently version in SNAPPYDATA_VERSION_PROPERTIES is used for column store version and + // SnappyData platform version. If these 2 are to be given different versions separate + // properties file can be created for column store + private val SNAPPYDATA_VERSION_PROPERTIES = "io/snappydata/SnappyDataVersion.properties" + + val AQP_VERSION_PROPERTIES = "io/snappydata/SnappyAQPVersion.properties" + + private val isNativeLibLoaded: Boolean = { + GemFireCacheImpl.setGFXDSystem(true) + val isNativeLibLoaded = if (NativeCalls.getInstance.loadNativeLibrary) { + SharedLibrary.register("gemfirexd") + } else false + val instance: GemFireVersion = GemFireVersion.getInstance(classOf[SnappyDataVersion], + SNAPPYDATA_VERSION_PROPERTIES) + if (isNativeLibLoaded) { + // try to load _getNativeVersion by reflection + try { + val m = classOf[GemFireXDVersion].getDeclaredMethod("_getNativeVersion") + instance.setNativeVersion(m.invoke(null).asInstanceOf[String]) + } catch { + case _: Exception => // ignore + } + } else { + instance.setNativeVersion("gemfirexd " + instance.getNativeVersion) + } + isNativeLibLoaded + } + + def loadProperties(): Unit = { + GemFireCacheImpl.setGFXDSystem(true) + GemFireVersion.getInstance(classOf[SnappyDataVersion], SNAPPYDATA_VERSION_PROPERTIES) + } + + // scalastyle:off println + def print(ps: PrintStream): Unit = { + val pw: PrintWriter = new PrintWriter(ps) + + // platform version + loadProperties() + val platform = s" Platform Version 
${GemFireVersion.getProductVersion} " + + s"${GemFireVersion.getProductReleaseStage}" + + // rowstore version + GemFireVersion.getInstance(classOf[GemFireXDVersion], SharedUtils.GFXD_VERSION_PROPERTIES) + val product = if (GemFireVersion.isEnterpriseEdition) "TIBCO ComputeDB" else "SnappyData" + pw.println(product + platform) + pw.printf("%4s%s\n", " ", GemFireVersion.getProductName + " " + + GemFireVersion.getProductVersion + " " + GemFireVersion.getProductReleaseStage) + + // column store version + GemFireVersion.getInstance(classOf[SnappyDataVersion], SNAPPYDATA_VERSION_PROPERTIES) + pw.printf("%4s%s\n", " ", GemFireVersion.getProductName + " Column Store " + + GemFireVersion.getProductVersion + " " + GemFireVersion.getProductReleaseStage) + + // AQP version if available + val is: InputStream = ClassPathLoader.getLatest.getResourceAsStream( + classOf[SnappyDataVersion], AQP_VERSION_PROPERTIES) + if (is ne null) try { + GemFireVersion.getInstance(classOf[SnappyDataVersion], AQP_VERSION_PROPERTIES) + pw.printf("%4s%s\n", " ", GemFireVersion.getProductName + " " + + GemFireVersion.getProductVersion + " " + GemFireVersion.getProductReleaseStage) + } finally { + is.close() + } + pw.flush() + } + + def print(ps: PrintStream, printSourceInfo: Boolean): Unit = { + if (!isNativeLibLoaded) { + System.err.println("Native library not loaded") + } + + val pw: PrintWriter = new PrintWriter(ps) + + GemFireVersion.getInstance(classOf[GemFireXDVersion], SharedUtils.GFXD_VERSION_PROPERTIES) + pw.println(GemFireVersion.getProductName) + GemFireVersion.print(pw, printSourceInfo) + + GemFireVersion.getInstance(classOf[SnappyDataVersion], SNAPPYDATA_VERSION_PROPERTIES) + pw.println(GemFireVersion.getProductName) + GemFireVersion.print(pw, printSourceInfo) + + // AQP version if available + val is: InputStream = ClassPathLoader.getLatest.getResourceAsStream( + classOf[SnappyDataVersion], AQP_VERSION_PROPERTIES) + if (is ne null) try { + 
GemFireVersion.getInstance(classOf[SnappyDataVersion], AQP_VERSION_PROPERTIES) + pw.println(GemFireVersion.getProductName) + GemFireVersion.print(pw, printSourceInfo) + } finally { + is.close() + } + + pw.flush() + } + + // scalastyle:on println + + def createVersionFile(): Unit = { + loadProperties() + GemFireVersion.createVersionFile() + } + + def getSnappyDataProductVersion: mutable.HashMap[String, String] = { + GemFireVersion.getInstance(classOf[GemFireXDVersion], SNAPPYDATA_VERSION_PROPERTIES) + + val versionDetails = mutable.HashMap.empty[String, String] + versionDetails.put("productName", GemFireVersion.getProductName) + versionDetails.put("productVersion", GemFireVersion.getProductVersion) + versionDetails.put("buildId", GemFireVersion.getBuildId) + versionDetails.put("buildDate", GemFireVersion.getBuildDate) + versionDetails.put("buildPlatform", GemFireVersion.getBuildPlatform) + versionDetails.put("nativeCodeVersion", GemFireVersion.getNativeCodeVersion) + versionDetails.put("sourceRevision", GemFireVersion.getSourceRevision) + + GemFireVersion.getInstance(classOf[GemFireXDVersion], SharedUtils.GFXD_VERSION_PROPERTIES) + val productEditionType = if (GemFireVersion.isEnterpriseEdition) "Enterprise" else "Community" + + versionDetails.put("editionType", productEditionType) + + versionDetails + } +} diff --git a/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLExecuteImpl.scala b/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLExecuteImpl.scala new file mode 100644 index 0000000000..643c17c09c --- /dev/null +++ b/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLExecuteImpl.scala @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.gemxd + +import java.io.{CharArrayWriter, DataOutput} +import java.sql.SQLWarning + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import com.gemstone.gemfire.DataSerializer +import com.gemstone.gemfire.cache.CacheClosedException +import com.gemstone.gemfire.internal.shared.{ClientSharedUtils, Version} +import com.gemstone.gemfire.internal.{ByteArrayDataInput, InternalDataSerializer} +import com.pivotal.gemfirexd.Attribute +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.message.LeadNodeExecutorMsg +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.internal.engine.distributed.{GfxdHeapDataOutputStream, SnappyResultHolder} +import com.pivotal.gemfirexd.internal.engine.jdbc.GemFireXDRuntimeException +import com.pivotal.gemfirexd.internal.iapi.sql.ParameterValueSet +import com.pivotal.gemfirexd.internal.iapi.types.{DataValueDescriptor, SQLChar} +import com.pivotal.gemfirexd.internal.impl.sql.execute.ValueRow +import com.pivotal.gemfirexd.internal.shared.common.StoredFormatIds +import com.pivotal.gemfirexd.internal.snappy.{LeadNodeExecutionContext, SparkSQLExecute} +import io.snappydata.{Constant, Property, QueryHint} + +import org.apache.spark.serializer.{KryoSerializerPool, StructTypeSerializer} +import org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import 
org.apache.spark.sql.collection.Utils
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{CachedDataFrame, SnappyContext, SnappySession}
+import org.apache.spark.storage.RDDBlockId
+import org.apache.spark.util.SnappyUtils
+import org.apache.spark.{Logging, SparkEnv}
+
+/**
+ * Encapsulates a Spark execution for use in query routing from JDBC.
+ *
+ * Construction has side effects: it may replace the thread's context class
+ * loader, looks up (or creates) the session for `ctx.getConnId`, copies the
+ * user name and auth token into the session conf when a user name is present,
+ * sets the current schema and runs `sql` through `Utils.sqlInternal`.
+ *
+ * @param sql           the SQL text to execute
+ * @param schema        schema set as current on the session (created if absent)
+ * @param ctx           supplies the connection id, user name and auth token
+ * @param senderVersion version handed to the output stream used for results
+ * @param pvs           parameter values for a prepared statement, if any
+ */
+class SparkSQLExecuteImpl(val sql: String,
+    val schema: String,
+    val ctx: LeadNodeExecutionContext,
+    senderVersion: Version,
+    pvs: Option[ParameterValueSet]) extends SparkSQLExecute with Logging {
+
+  // spark context will be constructed by now as this will be invoked when
+  // DRDA queries will reach the lead node
+
+  if (Thread.currentThread().getContextClassLoader != null) {
+    val loader = SnappyUtils.getSnappyStoreContextLoader(
+      SparkSQLExecuteImpl.getContextOrCurrentClassLoader)
+    Thread.currentThread().setContextClassLoader(loader)
+  }
+
+  private[this] val session = SnappySessionPerConnection
+      .getSnappySessionForConnection(ctx.getConnId)
+
+  if (ctx.getUserName != null && !ctx.getUserName.isEmpty) {
+    session.conf.set(Attribute.USERNAME_ATTR, ctx.getUserName)
+    session.conf.set(Attribute.PASSWORD_ATTR, ctx.getAuthToken)
+  }
+
+  session.setCurrentSchema(schema, createIfNotExists = true)
+
+  session.setPreparedQuery(preparePhase = false, pvs)
+
+  private[this] val df = Utils.sqlInternal(session, sql)
+
+  private[this] val thresholdListener = Misc.getMemStore.thresholdListener()
+
+  private[this] val hdos = new GfxdHeapDataOutputStream(
+    thresholdListener, sql, false, senderVersion)
+
+  private[this] val querySchema = df.schema
+
+  private[this] lazy val colTypes = getColumnTypes
+
+  // check for query hint to serialize complex types as JSON strings
+  private[this] val complexTypeAsJson = SparkSQLExecuteImpl.getJsonProperties(session)
+
+  private val (allAsClob, columnsAsClob) = SparkSQLExecuteImpl.getClobProperties(session)
+
+  /**
+   * Collects the query result, writes the metadata followed by the partition
+   * data into the shared output stream and hands the result to `msg`.
+   *
+   * Whenever the stream grows beyond `GemFireXDUtils.DML_MAX_CHUNK_SIZE` the
+   * current chunk is sent with `msg.sendResult` and the stream is cleared;
+   * the final chunk is always delivered with `msg.lastResult`. If the method
+   * exits before all blocks were consumed, the RDD's blocks are removed from
+   * the block manager.
+   *
+   * @param msg                message used to send the chunks
+   * @param snappyResultHolder holder for the first chunk; for local execution
+   *                           a fresh holder is created after each sent chunk
+   */
+  override def packRows(msg: LeadNodeExecutorMsg,
+      snappyResultHolder: SnappyResultHolder): Unit = {
+
+    var srh = snappyResultHolder
+    val isLocalExecution = msg.isLocallyExecuted
+
+    val bm = SparkEnv.get.blockManager
+    val rddId = df.rddId
+    var blockReadSuccess = false
+    try {
+      // get the results and put those in block manager to avoid going OOM
+      // TODO: can optimize to ship immediately if plan is not ordered
+      // TODO: can ship CollectAggregateExec processing to the server node
+      // which is supported via the "skipLocalCollectProcessing" flag to the
+      // call below (but that has additional overheads of plan
+      // shipping/compilation etc and lack of proper BlockManager usage in
+      // messaging + server-side final processing, so do it selectively)
+      val partitionBlocks = df.collectWithHandler(CachedDataFrame,
+        CachedDataFrame.localBlockStoreResultHandler(rddId, bm),
+        CachedDataFrame.localBlockStoreDecoder(querySchema.length, bm))
+      hdos.clearForReuse()
+      SparkSQLExecuteImpl.writeMetaData(srh, hdos, tableNames, nullability, getColumnNames,
+        colTypes, getColumnDataTypes, session.getWarnings)
+
+      var id = 0
+      for (block <- partitionBlocks) {
+        block match {
+          case null => // skip but still id has to be incremented
+          case data: Array[Byte] => if (data.length > 0) {
+            hdos.write(data)
+          }
+          case p: RDDBlockId =>
+            val partitionData = Utils.getPartitionData(p, bm)
+            // remove the block once a local handle to it has been obtained
+            bm.removeBlock(p, tellMaster = false)
+            hdos.write(partitionData)
+        }
+        logTrace(s"Writing data for partition ID = $id: $block")
+        val dosSize = hdos.size()
+        if (dosSize > GemFireXDUtils.DML_MAX_CHUNK_SIZE) {
+          if (isLocalExecution) {
+            // prepare SnappyResultHolder with all data and create new one
+            SparkSQLExecuteImpl.handleLocalExecution(srh, hdos)
+            msg.sendResult(srh)
+            srh = new SnappyResultHolder(this, msg.isUpdateOrDeleteOrPut)
+          } else {
+            // throttle sending if target node is CRITICAL_UP
+            val targetMember = msg.getSender
+            if (thresholdListener.isCritical ||
+                thresholdListener.isCriticalUp(targetMember)) {
+              try {
+                // at most five short sleeps, stopping early once neither side
+                // reports a critical state
+                var throttle = true
+                for (_ <- 1 to 5 if throttle) {
+                  Thread.sleep(4)
+                  throttle = thresholdListener.isCritical ||
+                      thresholdListener.isCriticalUp(targetMember)
+                }
+              } catch {
+                case ie: InterruptedException => Misc.checkIfCacheClosing(ie)
+              }
+            }
+
+            msg.sendResult(srh)
+            // clear the metadata flag for subsequent chunks
+            srh.clearHasMetadata()
+          }
+          logTrace(s"Sent one batch for result, current partition ID = $id")
+          hdos.clearForReuse()
+          // 0/1 indicator is now written in serializeRows itself to allow
+          // ByteBuffer to be passed as is in the chunks list of
+          // GfxdHeapDataOutputStream and avoid a copy
+        }
+        id += 1
+      }
+      blockReadSuccess = true
+
+      if (isLocalExecution) {
+        SparkSQLExecuteImpl.handleLocalExecution(srh, hdos)
+      }
+      msg.lastResult(srh)
+
+    } finally {
+      if (!blockReadSuccess) {
+        // remove any cached results from block manager
+        bm.removeRdd(rddId)
+      }
+    }
+  }
+
+  /** Writes the buffered rows to `out`; see `SparkSQLExecuteImpl.serializeRows`. */
+  override def serializeRows(out: DataOutput, hasMetadata: Boolean): Unit =
+    SparkSQLExecuteImpl.serializeRows(out, hasMetadata, hdos)
+
+  private lazy val (tableNames, nullability) = SparkSQLExecuteImpl.
+      getTableNamesAndNullability(session, df.queryExecution.analyzed.output)
+
+  /** Field names of the result schema, in schema order. */
+  def getColumnNames: Array[String] = {
+    querySchema.fieldNames
+  }
+
+  /** Per-column `(typeFormatId, precision, scale)` triples from `getSQLType`. */
+  private def getColumnTypes: Array[(Int, Int, Int)] =
+    querySchema.map(f => {
+      SparkSQLExecuteImpl.getSQLType(f.dataType, complexTypeAsJson,
+        f.metadata, Utils.toLowerCase(f.name), allAsClob, columnsAsClob)
+    }).toArray
+
+  /** Spark data types of the result schema, in schema order. */
+  private def getColumnDataTypes: Array[DataType] =
+    querySchema.map(_.dataType).toArray
+}
+
+/**
+ * Helpers shared by the execute and prepare paths: type mapping, metadata
+ * serialization and decoding of result rows.
+ */
+object SparkSQLExecuteImpl {
+
+  /**
+   * Reads the `ComplexTypeAsJson` query hint of the previous query, falling
+   * back to `Constant.COMPLEX_TYPE_AS_JSON_DEFAULT` when the hint is absent.
+   */
+  def getJsonProperties(session: SnappySession): Boolean = session.getPreviousQueryHints.get(
+    QueryHint.ComplexTypeAsJson.toString) match {
+    case null => Constant.COMPLEX_TYPE_AS_JSON_DEFAULT
+    case v => ClientSharedUtils.parseBoolean(v)
+  }
+
+  /**
+   * Reads the `ColumnsAsClob` query hint of the previous query.
+   *
+   * @return `(false, Set.empty)` when the hint is absent, otherwise the result
+   *         of `Utils.parseColumnsAsClob`
+   */
+  def getClobProperties(session: SnappySession): (Boolean, Set[String]) =
+    session.getPreviousQueryHints.get(QueryHint.ColumnsAsClob.toString) match {
+      case null => (false, Set.empty[String])
+      case v => Utils.parseColumnsAsClob(v, session)
+    }
+
+  /**
+   * Maps a Spark `DataType` to a `(typeFormatId, precision, scale)` triple.
+   * Precision and scale are -1 except for decimals (precision, scale) and
+   * CHAR/VARCHAR (size as precision).
+   *
+   * @param dataType          Spark type of the column
+   * @param complexTypeAsJson when true, array/map/struct columns map to
+   *                          `REF_TYPE_ID`, otherwise to `SQL_BLOB_ID`
+   * @param metaData          column metadata consulted for the string base
+   *                          type and size
+   * @param metaName          column name looked up in `columnsAsClob`
+   * @param allAsClob         when true, strings without an explicit base type
+   *                          (or with base "STRING") map to CLOB
+   * @param columnsAsClob     names of string columns that map to CLOB
+   */
+  def getSQLType(dataType: DataType, complexTypeAsJson: Boolean,
+      metaData: Metadata = Metadata.empty, metaName: String = "",
+      allAsClob: Boolean = false, columnsAsClob: Set[String] = Set.empty): (Int, Int, Int) = {
+    dataType match {
+      case IntegerType => (StoredFormatIds.SQL_INTEGER_ID, -1, -1)
+      case StringType =>
+        TypeUtilities.getMetadata[String](Constant.CHAR_TYPE_BASE_PROP, metaData) match {
+          case Some(base) =>
+            // lazy because the CLOB branches never need the size
+            lazy val size = TypeUtilities.getMetadata[Long](
+              Constant.CHAR_TYPE_SIZE_PROP, metaData)
+            base match {
+              case "CHAR" =>
+                val charSize = size match {
+                  case Some(s) => s.toInt
+                  case None => Constant.MAX_CHAR_SIZE
+                }
+                (StoredFormatIds.SQL_CHAR_ID, charSize, -1)
+              case "STRING" if allAsClob ||
+                  (columnsAsClob.nonEmpty && columnsAsClob.contains(metaName)) =>
+                (StoredFormatIds.SQL_CLOB_ID, -1, -1)
+              case "CLOB" => (StoredFormatIds.SQL_CLOB_ID, -1, -1)
+              case _ =>
+                val varcharSize = size match {
+                  case Some(s) => s.toInt
+                  case None => Constant.MAX_VARCHAR_SIZE
+                }
+                (StoredFormatIds.SQL_VARCHAR_ID, varcharSize, -1)
+            }
+          case None => if (allAsClob ||
+              (columnsAsClob.nonEmpty && columnsAsClob.contains(metaName))) {
+            (StoredFormatIds.SQL_CLOB_ID, -1, -1)
+          } else {
+            // check if size is specified
+            val size = TypeUtilities.getMetadata[Long](
+              Constant.CHAR_TYPE_SIZE_PROP, metaData) match {
+              case Some(s) => s.toInt
+              case None => Constant.MAX_VARCHAR_SIZE
+            }
+            (StoredFormatIds.SQL_VARCHAR_ID, size, -1)
+          }
+        }
+      case LongType => (StoredFormatIds.SQL_LONGINT_ID, -1, -1)
+      case TimestampType => (StoredFormatIds.SQL_TIMESTAMP_ID, -1, -1)
+      case DateType => (StoredFormatIds.SQL_DATE_ID, -1, -1)
+      case DoubleType => (StoredFormatIds.SQL_DOUBLE_ID, -1, -1)
+      case t: DecimalType => (StoredFormatIds.SQL_DECIMAL_ID,
+          t.precision, t.scale)
+      case FloatType => (StoredFormatIds.SQL_REAL_ID, -1, -1)
+      case BooleanType => (StoredFormatIds.SQL_BOOLEAN_ID, -1, -1)
+      case ShortType => (StoredFormatIds.SQL_SMALLINT_ID, -1, -1)
+      case ByteType => (StoredFormatIds.SQL_TINYINT_ID, -1, -1)
+      case BinaryType => (StoredFormatIds.SQL_BLOB_ID, -1, -1)
+      case _: ArrayType | _: MapType | _: StructType =>
+        // indicates complex types serialized as strings
+        if (complexTypeAsJson) (StoredFormatIds.REF_TYPE_ID, -1, -1)
+        else (StoredFormatIds.SQL_BLOB_ID, -1, -1)
+
+      // send across rest as objects that will be displayed as strings
+      case _ => (StoredFormatIds.REF_TYPE_ID, -1, -1)
+    }
+  }
+
+  /**
+   * Derives, for every output attribute, its table name and nullability.
+   *
+   * The table name is the part of `qualifiedName` before the last dot; when
+   * that part contains no dot itself the session's current schema is
+   * prepended. Attributes without a qualifier get an empty table name.
+   *
+   * @return parallel sequences of table names and nullability flags
+   */
+  def getTableNamesAndNullability(session: SnappySession,
+      output: Seq[expressions.Attribute]): (Seq[String], Seq[Boolean]) = {
+    output.map { a =>
+      val fn = a.qualifiedName
+      val dotIdx = fn.lastIndexOf('.')
+      if (dotIdx > 0) {
+        val tableName = fn.substring(0, dotIdx)
+        val fullTableName = if (tableName.indexOf('.') > 0) tableName
+        else session.getCurrentSchema + '.' + tableName
+        (fullTableName, a.nullable)
+      } else {
+        ("", a.nullable)
+      }
+    }.unzip
+  }
+
+  /**
+   * Marks `srh` as carrying metadata and writes the result metadata to `hdos`
+   * in this order: table names, column names, nullability flags, one type
+   * entry per column, and finally the warnings object.
+   *
+   * A type entry is the type format id, followed by precision and scale for
+   * decimals, the size for CHAR/VARCHAR, or the Kryo-serialized `DataType`
+   * for `REF_TYPE_ID`.
+   */
+  def writeMetaData(srh: SnappyResultHolder, hdos: GfxdHeapDataOutputStream,
+      tableNames: Seq[String], nullability: Seq[Boolean], columnNames: Array[String],
+      colTypes: Array[(Int, Int, Int)], dataTypes: Array[DataType],
+      warnings: SQLWarning): Unit = {
+    // indicates that the metadata is being packed too
+    srh.setHasMetadata()
+    DataSerializer.writeStringArray(tableNames.toArray, hdos)
+    DataSerializer.writeStringArray(columnNames, hdos)
+    DataSerializer.writeBooleanArray(nullability.toArray, hdos)
+    var i = 0
+    while (i < colTypes.length) {
+      val (tp, precision, scale) = colTypes(i)
+      InternalDataSerializer.writeSignedVL(tp, hdos)
+      tp match {
+        case StoredFormatIds.SQL_DECIMAL_ID =>
+          InternalDataSerializer.writeSignedVL(precision, hdos) // precision
+          InternalDataSerializer.writeSignedVL(scale, hdos) // scale
+        case StoredFormatIds.SQL_VARCHAR_ID |
+             StoredFormatIds.SQL_CHAR_ID =>
+          // Write the size as precision
+          InternalDataSerializer.writeSignedVL(precision, hdos)
+        case StoredFormatIds.REF_TYPE_ID =>
+          // Write the DataType
+          hdos.write(KryoSerializerPool.serialize((kryo, out) =>
+            StructTypeSerializer.writeType(kryo, out, dataTypes(i))))
+        case _ => // ignore for others
+      }
+      i += 1
+    }
+    DataSerializer.writeObject(warnings, hdos)
+  }
+
+  /** The thread's context class loader, or this object's loader when unset. */
+  def getContextOrCurrentClassLoader: ClassLoader =
+    Option(Thread.currentThread().getContextClassLoader)
+        .getOrElse(getClass.getClassLoader)
+
+  /**
+   * Copies the buffered bytes of `hdos` into `srh`, prefixed with a one-byte
+   * flag (1 when `srh` has metadata, 0 otherwise). Does nothing when the
+   * stream is empty.
+   */
+  def handleLocalExecution(srh: SnappyResultHolder,
+      hdos: GfxdHeapDataOutputStream): Unit = {
+    val size = hdos.size()
+    // prepare SnappyResultHolder with all data and create new one
+    if (size > 0) {
+      val bytes = new Array[Byte](size + 1)
+      // byte 1 will indicate that the metainfo is being packed too
+      bytes(0) = if (srh.hasMetadata) 0x1 else 0x0
+      hdos.sendTo(bytes, 1)
+      srh.fromSerializedData(bytes, bytes.length, null)
+    }
+  }
+
+  /**
+   * Writes the buffered bytes of `hdos` to `out` as a length-prefixed array
+   * whose first byte is the metadata flag. An empty stream is written as a
+   * zero array length with no flag byte.
+   */
+  def serializeRows(out: DataOutput, hasMetadata: Boolean,
+      hdos: GfxdHeapDataOutputStream): Unit = {
+    val numBytes = hdos.size
+    if (numBytes > 0) {
+      InternalDataSerializer.writeArrayLength(numBytes + 1, out)
+      // byte 1 will indicate that the metainfo is being packed too
+      out.writeByte(if (hasMetadata) 0x1 else 0x0)
+      hdos.sendTo(out)
+    } else {
+      InternalDataSerializer.writeArrayLength(0, out)
+    }
+  }
+
+  /** Value of the `Constant.STRING_AS_CLOB_PROP` system property (default false). */
+  lazy val STRING_AS_CLOB: Boolean = System.getProperty(
+    Constant.STRING_AS_CLOB_PROP, "false").toBoolean
+
+  /**
+   * Decodes the rows held in `input` and exposes them as an iterator.
+   *
+   * The iterator returns the SAME `ValueRow` instance (backed by `dvds`) on
+   * every call, refilled with the values of the current row, so callers must
+   * consume a row before advancing.
+   *
+   * @param dvds       one value descriptor per column, overwritten for each row
+   * @param types      type format id per column
+   * @param precisions precision per column (CHAR size, decimal precision)
+   * @param scales     scale per column (decimals)
+   * @param dataTypes  Spark `DataType` for columns rendered as JSON, null for
+   *                   all other columns
+   * @param input      buffer positioned at the encoded rows
+   * @throws GemFireXDRuntimeException for a type format id that is not handled
+   */
+  def getRowIterator(dvds: Array[DataValueDescriptor], types: Array[Int],
+      precisions: Array[Int], scales: Array[Int], dataTypes: Array[AnyRef],
+      input: ByteArrayDataInput): java.util.Iterator[ValueRow] = {
+    // initialize JSON generators if required
+    // (one writer/generator pair per non-null entry of dataTypes, in order)
+    var writers: ArrayBuffer[CharArrayWriter] = null
+    var generators: ArrayBuffer[AnyRef] = null
+    for (d <- dataTypes) {
+      if (d ne null) {
+        if (writers eq null) {
+          writers = new ArrayBuffer[CharArrayWriter](2)
+          generators = new ArrayBuffer[AnyRef](2)
+        }
+        val size = writers.length
+        val writer = new CharArrayWriter()
+        writers += writer
+        generators += Utils.getJsonGenerator(d.asInstanceOf[DataType],
+          s"col_$size", writer)
+      }
+    }
+    val execRow = new ValueRow(dvds)
+    val numFields = types.length
+    val unsafeRows = CachedDataFrame.decodeUnsafeRows(numFields,
+      input.array(), input.position(), input.available())
+    unsafeRows.map { row =>
+      var index = 0
+      // position of the current column's pair in writers/generators
+      var writeIndex = 0
+      while (index < numFields) {
+        val dvd = dvds(index)
+        if (row.isNullAt(index)) {
+          dvd.setToNull()
+          // a NULL complex column still owns a writer/generator slot; skip it
+          // so that later complex columns of this row use their own pair
+          if (index < dataTypes.length && (dataTypes(index) ne null)) {
+            writeIndex += 1
+          }
+          index += 1
+        } else {
+          types(index) match {
+            case StoredFormatIds.SQL_VARCHAR_ID |
+                 StoredFormatIds.SQL_CLOB_ID =>
+              val utf8String = row.getUTF8String(index)
+              dvd.setValue(utf8String.toString)
+
+            case StoredFormatIds.SQL_CHAR_ID =>
+              // CHAR values are blank-padded or truncated to the declared size
+              val precision = precisions(index)
+              val utf8String = row.getUTF8String(index)
+              var fixedString = utf8String.toString
+              val stringLen = fixedString.length
+              if (stringLen != precision) {
+                if (stringLen < precision) {
+                  // add blank padding
+                  val sb = new java.lang.StringBuilder(precision)
+                  val blanks = new Array[Char](precision - stringLen)
+                  SQLChar.appendBlanks(blanks, 0, blanks.length)
+                  fixedString = sb.append(fixedString).append(blanks).toString
+                } else {
+                  // truncate
+                  fixedString = fixedString.substring(0, precision)
+                }
+              }
+              dvd.setValue(fixedString)
+
+            case StoredFormatIds.SQL_INTEGER_ID =>
+              dvd.setValue(row.getInt(index))
+            case StoredFormatIds.SQL_LONGINT_ID =>
+              dvd.setValue(row.getLong(index))
+            case StoredFormatIds.SQL_SMALLINT_ID =>
+              dvd.setValue(row.getShort(index))
+
+            case StoredFormatIds.SQL_TIMESTAMP_ID =>
+              val ts = DateTimeUtils.toJavaTimestamp(row.getLong(index))
+              dvd.setValue(ts)
+            case StoredFormatIds.SQL_DECIMAL_ID =>
+              val dec = row.getDecimal(index, precisions(index), scales(index))
+              dvd.setBigDecimal(dec.toJavaBigDecimal)
+            case StoredFormatIds.SQL_DATE_ID =>
+              val dt = DateTimeUtils.toJavaDate(row.getInt(index))
+              dvd.setValue(dt)
+            case StoredFormatIds.SQL_BOOLEAN_ID =>
+              dvd.setValue(row.getBoolean(index))
+            case StoredFormatIds.SQL_TINYINT_ID =>
+              dvd.setValue(row.getByte(index))
+            case StoredFormatIds.SQL_REAL_ID =>
+              dvd.setValue(row.getFloat(index))
+            case StoredFormatIds.SQL_DOUBLE_ID =>
+              dvd.setValue(row.getDouble(index))
+            case StoredFormatIds.REF_TYPE_ID =>
+              // convert to Json using JacksonGenerator
+              val writer = writers(writeIndex)
+              val generator = generators(writeIndex)
+              Utils.generateJson(generator, row, index,
+                dataTypes(index).asInstanceOf[DataType])
+              val json = writer.toString
+              // the writer is reused for the next row
+              writer.reset()
+              dvd.setValue(json)
+              writeIndex += 1
+            case StoredFormatIds.SQL_BLOB_ID =>
+              // all complex types too work with below because all of
+              // Array, Map, Struct (as well as Binary itself) transport
+              // data in the same way in UnsafeRow (offsetAndWidth)
+              dvd.setValue(row.getBinary(index))
+            case other => throw new GemFireXDRuntimeException(
+              s"SparkSQLExecuteImpl: unexpected typeFormatId $other")
+          }
+          index += 1
+        }
+      }
+      // close the generators once the last row has been converted
+      if ((generators ne null) && !unsafeRows.hasNext) {
+        generators.foreach(Utils.closeJsonGenerator)
+      }
+
+      execRow
+    }.asJava
+  }
+}
+
+/**
+ * Registry holding one `SnappySession` per connection id.
+ */
+object SnappySessionPerConnection {
+
+  private val connectionIdMap =
+    new java.util.concurrent.ConcurrentHashMap[java.lang.Long, SnappySession]()
+
+  /**
+   * Returns the session registered for `connId`, creating and registering a
+   * new one (with plan caching enabled) when none exists. If another thread
+   * registers a session first, that session is returned instead.
+   *
+   * @throws CacheClosedException when no global SparkContext is available
+   */
+  def getSnappySessionForConnection(connId: Long): SnappySession = {
+    val connectionID = Long.box(connId)
+    val session = connectionIdMap.get(connectionID)
+    if (session != null) session
+    else {
+      val session = SnappyContext.globalSparkContext match {
+        // use a CancelException to force failover by client to another lead if available
+        case null => throw new CacheClosedException("No SparkContext ...")
+        case sc => new SnappySession(sc)
+      }
+      Property.PlanCaching.set(session.sessionState.conf, true)
+      val oldSession = connectionIdMap.putIfAbsent(connectionID, session)
+      if (oldSession == null) session else oldSession
+    }
+  }
+
+  /** All currently registered sessions. */
+  def getAllSessions: Seq[SnappySession] = connectionIdMap.values().asScala.toSeq
+
+  /** Removes the session registered for `connectionID`, if any. */
+  def removeSnappySession(connectionID: java.lang.Long): Unit = {
+    connectionIdMap.remove(connectionID)
+  }
+}
diff --git a/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLPrepareImpl.scala b/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLPrepareImpl.scala
new file mode 100644
index 0000000000..262628e0d9
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/gemxd/SparkSQLPrepareImpl.scala
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.gemxd
+
+import java.io.DataOutput
+
+import scala.collection.mutable
+
+import com.gemstone.gemfire.DataSerializer
+import com.gemstone.gemfire.internal.shared.Version
+import com.pivotal.gemfirexd.Attribute
+import com.pivotal.gemfirexd.internal.engine.Misc
+import com.pivotal.gemfirexd.internal.engine.distributed.message.LeadNodeExecutorMsg
+import com.pivotal.gemfirexd.internal.engine.distributed.{GfxdHeapDataOutputStream, SnappyResultHolder}
+import com.pivotal.gemfirexd.internal.impl.jdbc.Util
+import com.pivotal.gemfirexd.internal.shared.common.StoredFormatIds
+import com.pivotal.gemfirexd.internal.shared.common.reference.SQLState
+import com.pivotal.gemfirexd.internal.snappy.{LeadNodeExecutionContext, SparkSQLExecute}
+
+import org.apache.spark.sql.catalyst.expressions
+import org.apache.spark.sql.catalyst.expressions.{BinaryComparison, CaseWhen, Cast, Exists, Expression, Like, ListQuery, ParamLiteral, PredicateSubquery, ScalarSubquery, SubqueryExpression}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.PutIntoValuesColumnTable
+import org.apache.spark.sql.hive.QuestionMark
+import org.apache.spark.sql.types._
+import org.apache.spark.util.SnappyUtils
+
+
+/**
+ * Prepare-phase counterpart of the JDBC query routing: analyzes `sql` and
+ * reports the result metadata together with the type of every `?` parameter.
+ *
+ * Construction has side effects: it may replace the thread's context class
+ * loader, looks up (or creates) the session for `ctx.getConnId`, copies the
+ * user name and auth token into the session conf when a user name is present,
+ * sets the current schema and prepares `sql` on the session.
+ *
+ * @param sql           the SQL text to prepare
+ * @param schema        schema set as current on the session (created if absent)
+ * @param ctx           supplies the connection id, user name and auth token
+ * @param senderVersion version handed to the output stream used for results
+ */
+class SparkSQLPrepareImpl(val sql: String,
+    val schema: String,
+    val ctx: LeadNodeExecutionContext,
+    senderVersion: Version) extends SparkSQLExecute {
+
+  if (Thread.currentThread().getContextClassLoader != null) {
+    val loader = SnappyUtils.getSnappyStoreContextLoader(
+      SparkSQLExecuteImpl.getContextOrCurrentClassLoader)
+    Thread.currentThread().setContextClassLoader(loader)
+  }
+
+  private[this] val session = SnappySessionPerConnection
+      .getSnappySessionForConnection(ctx.getConnId)
+
+  if (ctx.getUserName != null && !ctx.getUserName.isEmpty) {
+    session.conf.set(Attribute.USERNAME_ATTR, ctx.getUserName)
+    session.conf.set(Attribute.PASSWORD_ATTR, ctx.getAuthToken)
+  }
+
+  session.setCurrentSchema(schema, createIfNotExists = true)
+
+  session.setPreparedQuery(preparePhase = true, None)
+
+  private[this] val analyzedPlan: LogicalPlan = {
+    val aplan = session.prepareSQL(sql)
+//    println(aplan)
+    aplan
+  }
+
+  private[this] val thresholdListener = Misc.getMemStore.thresholdListener()
+
+  protected[this] val hdos = new GfxdHeapDataOutputStream(
+    thresholdListener, sql, false, senderVersion)
+
+  private lazy val (tableNames, nullability) = SparkSQLExecuteImpl.
+      getTableNamesAndNullability(session, analyzedPlan.output)
+
+  private lazy val (columnNames, columnDataTypes) = SparkSQLPrepareImpl.
+      getTableNamesAndDatatype(analyzedPlan.output)
+
+  // check for query hint to serialize complex types as JSON strings
+  private[this] val complexTypeAsJson = SparkSQLExecuteImpl.getJsonProperties(session)
+
+  /** Per-column `(typeFormatId, precision, scale)` triples of the plan output. */
+  private def getColumnTypes: Array[(Int, Int, Int)] =
+    columnDataTypes.map(d => SparkSQLExecuteImpl.getSQLType(d, complexTypeAsJson))
+
+  /**
+   * Writes the result metadata and the parameter type information to the
+   * output stream and delivers it with `msg.lastResult`.
+   *
+   * The parameter information is an int array: element 0 is the parameter
+   * count, followed by four ints per parameter (type format id, precision,
+   * scale, nullable as 1/0). Without parameters a single 0 is written.
+   *
+   * @throws java.sql.SQLException with state `NOT_FOR_PREPARED_STATEMENT` when
+   *         the number of parameters found in the plan differs from the
+   *         number of `?` markers counted by the parser
+   */
+  override def packRows(msg: LeadNodeExecutorMsg,
+      srh: SnappyResultHolder): Unit = {
+    hdos.clearForReuse()
+    SparkSQLExecuteImpl.writeMetaData(srh, hdos, tableNames, nullability, columnNames,
+      getColumnTypes, columnDataTypes, session.getWarnings)
+
+    val questionMarkCounter = session.snappyParser.questionMarkCounter
+    if (questionMarkCounter > 0) {
+      val paramLiterals = new mutable.HashSet[ParamLiteral]()
+      analyzedPlan match {
+        case PutIntoValuesColumnTable(_, _, _, _) => analyzedPlan.expressions.foreach {
+          // visit every node of the expression; nodes that are not parameter
+          // markers must be ignored rather than fail the match
+          exp => exp.foreach {
+            case QuestionMark(pos) =>
+              SparkSQLPrepareImpl.addParamLiteral(pos, exp.dataType, exp.nullable, paramLiterals)
+            case _ =>
+          }
+        }
+        case _ =>
+      }
+      SparkSQLPrepareImpl.allParamLiterals(analyzedPlan, paramLiterals)
+      // fall back to parameters that only appear below a cast
+      if (paramLiterals.size != questionMarkCounter) {
+        SparkSQLPrepareImpl.remainingParamLiterals(analyzedPlan, paramLiterals)
+      }
+      val paramLiteralsAtPrepare = paramLiterals.toArray.sortBy(_.pos)
+      val paramCount = paramLiteralsAtPrepare.length
+      if (paramCount != questionMarkCounter) {
+        throw Util.generateCsSQLException(SQLState.NOT_FOR_PREPARED_STATEMENT, sql)
+      }
+      val types = new Array[Int](paramCount * 4 + 1)
+      types(0) = paramCount
+      (0 until paramCount) foreach (i => {
+        // positions are 1-based and must be contiguous after sorting
+        assert(paramLiteralsAtPrepare(i).pos == i + 1)
+        val index = i * 4 + 1
+        val dType = paramLiteralsAtPrepare(i).dataType
+        val sqlType = getSQLType(dType)
+        types(index) = sqlType._1
+        types(index + 1) = sqlType._2
+        types(index + 2) = sqlType._3
+        // addParamLiteral passes the nullable flag as the literal's first
+        // argument, which is read back here through `value`
+        types(index + 3) = if (paramLiteralsAtPrepare(i).value.asInstanceOf[Boolean]) 1 else 0
+      })
+      session.setPreparedParamsTypeInfo(types)
+      DataSerializer.writeIntArray(types, hdos)
+    } else {
+      DataSerializer.writeIntArray(Array[Int](0), hdos)
+    }
+
+    if (msg.isLocallyExecuted) {
+      SparkSQLExecuteImpl.handleLocalExecution(srh, hdos)
+    }
+    msg.lastResult(srh)
+  }
+
+  /** Writes the buffered bytes to `out`; see `SparkSQLExecuteImpl.serializeRows`. */
+  override def serializeRows(out: DataOutput, hasMetadata: Boolean): Unit =
+    SparkSQLExecuteImpl.serializeRows(out, hasMetadata, hdos)
+
+  /**
+   * Maps the Spark type of a parameter to a `(typeFormatId, precision, scale)`
+   * triple. Strings always map to CLOB; complex and unknown types map to
+   * `REF_TYPE_ID`; precision and scale are -1 except for decimals.
+   */
+  // Also see SnappyResultHolder.getNewNullDVD(
+  def getSQLType(dataType: DataType): (Int, Int, Int) = dataType match {
+    case IntegerType => (StoredFormatIds.SQL_INTEGER_ID, -1, -1)
+    case StringType => (StoredFormatIds.SQL_CLOB_ID, -1, -1)
+    case LongType => (StoredFormatIds.SQL_LONGINT_ID, -1, -1)
+    case TimestampType => (StoredFormatIds.SQL_TIMESTAMP_ID, -1, -1)
+    case DateType => (StoredFormatIds.SQL_DATE_ID, -1, -1)
+    case DoubleType => (StoredFormatIds.SQL_DOUBLE_ID, -1, -1)
+    case t: DecimalType => (StoredFormatIds.SQL_DECIMAL_ID,
+        t.precision, t.scale)
+    case FloatType => (StoredFormatIds.SQL_REAL_ID, -1, -1)
+    case BooleanType => (StoredFormatIds.SQL_BOOLEAN_ID, -1, -1)
+    case ShortType => (StoredFormatIds.SQL_SMALLINT_ID, -1, -1)
+    case ByteType => (StoredFormatIds.SQL_TINYINT_ID, -1, -1)
+    case BinaryType => (StoredFormatIds.SQL_BLOB_ID, -1, -1)
+    case _: ArrayType | _: MapType | _: StructType =>
+      // indicates complex types serialized as json strings
+      (StoredFormatIds.REF_TYPE_ID, -1, -1)
+
+    // send across rest as objects that will be displayed as json strings
+    case _ => (StoredFormatIds.REF_TYPE_ID, -1, -1)
+  }
+}
+
+/**
+ * Helpers that locate `?` parameter markers in an analyzed plan and infer
+ * their types from the surrounding expressions.
+ */
+object SparkSQLPrepareImpl {
+
+  /** Names and data types of the output attributes, as parallel arrays. */
+  def getTableNamesAndDatatype(
+      output: Seq[expressions.Attribute]): (Array[String], Array[DataType]) =
+    output.toArray.map(o => o.name -> o.dataType).unzip
+
+  /**
+   * Records a parameter at `position` unless one with that position is
+   * already present in `result`. The `nullable` flag is passed as the first
+   * argument of the created `ParamLiteral`.
+   */
+  def addParamLiteral(position: Int, datatype: DataType, nullable: Boolean,
+      result: mutable.HashSet[ParamLiteral]): Unit = if (!result.exists(_.pos == position)) {
+    result += ParamLiteral(nullable, datatype, position, execId = -1, tokenized = true)
+  }
+
+  /**
+   * Records every parameter marker that is the value of a CASE branch or of
+   * the ELSE clause, using the given type and nullability.
+   */
+  def handleCase(branches: Seq[(Expression, Expression)], elseValue: Option[Expression],
+      datatype: DataType, nullable: Boolean, result: mutable.HashSet[ParamLiteral]): Unit = {
+    branches.foreach {
+      case (_, QuestionMark(pos)) =>
+        addParamLiteral(pos, datatype, nullable, result)
+      case _ =>
+    }
+    elseValue match {
+      case Some(QuestionMark(pos)) =>
+        addParamLiteral(pos, datatype, nullable, result)
+      case _ =>
+    }
+  }
+
+  /**
+   * Collects parameters whose type can be taken from the other operand:
+   * either side of a binary comparison, the pattern of LIKE and the elements
+   * of an IN list, each optionally wrapped in a cast or a CASE expression.
+   * Every matched expression is returned unchanged; only `result` is updated.
+   */
+  def allParamLiterals(plan: LogicalPlan, result: mutable.HashSet[ParamLiteral]): Unit = {
+    val mapExpression: PartialFunction[Expression, Expression] = {
+      case bl@BinaryComparison(left: Expression, QuestionMark(pos)) =>
+        addParamLiteral(pos, left.dataType, left.nullable, result)
+        bl
+      case blc@BinaryComparison(left: Expression,
+      Cast(QuestionMark(pos), _)) =>
+        addParamLiteral(pos, left.dataType, left.nullable, result)
+        blc
+      case ble@BinaryComparison(left: Expression, CaseWhen(branches, elseValue)) =>
+        handleCase(branches, elseValue, left.dataType, left.nullable, result)
+        ble
+      case blce@BinaryComparison(left: Expression, Cast(CaseWhen(branches, elseValue), _)) =>
+        handleCase(branches, elseValue, left.dataType, left.nullable, result)
+        blce
+      case br@BinaryComparison(QuestionMark(pos), right: Expression) =>
+        addParamLiteral(pos, right.dataType, right.nullable, result)
+        br
+      case brc@BinaryComparison(Cast(QuestionMark(pos), _),
+      right: Expression) =>
+        addParamLiteral(pos, right.dataType, right.nullable, result)
+        brc
+      case bre@BinaryComparison(CaseWhen(branches, elseValue), right: Expression) =>
+        handleCase(branches, elseValue, right.dataType, right.nullable, result)
+        bre
+      case brce@BinaryComparison(Cast(CaseWhen(branches, elseValue), _), right: Expression) =>
+        handleCase(branches, elseValue, right.dataType, right.nullable, result)
+        brce
+      case l@Like(left: Expression, QuestionMark(pos)) =>
+        addParamLiteral(pos, left.dataType, left.nullable, result)
+        l
+      case lc@Like(left: Expression, Cast(QuestionMark(pos), _)) =>
+        addParamLiteral(pos, left.dataType, left.nullable, result)
+        lc
+      case inlist@org.apache.spark.sql.catalyst.expressions.In(value: Expression,
+      list: Seq[Expression]) =>
+        // only the side effect on `result` matters, so no collection is built
+        list.foreach {
+          case QuestionMark(pos) =>
+            addParamLiteral(pos, value.dataType, value.nullable, result)
+          case Cast(QuestionMark(pos), _) =>
+            addParamLiteral(pos, value.dataType, value.nullable, result)
+          case _ =>
+        }
+        inlist
+    }
+    handleSubQuery(plan, mapExpression)
+  }
+
+  /**
+   * Collects parameters that appear directly below a cast (optionally inside
+   * a CASE expression), typing them with the cast's target type and marking
+   * them as not nullable.
+   */
+  def remainingParamLiterals(plan: LogicalPlan, result: mutable.HashSet[ParamLiteral]): Unit = {
+    val mapExpression: PartialFunction[Expression, Expression] = {
+      case c@Cast(QuestionMark(pos), castType: DataType) =>
+        addParamLiteral(pos, castType, nullable = false, result)
+        c
+      case cc@Cast(CaseWhen(branches, elseValue), castType: DataType) =>
+        handleCase(branches, elseValue, castType, nullable = false, result)
+        cc
+    }
+    handleSubQuery(plan, mapExpression)
+  }
+
+  /**
+   * Applies `f` to every expression of `plan` on which it is defined and
+   * recurses into the plans of subquery expressions.
+   *
+   * @return the plan produced by `transformAllExpressions`
+   */
+  def handleSubQuery(plan: LogicalPlan,
+      f: PartialFunction[Expression, Expression]): LogicalPlan = plan transformAllExpressions {
+    case e if f.isDefinedAt(e) => f(e)
+    case sub: SubqueryExpression => sub match {
+      case l@ListQuery(query, x) => l.copy(handleSubQuery(query, f), x)
+      case e@Exists(query, x) => e.copy(handleSubQuery(query, f), x)
+      case p@PredicateSubquery(query, x, y, z) => p.copy(handleSubQuery(query, f), x, y, z)
+      case s@ScalarSubquery(query, x, y) => s.copy(handleSubQuery(query, f), x, y)
+      // leave any other kind of subquery expression untouched
+      case other => other
+    }
+  }
+}
diff --git a/cluster/src/main/scala/io/snappydata/impl/LeadImpl.scala b/cluster/src/main/scala/io/snappydata/impl/LeadImpl.scala
new file mode 100644
index 0000000000..ff292f9ff7
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/impl/LeadImpl.scala
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ +package io.snappydata.impl + +import java.lang.reflect.{Constructor, Method} +import java.net.{URL, URLClassLoader} +import java.security.Permission +import java.sql.SQLException +import java.util.Properties + +import scala.collection.JavaConverters._ +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, Future} + +import akka.actor.ActorSystem +import com.gemstone.gemfire.CancelException +import com.gemstone.gemfire.cache.CacheClosedException +import com.gemstone.gemfire.distributed.internal.InternalDistributedSystem +import com.gemstone.gemfire.distributed.internal.locks.{DLockService, DistributedMemberLock} +import com.gemstone.gemfire.internal.cache.{CacheServerLauncher, Status} +import com.gemstone.gemfire.internal.shared.ClientSharedUtils +import com.pivotal.gemfirexd.FabricService.State +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import com.pivotal.gemfirexd.internal.engine.store.ServerGroupUtils +import com.pivotal.gemfirexd.internal.shared.common.reference.SQLState +import com.pivotal.gemfirexd.{Attribute, Constants, FabricService, NetworkInterface} +import com.typesafe.config.{Config, ConfigFactory} +import io.snappydata.Constant.{SPARK_PREFIX, SPARK_SNAPPY_PREFIX, JOBSERVER_PROPERTY_PREFIX => JOBSERVER_PREFIX, PROPERTY_PREFIX => SNAPPY_PREFIX, STORE_PROPERTY_PREFIX => STORE_PREFIX} +import io.snappydata.cluster.ExecutorInitiator +import io.snappydata.util.ServiceUtils +import io.snappydata.{Constant, Lead, LocalizedMessages, Property, ProtocolOverrides, ServiceManager, SnappyTableStatsProviderService} +import org.apache.thrift.transport.TTransportException +import spark.jobserver.JobServer +import spark.jobserver.auth.{AuthInfo, SnappyAuthenticator, User} +import spray.routing.authentication.UserPass + +import org.apache.spark.sql.collection.{ToolsCallbackInit, Utils} +import 
org.apache.spark.sql.execution.SecurityUtils
+import org.apache.spark.sql.hive.thriftserver.SnappyHiveThriftServer2
+import org.apache.spark.sql.{SnappyContext, SnappySession}
+import org.apache.spark.{Logging, SparkCallbacks, SparkConf, SparkContext, SparkException}
+
+/**
+ * Lead node service. `start` normalizes the boot properties, then creates the
+ * SparkContext and the add-on services (job server, hive thrift server and the
+ * optional zeppelin interpreter) inside a background `Future` while the store
+ * is booted by `internalStart`. `internalStart` also acquires the distributed
+ * primary-lead lock; when the lock is held elsewhere it either fails (embedded
+ * invocation) or stands by until the lock becomes available (direct API).
+ */
+class LeadImpl extends ServerImpl with Lead
+    with ProtocolOverrides with Logging {
+
+  self =>
+
+  val DEFAULT_LEADER_MEMBER_WEIGHT_NAME = "gemfire.member-weight"
+
+  val DEFAULT_LEADER_MEMBER_WEIGHT = "17"
+
+  private val LOCK_SERVICE_NAME = "__PRIMARY_LEADER_LS"
+
+  // store properties recorded once the store reports RUNNING; cleared by internalStop
+  private val bootProperties = new Properties()
+
+  // optional callback invoked on STANDBY/STARTING transitions in internalStart
+  private var notifyStatusChange: FabricService.State => Unit = _
+
+  // set by markRunning() and reset by internalStop(); read by serviceStatus()
+  @volatile private var servicesStarted: Boolean = _
+
+  var _directApiInvoked: Boolean = false
+  var isTestSetup = false
+
+  def directApiInvoked: Boolean = _directApiInvoked
+
+  // zeppelin interpreter server class/instance, loaded reflectively when enabled
+  private var remoteInterpreterServerClass: Class[_] = _
+  private var remoteInterpreterServerObj: Any = _
+
+  var urlclassloader: ExtendibleURLClassLoader = _
+
+  /**
+   * Sets `name` in `props` only when the key is absent. `value` is by-name so it
+   * is evaluated only when the property is actually set.
+   */
+  private def setPropertyIfAbsent(props: Properties, name: String, value: => String): Unit = {
+    if (!props.containsKey(name)) props.setProperty(name, value)
+  }
+
+  /**
+   * Starts the lead through the direct API: rewrites `bootProperties` in place
+   * (prefixing store properties, merging selected system properties, forcing the
+   * lead defaults), boots the store and waits for the Spark side to initialize.
+   *
+   * @param bootProperties boot properties; modified by this call
+   * @param ignoreIfStarted not consulted in this method body
+   */
+  @throws[SQLException]
+  override def start(bootProperties: Properties, ignoreIfStarted: Boolean): Unit = {
+    _directApiInvoked = true
+
+    isTestSetup = bootProperties.getProperty("isTest", "false").toBoolean
+    bootProperties.remove("isTest")
+    val authSpecified = Misc.checkLDAPAuthProvider(bootProperties)
+
+    ServiceUtils.setCommonBootDefaults(bootProperties, forLocator = false)
+
+    // prefix all store properties with "snappydata.store" for SparkConf
+
+    // first the passed in bootProperties
+    val propNames = bootProperties.stringPropertyNames().iterator()
+    while (propNames.hasNext) {
+      val propName = propNames.next()
+      if (propName.startsWith(SPARK_PREFIX)) {
+        if (propName.startsWith(SPARK_SNAPPY_PREFIX)) {
+          // remove the "spark." prefix for uniformity (e.g. when looking up a property)
+          bootProperties.setProperty(propName.substring(SPARK_PREFIX.length),
+            bootProperties.getProperty(propName))
+          bootProperties.remove(propName)
+        }
+      } else if (!propName.startsWith(SNAPPY_PREFIX) &&
+          !propName.startsWith(JOBSERVER_PREFIX) &&
+          !propName.startsWith("zeppelin.") &&
+          !propName.startsWith("hive.") &&
+          !propName.startsWith("hadoop.") &&
+          !propName.startsWith("javax.jdo.")) {
+        bootProperties.setProperty(STORE_PREFIX + propName, bootProperties.getProperty(propName))
+        bootProperties.remove(propName)
+      }
+    }
+    // next the system properties that cannot override above
+    val sysProps = System.getProperties
+    val sysPropNames = sysProps.stringPropertyNames().iterator()
+    // check if user has set gemfire.member-weight property
+    if (System.getProperty(DEFAULT_LEADER_MEMBER_WEIGHT_NAME) eq null) {
+      System.setProperty(DEFAULT_LEADER_MEMBER_WEIGHT_NAME, DEFAULT_LEADER_MEMBER_WEIGHT)
+    }
+
+    while (sysPropNames.hasNext) {
+      val sysPropName = sysPropNames.next()
+      if (sysPropName.startsWith(SPARK_PREFIX)) {
+        if (sysPropName.startsWith(SPARK_SNAPPY_PREFIX)) {
+          // remove the "spark." prefix for uniformity (e.g. when looking up a property)
+          setPropertyIfAbsent(bootProperties, sysPropName.substring(SPARK_PREFIX.length),
+            sysProps.getProperty(sysPropName))
+        } else {
+          setPropertyIfAbsent(bootProperties, sysPropName, sysProps.getProperty(sysPropName))
+        }
+      } else if (sysPropName.startsWith(SNAPPY_PREFIX) ||
+          sysPropName.startsWith(JOBSERVER_PREFIX) ||
+          sysPropName.startsWith("zeppelin.") ||
+          sysPropName.startsWith("hive.") ||
+          sysPropName.startsWith("hadoop.") ||
+          sysPropName.startsWith("javax.jdo.")) {
+        setPropertyIfAbsent(bootProperties, sysPropName, sysProps.getProperty(sysPropName))
+      }
+    }
+
+    // add default lead properties that cannot be overridden
+    val serverGroupsProp = STORE_PREFIX + Attribute.SERVER_GROUPS
+    val groups = bootProperties.getProperty(serverGroupsProp) match {
+      case null => LeadImpl.LEADER_SERVERGROUP
+      case v => v + ',' + LeadImpl.LEADER_SERVERGROUP
+    }
+    bootProperties.setProperty(serverGroupsProp, groups)
+    bootProperties.setProperty(STORE_PREFIX + Attribute.GFXD_HOST_DATA, "false")
+    bootProperties.setProperty(STORE_PREFIX + Attribute.GFXD_PERSIST_DD, "false")
+
+    // copy store related properties into a separate properties bag
+    // to be used by store boot while original will be used by SparkConf
+    val storeProperties = ServiceUtils.getStoreProperties(bootProperties.stringPropertyNames()
+        .iterator().asScala.map(k => k -> bootProperties.getProperty(k)).toSeq)
+
+    val productName = {
+      if (SnappySession.isEnterpriseEdition) {
+        "TIBCO ComputeDB"
+      } else {
+        "SnappyData"
+      }
+    }
+
+    // initialize store and Spark in parallel (Spark will wait in
+    // cluster manager start on internalStart)
+    val initServices = Future {
+      val locator = bootProperties.getProperty(Property.Locators.name)
+      val conf = new SparkConf(false) // system properties already in bootProperties
+      conf.setMaster(s"${Constant.SNAPPY_URL_PREFIX}$locator").
+          setAppName(productName).
+          set(Property.JobServerEnabled.name, "true").
+          set("spark.scheduler.mode", "FAIR").
+          setIfMissing("spark.memory.manager",
+            ExecutorInitiator.SNAPPY_MEMORY_MANAGER)
+
+      Utils.setDefaultSerializerAndCodec(conf)
+
+      conf.setAll(bootProperties.asScala)
+      // set spark ui port to 5050 that is snappy's default
+      conf.set("spark.ui.port",
+        bootProperties.getProperty("spark.ui.port", LeadImpl.SPARKUI_PORT.toString))
+
+      // wait for log service to initialize so that Spark also uses the same
+      while (!ClientSharedUtils.isLoggerInitialized && status() != State.RUNNING) {
+        Thread.sleep(50)
+      }
+      resetLogger()
+
+      val zeppelinEnabled = bootProperties.getProperty(
+        Constant.ENABLE_ZEPPELIN_INTERPRETER, "false").equalsIgnoreCase("true")
+      if (zeppelinEnabled && !authSpecified) {
+        try {
+
+          val zeppelinIntpUtilClass = Utils.classForName(
+            "org.apache.zeppelin.interpreter.ZeppelinIntpUtil")
+
+          /**
+           * This will initialize the zeppelin repl interpreter.
+           * This should be done before spark context is created as zeppelin
+           * interpreter will set some properties for classloader for repl
+           * which needs to be specified while creating sparkcontext in lead
+           */
+          logInfo("About to initialize SparkContext with SparkConf")
+          val method: Method = zeppelinIntpUtilClass.getMethod(
+            "initializeZeppelinReplAndGetConfig")
+          val obj: Object = method.invoke(null)
+          val props: Properties = obj.asInstanceOf[Properties]
+          props.asScala.foreach(kv => conf.set(kv._1, kv._2))
+        } catch {
+          /* [Sachin] So we need to log warning that
+          interpreter not started or do we need to exit? */
+          // any failure lands here (not only a missing class), so log the actual
+          // cause along with the message before rethrowing
+          case e: Throwable =>
+            logWarning("Cannot find zeppelin interpreter in the classpath", e)
+            throw e;
+        }
+      }
+
+      // The auth service is not yet initialized at this point.
+      // So simply check the auth-provider property value.
+      if (authSpecified) {
+        logInfo("Enabling user authentication for SnappyData Pulse")
+        SparkCallbacks.setAuthenticatorForJettyServer()
+      }
+
+      // take out the password property from SparkConf so that it is not logged
+      // or seen by Spark layer
+      val passwordKey = STORE_PREFIX + Attribute.PASSWORD_ATTR
+      val password = conf.getOption(passwordKey)
+      password match {
+        case Some(_) => conf.remove(passwordKey)
+        case _ =>
+      }
+
+      val parent = Thread.currentThread().getContextClassLoader
+      urlclassloader = new ExtendibleURLClassLoader(parent)
+      Thread.currentThread().setContextClassLoader(urlclassloader)
+
+      val sc = new SparkContext(conf)
+
+      // This will use GfxdDistributionAdvisor#distributeProfileUpdate
+      // which inturn will create a new profile object via #instantiateProfile
+      // whereby ClusterCallbacks#getDriverURL should be now returning
+      // the correct URL given SparkContext is fully initialized.
+      logInfo("About to send profile update after initialization completed.")
+      ServerGroupUtils.sendUpdateProfile()
+
+      val startHiveServer = Property.HiveServerEnabled.get(conf)
+      val startHiveServerDefault = Property.HiveServerEnabled.defaultValue.get &&
+          !conf.contains(Property.HiveServerEnabled.name)
+      val useHiveSession = Property.HiveServerUseHiveSession.get(conf)
+      val hiveSessionKind = if (useHiveSession) "session=hive" else "session=snappy"
+
+      var jobServerWait = false
+      var confFile: Array[String] = null
+      var jobServerConfig: Config = null
+      var startupString: String = null
+      if (Property.JobServerEnabled.get(conf)) {
+        jobServerWait = (!startHiveServerDefault && startHiveServer) ||
+            Property.JobServerWaitForInit.get(conf)
+        confFile = conf.getOption("jobserver.configFile") match {
+          case None => Array[String]()
+          case Some(c) => Array(c)
+        }
+        jobServerConfig = getConfig(confFile)
+        val bindAddress = jobServerConfig.getString("spark.jobserver.bind-address")
+        val port = jobServerConfig.getInt("spark.jobserver.port")
+        startupString = s"job server on: $bindAddress[$port]"
+      }
+      // add default startup message for hive-thriftserver
+      if (startHiveServerDefault) {
+        addStartupMessage(s"Starting hive thrift server ($hiveSessionKind)")
+      }
+      if (!jobServerWait) {
+        // mark RUNNING (job server and zeppelin will continue to start in background)
+        markLauncherRunning(if (startupString ne null) s"Starting $startupString" else null)
+      }
+
+      // Add a URL classloader to the main thread so that new URIs can be added
+
+      // wait for a while until servers get registered
+      val endWait = System.currentTimeMillis() + 120000
+      while (!SnappyContext.hasServerBlockIds && System.currentTimeMillis() <= endWait) {
+        Thread.sleep(100)
+      }
+      // initialize global state
+      password match {
+        case Some(p) =>
+          // set the password back and remove after initialization
+          SparkCallbacks.setSparkConf(sc, passwordKey, p)
+          SnappyContext(sc)
+          SparkCallbacks.setSparkConf(sc, passwordKey, value = null)
+
+        case _ => SnappyContext(sc)
+      }
+
+      // start the service to gather table statistics
+      SnappyTableStatsProviderService.start(sc, url = null)
+
+      if (startHiveServer) {
+        val hiveService = SnappyHiveThriftServer2.start(useHiveSession)
+        if (jobServerWait) SnappyHiveThriftServer2.getHostPort(hiveService) match {
+          case None => addStartupMessage(s"Started hive thrift server ($hiveSessionKind)")
+          case Some((host, port)) =>
+            addStartupMessage(s"Started hive thrift server ($hiveSessionKind) on: $host[$port]")
+        }
+      }
+
+      // update the Spark UI to add the dashboard and other SnappyData pages
+      ToolsCallbackInit.toolsCallback.updateUI(sc)
+
+      // start other add-on services (job server)
+      startAddOnServices(conf, confFile, jobServerConfig)
+
+      // finally start embedded zeppelin interpreter if configured and security is not enabled.
+      if (!authSpecified) {
+        checkAndStartZeppelinInterpreter(zeppelinEnabled, bootProperties)
+      }
+
+      if (jobServerWait) {
+        // mark RUNNING after job server, hive server and zeppelin initialization if so configured
+        markLauncherRunning(if (startupString ne null) s"Started $startupString" else null)
+      }
+    }
+
+    try {
+      internalStart(() => storeProperties)
+      Await.result(initServices, Duration.Inf)
+      // mark status as RUNNING at the end in any case
+      markRunning()
+    } catch {
+      case _: InterruptedException =>
+        logInfo(s"Thread interrupted, aborting.")
+      case e: Throwable =>
+        logWarning("Exception while starting lead node", e)
+        throw e
+    }
+  }
+
+  /**
+   * Boots the store with the properties produced by `initStoreProps` and then
+   * contends for the primary-lead lock. Returns immediately when the service is
+   * neither UNINITIALIZED nor STOPPED.
+   *
+   * @throws SparkException when another lead holds the lock and this lead was
+   *                        not started through the direct API
+   */
+  @throws[SparkException]
+  private def internalStart(initStoreProps: () => Properties): Unit = synchronized {
+    if (status() != State.UNINITIALIZED && status() != State.STOPPED) {
+      // already started or in the process of starting
+      return
+    }
+    val storeProps = initStoreProps()
+    checkAuthProvider(storeProps)
+
+    super.start(storeProps, ignoreIfStarted = false)
+
+    resetLogger()
+
+    val cache = Misc.getGemFireCache
+    cache.getDistributionManager.addMembershipListener(SnappyContext.membershipListener)
+
+    status() match {
+      case State.RUNNING =>
+        bootProperties.putAll(storeProps)
+        logInfo("ds connected. About to check for primary lead lock.")
+        // check for leader's primary election
+
+        val dls = DLockService.create(LOCK_SERVICE_NAME, cache.getDistributedSystem,
+          true, true, true)
+        val primaryLeaderLock = new DistributedMemberLock(dls,
+          LOCK_SERVICE_NAME, DistributedMemberLock.NON_EXPIRING_LEASE,
+          DistributedMemberLock.LockReentryPolicy.PREVENT_SILENTLY)
+
+        val startStatus = primaryLeaderLock.tryLock()
+        // noinspection SimplifyBooleanMatch
+        startStatus match {
+          case true =>
+            logInfo("Primary lead lock acquired.")
+          // let go.
+          case false =>
+            if (!_directApiInvoked) {
+              // cleanup before throwing exception
+              internalStop(bootProperties)
+              throw new SparkException("Primary Lead node (Spark Driver) is " +
+                  "already running in the system. You may use smart connector " +
+                  "mode to connect to SnappyData cluster.")
+            }
+            serverstatus = State.STANDBY
+            val callback = notifyStatusChange
+            if (callback != null) {
+              logInfo("Notifying standby status ...")
+              callback(serverstatus)
+            }
+
+            logInfo("Primary Lead node (Spark Driver) is already running in the system." +
+                " Standing by as secondary.")
+            // blocks until the primary lock can be taken
+            primaryLeaderLock.lockInterruptibly()
+
+            // TODO: check cancelInProgress and other shutdown possibilities.
+
+            logInfo("Resuming startup sequence from STANDBY ...")
+            serverstatus = State.STARTING
+            if (callback != null) {
+              callback(serverstatus)
+            }
+        }
+      case _ =>
+        logWarning(LocalizedMessages.res.getTextMessage("SD_LEADER_NOT_READY", status()))
+    }
+  }
+
+  /** Reports STARTING instead of RUNNING until `markRunning` has been called. */
+  override def serviceStatus(): State = {
+    // show as running only after everything has initialized
+    status() match {
+      case State.RUNNING if !servicesStarted => State.STARTING
+      case state => state
+    }
+  }
+
+  /** Notifies the launcher of RUNNING and records that all services have started. */
+  private def markRunning(): Unit = {
+    if (GemFireXDUtils.TraceFabricServiceBoot) {
+      logInfo("Accepting RUNNING notification")
+    }
+    notifyRunningInLauncher(Status.RUNNING)
+    serverstatus = State.RUNNING
+    servicesStarted = true
+  }
+
+  /**
+   * Appends `message` to the current launcher's startup message (newline
+   * separated). No-op for a null/empty message or when no launcher is present.
+   */
+  private def addStartupMessage(message: String): Unit = {
+    if ((message ne null) && !message.isEmpty) {
+      val launcher = CacheServerLauncher.getCurrentInstance
+      if (launcher ne null) {
+        val startupMessage = launcher.getServerStartupMessage
+        if (startupMessage eq null) {
+          launcher.setServerStartupMessage(message)
+        } else {
+          launcher.setServerStartupMessage(startupMessage + "\n " + message)
+        }
+      }
+    }
+  }
+
+  /** Adds the optional startup `message` and notifies the launcher of RUNNING. */
+  private def markLauncherRunning(message: String): Unit = {
+    addStartupMessage(message)
+    notifyRunningInLauncher(Status.RUNNING)
+  }
+
+  /**
+   * Validates the auth-provider and server-auth-provider properties: any value
+   * other than LDAP or NONE is rejected, and any non-null value is rejected when
+   * not running the enterprise edition.
+   */
+  private def checkAuthProvider(props: Properties): Unit = {
+    doCheck(props.getProperty(Attribute.AUTH_PROVIDER))
+    doCheck(props.getProperty(Attribute.SERVER_AUTH_PROVIDER))
+
+    def doCheck(authP: String): Unit = {
+      if (authP != null && !Constants.AUTHENTICATION_PROVIDER_LDAP.equalsIgnoreCase(authP) &&
+          !"NONE".equalsIgnoreCase(authP)) {
+        throw new UnsupportedOperationException(
+          "LDAP is the only supported auth-provider currently.")
+      }
+      if (authP != null && !SnappySession.isEnterpriseEdition) {
+        throw new UnsupportedOperationException("Security feature is available in SnappyData " +
+            "Enterprise Edition.")
+      }
+    }
+  }
+
+  /** Stops the lead; non-null credentials are first converted to store properties. */
+  @throws[SQLException]
+  override def stop(shutdownCredentials: Properties): Unit = {
+    /* (sample reservoir region is now persistent by default)
+    val servers = GemFireXDUtils.getGfxdAdvisor.adviseDataStores(null)
+    if (servers.size() > 0) {
+      SnappyContext.flushSampleTables()
+    }
+    */
+    if (shutdownCredentials eq null) internalStop(null)
+    else internalStop(ServiceUtils.getStoreProperties(shutdownCredentials.asScala.toSeq))
+  }
+
+  /**
+   * Stops the hive thrift server, the SparkContext, the zeppelin interpreter
+   * server (when alive) and finally the store. SQL exceptions whose state marks
+   * an already shut down system, and `CancelException`, are ignored.
+   */
+  private[snappydata] def internalStop(shutdownCredentials: Properties): Unit = {
+    if (!servicesStarted && bootProperties.isEmpty) return
+
+    try {
+      Misc.getGemFireCache.getDistributionManager
+          .removeMembershipListener(SnappyContext.membershipListener)
+    } catch {
+      case _: CacheClosedException =>
+    }
+    SnappyHiveThriftServer2.close()
+    val sc = SnappyContext.globalSparkContext
+    if (sc != null) sc.stop()
+    servicesStarted = false
+    // TODO: [soubhik] find a way to stop jobserver.
+    if (null != remoteInterpreterServerObj) {
+      val method: Method = remoteInterpreterServerClass.getMethod("isAlive")
+      val isAlive: java.lang.Boolean = method.invoke(remoteInterpreterServerObj)
+          .asInstanceOf[java.lang.Boolean]
+      val shutdown: Method = remoteInterpreterServerClass.getMethod("shutdown",
+        classOf[java.lang.Boolean])
+
+      if (isAlive) {
+        shutdown.invoke(remoteInterpreterServerObj, true.asInstanceOf[AnyRef])
+      }
+    }
+    val sys = InternalDistributedSystem.getConnectedInstance
+    if (sys ne null) {
+      try {
+        super.stop(shutdownCredentials)
+      } catch {
+        case sqle: SQLException =>
+          val sqlState = sqle.getSQLState
+          if (SQLState.CLOUDSCAPE_SYSTEM_SHUTDOWN.startsWith(sqlState)
+              || SQLState.SHUTDOWN_DATABASE.startsWith(sqlState)
+              || SQLState.GFXD_NODE_SHUTDOWN.startsWith(sqlState)) {
+            // ignore if already stopped
+          } else throw sqle
+        case _: CancelException => // ignore if already stopped
+      }
+    }
+    bootProperties.clear()
+  }
+
+  /**
+   * Applies the lead defaults to `conf` (leader server group, host-data and
+   * persist-dd flags, job server disabled unless already configured) and
+   * returns the same `conf`.
+   *
+   * @param conf configuration to update in place
+   * @param sc   optional SparkContext; when it is a loner, host-data is set to true
+   */
+  private[snappydata] def initStartupArgs(conf: SparkConf, sc: SparkContext = null) = {
+
+    // Looks `attr` up as given and, when absent, under the "spark." prefix.
+    // Absent: set `attr`. Present: skip, overwrite or append to the
+    // comma-separated list depending on the flags.
+    def changeOrAppend(attr: String, value: String,
+        overwrite: Boolean = false, ignoreIfPresent: Boolean = false,
+        sparkPrefix: String = null): Unit = {
+      val attrKey = if (sparkPrefix == null) attr else sparkPrefix + attr
+      conf.getOption(attrKey) match {
+        case None => if (sparkPrefix == null) {
+          changeOrAppend(attr, value, overwrite, ignoreIfPresent,
+            sparkPrefix = SPARK_PREFIX)
+        } else conf.set(attr, value)
+        case _ if ignoreIfPresent => // skip setting property
+        case _ if overwrite => conf.set(attr, value)
+        case Some(v) =>
+          // ignore if already set: the earlier check tested `value` against
+          // "," + value, which can never match, so the value was appended again
+          // whenever the existing list differed from it; compare entry-wise
+          if (!v.split(',').exists(_.trim == value)) {
+            conf.set(attr, if (v.trim.isEmpty) value else v + "," + value)
+          }
+      }
+    }
+
+    changeOrAppend(STORE_PREFIX + Attribute.SERVER_GROUPS, LeadImpl.LEADER_SERVERGROUP)
+
+    assert(Property.Locators.getOption(conf).orElse(
+      Property.McastPort.getOption(conf)).isDefined,
+      s"Either ${Property.Locators} or ${Property.McastPort} " +
+          s"must be defined for SnappyData cluster to start")
+    // skip overriding host-data if loner VM.
+    if (sc != null && Utils.isLoner(sc)) {
+      changeOrAppend(STORE_PREFIX + Attribute.GFXD_HOST_DATA,
+        "true", overwrite = true)
+    } else {
+      changeOrAppend(STORE_PREFIX + Attribute.GFXD_HOST_DATA,
+        "false", overwrite = true)
+      changeOrAppend(STORE_PREFIX + Attribute.GFXD_PERSIST_DD,
+        "false", overwrite = true)
+    }
+    changeOrAppend(Property.JobServerEnabled.name, "false",
+      ignoreIfPresent = true)
+
+    conf
+  }
+
+  /** Registers the callback invoked on STANDBY/STARTING transitions. */
+  protected[snappydata] def notifyOnStatusChange(f: FabricService.State => Unit): Unit =
+    this.notifyStatusChange = f
+
+  /**
+   * Starts the job server when a job server config is available. For direct API
+   * starts outside test setups a missing config is an assertion failure.
+   */
+  @throws[Exception]
+  private def startAddOnServices(conf: SparkConf,
+      confFile: Array[String], jobServerConfig: Config): Unit = this.synchronized {
+    if (_directApiInvoked && !isTestSetup) {
+      assert(jobServerConfig ne null,
+        "JobServer must have been enabled with lead.start(..) invocation")
+    }
+    if (jobServerConfig ne null) {
+      logInfo("Starting job server...")
+
+      configureAuthenticatorForSJS()
+      JobServer.start(confFile, _ => jobServerConfig, createActorSystem)
+    }
+  }
+
+  /**
+   * Installs a job server authenticator backed by `SecurityUtils.checkCredentials`
+   * when security is enabled. A `None` result from that check is treated as
+   * success; any other result, or a thrown exception, rejects the user.
+   */
+  def configureAuthenticatorForSJS(): Unit = {
+    if (Misc.isSecurityEnabled) {
+      logInfo("Configuring authenticator for Snappy Job users.")
+      SnappyAuthenticator.auth = new SnappyAuthenticator {
+
+        override def authenticate(userPass: Option[UserPass]): Future[Option[AuthInfo]] = {
+          Future(checkCredentials(userPass))
+        }
+
+        def checkCredentials(userPass: Option[UserPass]): Option[AuthInfo] = {
+          userPass match {
+            case Some(u) =>
+              try {
+                SecurityUtils.checkCredentials(u.user, u.pass) match {
+                  case None => Option(new AuthInfo(User(u.user, u.pass)))
+                  case _ => None
+                }
+              } catch {
+                case t: Throwable => logWarning(s"Failed to authenticate the snappy job. $t")
+                  None
+              }
+            case None => None
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the job server configuration. Precedence, highest first: dynamic
+   * overrides, jobserver-overrides.conf, the recorded boot properties,
+   * jobserver-defaults.conf, then `ConfigFactory.load()`.
+   *
+   * @param args not consulted in this method body
+   */
+  def getConfig(args: Array[String]): Config = {
+
+    val notConfigurable = ConfigFactory.parseProperties(getDynamicOverrides).
+        withFallback(ConfigFactory.parseResources("jobserver-overrides.conf"))
+
+    val bootConfig = notConfigurable.withFallback(ConfigFactory.parseProperties(bootProperties))
+
+    val snappyDefaults = bootConfig.withFallback(
+      ConfigFactory.parseResources("jobserver-defaults.conf"))
+
+    val builtIn = ConfigFactory.load()
+
+    val finalConf = snappyDefaults.withFallback(builtIn).resolve()
+
+    logDebug("Passing JobServer with config " + finalConf.root.render())
+
+    finalConf
+  }
+
+  /**
+   * Computes the job server root directory settings by substituting the working
+   * directory (the system home property, "." by default) into the values read
+   * from the bundled job server config resources.
+   */
+  def getDynamicOverrides: Properties = {
+    val dynamicOverrides = new Properties()
+    // NOTE(review): the token is empty as it appears here, which makes the assert
+    // below always pass and makes String.replace insert newValue between every
+    // character; presumably a placeholder was lost -- confirm against the
+    // original source before relying on this method.
+    val replaceString = ""
+
+    def replace(key: String, value: String, newValue: String) = {
+      assert(value.indexOf(replaceString) >= 0)
+      dynamicOverrides.setProperty(key, value.replace(replaceString, newValue))
+    }
+
+    val workingDir = System.getProperty(
+      com.pivotal.gemfirexd.internal.iapi.reference.Property.SYSTEM_HOME_PROPERTY, ".")
+    val defaultConf = ConfigFactory.parseResources("jobserver-defaults.conf")
+
+    var key = "spark.jobserver.filedao.rootdir"
+    replace(key, defaultConf.getString(key), workingDir)
+    key = "spark.jobserver.datadao.rootdir"
+    replace(key, defaultConf.getString(key), workingDir)
+
+    val overrideConf = ConfigFactory.parseResources("jobserver-overrides.conf")
+    key = "spark.jobserver.sqldao.rootdir"
+    replace(key, overrideConf.getString(key), workingDir)
+
+    dynamicOverrides
+  }
+
+  /** Creates the actor system handed to the job server. */
+  def createActorSystem(conf: Config): ActorSystem = {
+    ActorSystem("SnappyLeadJobServer", conf)
+  }
+
+  /** Always fails: network servers are not allowed on the lead node. */
+  @throws[SparkException]
+  override def startNetworkServer(bindAddress: String,
+      port: Int,
+      networkProperties: Properties): NetworkInterface = {
+    throw new SparkException("Network server cannot be started on lead node.")
+  }
+
+  /** Always fails: thrift servers are not allowed on the lead node. */
+  @throws[SparkException]
+  override def startThriftServer(bindAddress: String,
+      port: Int,
+      networkProperties: Properties): NetworkInterface = {
+    throw new SparkException("Thrift server cannot be started on lead node.")
+  }
+
+  /** Always fails: DRDA servers are not allowed on the lead node. */
+  @throws[SparkException]
+  override def startDRDAServer(bindAddress: String,
+      port: Int,
+      networkProperties: Properties): NetworkInterface = {
+    throw new SparkException("DRDA server cannot be started on lead node.")
+  }
+
+  override def stopAllNetworkServers(): Unit = {
+    // nothing to do as none of the net servers are allowed to start.
+  }
+
+  /**
+   * This method is used to start the zeppelin interpreter thread.
+   * By default, zeppelin interpreter will be disabled. User can enable it by
+   * setting "zeppelin.interpreter.enable" to true in leads conf file. User can also specify
+   * the port on which interpreter should listen using property "zeppelin.interpreter.port"
+   */
+  private def checkAndStartZeppelinInterpreter(enabled: Boolean,
+      bootProperties: Properties): Unit = {
+    // As discussed ZeppelinRemoteInterpreter Server will be disabled by default.
+    // [sumedh] Our startup times are already very high and we are looking to
+    // cut that down and not increase further with these external utilities.
+    if (enabled) {
+      val port = bootProperties.getProperty(Constant.ZEPPELIN_INTERPRETER_PORT,
+        "3768").toInt
+      try {
+        remoteInterpreterServerClass = Utils.classForName(
+          "org.apache.zeppelin.interpreter.SnappyInterpreterServer")
+        val constructor: Constructor[_] = remoteInterpreterServerClass
+            .getConstructor(classOf[Integer])
+        remoteInterpreterServerObj = constructor.newInstance(port.asInstanceOf[AnyRef])
+
+        remoteInterpreterServerClass.getSuperclass.getSuperclass
+            .getDeclaredMethod("start").invoke(remoteInterpreterServerObj)
+        logInfo(s"Starting Zeppelin RemoteInterpreter at port " + port)
+      } catch {
+        // failures are logged only; lead startup continues without the interpreter
+        case tTransportException: TTransportException =>
+          logWarning("Error while starting zeppelin interpreter.Actual exception : " +
+              tTransportException.getMessage)
+        case t: Throwable => logWarning("Error starting zeppelin interpreter.Actual exception : " +
+            t.getMessage, t)
+      }
+      // Add memory listener for zeppelin will need it for zeppelin
+      // val listener = new LeadNodeMemoryListener();
+      // Misc.getGemFireCache.getResourceManager.
+      //   addResourceListener(InternalResourceManager.ResourceType.ALL, listener)
+
+    }
+  }
+
+  /** Security manager that rejects `System.exit` and permits everything else. */
+  class NoExitSecurityManager extends SecurityManager {
+    override def checkExit(status: Int): Unit = {
+      throw new SecurityException("exit not allowed")
+    }
+
+    override def checkPermission(perm: Permission): Unit = {
+      // Allow other activities by default
+    }
+  }
+
+  /**
+   * Shuts the interpreter server down and starts a new one. A no-exit security
+   * manager is installed around the reflective shutdown call and the original
+   * one is restored afterwards.
+   */
+  def closeAndReopenInterpreterServer(): Unit = {
+    if (remoteInterpreterServerClass != null) {
+      val origSecurityManager = System.getSecurityManager
+      System.setSecurityManager(new NoExitSecurityManager)
+      try {
+        remoteInterpreterServerClass.getSuperclass.
+            getDeclaredMethod("shutdown").invoke(remoteInterpreterServerObj)
+      } finally {
+        System.setSecurityManager(origSecurityManager)
+      }
+      checkAndStartZeppelinInterpreter(enabled = true, bootProperties)
+    }
+  }
+
+  def getInterpreterServerClass: Class[_] = {
+    remoteInterpreterServerClass
+  }
+}
+
+/** URLClassLoader, initially without URLs, that exposes `addURL` publicly. */
+class ExtendibleURLClassLoader(parent: ClassLoader)
+    extends URLClassLoader(Array.empty[URL], parent) {
+  override def addURL(url: URL) {
+    super.addURL(url)
+  }
+
+  override def getURLs: Array[URL] = super.getURLs
+}
+
+/** Constants and entry points that operate on the lead service instance. */
+object LeadImpl {
+
+  val SPARKUI_PORT: Int = 5050
+  val LEADER_SERVERGROUP: String = ServerGroupUtils.LEADER_SERVERGROUP
+
+  /** Boots the lead's store using the store properties derived from `conf`. */
+  def invokeLeadStart(conf: SparkConf): Unit = {
+    val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl]
+    lead.internalStart(() => ServiceUtils.getStoreProperties(conf.getAll))
+  }
+
+  /** Stops the lead using its recorded boot properties as credentials. */
+  def invokeLeadStop(): Unit = {
+    val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl]
+    lead.internalStop(lead.bootProperties)
+  }
+}
diff --git a/cluster/src/main/scala/io/snappydata/impl/LocatorImpl.scala b/cluster/src/main/scala/io/snappydata/impl/LocatorImpl.scala
new file mode 100644
index 0000000000..e65953ec76
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/impl/LocatorImpl.scala
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.impl
+
+import java.sql.SQLException
+import java.util.Properties
+
+import com.pivotal.gemfirexd.internal.engine.fabricservice.FabricLocatorImpl
+import io.snappydata.util.ServiceUtils
+import io.snappydata.{Locator, ProtocolOverrides}
+
+/**
+ * Locator service that applies the common boot defaults (locator flavour) to
+ * the boot properties before delegating to the fabric locator start.
+ */
+class LocatorImpl
+    extends FabricLocatorImpl with Locator with ProtocolOverrides {
+
+  /** Same as the four-argument `start` with `ignoreIfStarted = false`. */
+  @throws[SQLException]
+  override def start(bindAddress: String, port: Int,
+      bootProperties: Properties): Unit =
+    start(bindAddress, port, bootProperties, ignoreIfStarted = false)
+
+  /**
+   * Starts the locator while holding this instance's monitor, passing the
+   * result of `setCommonBootDefaults` on to the parent implementation.
+   */
+  @throws[SQLException]
+  override def start(bindAddress: String, port: Int,
+      bootProperties: Properties, ignoreIfStarted: Boolean): Unit = synchronized {
+    val locatorProps = ServiceUtils.setCommonBootDefaults(bootProperties, forLocator = true)
+    super.start(bindAddress, port, locatorProps, ignoreIfStarted)
+  }
+}
diff --git a/cluster/src/main/scala/io/snappydata/tools/GfxdLauncherOverrides.scala b/cluster/src/main/scala/io/snappydata/tools/GfxdLauncherOverrides.scala
new file mode 100644
index 0000000000..be73ce2672
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/tools/GfxdLauncherOverrides.scala
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.tools
+
+import com.pivotal.gemfirexd.FabricService
+import com.pivotal.gemfirexd.internal.iapi.tools.i18n.LocalizedResource
+import com.pivotal.gemfirexd.internal.shared.common.sanity.SanityManager
+import com.pivotal.gemfirexd.tools.GfxdDistributionLocator
+import com.pivotal.gemfirexd.tools.internal.GfxdServerLauncher
+import io.snappydata.{LocalizedMessages, ServiceManager}
+
+/**
+ * Launcher extension for GFXD server launcher to use Snappy service manager.
+ *
+ * @author soubhik
+ */
+class ServerLauncher(baseName: String) extends GfxdServerLauncher(baseName) {
+
+  /** Supplies the server instance obtained from the Snappy service manager. */
+  @throws[Exception]
+  override protected def getFabricServiceInstance: FabricService =
+    ServiceManager.getServerInstance
+
+  /** Hands the command line arguments straight to the parent launcher. */
+  override protected def run(args: Array[String]): Unit = super.run(args)
+
+  /** Prints the localized "SERVER_HELP" usage text for the server script. */
+  override protected def usage(): Unit = {
+    val scriptName: String = LocalizedMessages.res.getTextMessage("SD_SERVER_SCRIPT")
+    val serverName: String = LocalizedMessages.res.getTextMessage("SD_SERVER_NAME")
+    val productName = LocalizedMessages.res.getTextMessage("FS_PRODUCT")
+    val extraHelp = LocalizedResource.getMessage("FS_SNAPPY_EXTRA_HELP", productName)
+    val addressArg = LocalizedResource.getMessage("FS_SNAPPY_ADDRESS_ARG")
+
+    printUsage(
+      LocalizedResource.getMessage("SERVER_HELP", scriptName, serverName, addressArg, extraHelp),
+      SanityManager.DEFAULT_MAX_OUT_LINES)
+  }
+}
+
+/** Command line entry point for the server launcher. */
+object ServerLauncher {
+
+  def main(args: Array[String]): Unit =
+    new ServerLauncher("SnappyData Server").run(args)
+}
+
+/**
+ * Launcher
extension for GFXD locator launcher to use Snappy service manager.
+ *
+ * @author soubhik
+ */
+class LocatorLauncher(baseName: String) extends GfxdDistributionLocator(baseName) {
+
+  /** Supplies the locator instance obtained from the Snappy service manager. */
+  @throws[Exception]
+  override protected def getFabricServiceInstance: FabricService =
+    ServiceManager.getLocatorInstance
+
+  /** Hands the command line arguments straight to the parent launcher. */
+  override protected def run(args: Array[String]): Unit = super.run(args)
+
+  /** Prints the localized "SERVER_HELP" usage text for the locator script. */
+  override protected def usage(): Unit = {
+    val scriptName: String = LocalizedMessages.res.getTextMessage("SD_LOC_SCRIPT")
+    val locatorName: String = LocalizedMessages.res.getTextMessage("SD_LOC_NAME")
+    val addressArg = LocalizedResource.getMessage("LOC_ADDRESS_ARG")
+    val extraHelp = LocalizedResource.getMessage("LOC_EXTRA_HELP")
+    val usageOutput = LocalizedResource.getMessage("SERVER_HELP",
+      scriptName, locatorName, addressArg, extraHelp)
+    printUsage(usageOutput, SanityManager.DEFAULT_MAX_OUT_LINES)
+  }
+}
+
+/** Command line entry point for the locator launcher. */
+object LocatorLauncher {
+
+  def main(args: Array[String]): Unit =
+    new LocatorLauncher("SnappyData Locator").run(args)
+}
diff --git a/cluster/src/main/scala/io/snappydata/tools/LeaderLauncher.scala b/cluster/src/main/scala/io/snappydata/tools/LeaderLauncher.scala
new file mode 100644
index 0000000000..d6bf01398c
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/tools/LeaderLauncher.scala
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.tools
+
+import java.util.Properties
+
+import scala.collection.mutable.ArrayBuffer
+
+import com.gemstone.gemfire.cache.Cache
+import com.gemstone.gemfire.internal.cache.Status
+import com.pivotal.gemfirexd.FabricService
+import com.pivotal.gemfirexd.FabricService.State
+import com.pivotal.gemfirexd.internal.engine.Misc
+import com.pivotal.gemfirexd.internal.iapi.tools.i18n.LocalizedResource
+import com.pivotal.gemfirexd.internal.shared.common.sanity.SanityManager
+import com.pivotal.gemfirexd.tools.internal.GfxdServerLauncher
+import io.snappydata.impl.LeadImpl
+import io.snappydata.{LocalizedMessages, ServiceManager}
+import org.slf4j.LoggerFactory
+
+/**
+ * Extending server launcher to init Jobserver as part of lead
+ * node startup. This node won't start DRDA network server.
+ */
+class LeaderLauncher(baseName: String) extends GfxdServerLauncher(baseName) {
+
+  private val genericLogger = LoggerFactory.getLogger(getClass)
+
+  /** Supplies the lead instance obtained from the Snappy service manager. */
+  @throws[Exception]
+  override protected def getFabricServiceInstance: FabricService = ServiceManager.getLeadInstance
+
+  /**
+   * Prepares the launcher arguments: for a "start" command the run-netserver
+   * option is forced to "false". With no arguments this either prints usage and
+   * exits, or fails an assertion, depending on `exitOnEmptyArgs`.
+   *
+   * @param args            launcher arguments; updated in place
+   * @param exitOnEmptyArgs whether an empty argument list ends the process
+   * @return the resulting arguments as an array
+   */
+  def initStartupArgs(args: ArrayBuffer[String],
+      exitOnEmptyArgs: Boolean = true): Array[String] = {
+
+    if (args.isEmpty) {
+      if (!exitOnEmptyArgs) {
+        assert(assertion = false, LocalizedMessages.res.getTextMessage(
+          "SD_ZERO_ARGS"))
+      } else {
+        usage()
+        System.exit(1)
+      }
+    }
+
+    // Adds "-attr=value" when no argument mentions attr; otherwise replaces the
+    // part after '=' (overwrite) or appends ",value" to the existing argument.
+    def changeOrAppend(attr: String, value: String, overwrite: Boolean = false): Unit = {
+      val pos = args.indexWhere(_.indexOf(attr) > 0)
+      if (pos == -1) {
+        args += s"""-${attr}=${value}"""
+      } else if (overwrite) {
+        args(pos) = args(pos).takeWhile(_ != '=') + s"""=${value}"""
+      } else {
+        args(pos) = args(pos) ++ s""",${value}"""
+      }
+    }
+
+    if (args(0).equalsIgnoreCase("start")) {
+      changeOrAppend(GfxdServerLauncher.RUN_NETSERVER, "false", overwrite = true)
+    }
+
+    args.toArray[String]
+  }
+
+  override def hostData: Boolean = false
+
+  /** Prints the localized "SERVER_HELP" usage text for the lead script. */
+  override protected def usage(): Unit = {
+    val scriptName = LocalizedMessages.res.getTextMessage("SD_LEAD_SCRIPT")
+    val leadName = LocalizedMessages.res.getTextMessage("SD_LEAD_NAME")
+    val productName = LocalizedMessages.res.getTextMessage("FS_PRODUCT")
+    val extraHelp = LocalizedResource.getMessage("FS_EXTRA_HELP", productName)
+    val addressArg = LocalizedResource.getMessage("FS_ADDRESS_ARG")
+    printUsage(
+      LocalizedResource.getMessage("SERVER_HELP", scriptName, leadName, addressArg, extraHelp),
+      SanityManager.DEFAULT_MAX_OUT_LINES)
+  }
+
+  /** Hands the command line arguments straight to the parent launcher. */
+  override protected def run(args: Array[String]): Unit = super.run(args)
+
+  /**
+   * Registers the status-change callback on the lead, starts it with `props`
+   * and then records `props` as this launcher's boot properties.
+   */
+  @throws[Exception]
+  override protected def startServerVM(props: Properties) : Unit = {
+    val lead = getFabricServiceInstance.asInstanceOf[LeadImpl]
+    lead.notifyOnStatusChange(writeStatusOnChange)
+    lead.start(props)
+    this.bootProps = props
+  }
+
+  @throws[Exception]
+  override protected def startAdditionalServices(cache: Cache,
+      options: java.util.Map[String, Object], props: Properties): Unit = {
+    // don't call super.startAdditionalServices.
+    // We don't want to init net-server in leader.
+
+    // disabling net server startup etc.
+
+  }
+
+  /** True while the status is STARTING or WAITING. */
+  override protected def checkStatusForWait(status: Status): Boolean = {
+    val current = status.state
+    current == Status.STARTING || current == Status.WAITING
+  }
+
+  /**
+   * Writes the launcher status for STANDBY, STARTING and RUNNING transitions
+   * and logs each write; other states are ignored.
+   */
+  def writeStatusOnChange(newState: State): Unit = {
+    if (newState == State.STANDBY) {
+      setStatusField(createStatus(Status.STANDBY, getProcessId))
+      writeStatus(status)
+      genericLogger.info("lead node standby status written.")
+    } else if (newState == State.STARTING) {
+      setStatusField(createStatus(Status.STARTING, getProcessId))
+      writeStatus(status)
+      genericLogger.info("Lead Node starting status written.")
+    } else if (newState == State.RUNNING) {
+      running(Misc.getDistributedSystem, Status.RUNNING)
+      genericLogger.info("Lead Node running status written.")
+    }
+  }
+
+  override protected def getBaseName(name: String) = "snappyleader"
+
+} // end of class
+
+/** Command line entry point for the lead launcher. */
+object LeaderLauncher {
+
+  def main(args: Array[String]): Unit =
+    new LeaderLauncher("SnappyData Leader").run(args)
+}
diff --git a/cluster/src/main/scala/io/snappydata/tools/SnappyUtilLauncher.scala b/cluster/src/main/scala/io/snappydata/tools/SnappyUtilLauncher.scala
new file mode 100644
index 0000000000..3761fb8a6a
--- /dev/null
+++ b/cluster/src/main/scala/io/snappydata/tools/SnappyUtilLauncher.scala
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
package io.snappydata.tools

import java.io.{File, IOException}
import java.util

import com.gemstone.gemfire.internal.GemFireUtilLauncher.{CommandEntry, SCRIPT_NAME}
import com.gemstone.gemfire.internal.shared.ClientSharedUtils
import com.gemstone.gemfire.internal.{GemFireTerminateError, GemFireUtilLauncher}
import com.pivotal.gemfirexd.internal.iapi.tools.i18n.LocalizedResource
import com.pivotal.gemfirexd.internal.impl.tools.ij.utilMain
import com.pivotal.gemfirexd.internal.tools.ij
import com.pivotal.gemfirexd.tools.GfxdUtilLauncher.GET_CANONICAL_PATH_ARG
import com.pivotal.gemfirexd.tools.internal.{JarTools, MiscTools}
import com.pivotal.gemfirexd.tools.{GfxdSystemAdmin, GfxdUtilLauncher}
import io.snappydata.LocalizedMessages
import io.snappydata.gemxd.{SnappyDataVersion, SnappySystemAdmin}
import org.apache.spark.sql.execution.columnar.impl.StoreCallback

/**
 * Command-line launcher for the snappy processes and utilities.
 */
class SnappyUtilLauncher extends GfxdUtilLauncher {

  SnappyUtilLauncher.init()

  GfxdUtilLauncher.snappyStore = true
  ClientSharedUtils.setThriftDefault(true)

  SnappyDataVersion.loadProperties()

  // gfxd commands that are filtered out of the snappy command table
  protected var snappy_removed_commands: Set[String] = Set[String](
    "agent", "encrypt-password", "upgrade-disk-store", "export-disk-store")

  /**
   * Builds the command table in insertion order: the three launchers, the
   * shell (registered under SCRIPT_NAME), the admin commands, then the
   * MiscTools and JarTools utilities.
   */
  protected override def getTypes: java.util.Map[String, CommandEntry] = {
    val commandTable: java.util.Map[String, CommandEntry] =
      new util.LinkedHashMap[String, CommandEntry]()
    val messages = LocalizedMessages.res

    commandTable.put("server", new CommandEntry(classOf[ServerLauncher],
      messages.getTextMessage("UTIL_Server_Usage"), false))
    commandTable.put("locator", new CommandEntry(classOf[LocatorLauncher],
      messages.getTextMessage("UTIL_Locator_Usage"), false))
    commandTable.put("leader", new CommandEntry(classOf[LeaderLauncher],
      messages.getTextMessage("UTIL_Lead_Usage"), false))

    commandTable.put(SCRIPT_NAME, new CommandEntry(classOf[ij],
      messages.getTextMessage("UTIL_SnappyShell_Usage"), false))

    // admin commands, minus "help", anything mentioning "locator" and the removed ones
    GfxdSystemAdmin.getValidCommands.foreach { cmd =>
      val skipped = "help".equals(cmd) || cmd.contains("locator") ||
          snappy_removed_commands.contains(cmd)
      if (!skipped) {
        val description = LocalizedResource.getMessage(
          s"UTIL_${cmd.replace('-', '_')}_ShortDesc")
        commandTable.put(cmd, new CommandEntry(classOf[SnappySystemAdmin], description, true))
      }
    }

    // MiscTools utilities
    val miscEntries = MiscTools.getValidCommands.entrySet.iterator()
    while (miscEntries.hasNext) {
      val miscEntry = miscEntries.next()
      val description = LocalizedMessages.res.getTextMessage(miscEntry.getValue)
      commandTable.put(miscEntry.getKey, new CommandEntry(classOf[MiscTools], description, true))
    }

    // JarTools utilities
    val jarEntries = JarTools.getValidCommands.entrySet.iterator()
    while (jarEntries.hasNext) {
      val jarEntry = jarEntries.next()
      val description = LocalizedMessages.res.getTextMessage(jarEntry.getValue)
      commandTable.put(jarEntry.getKey, new CommandEntry(classOf[JarTools], description, true))
    }

    commandTable
  }

  /** Delegates to the parent implementation. */
  override def invoke(args: Array[String]): Unit = super.invoke(args)

  /** Delegates to the parent implementation. */
  override def validateArgs(args: Array[String]): Unit = super.validateArgs(args)

  /** The script name currently held in SCRIPT_NAME. */
  override def scriptName(): String = SCRIPT_NAME
}


object SnappyUtilLauncher extends StoreCallback {

  init()

  /** Sets SCRIPT_NAME from the SNAPPY_SCRIPT_NAME environment variable, or "snappy" if unset/empty. */
  private def init(): Unit = {
    val fromEnv = System.getenv("SNAPPY_SCRIPT_NAME")
    SCRIPT_NAME = if ((fromEnv eq null) || fromEnv.length == 0) "snappy" else fromEnv
  }

  /**
   * @see GemFireUtilLauncher#main(String[])
   */
  def main(args: Array[String]): Unit = {

    utilMain.setBasePrompt(SCRIPT_NAME)

    val launcher = new SnappyUtilLauncher()

    // Exits with the code of the first GemFireTerminateError found at or below `t`.
    def exitOnTerminateError(t: Throwable): Unit = {
      if (t != null) {
        t match {
          case err: GemFireTerminateError => System.exit(err.getExitCode)
          case _ =>
        }
        exitOnTerminateError(t.getCause)
      }
    }

    try {
      if (args.length == 0) {
        // no args will default to using ij
        launcher.invoke(Array(SCRIPT_NAME))
      } else if (args.length == 2 && GET_CANONICAL_PATH_ARG.equals(args(0))) {
        // short-circuit for the internal "--get-canonical-path" argument used by
        // script to resolve the full path including symlinks (#43722)
        val givenPath = args(1)
        // scalastyle:off println
        try {
          System.out.println(new File(givenPath).getCanonicalPath)
        } catch {
          case _: IOException =>
            // in case of any exception print the given path itself
            System.out.println(givenPath)
        }
        // scalastyle:on println
      } else {
        launcher.validateArgs(args)
        launcher.invoke(args)
      }
    } catch {
      case term: GemFireTerminateError => System.exit(term.getExitCode)
      case re: RuntimeException =>
        // look for a GemFireTerminateError inside
        exitOnTerminateError(re.getCause)
        throw re
    }
  }
}
package org.apache.spark

import org.apache.spark
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.memory.StoreUnifiedManager
import org.apache.spark.rpc.RpcEnv
import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig}
import org.apache.spark.ui.{JettyUtils, SnappyBasicAuthenticator}
import org.eclipse.jetty.security.authentication.BasicAuthenticator

/**
 * Calls that are needed to be sent to snappy-cluster classes because
 * the variables are private[spark]
 */
object SparkCallbacks {

  /**
   * Creates the executor-side `SparkEnv` and initializes its memory manager.
   * The memory manager is cast to `StoreUnifiedManager`, so any other
   * configured manager fails here with a ClassCastException.
   */
  def createExecutorEnv(
      driverConf: SparkConf,
      executorId: String,
      hostname: String,
      port: Int,
      numCores: Int,
      ioEncryptionKey: Option[Array[Byte]],
      isLocal: Boolean): SparkEnv = {

    val env = SparkEnv.createExecutorEnv(driverConf, executorId, hostname,
      port, numCores, ioEncryptionKey, isLocal)
    env.memoryManager.asInstanceOf[StoreUnifiedManager].init()
    env
  }

  /** Exposes the `rpcEnv` of the given environment. */
  def getRpcEnv(sparkEnv: SparkEnv): RpcEnv = {
    sparkEnv.rpcEnv
  }

  /**
   * Stops the given executor environment (no-op when `env` is null): closes the
   * memory manager, stops `env`, clears the global `SparkEnv` and stops the
   * credential updater, all inside `runAsSparkUser`.
   */
  def stopExecutor(env: SparkEnv): Unit = {
    if (env != null) {
      SparkHadoopUtil.get.runAsSparkUser { () =>
        // Copy the memory state to boot memory manager
        // NOTE(review): this closes the manager of SparkEnv.get, not of `env` --
        // confirm the two are always the same instance here.
        SparkEnv.get.memoryManager.asInstanceOf[StoreUnifiedManager].close
        env.stop()
        SparkEnv.set(null)
        SparkHadoopUtil.get.stopCredentialUpdater()
      }
    }
  }

  /**
   * Asks the driver at `url` for the application configuration using a
   * temporary client-mode `RpcEnv`.
   *
   * @return the I/O encryption key (if any) and the driver's Spark properties
   *         with `spark.app.id` set to `appId` appended
   */
  def fetchDriverProperty(appId: String, host: String, executorConf: SparkConf,
      port: Int, url: String): (Option[Array[Byte]], Seq[(String, String)]) = {
    val fetcher = RpcEnv.create(
      "driverPropsFetcher",
      host,
      port,
      executorConf,
      new spark.SecurityManager(executorConf), clientMode = true)
    try {
      val driver = fetcher.setupEndpointRefByURI(url)
      val cfg = driver.askWithRetry[SparkAppConfig](RetrieveSparkAppConfig)
      val ioEncryptionKey: Option[Array[Byte]] = cfg.ioEncryptionKey
      val props = cfg.sparkProperties ++
          Seq[(String, String)](("spark.app.id", appId))
      (ioEncryptionKey, props)
    } finally {
      // always release the temporary RpcEnv, also when the lookup or ask fails
      fetcher.shutdown()
    }
  }

  /** Forwards to `SparkConf.isExecutorStartupConf`. */
  def isExecutorStartupConf(key: String): Boolean = {
    SparkConf.isExecutorStartupConf(key)
  }

  /** True when a `SparkEnv` is set and its executor id is the driver identifier. */
  def isDriver: Boolean = {
    SparkEnv.get != null &&
        SparkEnv.get.executorId == SparkContext.DRIVER_IDENTIFIER
  }

  /** Installs a `SnappyBasicAuthenticator` in `JettyUtils` unless one is already set. */
  def setAuthenticatorForJettyServer(): Unit = {
    if (JettyUtils.customAuthenticator.isEmpty) {
      // create and set SnappyBasicAuthenticator
      JettyUtils.customAuthenticator = Some(new SnappyBasicAuthenticator)
    }
  }

  /** Returns the authenticator currently registered in `JettyUtils`, if any. */
  def getAuthenticatorForJettyServer(): Option[BasicAuthenticator] = {
    JettyUtils.customAuthenticator
  }

  /** Sets `key` on the context's conf, or removes it when `value` is null. */
  def setSparkConf(sc: SparkContext, key: String, value: String): Unit = {
    if (value ne null) sc.conf.set(key, value) else sc.conf.remove(key)
  }
}
package org.apache.spark.deploy

/**
 * Command-line entry point that resolves maven coordinates and prints the
 * result returned by [[PackageAndDepUtils.resolveMavenCoordinates]].
 */
object GetJarsAndDependencies {

  val usage: String = "Usage: GetJarsAndDependencies" +
      " [--repos repositories] [--jarcache path] coordinates"

  /**
   * Parses `[--repos repositories] [--jarcache path] coordinates` and prints
   * the resolved result.
   *
   * @throws IllegalArgumentException when no coordinates could be parsed
   *                                  (including after an unknown option)
   */
  def main(args: Array[String]): Unit = {
    if (args.length == 0) println(usage)
    val arglist = args.toList
    type OptionMap = Map[Symbol, String]

    // startsWith instead of s(0): an empty-string argument must not throw
    // StringIndexOutOfBoundsException.
    def isSwitch(s: String): Boolean = s.startsWith("-")

    // Consumes the argument list front to back, accumulating recognised options.
    def nextOption(map: OptionMap, list: List[String]): OptionMap = {
      list match {
        case Nil => map
        case "--jarcache" :: value :: tail =>
          nextOption(map + ('jarcache -> value), tail)
        case "--repos" :: value :: tail =>
          nextOption(map + ('repos -> value), tail)
        // coordinates followed by another switch
        case string :: opt2 :: _ if isSwitch(opt2) =>
          nextOption(map + ('coordinates -> string), list.tail)
        // coordinates as the last argument
        case string :: Nil =>
          nextOption(map + ('coordinates -> string), list.tail)
        case option :: _ =>
          println("Unknown option " + option)
          // drop everything parsed so far; the caller then fails on missing coordinates
          Map.empty
      }
    }

    val options = nextOption(Map(), arglist)

    val coordinates = options.getOrElse('coordinates,
      throw new IllegalArgumentException("No coordinates specified. " + usage))
    val remoteRepos = options.get('repos)
    val ivyPath = options.get('jarcache)
    println(PackageAndDepUtils.resolveMavenCoordinates(coordinates, remoteRepos, ivyPath))
  }
}

object PackageAndDepUtils {

  /** Forwards all arguments unchanged to `SparkSubmitUtils.resolveMavenCoordinates`. */
  def resolveMavenCoordinates(coordinates: String, remoteRepos: Option[String],
      ivyPath: Option[String], exclusions: Seq[String] = Nil, isTest: Boolean = false): String = {
    SparkSubmitUtils.resolveMavenCoordinates(coordinates, remoteRepos, ivyPath, exclusions, isTest)
  }
}
package org.apache.spark.executor

import java.net.URL
import java.nio.ByteBuffer

import com.gemstone.gemfire.CancelException
import com.pivotal.gemfirexd.internal.engine.Misc
import io.snappydata.cluster.ExecutorInitiator

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.rpc.RpcEnv
import org.apache.spark.sql.SnappyContext
import org.apache.spark.{SparkEnv, TaskState}

/**
 * Executor backend that creates a [[SnappyExecutor]] and, instead of exiting
 * the JVM on failure, shuts the executor down and asks
 * `ExecutorInitiator` to restart it.
 */
class SnappyCoarseGrainedExecutorBackend(
    override val rpcEnv: RpcEnv,
    driverUrl: String,
    executorId: String,
    hostName: String,
    cores: Int,
    userClassPath: Seq[URL],
    env: SparkEnv)
    extends CoarseGrainedExecutorBackend(rpcEnv, driverUrl,
      executorId, hostName, cores, userClassPath, env) {

  /** Clears the static artifacts of `SnappyContext`, then shuts down without restart. */
  override def onStop(): Unit = {
    SnappyContext.clearStaticArtifacts()
    exitWithoutRestart()
  }

  /** Delegates to the parent implementation. */
  override def onStart(): Unit = {
    super.onStart()
  }

  /** Creates the executor as a non-local [[SnappyExecutor]] with its own uncaught-exception handler. */
  override protected def registerExecutor: Executor =
    new SnappyExecutor(executorId, hostName, env,
      userClassPath, new SnappyUncaughtExceptionHandler(this),
      isLocal = false)

  /**
   * Avoid sending any message for TaskState.RUNNING which serves no purpose.
   * A RUNNING update is still forwarded when `data` has remaining bytes.
   */
  override def statusUpdate(taskId: Long, state: TaskState.TaskState,
      data: ByteBuffer): Unit = {
    if ((state ne TaskState.RUNNING) || data.hasRemaining) {
      super.statusUpdate(taskId, state, data)
    }
  }

  /**
   * Snappy addition (Replace System.exit with exitExecutor). We could have
   * added functions calling System.exit to SnappyCoarseGrainedExecutorBackend
   * but those functions will have to be brought in sync with CoarseGrainedExecutorBackend
   * after every merge.
   *
   * Shuts the executor down and, unless the store reports that it is closing,
   * logs the reason and requests an executor restart. `code` and
   * `notifyDriver` are not used by this implementation.
   */
  override def exitExecutor(code: Int,
      reason: String, throwable: Throwable,
      notifyDriver: Boolean = true): Unit = {
    exitWithoutRestart()
    // See if the VM is going down
    val vmGoingDown = try {
      Misc.checkIfCacheClosing(null)
      false
    } catch {
      case _: CancelException => true
    }
    if (!vmGoingDown) {
      // Executor may fail to connect to the driver because of
      // https://issues.apache.org/jira/browse/SPARK-9820 and
      // https://issues.apache.org/jira/browse/SPARK-8592. To overcome such
      // issues, try restarting the executor
      val reasonStr = s"Restarting Executor that failed to start. Reason: $reason."
      if (throwable != null) {
        logError(reasonStr, throwable)
      } else {
        // no cause available: log the message only instead of passing a null throwable
        logError(reasonStr)
      }
      ExecutorInitiator.restartExecutor()
    }
  }

  /**
   * Stops the executor (if one was created), this endpoint, the RPC
   * environment and the credential updater, without requesting a restart.
   */
  def exitWithoutRestart(): Unit = {
    if (executor != null) {
      // kill all the running tasks
      // When tasks are killed, the task threads cannot be interrupted
      // as snappy may be writing to an oplog and it generates a
      // DiskAccessException. This DAE ends up closing the underlying regions.
      executor.killAllTasks(interruptThread = false)
      executor.stop()
    }
    // stop the actor system
    stop()
    if (rpcEnv != null) {
      rpcEnv.shutdown()
    }

    SparkHadoopUtil.get.stopCredentialUpdater()
  }
}
package org.apache.spark.executor

import java.io.{File, IOException}
import java.net.{URI, URL}
import java.util.concurrent.ThreadFactory
import java.util.concurrent.atomic.AtomicInteger

import scala.collection.mutable
import com.gemstone.gemfire.internal.tcp.ConnectionTable
import com.gemstone.gemfire.{CancelException, SystemFailure}
import com.google.common.cache.{CacheBuilder, CacheLoader}
import com.pivotal.gemfirexd.internal.engine.Misc
import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.serializer.KryoSerializerPool
import org.apache.spark.sql.internal.ContextJarUtils
import org.apache.spark.util.{MutableURLClassLoader, ShutdownHookManager, SparkExitCode, Utils}
import org.apache.spark.{Logging, SparkEnv, SparkFiles}

/**
 * Executor that releases store sockets when task threads finish, keeps a
 * cache of per-application class loaders, and installs the given
 * uncaught-exception handler in non-local mode.
 */
class SnappyExecutor(
    executorId: String,
    executorHostname: String,
    env: SparkEnv,
    userClassPath: Seq[URL] = Nil,
    exceptionHandler: SnappyUncaughtExceptionHandler,
    isLocal: Boolean = false)
    extends Executor(executorId, executorHostname, env, userClassPath, isLocal) {

  {
    // set a thread-factory for the thread pool for cleanup
    val threadGroup = Thread.currentThread().getThreadGroup
    val threadFactory = new ThreadFactory {

      private val threadNum = new AtomicInteger(0)

      override def newThread(command: Runnable): Thread = {
        // wrap the task so that the thread's sockets are released once it ends
        val r = new Runnable {
          override def run(): Unit = {
            try {
              command.run()
            } finally {
              ConnectionTable.releaseThreadsSockets()
            }
          }
        }
        val thread = new Thread(threadGroup, r,
          "Executor task launch worker-" + threadNum.getAndIncrement())
        thread.setDaemon(true)
        thread
      }
    }
    threadPool.setThreadFactory(threadFactory)
  }

  if (!isLocal) {
    // Setup an uncaught exception handler for non-local mode.
    // Make any thread terminations due to uncaught exceptions
    // kill the executor component
    Thread.setDefaultUncaughtExceptionHandler(exceptionHandler)
  }

  // Class loaders keyed by (application name, application time); see ClassLoaderKey.
  private val classLoaderCache = {
    val loader = new CacheLoader[ClassLoaderKey, SnappyMutableURLClassLoader]() {
      override def load(key: ClassLoaderKey): SnappyMutableURLClassLoader = {
        val appName = key.appName
        val appNameAndJars = key.appNameAndJars
        lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
        // the first two items are the application name and time; the rest are jars
        val appDependencies = appNameAndJars.drop(2).toSeq
        // Prepare urls only if this is not in dropped functions list
        val urls: Seq[URL] =
          if (!ContextJarUtils.checkItemExists(ContextJarUtils.droppedFunctionsKey, appName)) {
            logInfo(s"Creating ClassLoader for $appName" +
                s" with dependencies $appDependencies")
            appDependencies.map(name => {
              val localName = name.split("/").last
              logInfo(s"Fetching file $name for App[$appName]")
              Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf,
                env.securityManager, hadoopConf, -1L, useCache = !isLocal)
              val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL
              Misc.getMemStore.getGlobalCmdRgn.put(
                ContextJarUtils.functionKeyPrefix + appName, name)
              url
            })
          } else {
            Seq.empty[URL]
          }
        val newClassLoader = new SnappyMutableURLClassLoader(urls.toArray, replClassLoader)
        KryoSerializerPool.clear()
        newClassLoader
      }
    }
    // Keeping 500 as cache size. Can revisit the number
    CacheBuilder.newBuilder().maximumSize(500).build(loader)
  }

  /**
   * Cache key for [[classLoaderCache]]. Equality and hash code use only
   * `appName` and `appTime`; `appNameAndJars` is carried along for the loader.
   */
  class ClassLoaderKey(val appName: String,
      val appTime: String,
      val appNameAndJars: Array[String]) {

    override def hashCode(): Int = (appName, appTime).hashCode()

    override def equals(obj: Any): Boolean = {
      obj match {
        case x: ClassLoaderKey => x.appName == appName && x.appTime == appTime
        case _ => false
      }
    }
  }

  /**
   * After the regular dependency update, switches the current thread's context
   * class loader to the cached per-application loader when the task
   * deserialization properties carry a non-empty
   * `Constant.CHANGEABLE_JAR_NAME` value ("name,time,jar1,jar2,...").
   */
  override def updateDependencies(newFiles: mutable.HashMap[String, Long],
      newJars: mutable.HashMap[String, Long]): Unit = {
    super.updateDependencies(newFiles, newJars)
    synchronized {
      val taskDeserializationProps = Executor.taskDeserializationProps.get()
      if (null != taskDeserializationProps) {
        val appDetails = taskDeserializationProps.getProperty(io.snappydata.Constant
            .CHANGEABLE_JAR_NAME, "")
        logDebug(s"Submitted Application Details $appDetails")
        if (!appDetails.isEmpty) {
          val appNameAndJars = appDetails.split(",")
          val threadClassLoader =
            classLoaderCache.getUnchecked(new ClassLoaderKey(appNameAndJars(0),
              appNameAndJars(1), appNameAndJars))
          logDebug(s"Setting thread classloader $threadClassLoader")
          Thread.currentThread().setContextClassLoader(threadClassLoader)
        }
      }
    }
  }

  /**
   * True when `Misc.checkIfCacheClosing(t)` raises a `CancelException`;
   * false when it returns normally or throws anything else.
   */
  override def isStoreCloseException(t: Throwable): Boolean = {
    try {
      Misc.checkIfCacheClosing(t)
      false
    } catch {
      case _: CancelException => true
      case _: Throwable => false
    }
  }

  /** Delegates to `GemFireXDUtils.retryToBeDone`. */
  override def isStoreException(t: Throwable): Boolean = {
    GemFireXDUtils.retryToBeDone(t)
  }

  /** True only for an `Error` that `SystemFailure` classifies as a JVM failure. */
  override def isFatalError(t: Throwable): Boolean = {
    t match {
      case err: Error => SystemFailure.isJVMFailureError(err)
      case _ => false
    }
  }

  /** Fetches each jar into the Spark files root directory and adds it to `urlClassLoader`. */
  def updateMainLoader(jars: Array[String]): Unit = {
    synchronized {
      lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
      jars.foreach(name => {
        val localName = name.split("/").last
        Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf,
          env.securityManager, hadoopConf, -1L, true)
        val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL
        urlClassLoader.addURL(url)
      })
    }
  }

  /**
   * Deletes the local copies of the given jars and replaces `urlClassLoader`
   * (and the derived `replClassLoader`) with a loader over the remaining URLs.
   *
   * A URL is dropped when its string form contains the local path of one of
   * the jars; null URLs are kept as they are.
   */
  def removeJarsFromExecutorLoader(jars: Array[String]): Unit = {
    synchronized {
      val jarFiles = jars.map(name =>
        new File(SparkFiles.getRootDirectory(), name.split("/").last))
      jarFiles.foreach { jarFile =>
        if (jarFile.exists()) {
          jarFile.delete()
          logDebug(s"Deleted jarFile $jarFile")
        }
      }
      // Filter into a new array instead of removing from a buffer while
      // iterating over it, which can skip elements or fail mid-iteration.
      val jarPaths = jarFiles.map(_.toString)
      val remainingURLs = urlClassLoader.getURLs().filterNot(url =>
        url != null && jarPaths.exists(path => url.toString.contains(path)))
      urlClassLoader = new SnappyMutableURLClassLoader(remainingURLs,
        urlClassLoader.getParent)
      replClassLoader = addReplClassLoaderIfNeeded(urlClassLoader)
    }
  }

  /** Returns `Utils.getLocalDir` for this executor's conf. */
  def getLocalDir(): String = {
    Utils.getLocalDir(conf)
  }
}

/**
 * URL class loader that falls back to the store's class factory
 * (`loadClassFromDB`) when the regular lookup does not find the class.
 */
class SnappyMutableURLClassLoader(urls: Array[URL],
    parent: ClassLoader)
    extends MutableURLClassLoader(urls, parent) with Logging {

  /**
   * Tries the regular lookup first, then the store's class factory.
   *
   * @throws ClassNotFoundException when neither lookup yields a class
   */
  override def loadClass(name: String, resolve: Boolean): Class[_] = {
    loadJar(() => super.loadClass(name, resolve)).getOrElse {
      loadJar(() => Misc.getMemStore.getDatabase.getClassFactory.loadClassFromDB(name),
        throwException = true)
          // a null result from the factory is reported as "not found" rather
          // than surfacing as NoSuchElementException from Option.get
          .getOrElse(throw new ClassNotFoundException(name))
    }
  }

  /**
   * Runs `f`, mapping a null result to `None`.
   *
   * @param throwException when true a `ClassNotFoundException` from `f` is
   *                       rethrown, otherwise it is turned into `None`
   */
  def loadJar(f: () => Class[_], throwException: Boolean = false): Option[Class[_]] = {
    try {
      Option(f())
    } catch {
      case cnfe: ClassNotFoundException =>
        if (throwException) throw cnfe
        else None
    }
  }
}

/**
 * The default uncaught exception handler for Executors
 */
private class SnappyUncaughtExceptionHandler(
    val executorBackend: SnappyCoarseGrainedExecutorBackend)
    extends Thread.UncaughtExceptionHandler with Logging {

  /**
   * Logs the exception and, unless a shutdown is already in progress, asks the
   * backend to exit the executor. If that handling itself fails the JVM is
   * exited, or halted when even the exit attempt throws.
   */
  override def uncaughtException(thread: Thread, exception: Throwable): Unit = {
    try {
      // Make it explicit that uncaught exceptions are thrown when container is shutting down.
      // It will help users when they analyze the executor logs
      val inShutdownMsg = if (ShutdownHookManager.inShutdown()) "[Container in shutdown] " else ""
      val errMsg = "Uncaught exception in thread "
      logError(inShutdownMsg + errMsg + thread, exception)

      // We may have been called from a shutdown hook, there is no need to do anything
      if (!ShutdownHookManager.inShutdown()) {
        exception match {
          case _: OutOfMemoryError =>
            executorBackend.exitExecutor(SparkExitCode.OOM, "Out of Memory", exception)
          case _ =>
            executorBackend.exitExecutor(
              SparkExitCode.UNCAUGHT_EXCEPTION, errMsg, exception)
        }
      }
    } catch {
      case t: Throwable => try {
        t match {
          case _: OutOfMemoryError => System.exit(SparkExitCode.OOM)
          case _ => System.exit(SparkExitCode.UNCAUGHT_EXCEPTION)
        }
      } catch {
        // Exception while handling an uncaught exception. we cannot do much here
        case _: OutOfMemoryError => Runtime.getRuntime.halt(SparkExitCode.OOM)
        case _: Throwable => Runtime.getRuntime.halt(SparkExitCode.UNCAUGHT_EXCEPTION_TWICE)
      }
    }
  }
}
package org.apache.spark.memory

import com.gemstone.gemfire.internal.snappy.memory.{MemoryManagerStats, MemoryManagerStatsOps}

/**
 * A wrapper class for Split mode.
 * In case of split mode UMM stats update will be a no-op
 *
 * Until [[setMemoryManagerStats]] supplies a delegate, every update is
 * ignored and every getter returns zero.
 */
class MemoryManagerStatsWrapper extends MemoryManagerStatsOps {

  private[memory] var stats: MemoryManagerStats = _

  /** Installs (or, with null, removes) the delegate that receives all calls. */
  def setMemoryManagerStats(mStats: MemoryManagerStats): Unit = {
    stats = mStats
  }

  // ---- updates: forwarded to the delegate, ignored while it is null ----

  override def incStoragePoolSize(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.incStoragePoolSize(offHeap, delta)
  }

  override def getStoragePoolSize(offHeap: Boolean): Long = stats match {
    case null => 0L
    case delegate => delegate.getStoragePoolSize(offHeap)
  }

  override def decStoragePoolSize(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.decStoragePoolSize(offHeap, delta)
  }

  override def incExecutionPoolSize(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.incExecutionPoolSize(offHeap, delta)
  }

  override def decExecutionPoolSize(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.decExecutionPoolSize(offHeap, delta)
  }

  override def incStorageMemoryUsed(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.incStorageMemoryUsed(offHeap, delta)
  }

  override def decStorageMemoryUsed(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.decStorageMemoryUsed(offHeap, delta)
  }

  override def incExecutionMemoryUsed(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.incExecutionMemoryUsed(offHeap, delta)
  }

  override def decExecutionMemoryUsed(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.decExecutionMemoryUsed(offHeap, delta)
  }

  override def incNumFailedStorageRequest(offHeap: Boolean): Unit = stats match {
    case null =>
    case delegate => delegate.incNumFailedStorageRequest(offHeap)
  }

  override def incNumFailedExecutionRequest(offHeap: Boolean): Unit = stats match {
    case null =>
    case delegate => delegate.incNumFailedExecutionRequest(offHeap)
  }

  override def incNumFailedEvictionRequest(offHeap: Boolean): Unit = stats match {
    case null =>
    case delegate => delegate.incNumFailedEvictionRequest(offHeap)
  }

  override def incMaxStorageSize(offHeap: Boolean, delta: Long): Unit = stats match {
    case null =>
    case delegate => delegate.incMaxStorageSize(offHeap, delta)
  }

  // ---- reads: value of the delegate, zero while it is null ----

  override def getMaxStorageSize(offHeap: Boolean): Long = stats match {
    case null => 0L
    case delegate => delegate.getMaxStorageSize(offHeap)
  }

  override def getExecutionPoolSize(offHeap: Boolean): Long = stats match {
    case null => 0L
    case delegate => delegate.getExecutionPoolSize(offHeap)
  }

  override def getStorageMemoryUsed(offHeap: Boolean): Long = stats match {
    case null => 0L
    case delegate => delegate.getStorageMemoryUsed(offHeap)
  }

  override def getExecutionMemoryUsed(offHeap: Boolean): Long = stats match {
    case null => 0L
    case delegate => delegate.getExecutionMemoryUsed(offHeap)
  }

  override def getNumFailedStorageRequest(offHeap: Boolean): Int = stats match {
    case null => 0
    case delegate => delegate.getNumFailedStorageRequest(offHeap)
  }

  override def getNumFailedExecutionRequest(offHeap: Boolean): Int = stats match {
    case null => 0
    case delegate => delegate.getNumFailedExecutionRequest(offHeap)
  }

  override def getNumFailedEvictionRequest(offHeap: Boolean): Int = stats match {
    case null => 0
    case delegate => delegate.getNumFailedEvictionRequest(offHeap)
  }
}
package org.apache.spark.memory

import com.pivotal.gemfirexd.internal.engine.store.GemFireStore

/** Read-only view of the store's threshold listener flags. */
object SnappyMemoryUtils {

  /**
   * Checks whether GemFire critical threshold is breached
   *
   * @return the listener's critical flag, or false when there is no booting
   *         store instance
   */
  def isCriticalUp(): Boolean = {
    val bootingStore = GemFireStore.getBootingInstance
    (bootingStore ne null) && bootingStore.thresholdListener().isCritical
  }


  /**
   * Checks whether GemFire eviction threshold is breached
   *
   * @return the listener's eviction flag, or false when there is no booting
   *         store instance
   */
  def isEvictionUp: Boolean = {
    val bootingStore = GemFireStore.getBootingInstance
    (bootingStore ne null) && bootingStore.thresholdListener().isEviction
  }
}
See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package org.apache.spark.memory
+
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable.ArrayBuffer
+import scala.util.Random
+
+import com.gemstone.gemfire.cache.RegionDestroyedException
+import com.gemstone.gemfire.internal.cache._
+import com.gemstone.gemfire.internal.cache.control.InternalResourceManager
+import com.gemstone.gemfire.internal.cache.control.InternalResourceManager.ResourceType
+import com.gemstone.gemfire.internal.i18n.LocalizedStrings
+import com.pivotal.gemfirexd.internal.engine.Misc
+
+import org.apache.spark.Logging
+import org.apache.spark.sql.execution.columnar.impl.ColumnFormatRelation
+
+
+/**
+ * Evicts data from the regions registered as resource listeners with the
+ * GemFire resource manager, on behalf of callers that need memory freed.
+ */
+class SnappyStorageEvictor extends Logging {
+
+  /**
+   * Collects the regions that are candidates for eviction.
+   *
+   * Partitioned regions accepted by [[includePartitionedRegion]] contribute
+   * all of their local bucket regions; plain local regions are considered
+   * for on-heap requests only. When `MINIMUM_ENTRIES_PER_BUCKET` is positive,
+   * bucket regions holding at most that many entries in the VM are dropped.
+   *
+   * @param offHeap    whether the request is for off-heap data
+   * @param hasOffHeap whether off-heap memory has been configured
+   * @return a mutable buffer of candidate regions
+   */
+  private def getAllRegionList(offHeap: Boolean,
+      hasOffHeap: Boolean): ArrayBuffer[LocalRegion] = {
+    val cache = GemFireCacheImpl.getExisting
+    val allRegionList = new ArrayBuffer[LocalRegion]()
+    val irm: InternalResourceManager = cache.getResourceManager
+    for (listener <- irm.getResourceListeners(
+      SnappyStorageEvictor.resourceType).asScala) listener match {
+      case pr: PartitionedRegion =>
+        if (includePartitionedRegion(pr, offHeap, hasOffHeap)) {
+          allRegionList ++= pr.getDataStore.getAllLocalBucketRegions.asScala
+        }
+      // no off-heap local regions yet in SnappyData
+      case lr: LocalRegion =>
+        if (!offHeap && includeLocalRegion(lr)) {
+          allRegionList += lr
+        }
+      case _ =>
+    }
+    if (SnappyStorageEvictor.MINIMUM_ENTRIES_PER_BUCKET > 0) {
+      // iterate backwards so that removals do not shift the pending indices
+      for (i <- (allRegionList.length - 1) to 0 by -1) allRegionList(i) match {
+        case br: BucketRegion if br.getNumEntriesInVM <= SnappyStorageEvictor
+          .MINIMUM_ENTRIES_PER_BUCKET => allRegionList.remove(i)
+        case _ =>
+      }
+    }
+    allRegionList
+  }
+
+  /**
+   * Repeatedly walks the shuffled candidate regions, evicting from each in
+   * turn, until an eviction brings the total to at least `bytesRequired` or
+   * no candidate region is left.
+   *
+   * A region is dropped from the current run when it reports zero evicted
+   * bytes or when evicting from it fails with an exception; a failing region
+   * (e.g. one that has been destroyed) would otherwise be retried forever.
+   *
+   * @param bytesRequired number of bytes the caller wants evicted
+   * @param offHeap       whether off-heap (true) or on-heap (false) data
+   *                      should be evicted
+   * @return total bytes evicted; 0 when there is no cache instance or when
+   *         an off-heap request is made without off-heap memory configured
+   */
+  @throws(classOf[Exception])
+  def evictRegionData(bytesRequired: Long, offHeap: Boolean): Long = {
+    val cache = GemFireCacheImpl.getInstance()
+    if (cache eq null) return 0L
+
+    // check if offHeap has been configured
+    val hasOffHeap = cache.getMemorySize > 0
+    // nothing to be done for off-heap when no storage off-heap is present
+    if (!hasOffHeap && offHeap) return 0L
+
+    val stats = cache.getCachePerfStats
+    stats.incEvictorJobsStarted()
+    var totalBytesEvicted: Long = 0
+    // becomes true once a successful eviction reaches the requested total;
+    // checked only after an eviction so that at least one attempt is made
+    var satisfied = false
+    val regionSet = Random.shuffle(getAllRegionList(offHeap, hasOffHeap))
+    val start = CachePerfStats.getStatTime
+    try {
+      while (!satisfied && regionSet.nonEmpty) {
+        // walk backwards so that removing index i keeps lower indices valid
+        var i = regionSet.length - 1
+        while (!satisfied && i >= 0) {
+          val region = regionSet(i)
+          try {
+            val bytesEvicted = region.entries.asInstanceOf[AbstractLRURegionMap]
+              .centralizedLruUpdateCallback(offHeap, true)
+            if (bytesEvicted == 0) {
+              regionSet.remove(i)
+            } else {
+              // for off-heap don't change on-heap pool sizes assuming
+              // the on-heap eviction to be small (actual accounting of
+              // the reduction of on-heap data would already have been
+              // taken care of in the centralizedLruUpdateCallback)
+              if (offHeap) {
+                // off-heap is returned in MSB
+                totalBytesEvicted += (bytesEvicted >>> 32L) & 0xffffffffL
+              } else {
+                totalBytesEvicted += bytesEvicted
+              }
+              satisfied = totalBytesEvicted >= bytesRequired
+            }
+          } catch {
+            case rd: RegionDestroyedException =>
+              cache.getCancelCriterion.checkCancelInProgress(rd)
+              // do not retry this region in the current run
+              regionSet.remove(i)
+            case e: Exception =>
+              cache.getCancelCriterion.checkCancelInProgress(e)
+              cache.getLoggerI18n.warning(LocalizedStrings.Eviction_EVICTOR_TASK_EXCEPTION,
+                Array[AnyRef](e.getMessage), e)
+              // do not retry this region in the current run
+              regionSet.remove(i)
+          }
+          i -= 1
+        }
+      }
+    } finally {
+      if (start != 0L) {
+        val end = CachePerfStats.getStatTime
+        stats.incEvictWorkTime(end - start)
+      }
+      stats.incEvictorJobsCompleted()
+    }
+    totalBytesEvicted
+  }
+
+  /**
+   * Decides whether the buckets of a partitioned region take part in eviction.
+   *
+   * The base condition is: heap-LRU eviction algorithm, a non-null data store,
+   * GemFire off-heap not enabled on the region, and not a row buffer. When
+   * off-heap is configured, column tables qualify only for off-heap requests
+   * and all other regions only for on-heap requests.
+   *
+   * @param region     the partitioned region to check
+   * @param offHeap    whether the request is for off-heap data
+   * @param hasOffHeap whether off-heap memory has been configured
+   */
+  protected def includePartitionedRegion(region: PartitionedRegion,
+      offHeap: Boolean, hasOffHeap: Boolean): Boolean = {
+    val hasLRU = (region.getEvictionAttributes.getAlgorithm.isLRUHeap
+      && (region.getDataStore != null)
+      && !region.getAttributes.getEnableOffHeapMemory && !region.isRowBuffer())
+    if (hasOffHeap) {
+      // when off-heap is enabled then all column tables use off-heap
+      val regionPath = Misc.getFullTableNameFromRegionPath(region.getFullPath)
+      if (offHeap) hasLRU && ColumnFormatRelation.isColumnTable(regionPath)
+      else hasLRU && !ColumnFormatRelation.isColumnTable(regionPath)
+    } else {
+      assert(!offHeap,
+        "unexpected invocation for hasOffHeap=false and offHeap=true")
+      hasLRU
+    }
+  }
+
+  /**
+   * Decides whether a local region takes part in eviction: it must use the
+   * heap-LRU eviction algorithm and must not have GemFire off-heap enabled.
+   */
+  protected def includeLocalRegion(region: LocalRegion): Boolean = {
+    (region.getEvictionAttributes.getAlgorithm.isLRUHeap
+      && !region.getAttributes.getEnableOffHeapMemory)
+  }
+}
+
+object SnappyStorageEvictor {
+  /**
+   * Buckets with at most this many entries are skipped by the evictor; read
+   * from the system property below, defaulting to 0 (no bucket is skipped).
+   */
+  val MINIMUM_ENTRIES_PER_BUCKET: Int =
+    Integer.getInteger("gemfire.HeapLRUCapacityController.inlineEvictionThreshold", 0)
+  /** Resource type whose registered listeners are scanned for candidate regions. */
+  val resourceType = ResourceType.HEAP_MEMORY
+}
diff --git a/cluster/src/main/scala/org/apache/spark/memory/SnappyUnifiedMemoryManager.scala b/cluster/src/main/scala/org/apache/spark/memory/SnappyUnifiedMemoryManager.scala
new file mode 100644
index 0000000000..27df67b389
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/memory/SnappyUnifiedMemoryManager.scala
@@ -0,0 +1,982 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ +package org.apache.spark.memory + +import java.nio.ByteBuffer +import java.util.concurrent.atomic.AtomicInteger +import java.util.function.BiConsumer + +import scala.collection.mutable +import scala.util.control.NonFatal + +import com.gemstone.gemfire.distributed.internal.DistributionConfig +import com.gemstone.gemfire.internal.shared.unsafe.{DirectBufferAllocator, UnsafeHolder} +import com.gemstone.gemfire.internal.shared.{BufferAllocator, LauncherBase} +import com.gemstone.gemfire.internal.snappy.UMMMemoryTracker +import com.gemstone.gemfire.internal.snappy.memory.MemoryManagerStats +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.Constant +import org.eclipse.collections.api.block.procedure.primitive.ObjectLongProcedure +import org.eclipse.collections.impl.map.mutable.primitive.ObjectLongHashMap + +import org.apache.spark.sql.execution.columnar.impl.StoreCallback +import org.apache.spark.storage.BlockId +import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf} + +/** + * When there is request for execution or storage memory, critical up and eviction up + * events are checked. If they are set, try to free the memory cached by Spark rdds + * by calling memoryStore.evictBlocksToFreeSpace. If enough memory cannot be freed, + * return the call and let Spark take a corrective action. + * In such cases Spark either fails the task or move the current RDDs data to disk. + * If the critical and eviction events are not set, it asks the UnifiedMemoryManager + * to allocate the space. 
+ * + * @param conf the SparkConf from the SparkEnv to use for initialization + * @param maxHeapMemory the maximum heap memory that is available for use by MemoryManager; + * callers should leave out some amount of "reserved memory" for + * unaccounted object allocations + * @param numCores number of cores available in the cluster + */ +class SnappyUnifiedMemoryManager private[memory]( + conf: SparkConf, + override val maxHeapMemory: Long, + numCores: Int, val bootManager: Boolean) + extends UnifiedMemoryManager(SnappyUnifiedMemoryManager.setMemorySize(conf), + maxHeapMemory, + (maxHeapMemory * conf.getDouble("spark.memory.storageFraction", + SnappyUnifiedMemoryManager.DEFAULT_STORAGE_FRACTION)).toLong, + numCores) with StoreUnifiedManager with StoreCallback { + + self => + + private val managerId = if (!bootManager) "RuntimeMemoryManager" else "BootTimeMemoryManager" + + private val maxOffHeapStorageSize = (maxOffHeapMemory * + conf.getDouble("spark.memory.storageMaxFraction", 0.95)).toLong + + + /** + * An estimate of the maximum result size handled by a single partition. + * There can be major skew so this does not use number of partitions as + * divisor, but even the divisor used may not compensate for the skew in some + * cases but it should be acceptable for those rare cases. + */ + private val maxPartResultSize = Utils.getMaxResultSize(conf) / + math.min(8, Runtime.getRuntime.availableProcessors()) + + /** + * If total heap size is small enough then try and use explicit GC to + * release pending off-heap references before failing storage allocation. 
+ */ + private val canUseExplicitGC = { + // use explicit System.gc() only if total-heap size is not large + maxOffHeapMemory > 0 && Runtime.getRuntime.totalMemory <= + SnappyUnifiedMemoryManager.EXPLICIT_GC_LIMIT + } + + private val onHeapStorageRegionSize = onHeapStorageMemoryPool.poolSize + + private val evictionFraction = SnappyUnifiedMemoryManager.getStorageEvictionFraction(conf) + + private[memory] val maxHeapStorageSize = (maxHeapMemory * evictionFraction).toLong + + private val minHeapEviction = math.min(math.max(10L * 1024L * 1024L, + (maxHeapStorageSize * 0.002).toLong), 1024L * 1024L * 1024L) + + private[memory] val wrapperStats = new MemoryManagerStatsWrapper + + @volatile private var _memoryForObjectMap: ObjectLongHashMap[MemoryOwner] = _ + + private[memory] def memoryForObject: ObjectLongHashMap[MemoryOwner] = { + val memoryMap = _memoryForObjectMap + if (memoryMap eq null) synchronized { + val memoryMap = _memoryForObjectMap + if (memoryMap eq null) { + _memoryForObjectMap = new ObjectLongHashMap[MemoryOwner](16) + // transfer the memory map from tempMemoryManager on first use + if (!bootManager) { + logInfo(s"Allocating boot time memory to $managerId ") + + val bootTimeManager = MemoryManagerCallback.bootMemoryManager + .asInstanceOf[SnappyUnifiedMemoryManager] + val bootTimeMap = bootTimeManager._memoryForObjectMap + if (bootTimeMap ne null) { + // Not null only for cluster mode. In local mode + // as Spark is booted first temp memory manager is not used + bootTimeMap.forEachKeyValue(new ObjectLongProcedure[MemoryOwner] { + override def value(p: MemoryOwner, numBytes: Long): Unit = { + if (numBytes > 0) { + val mode = if (p.offHeap) MemoryMode.OFF_HEAP else MemoryMode.ON_HEAP + acquireStorageMemoryForObject(p.owner, + MemoryManagerCallback.storageBlockId, numBytes, mode, null, shouldEvict = true) + } + // TODO: SW: if above fails then this should throw exception + // and _memoryForObjectMap made null again? 
+ } + }) + setMemoryManagerStats(bootTimeManager.wrapperStats.stats) + logInfo(s"Total Memory used while booting = " + + bootTimeManager.storageMemoryUsed) + bootTimeMap.clear() + } + } + + _memoryForObjectMap + } else memoryMap + } else memoryMap + } + + /** + * This method will be called if executor is going to be restarted. + * When executor is coming up all accounting from store will be done in + * bootMemoryManager. + * When executor stops we will copy the existing entry in this manager to + * boot manager. + * Once executor comes back again we will again copy the boot manager entries + * to run time manager. + */ + override def close(): Unit = { + assert(!bootManager) + // First reset the memory manager in callback. Hence all request will + // go to Boot Manager + MemoryManagerCallback.resetMemoryManager() + synchronized { + logInfo(s" Closing Memory Manager ${this}") + val bootManager = MemoryManagerCallback.bootMemoryManager + .asInstanceOf[SnappyUnifiedMemoryManager] + + val bootManagerMap = bootManager.memoryForObject + val memoryForObject = self.memoryForObject + memoryForObject.forEachKeyValue(new ObjectLongProcedure[MemoryOwner] { + override def value(p: MemoryOwner, numBytes: Long): Unit = { + val objectName = p.owner + if (!objectName.equals(SPARK_CACHE) && + !objectName.endsWith(BufferAllocator.STORE_DATA_FRAME_OUTPUT)) { + bootManagerMap.addToValue(p, numBytes) + } + } + }) + clear() + } + } + + /** + * Clears the internal map + */ + override def clear(): Unit = synchronized { + val memoryForObject = _memoryForObjectMap + if (memoryForObject ne null) memoryForObject.clear() + } + + private[this] val threadsWaitingForStorage = new AtomicInteger() + + private[this] val SPARK_CACHE = "_SPARK_CACHE_" + + private[this] val evictor = new SnappyStorageEvictor + + def this(conf: SparkConf, numCores: Int, tempManager: Boolean = false) = { + this(conf, + SnappyUnifiedMemoryManager.getMaxMemory(conf), + numCores, tempManager) + } + + def this(conf: SparkConf, 
numCores: Int) = { + this(conf, + SnappyUnifiedMemoryManager.getMaxMemory(conf), + numCores, bootManager = false) + } + + logMemoryConfiguration() + + private def logMemoryConfiguration(): Unit = { + val memoryLog = new StringBuilder + val separator = "\n\t\t" + memoryLog.append(s"$managerId ${this} configuration:") + + memoryLog.append(separator).append("Total Usable Heap = ") + .append(Utils.bytesToString(maxHeapMemory)) + .append(" (").append(maxHeapMemory).append(')') + memoryLog.append(separator).append("Storage Pool = ") + .append(Utils.bytesToString(onHeapStorageRegionSize)) + .append(" (").append(onHeapStorageRegionSize).append(')') + val executionPoolSize = onHeapExecutionMemoryPool.poolSize + memoryLog.append(separator).append("Execution Pool = ") + .append(Utils.bytesToString(executionPoolSize)) + .append(" (").append(executionPoolSize).append(')') + memoryLog.append(separator).append("Max Storage Pool Size = ") + .append(Utils.bytesToString(maxHeapStorageSize)) + .append(" (").append(maxHeapStorageSize).append(')') + if (hasOffHeap) { + memoryLog.append(separator).append("OffHeap Size = ") + .append(Utils.bytesToString(maxOffHeapMemory)) + .append(" (").append(maxOffHeapMemory).append(')') + memoryLog.append(separator).append("OffHeap Storage Pool = ") + .append(Utils.bytesToString(offHeapStorageMemory)) + .append(" (").append(offHeapStorageMemory).append(')') + val offHeapExecutionPoolSize = offHeapExecutionMemoryPool.poolSize + memoryLog.append(separator).append("OffHeap Execution Pool = ") + .append(Utils.bytesToString(offHeapExecutionPoolSize)) + .append(" (").append(offHeapExecutionPoolSize).append(')') + memoryLog.append(separator).append("OffHeap Max Storage Pool Size = ") + .append(Utils.bytesToString(maxOffHeapStorageSize)) + .append(" (").append(maxOffHeapStorageSize).append(')') + } + logInfo(memoryLog.toString()) + } + + override def getStoragePoolMemoryUsed( + memoryMode: MemoryMode): Long = memoryMode match { + case MemoryMode.OFF_HEAP => 
offHeapStorageMemoryPool.memoryUsed + case MemoryMode.ON_HEAP => onHeapStorageMemoryPool.memoryUsed + } + + override def getStoragePoolSize( + memoryMode: MemoryMode): Long = memoryMode match { + case MemoryMode.OFF_HEAP => offHeapStorageMemoryPool.poolSize + case MemoryMode.ON_HEAP => onHeapStorageMemoryPool.poolSize + } + + override def getExecutionPoolUsedMemory( + memoryMode: MemoryMode): Long = memoryMode match { + case MemoryMode.OFF_HEAP => offHeapExecutionMemoryPool.memoryUsed + case MemoryMode.ON_HEAP => onHeapExecutionMemoryPool.memoryUsed + } + + override def getExecutionPoolSize( + memoryMode: MemoryMode): Long = memoryMode match { + case MemoryMode.OFF_HEAP => offHeapExecutionMemoryPool.poolSize + case MemoryMode.ON_HEAP => onHeapExecutionMemoryPool.poolSize + } + + override def getOffHeapMemory(objectName: String): Long = synchronized { + if (maxOffHeapMemory > 0) memoryForObject.get(MemoryOwner(objectName, offHeap = true)) + else 0L + } + + override def hasOffHeap: Boolean = tungstenMemoryMode eq MemoryMode.OFF_HEAP + + override def logStats(): Unit = logStats("") + + def logStats(tag: String): Unit = synchronized { + val memoryLog = new StringBuilder + val separator = "\n\t\t" + memoryLog.append(s"$tag$managerId ${this} stats:") + memoryLog.append(separator).append("Storage Used = ") + .append(onHeapStorageMemoryPool.memoryUsed) + .append(" (size=").append(onHeapStorageMemoryPool.poolSize).append(')') + memoryLog.append(separator).append("Execution Used = ") + .append(onHeapExecutionMemoryPool.memoryUsed) + .append(" (size=").append(onHeapExecutionMemoryPool.poolSize).append(')') + if (hasOffHeap) { + memoryLog.append(separator).append("OffHeap Size = ") + .append(Utils.bytesToString(maxOffHeapMemory)) + .append(" (").append(maxOffHeapMemory).append(')') + memoryLog.append(separator).append("OffHeap Storage Used = ") + .append(offHeapStorageMemoryPool.memoryUsed) + .append(" (size=").append(offHeapStorageMemoryPool.poolSize).append(')') + 
memoryLog.append(separator).append("OffHeap Execution Pool = ") + .append(offHeapExecutionMemoryPool.memoryUsed) + .append(" (size=").append(offHeapExecutionMemoryPool.poolSize).append(')') + } + val memoryForObject = self.memoryForObject + if (memoryForObject.size() > 0) { + memoryLog.append("\n\t").append("Objects:\n") + memoryForObject.forEachKeyValue(new ObjectLongProcedure[MemoryOwner] { + override def value(p: MemoryOwner, numBytes: Long): Unit = { + memoryLog.append(separator).append(p).append(" = ").append(numBytes) + } + }) + } + logInfo(memoryLog.toString()) + } + + override def changeOffHeapOwnerToStorage(buffer: ByteBuffer, + allowNonAllocator: Boolean): Unit = synchronized { + val capacity = buffer.capacity() + val mode = MemoryMode.OFF_HEAP + val totalSize = capacity + DirectBufferAllocator.DIRECT_OBJECT_OVERHEAD + val toOwner = DirectBufferAllocator.DIRECT_STORE_OBJECT_OWNER + val changeOwner = new BiConsumer[String, AnyRef] { + override def accept(fromOwner: String, runnable: AnyRef): Unit = { + if (fromOwner ne null) { + val offHeap = mode eq MemoryMode.OFF_HEAP + val memoryForObject = self.memoryForObject + // "from" was changed to "to" + val from = MemoryOwner(fromOwner, offHeap) + val cur = memoryForObject.addToValue(from, -totalSize) + if (cur >= 0) { + memoryForObject.addToValue(MemoryOwner(toOwner, offHeap), totalSize) + } else { + // something went wrong with size accounting + memoryForObject.addToValue(from, totalSize) + throw new IllegalStateException( + s"Unexpected move of $totalSize bytes from owner $fromOwner size=${cur + totalSize}") + } + } else if (allowNonAllocator) { + // add to storage pool + if (!askStoragePool(toOwner, MemoryManagerCallback.storageBlockId, + totalSize, MemoryMode.OFF_HEAP, shouldEvict = true)) { + throw DirectBufferAllocator.instance().lowMemoryException( + "changeToStorage", totalSize) + } + // release from execution pool if using execution allocator + runnable match { + case r: ExecutionFreeMemory => 
r.releaseExecutionMemory() + case _ => + } + } else throw new IllegalStateException( + s"ByteBuffer Cleaner does not match expected source $fromOwner") + } + } + // change the owner to storage + DirectBufferAllocator.instance().changeOwnerToStorage(buffer, + capacity, changeOwner) + } + + def tryExplicitGC(numBytes: Long): Unit = { + // check if explicit GC should be invoked + if (canUseExplicitGC) { + logStats(s"Explicit GC before failing storage allocation request of $numBytes bytes: ") + System.gc() + System.runFinalization() + logStats("Stats after explicit GC: ") + } + UnsafeHolder.releasePendingReferences() + } + + private def getMinHeapEviction(required: Long): Long = { + // evict at least 100 entries to reduce GC cycles + val waitingThreads = threadsWaitingForStorage.get() + math.max(required * waitingThreads, math.min(minHeapEviction, + required * math.max(100L, waitingThreads + 1))) + } + + private def getMinOffHeapEviction(required: Long): Long = { + // off-heap calculations are precise so evict exactly as much as required; + // bit of "padding" (1M) to account for inaccuracies in pre-allocation by + // putAll threads + math.max(0, required - offHeapStorageMemoryPool.memoryFree + (1024 * 1024)) + } + + /** + * This method is copied from Spark. In addition to evicting data from spark block manager, + * this will also evict data from SnappyStore. + * + * Try to acquire up to `numBytes` of execution memory for the current task and return the + * number of bytes obtained, or 0 if none can be allocated. + * + * This call may block until there is enough free memory in some situations, to make sure each + * task has a chance to ramp up to at least 1 / 2N of the total memory pool (where N is the # of + * active tasks) before it is forced to spill. This can happen if the number of tasks increase + * but an older task had a lot of memory already. 
+ */ + override private[memory] def acquireExecutionMemory( + numBytes: Long, + taskAttemptId: Long, + memoryMode: MemoryMode): Long = synchronized { + logDebug(s"Acquiring $managerId $memoryMode memory for $taskAttemptId = $numBytes") + assertInvariants() + assert(numBytes >= 0) + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + // use vars instead of tuple to avoid Tuple5 creation and Long boxing/unboxing + var executionMemoryPool: ExecutionMemoryPool = null + var storageMemoryPool: StorageMemoryPool = null + var regionSize = 0L + var maxMemoryBytes = 0L + var minEvictionBytes = 0L + memoryMode match { + case MemoryMode.ON_HEAP => + executionMemoryPool = onHeapExecutionMemoryPool + storageMemoryPool = onHeapStorageMemoryPool + regionSize = onHeapStorageRegionSize + maxMemoryBytes = maxHeapMemory + minEvictionBytes = getMinHeapEviction(numBytes) + case MemoryMode.OFF_HEAP => + executionMemoryPool = offHeapExecutionMemoryPool + storageMemoryPool = offHeapStorageMemoryPool + regionSize = offHeapStorageMemory + maxMemoryBytes = maxOffHeapMemory + minEvictionBytes = getMinOffHeapEviction(numBytes) + } + + val executionPool = executionMemoryPool + val storagePool = storageMemoryPool + val storageRegionSize = regionSize + val maxMemory = maxMemoryBytes + val minEviction = minEvictionBytes + /** + * Grow the execution pool by evicting cached blocks, thereby shrinking the storage pool. + * + * When acquiring memory for a task, the execution pool may need to make multiple + * attempts. Each attempt must be able to evict storage in case another task jumps in + * and caches a large block between the attempts. This is called once per attempt. + */ + def maybeGrowExecutionPool(extraMemoryNeeded: Long): Unit = { + if (extraMemoryNeeded > 0) { + + if (!offHeap && SnappyMemoryUtils.isCriticalUp()) { + logWarning(s"CRTICAL_UP event raised due to critical heap memory usage. 
" + + s"No memory allocated to thread ${Thread.currentThread()}") + return + } + + // There is not enough free memory in the execution pool, so try to reclaim memory from + // storage. We can reclaim any free memory from the storage pool. If the storage pool + // has grown to become larger than `storageRegionSize`, we can evict blocks and reclaim + // the memory that storage has borrowed from execution. + val memoryReclaimableFromStorage = storagePool.poolSize - storageRegionSize + + if (memoryReclaimableFromStorage > 0) { + // Only reclaim as much space as is necessary and available: + val spaceToReclaim = storagePool.freeSpaceToShrinkPool( + math.min(extraMemoryNeeded, memoryReclaimableFromStorage)) + + val bytesEvictedFromStore = if (spaceToReclaim < extraMemoryNeeded) { + val moreBytesRequired = extraMemoryNeeded - spaceToReclaim + val evicted = evictor.evictRegionData(math.min(moreBytesRequired + + minEviction, memoryReclaimableFromStorage), offHeap) + if (offHeap) { + UnsafeHolder.releasePendingReferences() + } + evicted + } else { + 0L + } + if (bytesEvictedFromStore == 0L){ + wrapperStats.incNumFailedEvictionRequest(offHeap) + } + if(storagePool.poolSize - (spaceToReclaim + bytesEvictedFromStore) + >= storagePool.memoryUsed){ + // Some eviction might have increased the storage memory used which will + // case some requirement failing + // while decreasing pool size. + val totalReclaimable = spaceToReclaim + bytesEvictedFromStore + storagePool.decrementPoolSize(totalReclaimable) + wrapperStats.decStoragePoolSize(offHeap, totalReclaimable) + executionPool.incrementPoolSize(totalReclaimable) + wrapperStats.incExecutionPoolSize(offHeap, totalReclaimable) + } + + } + } + } + + /** + * The size the execution pool would have after evicting storage memory. + * + * The execution memory pool divides this quantity among the active tasks evenly to cap + * the execution memory allocation for each task. 
It is important to keep this greater + * than the execution pool size, which doesn't take into account potential memory that + * could be freed by evicting storage. Otherwise we may hit SPARK-12155. + * + * Additionally, this quantity should be kept below `maxMemory` to arbitrate fairness + * in execution memory allocation across tasks, Otherwise, a task may occupy more than + * its fair share of execution memory, mistakenly thinking that other tasks can acquire + * the portion of storage memory that cannot be evicted. + */ + def computeMaxExecutionPoolSize(): Long = { + maxMemory - math.min(storagePool.memoryUsed, storageRegionSize) + } + + val acquiredNumBytes = executionPool.acquireMemory( + numBytes, taskAttemptId, maybeGrowExecutionPool, computeMaxExecutionPoolSize) + wrapperStats.incExecutionMemoryUsed(offHeap, acquiredNumBytes) + acquiredNumBytes + } + + + override def releaseExecutionMemory( + numBytes: Long, + taskAttemptId: Long, + memoryMode: MemoryMode): Unit = synchronized { + super.releaseExecutionMemory(numBytes, taskAttemptId, memoryMode) + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + wrapperStats.decExecutionMemoryUsed(offHeap, numBytes) + } + + override def acquireStorageMemory( + blockId: BlockId, + numBytes: Long, + memoryMode: MemoryMode): Boolean = { + acquireStorageMemoryForObject(SPARK_CACHE, blockId, numBytes, memoryMode, null, + shouldEvict = true) + } + + private def askStoragePool(objectName: String, + blockId: BlockId, + numBytes: Long, + memoryMode: MemoryMode, + shouldEvict: Boolean): Boolean = { + threadsWaitingForStorage.incrementAndGet() + try { + val success = + askStoragePool_(objectName, blockId, numBytes, memoryMode, shouldEvict) + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + if (success) { + wrapperStats.incStorageMemoryUsed(offHeap, numBytes) + } else { + wrapperStats.incNumFailedStorageRequest(offHeap) + } + success + } finally { + threadsWaitingForStorage.decrementAndGet() + } + } + + private def 
askStoragePool_(objectName: String, + blockId: BlockId, + numBytes: Long, + memoryMode: MemoryMode, + shouldEvict: Boolean): Boolean = { + synchronized { + if (!shouldEvict) { + SnappyUnifiedMemoryManager. + invokeListenersOnPositiveMemoryIncreaseDueToEviction(objectName, numBytes) + } + assertInvariants() + assert(numBytes >= 0) + // use vars instead of tuple to avoid Tuple5 creation and Long boxing/unboxing + var executionPool: ExecutionMemoryPool = null + var storageMemoryPool: StorageMemoryPool = null + var maxMemory = 0L + var maxStorageSize = 0L + var minEviction = 0L + memoryMode match { + case MemoryMode.ON_HEAP => + executionPool = onHeapExecutionMemoryPool + storageMemoryPool = onHeapStorageMemoryPool + maxMemory = maxOnHeapStorageMemory + maxStorageSize = maxHeapStorageSize + minEviction = getMinHeapEviction(numBytes) + case MemoryMode.OFF_HEAP => + executionPool = offHeapExecutionMemoryPool + storageMemoryPool = offHeapStorageMemoryPool + maxMemory = maxOffHeapMemory - offHeapExecutionMemoryPool.memoryUsed + maxStorageSize = maxOffHeapStorageSize + minEviction = getMinOffHeapEviction(numBytes) + } + + val storagePool = storageMemoryPool + // Evict only limited amount for owners marked as non-evicting. + // TODO: this can be removed once these calls are moved to execution + // TODO use something like "(spark.driver.maxResultSize / numPartitions) * 2" + val doEvict = if (shouldEvict && + objectName.endsWith(BufferAllocator.STORE_DATA_FRAME_OUTPUT)) { + // don't use more than 10% of pool size for one partition result + // 30% of storagePool size is still large. With retries it virtually evicts all data. + // Hence taking 30% of initial storage pool size. Once retry of LowMemoryException is + // stopped it would be much cleaner. 
+ numBytes < math.min(0.3 * maxStorageSize, + math.max(maxPartResultSize, storagePool.memoryFree)) + } else shouldEvict + + if (numBytes > maxMemory) { + val max = maxMemory + // Fail fast if the block simply won't fit + logWarning(s"Will not store $blockId for $objectName as " + + s"the required space ($numBytes bytes) exceeds our " + + s"memory limit ($max bytes)") + return false + } + // don't borrow from execution for off-heap if shouldEvict=false since it + // will try clearing references before calling with shouldEvict=true again + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + val offHeapNoEvict = !doEvict && offHeap + if (numBytes > storagePool.memoryFree && !offHeapNoEvict) { + // There is not enough free memory in the storage pool, so try to borrow free memory from + // the execution pool. + val memoryBorrowedFromExecution = Math.min(executionPool.memoryFree, numBytes) + val actualBorrowedMemory = + if (storagePool.poolSize + memoryBorrowedFromExecution > maxStorageSize) { + maxStorageSize - storagePool.poolSize + } else { + memoryBorrowedFromExecution + } + if (actualBorrowedMemory > 0) { + executionPool.decrementPoolSize(actualBorrowedMemory) + wrapperStats.decExecutionPoolSize(offHeap, actualBorrowedMemory) + storagePool.incrementPoolSize(actualBorrowedMemory) + wrapperStats.incStoragePoolSize(offHeap, actualBorrowedMemory) + } + } + // First let spark try to free some memory + val enoughMemory = if (bootManager) { + // For temp manager no eviction , hence numBytes to free is passed as 0 + storagePool.acquireMemory(blockId, numBytes, numBytesToFree = 0) + } else { + storagePool.acquireMemory(blockId, numBytes) + } + + // Case where boot time memory is insufficient to recover database + if ( !enoughMemory && bootManager) { + return false + } + + if (!enoughMemory) { + + // return immediately for OFF_HEAP with shouldEvict=false + if (offHeapNoEvict) return false + + if (!offHeap && SnappyMemoryUtils.isCriticalUp()) { + logWarning(s"CRTICAL_UP event 
raised due to critical heap memory usage. " + + s"No memory allocated to thread ${Thread.currentThread()}") + return false + } + + if (doEvict) { + // Sufficient memory could not be freed. Time to evict from SnappyData store. + // val requiredBytes = numBytes - storagePool.memoryFree + // Evict data a little more than required based on waiting tasks + val evicted = evictor.evictRegionData(minEviction, offHeap) + if (SnappyUnifiedMemoryManager.testCallbacks.nonEmpty) { + SnappyUnifiedMemoryManager.testCallbacks.foreach( + _.onEviction(objectName, evicted)) + } + if (offHeap) { + UnsafeHolder.releasePendingReferences() + } + } else { + return false + } + + var couldEvictSomeData = storagePool.acquireMemory(blockId, numBytes) + // run old map GC task explicitly before failing with low memory + if (!couldEvictSomeData) { + val cache = Misc.getGemFireCacheNoThrow + if (cache ne null) { + cache.runOldEntriesCleanerThread() + } + couldEvictSomeData = storagePool.acquireMemory(blockId, numBytes) + } + // for off-heap try harder before giving up since pending references + // may be on heap (due to unexpected exceptions) that will go away on GC + if (!couldEvictSomeData && offHeap) { + tryExplicitGC(numBytes) + couldEvictSomeData = storagePool.acquireMemory(blockId, numBytes) + } + if (!couldEvictSomeData) { + if (doEvict) { + wrapperStats.incNumFailedEvictionRequest(offHeap) + } + logWarning(s"Could not allocate memory for $blockId of " + + s"$objectName size=$numBytes. Memory pool size ${storagePool.memoryUsed}") + } else { + memoryForObject.addToValue(new MemoryOwner(objectName, memoryMode), numBytes) + logDebug(s"Allocated memory for $blockId of " + + s"$objectName size=$numBytes. 
Memory pool size ${storagePool.memoryUsed}") + } + couldEvictSomeData + } else { + memoryForObject.addToValue(new MemoryOwner(objectName, memoryMode), numBytes) + enoughMemory + } + } + } + + override def acquireStorageMemoryForObject(objectName: String, + blockId: BlockId, + numBytes: Long, + memoryMode: MemoryMode, + buffer: UMMMemoryTracker, + shouldEvict: Boolean): Boolean = { + logDebug(s"Acquiring $managerId $memoryMode memory " + + s"for $objectName = $numBytes (evict=$shouldEvict)") + if (buffer ne null) { + if (buffer.freeMemory() >= numBytes) { + buffer.incMemoryUsed(numBytes) + true + } else { + val predictedMemory = numBytes * buffer.getTotalOperationsExpected + val success = askStoragePool(objectName, blockId, predictedMemory, memoryMode, shouldEvict) + if (success){ + buffer.incAllocatedMemory(predictedMemory) + buffer.setFirstAllocationObject(objectName) + buffer.incMemoryUsed(numBytes) + } + success + } + } else { + askStoragePool(objectName, blockId, numBytes, memoryMode, shouldEvict) + } + } + + override def releaseStorageMemoryForObject(objectName: String, + numBytes: Long, + memoryMode: MemoryMode): Unit = synchronized { + logDebug(s"releasing $managerId memory for $objectName = $numBytes") + val key = new MemoryOwner(objectName, memoryMode) + super.releaseStorageMemory(numBytes, memoryMode) + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + wrapperStats.decStorageMemoryUsed(offHeap, numBytes) + val memoryForObject = self.memoryForObject + if (memoryForObject.containsKey(key)) { + if (memoryForObject.addToValue(key, -numBytes) <= 0) { + memoryForObject.removeKey(key) + } + } + } + + override def releaseStorageMemory(numBytes: Long, memoryMode: MemoryMode): Unit = { + releaseStorageMemoryForObject(SPARK_CACHE, numBytes, memoryMode) + } + + override def dropStorageMemoryForObject(name: String, + memoryMode: MemoryMode, + ignoreNumBytes: Long): Long = synchronized { + val key = new MemoryOwner(name, memoryMode) + val memoryForObject = 
self.memoryForObject + val bytesToBeFreed = memoryForObject.get(key) + val numBytes = Math.max(0, bytesToBeFreed - ignoreNumBytes) + logDebug(s"Dropping $managerId memory for $name = $numBytes (registered=$bytesToBeFreed)") + if (numBytes > 0) { + super.releaseStorageMemory(numBytes, memoryMode) + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + wrapperStats.decStorageMemoryUsed(offHeap, numBytes) + memoryForObject.removeKey(key) + } + bytesToBeFreed + } + + // Test Hook. Not to be used anywhere else + private[memory] def dropAllObjects(memoryMode: MemoryMode): Unit = synchronized { + val memoryForObject = self.memoryForObject + val clearList = new mutable.ArrayBuffer[MemoryOwner] + memoryForObject.forEachKeyValue(new ObjectLongProcedure[MemoryOwner] { + override def value(p: MemoryOwner, numBytes: Long): Unit = { + val offHeap = memoryMode eq MemoryMode.OFF_HEAP + if (p.offHeap == offHeap) { + SnappyUnifiedMemoryManager.super.releaseStorageMemory(numBytes, memoryMode) + wrapperStats.decStorageMemoryUsed(offHeap, numBytes) + clearList += p + } + } + }) + clearList.foreach(key => memoryForObject.removeKey(key)) + } + + // Recovery is a special case. If any of the storage pool has reached 90% of + // max storage pool size stop recovery. 
+ override def shouldStopRecovery(): Boolean = synchronized { + (offHeapStorageMemoryPool.memoryUsed > (maxOffHeapStorageSize * 0.90) ) || + (onHeapStorageMemoryPool.memoryUsed > (maxHeapStorageSize * 0.90)) + } + + override def initMemoryStats(stats: MemoryManagerStats): Unit = { + stats.incMaxStorageSize(true, maxOffHeapStorageSize) + stats.incMaxStorageSize(false, maxHeapStorageSize) + stats.incStoragePoolSize(true, offHeapStorageMemoryPool.poolSize) + stats.incStoragePoolSize(false, onHeapStorageRegionSize) + stats.incStorageMemoryUsed(true, offHeapStorageMemoryPool.memoryUsed) + stats.incStorageMemoryUsed(false, onHeapStorageMemoryPool.memoryUsed) + stats.incExecutionPoolSize(true, offHeapExecutionMemoryPool.poolSize) + stats.incExecutionPoolSize(false, onHeapExecutionMemoryPool.poolSize) + stats.incExecutionMemoryUsed(true, offHeapExecutionMemoryPool.memoryUsed) + stats.incExecutionMemoryUsed(false, onHeapExecutionMemoryPool.memoryUsed) + setMemoryManagerStats(stats) + } + + private def setMemoryManagerStats(stats: MemoryManagerStats): Unit = { + wrapperStats.setMemoryManagerStats(stats) + } + + /** + * Initializes the memoryManager + */ + override def init(): Unit = memoryForObject +} + +object SnappyUnifiedMemoryManager extends Logging { + + // Reserving minimum 100MB data for unaccounted data, GC headroom etc + private val RESERVED_SYSTEM_MEMORY_BYTES = { + // reserve 5% of heap by default subject to max of 5GB and min of 100MB + math.min(5L * 1024L * 1024L * 1024L, + math.max(getMaxHeapMemory / 20, 100L * 1024L * 1024L)) + } + + private val DEFAULT_MEMORY_FRACTION = 0.85 + + private val DEFAULT_EVICTION_FRACTION = 0.8 + + private val DEFAULT_STORAGE_FRACTION = 0.5 + + private def getMaxHeapMemory: Long = { + val maxMemory = Runtime.getRuntime.maxMemory() + if (maxMemory > 0 && maxMemory != Long.MaxValue) maxMemory + else Runtime.getRuntime.totalMemory() + } + + /** + * The maximum limit of heap size till which an explicit GC will be + * considered for 
invocation before failing a direct buffer allocation + * request for the case when too many references are lying around uncollected. + */ + private val EXPLICIT_GC_LIMIT = 16L * 1024 * 1024 * 1024 + + private val testCallbacks = mutable.ArrayBuffer.empty[MemoryEventListener] + + def addMemoryEventListener(listener: MemoryEventListener): Unit = { + testCallbacks += listener + } + + def clearMemoryEventListener(): Unit = { + testCallbacks.clear() + } + + private def invokeListenersOnPositiveMemoryIncreaseDueToEviction(objectName: String, + bytes: Long): Unit = { + if (testCallbacks.nonEmpty) { + testCallbacks.foreach(_.onPositiveMemoryIncreaseDueToEviction(objectName, bytes)) + } + } + + /** + * Check for SnappyData off-heap configuration and set Spark's properties. + */ + def setMemorySize(conf: SparkConf): SparkConf = { + val cache = Misc.getGemFireCacheNoThrow + val memorySize = if (cache ne null) { + cache.getMemorySize + } else { // for local mode + var size = conf.getSizeAsBytes(Constant.STORE_PROPERTY_PREFIX + + DistributionConfig.MEMORY_SIZE_NAME, "0b") + if (size == 0) { + // try with additional "spark." prefix + size = conf.getSizeAsBytes(Constant.SPARK_STORE_PREFIX + + DistributionConfig.MEMORY_SIZE_NAME, "0b") + } + if (size > 0) { + // try to load managed allocator + try { + val clazz = Utils.classForName( + "com.gemstone.gemfire.internal.cache.store.ManagedDirectBufferAllocator") + clazz.getDeclaredMethod("instance").invoke(null) + } catch { + case NonFatal(e) => + logError("Failed to load managed buffer allocator in SnappyData OSS." 
+ + s"Temporary scan buffers will be unaccounted DirectByteBuffers: $e") + } + } + size + } + if (memorySize > 0) { + // set Spark's off-heap properties + conf.set("spark.memory.offHeap.enabled", "true") + conf.set("spark.memory.offHeap.size", s"${memorySize}b") + } + if (!conf.contains("spark.memory.storageFraction")) { + conf.set("spark.memory.storageFraction", DEFAULT_STORAGE_FRACTION.toString) + } + conf + } + + /** + * Return a fraction which determines what is the max limit storage can grow. + */ + def getStorageEvictionFraction(conf: SparkConf): Double = { + val cache = Misc.getGemFireCacheNoThrow + val evictionFraction = if (cache ne null) { + val thresholds = cache.getResourceManager.getHeapMonitor.getThresholds + if (thresholds.isEvictionThresholdEnabled) { + thresholds.getEvictionThreshold * 0.01 + } else if (thresholds.isCriticalThresholdEnabled) { + thresholds.getCriticalThreshold * 0.9 * 0.01 + } else { + DEFAULT_EVICTION_FRACTION + } + } else { + // search in conf + conf.getOption(Constant.STORE_PROPERTY_PREFIX + + LauncherBase.EVICTION_HEAP_PERCENTAGE) match { + case Some(c) => c.toDouble * 0.01 + case None => conf.getDouble("spark.testing.maxStorageFraction", DEFAULT_EVICTION_FRACTION) + } + } + if (evictionFraction < 0.1 || evictionFraction > 0.98) { + throw new IllegalArgumentException(s"Eviction fraction $evictionFraction must " + + "be between 0.1 and 0.98. Please set or correct eviction-heap-percentage.") + } + evictionFraction + } + + /** + * Return the total amount of memory shared between execution and storage, in bytes. 
+ * This is a direct copy from UnifiedMemorymanager with an extra check for evict fraction + */ + private def getMaxMemory(conf: SparkConf): Long = { + var systemMemory = conf.getLong("spark.testing.memory", getMaxHeapMemory) + // align reserved memory with critical heap size of GemFire + val cache = Misc.getGemFireCacheNoThrow + var reservedMemory = if (cache ne null) { + val thresholds = cache.getResourceManager.getHeapMonitor.getThresholds + if (thresholds.isCriticalThresholdEnabled) { + systemMemory = thresholds.getMaxMemoryBytes + systemMemory - thresholds.getCriticalThresholdBytes + } else RESERVED_SYSTEM_MEMORY_BYTES + } else { + // search in conf + conf.getOption(Constant.STORE_PROPERTY_PREFIX + + LauncherBase.CRITICAL_HEAP_PERCENTAGE) match { + case Some(c) => (systemMemory * (100.0 - c.toDouble) * 0.01).toLong + case None => RESERVED_SYSTEM_MEMORY_BYTES + } + } + conf.getOption("spark.testing.reservedMemory") match { + case Some(m) => reservedMemory = m.toLong + case _ => + if (conf.contains("spark.testing")) reservedMemory = 0 + else if (reservedMemory < 25L * 1024L * 1024L) { + throw new IllegalArgumentException(s"Reserved memory $reservedMemory must " + + "be at least 25MB. Please increase critical-heap-percentage and/or heap size " + + "using the --driver-memory option or spark.driver.memory in Spark configuration.") + } + } + val minSystemMemory = (reservedMemory * 1.5).ceil.toLong + if (systemMemory < minSystemMemory) { + throw new IllegalArgumentException(s"System memory $systemMemory must " + + s"be at least $minSystemMemory. 
Please increase heap size using the --driver-memory " + + "option or spark.driver.memory in Spark configuration.") + } + // SPARK-12759 Check executor memory to fail fast if memory is insufficient + if (conf.contains("spark.executor.memory")) { + val executorMemory = conf.getSizeAsBytes("spark.executor.memory") + if (executorMemory < minSystemMemory) { + throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " + + s"$minSystemMemory. Please increase executor memory using the " + + "--executor-memory option or spark.executor.memory in Spark configuration.") + } + } + + val usableMemory = systemMemory - reservedMemory + // add a cushion for GC before CRITICAL_UP is reached and for temporary buffers + // used by various components + val memoryFraction = conf.getDouble("spark.memory.fraction", DEFAULT_MEMORY_FRACTION) + (usableMemory * memoryFraction).toLong + } +} + +// Test listeners. Should not be used in production code. +abstract class MemoryEventListener { + def onStorageMemoryAcquireSuccess(objectName : String, bytes : Long) : Unit = {} + def onStorageMemoryAcquireFailure(objectName : String, bytes : Long) : Unit = {} + def onEviction(objectName: String, evicted: Long): Unit = {} + def onPositiveMemoryIncreaseDueToEviction(objectName : String, bytes : Long) : Unit = {} + def onExecutionMemoryAcquireSuccess(taskAttemptId : Long, bytes : Long) : Unit = {} + def onExecutionMemoryAcquireFailure(taskAttemptId : Long, bytes : Long) : Unit = {} +} diff --git a/cluster/src/main/scala/org/apache/spark/scheduler/SnappyTaskSchedulerImpl.scala b/cluster/src/main/scala/org/apache/spark/scheduler/SnappyTaskSchedulerImpl.scala new file mode 100644 index 0000000000..cf792949f5 --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/scheduler/SnappyTaskSchedulerImpl.scala @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.scheduler
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SnappyContext
+
+/**
+ * Task scheduler whose post-start hook hands the SparkContext to
+ * `SnappyContext.initGlobalSparkContext` before running the inherited hook.
+ */
+private[spark] class SnappyTaskSchedulerImpl(sc: SparkContext) extends TaskSchedulerImpl(sc) {
+
+  override def postStartHook(): Unit = {
+    // register the context first, then continue with the parent hook
+    SnappyContext.initGlobalSparkContext(sc)
+    super.postStartHook()
+  }
+}
+diff --git a/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyCoarseGrainedSchedulerBackend.scala b/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyCoarseGrainedSchedulerBackend.scala
new file mode 100644
index 0000000000..fd8c3bebf0
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyCoarseGrainedSchedulerBackend.scala
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.scheduler.cluster
+
+import com.pivotal.gemfirexd.internal.engine.Misc
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rpc.{RpcEndpointAddress, RpcEnv}
+import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd, SparkListenerBlockManagerAdded, SparkListenerBlockManagerRemoved, SparkListenerExecutorAdded, SparkListenerExecutorRemoved, TaskSchedulerImpl}
+import org.apache.spark.sql.collection.{ToolsCallbackInit, Utils}
+import org.apache.spark.sql.{BlockAndExecutorId, SnappyContext, SnappySession}
+import org.apache.spark.storage.BlockManagerId
+
+/**
+ * Coarse grained scheduler backend that reports a snappy specific
+ * application id and remembers the driver endpoint URL while it is started.
+ */
+class SnappyCoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl,
+    override val rpcEnv: RpcEnv)
+    extends CoarseGrainedSchedulerBackend(scheduler, rpcEnv) {
+
+  // fixed once per backend instance from the construction time
+  private val snappyAppId = s"snappy-app-${System.currentTimeMillis}"
+
+  /**
+   * Replaces the spark application id with the snappy specific one.
+   *
+   * @return the application id created with this backend
+   */
+  override def applicationId(): String = snappyAppId
+
+  @volatile private var _driverUrl: String = ""
+
+  /** Driver endpoint URL; empty string before start() and after stop(). */
+  def driverUrl: String = _driverUrl
+
+  /** Starts the parent backend and then records the driver endpoint URL. */
+  override def start(): Unit = {
+    super.start()
+    val driverConf = scheduler.sc.conf
+    val host = driverConf.get("spark.driver.host")
+    val port = driverConf.get("spark.driver.port").toInt
+    _driverUrl = RpcEndpointAddress(host, port,
+      CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString
+    logInfo(s"SchedulerBackend started with driverUrl $driverUrl")
+  }
+
+  /** Stops the parent backend, clears the URL and stops the lead if registered. */
+  override def stop(): Unit = {
+    super.stop()
+    _driverUrl = ""
+    SnappyClusterManager.cm match {
+      case Some(manager) => manager.stopLead()
+      case None =>
+    }
+    logInfo("SchedulerBackend stopped successfully")
+  }
+
+  override protected def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = {
+    // the app id travels with the driver properties so that an executor gets it when
+    // it fetches them using
+    // [org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.RetrieveSparkProps]
+    val withAppId = properties :+ ("spark.app.id" -> applicationId())
+    super.createDriverEndpoint(withAppId)
+  }
+}
+
+/**
+ * Listener that keeps the executor id to `BlockAndExecutorId` entries held by
+ * `SnappyContext` in step with executor and block manager events.
+ */
+class BlockManagerIdListener(sc: SparkContext)
+    extends SparkListener {
+
+  /**
+   * Stores the core and processor counts of the executor, then sends the session
+   * jar URIs to it when its block manager id has already been recorded.
+   */
+  override def onExecutorAdded(
+      msg: SparkListenerExecutorAdded): Unit = synchronized {
+    val executorId = msg.executorId
+    val cores = msg.executorInfo.totalCores
+    val profile = Misc.getMemStore.getDistributionAdvisor
+        .getProfile(executorId)
+    // without a store profile the executor cores stand in for the processor count
+    val processors = if (profile == null) cores else profile.getNumProcessors
+    SnappyContext.getBlockId(executorId) match {
+      case Some(existing) =>
+        existing._executorCores = cores
+        existing._numProcessors = processors
+      case None =>
+        SnappyContext.addBlockId(executorId,
+          new BlockAndExecutorId(null, cores, processors))
+    }
+    // look the entry up again since the branch above may just have added it
+    SnappyContext.getBlockId(executorId).foreach { registered =>
+      if (registered._blockId != null) handleNewExecutorJoin(registered._blockId)
+    }
+  }
+
+  /** Records the block manager id, adding a new entry when none is returned. */
+  override def onBlockManagerAdded(
+      msg: SparkListenerBlockManagerAdded): Unit = synchronized {
+    val managerId = msg.blockManagerId
+    val executorId = managerId.executorId
+    SnappyContext.getBlockIdIfNull(executorId) match {
+      case Some(existing) => existing._blockId = managerId
+      case None =>
+        val cores = sc.schedulerBackend.defaultParallelism()
+        SnappyContext.addBlockId(executorId, new BlockAndExecutorId(
+          managerId, cores, cores))
+    }
+  }
+
+  override def onBlockManagerRemoved(
+      msg: SparkListenerBlockManagerRemoved): Unit = {
+    val executorId = msg.blockManagerId.executorId
+    SnappyContext.removeBlockId(executorId)
+  }
+
+  override def onExecutorRemoved(msg: SparkListenerExecutorRemoved): Unit = {
+    SnappyContext.removeBlockId(msg.executorId)
+  }
+
+  override def onApplicationEnd(msg: SparkListenerApplicationEnd): Unit = {
+    SnappyContext.clearBlockIds()
+  }
+
+  // runs on the given block manager only and adds the session jar URIs there
+  private def handleNewExecutorJoin(bid: BlockManagerId): Unit = {
+    val jarUris = SnappySession.getJarURIs
+    Utils.mapExecutors[Unit](sc, () => {
+      ToolsCallbackInit.toolsCallback.addURIsToExecutorClassLoader(jarUris)
+      Iterator.empty
+    }, 30, Seq(bid))
+  }
+}
+diff --git a/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyEmbeddedModeClusterManager.scala b/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyEmbeddedModeClusterManager.scala
new file mode 100644
index 0000000000..f27634b2cb
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyEmbeddedModeClusterManager.scala
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.scheduler.cluster
+
+import io.snappydata.impl.LeadImpl
+import io.snappydata.util.ServiceUtils
+import io.snappydata.{Constant, Property, ServiceManager}
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.scheduler._
+import org.apache.spark.{SparkContext, SparkException}
+
+/**
+ * Snappy's cluster manager that is responsible for creating
+ * scheduler and scheduler backend.
+ */
+class SnappyEmbeddedModeClusterManager extends ExternalClusterManager {
+
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  // register this instance so that SnappyClusterManager.cm can hand it out
+  SnappyClusterManager.init(this)
+
+  // assigned by createSchedulerBackend
+  @volatile var schedulerBackend: SnappyCoarseGrainedSchedulerBackend = _
+
+  /**
+   * Creates the snappy task scheduler. When the master of `sc` starts with
+   * `Constant.SNAPPY_URL_PREFIX`, the remainder of the URL is stored in the
+   * conf of `sc` first: an "mcast-port=value" remainder is stored as that
+   * key/value pair, anything else as the locators property.
+   *
+   * @param sc        context whose master URL is inspected and whose conf is updated
+   * @param masterURL not read here; `sc.master` is used instead
+   * @return a new [[SnappyTaskSchedulerImpl]] for `sc`
+   * @throws Exception if the remainder is empty, "null" or does not match
+   *                   `ServiceUtils.LOCATOR_URL_PATTERN` (and is not an mcast-port setting)
+   */
+  override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = {
+    // If there is an application that is trying to join snappy
+    // as lead in embedded mode, we need the locator to connect
+    // to the snappy distributed system and hence the locator is
+    // passed in masterurl itself.
+    if (sc.master.startsWith(Constant.SNAPPY_URL_PREFIX)) {
+      val locator = sc.master.replaceFirst(Constant.SNAPPY_URL_PREFIX, "").trim
+
+      // All three outcomes must be branches of ONE if/else expression. Previously
+      // the locator tuple was a separate trailing statement, so the mcast-port
+      // tuple was computed and then thrown away.
+      val (prop, value) =
+        if (locator.indexOf("mcast-port") >= 0) {
+          val split = locator.split("=")
+          (split(0).trim, split(1).trim)
+        } else if (locator.isEmpty ||
+            locator == "null" ||
+            !ServiceUtils.LOCATOR_URL_PATTERN.matcher(locator).matches()) {
+          throw new Exception(s"locator info not provided in the snappy embedded url ${sc.master}")
+        } else {
+          (Property.Locators.name, locator)
+        }
+
+      logger.info(s"setting from url $prop with $value")
+      sc.conf.set(prop, value)
+    }
+    new SnappyTaskSchedulerImpl(sc)
+  }
+
+  /** Accepts every master URL that starts with "snappydata". */
+  override def canCreate(masterURL: String): Boolean =
+    masterURL.startsWith("snappydata")
+
+  /**
+   * Adds a [[BlockManagerIdListener]] to `sc`, then creates the backend and
+   * keeps it in `schedulerBackend`.
+   *
+   * @param scheduler must be a `TaskSchedulerImpl` (it is cast without a check)
+   */
+  override def createSchedulerBackend(sc: SparkContext, masterURL: String,
+      scheduler: TaskScheduler): SchedulerBackend = {
+    sc.addSparkListener(new BlockManagerIdListener(sc))
+    schedulerBackend = new SnappyCoarseGrainedSchedulerBackend(
+      scheduler.asInstanceOf[TaskSchedulerImpl], sc.env.rpcEnv)
+
+    schedulerBackend
+  }
+
+  /**
+   * Initializes the scheduler with its backend and then starts the lead.
+   *
+   * @throws SparkException if the current fabric service instance is null or
+   *                        is something other than a `LeadImpl`
+   */
+  def initialize(scheduler: TaskScheduler,
+      backend: SchedulerBackend): Unit = {
+    assert(scheduler.isInstanceOf[TaskSchedulerImpl])
+    val schedulerImpl = scheduler.asInstanceOf[TaskSchedulerImpl]
+
+    schedulerImpl.initialize(backend)
+
+    // fail if not invoked by launcher
+    ServiceManager.currentFabricServiceInstance match {
+      case _: LeadImpl => // ok
+      case null => throw new SparkException(
+        "Lead creation only supported from ServiceManager API")
+      case service => throw new SparkException(
+        s"Trying to start lead on node already booted as $service")
+    }
+
+    // wait for store to initialize (acquire lead lock or go to standby)
+    LeadImpl.invokeLeadStart(schedulerImpl.sc.conf)
+  }
+
+  /** Delegates to `LeadImpl.invokeLeadStop()`. */
+  def stopLead(): Unit = {
+    LeadImpl.invokeLeadStop()
+  }
+
+}
+
+/**
+ * Holds the most recently constructed [[SnappyEmbeddedModeClusterManager]].
+ */
+object SnappyClusterManager {
+
+  // stays null until a cluster manager instance registers itself
+  private[this] var _cm: SnappyEmbeddedModeClusterManager = _
+
+  def init(mgr: SnappyEmbeddedModeClusterManager): Unit = {
+    _cm = mgr
+  }
+
+  /** The registered manager, or None when none has been registered. */
+  def cm: Option[SnappyEmbeddedModeClusterManager] = Option(_cm)
+}
+diff --git a/cluster/src/main/scala/org/apache/spark/sql/SnappySessionFactory.scala b/cluster/src/main/scala/org/apache/spark/sql/SnappySessionFactory.scala
new file mode 100644
index 0000000000..3055919073
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/sql/SnappySessionFactory.scala
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql
+
+
+import com.pivotal.gemfirexd.internal.engine.Misc
+import com.typesafe.config.{Config, ConfigException}
+import io.snappydata.{Constant, ServiceManager}
+import io.snappydata.impl.LeadImpl
+import spark.jobserver.context.SparkContextFactory
+import spark.jobserver.util.ContextURLClassLoader
+import spark.jobserver.{ContextLike, SparkJobBase, SparkJobInvalid, SparkJobValid, SparkJobValidation}
+
+import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.util.SnappyUtils
+
+
+/**
+ * Job server context factory that hands out a new SnappySession per context.
+ */
+class SnappySessionFactory extends SparkContextFactory {
+
+  type C = SnappySession with ContextLike
+
+  /** Creates a new session; none of the three arguments is read. */
+  def makeContext(sparkConf: SparkConf, config: Config, contextName: String): C = {
+    SnappySessionFactory.newSession()
+  }
+}
+
+object SnappySessionFactory {
+
+  /**
+   * Copies the job credentials into the session when security is enabled.
+   *
+   * With security enabled, "snappydata.user" and "snappydata.password" are
+   * read from `jobConfig` and set on the SQL conf of `snc`, and the store
+   * prefixed user name entry is set to "*****". With security disabled
+   * nothing is touched.
+   *
+   * @param snc           session that receives the credentials
+   * @param jobConfig     job configuration holding the credentials
+   * @param fromStreamCtx when true, a user name that differs (ignoring case)
+   *                      from a non-empty one already set on `snc` is rejected
+   * @return `jobConfig` passed through [[cleanJobConfig]]; the unchanged
+   *         `jobConfig` when security is disabled or a credential key is missing
+   * @throws UnsupportedOperationException for the user name mismatch above
+   */
+  def updateCredentials(snc: SnappySession, jobConfig: Config,
+      fromStreamCtx: Boolean = false): Config = {
+    if (Misc.isSecurityEnabled) {
+      try {
+        // Pass job credentials to snappy session
+        val username = jobConfig.getString("snappydata.user")
+        val password = jobConfig.getString("snappydata.password")
+        if (fromStreamCtx) {
+          val old = snc.sqlContext.getConf(com.pivotal.gemfirexd.Attribute.USERNAME_ATTR, "")
+          if (!old.isEmpty && !old.equalsIgnoreCase(username)) {
+            throw new UnsupportedOperationException("Cannot submit a streaming job using an " +
+                "existing streaming context and a different username, when cluster is secure.")
+          }
+        }
+        snc.sqlContext.setConf(com.pivotal.gemfirexd.Attribute.USERNAME_ATTR, username)
+        snc.sqlContext.setConf(com.pivotal.gemfirexd.Attribute.PASSWORD_ATTR, password)
+        snc.sqlContext.setConf(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.USERNAME_ATTR, "*****")
+        // Clear admin user/password from jobConfig before passing it to user job.
+        cleanJobConfig(jobConfig)
+      } catch {
+        // both keys are read before anything is set, so the session is untouched here
+        case _: ConfigException.Missing => jobConfig // Config not found
+      }
+    } else {
+      jobConfig
+    }
+  }
+
+  /**
+   * Returns a copy of `c` without the user/password paths (plain and store
+   * prefixed), "gemfire.sys.security-password" and
+   * "javax.jdo.option.ConnectionURL".
+   */
+  def cleanJobConfig(c: Config): Config = {
+    c.withoutPath(Constant.STORE_PROPERTY_PREFIX + com.pivotal.gemfirexd.Attribute.USERNAME_ATTR)
+        .withoutPath(Constant.STORE_PROPERTY_PREFIX + com.pivotal.gemfirexd.Attribute.PASSWORD_ATTR)
+        .withoutPath(com.pivotal.gemfirexd.Attribute.USERNAME_ATTR)
+        .withoutPath(com.pivotal.gemfirexd.Attribute.PASSWORD_ATTR)
+        .withoutPath("gemfire.sys.security-password")
+        .withoutPath("javax.jdo.option.ConnectionURL")
+    // Remove snappydata properties file path when available.
+  }
+
+  /**
+   * Creates a session on the active SparkContext with the job server hooks mixed in.
+   *
+   * @throws IllegalStateException if there is no active SparkContext
+   */
+  protected def newSession(): SnappySession with ContextLike = {
+    // fail with a clear message rather than the bare None.get of Option.get
+    val activeContext = SparkContext.getActive.getOrElse(
+      throw new IllegalStateException("No active SparkContext available"))
+    new SnappySession(activeContext) with ContextLike {
+
+      override def isValidJob(job: SparkJobBase): Boolean = job.isInstanceOf[SnappySQLJob]
+
+      // Calling this method from JobKill.
+      override def stop(): Unit = {
+        // Stopping all StreamingQueries started by the session.
+        // If it's a normal job there won't be any streaming query and it will be a no-op.
+        this.sessionState.streamingQueryManager.active.foreach(q => q.stop())
+      }
+
+      // Callback added to provide our classloader to load job classes.
+      // If Job class directly refers to any jars which has been provided
+      // by install_jars, this can help.
+      override def makeClassLoader(parent: ContextURLClassLoader): ContextURLClassLoader = {
+        val cl = SnappyUtils.getSnappyContextURLClassLoader(parent)
+        val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl]
+        val loader = lead.urlclassloader
+        // copy the URLs of the lead's loader, if it has one, into the new loader
+        if (loader != null) {
+          loader.getURLs.foreach(u => {
+            cl.addURL(u)
+          })
+        }
+        cl
+      }
+    }
+  }
+}
+
+
+/**
+ * Base trait for jobs that run against a SnappySession; implementors
+ * provide `isValidJob` and `runSnappyJob`.
+ */
+trait SnappySQLJob extends SparkJobBase {
+  type C = Any
+
+  /** Casts `sc` to SnappySession, applies the credentials and maps the result of isValidJob. */
+  final override def validate(sc: C, config: Config): SparkJobValidation = {
+    SnappyJobValidate.validate(isValidJob(sc.asInstanceOf[SnappySession],
+      SnappySessionFactory.updateCredentials(sc.asInstanceOf[SnappySession], config)))
+  }
+
+  /**
+   * Runs `runSnappyJob` with the session dependencies set for the calling
+   * thread's context class loader; they are cleared again even on failure.
+   */
+  final override def runJob(sc: C, jobConfig: Config): Any = {
+    CodeGenerator.jobClassLoader.set(Thread.currentThread().getContextClassLoader)
+    val snSession = sc.asInstanceOf[SnappySession]
+    val sparkContext = snSession.sparkContext
+    try {
+      SnappyUtils.setSessionDependencies(sparkContext,
+        appName = this.getClass.getCanonicalName,
+        classLoader = Thread.currentThread().getContextClassLoader)
+      runSnappyJob(snSession, SnappySessionFactory.updateCredentials(snSession, jobConfig))
+    } finally {
+      SnappyUtils.clearSessionDependencies(sparkContext)
+    }
+  }
+
+  def isValidJob(sc: SnappySession, config: Config): SnappyJobValidation
+
+  def runSnappyJob(sc: SnappySession, jobConfig: Config): Any
+}
+
+abstract class JavaSnappySQLJob extends SnappySQLJob
+
+/**
+ * Maps a SnappyJobValidation onto the job server's SparkJobValidation.
+ */
+object SnappyJobValidate {
+  def validate(status: SnappyJobValidation): SparkJobValidation = {
+    status match {
+      case _: SnappyJobValid => SparkJobValid
+      case j: SnappyJobInvalid => SparkJobInvalid(j.reason)
+      // any other implementation of the trait is reported as invalid
+      case _ => SparkJobInvalid("isValid method is not correct")
+    }
+  }
+}
+
+trait SnappyJobValidation
+
+case class SnappyJobValid() extends SnappyJobValidation
+
+case class SnappyJobInvalid(reason: String) extends SnappyJobValidation
+diff --git a/cluster/src/main/scala/org/apache/spark/sql/hive/thriftserver/SnappyHiveThriftServer2.scala b/cluster/src/main/scala/org/apache/spark/sql/hive/thriftserver/SnappyHiveThriftServer2.scala
new file mode 100644
index 0000000000..07cbcee7bb
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/sql/hive/thriftserver/SnappyHiveThriftServer2.scala
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package org.apache.spark.sql.hive.thriftserver
+
+import java.net.InetAddress
+
+import org.apache.hadoop.hive.ql.metadata.Hive
+import org.apache.hive.service.cli.thrift.ThriftCLIService
+import org.apache.log4j.{Level, LogManager}
+
+import org.apache.spark.Logging
+import org.apache.spark.sql.hive.client.HiveClientImpl
+import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.HiveThriftServer2Listener
+import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab
+import org.apache.spark.sql.hive.{HiveUtils, SnappyHiveExternalCatalog}
+import org.apache.spark.sql.{SnappyContext, SnappySession, SparkSession}
+
+/**
+ * Launch an embedded hive thrift server supporting SnappySession instead of HiveSessionState.
+ */
+object SnappyHiveThriftServer2 extends Logging {
+
+  /**
+   * Creates, initializes and starts a HiveThriftServer2 on the global SparkContext.
+   *
+   * @param useHiveSession true to back the server with a hive enabled
+   *                       SparkSession, false to use a new SnappySession
+   * @return the started server
+   * @throws IllegalStateException if `SnappyContext.globalSparkContext` is null
+   */
+  def start(useHiveSession: Boolean): HiveThriftServer2 = {
+    logInfo(s"Starting HiveServer2 using ${if (useHiveSession) "hive" else "snappy"} session")
+
+    val sc = SnappyContext.globalSparkContext match {
+      case null => throw new IllegalStateException("No SparkContext available")
+      case context => context
+    }
+    val conf = sc.conf
+    val sparkSession = if (useHiveSession) {
+      SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
+    } else new SnappySession(sc)
+    SparkSQLEnv.sqlContext = sparkSession.sqlContext
+    SparkSQLEnv.sparkContext = sc
+    sparkSession.conf.set("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
+
+    // New executionHive is used to get the HiveServer2 configuration. When SnappySession
+    // is being used then only the hive server2 settings are copied from it while the
+    // full conf used is from the internal hive client from SnappySharedState.
+
+    // avoid meta-store init warnings
+    val rootLogger = LogManager.getRootLogger
+    val metaLogger = LogManager.getLogger("org.apache.hadoop.hive.metastore.MetaStoreDirectSql")
+    val currentRootLevel = rootLogger.getLevel
+    val currentMetaLevel = metaLogger.getLevel
+    rootLogger.setLevel(Level.ERROR)
+    metaLogger.setLevel(Level.ERROR)
+    val externalCatalog = SnappyHiveExternalCatalog.getExistingInstance
+    val hiveConf = try {
+      val executionHive = HiveUtils.newClientForExecution(conf,
+        sparkSession.sessionState.newHadoopConf())
+      val serverConf = executionHive.conf
+      // close the temporary hive client if present
+      val hiveClient = executionHive.clientLoader.cachedHive
+      if (hiveClient != null) {
+        Hive.set(hiveClient.asInstanceOf[Hive])
+        Hive.closeCurrent()
+        executionHive.clientLoader.cachedHive = null
+      }
+      if (useHiveSession) serverConf
+      else {
+        // use internal hive conf adding hive.server2 configurations
+        val conf = externalCatalog.client().asInstanceOf[HiveClientImpl].conf
+        val itr = serverConf.iterator()
+        while (itr.hasNext) {
+          val entry = itr.next()
+          if (entry.getKey.startsWith("hive.server2")) {
+            conf.set(entry.getKey, entry.getValue)
+          }
+        }
+        conf
+      }
+    } finally {
+      // put back the log levels that were lowered above
+      rootLogger.setLevel(currentRootLevel)
+      metaLogger.setLevel(currentMetaLevel)
+    }
+
+    val server = new HiveThriftServer2(SparkSQLEnv.sqlContext)
+    externalCatalog.withHiveExceptionHandling({
+      server.init(hiveConf)
+      server.start()
+      getHostPort(server) match {
+        case None => logInfo("Started HiveServer2")
+        case Some((host, port)) => logInfo(s"Started HiveServer2 on $host[$port]")
+      }
+      HiveThriftServer2.listener = new HiveThriftServer2Listener(
+        server, SparkSQLEnv.sqlContext.conf)
+      sc.addSparkListener(HiveThriftServer2.listener)
+    }, handleDisconnects = false)
+    server
+  }
+
+  /**
+   * Address and port of the first ThriftCLIService among the services of
+   * `server`, or None when there is no such service.
+   */
+  def getHostPort(server: HiveThriftServer2): Option[(InetAddress, Int)] = {
+    val itr = server.getServices.iterator()
+    while (itr.hasNext) {
+      itr.next() match {
+        case service: ThriftCLIService =>
+          val address = service.getServerIPAddress
+          val port = service.getPortNumber
+          return Some(address -> port)
+        case _ =>
+      }
+    }
+    None
+  }
+
+  /** Creates the thrift server UI tab; does nothing when no SQL context is set. */
+  def attachUI(): Unit = {
+    if (SparkSQLEnv.sqlContext != null) {
+      HiveThriftServer2.uiTab = Some(new ThriftServerTab(SparkSQLEnv.sparkContext))
+    }
+  }
+
+  /** Clears the SparkSQLEnv references, detaches the UI tab and drops the listener. */
+  def close(): Unit = {
+    SparkSQLEnv.sqlContext = null
+    SparkSQLEnv.sparkContext = null
+    HiveThriftServer2.uiTab match {
+      case Some(ui) => ui.detach()
+      case _ =>
+    }
+    // uiTab is an Option (see attachUI): reset it to None, since a null here
+    // would make any later Option call on it fail with a NullPointerException
+    HiveThriftServer2.uiTab = None
+    HiveThriftServer2.listener = null
+  }
+}
+diff --git a/cluster/src/main/scala/org/apache/spark/sql/streaming/SnappyStreamingContextFactory.scala b/cluster/src/main/scala/org/apache/spark/sql/streaming/SnappyStreamingContextFactory.scala
new file mode 100644
index 0000000000..580568d98f
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/sql/streaming/SnappyStreamingContextFactory.scala
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.streaming
+
+import com.typesafe.config.{Config, ConfigException}
+import io.snappydata.ServiceManager
+import io.snappydata.impl.LeadImpl
+import spark.jobserver.context.SparkContextFactory
+import spark.jobserver.{ContextLike, SparkJobBase, SparkJobValidation}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.{SnappyJobValidate, SnappyJobValidation, SnappySessionFactory}
+import org.apache.spark.streaming.{JavaSnappyStreamingJob, Milliseconds, SnappyStreamingContext}
+import org.apache.spark.util.SnappyUtils
+import spark.jobserver.util.ContextURLClassLoader
+
+/**
+ * Base class for streaming jobs submitted through the job server. Subclasses implement
+ * [[isValidJob]] and [[runSnappyJob]]; the final `validate` and `runJob` methods pass them
+ * the job configuration returned by `SnappySessionFactory.updateCredentials`.
+ */
+abstract class SnappyStreamingJob extends SparkJobBase {
+  override type C = SnappyStreamingContext
+
+  /** Validate the job against the config returned by `updateCredentials`. */
+  final override def validate(sc: C, config: Config): SparkJobValidation = {
+    // C is SnappyStreamingContext, so no cast is needed to reach snappySession
+    SnappyJobValidate.validate(isValidJob(sc,
+      SnappySessionFactory.updateCredentials(sc.snappySession, config, fromStreamCtx = true)))
+  }
+
+  /** Register the session dependencies for this job class, then run the job. */
+  final override def runJob(sc: C, jobConfig: Config): Any = {
+    SnappyUtils.setSessionDependencies(
+      sc.sparkContext,
+      appName = this.getClass.getCanonicalName,
+      classLoader = Thread.currentThread().getContextClassLoader)
+
+    runSnappyJob(sc, SnappySessionFactory.updateCredentials(sc.snappySession, jobConfig,
+      fromStreamCtx = true))
+  }
+
+  def isValidJob(sc: SnappyStreamingContext, config: Config): SnappyJobValidation
+
+  def runSnappyJob(sc: SnappyStreamingContext, jobConfig: Config): Any
+
+}
+
+/**
+ * Job server context factory that wraps the active SparkContext in a
+ * SnappyStreamingContext whose batch interval (milliseconds) is read from
+ * `streaming.batch_interval`.
+ */
+class SnappyStreamingContextFactory extends SparkContextFactory {
+
+  override type C = SnappyStreamingContext with ContextLike
+
+  override def makeContext(sparkConf: SparkConf, config: Config, contextName: String): C = {
+    val interval = config.getInt("streaming.batch_interval")
+
+    new SnappyStreamingContext(SparkContext.getActive.get,
+      Milliseconds(interval)) with ContextLike {
+
+      override def isValidJob(job: SparkJobBase): Boolean =
+        job.isInstanceOf[SnappyStreamingJob] || job.isInstanceOf[JavaSnappyStreamingJob]
+
+      override def stop(): Unit = {
+        try {
+          // only the config lookup is guarded: a missing setting means "stop gracefully";
+          // a failure of the stop call itself is not retried
+          val stopGracefully = try {
+            config.getBoolean("streaming.stopGracefully")
+          } catch {
+            case _: ConfigException.Missing => true
+          }
+          SnappyStreamingContext.getActive
+            .foreach(c => c.stop(stopSparkContext = false, stopGracefully = stopGracefully))
+        } finally {
+          SnappyUtils.clearSessionDependencies(sparkContext)
+        }
+      }
+
+      // Callback added to provide our classloader to load job classes.
+      // If Job class directly refers to any jars which has been provided
+      // by install_jars, this can help.
+      override def makeClassLoader(parent: ContextURLClassLoader): ContextURLClassLoader = {
+        val cl = SnappyUtils.getSnappyContextURLClassLoader(parent)
+        val lead = ServiceManager.getLeadInstance.asInstanceOf[LeadImpl]
+        // the lead's class loader may be null; Option(...) skips the copy in that case
+        Option(lead.urlclassloader).foreach(_.getURLs.foreach(u => cl.addURL(u)))
+        cl
+      }
+    }
+  }
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/AllExternalTablesResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllExternalTablesResource.scala
new file mode 100644
index 0000000000..69ae40a7f6
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllExternalTablesResource.scala
@@ -0,0 +1,32 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import javax.ws.rs.{GET, Produces}
+import javax.ws.rs.core.MediaType
+
+
+/** JSON resource listing the summaries of all external tables. */
+@Produces(Array(MediaType.APPLICATION_JSON))
+private[v1] class AllExternalTablesResource {
+
+  /** @return one [[ExternalTableSummary]] per external table reported by [[TableDetails]] */
+  @GET
+  def tablesList(): Seq[ExternalTableSummary] = TableDetails.getAllExternalTablesInfo
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/AllMembersResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllMembersResource.scala
new file mode 100644
index 0000000000..b86264b530
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllMembersResource.scala
@@ -0,0 +1,32 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import javax.ws.rs.{GET, Produces}
+import javax.ws.rs.core.MediaType
+
+
+/** JSON resource listing the summaries of all cluster members. */
+@Produces(Array(MediaType.APPLICATION_JSON))
+private[v1] class AllMembersResource() {
+
+  /** @return one [[MemberSummary]] per member reported by [[MemberDetails]] */
+  @GET
+  def membersList(): Seq[MemberSummary] = MemberDetails.getAllMembersInfo
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/AllTablesResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllTablesResource.scala
new file mode 100644
index 0000000000..f50d74caf7
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/AllTablesResource.scala
@@ -0,0 +1,32 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import javax.ws.rs.{GET, Produces}
+import javax.ws.rs.core.MediaType
+
+
+/** JSON resource listing the summaries of all tables. */
+@Produces(Array(MediaType.APPLICATION_JSON))
+private[v1] class AllTablesResource {
+
+  /** @return one [[TableSummary]] per table reported by [[TableDetails]] */
+  @GET
+  def tablesList(): Seq[TableSummary] = TableDetails.getAllTablesInfo
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterDetails.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterDetails.scala
new file mode 100644
index 0000000000..07183236f6
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterDetails.scala
@@ -0,0 +1,68 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+
+import scala.collection.mutable
+import scala.collection.mutable.ListBuffer
+
+import com.pivotal.gemfirexd.internal.engine.ui.ClusterStatistics
+
+/** Assembles the cluster-wide summary served by [[ClusterInfoResource]]. */
+object ClusterDetails {
+
+  /**
+   * Build a single-element list holding the cluster summary: core count and usage
+   * trends from [[ClusterStatistics]], plus the member, table and external table
+   * summaries.
+   */
+  def getClusterDetailsInfo: Seq[ClusterSummary] = {
+    val stats = ClusterStatistics.getInstance()
+
+    val coresInfo = mutable.HashMap.empty[String, Int]
+    coresInfo += ("totalCores" -> stats.getTotalCPUCores)
+
+    // output key -> trend identifier, listed in the order in which the trends are read
+    val trends = Seq(
+      "timeLine" -> ClusterStatistics.TREND_TIMELINE,
+      "cpuUsageTrend" -> ClusterStatistics.TREND_CPU_USAGE,
+      "jvmUsageTrend" -> ClusterStatistics.TREND_JVM_HEAP_USAGE,
+      "heapUsageTrend" -> ClusterStatistics.TREND_HEAP_USAGE,
+      "heapStorageUsageTrend" -> ClusterStatistics.TREND_HEAP_STORAGE_USAGE,
+      "heapExecutionUsageTrend" -> ClusterStatistics.TREND_HEAP_EXECUTION_USAGE,
+      "offHeapUsageTrend" -> ClusterStatistics.TREND_OFFHEAP_USAGE,
+      "offHeapStorageUsageTrend" -> ClusterStatistics.TREND_OFFHEAP_STORAGE_USAGE,
+      "offHeapExecutionUsageTrend" -> ClusterStatistics.TREND_OFFHEAP_EXECUTION_USAGE,
+      "aggrMemoryUsageTrend" -> ClusterStatistics.TREND_AGGR_MEMORY_USAGE,
+      "diskStoreDiskSpaceTrend" -> ClusterStatistics.TREND_DISKSTORE_DISKSPACE_USAGE)
+
+    val clusterInfo = mutable.HashMap.empty[String, Any]
+    clusterInfo += ("coresInfo" -> coresInfo)
+    trends.foreach { case (key, trend) =>
+      clusterInfo += (key -> stats.getUsageTrends(trend))
+    }
+
+    val membersInfo = MemberDetails.getAllMembersInfo
+    val tablesInfo = TableDetails.getAllTablesInfo
+    val extTablesInfo = TableDetails.getAllExternalTablesInfo
+
+    List(new ClusterSummary(clusterInfo, membersInfo, tablesInfo, extTablesInfo))
+  }
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterInfoResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterInfoResource.scala
new file mode 100644
index 0000000000..3dd4f9117a
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/ClusterInfoResource.scala
@@ -0,0 +1,31 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import javax.ws.rs.{GET, Produces}
+import javax.ws.rs.core.MediaType
+
+/** JSON resource returning the cluster-wide summary. */
+@Produces(Array(MediaType.APPLICATION_JSON))
+private[v1] class ClusterInfoResource {
+
+  /** @return the cluster summary assembled by [[ClusterDetails]] */
+  @GET
+  def clusterInfo(): Seq[ClusterSummary] = ClusterDetails.getClusterDetailsInfo
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/MemberDetails.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/MemberDetails.scala
new file mode 100644
index 0000000000..85a08a1b12
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/MemberDetails.scala
@@ -0,0 +1,141 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc.
All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import com.pivotal.gemfirexd.internal.engine.ui.MemberStatistics
+import io.snappydata.SnappyTableStatsProviderService
+
+/**
+ * Converts the member statistics held by SnappyTableStatsProviderService into
+ * [[MemberSummary]] objects.
+ */
+object MemberDetails {
+
+  /** @return a summary for every member returned by getMembersStatsFromService */
+  def getAllMembersInfo: Seq[MemberSummary] = {
+    val allMembers = SnappyTableStatsProviderService.getService.getMembersStatsFromService
+    allMembers.mapValues(mem => {getMemberSummary(mem)}).values.toList
+  }
+
+  /**
+   * @param memId member id to look up; compared case-insensitively
+   * @return summaries of the members whose id matches `memId`, empty when none does
+   */
+  def getMembersInfo(memId: String): Seq[MemberSummary] = {
+    val allMembers = SnappyTableStatsProviderService.getService.getMembersStatsFromService
+    allMembers.filter(_._2.getId.equalsIgnoreCase(memId))
+      .mapValues(mem => {getMemberSummary(mem)}).values.toList
+  }
+
+  /**
+   * Copy the fields of one member's statistics into a [[MemberSummary]], deriving the
+   * display name, the short directory name and the member type along the way.
+   */
+  def getMemberSummary(memberDetails: MemberStatistics): MemberSummary = {
+
+    val status = memberDetails.getStatus
+    /*
+    val statusImgUri = if (status.toString.toLowerCase.equals("running")) {
+      "/static/snappydata/running-status-icon-20x19.png"
+    } else {
+      "/static/snappydata/stopped-status-icon-20x19.png"
+    }
+    */
+
+    val memberId = memberDetails.getId
+
+    // fall back to the member id when the name is empty or the "NA" placeholder
+    val nameOrId = {
+      if (memberDetails.getName.isEmpty
+          || memberDetails.getName.equalsIgnoreCase("NA")) {
+        memberDetails.getId
+      } else {
+        memberDetails.getName
+      }
+    }
+
+    val host = memberDetails.getHost
+    val fullDirName = memberDetails.getUserDir
+    // last path component of the user directory, split on this JVM's file separator
+    val shortDirName = fullDirName.substring(
+      fullDirName.lastIndexOf(System.getProperty("file.separator")) + 1)
+    val logFile = memberDetails.getLogFile
+    val processId = memberDetails.getProcessId
+
+    val distStoreUUID = memberDetails.getDiskStoreUUID
+    val distStoreName = memberDetails.getDiskStoreName
+
+    val isLead: Boolean = memberDetails.isLead
+    val isActiveLead: Boolean = memberDetails.isLeadActive
+    val isLocator: Boolean = memberDetails.isLocator
+    val isDataServer: Boolean = memberDetails.isDataServer
+    // checked in order: lead, locator, data server; anything else is a connector
+    val memberType = {
+      if (isLead || isActiveLead) {
+        "LEAD"
+      } else if (isLocator) {
+        "LOCATOR"
+      } else if (isDataServer) {
+        "DATA SERVER"
+      } else {
+        "CONNECTOR"
+      }
+    }
+
+    val cores = memberDetails.getCores
+    val cpuActive = memberDetails.getCpuActive
+    val clients = memberDetails.getClientsCount
+
+    val heapStoragePoolUsed = memberDetails.getHeapStoragePoolUsed
+    val heapStoragePoolSize = memberDetails.getHeapStoragePoolSize
+    val heapExecutionPoolUsed = memberDetails.getHeapExecutionPoolUsed
+    val heapExecutionPoolSize = memberDetails.getHeapExecutionPoolSize
+
+    val offHeapStoragePoolUsed = memberDetails.getOffHeapStoragePoolUsed
+    val offHeapStoragePoolSize = memberDetails.getOffHeapStoragePoolSize
+    val offHeapExecutionPoolUsed = memberDetails.getOffHeapExecutionPoolUsed
+    val offHeapExecutionPoolSize = memberDetails.getOffHeapExecutionPoolSize
+
+    val heapMemorySize = memberDetails.getHeapMemorySize
+    val heapMemoryUsed = memberDetails.getHeapMemoryUsed
+    val offHeapMemorySize = memberDetails.getOffHeapMemorySize
+    val offHeapMemoryUsed = memberDetails.getOffHeapMemoryUsed
+
+    val jvmHeapMax = memberDetails.getJvmMaxMemory
+    val jvmHeapTotal = memberDetails.getJvmTotalMemory
+    val jvmHeapUsed = memberDetails.getJvmUsedMemory
+    val jvmHeapFree = memberDetails.getJvmFreeMemory
+
+    val diskStoreDiskSpace = memberDetails.getDiskStoreDiskSpace
+
+    val timeLine = memberDetails.getUsageTrends(MemberStatistics.TREND_TIMELINE)
+    val cpuUsageTrends = memberDetails.getUsageTrends(MemberStatistics.TREND_CPU_USAGE)
+    val jvmUsageTrends = memberDetails.getUsageTrends(MemberStatistics.TREND_JVM_HEAP_USAGE)
+    val heapUsageTrends = memberDetails.getUsageTrends(MemberStatistics.TREND_HEAP_USAGE)
+    val heapStorageUsageTrends = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_HEAP_STORAGE_USAGE)
+    val heapExecutionUsageTrends = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_HEAP_EXECUTION_USAGE)
+    val offHeapUsageTrends = memberDetails.getUsageTrends(MemberStatistics.TREND_OFFHEAP_USAGE)
+    val offHeapStorageUsageTrends = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_OFFHEAP_STORAGE_USAGE)
+    val offHeapExecutionUsageTrends = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_OFFHEAP_EXECUTION_USAGE)
+    val aggrMemoryUsageTrends = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_AGGR_MEMORY_USAGE)
+    val diskStoreDiskSpaceTrend = memberDetails.getUsageTrends(
+      MemberStatistics.TREND_DISKSTORE_DISKSPACE_USAGE);
+
+    // arguments are positional: their order must follow the MemberSummary constructor
+    new MemberSummary(memberId, nameOrId.toString, host, shortDirName, fullDirName,
+      logFile, processId, distStoreUUID, distStoreName, status, memberType, isLocator,
+      isDataServer, isLead, isActiveLead, cores,
+      cpuActive, clients, jvmHeapMax, jvmHeapUsed, jvmHeapTotal, jvmHeapFree, heapStoragePoolUsed,
+      heapStoragePoolSize, heapExecutionPoolUsed, heapExecutionPoolSize, heapMemorySize,
+      heapMemoryUsed, offHeapStoragePoolUsed, offHeapStoragePoolSize, offHeapExecutionPoolUsed,
+      offHeapExecutionPoolSize, offHeapMemorySize, offHeapMemoryUsed, diskStoreDiskSpace,
+      timeLine, cpuUsageTrends, jvmUsageTrends, heapUsageTrends, heapStorageUsageTrends,
+      heapExecutionUsageTrends, offHeapUsageTrends, offHeapStorageUsageTrends,
+      offHeapExecutionUsageTrends, aggrMemoryUsageTrends, diskStoreDiskSpaceTrend)
+  }
+
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/MembersDetailsResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/MembersDetailsResource.scala
new file mode 100644
index 0000000000..132332f5c3
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/MembersDetailsResource.scala
@@ -0,0 +1,37 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import javax.ws.rs.{GET, PathParam, Produces}
+import javax.ws.rs.core.MediaType
+
+
+/** JSON resource returning the summary of the member(s) matching a member id. */
+@Produces(Array(MediaType.APPLICATION_JSON))
+private[v1] class MembersDetailsResource() {
+
+  /** Look up `memberId`; throws NotFoundException when no member matches it. */
+  @GET
+  def membersDetails(@PathParam("memberId") memberId: String): Seq[MemberSummary] = {
+    val matches = MemberDetails.getMembersInfo(memberId)
+    if (matches.nonEmpty) matches
+    else throw new NotFoundException("Unknown Member: " + memberId)
+  }
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/SnappyApiRootResource.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/SnappyApiRootResource.scala
new file mode 100644
index 0000000000..e7502c41b5
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/SnappyApiRootResource.scala
@@ -0,0 +1,83 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs._ + +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} +import org.glassfish.jersey.server.ServerProperties +import org.glassfish.jersey.servlet.ServletContainer + + +/** + * Main entry point for serving snappy application data as json, using JAX-RS. + * + * Each resource should have endpoints that return **public** classes defined in snappy-api.scala. + * Mima binary compatibility checks ensure that we don't inadvertently make changes that break the + * api. + * The returned objects are automatically converted to json by jackson with JacksonMessageWriter. + * In addition, there are a number of tests in HistoryServerSuite that compare the json to "golden + * files". Any changes and additions should be reflected there as well -- see the notes in + * HistoryServerSuite. 
+ */
+
+// todo : need to add tests to test below apis
+
+@Path("/services")
+private[v1] class SnappyApiRootResource extends ApiRequestContext {
+
+  // Each sub-resource locator below returns a fresh resource object for its path.
+
+  @Path("clusterinfo")
+  def getClusterInfo(): ClusterInfoResource = new ClusterInfoResource
+
+  @Path("allmembers")
+  def getAllMembers(): AllMembersResource = new AllMembersResource
+
+  @Path("memberdetails/{memberId}")
+  def getMemberDetails(): MembersDetailsResource = new MembersDetailsResource
+
+  @Path("alltables")
+  def getAllTables(): AllTablesResource = new AllTablesResource
+
+  @Path("allexternaltables")
+  def getAllExternalTables(): AllExternalTablesResource = new AllExternalTablesResource
+
+}
+
+private[spark] object SnappyApiRootResource {
+
+  /**
+   * Build the session-less servlet context that serves this package's JAX-RS
+   * resources under the "/snappy-api" context path.
+   */
+  def getServletHandler(uiRoot: UIRoot): ServletContextHandler = {
+    val handler = new ServletContextHandler(ServletContextHandler.NO_SESSIONS)
+    handler.setContextPath("/snappy-api")
+
+    val jerseyServlet: ServletHolder = new ServletHolder(classOf[ServletContainer])
+    jerseyServlet.setInitParameter(
+      ServerProperties.PROVIDER_PACKAGES, "org.apache.spark.status.api.v1")
+
+    UIRootFromServletContext.setUiRoot(handler, uiRoot)
+    handler.addServlet(jerseyServlet, "/*")
+    handler
+  }
+}
\ No newline at end of file
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/TableDetails.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/TableDetails.scala
new file mode 100644
index 0000000000..744c4cec36
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/TableDetails.scala
@@ -0,0 +1,64 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import io.snappydata.SnappyTableStatsProviderService
+
+/**
+ * Converts the table statistics held by SnappyTableStatsProviderService into the
+ * summary objects returned by the REST resources.
+ */
+object TableDetails {
+
+  /** @return a [[TableSummary]] for every entry of getAllTableStatsFromService */
+  def getAllTablesInfo: Seq[TableSummary] = {
+    val tableStats = SnappyTableStatsProviderService.getService.getAllTableStatsFromService
+
+    tableStats.values.map { table =>
+      val storageModel = if (table.isColumnTable) "COLUMN" else "ROW"
+      val distributionType = if (table.isReplicatedTable) "REPLICATE" else "PARTITION"
+
+      new TableSummary(table.getTableName, storageModel, distributionType,
+        table.isColumnTable, table.isReplicatedTable, table.getRowCount, table.getSizeInMemory,
+        table.getSizeSpillToDisk, table.getTotalSize, table.getBucketCount,
+        table.getRedundancy, table.isRedundancyImpaired, table.isAnyBucketLost)
+    }.toList
+  }
+
+  /**
+   * @return an [[ExternalTableSummary]] for every entry of
+   *         getAllExternalTableStatsFromService
+   */
+  def getAllExternalTablesInfo: Seq[ExternalTableSummary] = {
+    val externalTableStats =
+      SnappyTableStatsProviderService.getService.getAllExternalTableStatsFromService
+
+    externalTableStats.values.map { table =>
+      new ExternalTableSummary(table.getTableFullyQualifiedName, table.getProvider,
+        table.getDataSourcePath)
+    }.toList
+  }
+}
diff --git a/cluster/src/main/scala/org/apache/spark/status/api/v1/snappyapi.scala b/cluster/src/main/scala/org/apache/spark/status/api/v1/snappyapi.scala
new file mode 100644
index 0000000000..86ab23421e
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/status/api/v1/snappyapi.scala
@@ -0,0 +1,103 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.status.api.v1
+
+import java.util.UUID
+
+import scala.collection.mutable
+
+
+
+/**
+ * Cluster-wide summary: a map of cluster level values plus the member, table and
+ * external table summaries. Instances can only be constructed inside the `spark`
+ * package.
+ */
+class ClusterSummary private[spark](
+    val clusterInfo: mutable.HashMap[String, Any],
+    val membersInfo: Seq[MemberSummary],
+    val tablesInfo: Seq[TableSummary],
+    val externalTablesInfo: Seq[ExternalTableSummary]
+)
+
+/**
+ * Summary of a single cluster member. All constructor parameters are plain positional
+ * values, many sharing the same type (Long, Array[Object]), so callers must pass them
+ * in exactly the declared order.
+ */
+class MemberSummary private[spark](
+    val id: String,
+    val name: String,
+    val host: String,
+    val userDir: String,
+    val userDirFullPath: String,
+    val logFile: String,
+    val processId: String,
+    val diskStoreUUID: UUID,
+    val diskStoreName: String,
+    val status: String,
+    val memberType: String,
+    val isLocator: Boolean,
+    val isDataServer: Boolean,
+    val isLead: Boolean,
+    val isActiveLead: Boolean,
+    val cores: Int,
+    val cpuActive: Int,
+    val clients: Long,
+    val maxMemory: Long,
+    val usedMemory: Long,
+    val totalMemory: Long,
+    val freeMemory: Long,
+    val heapStoragePoolUsed: Long,
+    val heapStoragePoolSize: Long,
+    val heapExecutionPoolUsed: Long,
+    val heapExecutionPoolSize: Long,
+    val heapMemorySize: Long,
+    val heapMemoryUsed: Long,
+    val offHeapStoragePoolUsed: Long,
+    val offHeapStoragePoolSize: Long,
+    val offHeapExecutionPoolUsed: Long,
+    val offHeapExecutionPoolSize: Long,
+    val offHeapMemorySize: Long,
+    val offHeapMemoryUsed: Long,
+    val diskStoreDiskSpace: Long,
+    val timeLine: Array[Object],
+    val cpuUsageTrend: Array[Object],
+    val jvmUsageTrend: Array[Object],
+    val heapUsageTrend: Array[Object],
+    val heapStorageUsageTrend: Array[Object],
+    val heapExecutionUsageTrend: Array[Object],
+    val offHeapUsageTrend: Array[Object],
+    val offHeapStorageUsageTrend: Array[Object],
+    val offHeapExecutionUsageTrend: Array[Object],
+    val aggrMemoryUsageTrend: Array[Object],
+    val diskStoreDiskSpaceTrend: Array[Object]
+)
+
+/** Summary of a single table: name, storage/distribution type, sizes and bucket state. */
+class TableSummary private[spark](
+    val tableName: String,
+    val storageModel: String,
+    val distributionType: String,
+    val isColumnTable: Boolean,
+    val isReplicatedTable: Boolean,
+    val rowCount: Long,
+    val sizeInMemory: Long,
+    val sizeSpillToDisk: Long,
+    val totalSize: Long,
+    val bucketCount: Int,
+    val redundancy: Int,
+    val redundancyImpaired: Boolean,
+    val isAnyBucketLost: Boolean
+)
+
+/** Summary of a single external table: fully qualified name, provider and source. */
+class ExternalTableSummary private[spark](
+    val tableFQName: String,
+    val provider: String,
+    val source: String
+)
diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyBasicAuthenticator.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyBasicAuthenticator.scala
new file mode 100644
index 0000000000..db2a169495
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyBasicAuthenticator.scala
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.ui
+
+import java.util
+import java.util.Properties
+import javax.security.auth.Subject
+import javax.servlet.ServletRequest
+import javax.servlet.http.HttpServletRequest
+
+import scala.collection.JavaConverters._
+
+import com.pivotal.gemfirexd.Attribute
+import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils
+import com.pivotal.gemfirexd.internal.engine.{GfxdConstants, Misc}
+import com.pivotal.gemfirexd.internal.shared.common.sanity.SanityManager
+import org.eclipse.jetty.security.DefaultUserIdentity
+import org.eclipse.jetty.security.authentication.BasicAuthenticator
+import org.eclipse.jetty.server.{Request, UserIdentity}
+import templates.security.UsernamePrincipal
+
+import org.apache.spark.Logging
+
+/**
+ * Basic authenticator that checks the supplied credentials against the
+ * authentication service of the booting store database.
+ */
+class SnappyBasicAuthenticator extends BasicAuthenticator with Logging {
+
+  /**
+   * Authenticate `username`/`password` with the store's authentication service.
+   *
+   * @return a user identity carrying `JettyUtils.snappyDataRoles` on success, or null
+   *         when the credentials are missing or rejected
+   */
+  override def login(username: String, password: Any, request: ServletRequest): UserIdentity = {
+    if (username == null || password == null) {
+      // java.util.Properties rejects null values and password.toString would throw,
+      // so a login without credentials is denied instead of failing with an NPE
+      null
+    } else {
+      val props = new Properties()
+      props.setProperty(Attribute.USERNAME_ATTR, username)
+      props.setProperty(Attribute.PASSWORD_ATTR, password.toString)
+
+      val memStore = Misc.getMemStoreBooting
+      // a non-null result is the failure reason; null means the login succeeded
+      val result = memStore.getDatabase.getAuthenticationService.authenticate(
+        memStore.getDatabaseName, props)
+
+      if (result != null) {
+        val msg = s"ACCESS DENIED, user [$username]. $result"
+        if (GemFireXDUtils.TraceAuthentication) {
+          SanityManager.DEBUG_PRINT(GfxdConstants.TRACE_AUTHENTICATION, msg)
+        }
+        null
+      } else {
+        val principal = new UsernamePrincipal(username)
+        val response = request match {
+          case r: Request => r.getResponse
+          case _ => null
+        }
+        this.renewSession(request.asInstanceOf[HttpServletRequest], response)
+
+        new DefaultUserIdentity(new Subject(false, Set(principal).asJava, new util.HashSet(),
+          new util.HashSet()), principal, JettyUtils.snappyDataRoles)
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardPage.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardPage.scala
new file mode 100644
index 0000000000..c5339a314c
--- /dev/null
+++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardPage.scala
@@ -0,0 +1,494 @@
+/*
+ * Changes for SnappyData data platform.
+ *
+ * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ + +package org.apache.spark.ui + +import java.text.SimpleDateFormat +import java.util.Date +import javax.servlet.http.HttpServletRequest + +import scala.collection.mutable +import scala.xml.Node + +import com.pivotal.gemfirexd.internal.engine.ui.{SnappyExternalTableStats, SnappyRegionStats} +import io.snappydata.SnappyTableStatsProviderService + +import org.apache.spark.internal.Logging + +private[ui] class SnappyDashboardPage (parent: SnappyDashboardTab) + extends WebUIPage("") with Logging { + + override def render(request: HttpServletRequest): Seq[Node] = { + + val pageHeaderText: String = SnappyDashboardPage.pageHeaderText + + // Generate Pages HTML + val pageTitleNode = createPageTitleNode(pageHeaderText) + + val clusterStatsDetails = { + val clustersStatsTitle = createTitleNode(SnappyDashboardPage.clusterStatsTitle, + SnappyDashboardPage.clusterStatsTitleTooltip, + "clustersStatsTitle", + true) + val clusterDetails = clusterStats + + clustersStatsTitle ++ clusterDetails + } + + val membersStatsDetails = { + val membersStatsTitle = createTitleNode(SnappyDashboardPage.membersStatsTitle, + SnappyDashboardPage.membersStatsTitleTooltip, + "membersStatsTitle", + true) + val membersStatsTable = memberStats + + membersStatsTitle ++ membersStatsTable + } + + val tablesStatsDetails = { + val tablesStatsTitle = createTitleNode(SnappyDashboardPage.tablesStatsTitle, + SnappyDashboardPage.tablesStatsTitleTooltip, + "tablesStatsTitle", + true) + val tablesStatsTable = tableStats + + tablesStatsTitle ++ tablesStatsTable + } + + val extTablesStatsDetails = { + val extTablesStatsTitle = createTitleNode(SnappyDashboardPage.extTablesStatsTitle, + SnappyDashboardPage.extTablesStatsTitleTooltip, + "extTablesStatsTitle", + false) + val extTablesStatsTable = extTableStats + + extTablesStatsTitle ++ extTablesStatsTable + } + + val jsScripts = + + val pageContent = jsScripts ++ pageTitleNode ++ clusterStatsDetails ++ membersStatsDetails ++ + tablesStatsDetails ++ 
extTablesStatsDetails + + UIUtils.headerSparkPage(pageHeaderText, pageContent, parent, Some(500), + useDataTables = true, isSnappyPage = true) + + } + + private def createPageTitleNode(title: String): Seq[Node] = { +
+
+
+
+
+
+
+ + +
+
Auto Refresh:
+
+
+
+
+

+ {title} +

+
+
+
+
+
+ Total CPU Cores: + +
+
+
+ } + + private def createTitleNode(title: String, tooltip: String, nodeId: String, display: Boolean): + Seq[Node] = { + + val displayDefault: String = if (display) { "" } else { "display: none;" } + +
+
+

+ {title} +

+
+
+ } + + private def clusterStats(): Seq[Node] = { +
+ +
+
+
+
+
+
+
+
+
+
+
+ } + + private def memberStats(): Seq[Node] = { +
+ + + + + + + + + + + + + +
+
+ +
+
+ + {SnappyDashboardPage.memberStatsColumn("status")} + + + + {SnappyDashboardPage.memberStatsColumn("description")} + + + + {SnappyDashboardPage.memberStatsColumn("memberType")} + + + + {SnappyDashboardPage.memberStatsColumn("cpuUsage")} + + + + {SnappyDashboardPage.memberStatsColumn("memoryUsage")} + + + + Heap Memory
(Used / Total) +
+
+ + Off-Heap Memory
(Used / Total) +
+
+
+ } + + private def tableStats(): Seq[Node] = { +
+ + + + + + + + + + + + + + + +
+ + {SnappyDashboardPage.tableStatsColumn("name")} + + + + {SnappyDashboardPage.tableStatsColumn("storageModel")} + + + + {SnappyDashboardPage.tableStatsColumn("distributionType")} + + + + {SnappyDashboardPage.tableStatsColumn("rowCount")} + + + + {SnappyDashboardPage.tableStatsColumn("sizeInMemory")} + + + + {SnappyDashboardPage.tableStatsColumn("sizeSpillToDisk")} + + + + {SnappyDashboardPage.tableStatsColumn("totalSize")} + + + + {SnappyDashboardPage.tableStatsColumn("bucketCount")} + + + + {SnappyDashboardPage.tableStatsColumn("redundancy")} + + + + {SnappyDashboardPage.tableStatsColumn("redundancyStatus")} + +
+
+ } + + private def extTableStats(): Seq[Node] = { + + + } + +} + +object SnappyDashboardPage { + val pageHeaderText = "Dashboard" + + object Status { + val normal = "Normal" + val warning = "Warning" + val error = "Error" + val severe = "Severe" + } + + val ValueNotApplicable = "N/A" + + val clusterStatsTitle = "Cluster" + val clusterStatsTitleTooltip = "Clusters Summary" + val clusterStats = scala.collection.mutable.HashMap.empty[String, Any] + clusterStats += ("status" -> "Cluster Status") + clusterStats += ("members" -> "Members") + clusterStats += ("servers" -> "Data Servers") + clusterStats += ("leads" -> "Leads") + clusterStats += ("locators" -> "Locators") + clusterStats += ("clients" -> "Connections") + clusterStats += ("tables" -> "Tables") + clusterStats += ("cpuUsage" -> "CPU Usage") + clusterStats += ("cpuUsageTooltip" -> "Clusters CPU Usage") + clusterStats += ("memoryUsage" -> "Memory Usage") + clusterStats += ("memoryUsageTooltip" -> "Clusters Total Memory Usage") + clusterStats += ("heapUsage" -> "Heap Usage") + clusterStats += ("heapUsageTooltip" -> "Clusters Total Heap Usage") + clusterStats += ("offHeapUsage" -> "Off-Heap Usage") + clusterStats += ("offHeapUsageTooltip" -> "Clusters Total Off-Heap Usage") + clusterStats += ("jvmHeapUsage" -> "JVM Heap Usage") + clusterStats += ("jvmHeapUsageTooltip" -> "Clusters Total JVM Heap Usage") + + val membersStatsTitle = "Members" + val membersStatsTitleTooltip = "Members Summary" + val memberStatsColumn = scala.collection.mutable.HashMap.empty[String, String] + memberStatsColumn += ("expandCollapseTooltip" -> "Expand/Collapse All Rows") + memberStatsColumn += ("status" -> "Status") + memberStatsColumn += ("statusTooltip" -> "Members Status") + memberStatsColumn += ("id" -> "Id") + memberStatsColumn += ("idTooltip" -> "Members unique Identifier") + memberStatsColumn += ("name" -> "Name") + memberStatsColumn += ("nameTooltip" -> "Members Name") + memberStatsColumn += ("nameOrId" -> "Member") + 
memberStatsColumn += ("nameOrIdTooltip" -> "Members Name/Id") + memberStatsColumn += ("description" -> "Member") + memberStatsColumn += ("descriptionTooltip" -> "Members Description") + memberStatsColumn += ("host" -> "Host") + memberStatsColumn += ("hostTooltip" -> "Physical machine on which member is running") + memberStatsColumn += ("cpuUsage" -> "CPU Usage") + memberStatsColumn += ("cpuUsageTooltip" -> "CPU used by Member Host") + memberStatsColumn += ("memoryUsage" -> "Memory Usage") + memberStatsColumn += ("memoryUsageTooltip" -> "Memory(Heap + Off-Heap) used by Member") + memberStatsColumn += ("usedMemory" -> "Used Memory") + memberStatsColumn += ("usedMemoryTooltip" -> "Used Memory") + memberStatsColumn += ("totalMemory" -> "Total Memory") + memberStatsColumn += ("totalMemoryTooltip" -> "Total Memory") + memberStatsColumn += ("clients" -> "Connections") + memberStatsColumn += ("clientsTooltip" -> "Number of JDBC connections to Member") + memberStatsColumn += ("memberType" -> "Type") + memberStatsColumn += ("memberTypeTooltip" -> "Member is Lead / Locator / Data Server") + memberStatsColumn += ("lead" -> "Lead") + memberStatsColumn += ("leadTooltip" -> "Member is Lead") + memberStatsColumn += ("locator" -> "Locator") + memberStatsColumn += ("locatorTooltip" -> "Member is Locator") + memberStatsColumn += ("server" -> "Server") + memberStatsColumn += ("serverTooltip" -> "Member is Server") + memberStatsColumn += ("storageMemoryUsed" -> "StorageUsed") + memberStatsColumn += ("storageMemoryToolTip" -> "Total storage pool memory used") + memberStatsColumn += ("storageMemoryPoolSize" -> "StoragePoolSize") + memberStatsColumn += ("storageMemorySizeToolTip" -> "Max storage pool memory size") + memberStatsColumn += ("executionMemoryUsed" -> "ExecutionUsed") + memberStatsColumn += ("executionMemoryToolTip" -> "Total execution pool memory used") + memberStatsColumn += ("executionMemoryPoolSize" -> "ExecutionPoolSize") + memberStatsColumn += 
("executionMemorySizeToolTip" -> "Max execution pool memory size") + memberStatsColumn += ("heapMemory" -> "Heap Memory (Used / Total)") + memberStatsColumn += ("heapMemoryTooltip" -> "Members used and total Heap Memory") + memberStatsColumn += ("offHeapMemory" -> "Off-Heap Memory (Used / Total)") + memberStatsColumn += ("offHeapMemoryTooltip" -> "Members used and total Off Heap Memory") + memberStatsColumn += ("jvmHeapMemory" -> "JVM Heap (Used / Total)") + memberStatsColumn += ("jvmHeapMemoryTooltip" -> "Members used and total JVM Heap") + + val tablesStatsTitle = "Tables" + val tablesStatsTitleTooltip = "Tables Summary" + val tableStatsColumn = scala.collection.mutable.HashMap.empty[String, String] + tableStatsColumn += ("id" -> "Id") + tableStatsColumn += ("idTooltip" -> "Tables unique Identifier") + tableStatsColumn += ("name" -> "Name") + tableStatsColumn += ("nameTooltip" -> "Tables Name") + tableStatsColumn += ("storageModel" -> "Storage Model") + tableStatsColumn += ("storageModelTooltip" -> "Storage Model is either COLUMN or ROW ") + tableStatsColumn += ("distributionType" -> "Distribution Type") + tableStatsColumn += ("distributionTypeTooltip" -> + "Distribution Type is either PARTITIONED or REPLICATED table ") + tableStatsColumn += ("rowCount" -> "Row Count") + tableStatsColumn += ("rowCountTooltip" -> "Total Rows in Table") + tableStatsColumn += ("sizeInMemory" -> "In-Memory Size") + tableStatsColumn += ("sizeInMemoryTooltip" -> "Tables Size in Memory") + tableStatsColumn += ("sizeSpillToDisk" -> "Spill-To-Disk Size") + tableStatsColumn += ("sizeSpillToDiskTooltip" -> "Tables Spillover to Disk Size ") + tableStatsColumn += ("totalSize" -> "Total Size") + tableStatsColumn += ("totalSizeTooltip" -> + "Tables Total Size (In Memory size + Overflown To Disk Size)") + tableStatsColumn += ("bucketCount" -> "Buckets") + tableStatsColumn += ("bucketCountTooltip" -> + "Number of Buckets in Table. 
Red number indicates some buckets are offline.") + tableStatsColumn += ("redundancy" -> "Redundancy") + tableStatsColumn += ("redundancyTooltip" -> "Number of redundant copies") + tableStatsColumn += ("redundancyStatus" -> "Redundancy Status") + tableStatsColumn += ("redundancyStatusTooltip" -> "Is redundancy satisfied or broken") + + val extTablesStatsTitle = "External Tables" + val extTablesStatsTitleTooltip = "External Tables Summary" + val extTableStatsColumn = scala.collection.mutable.HashMap.empty[String, String] + extTableStatsColumn += ("id" -> "Id") + extTableStatsColumn += ("idTooltip" -> "External Tables unique Identifier") + extTableStatsColumn += ("name" -> "Name") + extTableStatsColumn += ("nameTooltip" -> "External Tables Name") + extTableStatsColumn += ("type" -> "Type") + extTableStatsColumn += ("typeTooltip" -> "External Tables Type") + extTableStatsColumn += ("provider" -> "Provider") + extTableStatsColumn += ("providerTooltip" -> "External Tables Provider") + extTableStatsColumn += ("externalSource" -> "Source") + extTableStatsColumn += ("externalSourceTooltip" -> "External Source of Tables ") + +} diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardTab.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardTab.scala new file mode 100644 index 0000000000..1589d5b809 --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyDashboardTab.scala @@ -0,0 +1,90 @@ +/* + * Changes for SnappyData data platform. + * + * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.ui + +import javax.servlet.http.HttpServletRequest + +import scala.collection.mutable.ArrayBuffer + +import io.snappydata.gemxd.SnappyDataVersion +import scala.util.control.Breaks._ + +import org.apache.spark.internal.Logging +import org.apache.spark.status.api.v1.SnappyApiRootResource +import org.apache.spark.ui.JettyUtils._ + +class SnappyDashboardTab(sparkUI: SparkUI) extends SparkUITab(sparkUI, "dashboard") with Logging { + val parent = sparkUI + val appUIBaseAddress = parent.appUIAddress + + // Attaching dashboard ui page + val snappyDashboardPage = new SnappyDashboardPage(this) + attachPage(snappyDashboardPage) + // Attaching members details page + val snappyMemberDetailsPage = new SnappyMemberDetailsPage(this) + attachPage(snappyMemberDetailsPage) + // Attach Tab + parent.attachTab(this) + + // Move Dashboard tab to first place + val tabsList = parent.getTabs + val newTabsList = ArrayBuffer[WebUITab]() + // Add dashboard first + newTabsList += tabsList.last + // Add remaining tabs in tabs list + tabsList.foreach(tab => { + if (!tab.prefix.equalsIgnoreCase("dashboard")) { + newTabsList += tab + } + }) + + // Set updated tabs list + parent.setTabs(newTabsList) + + // Set SnappyData Product Version in SparkUI + SparkUI.setProductVersion(SnappyDataVersion.getSnappyDataProductVersion) + + updateRedirectionHandler + + // Replace default spark jobs page redirection handler by Snappy Dashboard page + // redirection handler + def updateRedirectionHandler: Unit = { + val handlers = 
parent.getHandlers + breakable { + handlers.foreach(h => { + if (h.getContextPath.equals("/")) { + // Detach DEFAULT JOBS page redirection handler + parent.detachHandler(h) + // Attach DASHBOARD page redirection handler + parent.attachHandler(createRedirectHandler("/", "/dashboard/", basePath = basePath)) + break + } + }) + } + + parent.attachHandler(SnappyApiRootResource.getServletHandler(parent)) + // create and add member logs request handler + parent.attachHandler(createServletHandler("/dashboard/memberDetails/log", + (request: HttpServletRequest) => snappyMemberDetailsPage.renderLog(request), + parent.securityManager, + parent.conf)) + } + +} diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyMemberDetailsPage.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyMemberDetailsPage.scala new file mode 100644 index 0000000000..9937bc355d --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyMemberDetailsPage.scala @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.ui + +import java.io.File +import java.text.SimpleDateFormat +import java.util.Date +import javax.servlet.http.HttpServletRequest + +import scala.collection.mutable +import scala.util.control.Breaks._ +import scala.xml.{Node, Unparsed} + +import com.pivotal.gemfirexd.internal.engine.distributed.GfxdListResultCollector +import com.pivotal.gemfirexd.internal.engine.distributed.GfxdListResultCollector.ListResultCollectorValue +import com.pivotal.gemfirexd.internal.engine.sql.execute.MemberLogsMessage +import com.pivotal.gemfirexd.internal.engine.ui.MemberStatistics +import io.snappydata.SnappyTableStatsProviderService + +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + + +private[ui] class SnappyMemberDetailsPage(parent: SnappyDashboardTab) + extends WebUIPage("memberDetails") with Logging { + + private var workDir: File = null + private var logFileName: String = null + private val defaultBytes: Long = 1024 * 100 + + private def createPageTitleNode(title: String): Seq[Node] = { +
+
+
+
+
+
+
+ + +
+
Auto Refresh:
+
+
+
+
+

+ {title} +

+
+
+ } + + private def getMemberStats(memberDetails: MemberStatistics): Seq[Node] = { + + val status = memberDetails.getStatus + + val statusImgUri = if (status.equalsIgnoreCase("running")) { + "/static/snappydata/running-status-icon-70x68.png" + } else { + "/static/snappydata/warning-status-icon-70x68.png" + } + + val memberType = { + if (memberDetails.isLead) { + if (memberDetails.isLeadActive) { + "LEAD (Active)" + } else { + "LEAD" + } + } else if (memberDetails.isLocator) { + "LOCATOR" + } else if (memberDetails.isDataServer) { + "DATA SERVER" + } else { + "CONNECTOR" + } + } + + val cpuUsage = memberDetails.getCpuActive.toDouble; + + val diskStoreDiskSpace = memberDetails.getDiskStoreDiskSpace + + val heapStoragePoolUsed = memberDetails.getHeapStoragePoolUsed + val heapStoragePoolSize = memberDetails.getHeapStoragePoolSize + val heapExecutionPoolUsed = memberDetails.getHeapExecutionPoolUsed + val heapExecutionPoolSize = memberDetails.getHeapExecutionPoolSize + + val offHeapStoragePoolUsed = memberDetails.getOffHeapStoragePoolUsed + val offHeapStoragePoolSize = memberDetails.getOffHeapStoragePoolSize + val offHeapExecutionPoolUsed = memberDetails.getOffHeapExecutionPoolUsed + val offHeapExecutionPoolSize = memberDetails.getOffHeapExecutionPoolSize + + val heapMemorySize = memberDetails.getHeapMemorySize + val heapMemoryUsed = memberDetails.getHeapMemoryUsed + val offHeapMemorySize = memberDetails.getOffHeapMemorySize + val offHeapMemoryUsed = memberDetails.getOffHeapMemoryUsed + val jvmHeapSize = memberDetails.getJvmTotalMemory + val jvmHeapUsed = memberDetails.getJvmUsedMemory + + var memoryUsage: Long = 0 + if ((heapMemorySize + offHeapMemorySize) > 0) { + memoryUsage = (heapMemoryUsed + offHeapMemoryUsed) * 100 / + (heapMemorySize + offHeapMemorySize) + } + var jvmHeapUsage: Long = 0 + if (jvmHeapSize > 0) { + jvmHeapUsage = jvmHeapUsed * 100 / jvmHeapSize + } + + val memberBasicDetailsContent = { +
+ Member :
+ {memberDetails.getId} +
+
+ Type :
+ {memberType} +
+
+ Process ID :
+ {memberDetails.getProcessId} +
+
+ Status :
+ {status} +
+ } + + val heapHtmlContent = if (memberType.toString.equalsIgnoreCase("LOCATOR")) { +
+ Storage Heap:
+ {SnappyMemberDetailsPage.ValueNotApplicable} +
+
+ Execution Heap:
+ {SnappyMemberDetailsPage.ValueNotApplicable} +
+
+ Total Heap:
+ {SnappyMemberDetailsPage.ValueNotApplicable} +
+ } else { +
+ Storage Heap:
+ + {Utils.bytesToString(heapStoragePoolUsed).toString + " / " + + Utils.bytesToString(heapStoragePoolSize).toString} + +
+
+ Execution Heap:
+ + {Utils.bytesToString(heapExecutionPoolUsed).toString + " / " + + Utils.bytesToString(heapExecutionPoolSize).toString} + +
+
+ Total Heap:
+ + {Utils.bytesToString(heapMemoryUsed).toString + " / " + + Utils.bytesToString(heapMemorySize).toString} + +
+ } + + val offHeapHtmlContent = if (memberType.toString.equalsIgnoreCase("LOCATOR")) { +
+ Storage Off-Heap:
+ + {SnappyMemberDetailsPage.ValueNotApplicable} + +
+
+ Execution Off-Heap:
+ + {SnappyMemberDetailsPage.ValueNotApplicable} + +
+
+ Total Off-Heap:
+ + {SnappyMemberDetailsPage.ValueNotApplicable} + +
+ } else { +
+ Storage Off-Heap:
+ + {Utils.bytesToString(offHeapStoragePoolUsed).toString + " / " + + Utils.bytesToString(offHeapStoragePoolSize).toString} + +
+
+ Execution Off-Heap:
+ + {Utils.bytesToString(offHeapExecutionPoolUsed).toString + " / " + + Utils.bytesToString(offHeapExecutionPoolSize).toString} + +
+
+ Total Off-Heap:
+ + {Utils.bytesToString(offHeapMemoryUsed).toString + " / " + + Utils.bytesToString(offHeapMemorySize).toString} + +
+ } + + val diskSpaceHtmlContent = { +
+ Disk Space:
+ + {Utils.bytesToString(diskStoreDiskSpace).toString} + +
+ } + +
+
+ {memberBasicDetailsContent} +
+ {heapHtmlContent} +
+ {offHeapHtmlContent} +
+ {diskSpaceHtmlContent} +
+
+
+
+
+
+
+
+
+
+
+
+ } + + override def render(request: HttpServletRequest): Seq[Node] = { + + val offset = Option(request.getParameter("offset")).map(_.toLong) + val byteLength = + Option(request.getParameter("byteLength")).map(_.toLong).getOrElse(defaultBytes) + + val memberId = Option(request.getParameter("memId")).map { memberId => + UIUtils.decodeURLParameter(memberId) + }.getOrElse { + throw new IllegalArgumentException(s"Missing memId parameter") + } + + val allMembers = SnappyTableStatsProviderService.getService.getMembersStatsFromService + val memberDetails: MemberStatistics = { + var mem: MemberStatistics = null + breakable { + allMembers.foreach(m => { + if (m._2.getId().equalsIgnoreCase(memberId)) { + mem = m._2 + break + } + }) + } + mem + } + + if (memberDetails == null) { + throw new IllegalArgumentException(s"Missing memId parameter") + } + + val memberStats = getMemberStats(memberDetails) + + // set members workDir and LogFileName + workDir = new File(memberDetails.getUserDir) + logFileName = memberDetails.getLogFile + + // Get Log Details + val collector = new GfxdListResultCollector(null, true) + val msg = new MemberLogsMessage(collector) + msg.setMemberId(memberId) + msg.setByteLength(byteLength) + msg.setLogDirectory(workDir); + msg.setLogFileName(logFileName); + + if (offset == None) { + // set offset null + msg.setOffset(null) + } else { + msg.setOffset(offset.get) + } + + msg.executeFunction() + + val memStats = collector.getResult + val itr = memStats.iterator() + var logData: java.util.HashMap[String, Any] = new java.util.HashMap[String, Any]; + + while (itr.hasNext) { + val o = itr.next().asInstanceOf[ListResultCollectorValue] + val memMap = o.resultOfSingleExecution.asInstanceOf[java.util.HashMap[String, Any]] + logData = memMap.get("logData").asInstanceOf[java.util.HashMap[String, Any]] + } + + val logText = logData.get("logText") + val startByte = logData.get("startIndex").asInstanceOf[Long] + val endByte = logData.get("endIndex").asInstanceOf[Long] + 
val logLength = logData.get("totalLength").asInstanceOf[Long] + + val curLogLength = endByte - startByte + + val range = + + Showing {curLogLength} Bytes: {startByte.toString} - {endByte.toString} of {logLength} + + + val moreButton = + + + val newButton = + + + val alert = + + + val logParams = "/?memId=%s".format(memberId) + + val jsOnload = "window.onload = " + + s"initLogPage('$logParams', $curLogLength, $startByte, $endByte, $logLength, $byteLength);" + + val content = +
+ {range} +
+
{moreButton}
+
{logText}
+ {alert} +
{newButton}
+
+ +
+ + val pageHeaderText: String = SnappyMemberDetailsPage.pageHeaderText + + // Generate Pages HTML + val pageTitleNode = createPageTitleNode(pageHeaderText) + + var PageContent: Seq[Node] = mutable.Seq.empty + + val memberLogTitle = +
+
+

+ Member Logs +

+
+ Location : + {memberDetails.getUserDir}/{memberDetails.getLogFile} +
+
+
+ + val jsScripts = ++ + + + PageContent = jsScripts ++ pageTitleNode ++ memberStats ++ memberLogTitle ++ content + + UIUtils.headerSparkPage(pageHeaderText, PageContent, parent, Some(500), + useDataTables = true, isSnappyPage = true) + } + + def renderLog(request: HttpServletRequest): String = { + + val offset = Option(request.getParameter("offset")).map(_.toLong) + val byteLength = + Option(request.getParameter("byteLength")).map(_.toLong).getOrElse(defaultBytes) + + val memberId = Option(request.getParameter("memId")).map { memberId => + UIUtils.decodeURLParameter(memberId) + }.getOrElse { + throw new IllegalArgumentException(s"Missing memId parameter") + } + + // Get Log Details + val collector = new GfxdListResultCollector(null, true) + val msg = new MemberLogsMessage(collector) + msg.setMemberId(memberId) + msg.setByteLength(byteLength) + msg.setLogDirectory(workDir) + msg.setLogFileName(logFileName) + + if (offset == None) { + // set offset null + msg.setOffset(null) + } else { + msg.setOffset(offset.get) + } + + msg.executeFunction() + + val memStats = collector.getResult + val itr = memStats.iterator() + var logData: java.util.HashMap[String, Any] = new java.util.HashMap[String, Any]; + + while (itr.hasNext) { + val o = itr.next().asInstanceOf[ListResultCollectorValue] + val memMap = o.resultOfSingleExecution.asInstanceOf[java.util.HashMap[String, Any]] + logData = memMap.get("logData").asInstanceOf[java.util.HashMap[String, Any]] + } + val logText = logData.get("logText") + val startByte = logData.get("startIndex").asInstanceOf[Long] + val endByte = logData.get("endIndex").asInstanceOf[Long] + val logLength = logData.get("totalLength").asInstanceOf[Long] + + val pre = + s"==== Bytes $startByte-$endByte of $logLength of ${workDir.getPath}/$logFileName ====\n" + + pre + logText + + } + +} + +object SnappyMemberDetailsPage { + val pageHeaderText = "Member Details" + + object Status { + val stopped = "Stopped" + val running = "Running" + } + + val 
ValueNotApplicable = "N/A" + + val memberStats = scala.collection.mutable.HashMap.empty[String, String] + memberStats += ("status" -> "Status") + memberStats += ("statusTooltip" -> "Members Status") + memberStats += ("cpuUsage" -> "CPU Usage") + memberStats += ("cpuUsageTooltip" -> "CPU used by Member Host") + memberStats += ("memoryUsage" -> "Memory Usage") + memberStats += ("memoryUsageTooltip" -> "Memory(Heap + Off-Heap) used by Member") + memberStats += ("jvmHeapUsage" -> "JVM Heap Usage") + memberStats += ("jvmHeapUsageTooltip" -> "Clusters Total JVM Heap Usage") +} \ No newline at end of file diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsPage.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsPage.scala new file mode 100644 index 0000000000..e04c48d4bc --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsPage.scala @@ -0,0 +1,72 @@ +/* + * Changes for SnappyData data platform. + * + * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package org.apache.spark.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import com.pivotal.gemfirexd.internal.engine.ui.SnappyRegionStats +import io.snappydata.SnappyTableStatsProviderService + +import org.apache.spark.Logging +import org.apache.spark.util.Utils + +/** Page showing list of tables currently stored in the cluster */ +private[ui] class SnappyStatsPage(parent: SnappyStatsTab) + extends WebUIPage("") with Logging { + val numFormatter = java.text.NumberFormat.getIntegerInstance + + def render(request: HttpServletRequest): Seq[Node] = { + val uiDisplayInfo = SnappyTableStatsProviderService.getService + .getAggregatedStatsOnDemand + + val uiTableInfo = uiDisplayInfo._1 + val nodes = if (uiTableInfo.nonEmpty) { + +

Snappy Tables

{UIUtils.listingTable(header, rowTable, uiTableInfo.values)} +
+ } else Nil + + UIUtils.headerSparkPage("Snappy Store", nodes, parent, Some(500)) + + } + + private def header = Seq("Table Name", "Table Type", "Memory Used", "Total Rows") + + + private def rowTable(stats: SnappyRegionStats) = { + val columnTable = if (stats.isColumnTable) " COLUMN " else " ROW " + + + {stats.getTableName} + + + {columnTable} + + + {Utils.bytesToString(stats.getSizeInMemory)} + + + {numFormatter.format(stats.getRowCount)} + + + } +} diff --git a/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsTab.scala b/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsTab.scala new file mode 100644 index 0000000000..35cc22f1a3 --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/ui/SnappyStatsTab.scala @@ -0,0 +1,30 @@ +/* + * Changes for SnappyData data platform. + * + * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package org.apache.spark.ui + +import org.apache.spark.Logging + +/** Web UI showing storage status of all Snappy Tables */ +private[ui] class SnappyStatsTab(sparkUI: SparkUI) + extends SparkUITab(sparkUI, "Snappy Store") with Logging { + val parent = sparkUI + attachPage(new SnappyStatsPage(this)) + parent.attachTab(this) +} diff --git a/cluster/src/main/scala/org/apache/spark/util/SnappyUtils.scala b/cluster/src/main/scala/org/apache/spark/util/SnappyUtils.scala new file mode 100644 index 0000000000..8359d542bf --- /dev/null +++ b/cluster/src/main/scala/org/apache/spark/util/SnappyUtils.scala @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package org.apache.spark.util + +import java.io.File +import java.net.{URL, URLClassLoader} +import java.security.SecureClassLoader + +import _root_.io.snappydata.Constant +import com.pivotal.gemfirexd.internal.engine.Misc +import org.joda.time.DateTime +import spark.jobserver.util.ContextURLClassLoader +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.collection.ToolsCallbackInit +import org.apache.spark.ui.SparkUI +import org.apache.spark.{SparkContext, SparkEnv} + +import scala.util.Try + +object SnappyUtils { + + def getSparkUI(sc: SparkContext): Option[SparkUI] = sc.ui + + def getSnappyStoreContextLoader(parent: ClassLoader): ClassLoader = parent match { + case _: SnappyContextLoader => parent // no double wrap + case _ => new SnappyContextLoader(parent) + } + + def removeJobJar(sc: SparkContext): Unit = { + def getName(path: String): String = new File(path).getName + + val jobJarToRemove = sc.getLocalProperty(Constant.CHANGEABLE_JAR_NAME) + val keyToRemove = sc.listJars().filter(getName(_) == getName(jobJarToRemove)) + if (keyToRemove.nonEmpty) { + val callbacks = ToolsCallbackInit.toolsCallback + // @TODO This is a temp workaround to fix SNAP-1133. sc.addedJar + // should be directly be accessible from here. + // May be due to scala version mismatch. + if (callbacks != null) { + callbacks.removeAddedJar(sc, keyToRemove.head) + } + } + } + + def removeJobJar(sc: SparkContext, jarName: String): Unit = { + def getName(path: String): String = new File(path).getName + + val keyToRemove = sc.listJars().filter(getName(_) == getName(jarName)) + if (keyToRemove.nonEmpty) { + val callbacks = ToolsCallbackInit.toolsCallback + // @TODO This is a temp workaround to fix SNAP-1133. sc.addedJar + // should be directly be accessible from here. + // May be due to scala version mismatch. 
+ if (callbacks != null) { + callbacks.removeAddedJar(sc, keyToRemove.head) + } + } + } + + def setSessionDependencies(sparkContext: SparkContext, + appName: String, + classLoader: ClassLoader): Unit = { + assert(classOf[URLClassLoader].isAssignableFrom(classLoader.getClass)) + val dependentJars = classLoader.asInstanceOf[URLClassLoader].getURLs + val sparkJars = dependentJars.map(url => { + Try(sparkContext.env.rpcEnv.fileServer.addJar(new File(url.toURI))).getOrElse("") + }) + val localProperty = (Seq(appName, DateTime.now) ++ sparkJars.filterNot( + _.isEmpty).toSeq).mkString(",") + sparkContext.setLocalProperty(Constant.CHANGEABLE_JAR_NAME, localProperty) + } + + def clearSessionDependencies(sparkContext: SparkContext): Unit = { + sparkContext.setLocalProperty(Constant.CHANGEABLE_JAR_NAME, null) + } + + def getSnappyContextURLClassLoader( + parent: ContextURLClassLoader): ContextURLClassLoader = parent match { + case _: SnappyContextURLLoader => parent // no double wrap + case _ => new SnappyContextURLLoader(parent) + } + + def doFetchFile( + url: String, + targetDir: File, + filename: String): File = { + + val env = SparkEnv.get + val conf = env.conf + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + org.apache.spark.util.Utils.doFetchFile(url, targetDir, + filename, conf, env.securityManager, hadoopConf) + new File(targetDir, filename) + } +} + +class SnappyContextLoader(parent: ClassLoader) + extends SecureClassLoader(parent) { + + @throws(classOf[ClassNotFoundException]) + override def loadClass(name: String): Class[_] = { + try { + parent.loadClass(name) + } catch { + case _: ClassNotFoundException => + Misc.getMemStore.getDatabase.getClassFactory.loadClassFromDB(name) + } + } +} + +class SnappyContextURLLoader(parent: ClassLoader) + extends ContextURLClassLoader(Array[URL](), parent) { + + @throws(classOf[ClassNotFoundException]) + override def loadClass(name: String): Class[_] = { + try { + super.loadClass(name) + } catch { + case _: 
ClassNotFoundException => + Misc.getMemStore.getDatabase.getClassFactory.loadClassFromDB(name) + } + } +} diff --git a/cluster/src/test/java/io/snappydata/app/SnappyContextConcurrencySuite.java b/cluster/src/test/java/io/snappydata/app/SnappyContextConcurrencySuite.java new file mode 100644 index 0000000000..4f11b6e53b --- /dev/null +++ b/cluster/src/test/java/io/snappydata/app/SnappyContextConcurrencySuite.java @@ -0,0 +1,162 @@ +package io.snappydata.app; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; + +import com.google.common.base.Joiner; +import io.snappydata.SnappyFunSuite; +import io.snappydata.test.dunit.DistributedTestBase; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SnappyContext; +import org.junit.Test; + +public class SnappyContextConcurrencySuite extends SnappyFunSuite { + + + @Test + @SuppressWarnings("unchecked") + public void testMultithreadedAccess() { + + final AtomicLong counter = new AtomicLong(); + + int poolSize = 5; + List tasks = new ArrayList<>(); + ExecutorService pool = Executors.newFixedThreadPool(poolSize); + for (int i = 1; i <= poolSize; i++) { + tasks.add(new SnappyQueryJob(SnappyContext.apply(sc()), i, counter)); + } + + long l1 = System.currentTimeMillis(); + List> futures ; + try { + futures = pool.invokeAll(tasks); + + DistributedTestBase.WaitCriterion ev = new DistributedTestBase.WaitCriterion() { + public boolean done() { + return counter.get() == 50; + } + public String description() { + return null; + } + }; + DistributedTestBase.waitForCriterion(ev, 3*60*1000, 5000, true); + pool.shutdownNow(); + + } catch (InterruptedException e) 
{ + System.out.println("Thread interrupted"); + } + long l2 = System.currentTimeMillis(); + System.out.println(" Time taken " + (l2 - l1)); + + + } +} + +class SnappyQueryJob implements Callable, Serializable { + String threadId; + transient org.apache.spark.sql.SnappyContext sqlContext; + AtomicLong counter; + + + public SnappyQueryJob(org.apache.spark.sql.SnappyContext _sqlContext, int + id, AtomicLong ctr) { + + threadId = "thread_" + id; + this.sqlContext = _sqlContext; + this.counter = ctr; + } + + public void actualWork() { + for (int i = 0; i < 10; i++) { + + String tempTableName = threadId; + String tblName = threadId + "_" + i; + + List dummyList = new ArrayList(); + for (int j = 0; j < 2; j++) { + DummyBeanClass object = new DummyBeanClass(); + object.setCol2("" + i); + object.setCol1(i); + dummyList.add(object); + } + + Dataset tempdf = sqlContext.emptyDataFrame(); + tempdf.registerTempTable(tempTableName); + + JavaSparkContext javaSparkContext = new JavaSparkContext(sqlContext.sparkContext()); + JavaRDD rdd = javaSparkContext.parallelize(dummyList); + Dataset df = sqlContext.createDataFrame(rdd, DummyBeanClass.class); + df.write().format("column").saveAsTable(tblName); + String _query = String.format("select count(*) from %s", tblName); + + String _tempQuery = String.format("select count(*) from %s", tempTableName); + + + List res; + try { + res = sqlContext.sql(_query).collectAsList(); + res = sqlContext.sql(_tempQuery).collectAsList(); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("*Exception " + debugTables() + "**"); + throw e; + } + sqlContext.dropTable(tblName, false); + sqlContext.dropTable(tempTableName, false); + //System.out.println(" dropped table " + tblName + " and tempTable " + tempTableName); + counter.addAndGet(1); + try { + Thread.sleep(1000); + } catch (Exception e) { + System.out.println("Thread interrupted"); + } + } + } + + public String call() { + try { + actualWork(); + } catch (Throwable th) { + 
th.printStackTrace(); + } + + return "success"; + } + + private String debugTables() { + String v = Joiner.on(',').join(sqlContext.tableNames()); + if (v == null) return ""; + else return v; + } + + public class DummyBeanClass implements Serializable { + + public Integer col1; + public String col2; + + public Integer getCol1() { + return col1; + } + + public String getCol2() { + return col2; + } + + public void setCol1(Integer col1) { + this.col1 = col1; + } + + public void setCol2(String col2) { + this.col2 = col2; + } + } +} diff --git a/cluster/src/test/resources/log4j.properties b/cluster/src/test/resources/log4j.properties new file mode 100644 index 0000000000..85513aca15 --- /dev/null +++ b/cluster/src/test/resources/log4j.properties @@ -0,0 +1,136 @@ +# +# Copyright (c) 2018 SnappyData, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# +# Some parts taken from Spark's log4j.properties having license below. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootCategory=INFO, file + +# RollingFile appender +log4j.appender.file=org.apache.log4j.RollingFileAppender +log4j.appender.file.append=true +log4j.appender.file.file=snappydata.log +log4j.appender.file.MaxFileSize=1GB +log4j.appender.file.MaxBackupIndex=10000 +log4j.appender.file.layout=io.snappydata.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Appender for code dumps of WholeStageCodegenExec, CodeGenerator etc +log4j.appender.code=org.apache.log4j.RollingFileAppender +log4j.appender.code.append=true +log4j.appender.code.file=generatedcode.log +log4j.appender.code.MaxFileSize=1GB +log4j.appender.code.MaxBackupIndex=10000 +log4j.appender.code.layout=io.snappydata.log4j.PatternLayout +log4j.appender.code.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.out +log4j.appender.console.layout=io.snappydata.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.spark-project.jetty=WARN +org.spark-project.jetty.LEVEL=WARN +log4j.logger.org.mortbay.jetty=WARN +log4j.logger.org.eclipse.jetty=WARN + +# Some packages are noisy for no good reason. 
+log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false +log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF + +log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false +log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF + +log4j.additivity.hive.log=false +log4j.logger.hive.log=OFF + +log4j.additivity.parquet.hadoop.ParquetRecordReader=false +log4j.logger.parquet.hadoop.ParquetRecordReader=OFF + +log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false +log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF + +log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false +log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF + +log4j.additivity.hive.ql.metadata.Hive=false +log4j.logger.hive.ql.metadata.Hive=OFF + +log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false +log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR + +# Other Spark classes that generate unnecessary logs at INFO level +log4j.logger.org.apache.spark.broadcast.TorrentBroadcast=WARN +log4j.logger.org.apache.spark.ContextCleaner=WARN +log4j.logger.org.apache.spark.MapOutputTracker=WARN +log4j.logger.org.apache.spark.scheduler.TaskSchedulerImpl=WARN +log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN +log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN +log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN +log4j.logger.org.apache.spark.scheduler.FairSchedulableBuilder=WARN +log4j.logger.org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint=WARN +log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN +log4j.logger.org.apache.hadoop.hive=WARN +log4j.logger.org.apache.spark.sql.execution.datasources=WARN +log4j.logger.org.apache.spark.scheduler.SnappyTaskSchedulerImpl=WARN +log4j.logger.org.apache.spark.MapOutputTrackerMasterEndpoint=WARN +log4j.logger.org.apache.spark.MapOutputTrackerMaster=WARN 
+log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN +log4j.logger.org.apache.spark.MapOutputTrackerWorker=WARN +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR +log4j.logger.org.apache.hadoop.io.compress=WARN +log4j.logger.spark.jobserver.LocalContextSupervisorActor=WARN +log4j.logger.spark.jobserver.JarManager=WARN +log4j.logger.org.datanucleus=ERROR +# Task logger created in SparkEnv +log4j.logger.org.apache.spark.Task=WARN +log4j.logger.org.apache.spark.sql.catalyst.parser.CatalystSqlParser=WARN + +# Keep log-level of some classes as INFO even if root level is higher +log4j.logger.io.snappydata.impl.LeadImpl=INFO +log4j.logger.io.snappydata.impl.ServerImpl=INFO +log4j.logger.io.snappydata.impl.LocatorImpl=INFO +log4j.logger.spray.can.server.HttpListener=INFO + +# for generated code of plans +log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenExec=DEBUG, code +log4j.additivity.org.apache.spark.sql.execution.WholeStageCodegenExec=false +log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenRDD=INFO, code +log4j.additivity.org.apache.spark.sql.execution.WholeStageCodegenRDD=false +# for all Spark generated code (including ad-hoc UnsafeProjection calls etc) +log4j.logger.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=WARN, code +log4j.additivity.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=false +# for SnappyData generated code used on store (ComplexTypeSerializer, JDBC inserts ...) 
+log4j.logger.org.apache.spark.sql.store.CodeGeneration=INFO, code +log4j.additivity.org.apache.spark.sql.store.CodeGeneration=false diff --git a/cluster/src/test/resources/scripts/cassandra_script1 b/cluster/src/test/resources/scripts/cassandra_script1 new file mode 100644 index 0000000000..3af588e400 --- /dev/null +++ b/cluster/src/test/resources/scripts/cassandra_script1 @@ -0,0 +1,8 @@ +DROP KEYSPACE if exists test; +CREATE KEYSPACE test WITH replication = {'class':'SimpleStrategy', 'replication_factor' : 3}; +use test; +CREATE TABLE customer(customer_id int PRIMARY KEY, customer_name text, customer_city text, customer_phone varint); +insert into customer(customer_id,customer_name,customer_city,customer_phone) values(1,'aa','pune',123456789); +insert into customer(customer_id,customer_name,customer_city,customer_phone) values(2,'bb','pune',123456789); +insert into customer(customer_id,customer_name,customer_city,customer_phone) values(3,'cc','pune',123456789); +select * from customer; \ No newline at end of file diff --git a/cluster/src/test/scala/io/snappydata/QueryTest.scala b/cluster/src/test/scala/io/snappydata/QueryTest.scala new file mode 100644 index 0000000000..5ae4ffc395 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/QueryTest.scala @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata + +import java.io.File + +import scala.collection.JavaConverters._ + +import com.pivotal.gemfirexd.TestUtil + +import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchange} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.{AnalysisException, Row, SnappyContext, SnappySession, SparkSession} + +class QueryTest extends SnappyFunSuite { + + test("Test exists in select") { + val snContext = SnappyContext(sc) + + snContext.sql("CREATE TABLE titles(title_id varchar(20), title varchar(80) " + + "not null, type varchar(12) not null, pub_id varchar(4), price int not null, " + + "advance int not null , royalty int , ytd_sales int,notes varchar(200))") + + snContext.sql("insert into titles values ('1', 'Secrets', " + + "'popular_comp', '1389', 20, 8000, 10, 4095,'Note 1')") + snContext.sql("insert into titles values ('2', 'The', " + + "'business', '1389', 19, 5000, 10, 4095,'Note 2')") + snContext.sql("insert into titles values ('3', 'Emotional', " + + "'psychology', '0736', 7, 4000, 10, 3336,'Note 3')") + snContext.sql("insert into titles values ('4', 'Prolonged', " + + "'psychology', '0736', 19, 2000, 10, 4072,'Note 4')") + snContext.sql("insert into titles values ('5', 'With', " + + "'business', '1389', 11, 5000, 10, 3876,'Note 5')") + snContext.sql("insert into titles values ('6', 'Valley', " + + "'mod_cook', '0877', 9, 0, 12, 2032,'Note 6')") + snContext.sql("insert into titles values ('7', 'Any?', " + + "'trad_cook', '0877', 14, 8000, 10, 4095,'Note 7')") + snContext.sql("insert into titles values ('8', 'Fifty', " + + "'trad_cook', '0877', 11, 4000, 14, 1509,'Note 8')") + + snContext.sql("CREATE TABLE sales(stor_id varchar(4) not null, " + + "ord_num varchar(20) not null, qty int not null, " + + "payterms varchar(12) not null,title_id varchar(80))") + + snContext.sql("insert into sales values('1', 'QA7442.3', 75, 'ON 
Billing','1')") + snContext.sql("insert into sales values('2', 'D4482', 10, 'Net 60', '1')") + snContext.sql("insert into sales values('3', 'N914008', 20, 'Net 30', '2')") + snContext.sql("insert into sales values('4', 'N914014', 25, 'Net 30', '3')") + snContext.sql("insert into sales values('5', '423LL922', 15, 'ON Billing','3')") + snContext.sql("insert into sales values('6', '423LL930', 10, 'ON Billing','2')") + + val df = snContext.sql("SELECT title, price FROM titles WHERE EXISTS (" + + "SELECT * FROM sales WHERE sales.title_id = titles.title_id AND qty >30)") + df.collect() + } + + test("SNAP-1159_1482") { + val session = SnappyContext(sc).snappySession + session.sql(s"set ${Property.ColumnBatchSize.name}=100") + session.sql(s"set ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key}=1") + val data1 = session.range(20).selectExpr("id") + val data2 = session.range(80).selectExpr("id", "cast ((id / 4) as long) as k", + "(case when (id % 4) < 2 then cast((id % 4) as long) else null end) as v") + data1.write.format("column").saveAsTable("t1") + data2.write.format("column").saveAsTable("t2") + + // SNAP-1482: check for engineering format numeric values + var r = session.sql("select 2.1e-2").collect() + assert(r(0).getDouble(0) == 0.021) + r = session.sql("select 2.1e+2").collect() + assert(r(0).getDouble(0) == 210) + r = session.sql("select 2.1e2").collect() + assert(r(0).getDouble(0) == 210) + + SparkSession.clearActiveSession() + val spark = SparkSession.builder().getOrCreate() + val sdata1 = spark.range(20).selectExpr("id") + val sdata2 = spark.createDataFrame(data2.collect().toSeq.asJava, data2.schema) + sdata1.createOrReplaceTempView("t1") + sdata2.createOrReplaceTempView("t2") + + val query = "select k, v from t1 inner join t2 where t1.id = t2.k order by k, v" + val df = session.sql(query) + val result1 = df.collect().mkString(" ") + val result2 = spark.sql(query).collect().mkString(" ") + if (result1 != result2) { + fail(s"Expected result: $result2\nGot: $result1") + 
} + } + + /** + * Distinct query failure in code generation reported on github + * (https://github.com/SnappyDataInc/snappydata/issues/534) + */ + test("GITHUB-534") { + val session = SnappyContext(sc).snappySession + session.sql("CREATE TABLE yes_with(device_id VARCHAR(200), " + + "sdk_version VARCHAR(200)) USING COLUMN OPTIONS(PARTITION_BY 'device_id')") + session.insert("yes_with", Row("id1", "v1"), Row("id1", "v2"), + Row("id2", "v1"), Row("id2", "v1"), Row("id2", "v3")) + val r = session.sql("select sdk_version, count(distinct device_id) from (" + + "select sdk_version,device_id from YES_WITH group by sdk_version, " + + "device_id) a group by sdk_version") + ColumnCacheBenchmark.collect(r, + Seq(Row("v1", 2), Row("v2", 1), Row("v3", 1))) + } + + test("SNAP-1714") { + val snc = new SnappySession(this.sc) + snc.sql("CREATE TABLE ColumnTable(\"a/b\" INT ,Col2 INT, Col3 INT) " + + "USING column " + + "options " + + "(" + + "PARTITION_BY 'col2'," + + "BUCKETS '1')") + snc.sql("insert into ColumnTable(\"a/b\",col2,col3) values(1,2,3)") + snc.sql("select col2,col3 from columnTable").collect() + snc.sql("select col2, col3, `a/b` from columnTable").collect() + snc.sql("select col2, col3, \"a/b\" from columnTable").collect() + snc.sql("select col2, col3, \"A/B\" from columnTable").collect() + snc.sql("select col2, col3, `A/B` from columnTable").collect() + + snc.sql("select col2,col3 from columnTable").collect() + snc.table("columnTable").select("col3", "col2", "a/b").collect() + snc.table("columnTable").select("col3", "Col2", "A/b").collect() + snc.table("columnTable").select("COL3", "Col2", "A/B").collect() + snc.table("columnTable").select("COL3", "Col2", "`A/B`").collect() + snc.table("columnTable").select("COL3", "Col2", "`a/b`").collect() + + snc.conf.set("spark.sql.caseSensitive", "true") + try { + snc.table("columnTable").select("col3", "col2", "A/b").collect() + fail("expected to fail for case-sensitive=true") + } catch { + case _: AnalysisException => // 
expected + } + try { + snc.table("columnTable").select("COL3", "COL2", "A/B").collect() + fail("expected to fail for case-sensitive=true") + } catch { + case _: AnalysisException => // expected + } + try { + snc.sql("select col2, col3, \"A/B\" from columnTable").collect() + fail("expected to fail for case-sensitive=true") + } catch { + case _: AnalysisException => // expected + } + try { + snc.sql("select COL2, COL3, `A/B` from columnTable").collect() + fail("expected to fail for case-sensitive=true") + } catch { + case _: AnalysisException => // expected + } + // hive meta-store is case-insensitive so column table names are not + snc.sql("select col2, col3, \"a/b\" from columnTable").collect() + snc.sql("select col2, col3, `a/b` from ColumnTable").collect() + snc.table("columnTable").select("col3", "col2", "a/b").collect() + snc.table("COLUMNTABLE").select("col3", "col2", "a/b").collect() + + snc.conf.set("spark.sql.caseSensitive", "false") + } + + private def setupTestData(session: SnappySession): Unit = { + import session.implicits._ + + val row = identity[(java.lang.Integer, java.lang.Double)] _ + + val l = Seq( + row(1, 2.0), + row(1, 2.0), + row(2, 1.0), + row(2, 1.0), + row(3, 3.0), + row(null, null), + row(null, 5.0), + row(6, null)).toDF("a", "b") + + val r = Seq( + row(2, 3.0), + row(2, 3.0), + row(3, 2.0), + row(4, 1.0), + row(null, null), + row(null, 5.0), + row(6, null)).toDF("c", "d") + + val t = r.filter($"c".isNotNull && $"d".isNotNull) + + l.createOrReplaceTempView("l") + r.createOrReplaceTempView("r") + t.createOrReplaceTempView("t") + } + + test("SNAP-1886_1888") { + val session = this.snc.snappySession + import session.implicits._ + + setupTestData(session) + + session.dropTable("t1", ifExists = true) + session.dropTable("t2", ifExists = true) + session.dropTable("onerow", ifExists = true) + + Seq(1, 2).toDF("c1").write.format("column").saveAsTable("t1") + Seq(1).toDF("c2").write.format("column").saveAsTable("t2") + 
Seq(1).toDF("c1").write.format("column").saveAsTable("onerow") + + // SNAP-1886 + checkAnswer( + session.sql( + """ + | select c1 from onerow t1 + | where exists (select 1 + | from (select 1 from onerow t2 LIMIT 1) + | where t1.c1=t2.c1)""".stripMargin), + Row(1) :: Nil) + + // SNAP-1888 + checkAnswer( + session.sql( + """select l.a from l + |where ( + | select cntPlusOne + 1 as cntPlusTwo from ( + | select cnt + 1 as cntPlusOne from ( + | select sum(r.c) s, count(*) cnt from r where l.a = r.c having cnt = 0 + | ) + | ) + |) = 2""".stripMargin), + Row(1) :: Row(1) :: Row(null) :: Row(null) :: Nil) + } + + test("SNAP-2088 check for null handling with dictionary optimized joins and filters") { + val snc = this.snc + val t1 = "snap2088" + val t2 = "snap2088_2" + + snc.sql(s"create table $t1 (airport_id int, name string, city string, country string) " + + s"using column options (COLUMN_BATCH_SIZE '50')") + snc.sql(s"create table $t2 (airport_id int, name string, city string, country string) " + + s"using column options (COLUMN_BATCH_SIZE '5000')") + + val data = snc.range(10000).selectExpr("cast ((rand() * 100000) as int) as airport_id", + "concat('name_', cast((id % 20) as string)) as name", + "(case when id%2=0 then null else concat('city_', cast((id%10) as string)) end) as city", + "concat('country_', cast((id % 3) as string)) as country") + data.cache() + data.count() + data.createOrReplaceTempView("data") + data.write.insertInto(t1) + data.write.insertInto(t2) + + // Some queries that either throw exception or give incorrect results as noted in SNAP-2088. 
+ val queries = Array( + "select distinct city from $t", + "select distinct city from $t order by city", + "select distinct city from $t where country like 'country_1%'", + "select * from $t where city is null", + "select * from $t where city is null and country like 'country_1%'", + "select count(*), city from $t group by city", + "select count(*), city from $t where country like 'country_1%' group by city", + "select count(*), city, collect_list(airport_id), collect_list(name), " + + "collect_list(country) from (select * from $t order by airport_id, name, country) " + + "as t group by city order by city", + "select count(*), city, collect_list(airport_id), collect_list(name), " + + "collect_list(country) from (select * from $t where country like 'country_1%' " + + " order by airport_id, name, country) as t group by city order by city" + ) + + // To validate the results against queries directly on data disabling snappy aggregation. + snc.sql("set snappydata.sql.hashAggregateSize=-1") + val expectedResults = queries.map(q => snc.sql(q.replace("$t", "data")).collect()) + + snc.sql("set snappydata.sql.hashAggregateSize=0") + var results = queries.map { q => + snc.sql(q.replace("$t", t1)) -> snc.sql(q.replace("$t", t2)) + } + + for (((r1, r2), e) <- results.zip(expectedResults)) { + checkAnswer(r1, e) + checkAnswer(r2, e) + } + + // fire updates and check again + snc.sql(s"update $t1 set airport_id = airport_id, name = name, city = city, " + + s"country = country where (airport_id % 3) = 0") + snc.sql(s"update $t2 set airport_id = airport_id, name = name, city = city, " + + s"country = country where (airport_id % 2) = 0") + + results = queries.map { q => + snc.sql(q.replace("$t", t1)) -> snc.sql(q.replace("$t", t2)) + } + + for (((r1, r2), e) <- results.zip(expectedResults)) { + checkAnswer(r1, e) + checkAnswer(r2, e) + } + } + + test("SNAP-2080 alter table add column and then index on that") { + val snc = this.snc + snc.sql(s"CREATE TABLE APP.TEST ( COL1 VARCHAR(36) 
NOT NULL ) using row options()") + snc.sql(s"ALTER TABLE APP.TEST ADD COLUMN COL2 VARCHAR(36)") + snc.sql(s"create index APP.X_TEST_COL1 on APP.TEST (col1)") + snc.sql(s"create index APP.X_TEST_COL2 on APP.TEST (col2)") + snc.sql(s"ALTER TABLE APP.TEST ADD COLUMN COL3 CHAR(4)") + snc.sql(s"create index APP.X_TEST_COL3 on APP.TEST (col3)") + snc.sql(s"insert into TEST values ('one', 'vone', 'cone'), ('two', 'vtwo', 'ctwo')") + val r = snc.sql(s"select count(*) from TEST").collect() + assert(1 === r.length) + assert(2 === r.head.get(0)) + snc.sql(s"ALTER TABLE APP.TEST ADD COLUMN COL5 blob") + } + + /** check exchange and broadcast plan reuse for row, column and parquet */ + test("SNAP-2789: check broadcast/exchange reuse") { + val session = this.snc.snappySession + + val query = "select count(t1.data), count(*) from test1 t1 join test2 t2 on (t1.id = t2.id) " + + "union all " + + "select count(*), count(t1.data) from test1 t1 join test2 t2 on (t1.id = t2.id)" + for (tableType <- Seq("column", "row", "parquet")) { + val (declaration, options) = if (tableType == "parquet") { + "external " -> ((table: String) => s"options (path '${table}_pq')") + } else "" -> ((_: String) => "options (partition_by 'data')") + + def tableDeclaration(table: String, size: Int): String = { + s"create ${declaration}table $table using $tableType ${options(table)} as " + + s"select id, case when id % 100 = 0 then null else 'data' || id end as data " + + s"from range($size)" + } + + session.sql(tableDeclaration("test1", 50000)) + session.sql(tableDeclaration("test2", 20000)) + + // with exchange + session.sessionState.conf.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD, -1L) + var df = session.sql(query) + var plan = df.queryExecution.executedPlan + // exactly one exchange of test1 and test2 is expected + val exchanges = plan.collect { + case e: ShuffleExchange if e.outputPartitioning.numPartitions > 1 => e + } + assert(exchanges.length === 2) + 
assert(exchanges.head.treeString.toLowerCase.contains("test1")) + assert(exchanges(1).treeString.toLowerCase.contains("test2")) + + var result = df.collect() + assert(result.length === 2) + assert(result(0).getLong(0) === 19800) + assert(result(1).getLong(0) === 20000) + + // with broadcast + session.sessionState.conf.setConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD, + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.defaultValue.get) + df = session.sql(query) + plan = df.queryExecution.executedPlan + // exactly one broadcast of test1 or test2 is expected + val broadcasts = plan.collect { + case e: BroadcastExchangeExec => e + } + assert(broadcasts.length === 1) + // both sides are small enough to be broadcast + val broadcastString = broadcasts.head.treeString.toLowerCase + assert(broadcastString.contains("test2") || broadcastString.contains("test1")) + result = df.collect() + assert(result.length === 2) + assert(result(0).getLong(0) === 19800) + assert(result(1).getLong(0) === 20000) + + session.sql("drop table test1") + session.sql("drop table test2") + } + // delete the directories created for parquet + TestUtil.deleteDir(new File("test1_pq")) + TestUtil.deleteDir(new File("test2_pq")) + } +} diff --git a/cluster/src/test/scala/io/snappydata/Snap_213.scala b/cluster/src/test/scala/io/snappydata/Snap_213.scala new file mode 100644 index 0000000000..6a7e18fd7f --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/Snap_213.scala @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata + +import java.sql.{Connection, DriverManager} + +import com.pivotal.gemfirexd.TestUtil +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry + +class Snap_213 + extends SnappyFunSuite + with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + super.beforeAll() + // force boot GemFireXD if not booted; just getting SnappyContext should do + logInfo(s"Using SnappyContext $snc") + } + + override def afterAll(): Unit = { + TestUtil.stopNetServer() + super.afterAll() + } + + test("Test to verify long bytes as parameters works in insert") { + DriverRegistry.register(Constant.JDBC_CLIENT_DRIVER) + val hostPort = TestUtil.startNetServer() + + logInfo("server started") + val conn: Connection = DriverManager.getConnection( + "jdbc:snappydata://" + hostPort) + + val tableName = "TEST_TABLE" + + conn.createStatement().execute("create table " + tableName + + " (fr varchar(23), id integer, b1 blob, b2 blob, b3 blob, b4 blob, " + + "b5 blob, b6 blob, b7 blob, secid integer) partition by column(id)") + + val b1 = Array.fill[Byte](10000)(0) + val b2 = Array.fill[Byte](10000)(0) + val b3 = Array.fill[Byte](15000)(0) + val b4 = Array.fill[Byte](25000)(0) + val b5 = Array.fill[Byte](14000)(0) + val b6 = Array.fill[Byte](10000)(0) + val b7 = Array.fill[Byte](20000)(0) + val stmt = conn.prepareStatement("insert into " + tableName + + " values ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + stmt.setString(1, "firstRow") + stmt.setInt(2, 10) + stmt.setBytes(3, b1) + stmt.setBytes(4, b2) + stmt.setBytes(5, b3) + stmt.setBytes(6, b4) + stmt.setBytes(7, b5) + stmt.setBytes(8, b6) + stmt.setBytes(9, b7) + stmt.setInt(10, 20) + stmt.execute() + conn.close() + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Snappy.scala 
b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Snappy.scala new file mode 100644 index 0000000000..0f09b38f4a --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Snappy.scala @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.LoadPerformance + +import java.io.{PrintStream, FileOutputStream, File} + +import com.typesafe.config.Config +import io.snappydata.benchmark.TPCHColumnPartitionedTable + +import org.apache.spark.sql._ + +/** + * Created by kishor on 29/8/16. 
+ */ +object BulkLoad_Snappy extends SnappySQLJob{ + + var tpchDataPath: String = _ + var buckets_Order_Lineitem: String = _ + + + override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = { + val snc = snSession.sqlContext + + var loadPerfFileStream: FileOutputStream = new FileOutputStream(new File(s"BulkLoadPerf.out")) + var loadPerfPrintStream:PrintStream = new PrintStream(loadPerfFileStream) + + val isSnappy = true + val dbName = "TPCH" + + val usingOptionString = s""" + USING row + OPTIONS ()""" + + + snc.sql("DROP TABLE IF EXISTS " + "LINEITEM") + snc.sql("DROP TABLE IF EXISTS " + "ORDERS") + + TPCHColumnPartitionedTable.testLoadOrderTablePerformance(snc, tpchDataPath, isSnappy, + buckets_Order_Lineitem,loadPerfPrintStream) + TPCHColumnPartitionedTable.testLoadLineItemTablePerformance(snc, tpchDataPath, isSnappy, + buckets_Order_Lineitem,loadPerfPrintStream) + + loadPerfPrintStream.close() + loadPerfFileStream.close() + } + + override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = { + + tpchDataPath = if (config.hasPath("dataLocation")) { + config.getString("dataLocation") + } else { + "/QASNAPPY/TPCH/DATA/1" + } + + buckets_Order_Lineitem = if (config.hasPath("Buckets_Order_Lineitem")) { + config.getString("Buckets_Order_Lineitem") + } else { + "15" + } + + + if (!(new File(tpchDataPath)).exists()) { + return new SnappyJobInvalid("Incorrect tpch data path. " + + "Specify correct location") + } + + SnappyJobValid() + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Spark.scala b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Spark.scala new file mode 100644 index 0000000000..55bea852fa --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/BulkLoad_Spark.scala @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.LoadPerformance + +import java.io.{PrintStream, File, FileOutputStream} + +import io.snappydata.benchmark.TPCHColumnPartitionedTable + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkConf, SparkContext} + +/** + * Created by kishor on 29/8/16. + */ +object BulkLoad_Spark { + + def main(args: Array[String]) { + var loadPerfFileStream: FileOutputStream = new FileOutputStream(new File(s"BulkLoadPerf.out")) + var loadPerfPrintStream:PrintStream = new PrintStream(loadPerfFileStream) + + val conf = new SparkConf().setAppName("BulkLoad_Spark") + + val props = null + var isSnappy = false + val sc = new SparkContext(conf) + val snc = new SQLContext(sc) + val path = args(0) + val buckets = args(1) + + snc.sql("DROP TABLE IF EXISTS " + "LINEITEM") + snc.sql("DROP TABLE IF EXISTS " + "ORDERS") + + TPCHColumnPartitionedTable.testLoadOrderTablePerformance(snc, path, isSnappy, buckets, loadPerfPrintStream) + TPCHColumnPartitionedTable.testLoadLineItemTablePerformance(snc, path, isSnappy, buckets, loadPerfPrintStream) + + loadPerfPrintStream.close() + loadPerfFileStream.close() + sc.stop() + + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/ParquetLoad.scala b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/ParquetLoad.scala new file mode 100644 index 0000000000..353f533899 --- 
/dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/LoadPerformance/ParquetLoad.scala @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.LoadPerformance + +import java.io.{File, PrintWriter} + +import scala.util.{Failure, Success, Try} + +import com.typesafe.config.Config + +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql._ + +/** + * Created by kishor on 29/8/16. 
+ */ +object ParquetLoad extends SnappySQLJob{ + + var parquetFilePath: String = _ + override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = { + + val snc = snSession.sqlContext + def getCurrentDirectory = new java.io.File(".").getCanonicalPath + val pw = new PrintWriter("ParquetLoadPerformance.out") + Try { + + // Drop tables if already exists + snc.dropTable("AIRLINE_SNAPPY_ROW", ifExists = true) + snc.dropTable("AIRLINE_SNAPPY_COLUMN", ifExists = true) + + pw.println(s"****** ParquetLoadPerformance ******") + + //Read Parquet File + val parquetFile = snc.read.parquet(parquetFilePath) + val updatedSchema = replaceReservedWords(parquetFile.schema) + + pw.println(s"Row Count : " + parquetFile.count()) + + pw.println("loading spark table" ) + var start = System.currentTimeMillis() + parquetFile.registerTempTable("AIRLINE_SPARK") + parquetFile.cache() + parquetFile.count() + var end = System.currentTimeMillis() + val sparkTableLoadTime = end-start + pw.println(s"Time taken to load spark table : $sparkTableLoadTime ") + + //Create Snappy tables + //row table + snc.createTable("AIRLINE_SNAPPY_ROW", "row", + updatedSchema, Map.empty[String, String]) + + pw.println( "Created table AIRLINE_SNAPPY_ROW " ) + + //column table + snc.createTable("AIRLINE_SNAPPY_COLUMN", "column", + updatedSchema, Map("buckets" -> "16")) + + pw.println( "Created table AIRLINE_SNAPPY_COLUMN " ) + + + // Save Parquet to snappy table + pw.println("loading row table" ) + start = System.currentTimeMillis() + parquetFile.write.format("row").mode(SaveMode.Append).saveAsTable("AIRLINE_SNAPPY_ROW") + //airlineDF.write.format("row").mode(SaveMode.Append).saveAsTable("AIRLINE_SNAPPY_ROW") + end = System.currentTimeMillis() + val snappyRowTableLoadTime = end-start + pw.println(s"Time taken to load row table : $snappyRowTableLoadTime" ) + + + pw.println("loading column table" ) + start = System.currentTimeMillis() + 
parquetFile.write.format("column").mode(SaveMode.Append).saveAsTable("AIRLINE_SNAPPY_COLUMN") + //airlineDF.write.format("column").mode(SaveMode.Append).saveAsTable("AIRLINE_SNAPPY_COLUMN") + end = System.currentTimeMillis() + val snappyColumnTableLoadTime = end-start + pw.println(s"Time taken to load column table : $snappyColumnTableLoadTime") + + val slownessRowTable = ((snappyRowTableLoadTime - sparkTableLoadTime) / sparkTableLoadTime) * 100 + val slownessColumnTable = ((snappyColumnTableLoadTime - sparkTableLoadTime) / sparkTableLoadTime) * 100 + + pw. println(s"Parquet loading to row table compare to spark table is slow by $slownessRowTable %") + pw. println(s"Parquet loading to column table compare to spark table is slow by $slownessColumnTable %") + + } match { + case Success(v) => pw.close() + s"See ${getCurrentDirectory}/CreateAndLoadAirlineDataJob.out" + case Failure(e) => pw.close(); + throw e; + } + // scalastyle:on println + } + + /** + * Validate if the data files are available, else throw SparkJobInvalid + * + */ + override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = { + parquetFilePath = if (config.hasPath("airline_file")) { + config.getString("airline_file") + } else { + "../../../../../../../../examples/quickstart/data/airlineParquetData" + } + + if (!(new File(parquetFilePath)).exists()) { + return new SnappyJobInvalid("Incorrect airline path. 
" + + "Specify airline_file property in APP_PROPS") + } + + SnappyJobValid() + } + + /** + * Replace the words that are reserved in Snappy store + * @param airlineSchema schema with reserved words + * @return updated schema + */ + private def replaceReservedWords(airlineSchema : StructType) : StructType = { + new StructType( airlineSchema.map( s => { + if (s.name.equals("Year")) { + new StructField("Year_", s.dataType, s.nullable, s.metadata) + } + else if (s.name.equals("Month")) { + new StructField("Month_", s.dataType, s.nullable, s.metadata) + } + else { + s + }}).toArray) + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/TPCHColumnPartitionedTable.scala b/cluster/src/test/scala/io/snappydata/benchmark/TPCHColumnPartitionedTable.scala new file mode 100644 index 0000000000..dd9ebf9934 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/TPCHColumnPartitionedTable.scala @@ -0,0 +1,799 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark + +import java.io.{File, PrintStream} +import java.sql.Statement + +import org.apache.spark.SparkContext +import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark +import org.apache.spark.sql.snappy._ +import org.apache.spark.sql.{DataFrame, SQLContext, SnappyContext, Column} + + +// scalastyle:off println +object TPCHColumnPartitionedTable { + + def createPartTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE TABLE PART ( " + + "P_PARTKEY INTEGER NOT NULL," + + "P_NAME VARCHAR(55) NOT NULL," + + "P_MFGR VARCHAR(25) NOT NULL," + + "P_BRAND VARCHAR(10) NOT NULL," + + "P_TYPE VARCHAR(25) NOT NULL," + + "P_SIZE INTEGER NOT NULL," + + "P_CONTAINER VARCHAR(10) NOT NULL," + + "P_RETAILPRICE DECIMAL(15,2) NOT NULL," + + "P_COMMENT VARCHAR(23) NOT NULL," + + "KEY (P_PARTKEY) USING CLUSTERED COLUMNSTORE," + + "SHARD KEY (P_PARTKEY))" + ) + println("Created Table PART") + } + + + def createPartSuppTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE TABLE PARTSUPP ( " + + "PS_PARTKEY INTEGER NOT NULL," + + "PS_SUPPKEY INTEGER NOT NULL," + + "PS_AVAILQTY INTEGER NOT NULL," + + "PS_SUPPLYCOST DECIMAL(15,2) NOT NULL," + + "PS_COMMENT VARCHAR(199) NOT NULL," + + "KEY (PS_PARTKEY) USING CLUSTERED COLUMNSTORE," + + "SHARD KEY (PS_PARTKEY))" + ) + println("Created Table PARTSUPP") + } + + def createCustomerTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE TABLE CUSTOMER ( " + + "C_CUSTKEY INTEGER NOT NULL," + + "C_NAME VARCHAR(25) NOT NULL," + + "C_ADDRESS VARCHAR(40) NOT NULL," + + "C_NATIONKEY INTEGER NOT NULL," + + "C_PHONE VARCHAR(15) NOT NULL," + + "C_ACCTBAL DECIMAL(15,2) NOT NULL," + + "C_MKTSEGMENT VARCHAR(10) NOT NULL," + + "C_COMMENT VARCHAR(117) NOT NULL," + + "KEY (C_CUSTKEY) USING CLUSTERED COLUMNSTORE," + + "SHARD KEY (C_CUSTKEY))" + ) + println("Created Table CUSTOMER") + } + + def createOrderTable_Memsql(stmt: Statement): Unit = { + + stmt.execute("CREATE TABLE ORDERS ( " + + 
"O_ORDERKEY INTEGER NOT NULL," + + "O_CUSTKEY INTEGER NOT NULL," + + "O_ORDERSTATUS CHAR(1) NOT NULL," + + "O_TOTALPRICE DECIMAL(15,2) NOT NULL," + + "O_ORDERDATE DATE NOT NULL," + + "O_ORDERPRIORITY CHAR(15) NOT NULL," + + "O_CLERK CHAR(15) NOT NULL," + + "O_SHIPPRIORITY INTEGER NOT NULL," + + "O_COMMENT VARCHAR(79) NOT NULL," + + "KEY (O_ORDERKEY) USING CLUSTERED COLUMNSTORE," + + "SHARD KEY(O_ORDERKEY))" + ) + println("Created Table ORDERS") + } + + def createLineItemTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL," + + "L_PARTKEY INTEGER NOT NULL," + + "L_SUPPKEY INTEGER NOT NULL," + + "L_LINENUMBER INTEGER NOT NULL," + + "L_QUANTITY DECIMAL(15,2) NOT NULL," + + "L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL," + + "L_DISCOUNT DECIMAL(15,2) NOT NULL," + + "L_TAX DECIMAL(15,2) NOT NULL," + + "L_RETURNFLAG CHAR(1) NOT NULL," + + "L_LINESTATUS CHAR(1) NOT NULL," + + "L_SHIPDATE DATE NOT NULL," + + "L_COMMITDATE DATE NOT NULL," + + "L_RECEIPTDATE DATE NOT NULL," + + "L_SHIPINSTRUCT CHAR(25) NOT NULL," + + "L_SHIPMODE CHAR(10) NOT NULL," + + "L_COMMENT VARCHAR(44) NOT NULL," + + "KEY (L_ORDERKEY) USING CLUSTERED COLUMNSTORE," + + "SHARD KEY (L_ORDERKEY)) " + ) + + println("Created Table LINEITEM") + } + + def createPopulateOrderTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true, provider: String = "column") : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var orderDF: DataFrame = null + var unionOrderDF: DataFrame = null + // use parquet data if available + for (i <- 1 to numberOfLoadingStages) { + if (isParquet) { + val startReadParquetTime = 
System.currentTimeMillis() + orderDF = sqlContext.read.format("parquet").load(s"$path/parquet_orders_$i") + val endReadParquetTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_ORDERS_READ_PARQUET, " + + s"${endReadParquetTime - startReadParquetTime}") + } + } else { + // apply a tbl.i suffix to table filename only when data is loaded in more than one stages. + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val orderData = sc.textFile(s"$path/orders.tbl$stage") + val orderReadings = orderData.map(s => s.split('|')).map( + s => TPCHTableSchema.parseOrderRow(s)) + val orderDF1 = sqlContext.createDataFrame(orderReadings) + val newSchema = TPCHTableSchema.newOrderSchema(orderDF1.schema) + orderDF = ColumnCacheBenchmark.applySchema(orderDF1, newSchema) + if (createParquet) { + val startWriteParquetTime = System.currentTimeMillis() + orderDF.repartition(buckets.toInt, orderDF.col("o_orderkey")) + .write.format("parquet").save(s"$path/parquet_orders_$i") + val endWriteParquetTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_ORDERS_WRITE_PARQUET, " + + s"${endWriteParquetTime - startWriteParquetTime}") + } + } + } + val newSchema = TPCHTableSchema.newOrderSchema(orderDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "o_orderkey"), ("BUCKETS" -> buckets), + ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + + val startCreateTableTime = System.currentTimeMillis() + snappyContext.dropTable("ORDERS", ifExists = true) + snappyContext.createTable("ORDERS", provider, newSchema, p1) + val endCreateTableTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_ORDERS_CREATE_TABLE, " + + s"${endCreateTableTime - 
startCreateTableTime}") + } + } + val startInsertDataTime = System.currentTimeMillis() + orderDF.write.insertInto("ORDERS") + val endInsertDataTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_ORDERS_INSERT_DATA, " + + s"${endInsertDataTime - startInsertDataTime}") + } + + } else { + if (i == 1) { + unionOrderDF = orderDF + } else { + unionOrderDF = unionOrderDF.union(orderDF) + } + } + } + if (!isSnappy) { + if (!buckets.equals("0")) { + val rePartitionedDF = unionOrderDF.repartition(buckets.toInt, + unionOrderDF("o_orderkey")) + rePartitionedDF.createOrReplaceTempView("ORDERS") + } else { + unionOrderDF.createOrReplaceTempView("ORDERS") + } + if (cacheTables) { + sqlContext.cacheTable("ORDERS") + } + sqlContext.table("ORDERS").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"ORDERS, ${endTime - startTime}") + } + } + + def createAndPopulateOrder_CustTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val orderData = sc.textFile(s"$path/orders.tbl") + val orderReadings = orderData.map(s => s.split('|')).map(s => TPCHTableSchema.parseOrderRow(s)) + val orderDF = sqlContext.createDataFrame(orderReadings) + val newSchema = TPCHTableSchema.newOrderSchema(orderDF.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "o_custkey"), ("BUCKETS" -> buckets), ("COLOCATE_WITH" -> + "CUSTOMER")) + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("ORDERS_CUST", ifExists = true) + snappyContext.createTable("ORDERS_CUST", "column", newSchema, p1) + orderDF.write.insertInto("ORDERS_CUST") + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println("ORDERS_CUST, " + + (endTime - startTime)) 
+ } + } + } + + + def createPopulateLineItemTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true) : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var lineItemDF: DataFrame = null + + var unionLineItemDF: DataFrame = null + // use parquet data if available + for (i <- 1 to numberOfLoadingStages) { + if (isParquet) { + val startReadParquetTime = System.currentTimeMillis() + lineItemDF = sqlContext.read.format("parquet").load(s"$path/parquet_lineitem_$i") + val endReadParquetTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_LINEITEM_READ_PARQUET, " + + s"${endReadParquetTime - startReadParquetTime}") + } + } else { + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val lineItemData = sc.textFile(s"$path/lineitem.tbl$stage") + val lineItemReadings = lineItemData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseLineItemRow(s)) + val lineItemDF1 = sqlContext.createDataFrame(lineItemReadings) + val newSchema = TPCHTableSchema.newLineItemSchema(lineItemDF1.schema) + + lineItemDF = ColumnCacheBenchmark.applySchema(lineItemDF1, newSchema) + if (createParquet) { + val startWriteParquetTime = System.currentTimeMillis() + lineItemDF.repartition(buckets.toInt, lineItemDF.col("l_orderkey")) + .write.format("parquet").save(s"$path/parquet_lineitem_$i") + val endWriteParquetTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_LINEITEM_WRITE_PARQUET, " + + s"${endWriteParquetTime - startWriteParquetTime}") + } + } + } + val newSchema = 
TPCHTableSchema.newLineItemSchema(lineItemDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "l_orderkey"), ("COLOCATE_WITH" -> "ORDERS"), + ("BUCKETS" -> buckets), ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + val startCreateTableTime = System.currentTimeMillis() + snappyContext.dropTable("LINEITEM", ifExists = true) + snappyContext.createTable("LINEITEM", "column", newSchema, p1) + val endCreateTableTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_LINEITEM_CREATE_TABLE, " + + s"${endCreateTableTime - startCreateTableTime}") + } + } + val startInsertDataTime = System.currentTimeMillis() + lineItemDF.write.insertInto("LINEITEM") + val endInsertDataTime = System.currentTimeMillis() + if (trace && loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"TRACE_LINEITEM_INSERT_DATA, " + + s"${endInsertDataTime - startInsertDataTime}") + } + } else { + if (i == 1) { + unionLineItemDF = lineItemDF + } else { + unionLineItemDF = unionLineItemDF.union(lineItemDF) + } + } + } + if(!isSnappy){ + if (!buckets.equals("0")) { + val rePartitionedDF = unionLineItemDF.repartition(buckets.toInt, + unionLineItemDF("l_orderkey")) + rePartitionedDF.createOrReplaceTempView("LINEITEM") + } else { + unionLineItemDF.createOrReplaceTempView("LINEITEM") + } + if (cacheTables) { + sqlContext.cacheTable("LINEITEM") + } + sqlContext.table("LINEITEM").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"LINEITEM, ${endTime - startTime}") + } + } + + def createAndPopulateLineItem_partTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val 
lineItemData = sc.textFile(s"$path/lineitem.tbl") + val lineItemReadings = lineItemData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseLineItemRow(s)) + val lineItemPartDF = sqlContext.createDataFrame(lineItemReadings) + val newSchema = TPCHTableSchema.newLineItemSchema(lineItemPartDF.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "l_partkey"), ("COLOCATE_WITH" -> "PART"), ("BUCKETS" -> + buckets)) + + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("LINEITEM_PART", ifExists = true) + snappyContext.createTable("LINEITEM_PART", "column", newSchema, p1) + lineItemPartDF.write.insertInto("LINEITEM_PART") + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println("LINEITEM_PART, " + + (endTime - startTime)) + } + } + } + + def createPopulateCustomerTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true) : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var customerDF: DataFrame = null + var unionCustomerDF: DataFrame = null + for (i <- 1 to numberOfLoadingStages) { + // use parquet data if available + if (isParquet) { + customerDF = sqlContext.read.format("parquet").load(s"$path/parquet_customer_$i") + } else { + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val customerData = sc.textFile(s"$path/customer.tbl$stage") + val customerReadings = customerData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseCustomerRow(s)) + val customerDF1 = sqlContext.createDataFrame(customerReadings) + val newSchema = TPCHTableSchema.newCustomerSchema(customerDF1.schema) + + customerDF = 
ColumnCacheBenchmark.applySchema(customerDF1, newSchema) + if (createParquet) { + customerDF.repartition(buckets.toInt, customerDF.col("c_custkey")) + .write.format("parquet").save(s"$path/parquet_customer_$i") + } + } + val newSchema = TPCHTableSchema.newCustomerSchema(customerDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "c_custkey"), ("BUCKETS" -> buckets), + ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("CUSTOMER", ifExists = true) + snappyContext.createTable("CUSTOMER", "column", newSchema, p1) + } + customerDF.write.insertInto("CUSTOMER") + } else { + if (i == 1) { + unionCustomerDF = customerDF + } else { + unionCustomerDF = unionCustomerDF.union(customerDF) + } + } + } + if(!isSnappy){ + if (!buckets.equals("0")) { + val rePartitionedDF = unionCustomerDF.repartition(buckets.toInt, + unionCustomerDF("c_custkey")) + rePartitionedDF.createOrReplaceTempView("CUSTOMER") + } else { + unionCustomerDF.createOrReplaceTempView("CUSTOMER") + } + if (cacheTables) { + sqlContext.cacheTable("CUSTOMER") + } + sqlContext.table("CUSTOMER").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"CUSTOMER, ${endTime - startTime}") + } + } + + + def createPopulatePartTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true) : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var partDF: DataFrame = null + var unionPartDF: DataFrame = null + for(i <- 1 to numberOfLoadingStages) { + // 
use parquet data if available + if (isParquet) { + partDF = sqlContext.read.format("parquet").load(s"$path/parquet_part_$i") + } else { + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val partData = sc.textFile(s"$path/part.tbl$stage") + val partReadings = partData.map(s => s.split('|')).map(s => TPCHTableSchema.parsePartRow(s)) + val partDF1 = sqlContext.createDataFrame(partReadings) + val newSchema = TPCHTableSchema.newPartSchema(partDF1.schema) + + partDF = ColumnCacheBenchmark.applySchema(partDF1, newSchema) + if (createParquet) { + partDF.repartition(buckets.toInt, partDF.col("p_partkey")) + .write.format("parquet").save(s"$path/parquet_part_$i") + } + + } + val newSchema = TPCHTableSchema.newPartSchema(partDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "p_partkey"), ("BUCKETS" -> buckets), + ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("PART", ifExists = true) + snappyContext.createTable("PART", "column", newSchema, p1) + } + partDF.write.insertInto("PART") + } else { + if (i == 1) { + unionPartDF = partDF + } else { + unionPartDF = unionPartDF.union(partDF) + } + } + } + if(!isSnappy){ + if (!buckets.equals("0")) { + val rePartitionedDF = unionPartDF.repartition(buckets.toInt, + unionPartDF("p_partkey")) + rePartitionedDF.createOrReplaceTempView("PART") + } else { + unionPartDF.createOrReplaceTempView("PART") + } + if (cacheTables) { + sqlContext.cacheTable("PART") + } + sqlContext.table("PART").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"PART, ${endTime - startTime}") + } + } + + def createPopulatePartSuppTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: 
Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true) : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var partSuppDF: DataFrame = null + var unionPartSuppDF: DataFrame = null + for (i <- 1 to numberOfLoadingStages) { + // use parquet data if available + if (isParquet) { + partSuppDF = sqlContext.read.format("parquet").load(s"$path/parquet_partsupp_$i") + } else { + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val partSuppData = sc.textFile(s"$path/partsupp.tbl$stage") + val partSuppReadings = partSuppData.map(s => s.split('|')).map(s => TPCHTableSchema + .parsePartSuppRow(s)) + val partSuppDF1 = sqlContext.createDataFrame(partSuppReadings) + val newSchema = TPCHTableSchema.newPartSuppSchema(partSuppDF1.schema) + + partSuppDF = ColumnCacheBenchmark.applySchema(partSuppDF1, newSchema) + if (createParquet) { + partSuppDF.repartition(buckets.toInt, partSuppDF.col("ps_partkey")) + .write.format("parquet").save(s"$path/parquet_partsupp_$i") + } + } + val newSchema = TPCHTableSchema.newPartSuppSchema(partSuppDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "ps_partkey"), ("BUCKETS" -> buckets), + ("COLOCATE_WITH" -> "PART"), ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("PARTSUPP", ifExists = true) + snappyContext.createTable("PARTSUPP", "column", newSchema, p1) + } + partSuppDF.write.insertInto("PARTSUPP") + } else { + if (i == 1) { + unionPartSuppDF = partSuppDF + } else { + unionPartSuppDF = unionPartSuppDF.union(partSuppDF) + } + } + } + if (!isSnappy) { + if (!buckets.equals("0")) { + val rePartitionedDF = unionPartSuppDF.repartition(buckets.toInt, + unionPartSuppDF("ps_partkey")) 
+ rePartitionedDF.createOrReplaceTempView("PARTSUPP") + } else { + unionPartSuppDF.createOrReplaceTempView("PARTSUPP") + } + if (cacheTables) { + sqlContext.cacheTable("PARTSUPP") + } + sqlContext.table("PARTSUPP").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"PARTSUPP, ${endTime - startTime}") + } + } + + + def createAndPopulateOrderSampledTable(sc: SparkContext, path: String): Unit = { + val snappyContext = SnappyContext(sc) + val orderDF = snappyContext.table("ORDERS") + val orderSampled = orderDF.stratifiedSample(Map( + "qcs" -> "O_ORDERDATE", // O_SHIPPRIORITY + "fraction" -> 0.03, + "strataReservoirSize" -> 50)) + orderSampled.registerTempTable("ORDERS_SAMPLED") + snappyContext.cacheTable("orders_sampled") + println("Created Sampled Table ORDERS_SAMPLED " + snappyContext.sql( + "select count(*) as sample_count from orders_sampled").collectAsList()) + } + + def createAndPopulateLineItemSampledTable(sc: SparkContext, path: String): Unit = { + val snappyContext = SnappyContext(sc) + val lineOrderDF = snappyContext.table("LINEITEM") + val lineOrderSampled = lineOrderDF.stratifiedSample(Map( + "qcs" -> "L_SHIPDATE", // L_RETURNFLAG + "fraction" -> 0.03, + "strataReservoirSize" -> 50)) + println(" Logic relation while creation " + lineOrderSampled.logicalPlan.output) + lineOrderSampled.registerTempTable("LINEITEM_SAMPLED") + snappyContext.cacheTable("lineitem_sampled") + println("Created Sampled Table LINEITEM_SAMPLED " + snappyContext.sql( + "select count(*) as sample_count from lineitem_sampled").collectAsList()) + } + + def createAndPopulateNationTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null, cacheTables : Boolean = true): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val nationData = sc.textFile(s"$path/nation.tbl") + val nationReadings = nationData.map(s => 
s.split('|')).map(s => TPCHTableSchema + .parseNationRow(s)) + val nationdf = sqlContext.createDataFrame(nationReadings) + val newSchema = TPCHTableSchema.newNationSchema(nationdf.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "N_NATIONKEY"), ("BUCKETS" -> buckets)) + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("NATION", ifExists = true) + snappyContext.createTable("NATION", "column", newSchema, p1) + nationdf.write.insertInto("NATION") + } else { + nationdf.createOrReplaceTempView("NATION") + if (cacheTables) { + sqlContext.cacheTable("NATION") + } + sqlContext.table("NATION").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"NATION, ${endTime - startTime}") + } + } + + def createAndPopulateRegionTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null, cacheTables : Boolean = true): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val regionData = sc.textFile(s"$path/region.tbl") + val regionReadings = regionData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseRegionRow(s)) + val regionDF = sqlContext.createDataFrame(regionReadings) + val newSchema = TPCHTableSchema.newRegionSchema(regionDF.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "R_REGIONKEY"), ("BUCKETS" -> buckets)) + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("REGION", ifExists = true) + snappyContext.createTable("REGION", "column", newSchema, p1) + regionDF.write.insertInto("REGION") + } else { + regionDF.createOrReplaceTempView("REGION") + if (cacheTables) { + sqlContext.cacheTable("REGION") + } + sqlContext.table("REGION").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"REGION, ${endTime - startTime}") + } + } + + def 
createAndPopulateSupplierTable(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String = "128", loadPerfPrintStream: PrintStream = null, redundancy : String = "0", + persistence: Boolean = false, persistence_type: String = "", numberOfLoadingStages : Int = 1, + isParquet : Boolean = false, createParquet : Boolean = false, + trace : Boolean = false, cacheTables : Boolean = true): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var suppDF: DataFrame = null + var unionSuppDF: DataFrame = null + + for (i <- 1 to numberOfLoadingStages) { + // use parquet data if available + val parquetDir = s"$path/parquet_supplier_$i" + if (isParquet && new File(parquetDir).exists()) { + suppDF = sqlContext.read.format("parquet").load(parquetDir) + } else { + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val suppData = sc.textFile(s"$path/supplier.tbl$stage") + val suppReadings = suppData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseSupplierRow(s)) + val suppDF1 = sqlContext.createDataFrame(suppReadings) + val newSchema = TPCHTableSchema.newSupplierSchema(suppDF1.schema) + + suppDF = ColumnCacheBenchmark.applySchema(suppDF1, newSchema) + if (createParquet) { + suppDF.repartition(buckets.toInt, suppDF.col("S_SUPPKEY")) + .write.format("parquet").save(s"$path/parquet_supplier_$i") + } + } + val newSchema = TPCHTableSchema.newSupplierSchema(suppDF.schema) + if (isSnappy) { + if (i == 1) { + var p1 = Map(("PARTITION_BY" -> "S_SUPPKEY"), ("BUCKETS" -> buckets), + ("REDUNDANCY" -> redundancy)) + if (persistence) { + p1 += "PERSISTENCE" -> s"$persistence_type" + } + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("SUPPLIER", ifExists = true) + snappyContext.createTable("SUPPLIER", "column", newSchema, p1) + } + suppDF.write.insertInto("SUPPLIER") + } else { + if (i == 1) { + unionSuppDF = suppDF + } else { + unionSuppDF = unionSuppDF.union(suppDF) + } + } + 
} + if (!isSnappy) { + if (!buckets.equals("0")) { + val rePartitionedDF = unionSuppDF.repartition(buckets.toInt, + unionSuppDF("S_SUPPKEY")) + rePartitionedDF.createOrReplaceTempView("SUPPLIER") + } else { + unionSuppDF.createOrReplaceTempView("SUPPLIER") + } + if (cacheTables) { + sqlContext.cacheTable("SUPPLIER") + } + sqlContext.table("SUPPLIER").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"SUPPLIER, ${endTime - startTime}") + } + } + + def testLoadOrderTablePerformance(sqlContext: SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null, cacheTables : Boolean = true): Unit = { + + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val orderData = sc.textFile(s"$path/orders.tbl") + val orderReadings = orderData.map(s => s.split('|')).map(s => TPCHTableSchema.parseOrderRow(s)) + val orderDF = sqlContext.createDataFrame(orderReadings) + val newSchema = TPCHTableSchema.newOrderSchema(orderDF.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "o_orderkey"), ("BUCKETS" -> buckets)) + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("ORDERS", ifExists = true) + snappyContext.createTable("ORDERS", "column", newSchema, p1) + orderDF.write.insertInto("ORDERS") + } else { + var newOrderDF: DataFrame = null + val numPartitions = buckets.toInt + if (numPartitions > 0) { + newOrderDF = orderDF.repartition(buckets.toInt, orderDF.col("o_orderkey")) + } else { + newOrderDF = orderDF.repartition(orderDF.col("o_orderkey")) + } + newOrderDF.createOrReplaceTempView("ORDERS") + if (cacheTables) { + sqlContext.cacheTable("ORDERS") + } + sqlContext.table("ORDERS").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"ORDERS, ${endTime - startTime}") + } + } + + def testLoadLineItemTablePerformance(sqlContext: 
SQLContext, path: String, isSnappy: Boolean, + buckets: String, loadPerfPrintStream: PrintStream = null, cacheTables : Boolean = true): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val lineItemData = sc.textFile(s"$path/lineitem.tbl") + val lineItemReadings = lineItemData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseLineItemRow(s)) + val lineItemDF = sqlContext.createDataFrame(lineItemReadings) + val newSchema = TPCHTableSchema.newLineItemSchema(lineItemDF.schema) + if (isSnappy) { + val p1 = Map(("PARTITION_BY" -> "l_orderkey"), ("COLOCATE_WITH" -> "ORDERS"), ("BUCKETS" -> + buckets)) + + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("LINEITEM", ifExists = true) + snappyContext.createTable("LINEITEM", "column", newSchema, p1) + lineItemDF.write.insertInto("LINEITEM") + } else { + var newLineItemDF: DataFrame = null + val numPartitions = buckets.toInt + if (numPartitions > 0) { + newLineItemDF = lineItemDF.repartition(buckets.toInt, lineItemDF.col("l_orderkey")) + } else { + newLineItemDF = lineItemDF.repartition(lineItemDF.col("l_orderkey")) + } + newLineItemDF.createOrReplaceTempView("LINEITEM") + if (cacheTables) { + sqlContext.cacheTable("LINEITEM") + } + sqlContext.table("LINEITEM").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"LINEITEM, ${endTime - startTime}") + } + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/TPCHReplicatedTable.scala b/cluster/src/test/scala/io/snappydata/benchmark/TPCHReplicatedTable.scala new file mode 100644 index 0000000000..581cb37f3d --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/TPCHReplicatedTable.scala @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.benchmark + +import java.io.PrintStream +import java.sql.Statement + +import org.apache.spark.sql.{DataFrame, SQLContext, SnappyContext} + +object TPCHReplicatedTable { + + def createRegionTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE REFERENCE TABLE REGION (" + + "R_REGIONKEY INTEGER NOT NULL PRIMARY KEY," + + "R_NAME CHAR(25) NOT NULL," + + "R_COMMENT VARCHAR(152))" + ) + println("Created Table REGION") + } + + def createNationTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE REFERENCE TABLE NATION (" + + "N_NATIONKEY INTEGER NOT NULL PRIMARY KEY," + + "N_NAME CHAR(25) NOT NULL," + + "N_REGIONKEY INTEGER NOT NULL," + + "N_COMMENT VARCHAR(152))" + ) + println("Created Table NATION") + } + + def createSupplierTable_Memsql(stmt: Statement): Unit = { + stmt.execute("CREATE REFERENCE TABLE SUPPLIER ( " + + "S_SUPPKEY INTEGER NOT NULL PRIMARY KEY," + + "S_NAME CHAR(25) NOT NULL," + + "S_ADDRESS VARCHAR(40) NOT NULL," + + "S_NATIONKEY INTEGER NOT NULL," + + "S_PHONE CHAR(15) NOT NULL," + + "S_ACCTBAL DECIMAL(15,2) NOT NULL," + + "S_COMMENT VARCHAR(101) NOT NULL)" + ) + println("Created Table SUPPLIER") + } + + def createPopulateRegionTable(usingOptionString: String, sqlContext: SQLContext, path: String, + isSnappy: Boolean, loadPerfPrintStream: PrintStream = null, + trace : Boolean = false, cacheTables : Boolean = true): 
Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val regionData = sc.textFile(s"$path/region.tbl") + val regionReadings = regionData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseRegionRow(s)) + val regionDF = sqlContext.createDataFrame(regionReadings) + if (isSnappy) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("REGION", ifExists = true) + snappyContext.sql( + """CREATE TABLE REGION ( + R_REGIONKEY INTEGER NOT NULL PRIMARY KEY, + R_NAME VARCHAR(25) NOT NULL, + R_COMMENT VARCHAR(152) + ) """ + usingOptionString + ) + println("Created Table REGION") + regionDF.write.insertInto("REGION") + } else { + regionDF.createOrReplaceTempView("REGION") + if(cacheTables) { + sqlContext.cacheTable("REGION") + } + sqlContext.table("REGION").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"REGION,${endTime - startTime}") + } + } + + def createPopulateNationTable(usingOptionString: String, sqlContext: SQLContext, path: String, + isSnappy: Boolean, loadPerfPrintStream: PrintStream = null, + trace : Boolean = false, cacheTables : Boolean = true): Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + val nationData = sc.textFile(s"$path/nation.tbl") + val nationReadings = nationData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseNationRow(s)) + val nationDF = sqlContext.createDataFrame(nationReadings) + if (isSnappy) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("NATION", ifExists = true) + snappyContext.sql( + """CREATE TABLE NATION ( + N_NATIONKEY INTEGER NOT NULL PRIMARY KEY, + N_NAME VARCHAR(25) NOT NULL, + N_REGIONKEY INTEGER NOT NULL REFERENCES REGION(R_REGIONKEY), + N_COMMENT VARCHAR(152) + ) """ + usingOptionString + ) + println("Created Table NATION") + nationDF.write.insertInto("NATION") + } else { + 
nationDF.createOrReplaceTempView("NATION") + if(cacheTables) { + sqlContext.cacheTable("NATION") + } + sqlContext.table("NATION").count() + } + val endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"NATION,${endTime - startTime}") + } + } + + def createPopulateSupplierTable(usingOptionString: String, sqlContext: SQLContext, path: String, + isSnappy: Boolean, loadPerfPrintStream: PrintStream = null, numberOfLoadingStages : Int = 1, + trace : Boolean = false, cacheTables : Boolean = true) + : Unit = { + val sc = sqlContext.sparkContext + val startTime = System.currentTimeMillis() + var unionSupplierDF : DataFrame = null + + for(i <- 1 to numberOfLoadingStages) { + // apply a tbl.i suffix to table filename only when data is loaded in more than one stages. + var stage = "" + if (numberOfLoadingStages > 1) { + stage = s".$i" + } + val supplierData = sc.textFile(s"$path/supplier.tbl$stage") + val supplierReadings = supplierData.map(s => s.split('|')).map(s => TPCHTableSchema + .parseSupplierRow(s)) + val supplierDF = sqlContext.createDataFrame(supplierReadings) + if (isSnappy) { + if (i == 1) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.dropTable("SUPPLIER", ifExists = true) + snappyContext.sql( + """CREATE TABLE SUPPLIER ( + S_SUPPKEY INTEGER NOT NULL PRIMARY KEY, + S_NAME VARCHAR(25) NOT NULL, + S_ADDRESS VARCHAR(40) NOT NULL, + S_NATIONKEY INTEGER NOT NULL, + S_PHONE VARCHAR(15) NOT NULL, + S_ACCTBAL DECIMAL(15,2) NOT NULL, + S_COMMENT VARCHAR(101) NOT NULL + ) """ + usingOptionString + ) + println("Created Table SUPPLIER") + } + supplierDF.write.insertInto("SUPPLIER") + } else { + if (i == 1) { + unionSupplierDF = supplierDF + } else { + unionSupplierDF = unionSupplierDF.union(supplierDF) + } + } + } + if(!isSnappy){ + unionSupplierDF.createOrReplaceTempView("SUPPLIER") + if(cacheTables) { + sqlContext.cacheTable("SUPPLIER") + } + sqlContext.table("SUPPLIER").count() + } + + val 
endTime = System.currentTimeMillis() + if (loadPerfPrintStream != null) { + loadPerfPrintStream.println(s"Time taken to create SUPPLIER Table : ${endTime - startTime}") + } + } + + +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/TPCHRowPartitionedTable.scala b/cluster/src/test/scala/io/snappydata/benchmark/TPCHRowPartitionedTable.scala new file mode 100644 index 0000000000..5151dd9bdf --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/TPCHRowPartitionedTable.scala @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark + +import java.io.PrintStream +import java.sql.Statement + +import org.apache.spark.sql.{SQLContext, SnappyContext} + +object TPCHRowPartitionedTable { + + def createPartTable_Memsql(stmt:Statement): Unit = { + stmt.execute("CREATE TABLE PART ( " + + "P_PARTKEY INTEGER NOT NULL PRIMARY KEY,"+ + "P_NAME VARCHAR(55) NOT NULL,"+ + "P_MFGR VARCHAR(25) NOT NULL,"+ + "P_BRAND VARCHAR(10) NOT NULL,"+ + "P_TYPE VARCHAR(25) NOT NULL,"+ + "P_SIZE INTEGER NOT NULL,"+ + "P_CONTAINER VARCHAR(10) NOT NULL,"+ + "P_RETAILPRICE DECIMAL(15,2) NOT NULL,"+ + "P_COMMENT VARCHAR(23) NOT NULL)" + ) + println("Created Table PART") + } + + + def createPartSuppTable_Memsql(stmt:Statement): Unit = { + stmt.execute("CREATE TABLE PARTSUPP ( " + + "PS_PARTKEY INTEGER NOT NULL," + + "PS_SUPPKEY INTEGER NOT NULL," + + "PS_AVAILQTY INTEGER NOT NULL," + + "PS_SUPPLYCOST DECIMAL(15,2) NOT NULL," + + "PS_COMMENT VARCHAR(199) NOT NULL," + + "PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY))" +// stmt.execute("CREATE TABLE PARTSUPP ( " + +// "PS_PARTKEY INTEGER NOT NULL," + +// "PS_SUPPKEY INTEGER NOT NULL," + +// "PS_AVAILQTY INTEGER NOT NULL," + +// "PS_SUPPLYCOST DECIMAL(15,2) NOT NULL," + +// "PS_COMMENT VARCHAR(199) NOT NULL," + +// "SHARD KEY(PS_PARTKEY),"+ +// "KEY(PS_SUPPKEY),"+ +// "PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY))" + + ) + println("Created Table PARTSUPP") + } + + def createCustomerTable_Memsql(stmt:Statement): Unit = { + stmt.execute("CREATE TABLE CUSTOMER ( " + + "C_CUSTKEY INTEGER NOT NULL PRIMARY KEY," + + "C_NAME VARCHAR(25) NOT NULL," + + "C_ADDRESS VARCHAR(40) NOT NULL," + + "C_NATIONKEY INTEGER NOT NULL," + + "C_PHONE VARCHAR(15) NOT NULL," + + "C_ACCTBAL DECIMAL(15,2) NOT NULL," + + "C_MKTSEGMENT VARCHAR(10) NOT NULL," + + "C_COMMENT VARCHAR(117) NOT NULL)" +// stmt.execute("CREATE TABLE CUSTOMER ( " + +// "C_CUSTKEY INTEGER NOT NULL PRIMARY KEY," + +// "C_NAME VARCHAR(25) NOT NULL," + +// "C_ADDRESS VARCHAR(40) NOT NULL," + +// "C_NATIONKEY INTEGER NOT 
NULL," + +// "C_PHONE VARCHAR(15) NOT NULL," + +// "C_ACCTBAL DECIMAL(15,2) NOT NULL," + +// "C_MKTSEGMENT VARCHAR(10) NOT NULL," + +// "C_COMMENT VARCHAR(117) NOT NULL,"+ +// "KEY(C_NATIONKEY))" + ) + println("Created Table CUSTOMER") + } + + def createPopulatePartTable(usingOptionString: String, props: Map[String, String], sqlContext: SQLContext, + path: String, isSnappy: Boolean, buckets:String, loadPerfPrintStream : PrintStream): Unit = { + val sc = sqlContext.sparkContext + val startTime=System.currentTimeMillis() + val partData = sc.textFile(s"$path/part.tbl") + val partReadings = partData.map(s => s.split('|')).map(s => TPCHTableSchema.parsePartRow(s)) + val partDF = sqlContext.createDataFrame(partReadings) + + if (isSnappy) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.sql( + s"""CREATE TABLE PART ( + P_PARTKEY INTEGER NOT NULL PRIMARY KEY, + P_NAME VARCHAR(55) NOT NULL, + P_MFGR VARCHAR(25) NOT NULL, + P_BRAND VARCHAR(10) NOT NULL, + P_TYPE VARCHAR(25) NOT NULL, + P_SIZE INTEGER NOT NULL, + P_CONTAINER VARCHAR(10) NOT NULL, + P_RETAILPRICE DECIMAL(15,2) NOT NULL, + P_COMMENT VARCHAR(23) NOT NULL + ) PARTITION BY COLUMN (P_PARTKEY) + BUCKETS $buckets + """ + usingOptionString + ) + println("Created Table PART") + partDF.write.insertInto("PART") + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create PART Table : ${endTime-startTime}") + } else { + partDF.createOrReplaceTempView("PART") + sqlContext.cacheTable("PART") + sqlContext.table("PART").count() + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create PART Table : ${endTime-startTime}") + } + } + + def createPopulatePartSuppTable(usingOptionString: String, props: Map[String, String], sqlContext: SQLContext, + path: String, isSnappy: Boolean, bukcets:String, loadPerfPrintStream : PrintStream): Unit = { + val sc = sqlContext.sparkContext + val startTime=System.currentTimeMillis() + val 
partSuppData = sc.textFile(s"$path/partsupp.tbl") + val partSuppReadings = partSuppData.map(s => s.split('|')).map(s => TPCHTableSchema.parsePartSuppRow(s)) + val partSuppDF = sqlContext.createDataFrame(partSuppReadings) + + if (isSnappy) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.sql( + s"""CREATE TABLE PARTSUPP ( + PS_PARTKEY INTEGER NOT NULL, + PS_SUPPKEY INTEGER NOT NULL, + PS_AVAILQTY INTEGER NOT NULL, + PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, + PS_COMMENT VARCHAR(199) NOT NULL, + PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY) + ) PARTITION BY COLUMN (PS_PARTKEY) COLOCATE WITH (PART) + BUCKETS $bukcets + """ + usingOptionString + ) + println("Created Table PARTSUPP") + partSuppDF.write.insertInto("PARTSUPP") + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create PARTSUPP Table : ${endTime-startTime}") + } else { + partSuppDF.createOrReplaceTempView("PARTSUPP") + sqlContext.cacheTable("PARTSUPP") + sqlContext.table("PARTSUPP").count() + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create PARTSUPP Table : ${endTime-startTime}") + } + } + + def createPopulateCustomerTable(usingOptionString: String, props: Map[String, String], sqlContext: SQLContext, + path: String, isSnappy: Boolean, buckets:String, loadPerfPrintStream : PrintStream): Unit = { + val sc = sqlContext.sparkContext + val startTime=System.currentTimeMillis() + val customerData = sc.textFile(s"$path/customer.tbl") + val customerReadings = customerData.map(s => s.split('|')).map(s => TPCHTableSchema.parseCustomerRow(s)) + val customerDF = sqlContext.createDataFrame(customerReadings) + + if (isSnappy) { + val snappyContext = sqlContext.asInstanceOf[SnappyContext] + snappyContext.sql( + s"""CREATE TABLE CUSTOMER ( + C_CUSTKEY INTEGER NOT NULL PRIMARY KEY, + C_NAME VARCHAR(25) NOT NULL, + C_ADDRESS VARCHAR(40) NOT NULL, + C_NATIONKEY INTEGER NOT NULL , + C_PHONE VARCHAR(15) NOT NULL, + C_ACCTBAL 
DECIMAL(15,2) NOT NULL, + C_MKTSEGMENT VARCHAR(10) NOT NULL, + C_COMMENT VARCHAR(117) NOT NULL + ) PARTITION BY COLUMN (C_CUSTKEY) + BUCKETS $buckets + """ + usingOptionString + ) + println("Created Table CUSTOMER") + customerDF.write.insertInto("CUSTOMER") + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create CUSTOMER Table : ${endTime-startTime}") + } else { + customerDF.createOrReplaceTempView("CUSTOMER") + sqlContext.cacheTable("CUSTOMER") + sqlContext.table("CUSTOMER").count() + val endTime = System.currentTimeMillis() + loadPerfPrintStream.println(s"Time taken to create CUSTOMER Table : ${endTime-startTime}") + } + } + +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/TPCHTableSchema.scala b/cluster/src/test/scala/io/snappydata/benchmark/TPCHTableSchema.scala new file mode 100644 index 0000000000..286673ad30 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/TPCHTableSchema.scala @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.benchmark + +import java.sql.Date + +import org.apache.spark.sql.types.StructType + + +object TPCHTableSchema { + + case class StreamMessageRegionObject( + r_regionkey: Int, + r_name: String, + r_comment: String + ) + + def newRegionSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseRegionRow(s: Array[String]): StreamMessageRegionObject = { + StreamMessageRegionObject( + s(0).toInt, + s(1), + s(2) + ) + } + + case class StreamMessageNationObject( + n_nationkey: Int, + n_name: String, + n_regionkey: Int, + n_comment: String + ) + + def newNationSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseNationRow(s: Array[String]): StreamMessageNationObject = { + StreamMessageNationObject( + s(0).toInt, + s(1), + s(2).toInt, + s(3) + ) + } + + case class StreamMessageSupplierObject( + s_suppkey: Int, + s_name: String, + s_address: String, + s_nationkey: Int, + s_phone: String, + s_acctbal: Double, + s_comment: String + ) + + def newSupplierSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseSupplierRow(s: Array[String]): StreamMessageSupplierObject = { + StreamMessageSupplierObject( + s(0).toInt, + s(1), + s(2), + s(3).toInt, + s(4), + s(5).toDouble, + s(6) + ) + } + + case class StreamMessageOrderObject( + o_orderkey: Long, + o_custkey: Int, + o_orderstatus: String, + o_totalprice: Double, + o_orderdate: Date, + o_orderpriority: String, + o_clerk: String, + o_shippriority: Int, + o_comment: String + ) + + + def newOrderSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseOrderRow(s: Array[String]): StreamMessageOrderObject = { + StreamMessageOrderObject( + s(0).toLong, + s(1).toInt, + s(2), + s(3).toDouble, + formatDate(s(4)), + s(5), + s(6), + s(7).toInt, + s(8) + 
) + } + + case class StreamMessageLineItemObject( + l_orderkey: Long, + l_partkey: Int, + l_suppkey: Int, + l_linenumber: Int, + l_quantity: Double, + l_extendedprice: Double, + l_discount: Double, + l_tax: Double, + l_returnflag: String, + l_linestatus: String, + l_shipdate: Date, + l_commitdate: Date, + l_receiptdate: Date, + l_shipinstruct: String, + l_shipmode: String, + l_comment: String + ) + + def newLineItemSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseLineItemRow(s: Array[String]): StreamMessageLineItemObject = { + StreamMessageLineItemObject( + s(0).toLong, + s(1).toInt, + s(2).toInt, + s(3).toInt, + s(4).toDouble, + s(5).toDouble, + s(6).toDouble, + s(7).toDouble, + s(8), + s(9), + formatDate(s(10)), + formatDate(s(11)), + formatDate(s(12)), + s(13), + s(14), + s(15) + ) + } + + case class StreamMessagePartObject( + p_partkey: Int, + p_name: String, + p_mfgr: String, + p_brand: String, + p_type: String, + p_size: Int, + p_container: String, + p_retailprice: Double, + p_comment: String + ) + + def newPartSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parsePartRow(s: Array[String]): StreamMessagePartObject = { + StreamMessagePartObject( + s(0).toInt, + s(1), + s(2), + s(3), + s(4), + s(5).toInt, + s(6), + s(7).toDouble, + s(8) + ) + } + + case class StreamMessagePartSuppObject( + ps_partkey: Int, + ps_suppkey: Int, + ps_availqty: Int, + ps_supplycost: Double, + ps_comment: String + ) + + def newPartSuppSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parsePartSuppRow(s: Array[String]): StreamMessagePartSuppObject = { + StreamMessagePartSuppObject( + s(0).toInt, + s(1).toInt, + s(2).toInt, + s(3).toDouble, + s(4) + ) + } + + case class StreamMessageCustomerObject( + C_CUSTKEY: Int, + C_NAME: String, + C_ADDRESS: String, + C_NATIONKEY: Int, + 
C_PHONE: String, + C_ACCTBAL: Double, + C_MKTSEGMENT: String, + C_COMMENT: String + ) + + def newCustomerSchema(schema: StructType): StructType = { + new StructType(schema.map(_.copy(nullable = false)).toArray) + } + + def parseCustomerRow(s: Array[String]): StreamMessageCustomerObject = { + StreamMessageCustomerObject( + s(0).toInt, + s(1), + s(2), + s(3).toInt, + s(4), + s(5).toDouble, + s(6), + s(7) + ) + } + + def formatDate(dateString: String): Date = { + val splittedDate = dateString.split("-") + val year = splittedDate(0) + val month = splittedDate(1) + val day = splittedDate(2) + new Date(year.toInt - 1900, month.toInt - 1, day.toInt) + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/TPCH_Queries.scala b/cluster/src/test/scala/io/snappydata/benchmark/TPCH_Queries.scala new file mode 100644 index 0000000000..513ecfabba --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/TPCH_Queries.scala @@ -0,0 +1,2861 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ + +package io.snappydata.benchmark + +import org.apache.spark.Logging + +object TPCH_Queries extends Logging { + + private var random = new scala.util.Random(42) + + def setRandomSeed(randomSeed: Integer = 42): Unit = { + this.random = new scala.util.Random(randomSeed) + } + + def getQuery(query: String, isDynamic: Boolean, isSnappy: Boolean): String = query match { + case "1" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery1, TPCH_Queries.getQ1Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery1_Memsql, TPCH_Queries.getQ1Parameter(isDynamic)) + } + case "2" => + createQuery(TPCH_Queries.getQuery2, TPCH_Queries.getQ2Parameter(isDynamic)) + case "3" => + createQuery(TPCH_Queries.getQuery3, TPCH_Queries.getQ3Parameter(isDynamic)) + case "4" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery4, TPCH_Queries.getQ4Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery4_Memsql, TPCH_Queries.getQ4Parameter(isDynamic)) + } + case "5" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery5, TPCH_Queries.getQ5Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery5_Memsql, TPCH_Queries.getQ5Parameter(isDynamic)) + } + case "6" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery6, TPCH_Queries.getQ6Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery6_Memsql, TPCH_Queries.getQ6Parameter(isDynamic)) + } + case "7" => + createQuery(TPCH_Queries.getQuery7, TPCH_Queries.getQ7Parameter(isDynamic)) + case "8" => + createQuery(TPCH_Queries.getQuery8, TPCH_Queries.getQ8Parameter(isDynamic)) + case "9" => + createQuery(TPCH_Queries.getQuery9, TPCH_Queries.getQ9Parameter(isDynamic)) + case "10" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery10, TPCH_Queries.getQ10Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery10_Memsql, TPCH_Queries.getQ10Parameter(isDynamic)) + } + case "11" => + createQuery(TPCH_Queries.getQuery11, TPCH_Queries.getQ11Parameter(isDynamic)) + case "12" => 
+ if (isSnappy) { + createQuery(TPCH_Queries.getQuery12, TPCH_Queries.getQ12Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery12_Memsql, TPCH_Queries.getQ12Parameter(isDynamic)) + } + case "13" => + createQuery(TPCH_Queries.getQuery13, TPCH_Queries.getQ13Parameter(isDynamic)) + case "14" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery14, TPCH_Queries.getQ14Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery14_Memsql, TPCH_Queries.getQ14Parameter(isDynamic)) + } + case "15" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery15_Temp, TPCH_Queries.getQ15TempParameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery15_Temp_Memsql, + TPCH_Queries.getQ15TempParameter(isDynamic)) + } + case "16" => + createQuery(TPCH_Queries.getQuery16, TPCH_Queries.getQ16Parameter(isDynamic)) + case "17" => + createQuery(TPCH_Queries.getQuery17, TPCH_Queries.getQ17Parameter(isDynamic)) + case "18" => + createQuery(TPCH_Queries.getQuery18, TPCH_Queries.getQ18Parameter(isDynamic)) + case "19" => + createQuery(TPCH_Queries.getQuery19, TPCH_Queries.getQ19Parameter(isDynamic)) + case "20" => + if (isSnappy) { + createQuery(TPCH_Queries.getQuery20, TPCH_Queries.getQ20Parameter(isDynamic)) + } else { + createQuery(TPCH_Queries.getQuery20_Memsql, TPCH_Queries.getQ20Parameter(isDynamic)) + } + case "21" => + createQuery(TPCH_Queries.getQuery21, TPCH_Queries.getQ21Parameter(isDynamic)) + case "22" => + createQuery(TPCH_Queries.getQuery22, TPCH_Queries.getQ22Parameter(isDynamic)) + } + + def createQuery(query: String, paramters: Array[String]): String = { + var generatedQuery = query + for (s <- paramters) { + logInfo(s"KBKBKB : createQuery : $s") + generatedQuery = generatedQuery.replaceFirst("\\?", s) + } + logInfo(s"KBKBKB : My query : $generatedQuery") + generatedQuery + } + + + def getQuery1: String = { + // DELTA = 90 + " select" + + " l_returnflag," + + " l_linestatus," + + " sum(l_quantity) as sum_qty," + + " sum(l_extendedprice) as 
sum_base_price," + + " sum(l_extendedprice*(1-l_discount)) as sum_disc_price," + + " sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge," + + " avg(l_quantity) as avg_qty," + + " avg(l_extendedprice) as avg_price," + + " avg(l_discount) as avg_disc," + + " count(*) as count_order" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate <= DATE_SUB('1997-12-31', ? )" + + " group by" + + " l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + + } + + def getQuery1_Memsql: String = { + // DELTA = 90 + " select" + + " l_returnflag," + + " l_linestatus," + + " sum(l_quantity) as sum_qty," + + " sum(l_extendedprice) as sum_base_price," + + " sum(l_extendedprice*(1-l_discount)) as sum_disc_price," + + " sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge," + + " avg(l_quantity) as avg_qty," + + " avg(l_extendedprice) as avg_price," + + " avg(l_discount) as avg_disc," + + " count(*) as count_order" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate <= '1997-12-31' - interval '?' 
day" + + " group by" + + " l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + } + + + def getQ1Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + val min = 60 + val max = 120 + Array(s"${min + random.nextInt((max - min) + 1)}") + } else { + Array("90") + } + } + + + def getResultString1: String = { + "l_returnflag|l_linestatus|sum_qty|sum_base_price|sum_disc_price|sum_charge|avg_qty|avg_price" + + "|avg_disc|count_order" + } + + def getQuery2: String = { + " select" + + " S_ACCTBAL," + + " S_NAME," + + " N_NAME," + + " P_PARTKEY," + + " P_MFGR," + + " S_ADDRESS," + + " S_PHONE," + + " S_COMMENT" + + " from" + + " SUPPLIER," + + " NATION," + + " REGION," + + " PART," + + " PARTSUPP" + + " where" + + " S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = '?'" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_PARTKEY = PS_PARTKEY" + + " and P_SIZE = ?" + + " and P_TYPE like '%?'" + + " and PS_SUPPLYCOST = (" + + " select" + + " min(PS_SUPPLYCOST)" + + " from" + + " SUPPLIER, NATION," + + " REGION, PARTSUPP" + + " where" + + " S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = '?'" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_PARTKEY = PS_PARTKEY" + + " )" + + " order by" + + " S_ACCTBAL desc," + + " N_NAME," + + " S_NAME," + + " P_PARTKEY" + + " limit 100" + } + + def getQuery2ForPrepStatement: String = { + " select" + + " S_ACCTBAL," + + " S_NAME," + + " N_NAME," + + " P_PARTKEY," + + " P_MFGR," + + " S_ADDRESS," + + " S_PHONE," + + " S_COMMENT" + + " from" + + " SUPPLIER," + + " NATION," + + " REGION," + + " PART," + + " PARTSUPP" + + " where" + + " S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = ?" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_PARTKEY = PS_PARTKEY" + + " and P_SIZE = ?" + + " and P_TYPE like ?" 
+ + " and PS_SUPPLYCOST = (" + + " select" + + " min(PS_SUPPLYCOST)" + + " from" + + " SUPPLIER, NATION," + + " REGION, PARTSUPP" + + " where" + + " S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = ?" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_PARTKEY = PS_PARTKEY" + + " )" + + " order by" + + " S_ACCTBAL desc," + + " N_NAME," + + " S_NAME," + + " P_PARTKEY" + + " limit 100" + } + + + def getQ2Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + // size 1 to 50 + // type TIN NICKEL BRASS STEEL COPPER + // region AFRICA AMERICA ASIA EUROPE MIDDLE EAST + val min = 1 + val max = 50 + val size = s"${min + random.nextInt((max - min) + 1)}" + val syllable3 = Array("TIN", "NICKEL", "BRASS", "STEEL", "COPPER") + val syllableIndex = random.nextInt(syllable3.length) + val syllableType = syllable3(syllableIndex) + val regions = Array("AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST") + val regionIndex = random.nextInt(regions.length) + val region = regions(regionIndex) + Array(region, size, syllableType, region) + } + else { + Array("ASIA", "24", "STEEL", "ASIA") + } + } + + def getResultString2: String = { + "S_ACCTBAL|S_NAME|N_NAME|P_PARTKEY|P_MFGR|S_ADDRESS|S_PHONE|S_COMMENT" + } + + def getQuery3: String = { + " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " o_orderdate," + + " o_shippriority" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER " + + " where" + + " C_MKTSEGMENT = '?'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < '?'" + + " and l_shipdate > '?'
" + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " l_orderkey" + + " limit 10" + } + + def getQuery3ForPrepStatement: String = { + " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " o_orderdate," + + " o_shippriority" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER " + + " where" + + " C_MKTSEGMENT = ?" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < ?" + + " and l_shipdate > ? " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " l_orderkey" + + " limit 10" + } + + // Parameters for query 3: the market segment, then the same date for both date placeholders. + def getQ3Parameter(isDynamic: Boolean): Array[String] = { + // segment AUTOMOBILE BUILDING FURNITURE MACHINERY HOUSEHOLD + // date1 randomly selected day within [1995-03-01 .. 1995-03-31] + if (isDynamic) { + val segments = Array("AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD") + val segmentIndex = random.nextInt(segments.length) + val segment = segments(segmentIndex) + + val fromDate = java.time.LocalDate.of(1995, 3, 1) + val toDate = java.time.LocalDate.of(1995, 3, 31) + val diff = java.time.temporal.ChronoUnit.DAYS.between(fromDate, toDate) + // DAYS.between stops short of toDate, so diff is 30; drawing from one more + // offset (0..30) lets the last day, 1995-03-31, be selected as well + val selectedDate = fromDate.plusDays(random.nextInt(diff.toInt + 1)) + Array(segment, selectedDate.toString, selectedDate.toString) + } else { + Array("BUILDING", "1995-03-15", "1995-03-15") + } + } + + def getResultString3: String = { + "l_orderkey|revenue|o_orderdate|o_shippriority" + } + + def getQuery4: String = { + // 1.DATE = 1993-07-01.
+ " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= '?'" + + " and o_orderdate < add_months('?',3)" + + " and exists (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + } + + def getQuery4ForPrepStatement: String = { + // 1.DATE = 1993-07-01. + " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= ?" + + " and o_orderdate < add_months(?, 3)" + + " and exists (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + } + + def getQuery4_Memsql: String = { + // 1.DATE = 1993-07-01. + " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= '?'" + + " and o_orderdate < '?' + interval '3' month" + + " and exists (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + } + + def getQ4Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* DATE is the first day of a randomly selected month between the first + month of 1993 and the 10th month of 1997. 
*/ + // Count months from 1993-01 so that every month up to 1997-10 is reachable: + // four full years (48 months) and January..October 1997 (10 months) give 58 choices. + val firstMonth = java.time.LocalDate.of(1993, 1, 1) + val monthCount = 58 + val date = firstMonth.plusMonths(random.nextInt(monthCount)) + // the same date fills both date placeholders of query 4 + Array(date.toString, date.toString) + } else { + Array("1993-07-01", "1993-07-01") + } + } + + + def getResultString4: String = { + "o_orderpriority|order_count" + } + + def getQuery5: String = { + // 1. REGION = ASIA; + // 2. DATE = 1994-01-01. + " select" + + " n_name," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " SUPPLIER," + + " NATION," + + " REGION," + + " ORDERS," + + " LINEITEM," + + " CUSTOMER" + + " where" + + " s_nationkey = n_nationkey" + + " and n_regionkey = r_regionkey" + + " and r_name = '?'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = s_suppkey" + + " and C_NATIONKEY = s_nationkey" + + " and o_orderdate >= '?'" + + " and o_orderdate < add_months('?', 12)" + + " group by" + + " n_name" + + " order by" + + " revenue desc" + } + + def getQuery5ForPrepStatement: String = { + // 1. REGION = ASIA; + // 2. DATE = 1994-01-01. + " select" + + " n_name," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " SUPPLIER," + + " NATION," + + " REGION," + + " ORDERS," + + " LINEITEM," + + " CUSTOMER" + + " where" + + " s_nationkey = n_nationkey" + + " and n_regionkey = r_regionkey" + + " and r_name = ?" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = s_suppkey" + + " and C_NATIONKEY = s_nationkey" + + " and o_orderdate >= ?" + + " and o_orderdate < add_months(?, 12)" + + " group by" + + " n_name" + + " order by" + + " revenue desc" + } + + def getQuery5_Memsql: String = { + // 1. REGION = ASIA; + // 2. DATE = 1994-01-01.
+ " select" + + " N_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " SUPPLIER," + + " NATION," + + " REGION," + + " ORDERS," + + " LINEITEM," + + " CUSTOMER" + + " where" + + " s_nationkey = n_nationkey" + + " and n_regionkey = r_regionkey" + + " and r_name = '?'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = s_suppkey" + + " and C_NATIONKEY = s_nationkey" + + " and o_orderdate >= '?'" + + " and o_orderdate < '?' + interval '1' year" + + " group by" + + " N_NAME" + + " order by" + + " revenue desc" + } + + def getQ5Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1. REGION is randomly selected within the list of values defined for R_NAME + in Clause 4.2.3; + 2. DATE is the first of January of a randomly selected year within [1993 .. 1997] */ + + val regions = Array("AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST") + val regionIndex = random.nextInt(regions.length) + val region = regions(regionIndex) + + val minYear = 1993 + val maxYear = 1997 + val year = { + minYear + random.nextInt((maxYear - minYear) + 1) + } + + val date = java.time.LocalDate.of(year, 1, 1) + Array(region, date.toString, date.toString) + } else { + Array("ASIA", "1994-01-01", "1994-01-01") + } + } + + + def getResultString5: String = { + "N_NAME|revenue" + } + + def getQuery6: String = { + // 1. DATE = 1994-01-01; + // 2. DISCOUNT = 0.06; + // 3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '?'" + + " and l_shipdate < add_months('?', 12)" + + " and l_discount between ? - 0.01 and ? + 0.01" + + " and l_quantity < ?" + } + + def getQuery6ForPrepStatement: String = { + // 1. DATE = 1994-01-01; + // 2. DISCOUNT = 0.06; + // 3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= ?"
+ + " and l_shipdate < add_months(?, 12)" + + " and l_discount between ? - 0.01 and ? + 0.01" + + " and l_quantity < ?" + } + + def getQuery6_Memsql: String = { + // 1. DATE = 1994-01-01; + // 2. DISCOUNT = 0.06; + // 3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '?'" + + " and l_shipdate < '?' + interval '1' year" + + " and l_discount between ? - 0.01 and ? + 0.01" + + " and l_quantity < ?" + } + + def getQ6Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1. DATE is the first of January of a randomly selected year within [1993 .. 1997]; + 2. DISCOUNT is randomly selected within [0.02 .. 0.09]; + 3. QUANTITY is randomly selected within [24 .. 25]. */ + + val minYear = 1993 + val maxYear = 1997 + val year = { + minYear + random.nextInt((maxYear - minYear) + 1) + } + val date = java.time.LocalDate.of(year, 1, 1) + + val discounts = Array("0.02", "0.03", "0.04", "0.05", "0.06", "0.07", "0.08", "0.09") + val discountIndex = random.nextInt(discounts.length) + val discount = discounts(discountIndex) + + val minQuantity = 24 + val maxQuantity = 25 + val quantity = s"${minQuantity + random.nextInt((maxQuantity - minQuantity) + 1)}" + + Array(date.toString, date.toString, discount, discount, quantity) + } else { + Array("1994-01-01", "1994-01-01", "0.06", "0.06", "24") + } + } + + def getResultString6: String = { + "revenue" + } + + def getQuery7: String = { + // 1. NATION1 = FRANCE; + // 2. NATION2 = GERMANY. 
+ "select" + + " supp_nation," + + " cust_nation," + + " l_year, " + + " sum(volume) as revenue" + + " from (" + + " select" + + " n1.n_name as supp_nation," + + " n2.n_name as cust_nation," + + // " extract m(year from l_shipdate) as l_year," + + " year(l_shipdate) as l_year," + + " l_extendedprice * (1 - l_discount) as volume" + + " from" + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2" + + " where" + + " s_suppkey = l_suppkey" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " and s_nationkey = n1.n_nationkey" + + " and C_NATIONKEY = n2.n_nationkey" + + " and (" + + " (n1.n_name = '?' and n2.n_name = '?')" + + " or (n1.n_name = '?' and n2.n_name = '?')" + + " )" + + " and l_shipdate between '1995-01-01' and '1996-12-31'" + + " ) as shipping" + + " group by" + + " supp_nation," + + " cust_nation," + + " l_year" + + " order by" + + " supp_nation," + + " cust_nation," + + " l_year" + } + + def getQuery7ForPrepStatement: String = { + // 1. NATION1 = FRANCE; + // 2. NATION2 = GERMANY. + "select" + + " supp_nation," + + " cust_nation," + + " l_year, " + + " sum(volume) as revenue" + + " from (" + + " select" + + " n1.n_name as supp_nation," + + " n2.n_name as cust_nation," + + // " extract m(year from l_shipdate) as l_year," + + " year(l_shipdate) as l_year," + + " l_extendedprice * (1 - l_discount) as volume" + + " from" + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2" + + " where" + + " s_suppkey = l_suppkey" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " and s_nationkey = n1.n_nationkey" + + " and C_NATIONKEY = n2.n_nationkey" + + " and (" + + " (n1.n_name = ? and n2.n_name = ?)" + + " or (n1.n_name = ? 
and n2.n_name = ?)" + + " )" + + " and l_shipdate between '1995-01-01' and '1996-12-31'" + + " ) as shipping" + + " group by" + + " supp_nation," + + " cust_nation," + + " l_year" + + " order by" + + " supp_nation," + + " cust_nation," + + " l_year" + } + + // Parameters for query 7: the nation pair, listed in both orders for the two OR branches. + def getQ7Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1. NATION1 is randomly selected within the list of values defined for N_NAME in Clause + 4.2.3; + 2. NATION2 is randomly selected within the list of values defined for N_NAME in Clause 4.2.3 and + must be different from the value selected for NATION1 in item 1 above. */ + + val nations = Array("ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", + "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", "KENYA", + "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM", + "RUSSIA", "UNITED KINGDOM", "UNITED STATES") + + val nation1Index = random.nextInt(nations.length) + val nation1 = nations(nation1Index) + + // A non-zero offset, wrapped around the array, makes NATION2 random as well + // while guaranteeing it can never land on NATION1. + val nation2Offset = 1 + random.nextInt(nations.length - 1) + val nation2Index = (nation1Index + nation2Offset) % nations.length + val nation2 = nations(nation2Index) + + Array(nation1, nation2, nation2, nation1) + } else { + Array("FRANCE", "GERMANY", "GERMANY", "FRANCE") + } + } + + def getResultString7: String = { + "supp_nation|cust_nation|l_year|revenue" + } + + def getQuery8: String = { + // 1. NATION = BRAZIL; + // 2. REGION = AMERICA; + // 3. TYPE = ECONOMY ANODIZED STEEL.
+ "select" + + " o_year," + + " sum(case" + + " when nation = '?'" + + " then volume" + + " else 0" + + " end) / sum(volume) as mkt_share" + + " from (" + + " select" + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1-l_discount) as volume," + + " n2.n_name as nation" + + " from" + + " LINEITEM," + + " PART," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " REGION," + + " NATION n2," + + " SUPPLIER" + + " where" + + " p_partkey = l_partkey" + + " and l_orderkey = o_orderkey" + + " and o_custkey = C_CUSTKEY" + + " and C_NATIONKEY = n1.n_nationkey" + + " and n1.n_regionkey = r_regionkey" + + " and r_name = '?'" + + " and o_orderdate between '1995-01-01' and '1996-12-31'" + + " and p_type = '?'" + + " and s_suppkey = l_suppkey" + + " and s_nationkey = n2.n_nationkey" + + " ) as all_nations" + + " group by" + + " o_year" + + " order by" + + " o_year" + + } + + def getQuery8ForPrepStatement: String = { + // 1. NATION = BRAZIL; + // 2. REGION = AMERICA; + // 3. TYPE = ECONOMY ANODIZED STEEL. + "select" + + " o_year," + + " sum(case" + + " when nation = ?" + + " then volume" + + " else 0" + + " end) / sum(volume) as mkt_share" + + " from (" + + " select" + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1-l_discount) as volume," + + " n2.n_name as nation" + + " from" + + " LINEITEM," + + " PART," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " REGION," + + " NATION n2," + + " SUPPLIER" + + " where" + + " p_partkey = l_partkey" + + " and l_orderkey = o_orderkey" + + " and o_custkey = C_CUSTKEY" + + " and C_NATIONKEY = n1.n_nationkey" + + " and n1.n_regionkey = r_regionkey" + + " and r_name = ?" + + " and o_orderdate between '1995-01-01' and '1996-12-31'" + + " and p_type = ?" + + " and s_suppkey = l_suppkey" + + " and s_nationkey = n2.n_nationkey" + + " ) as all_nations" + + " group by" + + " o_year" + + " order by" + + " o_year" + + } + + def getQ8Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* + 1. 
NATION is randomly selected within the list of values defined for N_NAME in Clause 4.2.3; + 2. REGION is the value defined in Clause 4.2.3 for R_NAME where R_REGIONKEY corresponds to + N_REGIONKEY for the selected NATION in item 1 above; + 3. TYPE is randomly selected within the list of 3-syllable strings defined for Types in + Clause 4.2.2.13. */ + + val nationsMap: Map[String, Array[String]] = + Map("AFRICA" -> Array("ALGERIA", "ETHIOPIA", "KENYA", "MOROCCO", "MOZAMBIQUE"), + "AMERICA" -> Array("ARGENTINA", "BRAZIL", "CANADA", "PERU", "UNITED STATES"), + "ASIA" -> Array("INDIA", "INDONESIA", "JAPAN", "CHINA", "VIETNAM"), + "EUROPE" -> Array("FRANCE", "GERMANY", "ROMANIA", "RUSSIA", "UNITED KINGDOM"), + "MIDDLE EAST" -> Array("EGYPT", "IRAN", "IRAQ", "JORDAN", "SAUDI ARABIA")) + + val regionKeys = nationsMap.keySet.toArray + val regionIndex = random.nextInt(regionKeys.length) + val region = regionKeys(regionIndex) + + val nations: Array[String] = nationsMap(region) + val nationIndex = random.nextInt(nations.length) + val nation = nations(nationIndex) + + val syllables1 = Array("STANDARD", "SMALL", "MEDIUM", "LARGE", "ECONOMY", "PROMO") + val syllables2 = Array("ANODIZED", "BURNISHED", "PLATED", "POLISHED", "BRUSHED") + val syllables3 = Array("TIN", "NICKEL", "BRASS", "STEEL", "COPPER") + + val syllable1Index = random.nextInt(syllables1.length) + val syllable1 = syllables1(syllable1Index) + + val syllable2Index = random.nextInt(syllables2.length) + val syllable2 = syllables2(syllable2Index) + + val syllable3Index = random.nextInt(syllables3.length) + val syllable3 = syllables3(syllable3Index) + + val pType = s"$syllable1 $syllable2 $syllable3" + + Array(nation, region, pType) + } else { + Array("BRAZIL", "AMERICA", "ECONOMY ANODIZED STEEL") + } + } + + def getResultString8: String = { + "YEAR|MKT_SHARE" + } + + def getQuery9: String = { + // 1. COLOR = green. 
+ "select" + + " nation," + + " o_year," + + " sum(amount) as sum_profit" + + " from (" + + " select" + + " n_name as nation," + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as " + + "amount" + + " from" + + " LINEITEM," + + " PART," + + " ORDERS," + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " s_suppkey = l_suppkey" + + " and ps_suppkey = l_suppkey" + + " and ps_partkey = l_partkey" + + " and p_partkey = l_partkey" + + " and o_orderkey = l_orderkey" + + " and s_nationkey = n_nationkey" + + " and p_name like '%?%'" + + " ) as profit" + + " group by" + + " nation," + + " o_year" + + " order by" + + " nation," + + " o_year desc" + + } + + def getQuery9ForPrepStatement: String = { + // 1. COLOR = green. + "select" + + " nation," + + " o_year," + + " sum(amount) as sum_profit" + + " from (" + + " select" + + " n_name as nation," + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as " + + "amount" + + " from" + + " LINEITEM," + + " PART," + + " ORDERS," + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " s_suppkey = l_suppkey" + + " and ps_suppkey = l_suppkey" + + " and ps_partkey = l_partkey" + + " and p_partkey = l_partkey" + + " and o_orderkey = l_orderkey" + + " and s_nationkey = n_nationkey" + + " and p_name like ? " + + " ) as profit" + + " group by" + + " nation," + + " o_year" + + " order by" + + " nation," + + " o_year desc" + + } + + def getQ9Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* COLOR is randomly selected within the list of values defined for the generation + of P_NAME in Clause 4.2.3. 
*/ + + val pnames = Array("almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", + "blanched", "blue", "blush", "brown", "burlywood", "burnished", "chartreuse", + "chiffon", "chocolate", "coral", "cornflower", "cornsilk", "cream", "cyan", "dark", + "deep", "dim", "dodger", "drab", "firebrick", "floral", "forest", "frosted", + "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", "hot", "indian", + "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen", + "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", + "navajo", "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", + "pink", "plum", "powder", "puff", "purple", "red", "rose", "rosy", "royal", "saddle", + "salmon", "sandy", "seashell", "sienna", "sky", "slate", "smoke", "snow", "spring", + "steel", "tan", "thistle", "tomato", "turquoise", "violet", "wheat", "white", "yellow") + + val pnameIndex = random.nextInt(pnames.length) + val pname = pnames(pnameIndex) + + Array(pname) + } else { + Array("green") + } + } + + def getResultString9: String = { + "NATION|YEAR|SUM_PROFIT" + } + + def getQuery10: String = { + // 1.DATE = 1993-10-01. + "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " n_name," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '?'" + + " and o_orderdate < add_months('?', 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = n_nationkey" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " n_name," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + } + + def getQuery10ForPrepStatement: String = { + // 1.DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " n_name," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= ?" + + " and o_orderdate < add_months(?, 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = n_nationkey" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " n_name," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + } + + def getQuery10_Memsql: String = { + // 1.DATE = 1993-10-01. + "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " n_name," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '?'" + + " and o_orderdate < '?' + interval '3' month" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = N_NATIONKEY" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " N_NAME," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + + } + + def getQuery10_ForPrepareStatement: String = { + // 1.DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " n_name," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '1993-10-01'" + + " and o_orderdate < add_months('1993-10-01', 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = n_nationkey" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " n_name," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + } + + // Parameters for query 10: one date, repeated for both date placeholders. + def getQ10Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* DATE is the first day of a randomly selected month from the second + month of 1993 to the first month of 1995. */ + // 24 candidate months: 1993-02 .. 1993-12 (11), all of 1994 (12) and 1995-01 (1). + // Offsetting from the first one keeps 1994-01 and 1995-01 reachable. + val firstMonth = java.time.LocalDate.of(1993, 2, 1) + val monthCount = 24 + val date = firstMonth.plusMonths(random.nextInt(monthCount)) + Array(date.toString, date.toString) + } else { + Array("1993-10-01", "1993-10-01") + } + } + + def getResultString10: String = { + "C_CUSTKEY|C_NAME|REVENUE|C_ACCTBAL|N_NAME|C_ADDRESS|C_PHONE|C_COMMENT" + } + + def getQuery11: String = { + // 1. NATION = GERMANY; + // 2. FRACTION = 0.0001.
+ "select" + + " PS_PARTKEY," + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) as value" + + " from" + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = '?'" + + " group by" + + " PS_PARTKEY having" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) > (" + + " select" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) * 0.0000001" + + " from" + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = '?'" + + " )" + + " order by" + + " value desc" + } + + def getQuery11ForPrepStatement: String = { + // 1. NATION = GERMANY; + // 2. FRACTION = 0.0001. + "select" + + " PS_PARTKEY," + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) as value" + + " from" + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = ?" + + " group by" + + " PS_PARTKEY having" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) > (" + + " select" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) * 0.0000001" + + " from" + + " SUPPLIER," + + " NATION," + + " PARTSUPP" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = ?" + + " )" + + " order by" + + " value desc" + } + + def getQ11Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1. NATION is randomly selected within the list of values defined for N_NAME + in Clause 4.2.3; + 2. FRACTION is chosen as 0.0001 / SF. 
*/ + + val nations = Array("ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", + "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", "KENYA", + "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM", + "RUSSIA", "UNITED KINGDOM", "UNITED STATES") + + val nation1Index = random.nextInt(nations.length) + val nation = nations(nation1Index) + + Array(nation, nation) + } else { + Array("GERMANY", "GERMANY") + } + } + + def getResultString11: String = { + "PS_PARTKEY|VALUE" + } + + def getQuery12: String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. + "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in ('?', '?')" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= '?'" + + " and l_receiptdate < add_months('?',12)" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + } + + def getQuery12ForPrepStatement: String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. 
+ "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in (?, ?)" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= ?" + + " and l_receiptdate < add_months(?,12)" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + } + + def getQuery12_Memsql: String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. + "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in ('?', '?')" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= '?'" + + " and l_receiptdate < '?' + interval '1' year" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + + } + + def getQ12Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1.SHIPMODE1 is randomly selected within the list of values defined for Modes in + Clause 4.2.2.13; + 2. SHIPMODE2 is randomly selected within the list of values defined for Modes in + Clause 4.2.2.13 and must be different from the value selected for SHIPMODE1 in item 1; + 3. DATE is the first of January of a randomly selected year within [1993 .. 1997]. 
*/ + + val shipmodes = Array("REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB") + + val shipmode1Index = random.nextInt(shipmodes.length) + val shipmode1 = shipmodes(shipmode1Index) + + // A non-zero offset, wrapped around the array, makes the second mode random as well + // while guaranteeing it can never equal the first one. + val shipmode2Offset = 1 + random.nextInt(shipmodes.length - 1) + val shipmode2Index = (shipmode1Index + shipmode2Offset) % shipmodes.length + val shipmode2 = shipmodes(shipmode2Index) + + val minYear = 1993 + val maxYear = 1997 + val year = { + minYear + random.nextInt((maxYear - minYear) + 1) + } + val date = java.time.LocalDate.of(year, 1, 1) + + Array(shipmode1, shipmode2, date.toString, date.toString) + } else { + Array("MAIL", "SHIP", "1994-01-01", "1994-01-01") + } + + } + + def getResultString12: String = { + "L_SHIPMODE|HIGH_LINE_COUNT|LOW_LINE_COUNT" + } + + def getQuery13: String = { + // 1. WORD1 = special. + // 2. WORD2 = requests. + "select" + + " c_count, " + + " count(*) as custdist" + + " from (" + + " select" + + " C_CUSTKEY," + + " count(o_orderkey) as c_count" + + " from" + + " CUSTOMER left outer join ORDERS on" + + " C_CUSTKEY = o_custkey" + + " and o_comment not like '%?%?%'" + + " group by" + + " C_CUSTKEY" + + " ) as c_orders" + + " group by" + + " c_count" + + " order by" + + " custdist desc," + + " c_count desc" + } + + def getQuery13ForPrepStatement: String = { + // 1. WORD1 = special. + // 2. WORD2 = requests. + "select" + + " c_count, " + + " count(*) as custdist" + + " from (" + + " select" + + " C_CUSTKEY," + + " count(o_orderkey) as c_count" + + " from" + + " CUSTOMER left outer join ORDERS on" + + " C_CUSTKEY = o_custkey" + + " and o_comment not like ?" + + " group by" + + " C_CUSTKEY" + + " ) as c_orders" + + " group by" + + " c_count" + + " order by" + + " custdist desc," + + " c_count desc" + } + + def getQ13Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1. WORD1 is randomly selected from 4 possible values: special, pending, unusual, express. + 2.
WORD2 is randomly selected from 4 possible values: packages, requests, accounts, + deposits */ + + val words1 = Array("special", "pending", "unusual", "express") + val words2 = Array("packages", "requests", "accounts", "deposits") + + val word1Index = random.nextInt(words1.length) + val word1 = words1(word1Index) + + val word2Index = random.nextInt(words2.length) + val word2 = words2(word2Index) + + Array(word1, word2) + } else { + Array("special", "requests") + } + + } + + def getResultString13: String = { + "C_COUNT|CUSTDIST" + } + + def getQuery14: String = { + // 1.DATE = 1995-09-01. + "select" + + " 100.00 * sum(case" + + " when p_type like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = p_partkey" + + " and l_shipdate >= '?'" + + " and l_shipdate < add_months ('?', 1)" + } + + def getQuery14ForPrepStatement: String = { + // 1.DATE = 1995-09-01. + "select" + + " 100.00 * sum(case" + + " when p_type like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = p_partkey" + + " and l_shipdate >= ?" + + " and l_shipdate < add_months (?, 1)" + } + + def getQuery14_Memsql: String = { + // 1.DATE = 1995-09-01. 
+ "select" + + " 100.00 * sum(case" + + " when P_TYPE like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = P_PARTKEY" + + " and l_shipdate >= '1995-09-01'" + + " and l_shipdate < '1995-09-01'+ interval '1' month" + + } + + // Parameters for query 14: one date, repeated for both date placeholders. + def getQ14Parameter(isDynamic: Boolean): Array[String] = { + if (isDynamic) { + /* 1.DATE is the first day of a month randomly selected from a random year within + [1993 .. 1997]. */ + + val minYear = 1993 + val maxYear = 1997 + val year = { + minYear + random.nextInt((maxYear - minYear) + 1) + } + // LocalDate.of takes a 1-based month, so shift the 0..11 draw by one + val month = 1 + random.nextInt(12) + val date = java.time.LocalDate.of(year, month, 1) + + Array(date.toString, date.toString) + } else { + Array("1995-09-01", "1995-09-01") + } + } + + def getResultString14: String = { + "PROMO_REVENUE" + } + + def getTempQuery15_Original: String = { + "create temporary view " + + " revenue as" + + " select" + + " l_suppkey as supplier_no ," + + " sum(l_extendedprice * (1 - l_discount)) as total_revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '?'" + + " and l_shipdate < '?'
/**
 * Substitution values for the Q15 `revenue` view / temp query.
 *
 * @param isDynamic when true a random start date is drawn, otherwise the
 *                  fixed pair below is returned.
 * @return a two-element array of ISO-8601 date strings; in dynamic mode both
 *         entries are the same date.
 */
def getQ15TempParameter(isDynamic: Boolean): Array[String] = {
  if (isDynamic) {
    /* DATE is the first day of a randomly selected month between the first month of 1993
       and the 10th month of 1997. */
    val firstMonth = java.time.LocalDate.of(1993, 1, 1)
    // 1993-01 .. 1997-10 inclusive: four full years plus ten months.
    val candidateMonths = 4 * 12 + 10
    val date = firstMonth.plusMonths(random.nextInt(candidateMonths).toLong).toString

    Array(date, date)
  } else {
    // NOTE(review): the two fixed values differ, unlike the dynamic branch which
    // repeats one date - left untouched, confirm whether this is intentional.
    Array("1993-02-01", "1996-01-01")
  }
}
/**
 * Substitution values for TPC-H Q16.
 *
 * @param isDynamic when true the values are drawn at random, otherwise a
 *                  fixed set is returned.
 * @return eleven strings: the two brand digits M and N, the two-word type
 *         prefix, then eight part sizes.
 */
def getQ16Parameter(isDynamic: Boolean): Array[String] = {
  if (isDynamic) {
    /* 1. BRAND = Brand#MN where M and N are two single character strings representing
          two numbers randomly and independently selected within [1 .. 5];
       2. TYPE is made of the first 2 syllables of a string randomly selected within the
          list of 3-syllable strings defined for Types in Clause 4.2.2.13;
       3. SIZE1 .. SIZE8 are randomly selected as a set of eight different values
          within [1 .. 50]. */

    val brands = Array("1", "2", "3", "4", "5")
    val m = brands(random.nextInt(brands.length))
    val n = brands(random.nextInt(brands.length))

    val syllables1 = Array("STANDARD", "SMALL", "MEDIUM", "LARGE", "ECONOMY", "PROMO")
    val syllables2 = Array("ANODIZED", "BURNISHED", "PLATED", "POLISHED", "BRUSHED")
    val syllable1 = syllables1(random.nextInt(syllables1.length))
    val syllable2 = syllables2(random.nextInt(syllables2.length))
    val pType = s"$syllable1 $syllable2"

    val min = 1
    val max = 50
    // Keep drawing until eight *different* sizes are collected; the bound is
    // (max - min + 1) so that 50 itself can be selected. Insertion order is kept.
    val sizes = scala.collection.mutable.LinkedHashSet.empty[Int]
    while (sizes.size < 8) {
      sizes += min + random.nextInt(max - min + 1)
    }
    val sizeStrings = sizes.toArray.map(_.toString)

    Array(m, n, pType) ++ sizeStrings
  } else {
    Array("4", "5", "MEDIUM POLISHED", "49", "14", "23", "45", "19", "3", "36", "9")
  }
}
/**
 * Substitution values for TPC-H Q17.
 *
 * @param isDynamic when false the fixed triple ("2", "3", "SM PACK") is
 *                  returned; when true each part is drawn at random.
 * @return three strings: brand digit M, brand digit N and the two-word
 *         container name.
 */
def getQ17Parameter(isDynamic: Boolean): Array[String] = {
  if (!isDynamic) {
    Array("2", "3", "SM PACK")
  } else {
    // BRAND = 'Brand#MN', M and N drawn independently within [1 .. 5].
    val digits = Array("1", "2", "3", "4", "5")
    val firstDigit = digits(random.nextInt(digits.length))
    val secondDigit = digits(random.nextInt(digits.length))

    // CONTAINER is one word from each of the two container syllable lists.
    val sizeWords = Array("SM", "LG", "MED", "JUMBO", "WRAP")
    val packWords = Array("CASE", "BOX", "BAG", "JAR", "PKG", "PACK", "CAN", "DRUM")
    val sizeWord = sizeWords(random.nextInt(sizeWords.length))
    val packWord = packWords(random.nextInt(packWords.length))

    Array(firstDigit, secondDigit, sizeWord + " " + packWord)
  }
}
+ + " ) as temp," + + " ORDERS," + + " CUSTOMER" + + " where" + + " l_orderkey = temp.o" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " group by" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice" + + " order by" + + " o_totalprice desc," + + " o_orderdate limit 100" + } */ + + def getQuery18: String = { + // 1.QUANTITY = 300 + "select" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice," + + " sum(l_quantity)" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey in (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " group by" + + " l_orderkey having" + + " sum(l_quantity) > ?" + + " )" + + " and C_CUSTKEY = o_custkey" + + " and o_orderkey = l_orderkey" + + " group by" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice" + + " order by" + + " o_totalprice desc," + + " o_orderdate" + + " limit 100" + } + + def getQuery18ForPrepStatement: String = { + // 1.QUANTITY = 300 + "select" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice," + + " sum(l_quantity)" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey in (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " group by" + + " l_orderkey having" + + " sum(l_quantity) > ?" 
/**
 * Substitution value for TPC-H Q18.
 *
 * @param isDynamic when true QUANTITY is drawn at random, otherwise "300".
 * @return a single-element array holding the quantity threshold.
 */
def getQ18Parameter(isDynamic: Boolean): Array[String] = {
  if (isDynamic) {
    /* QUANTITY is randomly selected within [312..315] */
    val min = 312
    val max = 315
    // nextInt's bound is exclusive, so +1 is needed for 315 to be reachable.
    val quantity = min + random.nextInt(max - min + 1)
    Array(quantity.toString)
  } else {
    Array("300")
  }
}
/**
 * Substitution values for TPC-H Q19.
 *
 * The result is laid out as three (brand, quantity, quantity) groups. Each
 * quantity appears twice because the Q19 text in this file uses it for both
 * the lower bound and the `? + 10` upper bound.
 *
 * @param isDynamic when true the values are drawn at random, otherwise a
 *                  fixed set is returned.
 * @return nine strings: MN1, Q1, Q1, MN2, Q2, Q2, MN3, Q3, Q3.
 */
def getQ19Parameter(isDynamic: Boolean): Array[String] = {
  if (isDynamic) {
    /* 1. QUANTITY1 is randomly selected within [1..10].
       2. QUANTITY2 is randomly selected within [10..20].
       3. QUANTITY3 is randomly selected within [20..30].
       4. BRAND1, BRAND2, BRAND3 = 'Brand#MN' where each MN is a two character string
          representing two numbers randomly and independently selected within [1 .. 5] */

    // Both ends inclusive: nextInt's bound is exclusive, hence the +1.
    def pickQuantity(min: Int, max: Int): String =
      (min + random.nextInt(max - min + 1)).toString

    val quantity1 = pickQuantity(1, 10)
    val quantity2 = pickQuantity(10, 20)
    val quantity3 = pickQuantity(20, 30)

    val brands = Array("1", "2", "3", "4", "5")

    // Two independently drawn digits concatenated into "MN".
    def pickBrand(): String = {
      val m = brands(random.nextInt(brands.length))
      val n = brands(random.nextInt(brands.length))
      m + n
    }

    val mn1 = pickBrand()
    val mn2 = pickBrand()
    val mn3 = pickBrand()

    Array(mn1, quantity1, quantity1, mn2, quantity2, quantity2, mn3, quantity3, quantity3)
  } else {
    Array("12", "1", "1", "23", "10", "10", "34", "20", "20")
  }
}
/**
 * Substitution values for TPC-H Q20.
 *
 * @param isDynamic when false the fixed values are returned; when true the
 *                  colour, year and nation are drawn at random.
 * @return four strings: colour, date, the same date again, nation name.
 */
def getQ20Parameter(isDynamic: Boolean): Array[String] = {
  if (!isDynamic) {
    Array("khaki", "1994-01-01", "1994-01-01", "CANADA")
  } else {
    // COLOR: one of the words used when generating P_NAME.
    val colours = Array(
      "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black",
      "blanched", "blue", "blush", "brown", "burlywood", "burnished", "chartreuse",
      "chiffon", "chocolate", "coral", "cornflower", "cornsilk", "cream", "cyan", "dark",
      "deep", "dim", "dodger", "drab", "firebrick", "floral", "forest", "frosted",
      "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", "hot", "indian",
      "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen",
      "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin",
      "navajo", "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru",
      "pink", "plum", "powder", "puff", "purple", "red", "rose", "rosy", "royal", "saddle",
      "salmon", "sandy", "seashell", "sienna", "sky", "slate", "smoke", "snow", "spring",
      "steel", "tan", "thistle", "tomato", "turquoise", "violet", "wheat", "white", "yellow")
    val chosenColour = colours(random.nextInt(colours.length))

    // DATE: first of January of a year within 1993..1997 (both inclusive).
    val firstYear = 1993
    val lastYear = 1997
    val chosenYear = firstYear + random.nextInt((lastYear - firstYear) + 1)
    val januaryFirst = java.time.LocalDate.of(chosenYear, 1, 1).toString

    // NATION: one of the N_NAME values.
    val nationNames = Array(
      "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE",
      "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", "KENYA",
      "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM",
      "RUSSIA", "UNITED KINGDOM", "UNITED STATES")
    val chosenNation = nationNames(random.nextInt(nationNames.length))

    Array(chosenColour, januaryFirst, januaryFirst, chosenNation)
  }
}
+ "select" + + " s_name," + + " count(*) as numwait" + + " from" + + " SUPPLIER," + + " LINEITEM l1," + + " ORDERS," + + " NATION" + + " where" + + " s_suppkey = l1.l_suppkey" + + " and o_orderkey = l1.l_orderkey" + + " and o_orderstatus = 'F'" + + " and l1.l_receiptdate > l1.l_commitdate" + + " and not exists (" + + " select" + + " l3.l_orderkey" + + " from" + + " LINEITEM l3" + + " where" + + " l3.l_orderkey = l1.l_orderkey" + + " and l3.l_suppkey <> l1.l_suppkey" + + " and l3.l_receiptdate > l3.l_commitdate" + + " )" + + " and exists (" + + " select" + + " l2.l_orderkey" + + " from" + + " LINEITEM l2" + + " where" + + " l2.l_orderkey = l1.l_orderkey" + + " and l2.l_suppkey <> l1.l_suppkey" + + " )" + + " and s_nationkey = n_nationkey" + + " and n_name = '?'" + + " group by" + + " s_name" + + " order by" + + " numwait desc," + + " s_name limit 100" + } + + def getQuery21ForPrepStatement: String = { + // NATION = SAUDI ARABIA. + "select" + + " s_name," + + " count(*) as numwait" + + " from" + + " SUPPLIER," + + " LINEITEM l1," + + " ORDERS," + + " NATION" + + " where" + + " s_suppkey = l1.l_suppkey" + + " and o_orderkey = l1.l_orderkey" + + " and o_orderstatus = 'F'" + + " and l1.l_receiptdate > l1.l_commitdate" + + " and not exists (" + + " select" + + " l3.l_orderkey" + + " from" + + " LINEITEM l3" + + " where" + + " l3.l_orderkey = l1.l_orderkey" + + " and l3.l_suppkey <> l1.l_suppkey" + + " and l3.l_receiptdate > l3.l_commitdate" + + " )" + + " and exists (" + + " select" + + " l2.l_orderkey" + + " from" + + " LINEITEM l2" + + " where" + + " l2.l_orderkey = l1.l_orderkey" + + " and l2.l_suppkey <> l1.l_suppkey" + + " )" + + " and s_nationkey = n_nationkey" + + " and n_name = ?" 
/**
 * Substitution value for TPC-H Q21.
 *
 * @param isDynamic when false "VIETNAM" is returned; when true a nation name
 *                  is drawn at random from the list below.
 * @return a single-element array holding the nation name.
 */
def getQ21Parameter(isDynamic: Boolean): Array[String] = {
  if (!isDynamic) {
    Array("VIETNAM")
  } else {
    // NATION is randomly selected within the list of N_NAME values.
    val nationNames = Array(
      "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE",
      "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", "KENYA",
      "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM",
      "RUSSIA", "UNITED KINGDOM", "UNITED STATES")

    Array(nationNames(random.nextInt(nationNames.length)))
  }
}
/**
 * Substitution values for TPC-H Q22.
 *
 * The seven codes are returned twice in a row because the Q22 text in this
 * file contains two seven-entry IN lists (outer filter and the average
 * sub-query).
 *
 * @param isDynamic when true seven distinct codes are drawn at random,
 *                  otherwise a fixed set is returned.
 * @return fourteen strings: the seven country codes followed by the same
 *         seven codes again.
 */
def getQ22Parameter(isDynamic: Boolean): Array[String] = {
  if (isDynamic) {
    /* I1 ... I7 are randomly selected without repetition from the possible values for
       Country code as defined in Clause 4.2.2.9. */

    // Country code = nation key + 10 with 25 nations, i.e. 10..34 inclusive.
    // NOTE(review): range taken from the TPC-H specification - confirm.
    val minCode = 10
    val maxCode = 34
    // The set rejects repeats; insertion order is kept so the output order is
    // the draw order.
    val codes = scala.collection.mutable.LinkedHashSet.empty[String]
    while (codes.size < 7) {
      codes += (minCode + random.nextInt(maxCode - minCode + 1)).toString
    }
    val countryCodes = codes.toArray

    countryCodes ++ countryCodes
  } else {
    Array("13", "31", "23", "29", "30", "18", "17", "13", "31", "23", "29", "30", "18", "17")
  }
}
avg(l_extendedprice) as avg_price_err," + + " avg(l_discount) as avg_disc," + + " error estimate avg(l_discount) as avg_disc_err," + + " count(*) as count_order" + + " from" + + " lineitem_sampled" + + " where" + + " l_shipdate <= DATE_SUB('1998-12-01',90)" + + " group by" + + " l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + + def getSampledQuery3: String = { + // 1. SEGMENT = BUILDING; + // 2. DATE = 1995-03-15. + " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " error estimate sum(l_extendedprice*(1-l_discount)) as revenue_err," + + " o_orderdate," + + " o_shippriority" + + " from" + + " orders_sampled," + + " LINEITEM," + + " CUSTOMER" + + " where" + + " C_MKTSEGMENT = 'BUILDING'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < add_months('1995-03-15',0)" + + " and l_shipdate > add_months('1995-03-15',0) " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " o_orderdate" + } + + def getSampledQuery3_1: String = { + // 1. SEGMENT = BUILDING; + // 2. DATE = 1995-03-15. + " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " error estimate sum(l_extendedprice*(1-l_discount)) as revenue_err," + + " o_orderdate," + + " o_shippriority" + + " from" + + " ORDERS," + + " lineitem_sampled," + + " CUSTOMER" + + " where" + + " C_MKTSEGMENT = 'BUILDING'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < add_months('1995-03-15',0)" + + " and l_shipdate > add_months('1995-03-15',0) " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " o_orderdate" + } + + def getSampledQuery5: String = { + // 1. REGION = ASIA; + // 2. DATE = 1994-01-01. 
+ " select" + + " N_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " error estimate sum(l_extendedprice * (1 - l_discount)) as revenue_err" + + " from" + + " CUSTOMER," + + " ORDERS," + + " lineitem_sampled," + + " SUPPLIER," + + " NATION," + + " REGION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = S_SUPPKEY" + + " and C_NATIONKEY = S_NATIONKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and o_orderdate >= add_months('1994-01-01',0)" + + // " and o_orderdate < date '[DATE]' + interval '1' year" + + " and o_orderdate < add_months('1994-01-01', 12)" + + " group by" + + " N_NAME" + + " order by" + + " revenue desc" + } + + def getSampledQuery6: String = { + // 1. DATE = 1994-01-01; + // 2. DISCOUNT = 0.06; + // 3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue," + + " error estimate sum(l_extendedprice*l_discount) as revenue_err" + + " from" + + " lineitem_sampled" + + " where" + + " l_shipdate >= add_months('1994-01-01',0)" + + " and l_shipdate < add_months('1994-01-01', 12)" + + " and l_discount between 0.06 - 0.01 and 0.06 + 0.01" + + " and l_quantity < 24" + } + + def getSampledQuery10: String = { + // 1. DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " error estimate sum(l_extendedprice * (1 - l_discount)) as revenue_err," + + " C_ACCTBAL," + + " N_NAME," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " orders_sampled," + + " LINEITEM," + + " NATION," + + " CUSTOMER" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= add_months('1993-10-01',0)" + + " and o_orderdate < add_months('1993-10-01', 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = N_NATIONKEY" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " N_NAME," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + } + + def getSampledQuery10_1: String = { + "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " error estimate sum(l_extendedprice * (1 - l_discount)) as revenue_err," + + " C_ACCTBAL," + + " N_NAME," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " lineitem_sampled," + + " NATION," + + " CUSTOMER" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= add_months('1993-10-01',0)" + + " and o_orderdate < add_months('1993-10-01', 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = N_NATIONKEY" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " N_NAME," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala.scala b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala.scala new file mode 100644 index 0000000000..1fdf1f5570 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala.scala @@ -0,0 +1,1097 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
/**
 * Runs one TPC-H query through `stmt` and records the outcome in
 * `<queryNumber>.out`.
 *
 * In result-collection mode the query is run once and every row is written
 * as a comma-separated line below the header string. Otherwise the query is
 * run `warmup + runsForAverage` times; every iteration time is written to
 * the per-query file and "best / average" is appended to the shared
 * `avgPrintStream`, the average covering only the runs after the warm-up.
 *
 * Failures during execution are logged (console, per-query file and average
 * file) and not rethrown.
 *
 * @param queryNumber        query id, "1" .. "22"; any other value raises a
 *                           scala.MatchError before any file is opened.
 * @param isResultCollection true to dump rows, false to measure timings.
 * @param stmt               JDBC statement used for all executions.
 * @param warmup             number of leading iterations excluded from the average.
 * @param runsForAverage     number of iterations that make up the average.
 */
def execute(queryNumber: String, isResultCollection: Boolean, stmt: Statement, warmup:Integer, runsForAverage:Integer): Unit = {

  // Resolved before any stream is opened so that an unknown query number
  // cannot leave an open file handle behind.
  val resultFormat = queryNumber match {
    case "1" => getResultString1()
    case "2" => getResultString2()
    case "3" => getResultString3()
    case "4" => getResultString4()
    case "5" => getResultString5()
    case "6" => getResultString6()
    case "7" => getResultString7()
    case "8" => getResultString8()
    case "9" => getResultString9()
    case "10" => getResultString10()
    case "11" => getResultString11()
    case "12" => getResultString12()
    case "13" => getResultString13()
    case "14" => getResultString14()
    case "15" => getResultString15()
    case "16" => getResultString16()
    case "17" => getResultString17()
    case "18" => getResultString18()
    case "19" => getResultString19()
    case "20" => getResultString20()
    case "21" => getResultString21()
    case "22" => getResultString22()
  }

  val queryFileStream = new FileOutputStream(new File(s"$queryNumber.out"))
  val queryPrintStream = new PrintStream(queryFileStream)

  // Query ids are the bare numbers used in the match above, so Q15 is "15".
  // NOTE(review): assumes getTempQuery15() creates the `revenue` view that has
  // to be dropped before Q15 can run again - confirm.
  val needsViewCleanup = queryNumber == "15"

  var rs: ResultSet = null
  try {
    println(s"Started executing $queryNumber")
    queryPrintStream.println(s"$queryNumber")
    if (isResultCollection) {
      rs = queryExecution(queryNumber, stmt)
      queryPrintStream.println(s"$resultFormat")
      val rsmd = rs.getMetaData()
      val columnsNumber = rsmd.getColumnCount()
      var count: Int = 0
      while (rs.next()) {
        count += 1
        for (i <- 1 to columnsNumber) {
          if (i > 1) queryPrintStream.print(",")
          queryPrintStream.print(rs.getString(i))
        }
        queryPrintStream.println()
      }
      println(s"Number of results : $count")
      println(s"$queryNumber Result Collected in file $queryNumber.out")
      if (needsViewCleanup) {
        stmt.execute("drop view revenue")
      }
    } else {
      var totalTime: Long = 0
      var bestTime: Long = 0
      for (i <- 1 to (warmup + runsForAverage)) {
        val startTime = System.currentTimeMillis()
        rs = queryExecution(queryNumber, stmt)
        while (rs.next()) {
          // just iterating over the result so that fetching is part of the timing
        }
        val endTime = System.currentTimeMillis()
        val iterationTime = endTime - startTime
        // best time is taken over all iterations, warm-up included
        if (i == 1 || iterationTime < bestTime) {
          bestTime = iterationTime
        }
        queryPrintStream.println(s"$iterationTime")
        if (i > warmup) {
          totalTime += iterationTime
        }
        if (needsViewCleanup) {
          stmt.execute("drop view revenue")
        }
      }
      queryPrintStream.println(s"${totalTime / runsForAverage}")
      avgPrintStream.println(s"$queryNumber,$bestTime / ${totalTime / runsForAverage}")
    }
    println(s"Finished executing $queryNumber")
  } catch {
    case e: Exception => {
      e.printStackTrace()
      e.printStackTrace(queryPrintStream)
      e.printStackTrace(avgPrintStream)
      println(s" Exception while executing $queryNumber is written to file $queryNumber.out")
    }
  } finally {
    // The per-query file is private to this call, so it is always released.
    queryPrintStream.close()
    queryFileStream.close()
    if (isResultCollection) {
      // NOTE(review): closes the shared average streams after a single query,
      // kept as in the original - confirm callers rely on this.
      avgPrintStream.close()
      avgFileStream.close()
    }
    // rs stays null when the query failed before producing a result set.
    if (rs != null) {
      rs.close()
    }
  }
}
l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + } + + def getResultString1(): String = { + "l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price sum_charge avg_qty avg_price avg_disc count_order" + } + + def getQuery2(): String = { + // 1. SIZE = 15; + // 2. TYPE = BRASS; + // 3. REGION = EUROPE + + " select" + + " S_ACCTBAL," + + " S_NAME," + + " N_NAME," + + " P_PARTKEY," + + " P_MFGR," + + " S_ADDRESS," + + " S_PHONE," + + " S_COMMENT" + + " from" + + " PART," + + " SUPPLIER," + + " PARTSUPP," + + " NATION," + + " REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_SIZE = 24" + + " and P_TYPE like '%STEEL'" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and PS_SUPPLYCOST = (" + + " select" + + " min(PS_SUPPLYCOST)" + + " from" + + " PARTSUPP, SUPPLIER," + + " NATION, REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " )" + + " order by" + + " S_ACCTBAL desc," + + " N_NAME," + + " S_NAME," + + " P_PARTKEY limit 100" + + } + + def getResultString2(): String = { + "S_ACCTBAL S_NAME N_NAME P_PARTKEY P_MFGR S_ADDRESS S_PHONE S_COMMENT" + } + + def getQuery3(): String = { + // 1. SEGMENT = BUILDING; + // 2. DATE = 1995-03-15. 
+ " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " o_orderdate," + + " o_shippriority" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " C_MKTSEGMENT = 'BUILDING'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < '1995-03-15'" + + " and l_shipdate > '1995-03-15' " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " o_orderdate limit 10" + + } + + def getResultString3(): String = { + "l_orderkey revenue o_orderdate o_shippriority" + } + + def getQuery4(): String = { + //1.DATE = 1993-07-01. + " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= '1993-07-01'" + + " and o_orderdate < add_months('1993-07-01',3)" + + " and exists (" + + " select" + + " *" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + + } + + def getResultString4(): String = { + "o_orderpriority order_count" + } + + def getQuery5(): String = { + //1. REGION = ASIA; + //2. DATE = 1994-01-01. + " select" + + " n_name," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " ORDERS," + + " LINEITEM," + + " SUPPLIER," + + " NATION," + + " REGION," + + " CUSTOMER" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = s_suppkey" + + " and C_NATIONKEY = s_nationkey" + + " and s_nationkey = n_nationkey" + + " and n_regionkey = r_regionkey" + + " and r_name = 'ASIA'" + + " and o_orderdate >= '1994-01-01'" + + " and o_orderdate < add_months('1994-01-01', 12)" + + " group by" + + " n_name" + + " order by" + + " revenue desc" + } + + def getResultString5(): String = { + "N_NAME revenue" + } + + def getQuery6(): String = { + //1. DATE = 1994-01-01; + //2. 
DISCOUNT = 0.06; + //3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1994-01-01'" + + " and l_shipdate < add_months('1994-01-01', 12)" + + " and l_discount between 0.06 - 0.01 and 0.06 + 0.01" + + " and l_quantity < 24" + } + + def getResultString6():String = { + "revenue" + } + + def getQuery7(): String = { + // 1. NATION1 = FRANCE; + // 2. NATION2 = GERMANY. + "select" + + " supp_nation," + + " cust_nation," + + " l_year, " + + " sum(volume) as revenue" + + " from (" + + " select" + + " n1.n_name as supp_nation," + + " n2.n_name as cust_nation," + + " year(l_shipdate) as l_year," + + " l_extendedprice * (1 - l_discount) as volume" + + " from" + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2" + + " where" + + " s_suppkey = l_suppkey" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " and s_nationkey = n1.n_nationkey" + + " and C_NATIONKEY = n2.n_nationkey" + + " and (" + + " (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')" + + " or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')" + + " )" + + " and l_shipdate between '1995-01-01' and '1996-12-31'" + + " ) as shipping" + + " group by" + + " supp_nation," + + " cust_nation," + + " l_year" + + " order by" + + " supp_nation," + + " cust_nation," + + " l_year" + } + + def getResultString7():String = { + "supp_nation|cust_nation|l_year|revenue" + } + + def getQuery8(): String = { + // 1. NATION = BRAZIL; + // 2. REGION = AMERICA; + // 3. TYPE = ECONOMY ANODIZED STEEL. 
+ "select" + + " o_year," + + " sum(case" + + " when nation = 'BRAZIL'" + + " then volume" + + " else 0" + + " end) / sum(volume) as mkt_share" + + " from (" + + " select" + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1-l_discount) as volume," + + " n2.n_name as nation" + + " from" + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " SUPPLIER," + + " NATION n1," + + " REGION," + + " NATION n2," + + " PART" + + " where" + + " p_partkey = l_partkey" + + " and s_suppkey = l_suppkey" + + " and l_orderkey = o_orderkey" + + " and o_custkey = C_CUSTKEY" + + " and C_NATIONKEY = n1.n_nationkey" + + " and n1.n_regionkey = r_regionkey" + + " and r_name = 'AMERICA'" + + " and s_nationkey = n2.n_nationkey" + + " and o_orderdate between '1995-01-01' and '1996-12-31'" + + " and p_type = 'ECONOMY ANODIZED STEEL'" + + " ) as all_nations" + + " group by" + + " o_year" + + " order by" + + " o_year" + + } + + def getResultString8():String = { + "YEAR|MKT_SHARE" + } + + def getQuery9(): String = { + //1. COLOR = green. + "select" + + " nation," + + " o_year," + + " sum(amount) as sum_profit" + + " from (" + + " select" + + " n_name as nation," + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount" + + " from" + + " LINEITEM," + + " ORDERS," + + " SUPPLIER," + + " NATION," + + " PART," + + " PARTSUPP" + + " where" + + " s_suppkey = l_suppkey" + + " and ps_suppkey = l_suppkey" + + " and ps_partkey = l_partkey" + + " and p_partkey = l_partkey" + + " and o_orderkey = l_orderkey" + + " and s_nationkey = n_nationkey" + + " and p_name like '%green%'" + + " ) as profit" + + " group by" + + " nation," + + " o_year" + + " order by" + + " nation," + + " o_year desc" + } + + def getResultString9():String = { + "NATION|YEAR|SUM_PROFIT" + } + def getQuery10(): String = { + //1. DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " n_name," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " ORDERS," + + " LINEITEM," + + " CUSTOMER," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '1993-10-01'" + + " and o_orderdate < add_months('1993-10-01', 3)" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = n_nationkey" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " n_name," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc limit 20" + // } + } + + def getResultString10():String = { + "C_CUSTKEY|C_NAME|REVENUE|C_ACCTBAL|N_NAME|C_ADDRESS|C_PHONE|C_COMMENT" + } + + def getQuery11(): String = { + // 1. NATION = GERMANY; + // 2. FRACTION = 0.0001. + "select" + + " PS_PARTKEY," + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) as value" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " group by" + + " PS_PARTKEY having" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) > (" + + " select" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) * 0.0001" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " )" + + " order by" + + " value desc" + } + + def getResultString11(): String = { + "PS_PARTKEY VALUE" + } + + + def getQuery12(): String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. 
+ "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in ('MAIL', 'SHIP')" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= '1994-01-01'" + + " and l_receiptdate < add_months('1994-01-01',12)" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + + } + + def getResultString12(): String = { + "L_SHIPMODE HIGH_LINE_COUNT LOW_LINE_COUNT" + } + + def getTempQuery13():String={ + "create view"+ + " ViewQ13 as"+ + " select" + + " C_CUSTKEY," + + " count(o_orderkey) as c_count" + + " from" + + " CUSTOMER left outer join ORDERS on" + + " C_CUSTKEY = o_custkey" + + " and o_comment not like '%special%requests%'" + + " group by" + + " C_CUSTKEY" + } + def getQuery13(): String = { + // 1. WORD1 = special. + // 2. WORD2 = requests. + + "select "+ + " c_count, "+ + " count(*) as custdist "+ + " from "+ + " ("+ + " select "+ + " c_custkey,"+ + " count(o_orderkey) as c_count"+ + " from"+ + " customer c left outer join "+ + " orders o on c.c_custkey = o.o_custkey"+ + " and not o.o_comment like '%special%requests%'"+ + " group by c_custkey ) "+ + " c_orders"+ + " group by c_count"+ + " order by custdist desc,"+ + " c_count desc" + + } + + // def getQuery13(): String = { + // // 1. WORD1 = special. + // // 2. WORD2 = requests. 
+ // "select" + + // " c_count, " + + // " count(*) as custdist" + + // " from (" + + // " select" + + // " C_CUSTKEY," + + // " count(o_orderkey)" + + // " from" + + // " CUSTOMER left outer join ORDERS on" + + // " C_CUSTKEY = o_custkey" + + // " and o_comment not like ‘%special%requests%’" + + // " group by" + + // " C_CUSTKEY" + + // " )as c_orders (C_CUSTKEY, c_count)" + + // " group by" + + // " c_count" + + // " order by" + + // " custdist desc," + + // " c_count desc" + // } + + def getResultString13(): String = { + "C_COUNT CUSTDIST" + } + + def getQuery14(): String = { + //1.DATE = 1995-09-01. + "select" + + " 100.00 * sum(case" + + " when P_TYPE like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = P_PARTKEY" + + " and l_shipdate >= '1995-09-01'" + + " and l_shipdate < add_months ('1995-09-01', 1)" + + } + + def getResultString14(): String = { + "PROMO_REVENUE" + } + + + def getTempQuery15(): String = { + "create view " + + " revenue as" + + " select" + + " l_suppkey as supplier_no ," + + " sum(l_extendedprice * (1 - l_discount)) as total_revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1996-01-01'" + + " and l_shipdate < add_months('1996-01-01',3)" + + " group by" + + " l_suppkey" + } + + def getQuery15(): String = { + "select" + + " s_suppkey," + + " s_name," + + " s_address," + + " s_phone," + + " total_revenue" + + " from" + + " SUPPLIER," + + " revenue" + + " where" + + " s_suppkey = supplier_no" + + " and total_revenue = (" + + " select" + + " max(total_revenue)" + + " from" + + " revenue" + + " )" + + " order by" + + " s_suppkey;" + } + + def getResultString15(): String = { + "" + } + + def getQuery16(): String = { + // 1. BRAND = Brand#45. + // 2. TYPE = MEDIUM POLISHED . + // 3. SIZE1 = 49 + // 4. SIZE2 = 14 + // 5. SIZE3 = 23 + // 6. SIZE4 = 45 + // 7. 
SIZE5 = 19 + // 8. SIZE6 = 3 + // 9. SIZE7 = 36 + // 10. SIZE8 = 9. + "select" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE," + + " count(distinct PS_SUPPKEY) as supplier_cnt" + + " from" + + " PARTSUPP," + + " PART" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and P_BRAND <> 'Brand#45'" + + " and P_TYPE not like 'MEDIUM POLISHED%'" + + " and P_SIZE in (49, 14, 23, 45, 19, 3, 36, 9)" + + " and PS_SUPPKEY not in (" + + " select" + + " S_SUPPKEY" + + " from" + + " SUPPLIER" + + " where" + + " S_COMMENT like '%Customer%Complaints%'" + + " )" + + " group by" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + + " order by" + + " supplier_cnt desc," + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + } + + def getResultString16(): String = { + "P_BRAND P_TYPE P_SIZE SUPPLIER_CNT" + } + + def getQuery17(): String = { + // 1. BRAND = Brand#23; + // 2. CONTAINER = MED BOX. + "select" + + " sum(l_extendedprice) / 7.0 as avg_yearly" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = 'Brand#23'" + + " and P_CONTAINER = 'SM PACK'" + + " and l_quantity < (" + + " select" + + " 0.2 * avg(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = P_PARTKEY" + + " )" + //" )" + + } + + def getResultString17(): String = { + "AVG_YEARLY" + } + + def getQuery18(): String = { + //1.QUANTITY = 300 + "select" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice," + + " sum(l_quantity)" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey in (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " group by" + + " l_orderkey having" + + " sum(l_quantity) > 300" + + " )" + + " and C_CUSTKEY = o_custkey" + + " and o_orderkey = l_orderkey" + + " group by" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice" + + " order by" + + " o_totalprice desc," + + " o_orderdate limit 100" + } + + def getResultString18(): 
String = { + "C_NAME C_CUSTKEY O_ORDERKEY O_ORDERDATE O_TOTALPRICE Sum(L_QUANTITY)" + } + + def getQuery19(): String = { + // 1. QUANTITY1 = 1. + // 2. QUANTITY2 = 10. + // 3. QUANTITY3 = 20. + // 4. BRAND1 = Brand#12. + // 5. BRAND2 = Brand#23. + // 6. BRAND3 = Brand#34. + //"select sum(l_extendedprice * (1 - l_discount)) as revenue from LINEITEM, PART where (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#12’ and P_CONTAINER in ( ‘SM CASE’, ‘SM BOX’, ‘SM PACK’, ‘SM PKG’) and l_quantity >= 1 and l_quantity <= 1 + 10 and P_SIZE between 1 and 5 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’) or (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#23’ and P_CONTAINER in (‘MED BAG’, ‘MED BOX’, ‘MED PKG’, ‘MED PACK’) and l_quantity >= 10 and l_quantity <= 10 + 10 and P_SIZE between 1 and 10 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ ) or ( P_PARTKEY = l_partkey and P_BRAND = ‘Brand#34’ and P_CONTAINER in ( ‘LG CASE’, ‘LG BOX’, ‘LG PACK’, ‘LG PKG’) and l_quantity >= 20 and l_quantity <= 20 + 10 and P_SIZE between 1 and 15 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ )" + "select" + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = 'Brand#12'" + + " and P_CONTAINER in ( 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')" + + " and l_quantity >= 1 and l_quantity <= 1 + 10" + + " and l_shipmode in ('AIR', 'AIR REG')" + + " and l_shipinstruct = 'DELIVER IN PERSON'" + + " and P_SIZE between 1 and 5" + + " )" + + " or" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = 'Brand#23'" + + " and P_CONTAINER in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')" + + " and l_quantity >= 10 and l_quantity <= 10 + 10" + + " and l_shipmode in ('AIR', 'AIR REG')" + + " and l_shipinstruct = 'DELIVER IN PERSON'" + + " and P_SIZE between 1 and 10" + + " )" + + " or" + + " (" + + " P_PARTKEY = 
l_partkey" + + " and P_BRAND = 'Brand#34'" + + " and P_CONTAINER in ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')" + + " and l_quantity >= 20 and l_quantity <= 20 + 10" + + " and l_shipmode in ('AIR', 'AIR REG')" + + " and l_shipinstruct = 'DELIVER IN PERSON'" + + " and P_SIZE between 1 and 15" + + " )" + } + + def getResultString19(): String = { + "REVENUE" + } + + def getQuery20(): String = { + // 1. COLOR = forest. + // 2. DATE = 1994-01-01. + // 3. NATION = CANADA. + "select" + + " S_NAME," + + " S_ADDRESS" + + " from" + + " SUPPLIER, NATION" + + " where" + + " S_SUPPKEY in (" + + " select" + + " PS_SUPPKEY" + + " from" + + " PARTSUPP" + + " where" + + " PS_PARTKEY in (" + + " select" + + " P_PARTKEY" + + " from" + + " PART" + + " where" + + " P_NAME like 'khaki%'" + + " )" + + " and PS_AVAILQTY > (" + + " select" + + " 0.5 * sum(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = PS_PARTKEY" + + " and l_suppkey = PS_SUPPKEY" + + " and l_shipdate >= '1994-01-01'" + + " and l_shipdate < add_months('1994-01-01', 12)" + + " )" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'CANADA'" + + " order by" + + " S_NAME" + + } + + def getResultString20(): String = { + "S_NAME S_ADDRESS" + } + + def getQuery21(): String = { + //NATION = SAUDI ARABIA. 
+ "select" + + " S_NAME," + + " count(*) as numwait" + + " from" + + " SUPPLIER," + + " LINEITEM l1," + + " ORDERS," + + " NATION" + + " where" + + " S_SUPPKEY = l1.l_suppkey" + + " and o_orderkey = l1.l_orderkey" + + " and o_orderstatus = 'F'" + + " and l1.l_receiptdate > l1.l_commitdate" + + " and exists (" + + " select" + + " *" + + " from" + + " LINEITEM l2" + + " where" + + " l2.l_orderkey = l1.l_orderkey" + + " and l2.l_suppkey <> l1.l_suppkey" + + " )" + + " and not exists (" + + " select" + + " *" + + " from" + + " LINEITEM l3" + + " where" + + " l3.l_orderkey = l1.l_orderkey" + + " and l3.l_suppkey <> l1.l_suppkey" + + " and l3.l_receiptdate > l3.l_commitdate" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'VIETNAM'" + + " group by" + + " S_NAME" + + " order by" + + " numwait desc," + + " S_NAME limit 100" + } + + def getResultString21(): String = { + "S_NAME NUMWAIT" + } + + def getQuery22(): String = { + // 1. I1 = 13. + // 2. I2 = 31. + // 3. I3 = 23. + // 4. I4 = 29. + // 5. I5 = 30. + // 6. I6 = 18. + // 7. I7 = 17. 
+ "select" + + " cntrycode," + + " count(*) as numcust," + + " sum(C_ACCTBAL) as totacctbal" + + " from (" + + " select" + + " SUBSTR(C_PHONE,1,2) as cntrycode," + + " C_ACCTBAL" + + " from" + + " CUSTOMER " + + " where" + + " SUBSTR(C_PHONE,1,2) in" + + " ('13','31','23','29','30','18','17')" + + " and C_ACCTBAL > (" + + " select" + + " avg(C_ACCTBAL)" + + " from" + + " CUSTOMER" + + " where" + + " C_ACCTBAL > 0.00" + + " and SUBSTR(C_PHONE,1,2) in" + + " ('13','31','23','29','30','18','17')" + + " )" + + " and not exists (" + + " select" + + " *" + + " from" + + " ORDERS" + + " where" + + " o_custkey = C_CUSTKEY" + + " )" + + " ) as custsale" + + " group by" + + " cntrycode" + + " order by" + + " cntrycode" + } + + def getResultString22(): String = { + "CNTRYCODE NUMCUST TOTACCTBAL" + } + } diff --git a/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Query.scala b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Query.scala new file mode 100644 index 0000000000..3bb8b8dbc3 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Query.scala @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark.kuduimpala + +import java.sql.DriverManager + + +object TPCH_Impala_Query { + + def main(args: Array[String]) { + + val host = args(0) + val port = args(1) + val queries: Array[String] = args(2).split(",") + var isResultCollection : Boolean = args(3).toBoolean + var warmup : Integer = args(4).toInt + var runsForAverage : Integer = args(5).toInt + + Class.forName("com.cloudera.impala.jdbc4.Driver") + val dbAddress = "jdbc:impala://" + host + ":" + port + "/" + val conn = DriverManager.getConnection(dbAddress) + val stmt = conn.createStatement + + stmt.execute("USE TPCH") + + for(query <- queries) + TPCH_Impala.execute(query, isResultCollection, stmt, warmup, runsForAverage) + println("I am done with execution") + stmt.close(); + TPCH_Impala.close() + println("I should exit now") + } + } diff --git a/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Tables.scala b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Tables.scala new file mode 100644 index 0000000000..44c25c1f85 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/kuduimpala/TPCH_Impala_Tables.scala @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark.kuduimpala + +import java.sql.DriverManager + +import io.snappydata.benchmark.{TPCHColumnPartitionedTable, TPCHReplicatedTable} + +object TPCH_Impala_Tables { + + def main(args: Array[String]) { + + val host = args(0) + val port = args(1) + val dbName = "TPCH" +// val user = "root" +// val password = "" + + Class.forName("com.cloudera.impala.jdbc4.Driver") + val dbAddress = "jdbc:impala://" + host + ":" + port + "/" + val conn = DriverManager.getConnection(dbAddress) + val stmt = conn.createStatement + +// stmt.execute("DROP DATABASE IF EXISTS " + dbName) +// stmt.execute("CREATE DATABASE IF NOT EXISTS " + dbName) +// stmt.execute("USE " + dbName) + + +// TPCHReplicatedTable.createNationTable_Memsql(stmt) +// +// TPCHReplicatedTable.createSupplierTable_Memsql(stmt) +// +// TPCHColumnPartitionedTable.createPartTable_Memsql(stmt) +// +// TPCHColumnPartitionedTable.createPartSuppTable_Memsql(stmt) +// +// TPCHColumnPartitionedTable.createCustomerTable_Memsql(stmt) +// +// TPCHColumnPartitionedTable.createOrderTable_Memsql(stmt) +// +// TPCHColumnPartitionedTable.createLineItemTable_Memsql(stmt) + + var rs = stmt.executeQuery("SHOW TABLES") + println("Tables" + rs) + while (rs.next()) { + System.out.println(rs.getString(1)); + } + + stmt.close(); + + + } + } diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/ConcurrentMemsql.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/ConcurrentMemsql.scala new file mode 100644 index 0000000000..ecd968335e --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/ConcurrentMemsql.scala @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.memsql + +import java.io.{FileOutputStream, PrintStream} +import java.sql.DriverManager +import java.util.Date + +import scala.util.control.NonFatal + +object ConcurrentMemsql { + + def main(args: Array[String]): Unit = { + + val host = args(0) + val port = 3306 + val dbName = "TPCH" + val user = "root" + val password = "" + + val readerThread = new Thread(new Runnable { + def run() { + Class.forName("com.mysql.jdbc.Driver") + val dbAddress = "jdbc:mysql://" + host + ":" + port + "/" + val conn = DriverManager.getConnection(dbAddress, user, password) + val stmt = conn.createStatement + stmt.execute("USE " + dbName) + val avgFileStream = new FileOutputStream(new java.io.File(s"reader.out")) + val avgPrintStream = new PrintStream(avgFileStream) + for (i <- 1 to 100000) { + + var starttime = System.nanoTime() + // val rs = stmt.executeQuery("select count(*) as counter from PARTSUPP where ps_suppkey = 18692 and Ps_partkey = 7663535; ") + val rs = stmt.executeQuery("select PS_AVAILQTY as counter from PARTSUPP where ps_suppkey = 18692 and PS_partkeY = 653535") + var count = 0 + while (rs.next()) { + count = rs.getInt("counter") + //just iterating over result + //count+=1 + } + var timetaken = (System.nanoTime() - starttime)/1000 + + avgPrintStream.println(s"Total time taken $timetaken results : $count ${new Date()} ") + + } + avgPrintStream.close() + } + }).start() + + val writerThread = new Thread(new Runnable { + def run() { + Class.forName("com.mysql.jdbc.Driver") + val 
dbAddress = "jdbc:mysql://" + host + ":" + port + "/" + val conn = DriverManager.getConnection(dbAddress, user, password) + val stmt = conn.createStatement + stmt.execute("USE " + dbName) + val avgFileStream = new FileOutputStream(new java.io.File(s"writer.out")) + val avgPrintStream = new PrintStream(avgFileStream) + var startCounter = 7653535 + avgPrintStream.println(s"insertion started ${new Date()}") + for (i <- 1 to 100000) { + startCounter+=1 + try { + var starttime = System.nanoTime() + // val rs = stmt.execute(s"insert into PARTSUPP values ($startCounter, 18692 , 2, 4.11, 'aa') ") + val rs = stmt.execute(s"update PARTSUPP set PS_AVAILQTY = PS_AVAILQTY +1") + } catch { + case NonFatal(e) => e.printStackTrace(avgPrintStream) + } + } + + avgPrintStream.println(s"insertion ended ${new Date()}") + avgPrintStream.close() + + } + + }).start() + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql.scala new file mode 100644 index 0000000000..85b12601e0 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql.scala @@ -0,0 +1,1085 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+package io.snappydata.benchmark.memsql
+
+import java.io.{File, FileOutputStream, PrintStream}
+import java.sql.{ResultSet, Statement}
+
+import io.snappydata.benchmark.TPCH_Queries
+
+// scalastyle:off println
+object TPCH_Memsql {
+
+  // Shared across all execute() calls; opened when this object is first
+  // touched and released by close().
+  var avgFileStream: FileOutputStream = new FileOutputStream(new File(s"Average.csv"))
+  var avgPrintStream: PrintStream = new PrintStream(avgFileStream)
+  avgPrintStream.println(s"Query,AverageResponseTime")
+
+  /** Closes the shared `Average.csv` streams; call once after the last query. */
+  def close(): Unit = {
+    avgPrintStream.close()
+    avgFileStream.close()
+  }
+
+  /** Forwards the seed to [[TPCH_Queries.setRandomSeed]]. */
+  def setRandomSeed(randomSeed : Integer): Unit ={
+    TPCH_Queries.setRandomSeed(randomSeed)
+  }
+
+  /**
+   * Runs one TPC-H query and writes its output to `$queryNumber.out`.
+   *
+   * In result-collection mode the query runs once and every row is written
+   * as a comma-separated line. Otherwise it runs `warmup + runsForAverage`
+   * times, each iteration time (ms) is written to the per-query file, and
+   * the average over the non-warmup runs is appended to `Average.csv`.
+   * Exceptions are caught and their stack traces are printed to stdout, the
+   * per-query file and `Average.csv`.
+   *
+   * @param queryNumber        query id as understood by `TPCH_Queries.getQuery`
+   * @param isResultCollection true to dump rows, false to measure timings
+   * @param stmt               open statement on the target database
+   * @param warmup             iterations excluded from the average
+   * @param runsForAverage     iterations included in the average
+   * @param isDynamic          passed through to `TPCH_Queries.getQuery`
+   */
+  def execute(queryNumber: String, isResultCollection: Boolean, stmt: Statement,
+      warmup: Int, runsForAverage: Int, isDynamic: Boolean): Unit = {
+
+    val queryFileStream = new FileOutputStream(new File(s"$queryNumber.out"))
+    val queryPrintStream = new PrintStream(queryFileStream)
+
+    var rs: ResultSet = null
+    try {
+      println(s"Started executing $queryNumber")
+      queryPrintStream.println(s"$queryNumber")
+      if (isResultCollection) {
+        val queryToBeExecuted = TPCH_Queries.getQuery(queryNumber, isDynamic, isSnappy = false)
+        rs = queryExecution(queryNumber, queryToBeExecuted, stmt)
+        val rsmd = rs.getMetaData
+        val columnsNumber = rsmd.getColumnCount
+        var count : Int = 0
+        while (rs.next()) {
+          count += 1
+          for (i: Int <- 1 to columnsNumber) {
+            if (i > 1) queryPrintStream.print(",")
+            queryPrintStream.print(rs.getString(i))
+          }
+          queryPrintStream.println()
+        }
+        println(s"Number of results : $count")
+        println(s"$queryNumber Result Collected in file $queryNumber.out")
+        if (queryNumber.equals("15")) {
+          // queryExecution created this view for query 15
+          stmt.execute("drop view revenue")
+        }
+      } else {
+        var totalTime: Long = 0
+        for (i <- 1 to (warmup + runsForAverage)) {
+          val queryToBeExecuted = TPCH_Queries.getQuery(queryNumber, isDynamic, isSnappy = false)
+          val startTime = System.currentTimeMillis()
+          rs = queryExecution(queryNumber, queryToBeExecuted, stmt)
+          while (rs.next()) {
+            // just iterating over result
+          }
+          val endTime = System.currentTimeMillis()
+          val iterationTime = endTime - startTime
+          queryPrintStream.println(s"$iterationTime")
+          if (i > warmup) {
+            totalTime += iterationTime
+          }
+          if (queryNumber.equals("15")) {
+            stmt.execute("drop view revenue")
+          }
+        }
+        queryPrintStream.println(s"Query $queryNumber average = ${totalTime / runsForAverage}")
+        avgPrintStream.println(s"$queryNumber,${totalTime /runsForAverage}")
+      }
+      println(s"Finished executing $queryNumber")
+
+
+    } catch {
+      case e: Exception =>
+        e.printStackTrace()
+        e.printStackTrace(queryPrintStream)
+        e.printStackTrace(avgPrintStream)
+        println(s" Exception while executing $queryNumber is written to file $queryNumber.out")
+    } finally {
+      // rs is still null when the query itself failed, so guard the close.
+      if (rs != null) {
+        try {
+          rs.close()
+        } catch {
+          case e: java.sql.SQLException => e.printStackTrace()
+        }
+      }
+      // Per-query streams are released in both modes. The shared Average.csv
+      // streams stay open for the following queries; close() releases them.
+      queryPrintStream.close()
+      queryFileStream.close()
+    }
+  }
+
+  /**
+   * Executes `query` on `stmt` and returns its result set. For query "15"
+   * the supplied text is executed first as a plain statement and the rows
+   * then come from `TPCH_Queries.getQuery15`.
+   */
+  def queryExecution(queryNumber: String, query: String, stmt: Statement): ResultSet = {
+    var queryToBeExecuted = query
+    if (queryNumber.equals("15")) {
+      stmt.execute(queryToBeExecuted)
+      queryToBeExecuted = TPCH_Queries.getQuery15
+    }
+    stmt.executeQuery(queryToBeExecuted)
+
+
+    /*
+    val rs : ResultSet = queryNumber match {
+      case "q1" => {
+        stmt.executeQuery(getQuery1())
+      }
+      case "q2" => {
+        stmt.executeQuery(getQuery2())
+      }
+      case "q3" => {
+        stmt.executeQuery(getQuery3())
+      }
+      case "q4" => {
+        stmt.executeQuery(getQuery4())
+      }
+      case "q5" => {
+        stmt.executeQuery(getQuery5())
+      }
+      case "q6" => {
+        stmt.executeQuery(getQuery6())
+      }
+      case "q7" => {
+        stmt.executeQuery(getQuery7())
+      }
+      case "q8" => {
+        stmt.executeQuery(getQuery8())
+      }
+      case "q9" => {
+        stmt.executeQuery(getQuery9())
+      }
+      case "q10" => {
+        stmt.executeQuery(getQuery10())
+      }
+      case "q11" => {
+        stmt.executeQuery(getQuery11())
+      }
+      case
"q12" => { + stmt.executeQuery(getQuery12()) + } + case "q13" => { + stmt.execute(getTempQuery13()) + stmt.executeQuery(getQuery13()) + } + case "q14" => { + stmt.executeQuery(getQuery14()) + } + case "q15" => { + stmt.execute(getTempQuery15()) + stmt.executeQuery(getQuery15()) + } + case "q16" => { + stmt.executeQuery(getQuery16()) + } + case "q17" => { + stmt.executeQuery(getQuery17()) + } + case "q18" => { + stmt.executeQuery(getQuery18()) + } + case "q19" => { + stmt.executeQuery(getQuery19()) + } + case "q20" => { + stmt.executeQuery(getQuery20()) + } + case "q21" => { + stmt.executeQuery(getQuery21()) + } + case "q22" => { + stmt.executeQuery(getQuery22()) + } + } + rs */ + + } + + /* def getQuery1(): String = { + //DELTA = 90 + " select" + + " l_returnflag," + + " l_linestatus," + + " sum(l_quantity) as sum_qty," + + " sum(l_extendedprice) as sum_base_price," + + " sum(l_extendedprice*(1-l_discount)) as sum_disc_price," + + " sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge," + + " avg(l_quantity) as avg_qty," + + " avg(l_extendedprice) as avg_price," + + " avg(l_discount) as avg_disc," + + " count(*) as count_order" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate <= '1997-12-31' - interval '90' day" + + " group by" + + " l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + } + + + def getResultString1(): String = { + "l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price sum_charge avg_qty avg_price avg_disc count_order" + } + + def getQuery2(): String = { + // 1. SIZE = 15; + // 2. TYPE = BRASS; + // 3. 
REGION = EUROPE + + " select" + + " S_ACCTBAL," + + " S_NAME," + + " N_NAME," + + " P_PARTKEY," + + " P_MFGR," + + " S_ADDRESS," + + " S_PHONE," + + " S_COMMENT" + + " from" + + " PART," + + " SUPPLIER," + + " PARTSUPP," + + " NATION," + + " REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_SIZE = 24" + + " and P_TYPE like '%STEEL'" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and PS_SUPPLYCOST = (" + + " select" + + " min(PS_SUPPLYCOST)" + + " from" + + " PARTSUPP, SUPPLIER," + + " NATION, REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " )" + + " order by" + + " S_ACCTBAL desc," + + " N_NAME," + + " S_NAME," + + " P_PARTKEY" + + " limit 100" + + } + + def getResultString2(): String = { + "S_ACCTBAL S_NAME N_NAME P_PARTKEY P_MFGR S_ADDRESS S_PHONE S_COMMENT" + } + + def getQuery3(): String = { + // 1. SEGMENT = BUILDING; + // 2. DATE = 1995-03-15. + " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " o_orderdate," + + " o_shippriority" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " C_MKTSEGMENT = 'BUILDING'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < '1995-03-15'" + + " and l_shipdate > '1995-03-15' " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " l_orderkey" + + " limit 10" + } + + def getResultString3(): String = { + "l_orderkey revenue o_orderdate o_shippriority" + } + + def getQuery4(): String = { + //1.DATE = 1993-07-01. 
+ " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= '1993-07-01'" + + //" and o_orderdate < '1993-07-01' + interval '3' month" + + " and o_orderdate < '1993-07-01' + interval '3' month" + + " and exists (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + } + + def getResultString4(): String = { + "o_orderpriority order_count" + } + + def getQuery5(): String = { + //1. REGION = ASIA; + //2. DATE = 1994-01-01. + " select" + + " N_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM," + + " SUPPLIER," + + " NATION," + + " REGION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = S_SUPPKEY" + + " and C_NATIONKEY = S_NATIONKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and o_orderdate >= '1994-01-01'" + + " and o_orderdate < '1994-01-01' + interval '1' year" + + " group by" + + " N_NAME" + + " order by" + + " revenue desc" + } + + def getResultString5(): String = { + "N_NAME revenue" + } + + def getQuery6(): String = { + //1. DATE = 1994-01-01; + //2. DISCOUNT = 0.06; + //3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1994-01-01'" + + " and l_shipdate < '1994-01-01' + interval '1' year" + + " and l_discount between 0.06 - 0.01 and 0.06 + 0.01" + + " and l_quantity < 24" + } + + def getResultString6(): String = { + "revenue" + } + + def getQuery7(): String = { + // 1. NATION1 = FRANCE; + // 2. NATION2 = GERMANY. 
+ "select" + + " supp_nation," + + " cust_nation," + + " l_year, " + + " sum(volume) as revenue" + + " from (" + + " select" + + " n1.N_NAME as supp_nation," + + " n2.N_NAME as cust_nation," + + //" extract m(year from l_shipdate) as l_year," + + " year(l_shipdate) as l_year," + + " l_extendedprice * (1 - l_discount) as volume" + + " from" + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2" + + " where" + + " S_SUPPKEY = l_suppkey" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " and S_NATIONKEY = n1.N_NATIONKEY" + + " and C_NATIONKEY = n2.N_NATIONKEY" + + " and (" + + " (n1.N_NAME = 'FRANCE' and n2.N_NAME = 'GERMANY')" + + " or (n1.N_NAME = 'GERMANY' and n2.N_NAME = 'FRANCE')" + + " )" + + " and l_shipdate between '1995-01-01' and '1996-12-31'" + + " ) as shipping" + + " group by" + + " supp_nation," + + " cust_nation," + + " l_year" + + " order by" + + " supp_nation," + + " cust_nation," + + " l_year" + + } + + def getResultString7(): String = { + "supp_nation cust_nation l_year revenue" + } + + def getQuery8(): String = { + // 1. NATION = BRAZIL; + // 2. REGION = AMERICA; + // 3. TYPE = ECONOMY ANODIZED STEEL. 
+ "select" + + " o_year," + + " sum(case" + + " when nation = 'BRAZIL'" + + " then volume" + + " else 0" + + " end) / sum(volume) as mkt_share" + + " from (" + + " select" + + //" extract(year from o_orderdate) as o_year," + + " year(o_orderdate) as o_year,"+ + " l_extendedprice * (1-l_discount) as volume," + + " n2.N_NAME as nation" + + " from" + + " PART," + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2," + + " REGION" + + " where" + + " P_PARTKEY = l_partkey" + + " and S_SUPPKEY = l_suppkey" + + " and l_orderkey = o_orderkey" + + " and o_custkey = C_CUSTKEY" + + " and C_NATIONKEY = n1.N_NATIONKEY" + + " and n1.N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'AMERICA'" + + " and s_NATIONkey = n2.N_NATIONKEY" + + " and o_orderdate between '1995-01-01' and '1996-12-31'" + + " and P_TYPE = 'ECONOMY ANODIZED STEEL'" + + " ) as all_nations" + + " group by" + + " o_year" + + " order by" + + " o_year" + + } + + def getResultString8(): String = { + "YEAR MKT_SHARE" + } + + def getQuery9(): String = { + //1. COLOR = green. + "select" + + " nation," + + " o_year," + + " sum(amount) as sum_profit" + + " from (" + + " select" + + " N_NAME as nation," + + //" extract(year from o_orderdate) as o_year," + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1 - l_discount) - PS_SUPPLYCOST * l_quantity as amount" + + " from" + + " PART," + + " SUPPLIER," + + " LINEITEM," + + " PARTSUPP," + + " ORDERS," + + " NATION" + + " where" + + " S_SUPPKEY = l_suppkey" + + " and PS_SUPPKEY = l_suppkey" + + " and PS_PARTKEY = l_partkey" + + " and P_PARTKEY = l_partkey" + + " and o_orderkey = l_orderkey" + + " and S_NATIONKEY = N_NATIONKEY" + + " and P_NAME like '%green%'" + + " ) as profit" + + " group by" + + " nation," + + " o_year" + + " order by" + + " nation," + + " o_year desc" + } + + def getResultString9(): String = { + "NATION YEAR SUM_PROFIT" + } + + def getQuery10(): String = { + //1. DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " N_NAME," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '1993-10-01'" + + " and o_orderdate < '1993-10-01' + interval '3' month" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = N_NATIONKEY" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " N_NAME," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + + } + + def getResultString10(): String = { + "C_CUSTKEY C_NAME REVENUE C_ACCTBAL N_NAME C_ADDRESS C_PHONE C_COMMENT" + } + + def getQuery11(): String = { + // 1. NATION = GERMANY; + // 2. FRACTION = 0.0001. + "select" + + " PS_PARTKEY," + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) as value" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " group by" + + " PS_PARTKEY having" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) > (" + + " select" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) * 0.0000001" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " )" + + " order by" + + " value desc" + } + + def getResultString11(): String = { + "PS_PARTKEY VALUE" + } + + + def getQuery12(): String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. 
+ "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in ('MAIL', 'SHIP')" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= '1994-01-01'" + + " and l_receiptdate < '1994-01-01' + interval '1' year" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + + } + + def getResultString12(): String = { + "L_SHIPMODE HIGH_LINE_COUNT LOW_LINE_COUNT" + } + + def getTempQuery13():String={ + "create view"+ + " ViewQ13 as"+ + " select" + + " C_CUSTKEY," + + " count(o_orderkey) as c_count" + + " from" + + " CUSTOMER left outer join ORDERS on" + + " C_CUSTKEY = o_custkey" + + " and o_comment not like '%special%requests%'" + + " group by" + + " C_CUSTKEY" + } + def getQuery13(): String = { + // 1. WORD1 = special. + // 2. WORD2 = requests. + "select" + + " c_count, " + + " count(*) as custdist" + + " from " + + " ViewQ13" + + " group by" + + " c_count" + + " order by" + + " custdist desc," + + " c_count desc" + + } + + // def getQuery13(): String = { + // // 1. WORD1 = special. + // // 2. WORD2 = requests. 
+ // "select" + + // " c_count, " + + // " count(*) as custdist" + + // " from (" + + // " select" + + // " C_CUSTKEY," + + // " count(o_orderkey)" + + // " from" + + // " CUSTOMER left outer join ORDERS on" + + // " C_CUSTKEY = o_custkey" + + // " and o_comment not like ‘%special%requests%’" + + // " group by" + + // " C_CUSTKEY" + + // " )as c_orders (C_CUSTKEY, c_count)" + + // " group by" + + // " c_count" + + // " order by" + + // " custdist desc," + + // " c_count desc" + // } + + def getResultString13(): String = { + "C_COUNT CUSTDIST" + } + + def getQuery14(): String = { + //1.DATE = 1995-09-01. + "select" + + " 100.00 * sum(case" + + " when P_TYPE like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = P_PARTKEY" + + " and l_shipdate >= '1995-09-01'" + + " and l_shipdate < '1995-09-01'+ interval '1' month" + + } + + def getResultString14(): String = { + "PROMO_REVENUE" + } + + + def getTempQuery15(): String = { + "create view " + + " revenue as" + + " select" + + " l_suppkey as supplier_no ," + + " sum(l_extendedprice * (1 - l_discount)) as total_revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1996-01-01'" + + " and l_shipdate < '1996-01-01' + interval '3' month" + + " group by" + + " l_suppkey" + } + + def getQuery15(): String = { + "select" + + " s_suppkey," + + " s_name," + + " s_address," + + " s_phone," + + " total_revenue" + + " from" + + " SUPPLIER," + + " revenue" + + " where" + + " s_suppkey = supplier_no" + + " and total_revenue = (" + + " select" + + " max(total_revenue)" + + " from" + + " revenue" + + " )" + + " order by" + + " s_suppkey;" + } + + def getResultString15(): String = { + "" + } + + def getQuery16(): String = { + // 1. BRAND = Brand#45. + // 2. TYPE = MEDIUM POLISHED . + // 3. SIZE1 = 49 + // 4. SIZE2 = 14 + // 5. SIZE3 = 23 + // 6. 
SIZE4 = 45 + // 7. SIZE5 = 19 + // 8. SIZE6 = 3 + // 9. SIZE7 = 36 + // 10. SIZE8 = 9. + "select" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE," + + " count(distinct PS_SUPPKEY) as supplier_cnt" + + " from" + + " PARTSUPP," + + " PART" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and P_BRAND <> 'Brand#45'" + + " and P_TYPE not like 'MEDIUM POLISHED%'" + + " and P_SIZE in (49, 14, 23, 45, 19, 3, 36, 9)" + + " and PS_SUPPKEY not in (" + + " select" + + " S_SUPPKEY" + + " from" + + " SUPPLIER" + + " where" + + " S_COMMENT like '%Customer%Complaints%'" + + " )" + + " group by" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + + " order by" + + " supplier_cnt desc," + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + } + + def getResultString16(): String = { + "P_BRAND P_TYPE P_SIZE SUPPLIER_CNT" + } + + def getQuery17(): String = { + // 1. BRAND = Brand#23; + // 2. CONTAINER = MED BOX. + "select" + + " sum(l_extendedprice) / 7.0 as avg_yearly" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = 'Brand#23'" + + " and P_CONTAINER = 'SM PACK'" + + " and l_quantity < (" + + " select" + + " 0.2 * avg(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = P_PARTKEY" + + " )" + //" )" + + } + + def getResultString17(): String = { + "AVG_YEARLY" + } + + def getQuery18(): String = { + //1.QUANTITY = 300 + "select" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice," + + " sum(l_quantity)" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey in (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " group by" + + " l_orderkey having" + + " sum(l_quantity) > 300" + + " )" + + " and C_CUSTKEY = o_custkey" + + " and o_orderkey = l_orderkey" + + " group by" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice" + + " order by" + + " o_totalprice desc," + + " o_orderdate" + + " limit 100" + } + + 
def getResultString18(): String = { + "C_NAME C_CUSTKEY O_ORDERKEY O_ORDERDATE O_TOTALPRICE Sum(L_QUANTITY)" + } + + def getQuery19(): String = { + // 1. QUANTITY1 = 1. + // 2. QUANTITY2 = 10. + // 3. QUANTITY3 = 20. + // 4. BRAND1 = Brand#12. + // 5. BRAND2 = Brand#23. + // 6. BRAND3 = Brand#34. + //"select sum(l_extendedprice * (1 - l_discount)) as revenue from LINEITEM, PART where (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#12’ and P_CONTAINER in ( ‘SM CASE’, ‘SM BOX’, ‘SM PACK’, ‘SM PKG’) and l_quantity >= 1 and l_quantity <= 1 + 10 and P_SIZE between 1 and 5 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’) or (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#23’ and P_CONTAINER in (‘MED BAG’, ‘MED BOX’, ‘MED PKG’, ‘MED PACK’) and l_quantity >= 10 and l_quantity <= 10 + 10 and P_SIZE between 1 and 10 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ ) or ( P_PARTKEY = l_partkey and P_BRAND = ‘Brand#34’ and P_CONTAINER in ( ‘LG CASE’, ‘LG BOX’, ‘LG PACK’, ‘LG PKG’) and l_quantity >= 20 and l_quantity <= 20 + 10 and P_SIZE between 1 and 15 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ )" + "select" + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#12\"" + + " and P_CONTAINER in ( \"SM CASE\", \"SM BOX\", \"SM PACK\", \"SM PKG\")" + + " and l_quantity >= 1 and l_quantity <= 1 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 and 5" + + " )" + + " or" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#23\"" + + " and P_CONTAINER in (\"MED BAG\", \"MED BOX\", \"MED PKG\", \"MED PACK\")" + + " and l_quantity >= 10 and l_quantity <= 10 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 
and 10" + + " )" + + " or" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#34\"" + + " and P_CONTAINER in ( \"LG CASE\", \"LG BOX\", \"LG PACK\", \"LG PKG\")" + + " and l_quantity >= 20 and l_quantity <= 20 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 and 15" + + " )" + } + + def getResultString19(): String = { + "REVENUE" + } + + def getQuery20(): String = { + // 1. COLOR = forest. + // 2. DATE = 1994-01-01. + // 3. NATION = CANADA. + "select" + + " S_NAME," + + " S_ADDRESS" + + " from" + + " SUPPLIER, NATION" + + " where" + + " S_SUPPKEY in (" + + " select" + + " PS_SUPPKEY" + + " from" + + " PARTSUPP" + + " where" + + " PS_PARTKEY in (" + + " select" + + " P_PARTKEY" + + " from" + + " PART" + + " where" + + " P_NAME like 'khaki%'" + + " )" + + " and PS_AVAILQTY > (" + + " select" + + " 0.5 * sum(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = PS_PARTKEY" + + " and l_suppkey = PS_SUPPKEY" + + " and l_shipdate >= '1994-01-01'" + + " and l_shipdate < '1994-01-01' + interval 1 year" + + " )" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'CANADA'" + + " order by" + + " S_NAME" + + } + + def getResultString20(): String = { + "S_NAME S_ADDRESS" + } + + def getQuery21(): String = { + //NATION = SAUDI ARABIA. 
+ "select" + + " S_NAME," + + " count(*) as numwait" + + " from" + + " SUPPLIER," + + " LINEITEM l1," + + " ORDERS," + + " NATION" + + " where" + + " S_SUPPKEY = l1.l_suppkey" + + " and o_orderkey = l1.l_orderkey" + + " and o_orderstatus = \"F\"" + + " and l1.l_receiptdate > l1.l_commitdate" + + " and exists (" + + " select" + + " l2.l_orderkey" + + " from" + + " LINEITEM l2" + + " where" + + " l2.l_orderkey = l1.l_orderkey" + + " and l2.l_suppkey <> l1.l_suppkey" + + " )" + + " and not exists (" + + " select" + + " l3.l_orderkey" + + " from" + + " LINEITEM l3" + + " where" + + " l3.l_orderkey = l1.l_orderkey" + + " and l3.l_suppkey <> l1.l_suppkey" + + " and l3.l_receiptdate > l3.l_commitdate" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = \"VIETNAM\"" + + " group by" + + " S_NAME" + + " order by" + + " numwait desc," + + " S_NAME" + + " limit 100" + } + + def getResultString21(): String = { + "S_NAME NUMWAIT" + } + + def getQuery22(): String = { + // 1. I1 = 13. + // 2. I2 = 31. + // 3. I3 = 23. + // 4. I4 = 29. + // 5. I5 = 30. + // 6. I6 = 18. + // 7. I7 = 17. 
+ "select" + + " cntrycode," + + " count(*) as numcust," + + " sum(C_ACCTBAL) as totacctbal" + + " from (" + + " select" + + " substring(C_PHONE from 1 for 2) as cntrycode," + + " C_ACCTBAL" + + " from" + + " CUSTOMER " + + " where" + + " substring(C_PHONE from 1 for 2) in" + + " (\"13\",\"31\",\"23\",\"29\",\"30\",\"18\",\"17\")" + + " and C_ACCTBAL > (" + + " select" + + " avg(C_ACCTBAL)" + + " from" + + " CUSTOMER" + + " where" + + " C_ACCTBAL > 0.00" + + " and substring (C_PHONE from 1 for 2) in" + + " (\"13\",\"31\",\"23\",\"29\",\"30\",\"18\",\"17\")" + + " )" + + " and not exists (" + + " select" + + " *" + + " from" + + " ORDERS" + + " where" + + " o_custkey = C_CUSTKEY" + + " )" + + " ) as custsale" + + " group by" + + " cntrycode" + + " order by" + + " cntrycode" + } + + def getResultString22(): String = { + "CNTRYCODE NUMCUST TOTACCTBAL" + } */ + } diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query.scala new file mode 100644 index 0000000000..a9447d6283 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query.scala @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+package io.snappydata.benchmark.memsql
+
+import java.sql.DriverManager
+
+object TPCH_Memsql_Query {
+
+  /**
+   * Runs the requested TPC-H queries one after another through
+   * [[TPCH_Memsql.execute]].
+   *
+   * @param args host, port, comma-separated query numbers,
+   *             isResultCollection, warmup, runsForAverage, isDynamic,
+   *             randomSeed (eight values, in that order)
+   * @throws IllegalArgumentException if fewer than eight arguments are given
+   */
+  def main(args: Array[String]): Unit = {
+    require(args.length >= 8,
+      "Usage: TPCH_Memsql_Query <host> <port> <queries> <isResultCollection> " +
+        "<warmup> <runsForAverage> <isDynamic> <randomSeed>")
+
+    val host = args(0)
+    val port = args(1)
+    val queries: Array[String] = args(2).split(",")
+    val dbName = "TPCH"
+    val user = "root"
+    val password = ""
+
+    // Parsed before any connection is opened so that a malformed argument
+    // cannot leave a connection behind.
+    val isResultCollection: Boolean = args(3).toBoolean
+    val warmup: Int = args(4).toInt
+    val runsForAverage: Int = args(5).toInt
+    val isDynamic: Boolean = args(6).toBoolean
+    val randomSeed = args(7).toInt
+
+    //Class.forName("com.mysql.jdbc.Driver")
+    // The new mysql Java connector 8.0 generates a warning if the old driver class name is used.
+    Class.forName("com.mysql.cj.jdbc.Driver")
+    val dbAddress = "jdbc:mysql://" + host + ":" + port + "/"
+    val conn = DriverManager.getConnection(dbAddress, user, password)
+    try {
+      val stmt = conn.createStatement
+      try {
+        stmt.execute("USE " + dbName)
+
+        TPCH_Memsql.setRandomSeed(randomSeed)
+        try {
+          for (query <- queries) {
+            TPCH_Memsql.execute(query, isResultCollection, stmt, warmup, runsForAverage, isDynamic)
+          }
+        } finally {
+          // releases the shared Average.csv streams owned by TPCH_Memsql
+          TPCH_Memsql.close()
+        }
+      } finally {
+        stmt.close()
+      }
+    } finally {
+      conn.close()
+    }
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query_StreamExecution.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query_StreamExecution.scala
new file mode 100644
index 0000000000..0dc9e9d7b2
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Query_StreamExecution.scala
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.benchmark.memsql
+
+import java.io._
+import java.sql.DriverManager
+
+import scala.collection.mutable.Map
+
+object TPCH_Memsql_Query_StreamExecution {
+
+  /**
+   * Runs the requested queries as a stream: every pass executes each query
+   * once through [[TPCH_Memsql_StreamExecution.execute]], for
+   * `warmUp + runsForAverage` passes. When not collecting results, each
+   * execution time is appended to `Memsql_<query>.out`. The per-query
+   * average over the non-warmup passes is written to `Memsql_Average.out`.
+   *
+   * @param args host, port, comma-separated query numbers,
+   *             isResultCollection, warmUp, runsForAverage (in that order)
+   * @throws IllegalArgumentException if fewer than six arguments are given
+   */
+  def main(args: Array[String]): Unit = {
+    require(args.length >= 6,
+      "Usage: TPCH_Memsql_Query_StreamExecution <host> <port> <queries> " +
+        "<isResultCollection> <warmUp> <runsForAverage>")
+
+    val host = args(0)
+    val port = args(1)
+    val queries: Array[String] = args(2).split(",")
+    val dbName = "TPCH"
+    val user = "root"
+    val password = ""
+
+    // Parsed before any connection is opened so that a malformed argument
+    // cannot leave a connection behind.
+    val isResultCollection: Boolean = args(3).toBoolean
+    val warmUp: Int = args(4).toInt
+    val runsForAverage: Int = args(5).toInt
+
+    Class.forName("com.mysql.jdbc.Driver")
+    val dbAddress = "jdbc:mysql://" + host + ":" + port + "/"
+    val conn = DriverManager.getConnection(dbAddress, user, password)
+    try {
+      val stmt = conn.createStatement
+      try {
+        val avgFileStream: FileOutputStream = new FileOutputStream(new File(s"Memsql_Average.out"))
+        val avgPrintStream: PrintStream = new PrintStream(avgFileStream)
+        try {
+          stmt.execute("USE " + dbName)
+
+          // total execution time per query over the non-warmup passes
+          val avgTime: Map[String, Long] = Map()
+          for (i <- 1 to (warmUp + runsForAverage); query <- queries) {
+            val executionTime: Long =
+              TPCH_Memsql_StreamExecution.execute(query, isResultCollection, stmt)
+            if (!isResultCollection) {
+              val out = new BufferedWriter(new FileWriter(s"Memsql_$query.out", true))
+              try {
+                out.write(s"$executionTime\n")
+              } finally {
+                out.close()
+              }
+            }
+            if (i > warmUp) {
+              avgTime(query) = avgTime.getOrElse(query, 0L) + executionTime
+            }
+          }
+          // With no measured passes there is nothing to average, and the
+          // division below would fail.
+          if (runsForAverage > 0) {
+            for (query <- queries) {
+              avgPrintStream.println(s"$query,${avgTime.getOrElse(query, 0L) / runsForAverage}")
+            }
+          }
+        } finally {
+          avgPrintStream.close()
+          avgFileStream.close()
+          // The streams used by execute() belong to TPCH_Memsql_StreamExecution,
+          // not to TPCH_Memsql.
+          TPCH_Memsql_StreamExecution.close()
+        }
+      } finally {
+        stmt.close()
+      }
+    } finally {
+      conn.close()
+    }
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_StreamExecution.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_StreamExecution.scala
new file mode 100644
index 0000000000..7f01f06fbb
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_StreamExecution.scala
@@ -0,0 +1,1060 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ +package io.snappydata.benchmark.memsql + +import java.io.{File, FileOutputStream, PrintStream} +import java.sql.{ResultSet, Statement} + + +object TPCH_Memsql_StreamExecution { + + var avgFileStream:FileOutputStream = new FileOutputStream(new File(s"Average.out")) + var avgPrintStream:PrintStream = new PrintStream(avgFileStream) + + def close(): Unit ={ + avgPrintStream.close() + avgFileStream.close() + } + + def execute(queryNumber: String, isResultCollection: Boolean, stmt: Statement): Long = { + + var rs: ResultSet = null + var iterationTime: Long = 0 + println(s"Started executing $queryNumber") + if (isResultCollection) { + var queryFileStream = new FileOutputStream(new File(s"$queryNumber.out")) + var queryPrintStream = new PrintStream(queryFileStream) + + try { + rs = queryExecution(queryNumber, stmt) + //rs = stmt.executeQuery(query) + //queryPrintStream.println(s"$resultFormat") + val rsmd = rs.getMetaData() + val columnsNumber = rsmd.getColumnCount(); + var count: Int = 0 + while (rs.next()) { + count += 1 + for (i: Int <- 1 to columnsNumber) { + if (i > 1) queryPrintStream.print(",") + queryPrintStream.print(rs.getString(i)) + } + queryPrintStream.println() + } + println(s"NUmber of results : $count") + println(s"$queryNumber Result Collected in file $queryNumber.out") + if (queryNumber.equals("13")) { + stmt.execute("drop view ViewQ13") + } + if (queryNumber.equals("15")) { + stmt.execute("drop view revenue") + } + } catch { + case e: Exception => { + e.printStackTrace() + e.printStackTrace(queryPrintStream) + println(s" Exception while executing $queryNumber in written to file $queryNumber.txt") + } + } finally { + queryPrintStream.close() + queryFileStream.close() + } + } else { + var totalTime: Long = 0 + val startTime = System.currentTimeMillis() + rs = queryExecution(queryNumber, stmt) + //rs = stmt.executeQuery(query) + while (rs.next()) { + //just iterating over result + } + val endTime = System.currentTimeMillis() + iterationTime = endTime 
- startTime + + if (queryNumber.equals("13")) { + stmt.execute("drop view ViewQ13") + } + if (queryNumber.equals("15")) { + stmt.execute("drop view revenue") + } + + } + println(s"Finished executing $queryNumber") + rs.close() + return iterationTime + + } + + def queryExecution(queryNumber:String, stmt:Statement): ResultSet ={ + + val rs : ResultSet = queryNumber match { + case "1" => { + stmt.executeQuery(getQuery1()) + } + case "2" => { + stmt.executeQuery(getQuery2()) + } + case "3" => { + stmt.executeQuery(getQuery3()) + } + case "4" => { + stmt.executeQuery(getQuery4()) + } + case "5" => { + stmt.executeQuery(getQuery5()) + } + case "6" => { + stmt.executeQuery(getQuery6()) + } + case "7" => { + stmt.executeQuery(getQuery7()) + } + case "8" => { + stmt.executeQuery(getQuery8()) + } + case "9" => { + stmt.executeQuery(getQuery9()) + } + case "10" => { + stmt.executeQuery(getQuery10()) + } + case "11" => { + stmt.executeQuery(getQuery11()) + } + case "12" => { + stmt.executeQuery(getQuery12()) + } + case "13" => { + stmt.execute(getTempQuery13()) + stmt.executeQuery(getQuery13()) + } + case "14" => { + stmt.executeQuery(getQuery14()) + } + case "15" => { + stmt.execute(getTempQuery15()) + stmt.executeQuery(getQuery15()) + } + case "16" => { + stmt.executeQuery(getQuery16()) + } + case "17" => { + stmt.executeQuery(getQuery17()) + } + case "18" => { + stmt.executeQuery(getQuery18()) + } + case "19" => { + stmt.executeQuery(getQuery19()) + } + case "20" => { + stmt.executeQuery(getQuery20()) + } + case "21" => { + stmt.executeQuery(getQuery21()) + } + case "22" => { + stmt.executeQuery(getQuery22()) + } + } + rs + + } + + def getQuery1(): String = { + //DELTA = 90 + " select" + + " l_returnflag," + + " l_linestatus," + + " sum(l_quantity) as sum_qty," + + " sum(l_extendedprice) as sum_base_price," + + " sum(l_extendedprice*(1-l_discount)) as sum_disc_price," + + " sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge," + + " avg(l_quantity) as avg_qty," + + 
" avg(l_extendedprice) as avg_price," + + " avg(l_discount) as avg_disc," + + " count(*) as count_order" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate <= '1997-12-31' - interval '90' day" + + " group by" + + " l_returnflag," + + " l_linestatus" + + " order by" + + " l_returnflag," + + " l_linestatus" + } + + def getResultString1(): String = { + "l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price sum_charge avg_qty avg_price avg_disc count_order" + } + + def getQuery2(): String = { + // 1. SIZE = 15; + // 2. TYPE = BRASS; + // 3. REGION = EUROPE + + " select" + + " S_ACCTBAL," + + " S_NAME," + + " N_NAME," + + " P_PARTKEY," + + " P_MFGR," + + " S_ADDRESS," + + " S_PHONE," + + " S_COMMENT" + + " from" + + " PART," + + " SUPPLIER," + + " PARTSUPP," + + " NATION," + + " REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and P_SIZE = 24" + + " and P_TYPE like '%STEEL'" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and PS_SUPPLYCOST = (" + + " select" + + " min(PS_SUPPLYCOST)" + + " from" + + " PARTSUPP, SUPPLIER," + + " NATION, REGION" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and S_SUPPKEY = PS_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " )" + + " order by" + + " S_ACCTBAL desc," + + " N_NAME," + + " S_NAME," + + " P_PARTKEY" + + " limit 100" + + } + + def getResultString2(): String = { + "S_ACCTBAL S_NAME N_NAME P_PARTKEY P_MFGR S_ADDRESS S_PHONE S_COMMENT" + } + + def getQuery3(): String = { + // 1. SEGMENT = BUILDING; + // 2. DATE = 1995-03-15. 
+ " select" + + " l_orderkey," + + " sum(l_extendedprice*(1-l_discount)) as revenue," + + " o_orderdate," + + " o_shippriority" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " C_MKTSEGMENT = 'BUILDING'" + + " and C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate < '1995-03-15'" + + " and l_shipdate > '1995-03-15' " + + " group by" + + " l_orderkey," + + " o_orderdate," + + " o_shippriority" + + " order by" + + " o_orderdate" + + " limit 10" + } + + def getResultString3(): String = { + "l_orderkey revenue o_orderdate o_shippriority" + } + + def getQuery4(): String = { + //1.DATE = 1993-07-01. + " select" + + " o_orderpriority," + + " count(*) as order_count" + + " from" + + " ORDERS" + + " where" + + " o_orderdate >= '1993-07-01'" + + //" and o_orderdate < '1993-07-01' + interval '3' month" + + " and o_orderdate < '1993-07-01' + interval '3' month" + + " and exists (" + + " select" + + " *" + + " from" + + " LINEITEM" + + " where" + + " l_orderkey = o_orderkey" + + " and l_commitdate < l_receiptdate" + + " )" + + " group by" + + " o_orderpriority" + + " order by" + + " o_orderpriority" + } + + def getResultString4(): String = { + "o_orderpriority order_count" + } + + def getQuery5(): String = { + //1. REGION = ASIA; + //2. DATE = 1994-01-01. 
+ " select" + + " N_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM," + + " SUPPLIER," + + " NATION," + + " REGION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and l_suppkey = S_SUPPKEY" + + " and C_NATIONKEY = S_NATIONKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'ASIA'" + + " and o_orderdate >= '1994-01-01'" + + " and o_orderdate < '1994-01-01' + interval '1' year" + + " group by" + + " N_NAME" + + " order by" + + " revenue desc" + } + + def getResultString5(): String = { + "N_NAME revenue" + } + + def getQuery6(): String = { + //1. DATE = 1994-01-01; + //2. DISCOUNT = 0.06; + //3. QUANTITY = 24. + " select" + + " sum(l_extendedprice*l_discount) as revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1994-01-01'" + + " and l_shipdate < '1994-01-01' + interval '1' year" + + " and l_discount between 0.06 - 0.01 and 0.06 + 0.01" + + " and l_quantity < 24" + } + + def getResultString6(): String = { + "revenue" + } + + def getQuery7(): String = { + // 1. NATION1 = FRANCE; + // 2. NATION2 = GERMANY. 
+ "select" + + " supp_nation," + + " cust_nation," + + " l_year, " + + " sum(volume) as revenue" + + " from (" + + " select" + + " n1.N_NAME as supp_nation," + + " n2.N_NAME as cust_nation," + + //" extract m(year from l_shipdate) as l_year," + + " year(l_shipdate) as l_year," + + " l_extendedprice * (1 - l_discount) as volume" + + " from" + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2" + + " where" + + " S_SUPPKEY = l_suppkey" + + " and o_orderkey = l_orderkey" + + " and C_CUSTKEY = o_custkey" + + " and S_NATIONKEY = n1.N_NATIONKEY" + + " and C_NATIONKEY = n2.N_NATIONKEY" + + " and (" + + " (n1.N_NAME = 'FRANCE' and n2.N_NAME = 'GERMANY')" + + " or (n1.N_NAME = 'GERMANY' and n2.N_NAME = 'FRANCE')" + + " )" + + " and l_shipdate between '1995-01-01' and '1996-12-31'" + + " ) as shipping" + + " group by" + + " supp_nation," + + " cust_nation," + + " l_year" + + " order by" + + " supp_nation," + + " cust_nation," + + " l_year" + + } + + def getResultString7(): String = { + "supp_nation cust_nation l_year revenue" + } + + def getQuery8(): String = { + // 1. NATION = BRAZIL; + // 2. REGION = AMERICA; + // 3. TYPE = ECONOMY ANODIZED STEEL. 
+ "select" + + " o_year," + + " sum(case" + + " when nation = 'BRAZIL'" + + " then volume" + + " else 0" + + " end) / sum(volume) as mkt_share" + + " from (" + + " select" + + //" extract(year from o_orderdate) as o_year," + + " year(o_orderdate) as o_year,"+ + " l_extendedprice * (1-l_discount) as volume," + + " n2.N_NAME as nation" + + " from" + + " PART," + + " SUPPLIER," + + " LINEITEM," + + " ORDERS," + + " CUSTOMER," + + " NATION n1," + + " NATION n2," + + " REGION" + + " where" + + " P_PARTKEY = l_partkey" + + " and S_SUPPKEY = l_suppkey" + + " and l_orderkey = o_orderkey" + + " and o_custkey = C_CUSTKEY" + + " and C_NATIONKEY = n1.N_NATIONKEY" + + " and n1.N_REGIONKEY = R_REGIONKEY" + + " and R_NAME = 'AMERICA'" + + " and s_NATIONkey = n2.N_NATIONKEY" + + " and o_orderdate between '1995-01-01' and '1996-12-31'" + + " and P_TYPE = 'ECONOMY ANODIZED STEEL'" + + " ) as all_nations" + + " group by" + + " o_year" + + " order by" + + " o_year" + + } + + def getResultString8(): String = { + "YEAR MKT_SHARE" + } + + def getQuery9(): String = { + //1. COLOR = green. + "select" + + " nation," + + " o_year," + + " sum(amount) as sum_profit" + + " from (" + + " select" + + " N_NAME as nation," + + //" extract(year from o_orderdate) as o_year," + + " year(o_orderdate) as o_year," + + " l_extendedprice * (1 - l_discount) - PS_SUPPLYCOST * l_quantity as amount" + + " from" + + " PART," + + " SUPPLIER," + + " LINEITEM," + + " PARTSUPP," + + " ORDERS," + + " NATION" + + " where" + + " S_SUPPKEY = l_suppkey" + + " and PS_SUPPKEY = l_suppkey" + + " and PS_PARTKEY = l_partkey" + + " and P_PARTKEY = l_partkey" + + " and o_orderkey = l_orderkey" + + " and S_NATIONKEY = N_NATIONKEY" + + " and P_NAME like '%green%'" + + " ) as profit" + + " group by" + + " nation," + + " o_year" + + " order by" + + " nation," + + " o_year desc" + } + + def getResultString9(): String = { + "NATION YEAR SUM_PROFIT" + } + + def getQuery10(): String = { + //1. DATE = 1993-10-01. 
+ "select" + + " C_CUSTKEY," + + " C_NAME," + + " sum(l_extendedprice * (1 - l_discount)) as revenue," + + " C_ACCTBAL," + + " N_NAME," + + " C_ADDRESS," + + " C_PHONE," + + " C_COMMENT" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM," + + " NATION" + + " where" + + " C_CUSTKEY = o_custkey" + + " and l_orderkey = o_orderkey" + + " and o_orderdate >= '1993-10-01'" + + " and o_orderdate < '1993-10-01' + interval '3' month" + + " and l_returnflag = 'R'" + + " and C_NATIONKEY = N_NATIONKEY" + + " group by" + + " C_CUSTKEY," + + " C_NAME," + + " C_ACCTBAL," + + " C_PHONE," + + " N_NAME," + + " C_ADDRESS," + + " C_COMMENT" + + " order by" + + " revenue desc" + + " limit 20" + + } + + def getResultString10(): String = { + "C_CUSTKEY C_NAME REVENUE C_ACCTBAL N_NAME C_ADDRESS C_PHONE C_COMMENT" + } + + def getQuery11(): String = { + // 1. NATION = GERMANY; + // 2. FRACTION = 0.0001. + "select" + + " PS_PARTKEY," + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) as value" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " group by" + + " PS_PARTKEY having" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) > (" + + " select" + + " sum(PS_SUPPLYCOST * PS_AVAILQTY) * 0.0001" + + " from" + + " PARTSUPP," + + " SUPPLIER," + + " NATION" + + " where" + + " PS_SUPPKEY = S_SUPPKEY" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'GERMANY'" + + " )" + + " order by" + + " value desc" + } + + def getResultString11(): String = { + "PS_PARTKEY VALUE" + } + + + def getQuery12(): String = { + // 1.SHIPMODE1 = MAIL; + // 2. SHIPMODE2 = SHIP; + // 3. DATE = 1994-01-01. 
+ "select" + + " l_shipmode," + + " sum(case" + + " when o_orderpriority ='1-URGENT'" + + " or o_orderpriority ='2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as high_line_count," + + " sum(case" + + " when o_orderpriority <> '1-URGENT'" + + " and o_orderpriority <> '2-HIGH'" + + " then 1" + + " else 0" + + " end" + + " ) as low_line_count" + + " from" + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey = l_orderkey" + + " and l_shipmode in ('MAIL', 'SHIP')" + + " and l_commitdate < l_receiptdate" + + " and l_shipdate < l_commitdate" + + " and l_receiptdate >= '1994-01-01'" + + " and l_receiptdate < '1994-01-01' + interval '1' year" + + " group by" + + " l_shipmode" + + " order by" + + " l_shipmode" + + } + + def getResultString12(): String = { + "L_SHIPMODE HIGH_LINE_COUNT LOW_LINE_COUNT" + } + + def getTempQuery13():String={ + "create view"+ + " ViewQ13 as"+ + " select" + + " C_CUSTKEY," + + " count(o_orderkey) as c_count" + + " from" + + " CUSTOMER left outer join ORDERS on" + + " C_CUSTKEY = o_custkey" + + " and o_comment not like '%special%requests%'" + + " group by" + + " C_CUSTKEY" + } + def getQuery13(): String = { + // 1. WORD1 = special. + // 2. WORD2 = requests. + "select" + + " c_count, " + + " count(*) as custdist" + + " from " + + " ViewQ13" + + " group by" + + " c_count" + + " order by" + + " custdist desc," + + " c_count desc" + + } + + // def getQuery13(): String = { + // // 1. WORD1 = special. + // // 2. WORD2 = requests. 
+ // "select" + + // " c_count, " + + // " count(*) as custdist" + + // " from (" + + // " select" + + // " C_CUSTKEY," + + // " count(o_orderkey)" + + // " from" + + // " CUSTOMER left outer join ORDERS on" + + // " C_CUSTKEY = o_custkey" + + // " and o_comment not like ‘%special%requests%’" + + // " group by" + + // " C_CUSTKEY" + + // " )as c_orders (C_CUSTKEY, c_count)" + + // " group by" + + // " c_count" + + // " order by" + + // " custdist desc," + + // " c_count desc" + // } + + def getResultString13(): String = { + "C_COUNT CUSTDIST" + } + + def getQuery14(): String = { + //1.DATE = 1995-09-01. + "select" + + " 100.00 * sum(case" + + " when P_TYPE like 'PROMO%'" + + " then l_extendedprice*(1-l_discount)" + + " else 0" + + " end" + + " ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " l_partkey = P_PARTKEY" + + " and l_shipdate >= '1995-09-01'" + + " and l_shipdate < '1995-09-01'+ interval '1' month" + + } + + def getResultString14(): String = { + "PROMO_REVENUE" + } + + + def getTempQuery15(): String = { + "create view " + + " revenue as" + + " select" + + " l_suppkey as supplier_no ," + + " sum(l_extendedprice * (1 - l_discount)) as total_revenue" + + " from" + + " LINEITEM" + + " where" + + " l_shipdate >= '1996-01-01'" + + " and l_shipdate < '1996-01-01' + interval '3' month" + + " group by" + + " l_suppkey" + } + + def getQuery15(): String = { + "select" + + " s_suppkey," + + " s_name," + + " s_address," + + " s_phone," + + " total_revenue" + + " from" + + " SUPPLIER," + + " revenue" + + " where" + + " s_suppkey = supplier_no" + + " and total_revenue = (" + + " select" + + " max(total_revenue)" + + " from" + + " revenue" + + " )" + + " order by" + + " s_suppkey;" + } + + def getResultString15(): String = { + "" + } + + def getQuery16(): String = { + // 1. BRAND = Brand#45. + // 2. TYPE = MEDIUM POLISHED . + // 3. SIZE1 = 49 + // 4. SIZE2 = 14 + // 5. SIZE3 = 23 + // 6. 
SIZE4 = 45 + // 7. SIZE5 = 19 + // 8. SIZE6 = 3 + // 9. SIZE7 = 36 + // 10. SIZE8 = 9. + "select" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE," + + " count(distinct PS_SUPPKEY) as supplier_cnt" + + " from" + + " PARTSUPP," + + " PART" + + " where" + + " P_PARTKEY = PS_PARTKEY" + + " and P_BRAND <> 'Brand#45'" + + " and P_TYPE not like 'MEDIUM POLISHED%'" + + " and P_SIZE in (49, 14, 23, 45, 19, 3, 36, 9)" + + " and PS_SUPPKEY not in (" + + " select" + + " S_SUPPKEY" + + " from" + + " SUPPLIER" + + " where" + + " S_COMMENT like '%Customer%Complaints%'" + + " )" + + " group by" + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + + " order by" + + " supplier_cnt desc," + + " P_BRAND," + + " P_TYPE," + + " P_SIZE" + } + + def getResultString16(): String = { + "P_BRAND P_TYPE P_SIZE SUPPLIER_CNT" + } + + def getQuery17(): String = { + // 1. BRAND = Brand#23; + // 2. CONTAINER = MED BOX. + "select" + + " sum(l_extendedprice) / 7.0 as avg_yearly" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = 'Brand#23'" + + " and P_CONTAINER = 'SM PACK'" + + " and l_quantity < (" + + " select" + + " 0.2 * avg(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = P_PARTKEY" + + " )" + //" )" + + } + + def getResultString17(): String = { + "AVG_YEARLY" + } + + def getQuery18(): String = { + //1.QUANTITY = 300 + "select" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice," + + " sum(l_quantity)" + + " from" + + " CUSTOMER," + + " ORDERS," + + " LINEITEM" + + " where" + + " o_orderkey in (" + + " select" + + " l_orderkey" + + " from" + + " LINEITEM" + + " group by" + + " l_orderkey having" + + " sum(l_quantity) > 300" + + " )" + + " and C_CUSTKEY = o_custkey" + + " and o_orderkey = l_orderkey" + + " group by" + + " C_NAME," + + " C_CUSTKEY," + + " o_orderkey," + + " o_orderdate," + + " o_totalprice" + + " order by" + + " o_totalprice desc," + + " o_orderdate" + + " limit 100" + } + + 
def getResultString18(): String = { + "C_NAME C_CUSTKEY O_ORDERKEY O_ORDERDATE O_TOTALPRICE Sum(L_QUANTITY)" + } + + def getQuery19(): String = { + // 1. QUANTITY1 = 1. + // 2. QUANTITY2 = 10. + // 3. QUANTITY3 = 20. + // 4. BRAND1 = Brand#12. + // 5. BRAND2 = Brand#23. + // 6. BRAND3 = Brand#34. + //"select sum(l_extendedprice * (1 - l_discount)) as revenue from LINEITEM, PART where (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#12’ and P_CONTAINER in ( ‘SM CASE’, ‘SM BOX’, ‘SM PACK’, ‘SM PKG’) and l_quantity >= 1 and l_quantity <= 1 + 10 and P_SIZE between 1 and 5 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’) or (P_PARTKEY = l_partkey and P_BRAND = ‘Brand#23’ and P_CONTAINER in (‘MED BAG’, ‘MED BOX’, ‘MED PKG’, ‘MED PACK’) and l_quantity >= 10 and l_quantity <= 10 + 10 and P_SIZE between 1 and 10 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ ) or ( P_PARTKEY = l_partkey and P_BRAND = ‘Brand#34’ and P_CONTAINER in ( ‘LG CASE’, ‘LG BOX’, ‘LG PACK’, ‘LG PKG’) and l_quantity >= 20 and l_quantity <= 20 + 10 and P_SIZE between 1 and 15 and l_shipmode in (‘AIR’, ‘AIR REG’) and l_shipinstruct = ‘DELIVER IN PERSON’ )" + "select" + + " sum(l_extendedprice * (1 - l_discount)) as revenue" + + " from" + + " LINEITEM," + + " PART" + + " where" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#12\"" + + " and P_CONTAINER in ( \"SM CASE\", \"SM BOX\", \"SM PACK\", \"SM PKG\")" + + " and l_quantity >= 1 and l_quantity <= 1 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 and 5" + + " )" + + " or" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#23\"" + + " and P_CONTAINER in (\"MED BAG\", \"MED BOX\", \"MED PKG\", \"MED PACK\")" + + " and l_quantity >= 10 and l_quantity <= 10 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 
and 10" + + " )" + + " or" + + " (" + + " P_PARTKEY = l_partkey" + + " and P_BRAND = \"Brand#34\"" + + " and P_CONTAINER in ( \"LG CASE\", \"LG BOX\", \"LG PACK\", \"LG PKG\")" + + " and l_quantity >= 20 and l_quantity <= 20 + 10" + + " and l_shipmode in (\"AIR\", \"AIR REG\")" + + " and l_shipinstruct = \"DELIVER IN PERSON\"" + + " and P_SIZE between 1 and 15" + + " )" + } + + def getResultString19(): String = { + "REVENUE" + } + + def getQuery20(): String = { + // 1. COLOR = forest. + // 2. DATE = 1994-01-01. + // 3. NATION = CANADA. + "select" + + " S_NAME," + + " S_ADDRESS" + + " from" + + " SUPPLIER, NATION" + + " where" + + " S_SUPPKEY in (" + + " select" + + " PS_SUPPKEY" + + " from" + + " PARTSUPP" + + " where" + + " PS_PARTKEY in (" + + " select" + + " P_PARTKEY" + + " from" + + " PART" + + " where" + + " P_NAME like 'khaki%'" + + " )" + + " and PS_AVAILQTY > (" + + " select" + + " 0.5 * sum(l_quantity)" + + " from" + + " LINEITEM" + + " where" + + " l_partkey = PS_PARTKEY" + + " and l_suppkey = PS_SUPPKEY" + + " and l_shipdate >= '1994-01-01'" + + " and l_shipdate < '1994-01-01' + interval 1 year" + + " )" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = 'CANADA'" + + " order by" + + " S_NAME" + + } + + def getResultString20(): String = { + "S_NAME S_ADDRESS" + } + + def getQuery21(): String = { + //NATION = SAUDI ARABIA. 
+ "select" + + " S_NAME," + + " count(*) as numwait" + + " from" + + " SUPPLIER," + + " LINEITEM l1," + + " ORDERS," + + " NATION" + + " where" + + " S_SUPPKEY = l1.l_suppkey" + + " and o_orderkey = l1.l_orderkey" + + " and o_orderstatus = \"F\"" + + " and l1.l_receiptdate > l1.l_commitdate" + + " and exists (" + + " select" + + " *" + + " from" + + " LINEITEM l2" + + " where" + + " l2.l_orderkey = l1.l_orderkey" + + " and l2.l_suppkey <> l1.l_suppkey" + + " )" + + " and not exists (" + + " select" + + " *" + + " from" + + " LINEITEM l3" + + " where" + + " l3.l_orderkey = l1.l_orderkey" + + " and l3.l_suppkey <> l1.l_suppkey" + + " and l3.l_receiptdate > l3.l_commitdate" + + " )" + + " and S_NATIONKEY = N_NATIONKEY" + + " and N_NAME = \"VIETNAM\"" + + " group by" + + " S_NAME" + + " order by" + + " numwait desc," + + " S_NAME" + + " limit 100" + } + + def getResultString21(): String = { + "S_NAME NUMWAIT" + } + + def getQuery22(): String = { + // 1. I1 = 13. + // 2. I2 = 31. + // 3. I3 = 23. + // 4. I4 = 29. + // 5. I5 = 30. + // 6. I6 = 18. + // 7. I7 = 17. 
+ "select" + + " cntrycode," + + " count(*) as numcust," + + " sum(C_ACCTBAL) as totacctbal" + + " from (" + + " select" + + " substring(C_PHONE from 1 for 2) as cntrycode," + + " C_ACCTBAL" + + " from" + + " CUSTOMER " + + " where" + + " substring(C_PHONE from 1 for 2) in" + + " (\"13\",\"31\",\"23\",\"29\",\"30\",\"18\",\"17\")" + + " and C_ACCTBAL > (" + + " select" + + " avg(C_ACCTBAL)" + + " from" + + " CUSTOMER" + + " where" + + " C_ACCTBAL > 0.00" + + " and substring (C_PHONE from 1 for 2) in" + + " (\"13\",\"31\",\"23\",\"29\",\"30\",\"18\",\"17\")" + + " )" + + " and not exists (" + + " select" + + " *" + + " from" + + " ORDERS" + + " where" + + " o_custkey = C_CUSTKEY" + + " )" + + " ) as custsale" + + " group by" + + " cntrycode" + + " order by" + + " cntrycode" + } + + def getResultString22(): String = { + "CNTRYCODE NUMCUST TOTACCTBAL" + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Tables.scala b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Tables.scala new file mode 100644 index 0000000000..9428bce52f --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/memsql/TPCH_Memsql_Tables.scala @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark.memsql + +import java.sql.DriverManager + +import io.snappydata.benchmark.{TPCHColumnPartitionedTable, TPCHReplicatedTable} + +object TPCH_Memsql_Tables { + + def main(args: Array[String]) { + + val host = args(0) + val port = args(1) + val dataDirectory = args(2) + val numberOfDataLoadingStages : String = args(3) + + val dbName = "TPCH" + val user = "root" + val password = "" + + //Class.forName("com.mysql.jdbc.Driver") + Class.forName("com.mysql.cj.jdbc.Driver") + val dbAddress = "jdbc:mysql://" + host + ":" + port + "/" + val conn = DriverManager.getConnection(dbAddress, user, password) + val stmt = conn.createStatement + + // Create TPC-H database and tables + + stmt.execute("DROP DATABASE IF EXISTS " + dbName) + stmt.execute("CREATE DATABASE IF NOT EXISTS " + dbName) + stmt.execute("USE " + dbName) + println("---------------------------------------------------") + + TPCHReplicatedTable.createRegionTable_Memsql(stmt) + + TPCHReplicatedTable.createNationTable_Memsql(stmt) + + TPCHReplicatedTable.createSupplierTable_Memsql(stmt) + + TPCHColumnPartitionedTable.createPartTable_Memsql(stmt) + + TPCHColumnPartitionedTable.createPartSuppTable_Memsql(stmt) + + TPCHColumnPartitionedTable.createCustomerTable_Memsql(stmt) + + TPCHColumnPartitionedTable.createOrderTable_Memsql(stmt) + + TPCHColumnPartitionedTable.createLineItemTable_Memsql(stmt) + + var rs = stmt.executeQuery("SHOW TABLES") + println("---------------------------------------------------") + println(System.lineSeparator() + "Tables in TPC-H database:") + println("---------------------------------------------------") + while (rs.next()) { + System.out.println(rs.getString(1)); + } + println("---------------------------------------------------") + + // Load data into TPC-H tables + + val smallTables = List("REGION", "NATION") + val largeTables = List("PART", "PARTSUPP", "CUSTOMER", "ORDERS", "LINEITEM", "SUPPLIER") + + // replicated/ reference tables are small and are 
loaded from a single file (in a single stage) + for(table <- smallTables){ + println(s"Loading data from '${dataDirectory}/${table.toLowerCase}.tbl' into table ${table}"); + stmt.execute(s"LOAD DATA INFILE '${dataDirectory}/${table.toLowerCase}.tbl' INTO TABLE ${table} COLUMNS TERMINATED BY '|' LINES TERMINATED BY '|\n' "); + println(s"Finished loading data in ${table}") + } + + // partitioned tables can be read in one or multiple stages - i.e. from one or multiple files/ chunks + for(table <- largeTables){ + val stages : Int = numberOfDataLoadingStages.toInt + + if(stages == 1){ + println(s"Loading data from '${dataDirectory}/${table.toLowerCase}.tbl' into table ${table}"); + stmt.execute(s"LOAD DATA INFILE '${dataDirectory}/${table.toLowerCase}.tbl' INTO TABLE ${table} COLUMNS TERMINATED BY '|' LINES TERMINATED BY '|\n' "); + } + else{ + for(stage <- 1 to numberOfDataLoadingStages.toInt){ + println(s"Loading data from '${dataDirectory}/${table.toLowerCase}.tbl.${stage}' into table ${table}"); + stmt.execute(s"LOAD DATA INFILE '${dataDirectory}/${table.toLowerCase}.tbl.${stage}' INTO TABLE ${table} COLUMNS TERMINATED BY '|' LINES TERMINATED BY '|\n' "); + } + } + println(s"Finished loading data in ${table}") + } + + stmt.close(); + } + } diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala new file mode 100644 index 0000000000..8176b0be3e --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.snappy + +import java.io.{File, FileOutputStream, PrintStream} + +import io.snappydata.SnappyFunSuite +import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark +import org.apache.spark.sql.{SnappySession, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} +import org.scalatest.BeforeAndAfterAll + + +/** + * Runs the TPC-DS query list through TPCDSQuerySnappyBenchmark, once on a SnappySession and + * once on a plain SparkSession. The benchmark tests only execute when the TPCDS_SUITE + * environment variable is "true" (case-insensitive); otherwise they are no-ops. + */ +class TPCDSSuite extends SnappyFunSuite + with BeforeAndAfterAll { + + var tpcdsQueries = Seq[String]() + var runTPCDSSuite = "" + + + val conf = + new SparkConf() + .setMaster("local[*]") + .setAppName("test-sql-context") + .set("spark.driver.allowMultipleContexts", "true") + .set("spark.sql.shuffle.partitions", "4") + .set("spark.driver.memory", "1g") + .set("spark.executor.memory", "1g") + .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) + + override def beforeAll(): Unit = { + super.beforeAll() + tpcdsQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", + "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", + "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", + "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", + "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", + "q81", 
"q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + runTPCDSSuite = System.getenv("TPCDS_SUITE") + if (runTPCDSSuite == null) { + println("TPCDS_SUITE should be set as an environment variable in order to run TPCDSSuite") + } + } + + // Disabling the test run from precheckin as it takes around an hour. + // TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated + // machine. + + test("Test with Snappy") { + // Literal-first comparison: runTPCDSSuite is null when TPCDS_SUITE is unset, and calling + // equalsIgnoreCase on it would throw instead of skipping the benchmark. + if ("true".equalsIgnoreCase(runTPCDSSuite)) { + val sc = new SparkContext(conf) + TPCDSQuerySnappyBenchmark.snappy = new SnappySession(sc) + val dataLocation = "/export/shared/QA_DATA/TPCDS/data" + val snappyHome = System.getenv("SNAPPY_HOME") + val snappyRepo = s"$snappyHome/../../.." + + TPCDSQuerySnappyBenchmark.execute(dataLocation, + queries = tpcdsQueries, true, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds") + } + } + + // Disabling the test run from precheckin as it takes around an hour. + // TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated + // machine. + + test("Test with Spark") { + // Same null-safe guard as the Snappy test above. + if ("true".equalsIgnoreCase(runTPCDSSuite)) { + TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate() + val dataLocation = "/export/shared/QA_DATA/TPCDS/data" + val snappyHome = System.getenv("SNAPPY_HOME") + val snappyRepo = s"$snappyHome/../../.." + + TPCDSQuerySnappyBenchmark.execute(dataLocation, + queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds") + + } + } + + // Disabling the validation for now as this requires the expected result files to be created + // using stock spark before hand. 
+ + ignore("Validate Results") { + + // One report for the whole run: opening Comparison.out inside the loop truncated the + // findings of every earlier query and never released the file handle. + val resultFileStream: FileOutputStream = new FileOutputStream(new File("Comparison.out")) + val resultOutputStream: PrintStream = new PrintStream(resultFileStream) + + try { + for (query <- tpcdsQueries) { + + val actualResultsAvailableAt = "path for actual result" + val expectedResultsAvailableAt = "path for expected result" + + val expectedFile = sc.textFile(s"file://$expectedResultsAvailableAt/Spark_$query.out") + val actualFile = sc.textFile(s"file://$actualResultsAvailableAt/Snappy_$query.out") + + // Compare as sorted line lists so row order does not matter. + val expectedLineSet = expectedFile.collect().toList.sorted + val actualLineSet = actualFile.collect().toList.sorted + + if (!actualLineSet.equals(expectedLineSet)) { + if (!(expectedLineSet.size == actualLineSet.size)) { + resultOutputStream.println(s"For $query " + + s"result count mismatched observed with " + + s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}") + } else { + for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) { + if (!expectedLine.equals(actualLine)) { + resultOutputStream.println(s"For $query result mismatched observed") + resultOutputStream.println(s"Expected : $expectedLine") + resultOutputStream.println(s"Found : $actualLine") + resultOutputStream.println(s"-------------------------------------") + } + } + } + } + } + } finally { + resultOutputStream.close() + } + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCH.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCH.scala new file mode 100644 index 0000000000..b651bb0ebf --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCH.scala @@ -0,0 +1,1001 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.benchmark.snappy + +trait TPCHBase { + type QUERY_TYPE = Seq[String] // we can add parameter type check later. +} + +/** + * Original TPCH queries as per specification. + */ +trait TPCH extends TPCHBase { + + def q1: String = + s""" + |select + | l_returnflag, + | l_linestatus, + | sum(l_quantity) as sum_qty, + | sum(l_extendedprice) as sum_base_price, + | sum(l_extendedprice*(1-l_discount)) as sum_disc_price, + | sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge, + | avg(l_quantity) as avg_qty, + | avg(l_extendedprice) as avg_price, + | avg(l_discount) as avg_disc, + | count(*) as count_order + |from + | lineitem + |where + | l_shipdate <= date '1998-12-01' - interval '[DELTA]' day (3) + |group by + | l_returnflag, + | l_linestatus + |order by + | l_returnflag, + | l_linestatus + """.stripMargin + + def q1Tokens: QUERY_TYPE = Seq("date '1998-12-01' - interval '[DELTA]' day (3)") + + def q2: String = + s""" + |select + | s_acctbal, + | s_name, + | n_name, + | p_partkey, + | p_mfgr, + | s_address, + | s_phone, + | s_comment + |from + | part, + | supplier, + | partsupp, + | nation, + | region + |where + | p_partkey = ps_partkey + | and s_suppkey = ps_suppkey + | and p_size = [SIZE] + | and p_type like '%[TYPE]' + | and s_nationkey = n_nationkey + | and n_regionkey = r_regionkey + | and r_name = '[REGION]' + | and ps_supplycost = ( + | select + | min(ps_supplycost) + | from + | partsupp, supplier, + | nation, region + | where + | p_partkey = ps_partkey + | and s_suppkey = 
ps_suppkey + | and s_nationkey = n_nationkey + | and n_regionkey = r_regionkey + | and r_name = '[REGION]' + | ) + |order by + | s_acctbal desc, + | n_name, + | s_name, + | p_partkey + """.stripMargin + + def q2Tokens: QUERY_TYPE = Seq("[SIZE]", "[TYPE]", "[REGION]") + + def q3: String = + s""" + |select + | l_orderkey, + | sum(l_extendedprice*(1-l_discount)) as revenue, + | o_orderdate, + | o_shippriority + |from + | customer, + | orders, + | lineitem + |where + | c_mktsegment = '[SEGMENT]' + | and c_custkey = o_custkey + | and l_orderkey = o_orderkey + | and o_orderdate < date '[DATE]' + | and l_shipdate > date '[DATE]' + |group by + | l_orderkey, + | o_orderdate, + | o_shippriority + |order by + | revenue desc, + | o_orderdate + """.stripMargin + + def q3Tokens: QUERY_TYPE = Seq("[SEGMENT]", "date '[DATE]'") + + def q4: String = + s""" + |select + | o_orderpriority, + | count(*) as order_count + |from + | orders + |where + | o_orderdate >= date '[DATE]' + | and o_orderdate < date '[DATE] ' + interval '3' month + | and exists ( + | select + | * + | from + | lineitem + | where + | l_orderkey = o_orderkey + | and l_commitdate < l_receiptdate + | ) + |group by + | o_orderpriority + |order by + | o_orderpriority + """.stripMargin + + // note the additional space for interval token. date '[DATE] ' gets distinguished from + // date '[DATE]' and hence replaceAll doesn't changes it. Didn't wanted to get into + // regex here. 
+ def q4Tokens: QUERY_TYPE = Seq("date '[DATE]'", "date '[DATE] ' + interval '3' month") + + def q5: String = + s""" + |select + | n_name, + | sum(l_extendedprice * (1 - l_discount)) as revenue + |from + | customer, + | orders, + | lineitem, + | supplier, + | nation, + | region + |where + | c_custkey = o_custkey + | and l_orderkey = o_orderkey + | and l_suppkey = s_suppkey + | and c_nationkey = s_nationkey + | and s_nationkey = n_nationkey + | and n_regionkey = r_regionkey + | and r_name = '[REGION]' + | and o_orderdate >= date '[DATE]' + | and o_orderdate < date '[DATE] ' + interval '1' year + |group by + | n_name + |order by + | revenue desc + """.stripMargin + + def q5Tokens: QUERY_TYPE = Seq("[REGION]", "date '[DATE]'", "date '[DATE] ' + interval '1' year") + + def q6: String = + s""" + |select + | sum(l_extendedprice*l_discount) as revenue + |from + | lineitem + |where + | l_shipdate >= date '[DATE]' + | and l_shipdate < date '[DATE] ' + interval '1' year + | and l_discount between [DISCOUNT] - 0.01 and [DISCOUNT] + 0.01 + | and l_quantity < [QUANTITY] + """.stripMargin + + def q6Tokens: QUERY_TYPE = Seq("[DISCOUNT]", "date '[DATE]'", + "date '[DATE] ' + interval '1' year", "[QUANTITY]") + + def q7: String = + s""" + |select + | supp_nation, + | cust_nation, + | l_year, + | sum(volume) as revenue + |from ( + | select + | n1.n_name as supp_nation, + | n2.n_name as cust_nation, + | extract(year from l_shipdate) as l_year, + | l_extendedprice * (1 - l_discount) as volume + | from + | supplier, + | lineitem, + | orders, + | customer, + | nation n1, + | nation n2 + | where + | s_suppkey = l_suppkey + | and o_orderkey = l_orderkey + | and c_custkey = o_custkey + | and s_nationkey = n1.n_nationkey + | and c_nationkey = n2.n_nationkey + | and ( + | (n1.n_name = '[NATION1]' and n2.n_name = '[NATION2]') + | or (n1.n_name = '[NATION2]' and n2.n_name = '[NATION1]') + | ) + | and l_shipdate between date '1995-01-01' and date '1996-12-31' + | ) as shipping + |group by + | 
supp_nation, + | cust_nation, + | l_year + |order by + | supp_nation, + | cust_nation, + | l_year + """.stripMargin + + def q7Tokens: QUERY_TYPE = Seq("between date '1995-01-01' and date '1996-12-31'", + "extract(year from ", + "[NATION1]", "[NATION2]") + + def q8: String = + s""" + |select + | o_year, + | sum(case + | when nation = '[NATION]' then volume + | else 0 + | end) / sum(volume) as mkt_share + |from ( + | select + | extract(year from o_orderdate) as o_year, + | l_extendedprice * (1-l_discount) as volume, + | n2.n_name as nation + | from + | part, + | supplier, + | lineitem, + | orders, + | customer, + | nation n1, + | nation n2, + | region + | where + | p_partkey = l_partkey + | and s_suppkey = l_suppkey + | and l_orderkey = o_orderkey + | and o_custkey = c_custkey + | and c_nationkey = n1.n_nationkey + | and n1.n_regionkey = r_regionkey + | and r_name = '[REGION]' + | and s_nationkey = n2.n_nationkey + | and o_orderdate between date '1995-01-01' and date '1996-12-31' + | and p_type = '[TYPE]' + | ) as all_nations + |group by + | o_year + |order by + | o_year + """.stripMargin + + def q8Tokens: QUERY_TYPE = Seq("between date '1995-01-01' and date '1996-12-31'", + "extract(year from ", + "[NATION]", "[REGION]", "[TYPE]") + + def q9: String = + s""" + |select + | nation, + | o_year, + | sum(amount) as sum_profit + |from ( + | select + | n_name as nation, + | extract(year from o_orderdate) as o_year, + | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + | from + | part, + | supplier, + | lineitem, + | partsupp, + | orders, + | nation + | where + | s_suppkey = l_suppkey + | and ps_suppkey = l_suppkey + | and ps_partkey = l_partkey + | and p_partkey = l_partkey + | and o_orderkey = l_orderkey + | and s_nationkey = n_nationkey + | and p_name like '%[COLOR]%' + | ) as profit + |group by + | nation, + | o_year + |order by + | nation, + | o_year desc + """.stripMargin + + def q9Tokens: QUERY_TYPE = Seq("extract(year from ","[COLOR]") + + 
def q10: String = + s""" + |select + | c_custkey, + | c_name, + | sum(l_extendedprice * (1 - l_discount)) as revenue, + | c_acctbal, + | n_name, + | c_address, + | c_phone, + | c_comment + |from + | customer, + | orders, + | lineitem, + | nation + |where + | c_custkey = o_custkey + | and l_orderkey = o_orderkey + | and o_orderdate >= date '[DATE]' + | and o_orderdate < date '[DATE] ' + interval '3' month + | and l_returnflag = 'R' + | and c_nationkey = n_nationkey + |group by + | c_custkey, + | c_name, + | c_acctbal, + | c_phone, + | n_name, + | c_address, + | c_comment + |order by + | revenue desc + """.stripMargin + + def q10Tokens: QUERY_TYPE = Seq("date '[DATE]'", "date '[DATE] ' + interval '3' month") + + def q11: String = + s""" + |select + | ps_partkey, + | sum(ps_supplycost * ps_availqty) as value + |from + | partsupp, + | supplier, + | nation + |where + | ps_suppkey = s_suppkey + | and s_nationkey = n_nationkey + | and n_name = '[NATION]' + |group by + | ps_partkey having + | sum(ps_supplycost * ps_availqty) > ( + | select + | sum(ps_supplycost * ps_availqty) * [FRACTION] + | from + | partsupp, + | supplier, + | nation + | where + | ps_suppkey = s_suppkey + | and s_nationkey = n_nationkey + | and n_name = '[NATION]' + | ) + |order by + | value desc + """.stripMargin + + def q11Tokens: QUERY_TYPE = Seq("[NATION]", "[FRACTION]") + + def q12: String = + s""" + |select + | l_shipmode, + | sum(case + | when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' + | then 1 + | else 0 + | end + | ) as high_line_count, + | sum(case + | when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' + | then 1 + | else 0 + | end + | ) as low_line_count + |from + | orders, + | lineitem + |where + | o_orderkey = l_orderkey + | and l_shipmode in ('[SHIPMODE1]', '[SHIPMODE2]') + | and l_commitdate < l_receiptdate + | and l_shipdate < l_commitdate + | and l_receiptdate >= date '[DATE]' + | and l_receiptdate < date '[DATE] ' + interval '1' year + |group by + | 
l_shipmode + |order by + | l_shipmode + """.stripMargin + + def q12Tokens: QUERY_TYPE = Seq("[SHIPMODE1]", "[SHIPMODE2]", "date '[DATE]'", + "date '[DATE] ' + interval '1' year") + + def q13: String = + s""" + |select + | c_count, + | count(*) as custdist + |from ( + | select + | c_custkey, + | count(o_orderkey) as c_count + | from + | customer left outer join orders on + | c_custkey = o_custkey + | and o_comment not like '%[WORD1]%[WORD2]%' + | group by + | c_custkey + | ) as c_orders + |group by + | c_count + |order by + | custdist desc, + | c_count desc + """.stripMargin + + def q13Tokens: QUERY_TYPE = Seq("[WORD1]", "[WORD2]") + + def q14: String = + s""" + |select + | 100.00 * sum(case + | when p_type like 'PROMO%' + | then l_extendedprice*(1-l_discount) + | else 0 + | end + | ) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue + |from + | lineitem, + | part + |where + | l_partkey = p_partkey + | and l_shipdate >= date '[DATE]' + | and l_shipdate < date '[DATE] ' + interval '1' month + """.stripMargin + + def q14Tokens: QUERY_TYPE = Seq("date '[DATE]'", "date '[DATE] ' + interval '1' month") + + def q15v: String = + s""" + |create view revenue[STREAM_ID] (supplier_no, total_revenue) as + | select + | l_suppkey as supplier_no, + | sum(l_extendedprice * (1 - l_discount)) as total_revenue + | from + | lineitem + | where + | l_shipdate >= date '[DATE]' + | and l_shipdate < date '[DATE] ' + interval '3' month + | group by + | l_suppkey + """.stripMargin + + def q15vTokens: QUERY_TYPE = Seq("[STREAM_ID]", "date '[DATE]'", + "date '[DATE] ' + interval '3' month") + + def q15: String = + s""" + |select + | s_suppkey, + | s_name, + | s_address, + | s_phone, + | total_revenue + |from + | supplier, + | revenue[STREAM_ID] + |where + | s_suppkey = supplier_no + | and total_revenue = ( + | select + | max(total_revenue) + | from + | revenue[STREAM_ID] + | ) + |order by + | s_suppkey + """.stripMargin + + def q15Tokens: QUERY_TYPE = Seq("[STREAM_ID]") + + def q16: 
String = + s""" + |select + | p_brand, + | p_type, + | p_size, + | count(distinct ps_suppkey) as supplier_cnt + |from + | partsupp, + | part + |where + | p_partkey = ps_partkey + | and p_brand <> '[BRAND]' + | and p_type not like '[TYPE]%' + | and p_size in ([SIZE1], [SIZE2], [SIZE3], [SIZE4], [SIZE5], [SIZE6], [SIZE7], [SIZE8]) + | and ps_suppkey not in ( + | select + | s_suppkey + | from + | supplier + | where + | s_comment like '%Customer%Complaints%' + | ) + |group by + | p_brand, + | p_type, + | p_size + |order by + | supplier_cnt desc, + | p_brand, + | p_type, + | p_size + """.stripMargin + + def q16Tokens: QUERY_TYPE = Seq("[BRAND]", "[TYPE]", "[SIZE1]", "[SIZE2]", "[SIZE3]", "[SIZE4]", + "[SIZE5]", "[SIZE6]", "[SIZE7]", "[SIZE8]") + + def q17: String = + s""" + |select + | sum(l_extendedprice) / 7.0 as avg_yearly + | from + | lineitem, + | part + |where + | p_partkey = l_partkey + | and p_brand = '[BRAND]' + | and p_container = '[CONTAINER]' + | and l_quantity < ( + | select + | 0.2 * avg(l_quantity) + | from + | lineitem + | where + | l_partkey = p_partkey + | ) + """.stripMargin + + def q17Tokens: QUERY_TYPE = Seq("[BRAND]", "[CONTAINER]") + + def q18: String = + s""" + |select + | c_name, + | c_custkey, + | o_orderkey, + | o_orderdate, + | o_totalprice, + | sum(l_quantity) + |from + | customer, + | orders, + | lineitem + |where + | o_orderkey in ( + | select + | l_orderkey + | from + | lineitem + | group by + | l_orderkey + | having + | sum(l_quantity) > [QUANTITY] + | ) + | and c_custkey = o_custkey + | and o_orderkey = l_orderkey + |group by + | c_name, + | c_custkey, + | o_orderkey, + | o_orderdate, + | o_totalprice + |order by + | o_totalprice desc, + | o_orderdate + """.stripMargin + + def q18Tokens: QUERY_TYPE = Seq("[QUANTITY]") + + def q19: String = + s""" + |select + | sum(l_extendedprice * (1 - l_discount) ) as revenue + |from + | lineitem, + | part + |where + | ( + | p_partkey = l_partkey + | and p_brand = '[BRAND1]' + | and p_container in ( 
'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + | and l_quantity >= [QUANTITY1] and l_quantity <= [QUANTITY1] + 10 + | and p_size between 1 and 5 + | and l_shipmode in ('AIR', 'AIR REG') + | and l_shipinstruct = 'DELIVER IN PERSON' + | ) + | or + | ( + | p_partkey = l_partkey + | and p_brand = '[BRAND2]' + | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + | and l_quantity >= [QUANTITY2] and l_quantity <= [QUANTITY2] + 10 + | and p_size between 1 and 10 + | and l_shipmode in ('AIR', 'AIR REG') + | and l_shipinstruct = 'DELIVER IN PERSON' + | ) + | or + | ( + | p_partkey = l_partkey + | and p_brand = '[BRAND3]' + | and p_container in ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + | and l_quantity >= [QUANTITY3] and l_quantity <= [QUANTITY3] + 10 + | and p_size between 1 and 15 + | and l_shipmode in ('AIR', 'AIR REG') + | and l_shipinstruct = 'DELIVER IN PERSON' + | ) + """.stripMargin + + def q19Tokens: QUERY_TYPE = Seq("[BRAND1]", "[QUANTITY1]", + "[BRAND2]", "[QUANTITY2]", + "[BRAND3]", "[QUANTITY3]") + + def q20: String = + s""" + |select + | s_name, + | s_address + |from + | supplier, nation + |where + | s_suppkey in ( + | select + | ps_suppkey + | from + | partsupp + | where + | ps_partkey in ( + | select + | p_partkey + | from + | part + | where + | p_name like '[COLOR]%' + | ) + | and ps_availqty > ( + | select + | 0.5 * sum(l_quantity) + | from + | lineitem + | where + | l_partkey = ps_partkey + | and l_suppkey = ps_suppkey + | and l_shipdate >= date '[DATE]' + | and l_shipdate < date '[DATE] ' + interval '1' year + | ) + | ) + | and s_nationkey = n_nationkey + | and n_name = '[NATION]' + |order by + | s_name + """.stripMargin + + def q20Tokens: QUERY_TYPE = Seq("[COLOR]", "date '[DATE]'", + "date '[DATE] ' + interval '1' year", "[NATION]") + + def q21: String = + s""" + |select + | s_name, + | count(*) as numwait + |from + | supplier, + | lineitem l1, + | orders, + | nation + |where + | s_suppkey = l1.l_suppkey + | and o_orderkey = 
l1.l_orderkey + | and o_orderstatus = 'F' + | and l1.l_receiptdate > l1.l_commitdate + | and exists ( + | select + | * + | from + | lineitem l2 + | where + | l2.l_orderkey = l1.l_orderkey + | and l2.l_suppkey <> l1.l_suppkey + | ) + | and not exists ( + | select + | * + | from + | lineitem l3 + | where + | l3.l_orderkey = l1.l_orderkey + | and l3.l_suppkey <> l1.l_suppkey + | and l3.l_receiptdate > l3.l_commitdate + | ) + | and s_nationkey = n_nationkey + | and n_name = '[NATION]' + |group by + | s_name + |order by + | numwait desc, + | s_name + """.stripMargin + + def q21Tokens: QUERY_TYPE = Seq("[NATION]") + + def q22: String = + s""" + |select + | cntrycode, + | count(*) as numcust, + | sum(c_acctbal) as totacctbal + |from ( + | select + | substring(c_phone from 1 for 2) as cntrycode, + | c_acctbal + | from + | customer + | where + | substring(c_phone from 1 for 2) in + | ('[I1]','[I2]','[I3]','[I4]','[I5]','[I6]','[I7]') + | and c_acctbal > ( + | select + | avg(c_acctbal) + | from + | customer + | where + | c_acctbal > 0.00 + | and substring(c_phone from 1 for 2) in + | ('[I1]','[I2]','[I3]','[I4]','[I5]','[I6]','[I7]') + | ) + | and not exists ( + | select + | * + | from + | orders + | where + | o_custkey = c_custkey + | ) + | ) as custsale + |group by + | cntrycode + |order by + | cntrycode + """.stripMargin + + def q22Tokens: QUERY_TYPE = Seq("substring(c_phone from 1 for 2)", "[I1]", "[I2]", "[I3]", + "[I4]", "[I5]", "[I6]", "[I7]") + + +} + +trait Adapter extends TPCHBase { + protected val qNumPattern = "(\\d+)(.*)".r + val viewName = "(?s)\\W+(create\\s+view\\s+)(\\w+)(.*?as)(.*)".r + + protected val defaults = Map( + " 1 0 " -> "90", + " 2 0 " -> "24", + " 2 1 " -> "STEEL", + " 2 2 " -> "ASIA", + " 3 0 " -> "BUILDING", + " 3 1 " -> "1995-03-15", + " 4 0 " -> "1993-07-01", + " 4 1 " -> "1993-07-01", + " 5 0 " -> "ASIA", + " 5 1 " -> "1994-01-01", + " 5 2 " -> "1994-01-01", + " 6 0 " -> "0.06", + " 6 1 " -> "1994-01-01", + " 6 2 " -> "1994-01-01", + " 6 3 
" -> "24", + " 7 0 " -> "<<>>", + " 7 1 " -> "<<>>", + " 7 2 " -> "FRANCE", + " 7 3 " -> "GERMANY", + " 8 0 " -> "<<>>", + " 8 1 " -> "<<>>", + " 8 2 " -> "BRAZIL", + " 8 3 " -> "AMERICA", + " 8 4 " -> "ECONOMY ANODIZED STEEL", + " 9 0 " -> "<<>>", + " 9 1 " -> "green", + "10 0 " -> "1993-10-01", + "10 1 " -> "1993-10-01", + "11 0 " -> "GERMANY", + "11 1 " -> "0.0001", + "12 0 " -> "MAIL", + "12 1 " -> "SHIP", + "12 2 " -> "1994-01-01", + "12 3 " -> "1994-01-01", + "13 0 " -> "special", + "13 1 " -> "requests", + "14 0 " -> "1995-09-01", + "14 1 " -> "1995-09-01", + "15 0 " -> "_1", + "15v 0 " -> "_1", + "15v 1 " -> "1996-01-01", + "15v 2 " -> "1996-01-01", + "16 0 " -> "Brand#45", + "16 1 " -> "MEDIUM POLISHED", + "16 2 " -> "49", + "16 3 " -> "14", + "16 4 " -> "23", + "16 5 " -> "45", + "16 6 " -> "19", + "16 7 " -> "3", + "16 8 " -> "36", + "16 9 " -> "9", + "17 0 " -> "Brand#23", + "17 1 " -> "SM PACK", + "18 0 " -> "300", + "19 0 " -> "Brand#12", + "19 1 " -> "1", + "19 2 " -> "Brand#23", + "19 3 " -> "10", + "19 4 " -> "Brand#34", + "19 5 " -> "20", + "20 0 " -> "khaki", + "20 1 " -> "1994-01-01", + "20 2 " -> "1994-01-01", + "20 3 " -> "CANADA", + "21 0 " -> "VIETNAM", + "22 0 " -> "<<<>>>", + "22 1 " -> "13", + "22 2 " -> "31", + "22 3 " -> "23", + "22 4 " -> "29", + "22 5 " -> "30", + "22 6 " -> "18", + "22 7 " -> "17", + "" -> "" + ) + + def replace(qNum: String, tokens: QUERY_TYPE, queryStr: String, args: String*): String +} + + +trait DynamicQueryGetter extends TPCHBase { + self: Adapter => + + lazy val valAdapter: Adapter = self + + /** + * Expands the caller's arguments to one value per token and hands them to the adapter. + * With no arguments the adapter is called with an empty list. Throws when the expanded + * list is non-empty and its length differs from the token count. + */ + final def deriveFromTokens(qNum: String, queryStr: String, + tokens: QUERY_TYPE, args: String*): String = { + + // Walk the tokens once, consuming one argument per token. A "date '[DATE] '" token that + // directly follows its "date '[DATE]'" partner reuses the partner's argument, so callers + // pass each date only once. The earlier sliding(2) walk produced one argument per window, + // which dropped the last token's argument whenever the token list had no date pair. + val newArgs = if (args.isEmpty) args else { + val (_, _, expanded) = tokens.foldLeft((0, "", Vector.empty[String])) { + case ((next, prev, acc), tok) => + val sharesDate = acc.nonEmpty && prev.indexOf("date '[DATE]'") >= 0 && tok.indexOf("date '[DATE] '") >= 0 + if (sharesDate) (next, tok, acc :+ acc.last) + else if (next < args.length) (next + 1, tok, acc :+ args(next)) + // out of arguments: leave the list short so the mismatch check below reports it + else (next, tok, acc) + } + expanded + } + 
+ // Renders tokens and arguments in two aligned columns for the mismatch error message. + def sideBySide(left: Seq[String], right: Seq[String]): Seq[String] = { + val maxLeftSize = left.map(_.length).max + val leftPadded = left ++ Seq.fill(math.max(right.size - left.size, 0))(" ") + val rightPadded = right ++ Seq.fill(math.max(left.size - right.size, 0))(" ") + leftPadded.zip(rightPadded).map { + case (l, r) => l + (" " * ((maxLeftSize - l.length) + 3)) + r + } + } + + if (newArgs.nonEmpty && tokens.length != newArgs.length) { + val errorMessage = s"ERROR: Query $qNum has argument mismatch \n" + + s" ${sideBySide(tokens, newArgs).mkString("\n")} " + throw new Exception(errorMessage) + } + + valAdapter.replace(qNum, tokens, queryStr, newArgs: _*) + } + + /* + import scala.language.experimental.macros + def getFinalQueryString(qNum: Int, args: String*): String = macro TPCH.getQryStr_impl + */ + + // Looks up the members q<qNum> and q<qNum>Tokens on this instance by runtime reflection and + // passes their values to deriveFromTokens. + def getFinalQueryString(qNum: String, args: String*): String = { + import scala.reflect.runtime.universe._ + val _this = runtimeMirror(this.getClass.getClassLoader).reflect(this) + val qry = _this.symbol.typeSignature.member(s"q${qNum}": TermName) + val toks = _this.symbol.typeSignature.member(s"q${qNum}Tokens": TermName) + + val qstr = _this.reflectMethod(qry.asMethod).apply() + val tokSeq = _this.reflectMethod(toks.asMethod).apply() + + deriveFromTokens(qNum, qstr.asInstanceOf[String], tokSeq.asInstanceOf[QUERY_TYPE], args: _*) + /* + val ret = _this.reflectMethod(fn.asMethod)(qNum, + qstr.asInstanceOf[String], tokSeq.asInstanceOf[QUERY_TYPE], + args) + + ret.asInstanceOf[String] + */ + } + + /* + def getFinalQueryString(qNum: Int, args: String*): String = { + import scala.reflect.runtime.universe._ + import scala.tools.reflect.ToolBox + + val tb = runtimeMirror(this.getClass.getClassLoader).mkToolBox() + val tokenTerm = typeOf[this.type].member(s"q${qNum}Tokens": TermName) + val queryTerm = typeOf[this.type].member(s"q${qNum}": TermName) + val deriveFromTokensTerm = typeOf[this.type].member("deriveFromTokens": TermName) + val self = q"this" + 
+ val ast = + q""" + $self.$deriveFromTokensTerm($qNum, $self.$tokenTerm, $self.$queryTerm, ..${args}) + """ + val xx = showCode(ast) + + // scalastyle:off println + println(xx.toString) + /* + val compiled = tb.compile(tb.parse(s""" + $self.deriveFromTokensTerm($qNum, $self.q${qNum}Tokens, $self.q$qNum, ${args}) + """)) + */ + + val compiled = tb.compile(ast) + compiled().asInstanceOf[String] + /* + val dd = tb.compile( + q""" + $deriveFromTokensTerm($qNum, $tokenTerm, $queryTerm, ..${args}) + """) + dd().asInstanceOf[String] + */ + } + */ + +} + +/* +object TPCH { + import scala.reflect.macros.blackbox.Context + def getQryStr_impl(c: Context)(qNum: c.Expr[Int], args: c.Expr[String]*): c.Expr[String] = { + import c.universe._ + reify(s + | q${qNum}Tokens.zipWithIndex.foldLeft(q${qNum}) { case (src, (tok, i)) => + | replace($qNum, i, src, tok, ${args.tail: _*}) + | } + """) + } +} +*/ diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCHPerfComparer.java b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCHPerfComparer.java new file mode 100644 index 0000000000..d9de171a7a --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCHPerfComparer.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark.snappy; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Finds every 1_Snappy_Average.out under the directory given as args[0], orders the files by + * last-modified time, and prints (to stdout and to a ComparisonReport_<timestamp>.txt file in + * that directory) each later run's per-query time relative to the first run that has a value. + */ +public class TPCHPerfComparer { + + public static void main(String[] args) { + //read given directory + //create a list of map + //iterate over all the actual run directories : decide the sequence + //get the lead directory + //get Average.out file + //create a map of query Vs execution time + //add this map into above list + + //for each query iterate over list of map + // from map get the value + //it the value is present + //treat first value are base and divide subsequent values with this base value and plot values + + Path p = Paths.get(args[0]); + final int maxDepth = 5; + List<String> errorList = new ArrayList<>(); + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss") ; + File reportFile = new File(p.toString(), "ComparisonReport_"+dateFormat.format(new Date())+".txt"); + // try-with-resources: the report file and the directory walk are released even when an + // I/O error aborts the comparison. + try (PrintStream reportPrintStream = new PrintStream(new FileOutputStream(reportFile)); + Stream<Path> matches = Files.find(p, maxDepth, (path, attr) -> path.getFileName().toString().equals("1_Snappy_Average.out"))) { + List<Path> files = matches.collect(Collectors.toList()); + + // oldest run first; a file whose timestamp cannot be read compares as equal + Collections.sort(files, new Comparator<Path>() { + public int compare(Path o1, Path o2) { + try { + return Files.getLastModifiedTime(o1).compareTo(Files.getLastModifiedTime(o2)); + } catch (IOException e) { + e.printStackTrace(); + } + return 0; + } + }); + + Stream<Path> sortedPaths = files.stream(); + 
AtomicInteger atomicCount = new AtomicInteger(0); + System.out.println("---------------------------------------------------------------------------------------"); + reportPrintStream.println("--------------------------------------------------------------------------------"); + reportPrintStream.println(" Comparison Report "); + reportPrintStream.println(" "+new Date()+" "); + reportPrintStream.println("--------------------------------------------------------------------------------"); + Stream<Map<Integer, Double>> averages = sortedPaths.map(path -> + { + // 0-based so that "#k" here names the same run as the "#k" column header and the + // "revision #k" degradation message below + int folderCount = atomicCount.getAndIncrement(); + Map<Integer, Double> perfMap = new HashMap<>(); + System.out.println("#"+folderCount + " : " + path.getParent().getParent().getParent()); + reportPrintStream.println("#"+folderCount + " : " + path.getParent().getParent().getParent()); + //reportPrintStream.println(atomicCount.incrementAndGet() + " : " + path.getParent().getParent().getParent()); + // each line is "<query number>,<time>"; Files.lines holds the file open until closed + try (Stream<String> lines = Files.lines(path)) { + lines.map(line->line.split(",")).forEach(element -> perfMap.put(Integer.parseInt(element[0]), Double.parseDouble(element[1]))); + } catch (Exception e) { + e.printStackTrace(); + } + return perfMap; + }); + + List<Map<Integer, Double>> averageList = averages.collect(Collectors.toList()); + System.out.println("---------------------------------------------------------------------------------------"); + reportPrintStream.println("--------------------------------------------------------------------------------"); + + System.out.print("Query"); + reportPrintStream.print("Query"); + for(int j = 0 ; j < atomicCount.get(); j++) { + System.out.print(" #" + j); + reportPrintStream.print(" #" + j); + } + System.out.println(); + reportPrintStream.println(); + + for(int i=1; i < 23; i++){ + System.out.print(i < 10 ? " 0" + i : " "+i); + reportPrintStream.print(i < 10 ? 
" 0" + i : " "+i); + System.out.print(" "); + reportPrintStream.print(" "); + int count = 0; + Double firstValue = 0.0; + int whichRevPerfDown = 0; + for (Map<Integer, Double> singleMap : averageList) { + if (count == 0) { + // the first run that has a value for this query becomes the baseline + if(singleMap.get(i) != null) { + firstValue = singleMap.get(i); + System.out.print("---"); + reportPrintStream.print("---"); + //System.out.print(firstValue); + count++; + }else{ + System.out.print(" "); + } + } else { + if (singleMap.get(i) != null) { + double secondValue = singleMap.get(i); + double perf = (firstValue-secondValue)/secondValue; + perf = perf > 0 ? 1 + perf : -1 + perf; + if(perf > 0){ + System.out.print(" "); + reportPrintStream.print(" "); + }else{ + System.out.print(" "); + reportPrintStream.print(" "); + if(perf < -1.10){ + errorList.add("For Query "+ i + ", It is observed that revision #" + whichRevPerfDown +"'s performance degraded by "+ perf); + } + } + System.out.printf("%.2f",perf); + reportPrintStream.printf("%.2f",perf); + } + } + whichRevPerfDown++; + } + System.out.println(""); + reportPrintStream.println(""); + } + reportPrintStream.println("------------------------------------------------------------------------------------"); + for(String error : errorList){ + System.out.println(error); + reportPrintStream.println(error); + } + } catch (IOException e) { + e.printStackTrace(); + } + // NOTE(review): a Java assert only fires when the JVM runs with -ea. + assert errorList.isEmpty(): "Performance degradation is observed for TPCH queries. Please have a look at ComparisonReport.txt"; + } +} + diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/QueryExecutionJob.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/QueryExecutionJob.scala new file mode 100644 index 0000000000..9caaa9dea1 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/QueryExecutionJob.scala @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.benchmark.snappy.tpcds
+
+import java.io.{File, FileOutputStream, PrintStream}
+
+import com.typesafe.config.Config
+import io.snappydata.benchmark.snappy.tpch.QueryExecutor
+
+import org.apache.spark.Logging
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.util.fileToString
+
+/**
+ * Snappy job that executes the queries stored as `<queryPath>/<name>.sql`.
+ *
+ * For every query a file `<name>.out` is written in the working directory:
+ *  - in result-collection mode it holds the result rows, one comma separated
+ *    row per line (doubles formatted with four decimals);
+ *  - otherwise it holds the wall-clock time of each of the
+ *    `warmUp + runsForAverage` iterations.
+ * In both modes the value `totalTime / runsForAverage` is appended to
+ * `<name>.out` and to `Snappy_Average.out` (it is 0 in result-collection mode
+ * because no iteration is timed there).
+ */
+object QueryExecutionJob extends SnappySQLJob with Logging {
+  // All fields below are populated by isValidJob from the job configuration.
+  var sqlSparkProperties: Array[String] = _
+  var queries: Array[String] = _
+  var queryPath: String = _
+  var isResultCollection: Boolean = _
+  var warmUp: Integer = _
+  var runsForAverage: Integer = _
+
+  /**
+   * Applies the configured SQL properties and runs every configured query.
+   *
+   * @param snSession session the queries are executed on
+   * @param jobConfig job configuration (already consumed by isValidJob)
+   * @return whatever `QueryExecutor.close` returns
+   */
+  def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val snc = snSession.sqlContext
+
+    // The default property list is " "; skip blank entries instead of
+    // issuing an empty "set" statement.
+    for (prop <- sqlSparkProperties if prop.trim.nonEmpty) {
+      snc.sql(s"set $prop")
+    }
+
+    val avgFileStream: FileOutputStream = new FileOutputStream(
+      new File(s"Snappy_Average.out"))
+    val avgPrintStream: PrintStream = new PrintStream(avgFileStream)
+    try {
+      queries.foreach(name => runQuery(name, snSession, avgPrintStream))
+    } finally {
+      // Release the file even when a query fails with a fatal error.
+      avgPrintStream.close()
+      avgFileStream.close()
+    }
+    QueryExecutor.close
+  }
+
+  /**
+   * Runs one query and records its rows or timings in `<name>.out`.
+   * Failures are logged and reported on stdout so the remaining queries
+   * still run.
+   */
+  private def runQuery(name: String, snSession: SnappySession,
+      avgPrintStream: PrintStream): Unit = {
+    // scalastyle:off println
+    try {
+      // Read the query first: a missing .sql file must not leave an empty .out file.
+      val queryString = fileToString(new File(s"$queryPath/$name.sql"))
+      val queryFileName = s"$name.out"
+      val queryPrintStream = new PrintStream(new FileOutputStream(new File(queryFileName)))
+      try {
+        var totalTime: Long = 0
+
+        if (isResultCollection) {
+          val (resultSet, _) = QueryExecutor.queryExecution(name, queryString,
+            snSession.sqlContext, genPlan = true)
+          println(s"$name : ${resultSet.length}")
+
+          for (row <- resultSet) {
+            queryPrintStream.println(row.toSeq.map {
+              case d: Double => "%18.4f".format(d).trim()
+              case v => v
+            }.mkString(","))
+          }
+          println(s"$name Result Collected in file $queryFileName")
+        } else {
+          for (i <- 1 to (warmUp + runsForAverage)) {
+            val startTime = System.currentTimeMillis()
+            // genPlan is requested on the first iteration only.
+            val rows: Array[Row] = if (i == 1) {
+              QueryExecutor.queryExecution(name, queryString, snSession.sqlContext,
+                genPlan = true)._1
+            } else {
+              QueryExecutor.queryExecution(name, queryString, snSession.sqlContext)._1
+            }
+            // Touch every row so the timing includes result iteration.
+            rows.foreach(_ => ())
+            val iterationTime = System.currentTimeMillis() - startTime
+            queryPrintStream.println(s"$iterationTime")
+
+            // Warm-up iterations are recorded but excluded from the average.
+            if (i > warmUp) {
+              totalTime += iterationTime
+            }
+          }
+        }
+
+        println("-----------------------------------------------")
+        queryPrintStream.println(s"${totalTime / runsForAverage}")
+        avgPrintStream.println(s"$name, executionTime = ${totalTime / runsForAverage}")
+        println("-----------------------------------------------")
+      } finally {
+        // Closing the PrintStream also closes the underlying FileOutputStream.
+        queryPrintStream.close()
+      }
+    } catch {
+      case e: Exception =>
+        println(s"Failed $name")
+        logError("Exception in job", e)
+    }
+    // scalastyle:on println
+  }
+
+  /**
+   * Reads the job configuration into the fields of this object.
+   *
+   * `queries`, `resultCollection`, `warmUpIterations` and `actualRuns` are
+   * mandatory; `sparkSqlProps` and `queryPath` are optional.
+   *
+   * @return SnappyJobValid, or SnappyJobInvalid naming the first missing key
+   */
+  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {
+
+    val sqlSparkProps = if (config.hasPath("sparkSqlProps")) {
+      config.getString("sparkSqlProps")
+    } else {
+      " "
+    }
+    sqlSparkProperties = sqlSparkProps.split(",")
+
+    val tempqueries = if (config.hasPath("queries")) {
+      config.getString("queries")
+    } else {
+      return SnappyJobInvalid("Specify Query number to be executed")
+    }
+    // scalastyle:off println
+    println(s"tempqueries : $tempqueries")
+    // scalastyle:on println
+    queries = tempqueries.split(",")
+
+    queryPath = if (config.hasPath("queryPath")) {
+      config.getString("queryPath")
+    } else {
+      ""
+    }
+
+    isResultCollection = if (config.hasPath("resultCollection")) {
+      config.getBoolean("resultCollection")
+    } else {
+      return SnappyJobInvalid("Specify whether to to collect results")
+    }
+
+    warmUp = if (config.hasPath("warmUpIterations")) {
+      config.getInt("warmUpIterations")
+    } else {
+      return SnappyJobInvalid("Specify number of warmup iterations ")
+    }
+    runsForAverage = if (config.hasPath("actualRuns")) {
+      config.getInt("actualRuns")
+    } else {
+      return SnappyJobInvalid("Specify number of iterations of which average result is " +
+          "calculated")
+    }
+
+    SnappyJobValid()
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/SparkApp.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/SparkApp.scala
new file mode 100644
index 0000000000..16fec34fed
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/SparkApp.scala
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.benchmark.snappy.tpcds
+
+import java.io.{File, FileOutputStream, PrintStream}
+
+import io.snappydata.benchmark.snappy.tpch.QueryExecutor
+
+import org.apache.spark.sql.catalyst.util.fileToString
+import org.apache.spark.sql.{Row, SparkSession}
+
+/**
+ * Stand-alone Spark driver for the TPC-DS queries.
+ *
+ * Arguments, in order: comma separated SQL properties, parquet data location,
+ * comma separated query names, directory holding `<name>.sql`, number of
+ * partitions for the large tables, result-collection flag (parsed but not
+ * used), warm-up iterations, measured iterations.
+ *
+ * Average execution times are written to `Spark_Average.out`.
+ */
+object SparkApp {
+
+  /** Tables registered and cached as-is. */
+  private val cachedTables = Seq("call_center", "catalog_page", "date_dim",
+    "household_demographics", "income_band", "promotion", "reason", "ship_mode", "store",
+    "time_dim", "warehouse", "web_page", "web_site", "item", "customer_demographics")
+
+  /** Tables repartitioned before caching, paired with their partitioning column. */
+  private val partitionedTables = Seq(
+    "catalog_returns" -> "cr_order_number",
+    "catalog_sales" -> "cs_order_number",
+    "customer" -> "c_customer_sk",
+    "customer_address" -> "ca_address_sk",
+    "inventory" -> "inv_item_sk",
+    "store_returns" -> "sr_item_sk",
+    "store_sales" -> "ss_item_sk",
+    "web_returns" -> "wr_order_number",
+    "web_sales" -> "ws_order_number")
+
+  def main(args: Array[String]): Unit = {
+    // scalastyle:off println
+    val sc: SparkSession = SparkSession
+        .builder
+        .appName("TPCDS_Spark")
+        .getOrCreate()
+
+    args.foreach(println)
+
+    val sparkSqlProps = args(0).split(",")
+    val dataLocation = args(1)
+    val queries = args(2).split(",").toSeq
+    val queryPath = args(3)
+    val buckets_ColumnTable = args(4).toInt
+    // Parsed so a malformed flag still fails fast; result collection is not implemented here.
+    args(5).toBoolean
+    val warmUp = args(6).toInt
+    val runsForAverage = args(7).toInt
+
+    // sc.sqlContext wraps this same session, so setting each property once is enough.
+    for (prop <- sparkSqlProps) {
+      println(prop)
+      sc.sql(s"set $prop")
+    }
+
+    println(s"****************queries : $queries")
+
+    cachedTables.foreach { tableName =>
+      sc.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName)
+      sc.sqlContext.cacheTable(tableName)
+      val count = sc.table(tableName).count()
+
+      println("-----------------------------------------------")
+      println(s"Table Created...$tableName with rows $count")
+      println("-----------------------------------------------")
+    }
+
+    partitionedTables.foreach { case (tableName, partitionBy) =>
+      createPartitionedTables(sc, dataLocation, partitionBy, tableName, buckets_ColumnTable)
+    }
+
+    val avgPrintStream: PrintStream = new PrintStream(
+      new FileOutputStream(new File(s"Spark_Average.out")))
+    try {
+      queries.foreach { name =>
+        try {
+          val queryString = fileToString(new File(s"$queryPath/$name.sql"))
+          var totalTime: Long = 0
+
+          println(s"Running Query $name now.")
+
+          for (i <- 1 to (warmUp + runsForAverage)) {
+            val startTime = System.currentTimeMillis()
+            val rows: Array[Row] = if (i == 1) {
+              // First iteration: point QueryExecutor's plan output at the average file.
+              QueryExecutor.planPrintStream = avgPrintStream
+              QueryExecutor.queryExecution(name, queryString, sc.sqlContext, false)._1
+            } else {
+              QueryExecutor.queryExecution(name, queryString, sc.sqlContext)._1
+            }
+            // Touch every row so the timing includes result iteration.
+            rows.foreach(_ => ())
+            val iterationTime = System.currentTimeMillis() - startTime
+
+            println(s"iterationTime = $iterationTime")
+
+            // Warm-up iterations are excluded from the average.
+            if (i > warmUp) {
+              totalTime += iterationTime
+            }
+          }
+
+          println("-----------------------------------------------")
+          avgPrintStream.println(s"$name, executionTime = ${totalTime / runsForAverage}")
+          println("-----------------------------------------------")
+        } catch {
+          case e: Exception =>
+            // printStackTrace() returns Unit, so it must not be concatenated into the message.
+            println(s"Failed $name : ${e.getMessage}")
+            e.printStackTrace()
+        }
+      }
+    } finally {
+      // Closing the PrintStream also closes the underlying FileOutputStream.
+      avgPrintStream.close()
+    }
+    // scalastyle:on println
+  }
+
+  /**
+   * Registers `<dataLocation>/<tableName>` as a cached temporary view that is
+   * hash-repartitioned into `buckets` partitions on `partitionBy`.
+   *
+   * Only the repartitioned DataFrame is registered; registering the plain
+   * DataFrame afterwards would replace the view and drop the repartitioning.
+   */
+  def createPartitionedTables(sc: SparkSession, dataLocation: String,
+      partitionBy: String, tableName: String, buckets: Int): Unit = {
+    val df = sc.sqlContext.read.parquet(s"$dataLocation/$tableName")
+    df.repartition(buckets, df(partitionBy)).createOrReplaceTempView(tableName)
+    sc.sqlContext.cacheTable(tableName)
+    val count = sc.table(tableName).count()
+
+    // scalastyle:off println
+    println("-----------------------------------------------")
+    println(s"Table Created...$tableName with row $count")
+    println("-----------------------------------------------")
+    // scalastyle:on println
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/TableCreationJob.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/TableCreationJob.scala
new file mode 100644
index 0000000000..3c84e518a8
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpcds/TableCreationJob.scala
@@ -0,0 +1,134 @@
+package io.snappydata.benchmark.snappy.tpcds
+
+import java.io.{File, FileOutputStream, PrintStream}
+
+import com.typesafe.config.Config
+import io.snappydata.benchmark.{TPCHColumnPartitionedTable, TPCHReplicatedTable}
+
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{SnappyJobValid, SnappyJobValidation, SnappySQLJob, SnappySession}
+
+/**
+ * Snappy job that loads the TPC-DS parquet data found under `dataLocation`:
+ * the tables in `rowTables` are created as "row" tables, the tables in
+ * `columnTables` as "column" tables partitioned (and optionally colocated)
+ * as listed there.
+ */
+object TableCreationJob extends SnappySQLJob {
+
+  // Populated by isValidJob from the job configuration.
+  var sqlSparkProperties: Array[String] = _
+  var dataLocation: String = _
+  var buckets_ColumnTable: String = _
+
+  /** Tables created with the "row" provider. */
+  private val rowTables = Seq("call_center", "catalog_page", "date_dim",
+    "household_demographics", "income_band", "promotion", "reason", "ship_mode", "store",
+    "time_dim", "warehouse", "web_page", "web_site", "item", "customer_demographics")
+
+  /**
+   * (table, PARTITION_BY column, optional COLOCATE_WITH table), in creation
+   * order: a table appears after the table it is colocated with.
+   */
+  private val columnTables: Seq[(String, String, Option[String])] = Seq(
+    ("catalog_returns", "cr_order_number", None),
+    ("catalog_sales", "cs_order_number", Some("CATALOG_RETURNS")),
+    ("web_returns", "wr_order_number", Some("CATALOG_SALES")),
+    ("web_sales", "ws_order_number", Some("WEB_RETURNS")),
+    ("inventory", "inv_item_sk", None),
+    ("store_returns", "sr_item_sk", Some("INVENTORY")),
+    ("store_sales", "ss_item_sk", Some("STORE_RETURNS")),
+    ("customer", "c_customer_sk", None),
+    ("customer_address", "ca_address_sk", None))
+
+  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val snc = snSession.sqlContext
+    snc.sparkContext.hadoopConfiguration.set("fs.s3a.connection.maximum", "1000")
+
+    // Nothing is written to this file; it is still created (and truncated) as
+    // before, but the handle is now released immediately.
+    new FileOutputStream(new File("Snappy_LoadPerf.out")).close()
+
+    // The default property list is " "; skip blank entries.
+    for (prop <- sqlSparkProperties if prop.trim.nonEmpty) {
+      snc.sql(s"set $prop")
+    }
+
+    rowTables.foreach { tableName =>
+      val df = snSession.read.parquet(s"$dataLocation/$tableName")
+      snSession.createTable(tableName, "row",
+        new StructType(df.schema.map(_.copy(nullable = true)).toArray),
+        Map[String, String]())
+      df.write.insertInto(tableName)
+      // count() keeps the rows on the executors; collect() would ship them all to the driver.
+      val cnt = df.count()
+      // scalastyle:off println
+      println("-----------------------------------------------")
+      println(s"Table Created...$tableName with rows $cnt")
+      println("-----------------------------------------------")
+      // scalastyle:on println
+    }
+
+    columnTables.foreach { case (tableName, partitionBy, colocateWith) =>
+      val props = Map("PARTITION_BY" -> partitionBy, "BUCKETS" -> buckets_ColumnTable) ++
+          colocateWith.map("COLOCATE_WITH" -> _).toSeq
+      createColumnPartitionedTables(snSession, props, tableName)
+    }
+
+    // Same as above: created/truncated as before, handle released immediately.
+    new FileOutputStream(new File(s"Snappy_Average.out")).close()
+  }
+
+  /**
+   * Creates the column table `tableName` with the given table properties and
+   * loads it from `<dataLocation>/<tableName>`. All columns are declared
+   * non-nullable.
+   */
+  def createColumnPartitionedTables(snappy: SnappySession,
+      props: Map[String, String], tableName: String): Unit = {
+
+    val df = snappy.read.parquet(s"$dataLocation/$tableName")
+    snappy.createTable(tableName, "column",
+      new StructType(df.schema.map(_.copy(nullable = false)).toArray), props)
+    df.write.insertInto(tableName)
+    // count() keeps the rows on the executors; collect() would ship them all to the driver.
+    val cnt = df.count()
+    // scalastyle:off println
+    println("-----------------------------------------------")
+    println(s"Table Created...$tableName with rows $cnt")
+    println("-----------------------------------------------")
+    // scalastyle:on println
+  }
+
+  /**
+   * Reads the optional keys `sparkSqlProps`, `dataDir` and
+   * `Buckets_ColumnTable`; every key has a default, so the job is always valid.
+   */
+  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {
+
+    val sqlSparkProps = if (config.hasPath("sparkSqlProps")) {
+      config.getString("sparkSqlProps")
+    } else {
+      " "
+    }
+    sqlSparkProperties = sqlSparkProps.split(",")
+
+    dataLocation = if (config.hasPath("dataDir")) {
+      config.getString("dataDir")
+    } else {
+      "/QASNAPPY/TPCH/DATA/1"
+    }
+
+    buckets_ColumnTable = if (config.hasPath("Buckets_ColumnTable")) {
+      config.getString("Buckets_ColumnTable")
+    } else {
+      "8"
+    }
+
+    SnappyJobValid()
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/DataValidationJob.scala
b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/DataValidationJob.scala new file mode 100644 index 0000000000..375778f674 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/DataValidationJob.scala @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package io.snappydata.benchmark.snappy.tpch + +import java.io.{File, FileOutputStream, PrintStream} + +import com.typesafe.config.Config + +import org.apache.spark.sql.TPCHUtils.{getClass, logWarning} +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{SnappyContext, SnappyJobInvalid, SnappyJobValid, SnappyJobValidation, SnappySQLJob, SnappySession} + +/** + * Created by kishor on 13/4/17. 
+ *
+ * Validates TPC-H query results and records its findings in a report file.
+ *
+ * Static mode (`isDynamic` false): for every query the sorted lines of
+ * `<ExpectedResultsAvailableAt>/1_Spark_<query>.out` are compared with those of
+ * `<ActualResultsAvailableAt>/1_Snappy_<query>.out`; every difference is reported.
+ *
+ * Dynamic mode: `Snappy_<query>_FirstRun.out` and `Snappy_<query>_SecondRun.out`
+ * are compared and a report entry is written when they are identical.
+ *
+ * In both modes the job asserts that nothing was reported.
+ */
+object DataValidationJob extends SnappySQLJob {
+
+  var queries: Array[String] = _
+  var isDynamic: Boolean = _
+  var isResultCollection: Boolean = _
+  var isSnappy: Boolean = true
+  var warmUp: Integer = _
+  var runsForAverage: Integer = _
+  var expectedResultsAvailableAt: String = _
+  var actualResultsAvailableAt: String = _
+
+
+  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val sc = snSession.sparkContext
+
+    val fineName = if (!isDynamic) {
+      if (isSnappy) "ResultValidattion_Snappy.out" else "ResultValidattion__Spark.out"
+    } else {
+      "ResultValidattion_Snappy_Tokenization.out"
+    }
+
+    val resultOutputStream: PrintStream = new PrintStream(
+      new FileOutputStream(new File(fineName)))
+
+    // Lines written to the report. Counting them here replaces re-reading the
+    // driver-local report through sc.textFile with a relative path.
+    var reportedLines = 0L
+    def report(line: String): Unit = {
+      resultOutputStream.println(line)
+      reportedLines += 1
+    }
+
+    // scalastyle:off
+    try {
+      for (query <- queries) {
+        println(s"For Query $query")
+
+        if (!isDynamic) {
+          val expectedLineSet = sc.textFile(
+            s"file://$expectedResultsAvailableAt/1_Spark_$query.out").collect().toList.sorted
+          val actualLineSet = sc.textFile(
+            s"file://$actualResultsAvailableAt/1_Snappy_$query.out").collect().toList.sorted
+
+          if (actualLineSet != expectedLineSet) {
+            if (expectedLineSet.size != actualLineSet.size) {
+              report(s"For $query " +
+                  s"result count mismatched observed with " +
+                  s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
+            } else {
+              // Same number of rows: report each differing pair.
+              for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet
+                   if expectedLine != actualLine) {
+                report(s"For $query result mismatched observed")
+                report(s"Expected : $expectedLine")
+                report(s"Found : $actualLine")
+                report(s"-------------------------------------")
+              }
+            }
+          }
+        } else {
+          val expectedLineSet =
+            sc.textFile(s"Snappy_${query}_FirstRun.out").collect().toList.sorted
+          val actualLineSet =
+            sc.textFile(s"Snappy_${query}_SecondRun.out").collect().toList.sorted
+
+          // In dynamic mode identical results of the two runs are the failure case.
+          if (actualLineSet == expectedLineSet) {
+            report(s"For $query result matched observed")
+            report(s"-------------------------------------")
+          }
+        }
+      }
+    } finally {
+      // Closing the PrintStream also closes the underlying FileOutputStream.
+      resultOutputStream.close()
+    }
+    // scalastyle:on
+
+    // The messages name the report file that was actually written.
+    if (!isDynamic) {
+      assert(reportedLines == 0,
+        s"Query result mismatch Observed. Look at $fineName for detailed failure")
+    } else {
+      assert(reportedLines == 0,
+        s"Query result match Observed. Look at $fineName for detailed failure")
+    }
+  }
+
+
+  /**
+   * Reads the mandatory keys `queries` ('-' separated), `isDynamic`,
+   * `ExpectedResultsAvailableAt` and `ActualResultsAvailableAt`.
+   *
+   * @return SnappyJobValid, or SnappyJobInvalid naming the first missing key
+   */
+  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {
+
+    val tempqueries = if (config.hasPath("queries")) {
+      config.getString("queries")
+    } else {
+      return SnappyJobInvalid("Specify Query number to be executed")
+    }
+
+    // scalastyle:off println
+    println(s"tempqueries : $tempqueries")
+    // scalastyle:on println
+    queries = tempqueries.split("-")
+
+    isDynamic = if (config.hasPath("isDynamic")) {
+      config.getBoolean("isDynamic")
+    } else {
+      return SnappyJobInvalid("Specify whether to use dynamic paramters")
+    }
+
+    expectedResultsAvailableAt = if (config.hasPath("ExpectedResultsAvailableAt")) {
+      config.getString("ExpectedResultsAvailableAt")
+    } else {
+      return SnappyJobInvalid("Specify ExpectedResultsAvailableAt")
+    }
+
+    actualResultsAvailableAt = if (config.hasPath("ActualResultsAvailableAt")) {
+      config.getString("ActualResultsAvailableAt")
+    } else {
+      return SnappyJobInvalid("Specify ActualResultsAvailableAt")
+    }
+
+    SnappyJobValid()
+  }
+}
+
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJdbc.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJdbc.scala
new file mode 100644
index 0000000000..e2cc0318f7
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJdbc.scala
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.benchmark.snappy.tpch
+
+import java.io.{File, FileOutputStream, PrintStream}
+import java.sql.{Connection, DriverManager, PreparedStatement}
+
+import io.snappydata.benchmark.TPCH_Queries
+
+/**
+ * Runs TPC-H queries as JDBC prepared statements against a SnappyData cluster.
+ *
+ * Arguments, in order: host, port, comma separated query numbers,
+ * result-collection flag, warm-up iterations, measured iterations,
+ * dynamic-parameter flag, trace-events flag (parsed but not used), random seed.
+ *
+ * Average execution times are written to `Snappy_Average.out`.
+ */
+object QueryExecutionJdbc {
+
+  def main(args: Array[String]): Unit = {
+
+    val host = args(0)
+    val port = args(1)
+    val dbAddress = "jdbc:snappydata://" + host + ":" + port + "/"
+
+    val queries: Array[String] = args(2).split(",")
+    // scalastyle:off println
+    println(queries.length)
+    val isResultCollection: Boolean = args(3).toBoolean
+    val warmup: Integer = args(4).toInt
+    val runsForAverage: Integer = args(5).toInt
+    val isDynamic: Boolean = args(6).toBoolean
+    // Parsed so a malformed flag still fails fast; the value itself is not used.
+    args(7).toBoolean
+    val randomSeed: Integer = args(8).toInt
+
+    val avgPrintStream: PrintStream = new PrintStream(
+      new FileOutputStream(new File(s"Snappy_Average.out")))
+    try {
+      val conn = DriverManager.getConnection(dbAddress)
+      try {
+        TPCH_Queries.setRandomSeed(randomSeed)
+
+        for (query <- queries) {
+          prepare(conn, query, isDynamic) match {
+            case Some(prepStatement) =>
+              try {
+                QueryExecutor.execute_statement(query, isResultCollection, prepStatement,
+                  warmup, runsForAverage, avgPrintStream)
+              } finally {
+                prepStatement.close()
+              }
+            case None =>
+              // Previously a null statement was passed on and then dereferenced.
+              println(s"Skipping query $query : no prepared statement is defined for it")
+          }
+        }
+      } finally {
+        conn.close()
+      }
+    } finally {
+      // Closing the PrintStream also closes the underlying FileOutputStream.
+      avgPrintStream.close()
+      QueryExecutor.close
+    }
+    // scalastyle:on println
+  }
+
+  /**
+   * Builds the prepared statement for the TPC-H query `query` and binds its
+   * parameters as strings.
+   *
+   * The third argument of each `bind` call is the number of leading parameters
+   * that are bound for that query.
+   * NOTE(review): assumes every TPCH_Queries.getQ<n>Parameter returns a sequence
+   * (or array) of String - confirm against TPCH_Queries.
+   *
+   * @return None for query "15", which has no prepared-statement form here
+   * @throws IllegalArgumentException for an unknown query number
+   */
+  private def prepare(conn: Connection, query: String,
+      isDynamic: Boolean): Option[PreparedStatement] = {
+
+    def bind(sql: String, parameters: Seq[String], count: Int): Option[PreparedStatement] = {
+      val stmt = conn.prepareStatement(sql)
+      try {
+        // JDBC parameter indexes are 1-based.
+        for (i <- 1 to count) {
+          stmt.setString(i, parameters(i - 1))
+        }
+        Some(stmt)
+      } catch {
+        case e: Exception =>
+          // Do not leak the statement when binding fails.
+          stmt.close()
+          throw e
+      }
+    }
+
+    query match {
+      case "1" => bind(TPCH_Queries.getQuery1, TPCH_Queries.getQ1Parameter(isDynamic), 1)
+      case "2" => bind(TPCH_Queries.getQuery2, TPCH_Queries.getQ2Parameter(isDynamic), 4)
+      case "3" => bind(TPCH_Queries.getQuery3, TPCH_Queries.getQ3Parameter(isDynamic), 3)
+      case "4" => bind(TPCH_Queries.getQuery4, TPCH_Queries.getQ4Parameter(isDynamic), 2)
+      case "5" => bind(TPCH_Queries.getQuery5, TPCH_Queries.getQ5Parameter(isDynamic), 3)
+      case "6" => bind(TPCH_Queries.getQuery6, TPCH_Queries.getQ6Parameter(isDynamic), 5)
+      case "7" => bind(TPCH_Queries.getQuery7, TPCH_Queries.getQ7Parameter(isDynamic), 4)
+      case "8" => bind(TPCH_Queries.getQuery8, TPCH_Queries.getQ8Parameter(isDynamic), 3)
+      case "9" => bind(TPCH_Queries.getQuery9, TPCH_Queries.getQ9Parameter(isDynamic), 1)
+      case "10" => bind(TPCH_Queries.getQuery10, TPCH_Queries.getQ10Parameter(isDynamic), 2)
+      case "11" => bind(TPCH_Queries.getQuery11, TPCH_Queries.getQ11Parameter(isDynamic), 2)
+      case "12" => bind(TPCH_Queries.getQuery12, TPCH_Queries.getQ12Parameter(isDynamic), 4)
+      case "13" => bind(TPCH_Queries.getQuery13, TPCH_Queries.getQ13Parameter(isDynamic), 2)
+      case "14" => bind(TPCH_Queries.getQuery14, TPCH_Queries.getQ14Parameter(isDynamic), 2)
+      case "15" => None
+      case "16" => bind(TPCH_Queries.getQuery16, TPCH_Queries.getQ16Parameter(isDynamic), 11)
+      case "17" => bind(TPCH_Queries.getQuery17, TPCH_Queries.getQ17Parameter(isDynamic), 3)
+      case "18" => bind(TPCH_Queries.getQuery18, TPCH_Queries.getQ18Parameter(isDynamic), 1)
+      case "19" => bind(TPCH_Queries.getQuery19, TPCH_Queries.getQ19Parameter(isDynamic), 9)
+      case "20" => bind(TPCH_Queries.getQuery20, TPCH_Queries.getQ20Parameter(isDynamic), 4)
+      case "21" => bind(TPCH_Queries.getQuery21, TPCH_Queries.getQ21Parameter(isDynamic), 1)
+      case "22" => bind(TPCH_Queries.getQuery22, TPCH_Queries.getQ22Parameter(isDynamic), 14)
+      case other =>
+        throw new IllegalArgumentException(s"Unknown TPC-H query number: $other")
+    }
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJob.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJob.scala
new file mode 100644
index 0000000000..bd64c611fd
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpch/QueryExecutionJob.scala
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
/**
 * Snappy job that runs a list of TPC-H queries through [[QueryExecutor]] and writes one
 * "query,average" line per query to `<threadNumber>_Snappy_AverageResponseTimes.csv`.
 *
 * `isValidJob` parses the job configuration into the mutable fields below and
 * `runSnappyJob` reads them.
 */
object QueryExecutionJob extends SnappySQLJob {

  /** Entries of the comma separated "sparkSqlProps" setting; each one is applied as `set <entry>`. */
  var sqlSparkProperties: Array[String] = _
  /** Query numbers, taken from the '-' separated "queries" setting. */
  var queries: Array[String] = _
  var isDynamic: Boolean = _
  var isResultCollection: Boolean = _
  var isSnappy: Boolean = true
  var warmUp: Integer = _
  var runsForAverage: Integer = _
  var threadNumber: Integer = _
  // Parsed from the configuration but not read anywhere in this object.
  var traceEvents : Boolean = _
  var randomSeed : Integer = _

  /**
   * Applies the configured SQL properties, executes every configured query and records
   * the averages.
   *
   * The average-times file and the shared plan streams of [[QueryExecutor]] are released
   * in a `finally` block so that they are not leaked when a property or a query fails.
   */
  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
    val snc = snSession.sqlContext

    val avgFileStream: FileOutputStream = new FileOutputStream(
      new File(s"${threadNumber}_Snappy_AverageResponseTimes.csv"))
    val avgPrintStream: PrintStream = new PrintStream(avgFileStream)
    try {
      avgPrintStream.println(s"Query,AverageResponseTime")

      for (prop <- sqlSparkProperties) {
        snc.sql(s"set $prop")
      }

      // Interpolating the array itself would only print its identity ("[Ljava.lang.String;@...").
      val queryList = queries.mkString("-")
      // scalastyle:off println
      println(s"****************queries : $queryList")
      // scalastyle:on println

      QueryExecutor.setRandomSeed(randomSeed)
      for (query <- queries) {
        QueryExecutor.execute(query, snc, isResultCollection, isSnappy,
          threadNumber, isDynamic, warmUp, runsForAverage, avgPrintStream)
      }
    } finally {
      avgPrintStream.close()
      avgFileStream.close()

      QueryExecutor.close
    }
  }

  /**
   * Stand-alone entry point that runs query 16 against a locator at localhost:10334.
   *
   * NOTE(review): only `queries` is assigned here; the other settings keep their
   * initial values (null for the `Integer` and array fields) -- confirm this entry
   * point is still usable before relying on it.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
        .setAppName("TPCH")
        .setMaster("snappydata://localhost:10334")
        .set("jobserver.enabled", "false")
    val sc = new SparkContext(conf)
    val snc =
      SnappyContext(sc)

    queries = Array("16")
    runJob(snc, null)
  }

  /**
   * Reads the job settings from `config` into the fields of this object.
   *
   * "queries", "isDynamic", "resultCollection", "warmUpIterations" and "actualRuns" are
   * mandatory: the job is reported invalid when one of them is missing. The remaining
   * settings fall back to defaults (threadNumber 1, traceEvents false, randomSeed 42).
   */
  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {

    val sqlSparkProps = if (config.hasPath("sparkSqlProps")) {
      config.getString("sparkSqlProps")
    }
    else " "

    sqlSparkProperties = sqlSparkProps.split(",")

    val tempQueries = if (config.hasPath("queries")) {
      config.getString("queries")
    } else {
      return SnappyJobInvalid("Specify Query number to be executed")
    }

    // scalastyle:off println
    println(s"tempqueries : $tempQueries")
    // scalastyle:on println
    queries = tempQueries.split("-")

    isDynamic = if (config.hasPath("isDynamic")) {
      config.getBoolean("isDynamic")
    } else {
      return SnappyJobInvalid("Specify whether to use dynamic paramters")
    }

    isResultCollection = if (config.hasPath("resultCollection")) {
      config.getBoolean("resultCollection")
    } else {
      return SnappyJobInvalid("Specify whether to to collect results")
    }

    warmUp = if (config.hasPath("warmUpIterations")) {
      config.getInt("warmUpIterations")
    } else {
      return SnappyJobInvalid("Specify number of warmup iterations ")
    }
    runsForAverage = if (config.hasPath("actualRuns")) {
      config.getInt("actualRuns")
    } else {
      return SnappyJobInvalid("Specify number of iterations of which average result is " +
          "calculated")
    }

    threadNumber = if (config.hasPath("threadNumber")) {
      config.getInt("threadNumber")
    } else {
      1
    }

    traceEvents = if (config.hasPath("traceEvents")) {
      config.getBoolean("traceEvents")
    } else {
      false
    }

    randomSeed = if (config.hasPath("randomSeed")) {
      config.getInt("randomSeed")
    } else {
      42
    }

    SnappyJobValid()
  }
}
/**
 * Command line entry point that runs TPC-H queries through [[QueryExecutor]] using a
 * `SnappySession` created on top of the `SparkSession` of the application.
 */
object QueryExecutionSmartConnector {

  /**
   * Runs the requested queries and writes their averages to
   * `<threadNumber>_Smart_Average.out`.
   *
   * Expected arguments, in order:
   *  0. '-' separated query numbers
   *  1. ',' separated SQL properties, each applied as `set <property>`
   *  2. isDynamic (boolean)
   *  3. isResultCollection (boolean)
   *  4. number of warm-up iterations
   *  5. number of measured iterations
   *  6. thread number, used as prefix of the output file names
   *
   * @throws IllegalArgumentException when fewer than seven arguments are given
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 7,
      "Usage: QueryExecutionSmartConnector <queries> <sparkSqlProps> <isDynamic> " +
          "<isResultCollection> <warmUpIterations> <actualRuns> <threadNumber>")

    val queries = args(0).split("-")
    val sparkSqlProps = args(1).split(",")
    val isDynamic = args(2).toBoolean
    val isResultCollection = args(3).toBoolean
    val warmUpIterations = args(4).toInt
    val actualRuns = args(5).toInt
    val threadNumber = args(6).toInt

    val sc: SparkSession = SparkSession
        .builder
        .getOrCreate

    try {
      val avgFileStream: FileOutputStream = new FileOutputStream(
        new File(s"${threadNumber}_Smart_Average.out"))
      val avgPrintStream: PrintStream = new PrintStream(avgFileStream)

      try {
        val snSession = new SnappySession(sc.sparkContext)

        // scalastyle:off println
        for (prop <- sparkSqlProps) {
          println(prop)
          snSession.sql(s"set $prop")
        }
        // scalastyle:on println

        for (query <- queries) {
          QueryExecutor.execute(query, snSession.sqlContext, isResultCollection, false,
            threadNumber, isDynamic, warmUpIterations, actualRuns, avgPrintStream)
        }
      } finally {
        // Release the files even when a property or a query fails.
        avgPrintStream.close()
        avgFileStream.close()
        QueryExecutor.close
      }
    } finally {
      sc.stop()
    }
  }
}
/**
 * Helpers that execute TPC-H queries either through JDBC prepared statements or through
 * a `SQLContext`, and record results, timings and query plans in files located in the
 * working directory.
 *
 * The plan output file is shared by all `execute` calls: it is opened lazily by the
 * first call and released by `close`.
 */
object QueryExecutor {

  var planFileStream: FileOutputStream = _
  var planPrintStream: PrintStream = _

  /**
   * Closes the shared plan streams, if they were opened, and resets them so that a later
   * `execute` call opens a fresh plan file instead of writing to a closed stream.
   */
  def close: Unit = {
    try {
      if (planPrintStream != null) {
        planPrintStream.close()
      }
      if (planFileStream != null) {
        planFileStream.close()
      }
    } finally {
      planPrintStream = null
      planFileStream = null
    }
  }

  /** Forwards the seed to `TPCH_Queries.setRandomSeed`. */
  def setRandomSeed(randomSeed : Integer): Unit = {
    TPCH_Queries.setRandomSeed(randomSeed)
  }

  /**
   * Executes one query through a JDBC prepared statement.
   *
   * With `isResultCollection` the rows are written, comma separated, to
   * `Q<queryNumber>.out`. Otherwise the query is executed `warmup + runsForAverage`
   * times, every iteration time is written to that file, and the average of the
   * iterations that follow the warm-up is written to the file and to `avgPrintStream`.
   * The views of queries 13 and 15 are dropped after each execution.
   *
   * Exceptions are caught, logged to stdout and to the output streams, and not rethrown.
   *
   * @param queryNumber        TPC-H query number
   * @param isResultCollection collect the rows instead of measuring the response time
   * @param stmt               statement prepared for the query
   * @param warmup             number of iterations excluded from the average
   * @param runsForAverage     number of iterations included in the average
   * @param avgPrintStream     optional sink for the "query,average" line; may be null
   */
  def execute_statement(queryNumber: String, isResultCollection: Boolean, stmt: PreparedStatement,
      warmup: Integer, runsForAverage: Integer, avgPrintStream: PrintStream = null): Unit = {

    val queryFileName = s"Q$queryNumber.out"
    val queryFileStream = new FileOutputStream(new File(queryFileName))
    val queryPrintStream = new PrintStream(queryFileStream)

    var rs: ResultSet = null
    // scalastyle:off println
    try {
      println(s"Started executing $queryNumber")
      if (isResultCollection) {
        rs = queryExecution(queryNumber, stmt)
        val rsmd = rs.getMetaData()
        val columnsNumber = rsmd.getColumnCount()
        var count: Int = 0
        while (rs.next()) {
          count += 1
          for (i <- 1 to columnsNumber) {
            if (i > 1) queryPrintStream.print(",")
            queryPrintStream.print(rs.getString(i))
          }
          queryPrintStream.println()
        }
        println(s"Number of results : $count")
        println(s"$queryNumber Result collected in file $queryFileName")
        if (queryNumber.equals("13")) {
          stmt.execute("drop view ViewQ13")
        }
        if (queryNumber.equals("15")) {
          stmt.execute("drop view revenue")
        }
      } else {
        var totalTime: Long = 0
        for (i <- 1 to (warmup + runsForAverage)) {
          val startTime = System.currentTimeMillis()
          rs = queryExecution(queryNumber, stmt)
          while (rs.next()) {
            // just iterating over result
          }
          val endTime = System.currentTimeMillis()
          val iterationTime = endTime - startTime
          queryPrintStream.println(s"$iterationTime")
          if (i > warmup) {
            totalTime += iterationTime
          }
          if (queryNumber.equals("13")) {
            stmt.execute("drop view ViewQ13")
          }
          if (queryNumber.equals("15")) {
            stmt.execute("drop view revenue")
          }
        }
        queryPrintStream.println(s"${totalTime / runsForAverage}")
        // The average stream is optional (it defaults to null).
        if (avgPrintStream != null) {
          avgPrintStream.println(s"$queryNumber,${totalTime / runsForAverage}")
        }
      }
      println(s"Finished executing $queryNumber")
    } catch {
      case e: Exception =>
        e.printStackTrace()
        e.printStackTrace(queryPrintStream)
        if (avgPrintStream != null) {
          e.printStackTrace(avgPrintStream)
        }
        println(s" Exception while executing $queryNumber is written to file $queryFileName")
    } finally {
      // rs is still null when the very first execution failed.
      try {
        if (rs != null) {
          rs.close()
        }
      } finally {
        // The per-query file is opened in both modes, so it is released in both modes.
        queryPrintStream.close()
        queryFileStream.close()
      }
    }
    // scalastyle:on println
  }


  /**
   * Executes one query through `sqlContext`.
   *
   * With `isResultCollection` the collected rows are written to the results file, with
   * `Double` values formatted using four decimals. Otherwise the query is executed
   * `warmup + runsForAverage` times, every iteration time is written to the timings
   * file, and the average of the iterations that follow the warm-up is written to the
   * timings file and to `avgTimePrintStream`. The query plan is recorded on the first
   * execution.
   *
   * The output file names are prefixed with `threadNumber` and contain "Snappy" or
   * "Spark" depending on `isSnappy`. Exceptions are caught and logged, not rethrown.
   *
   * NOTE(review): the query text is always requested with `isSnappy = true`, also when
   * this method is called with `isSnappy = false` -- confirm that this is intended.
   *
   * @param avgTimePrintStream optional sink for the "query,average" line; may be null
   */
  def execute(queryNumber: String, sqlContext: SQLContext, isResultCollection: Boolean,
      isSnappy: Boolean, threadNumber: Int = 1, isDynamic: Boolean = false, warmup: Int = 0,
      runsForAverage: Int = 1, avgTimePrintStream: PrintStream = null): Unit = {

    val planFileName = if (isSnappy) s"${threadNumber}_QueryPlans_Snappy.out"
    else s"${threadNumber}_QueryPlans_Spark.out"
    val queryResultsFileName = if (isSnappy) s"${threadNumber}_Snappy_Q${queryNumber}_Results.out"
    else s"${threadNumber}_Spark_Q${queryNumber}_Results.out"
    val queryStatisticsFileName = if (isSnappy) s"${threadNumber}_Snappy_Q${queryNumber}_Timings.csv"
    else s"${threadNumber}_Spark_Q${queryNumber}_Timings.csv"

    // The plan file is shared by all queries and stays open until close is called.
    if (planFileStream == null && planPrintStream == null) {
      planFileStream = new FileOutputStream(new File(planFileName))
      planPrintStream = new PrintStream(planFileStream)
    }

    val queryStatisticsFileStream: FileOutputStream =
      new FileOutputStream(new File(queryStatisticsFileName))
    val queryStatisticsPrintStream: PrintStream = new PrintStream(queryStatisticsFileStream)
    queryStatisticsPrintStream.println(s"Iteration,ResponseTime")
    // scalastyle:off println
    try {
      println(s"Started executing $queryNumber")

      if (isResultCollection) {
        val queryResultsFileStream: FileOutputStream =
          new FileOutputStream(new File(queryResultsFileName))
        val queryResultsPrintStream: PrintStream = new PrintStream(queryResultsFileStream)

        try {
          val queryToBeExecuted = TPCH_Queries.getQuery(queryNumber, isDynamic, isSnappy = true)
          val (resultSet, _) = queryExecution(queryNumber, queryToBeExecuted, sqlContext,
            genPlan = true)
          println(s"$queryNumber : ${resultSet.length}")

          for (row <- resultSet) {
            queryResultsPrintStream.println(row.toSeq.map {
              case d: Double => "%18.4f".format(d).trim()
              case v => v
            }.mkString(","))
          }
          println(s"Q$queryNumber Result Collected in file $queryResultsFileName")
        } catch {
          case e: Exception =>
            e.printStackTrace(queryResultsPrintStream)
            println(s" Exception while executing $queryNumber in writing to file $queryResultsFileName")
        } finally {
          // Closing the print stream also closes the wrapped file stream.
          queryResultsPrintStream.close()
        }
      } else {
        var totalTime: Long = 0
        for (i <- 1 to (warmup + runsForAverage)) {
          val queryToBeExecuted = TPCH_Queries.getQuery(queryNumber, isDynamic, isSnappy = true)
          val startTime = System.currentTimeMillis()
          // collect plan only once during the first iteration
          val cnts: Array[Row] =
            queryExecution(queryNumber, queryToBeExecuted, sqlContext, genPlan = i == 1)._1
          for (s <- cnts) {
            // just iterating over result
            // TODO: not required in job here as df.count() is being used.
            // or else check to see if iterating on each result row was intended
          }
          val endTime = System.currentTimeMillis()
          val iterationTime = endTime - startTime
          queryStatisticsPrintStream.println(s"$i,$iterationTime")
          if (i > warmup) {
            totalTime += iterationTime
          }
        }
        queryStatisticsPrintStream.println(s"Average,${totalTime / runsForAverage}")
        // The average stream is optional (it defaults to null).
        if (avgTimePrintStream != null) {
          avgTimePrintStream.println(s"$queryNumber,${totalTime / runsForAverage}")
        }
      }
      println(s"Finished executing $queryNumber")
    } catch {
      case e: Exception =>
        e.printStackTrace(queryStatisticsPrintStream)
        if (avgTimePrintStream != null) {
          e.printStackTrace(avgTimePrintStream)
        }
        // The stack trace above goes to the timings file, so that is the file to name.
        println(s" Exception while executing $queryNumber in writing to file $queryStatisticsFileName")
    } finally {
      queryStatisticsPrintStream.close()
    }
    // scalastyle:on println
  }

  /**
   * Writes the query identifier and the executed plan of `df` to the shared plan stream,
   * or prints the extended explain output when no plan stream is open.
   */
  def printPlan(df: DataFrame, query: String): Unit = {
    // scalastyle:off println
    if (planPrintStream != null) {
      planPrintStream.println(query)
      planPrintStream.println(df.queryExecution.executedPlan)
    } else {
      df.explain(true)
    }
    // scalastyle:on println
  }

  /**
   * Runs the prepared statement and returns its result set. Query 15 is special-cased:
   * its temporary query and its original query text are executed instead.
   */
  def queryExecution(queryNumber: String, prepStatement: PreparedStatement): ResultSet = {
    val rs: ResultSet = queryNumber match {
      case "15" =>
        prepStatement.execute(TPCH_Queries.getTempQuery15_Original)
        prepStatement.executeQuery(TPCH_Queries.getQuery15_Original)
      case _ =>
        prepStatement.executeQuery()
    }
    rs
  }

  /**
   * Runs `query` through `sqlContext` and collects the result.
   *
   * For query 15 the given text is first registered as the temporary view "revenue" and
   * `TPCH_Queries.getQuery15` is executed on top of it.
   *
   * @param genPlan record the query plan through `printPlan`
   * @return the collected rows together with the `DataFrame` that produced them
   */
  def queryExecution(queryNumber: String, query: String, sqlContext: SQLContext,
      genPlan: Boolean = false): (scala.Array[org.apache.spark.sql.Row], DataFrame) = {
    var queryToBeExecuted = query
    if (queryNumber.equals("15")) {
      val result = sqlContext.sql(queryToBeExecuted)
      result.createOrReplaceTempView("revenue")
      queryToBeExecuted = TPCH_Queries.getQuery15
    }
    val df = sqlContext.sql(queryToBeExecuted)
    if (genPlan) {
      printPlan(df, queryNumber)
    }
    (df.collect(), df)
  }

}
/**
 * Command line application that loads the TPC-H tables into a plain Spark session and
 * then runs the requested queries through [[QueryExecutor]].
 */
object SparkApp {

  private val usage: String =
    "Usage: SparkApp <threadNumber> <tpchDataPath> <numberOfLoadStages> <isParquet> " +
        "<rePartition> <isSupplierColumn> <buckets_Supplier> <buckets_Order_Lineitem> " +
        "<buckets_Cust_Part_PartSupp> <queries> <sparkSqlProps> <isDynamic> " +
        "<isResultCollection> <warmUpIterations> <actualRuns> <traceEvents> " +
        "<cacheTables> <randomSeed>"

  /**
   * Loads the tables, applies the SQL properties and runs the queries.
   *
   * Table creation times are written to `<threadNumber>_Spark_LoadPerf.csv` and the
   * query averages to `<threadNumber>_Spark_Average.csv`. The 18 positional arguments
   * are listed in `usage`; `queries` is '-' separated and `sparkSqlProps` is ','
   * separated. The bucket arguments are only passed on when `rePartition` is true,
   * otherwise "0" is used.
   *
   * @throws IllegalArgumentException when fewer than 18 arguments are given
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 18, usage)

    val threadNumber = args(0).toInt
    val tpchDataPath = args(1)
    val numberOfLoadStages = args(2).toInt
    val isParquet = args(3).toBoolean
    val rePartition = args(4).toBoolean
    val isSupplierColumn = args(5).toBoolean
    val buckets_Supplier = args(6)
    val buckets_Order_Lineitem = args(7)
    val buckets_Cust_Part_PartSupp = args(8)

    val queries = args(9).split("-")
    val sparkSqlProps = args(10).split(",")
    val isDynamic = args(11).toBoolean
    val isResultCollection = args(12).toBoolean
    val warmUpIterations = args(13).toInt
    val actualRuns = args(14).toInt

    val traceEvents = args(15).toBoolean
    val cacheTables = args(16).toBoolean
    val randomSeed = args(17).toInt

    val usingOptionString = null
    val sparkSession: SparkSession = SparkSession
        .builder
        // set local as master to debug the app on a local cluster
        // .master("local")
        .appName("TPCH_Spark")
        .getOrCreate

    try {
      val loadPerfFileStream: FileOutputStream = new FileOutputStream(
        new File(s"${threadNumber}_Spark_LoadPerf.csv"))
      val loadPerfPrintStream: PrintStream = new PrintStream(loadPerfFileStream)

      try {
        loadPerfPrintStream.println(s"Table, CreationTime")

        val avgFileStream: FileOutputStream = new FileOutputStream(
          new File(s"${threadNumber}_Spark_Average.csv"))
        val avgPrintStream: PrintStream = new PrintStream(avgFileStream)

        try {
          avgPrintStream.println(s"Query,AverageResponseTime")

          // create tables : load tables from (Parquet/ tbl) files into spark cache

          TPCHReplicatedTable.createPopulateRegionTable(usingOptionString,
            sparkSession.sqlContext, tpchDataPath,
            isSnappy = false, loadPerfPrintStream,
            trace = traceEvents,
            cacheTables = cacheTables)

          TPCHReplicatedTable.createPopulateNationTable(usingOptionString,
            sparkSession.sqlContext, tpchDataPath,
            isSnappy = false, loadPerfPrintStream,
            trace = traceEvents,
            cacheTables = cacheTables)

          if (isSupplierColumn) {
            TPCHColumnPartitionedTable.createAndPopulateSupplierTable(
              sparkSession.sqlContext, tpchDataPath,
              isSnappy = false,
              buckets = if (rePartition) buckets_Supplier else "0",
              loadPerfPrintStream,
              numberOfLoadingStages = numberOfLoadStages,
              isParquet = isParquet,
              trace = traceEvents,
              cacheTables = cacheTables)
          } else {
            TPCHReplicatedTable.createPopulateSupplierTable(usingOptionString,
              sparkSession.sqlContext,
              tpchDataPath, isSnappy = false, loadPerfPrintStream,
              numberOfLoadingStages = numberOfLoadStages,
              trace = traceEvents,
              cacheTables = cacheTables)
          }

          TPCHColumnPartitionedTable.createPopulateOrderTable(sparkSession.sqlContext,
            tpchDataPath,
            isSnappy = false,
            buckets = if (rePartition) buckets_Order_Lineitem else "0",
            loadPerfPrintStream,
            numberOfLoadingStages = numberOfLoadStages,
            isParquet = isParquet,
            trace = traceEvents,
            cacheTables = cacheTables)

          TPCHColumnPartitionedTable.createPopulateLineItemTable(sparkSession.sqlContext,
            tpchDataPath,
            isSnappy = false,
            buckets = if (rePartition) buckets_Order_Lineitem else "0",
            loadPerfPrintStream = loadPerfPrintStream,
            numberOfLoadingStages = numberOfLoadStages,
            isParquet = isParquet,
            trace = traceEvents,
            cacheTables = cacheTables)

          TPCHColumnPartitionedTable.createPopulateCustomerTable(sparkSession.sqlContext,
            tpchDataPath,
            isSnappy = false,
            buckets = if (rePartition) buckets_Cust_Part_PartSupp else "0",
            loadPerfPrintStream = loadPerfPrintStream,
            numberOfLoadingStages = numberOfLoadStages,
            isParquet = isParquet,
            trace = traceEvents,
            cacheTables = cacheTables)

          TPCHColumnPartitionedTable.createPopulatePartTable(sparkSession.sqlContext,
            tpchDataPath,
            isSnappy = false,
            buckets = if (rePartition) buckets_Cust_Part_PartSupp else "0",
            loadPerfPrintStream = loadPerfPrintStream,
            numberOfLoadingStages = numberOfLoadStages,
            isParquet = isParquet,
            trace = traceEvents,
            cacheTables = cacheTables)

          TPCHColumnPartitionedTable.createPopulatePartSuppTable(sparkSession.sqlContext,
            tpchDataPath,
            isSnappy = false,
            if (rePartition) buckets_Cust_Part_PartSupp else "0",
            loadPerfPrintStream = loadPerfPrintStream,
            numberOfLoadingStages = numberOfLoadStages,
            isParquet = isParquet,
            trace = traceEvents,
            cacheTables = cacheTables)

          // set spark SQL properties and run queries

          // scalastyle:off println
          for (prop <- sparkSqlProps) {
            println(prop)
            sparkSession.sql(s"set $prop")
          }
          // scalastyle:on println

          QueryExecutor.setRandomSeed(randomSeed)
          for (query <- queries) {
            QueryExecutor.execute(query, sparkSession.sqlContext, isResultCollection,
              isSnappy = false,
              threadNumber, isDynamic, warmUpIterations, actualRuns, avgPrintStream)
          }
        } finally {
          // cleanup, also when loading a table or running a query failed
          avgPrintStream.flush()
          avgPrintStream.close()
          avgFileStream.close()
          QueryExecutor.close
        }
      } finally {
        loadPerfPrintStream.flush()
        loadPerfPrintStream.close()
        loadPerfFileStream.close()
      }
    } finally {
      sparkSession.stop()
    }
  }
}
/**
 * Snappy job that recreates the TPC-H tables with `isSnappy = false` and then runs the
 * configured queries through [[QueryExecutor]].
 *
 * `isValidJob` parses the job configuration into the mutable fields below and
 * `runSnappyJob` reads them.
 */
object SparkAppUsingJob extends SnappySQLJob {

  var tpchDataPath: String = _
  var numberOfLoadStages: Integer = _
  var isParquet : Boolean = _
  /** Query numbers, taken from the '-' separated "queries" setting. */
  var queries: Array[String] = _
  /** Entries of the comma separated "sparkSqlProps" setting; each one is applied as `set <entry>`. */
  var sqlSparkProperties: Array[String] = _
  var isDynamic: Boolean = _
  var isResultCollection: Boolean = _
  var warmUp: Integer = _
  var runsForAverage: Integer = _
  var threadNumber: Integer = _
  var randomSeed : Integer = _

  /**
   * Drops and reloads the eight TPC-H tables, applies the SQL properties and executes
   * the queries.
   *
   * Table creation times are written to `<threadNumber>_Spark_LoadPerf.csv` and query
   * averages to `<threadNumber>_Spark_Average.csv`. Both files and the shared plan
   * streams of [[QueryExecutor]] are released in `finally` blocks, so a job run does not
   * leave open file handles behind, whether it succeeds or fails.
   */
  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
    val snc = snSession.sqlContext
    snc.sparkContext.hadoopConfiguration.set("fs.s3a.connection.maximum", "1000")

    val isSnappy = false
    val usingOptionString = null
    val loadPerfFileStream: FileOutputStream = new FileOutputStream(
      new File(s"${threadNumber}_Spark_LoadPerf.csv"))
    val loadPerfPrintStream: PrintStream = new PrintStream(loadPerfFileStream)

    try {
      loadPerfPrintStream.println(s"Table, CreationTime")

      val avgFileStream: FileOutputStream = new FileOutputStream(
        new File(s"${threadNumber}_Spark_Average.csv"))
      val avgPrintStream: PrintStream = new PrintStream(avgFileStream)

      try {
        avgPrintStream.println(s"Query,AverageResponseTime")

        snc.dropTable("NATION", ifExists = true)
        snc.dropTable("REGION", ifExists = true)
        snc.dropTable("SUPPLIER", ifExists = true)
        snc.dropTable("PARTSUPP", ifExists = true)
        snc.dropTable("PART", ifExists = true)
        snc.dropTable("CUSTOMER", ifExists = true)
        snc.dropTable("LINEITEM", ifExists = true)
        snc.dropTable("ORDERS", ifExists = true)

        TPCHReplicatedTable.createPopulateRegionTable(usingOptionString, snc, tpchDataPath,
          isSnappy, loadPerfPrintStream)
        TPCHReplicatedTable.createPopulateNationTable(usingOptionString, snc, tpchDataPath,
          isSnappy, loadPerfPrintStream)
        TPCHReplicatedTable.createPopulateSupplierTable(usingOptionString, snc, tpchDataPath,
          isSnappy, loadPerfPrintStream, numberOfLoadStages)

        TPCHColumnPartitionedTable.createPopulateOrderTable(snc, tpchDataPath, isSnappy,
          loadPerfPrintStream = loadPerfPrintStream, numberOfLoadingStages = numberOfLoadStages,
          isParquet = isParquet)
        TPCHColumnPartitionedTable.createPopulateLineItemTable(snc, tpchDataPath, isSnappy,
          loadPerfPrintStream = loadPerfPrintStream, numberOfLoadingStages = numberOfLoadStages,
          isParquet = isParquet)
        TPCHColumnPartitionedTable.createPopulateCustomerTable(snc, tpchDataPath, isSnappy,
          loadPerfPrintStream = loadPerfPrintStream, numberOfLoadingStages = numberOfLoadStages,
          isParquet = isParquet)
        TPCHColumnPartitionedTable.createPopulatePartTable(snc, tpchDataPath, isSnappy,
          loadPerfPrintStream = loadPerfPrintStream, numberOfLoadingStages = numberOfLoadStages,
          isParquet = isParquet)
        TPCHColumnPartitionedTable.createPopulatePartSuppTable(snc, tpchDataPath, isSnappy,
          loadPerfPrintStream = loadPerfPrintStream, numberOfLoadingStages = numberOfLoadStages,
          isParquet = isParquet)

        // scalastyle:off println
        for (prop <- sqlSparkProperties) {
          println(prop)
          snc.sql(s"set $prop")
        }
        // scalastyle:on println

        QueryExecutor.setRandomSeed(randomSeed)
        for (query <- queries) {
          QueryExecutor.execute(query, snc, isResultCollection, isSnappy,
            threadNumber, isDynamic, warmUp, runsForAverage, avgPrintStream)
        }
      } finally {
        avgPrintStream.close()
        avgFileStream.close()
        QueryExecutor.close
      }
    } finally {
      loadPerfPrintStream.close()
      loadPerfFileStream.close()
    }
  }

  /**
   * Reads the job settings from `config` into the fields of this object.
   *
   * "queries", "isDynamic", "resultCollection", "warmUpIterations" and "actualRuns" are
   * mandatory: the job is reported invalid when one of them is missing. The remaining
   * settings fall back to defaults (data location "/QASNAPPY/TPCH/DATA/1", one load
   * stage, isParquet false, threadNumber 1, randomSeed 42).
   */
  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {

    tpchDataPath = if (config.hasPath("dataLocation")) {
      config.getString("dataLocation")
    } else {
      "/QASNAPPY/TPCH/DATA/1"
    }

    numberOfLoadStages = if (config.hasPath("NumberOfLoadStages")) {
      config.getString("NumberOfLoadStages").toInt
    } else {
      1
    }

    isParquet = if (config.hasPath("isParquet")) {
      config.getBoolean("isParquet")
    } else {
      false
    }

    val sqlSparkProps = if (config.hasPath("sparkSqlProps")) {
      config.getString("sparkSqlProps")
    }
    else " "

    sqlSparkProperties = sqlSparkProps.split(",")

    val tempqueries = if (config.hasPath("queries")) {
      config.getString("queries")
    } else {
      return SnappyJobInvalid("Specify Query number to be executed")
    }

    // scalastyle:off println
    println(s"tempqueries : $tempqueries")
    // scalastyle:on println
    queries = tempqueries.split("-")

    isDynamic = if (config.hasPath("isDynamic")) {
      config.getBoolean("isDynamic")
    } else {
      return SnappyJobInvalid("Specify whether to use dynamic paramters")
    }

    isResultCollection = if (config.hasPath("resultCollection")) {
      config.getBoolean("resultCollection")
    } else {
      return SnappyJobInvalid("Specify whether to to collect results")
    }

    warmUp = if (config.hasPath("warmUpIterations")) {
      config.getInt("warmUpIterations")
    } else {
      return SnappyJobInvalid("Specify number of warmup iterations ")
    }
    runsForAverage = if (config.hasPath("actualRuns")) {
      config.getInt("actualRuns")
    } else {
      return SnappyJobInvalid("Specify number of iterations of which average result is " +
          "calculated")
    }

    threadNumber = if (config.hasPath("threadNumber")) {
      config.getInt("threadNumber")
    } else {
      1
    }

    randomSeed = if (config.hasPath("randomSeed")) {
      config.getInt("randomSeed")
    } else {
      42
    }

    SnappyJobValid()
  }
}
/**
 * Snappy job that drops and recreates the TPC-H tables with `isSnappy = true`.
 *
 * `isValidJob` parses the job configuration into the mutable fields below and
 * `runSnappyJob` reads them.
 */
object TableCreationJob extends SnappySQLJob {

  var tpchDataPath: String = _
  var buckets_Order_Lineitem: String = _
  var buckets_Cust_Part_PartSupp: String = _
  var buckets_Supplier: String = _
  /** Selects the column-partitioned loader for SUPPLIER instead of the replicated one. */
  var isSupplierColumn: Boolean = _
  var redundancy: String = _
  var persistence: Boolean = _
  var persistence_type: String = _
  /** Kept as text; converted to an Int when the job runs. */
  var numberOfLoadStages : String = _
  var isParquet : Boolean = _
  var createParquet : Boolean = _
  var traceEvents : Boolean = _

  /**
   * Drops the existing TPC-H tables and loads them again, writing the creation time of
   * each table to `SnappyLoadTablesPerfStats.csv`.
   *
   * The statistics file is closed in a `finally` block so that the job does not leave an
   * open file handle behind, whether it succeeds or fails.
   *
   * @return the value returned by the last table loader call
   */
  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
    val snc = snSession.sqlContext
    snc.sparkContext.hadoopConfiguration.set("fs.s3a.connection.maximum", "1000")
    val isSnappy = true

    // Parsed once, and before any table is dropped, so that a malformed value fails fast.
    val loadStages = numberOfLoadStages.toInt

    val loadPerfFileStream: FileOutputStream =
      new FileOutputStream(new File("SnappyLoadTablesPerfStats.csv"))
    val loadPerfPrintStream: PrintStream = new PrintStream(loadPerfFileStream)

    try {
      loadPerfPrintStream.println(s"Table, CreationTime")

      val usingOptionString = if (persistence) {
        s" USING row OPTIONS (PERSISTENT '${persistence_type}')"
      } else {
        " USING row OPTIONS ()"
      }

      snc.dropTable("NATION", ifExists = true)
      snc.dropTable("REGION", ifExists = true)
      snc.dropTable("SUPPLIER", ifExists = true)
      snc.dropTable("PARTSUPP", ifExists = true)
      snc.dropTable("LINEITEM_PART", ifExists = true)
      snc.dropTable("PART", ifExists = true)
      snc.dropTable("ORDERS_CUST", ifExists = true)
      snc.dropTable("CUSTOMER", ifExists = true)
      snc.dropTable("LINEITEM", ifExists = true)
      snc.dropTable("ORDERS", ifExists = true)

      TPCHReplicatedTable.createPopulateRegionTable(usingOptionString, snc, tpchDataPath, isSnappy,
        loadPerfPrintStream, trace = false, cacheTables = false)
      TPCHReplicatedTable.createPopulateNationTable(usingOptionString, snc, tpchDataPath, isSnappy,
        loadPerfPrintStream, trace = false, cacheTables = false)

      if (isSupplierColumn) {
        TPCHColumnPartitionedTable.createAndPopulateSupplierTable(snc, tpchDataPath, isSnappy,
          buckets_Supplier, loadPerfPrintStream, redundancy, persistence, persistence_type,
          loadStages, isParquet, createParquet)
      } else {
        TPCHReplicatedTable.createPopulateSupplierTable(usingOptionString, snc, tpchDataPath,
          isSnappy, loadPerfPrintStream, loadStages)
      }

      TPCHColumnPartitionedTable.createPopulateOrderTable(snc, tpchDataPath, isSnappy,
        buckets_Order_Lineitem, loadPerfPrintStream, redundancy, persistence, persistence_type,
        loadStages, isParquet, createParquet, trace = traceEvents, cacheTables = false)
      TPCHColumnPartitionedTable.createPopulateLineItemTable(snc, tpchDataPath, isSnappy,
        buckets_Order_Lineitem, loadPerfPrintStream, redundancy, persistence, persistence_type,
        loadStages, isParquet, createParquet, trace = traceEvents, cacheTables = false)
      TPCHColumnPartitionedTable.createPopulateCustomerTable(snc, tpchDataPath, isSnappy,
        buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_type,
        loadStages, isParquet, createParquet, trace = traceEvents, cacheTables = false)
      TPCHColumnPartitionedTable.createPopulatePartTable(snc, tpchDataPath, isSnappy,
        buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_type,
        loadStages, isParquet, createParquet, trace = traceEvents, cacheTables = false)
      TPCHColumnPartitionedTable.createPopulatePartSuppTable(snc, tpchDataPath, isSnappy,
        buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_type,
        loadStages, isParquet, createParquet, trace = traceEvents, cacheTables = false)
    } finally {
      loadPerfPrintStream.close()
      loadPerfFileStream.close()
    }
  }

  /**
   * Reads the job settings from `config` into the fields of this object. Every setting
   * is optional and falls back to the default coded below, so the job is always
   * reported valid.
   */
  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {

    tpchDataPath = if (config.hasPath("dataLocation")) {
      config.getString("dataLocation")
    } else {
      "/QASNAPPY/TPCH/DATA/1"
    }

    buckets_Order_Lineitem = if (config.hasPath("Buckets_Order_Lineitem")) {
      config.getString("Buckets_Order_Lineitem")
    } else {
      "15"
    }

    buckets_Cust_Part_PartSupp = if (config.hasPath("Buckets_Cust_Part_PartSupp")) {
      config.getString("Buckets_Cust_Part_PartSupp")
    } else {
      "15"
    }

    buckets_Supplier = if (config.hasPath("Buckets_Supplier")) {
      config.getString("Buckets_Supplier")
    } else {
      "3"
    }

    isSupplierColumn = if (config.hasPath("IsSupplierColumnTable")) {
      config.getBoolean("IsSupplierColumnTable")
    } else {
      false
    }

    redundancy = if (config.hasPath("Redundancy")) {
      config.getString("Redundancy")
    } else {
      "0"
    }

    persistence = if (config.hasPath("Persistence")) {
      config.getBoolean("Persistence")
    } else {
      false
    }

    persistence_type = if (config.hasPath("Persistence_Type")) {
      config.getString("Persistence_Type")
    } else {
      "false"
    }

    numberOfLoadStages = if (config.hasPath("NumberOfLoadStages")) {
      config.getString("NumberOfLoadStages")
    } else {
      "1"
    }

    isParquet = if (config.hasPath("isParquet")) {
      config.getBoolean("isParquet")
    } else {
      false
    }

    createParquet = if (config.hasPath("createParquet")) {
      config.getBoolean("createParquet")
    } else {
      false
    }

    traceEvents = if (config.hasPath("traceEvents")) {
      config.getBoolean("traceEvents")
    } else {
      false
    }

    /* if (!new File(tpchDataPath).exists()) {
      return SnappyJobInvalid("Incorrect tpch data path. " +
          "Specify correct location")
    } */

    SnappyJobValid()
  }
}
+ */ + +object TableCreationSmartConnector { + + def main(args: Array[String]) { + + val sc: SparkSession = SparkSession + .builder + .appName("TPCH_Spark_SmartConnector") + .getOrCreate + + val tpchDataPath = args(0) + val numberOfLoadStages = args(1).toInt + val isParquet = args(2).toBoolean + val createParquet = args(3).toBoolean + val buckets_Order_Lineitem = args(4) + val buckets_Cust_Part_PartSupp = args(5) + val isSupplierColumn = args(6).toBoolean + val buckets_Supplier = args(7) + val redundancy = args(8) + val persistence = args(9).toBoolean + val persistence_Type = args(10) + val traceEvents = args(11).toBoolean + val threadNumber = args(12).toInt + + var usingOptionString = " USING row OPTIONS ()" + if(persistence){ + usingOptionString = s" USING row OPTIONS (PERSISTENT '${persistence_Type}')" + } + + val loadPerfFileStream: FileOutputStream = new FileOutputStream( + new File(s"${threadNumber}_Smart_LoadPerf.out")) + val loadPerfPrintStream: PrintStream = new PrintStream(loadPerfFileStream) + + val snSession = new SnappySession(sc.sparkContext) + snSession.sparkContext.hadoopConfiguration.set("fs.s3a.connection.maximum", "1000") + + snSession.dropTable("NATION", ifExists = true) + snSession.dropTable("REGION", ifExists = true) + snSession.dropTable("SUPPLIER", ifExists = true) + snSession.dropTable("PARTSUPP", ifExists = true) + snSession.dropTable("LINEITEM_PART", ifExists = true) + snSession.dropTable("PART", ifExists = true) + snSession.dropTable("ORDERS_CUST", ifExists = true) + snSession.dropTable("CUSTOMER", ifExists = true) + snSession.dropTable("LINEITEM", ifExists = true) + snSession.dropTable("ORDERS", ifExists = true) + + TPCHReplicatedTable.createPopulateRegionTable(usingOptionString, snSession.sqlContext, + tpchDataPath, true, loadPerfPrintStream, trace = traceEvents, cacheTables = false) + TPCHReplicatedTable.createPopulateNationTable(usingOptionString, snSession.sqlContext, + tpchDataPath, true, loadPerfPrintStream, trace = traceEvents, 
cacheTables = false) + + if (isSupplierColumn) { + TPCHColumnPartitionedTable.createAndPopulateSupplierTable(snSession.sqlContext, tpchDataPath, + true, buckets_Supplier, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = createParquet, + trace = traceEvents, cacheTables = false) + } else { + TPCHReplicatedTable.createPopulateSupplierTable(usingOptionString, snSession.sqlContext, + tpchDataPath, true, loadPerfPrintStream, numberOfLoadStages.toInt) + } + + TPCHColumnPartitionedTable.createPopulateOrderTable(snSession.sqlContext, tpchDataPath, true, + buckets_Order_Lineitem, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = createParquet, + trace = traceEvents, cacheTables = false) + + TPCHColumnPartitionedTable.createPopulateLineItemTable(snSession.sqlContext, tpchDataPath, true, + buckets_Order_Lineitem, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = createParquet, + trace = traceEvents, cacheTables = false) + TPCHColumnPartitionedTable.createPopulateCustomerTable(snSession.sqlContext, tpchDataPath, true, + buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = createParquet, + trace = traceEvents, cacheTables = false) + TPCHColumnPartitionedTable.createPopulatePartTable(snSession.sqlContext, tpchDataPath, true, + buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = createParquet, + trace = traceEvents, cacheTables = false) + TPCHColumnPartitionedTable.createPopulatePartSuppTable(snSession.sqlContext, tpchDataPath, true, + buckets_Cust_Part_PartSupp, loadPerfPrintStream, redundancy, persistence, persistence_Type, + numberOfLoadStages.toInt, isParquet, createParquet = 
createParquet, + trace = traceEvents, cacheTables = false) + + loadPerfPrintStream.close() + loadPerfFileStream.close() + sc.stop() + + } +} diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpchmodifiers.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpchmodifiers.scala new file mode 100644 index 0000000000..de5a371e32 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/tpchmodifiers.scala @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.benchmark.snappy + +import scala.util.matching.Regex + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +trait SnappyTPCH extends TPCH { + override def q3: String = + s""" + |${super.q3} + |limit 10 + """.stripMargin +} + +trait SnappyAdapter extends Adapter with DynamicQueryGetter { + + type RETURN = (scala.Array[org.apache.spark.sql.Row], DataFrame) + + protected val functionOverrides = Map( + " 1 0 " -> "DATE_SUB('1997-12-31',[VAL])", + " 7 0 " -> "between '1995-01-01' and '1996-12-31'", + " 8 0 " -> "between '1995-01-01' and '1996-12-31'", + "22 0 " -> "SUBSTR(C_PHONE,1,2)", + "" -> "" + ) + + override def replace(qNum: String, tokens: QUERY_TYPE, queryStr: String, args: String*): + String = { + tokens.zipWithIndex.foldLeft(queryStr) { case (src, (tok, i)) => + replaceEach(qNum, i, src, tok, args: _*) + } + } + + private def replaceEach(qNum: String, + tokNum: Int, query: String, + tok: String, args: String*): String = { + + val qNumPattern(queryNumber, _) = qNum + val seek = (if (queryNumber.toInt < 10) " " else "") + + s"$qNum $tokNum" + + (if (tokNum < 10) " " else "") + + def rep(newArg: String): String = query.replace(tok, newArg) + + def modifyWith(arg: String) = { + functionModifier(tok).fold(rep(arg)) { r => + rep(functionOverrides.getOrElse(seek, r)).replace("[VAL]", arg) + } + } + + // Substitute the caller-supplied argument for this token. When the argument is + // missing (no args, or fewer args than tokens - previously a MatchError) or empty, + // fall back to defaults(seek). + args.lift(tokNum) match { + case Some(s) if s.nonEmpty => modifyWith(s) + case _ => modifyWith(defaults(seek)) + } + } + + private def functionModifier(tok: String): Option[String] = { + def find(elems: Array[String], str: String) = elems.indexWhere(_.indexOf(str) > 0) + + tok match { + case t if t.startsWith("date") && 
t.indexOf("month") > 0 => + val elems = t.split("'") + // println(s"${elems.mkString("--")} ${elems(find(elems, "interval") + 1)} ") + val interval = Integer.parseInt(elems(find(elems, "interval") + 1)) + Some(s"ADD_MONTHS('[VAL]', $interval)") + case t if t.startsWith("date") && t.indexOf("year") > 0 => + val elems = t.split("'") + // println(s"${elems.mkString("--")} ${elems(find(elems, "interval") + 1)} ") + val interval = 12 * Integer.parseInt(elems(find(elems, "interval") + 1)) + Some(s"ADD_MONTHS('[VAL]', $interval)") + case t if t.startsWith("date") => + Some("'[VAL]'") + case t if t.startsWith("substring") => + Some("SUBSTR") + case t if t.startsWith("between") => + Some("BETWEEN") + case t if t.startsWith("extract(year from ") => + Some("YEAR(") + case _ => None + } + } + + def estimateSizes(qNum: Int, tableSizes: Map[String, Long], + executor: (String) => DataFrame): Long = { + getQueryStrings(qNum, executor).foldLeft(0L) { case (sz, queryString) => + // This is an indirect hack to estimate the size of each query's input by traversing the + // logical plan and adding up the sizes of all tables that appear in the plan. Note that this + // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate + // per-row processing time for those cases. 
+ val queryRelations = scala.collection.mutable.HashSet[String]() + executor(queryString).queryExecution.logical.map { + case ur@UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table.toLowerCase) + case lp: LogicalPlan => + lp.expressions.foreach { + _ foreach { + case subquery: SubqueryExpression => + subquery.plan.foreach { + case ur@UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table.toLowerCase) + case _ => + } + case _ => + } + } + case _ => + } + sz + queryRelations.map(tableSizes.getOrElse(_, 0L)).sum + } + } + + def execute(qNum: Int, executor: (String) => DataFrame): RETURN = { + val df = executor(getQueryStrings(qNum, executor).last) + if (df == null) { + return (null, null) + } + (df.collect(), df) + } + + private def getQueryStrings(qNum: Int, executor: (String) => DataFrame): Seq[String] = { + qNum match { + case 15 => + val viewName(_, vn, _, viewQuery) = getFinalQueryString(s"${qNum}v") + val result = executor(viewQuery) + if (result == null) return "" :: Nil + result.createOrReplaceTempView(vn) + viewQuery :: getFinalQueryString(qNum.toString) :: Nil + case _ => + getFinalQueryString(qNum.toString) :: Nil + } + } +} diff --git a/cluster/src/test/scala/io/snappydata/cluster/JDBCConnectionPoolTestSuite.scala b/cluster/src/test/scala/io/snappydata/cluster/JDBCConnectionPoolTestSuite.scala new file mode 100644 index 0000000000..aff1571017 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/cluster/JDBCConnectionPoolTestSuite.scala @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.sql.{Connection, DriverManager} +import java.util.Properties + +import com.pivotal.gemfirexd.TestUtil +import io.snappydata.SnappyFunSuite +import org.scalatest.BeforeAndAfterAll + +class JDBCConnectionPoolTestSuite extends SnappyFunSuite with BeforeAndAfterAll { + + val driverName = "io.snappydata.jdbc.ClientPoolDriver" + + test("Test JDBC connection pool with null properties") { + snc + val serverHostPort = TestUtil.startNetServer() + + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val properties = null + for (i <- 1 to 3) { + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.close() + } + } + + test("Test JDBC connection pool URL case sensitivity properties") { + snc + val serverHostPort = TestUtil.startNetServer() + + val url = s"JDBC:SNAPPYDATA:POOL://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val properties = null + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.close() + + val url1 = s"JDBC:SNAPPYDATA:Pool://$serverHostPort" + val conn1 = DriverManager.getConnection(url1, properties) + assert(null != conn1) + conn1.close() + } + + test("Test connection pool with pool and connection properties") { + snc + val serverHostPort = TestUtil.startNetServer() + val properties = new Properties + properties.setProperty("pool-maxActive", "5") + properties.setProperty("pool-initialSize", "5") + 
properties.setProperty("user", "app") + properties.setProperty("password", "app") + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + for (i <- 1 to 3) { + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.close() + } + } + + test("Test connection pool with random property") { + snc + val serverHostPort = TestUtil.startNetServer() + val properties = new Properties + properties.setProperty("pool-maxActive", "5") + properties.setProperty("pool-initialSize", "5") + properties.setProperty("user", "app") + properties.setProperty("password", "app") + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + for (i <- 1 to 3) { + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.close() + } + } + + test("Test connection reset settings autocommit,isolationlevel,readOnly state") { + snc + val serverHostPort = TestUtil.startNetServer() + val properties = new Properties + properties.setProperty("user", "app") + properties.setProperty("password", "app") + properties.setProperty("pool.initialSize", "1") + properties.setProperty("pool.maxActive", "1") + properties.setProperty("pool.maxIdle", "1") + properties.setProperty("pool.minIdle", "1") + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.setAutoCommit(true) + conn.setReadOnly(true) + conn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED) + conn.close() + + val conn1 = DriverManager.getConnection(url, properties) + assert(null != conn1) + assert(!conn1.getAutoCommit, "auto commit should return false, which is a default value.") + assert(!conn1.isReadOnly, "read only should return false, which is a default value. 
") + assert(conn1.getTransactionIsolation == Connection.TRANSACTION_NONE) + conn1.close() + } + + test("Test connection pool with max connection call than the initial size") { + snc + val serverHostPort = TestUtil.startNetServer() + val properties = new Properties + properties.setProperty("pool.maxActive", "10") + properties.setProperty("pool.initialSize", "5") + properties.setProperty("user", "app") + properties.setProperty("password", "app") + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + for (i <- 1 to 10) { + val conn = DriverManager.getConnection(url, properties) + assert(null != conn) + conn.close() + } + } + + test("Test connection pool without passing any property") { + val serverHostPort = TestUtil.startNetServer() + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val conn = DriverManager.getConnection(url) + assert(null != conn) + conn.close() + } + + test("Test JDBC connection of pool to create, insert and read Query ") { + snc + val serverHostPort = TestUtil.startNetServer() + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val properties = new Properties + properties.setProperty("user", "app") + properties.setProperty("password", "app") + val conn = DriverManager.getConnection(url, properties) + val stmt = conn.createStatement() + var sql = "DROP TABLE IF EXISTS TEST_JDBC_DRIVER_POOL" + stmt.executeUpdate(sql) + sql = "CREATE TABLE TEST_JDBC_DRIVER_POOL (id INTEGER , " + + "col1 VARCHAR(255), col2 VARCHAR(255)," + " age INTEGER );" + stmt.executeUpdate(sql) + + val preparedStatement = conn.prepareStatement("insert into " + + "TEST_JDBC_DRIVER_POOL VALUES (?,?,?,?)") + var i = 1 + while (i < 1000) { + preparedStatement.setInt(1, i) + preparedStatement.setString(2, "Col_1_Value_" + i) + preparedStatement.setString(3, "Col_2_Value_" + i) + preparedStatement.setInt(4, i) + 
preparedStatement.execute + i += 1 + } + + sql = "select count(*) from TEST_JDBC_DRIVER_POOL" + val rs = stmt.executeQuery(sql) + var count = 0 + while (rs.next()) { + count = rs.getInt(1) + } + assert(count == 999) + + stmt.close() + conn.close() + } + + test("Test JDBC connection pool to drop table") { + snc + val serverHostPort = TestUtil.startNetServer() + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + val properties = new Properties + properties.setProperty("user", "app") + properties.setProperty("password", "app") + val conn = DriverManager.getConnection(url, properties) + val stmt = conn.createStatement() + val sql = "DROP TABLE IF EXISTS TEST_JDBC_DRIVER_POOL" + assert(0 == stmt.executeUpdate(sql)) + conn.close() + } + + test("Test connection pool for pool exhaustion") { + try { + snc + val serverHostPort = TestUtil.startNetServer() + val properties = new Properties + properties.setProperty("pool.maxIdle", "1") + properties.setProperty("pool.maxWait", "30") + properties.setProperty("pool.removeAbandoned", "true") + properties.setProperty("pool.removeAbandonedTimeout", "15") + properties.setProperty("pool.minIdle", "1") + properties.setProperty("pool.maxActive", "3") + properties.setProperty("pool.initialSize", "1") + properties.setProperty("user", "app") + properties.setProperty("password", "app") + + val url = s"jdbc:snappydata:pool://$serverHostPort" + // scalastyle:off + Class.forName(driverName) + // max active is 3 and trying to use more than that + for (_ <- 1 to 6) { + val conn = DriverManager.getConnection(url, properties) + // conn.close() + } + fail("Expected PoolExhaustedException") + } catch { + case _: org.apache.tomcat.jdbc.pool.PoolExhaustedException => // expected + } + } +} diff --git a/cluster/src/test/scala/io/snappydata/cluster/PreparedQueryRoutingSingleNodeSuite.scala b/cluster/src/test/scala/io/snappydata/cluster/PreparedQueryRoutingSingleNodeSuite.scala new file mode 100644 index 
0000000000..799d72bae1 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/cluster/PreparedQueryRoutingSingleNodeSuite.scala @@ -0,0 +1,1403 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package io.snappydata.cluster + +import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, SQLException} + +import com.pivotal.gemfirexd.TestUtil +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import io.snappydata.{SnappyFunSuite, SnappyTableStatsProviderService} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.{SnappyContext, SnappySession} +import org.apache.spark.{Logging, SparkConf} + +class PreparedQueryRoutingSingleNodeSuite extends SnappyFunSuite with BeforeAndAfterAll { + + private val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE + + protected override def newSparkConf(addOn: SparkConf => SparkConf): SparkConf = { + /** + * Setting local[n] here actually supposed to affect number of reservoir created + * while sampling. + * + * Change of 'n' will influence results if they are dependent on weights - derived + * from hidden column in sample table. 
+ */ + new org.apache.spark.SparkConf().setAppName("PreparedQueryRoutingSingleNodeSuite") + .setMaster("local[6]") + // .set("spark.logConf", "true") + // .set("mcast-port", "4958") + } + + override def beforeAll(): Unit = { + // System.setProperty("org.codehaus.janino.source_debugging.enable", "true") + // System.setProperty("spark.testing", "true") + super.beforeAll() + // reducing DML chunk size size to force lead node to send + // results in multiple batches + setDMLMaxChunkSize(50L) + } + + override def afterAll(): Unit = { + // System.clearProperty("org.codehaus.janino.source_debugging.enable") + // System.clearProperty("spark.testing") + setDMLMaxChunkSize(default_chunk_size) + super.afterAll() + } + + def setDMLMaxChunkSize(size: Long): Unit = { + GemFireXDUtils.DML_MAX_CHUNK_SIZE = size + } + + // Runs the parameterized SELECTs qry..qry5 against tableName over a JDBC connection to + // serverHostPort and passes each result set to verifyResults. + def query0(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + var prepStatement1: java.sql.PreparedStatement = null + var prepStatement2: java.sql.PreparedStatement = null + var prepStatement3: java.sql.PreparedStatement = null + var prepStatement4: java.sql.PreparedStatement = null + var prepStatement5: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id in (?, ?, ?) " + + s" and ol_str_id LIKE ? 
" + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + prepStatement.setInt(2, 100) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 300) + prepStatement.setString(5, "%0") + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry-1", prepStatement.executeQuery, + Array(100, 200, 300), 1) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 600) + prepStatement.setInt(3, 700) + prepStatement.setInt(4, 800) + prepStatement.setString(5, "%0") + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry-2", prepStatement.executeQuery, + Array(600, 700, 800), 1) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 600) + prepStatement.setInt(3, 700) + prepStatement.setInt(4, 800) + prepStatement.setString(5, "%0%") + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry-2.2", prepStatement.executeQuery, + Array(600, 700, 800), 2) + + val qry1 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id in (?, ?, 300) " + + s" limit 20" + + s"" + + prepStatement1 = conn.prepareStatement(qry1) + prepStatement1.setInt(1, 500) + prepStatement1.setInt(2, 100) + prepStatement1.setInt(3, 200) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry1-1", prepStatement1.executeQuery, + Array(100, 200, 300), 3) + + prepStatement1.setInt(1, 500) + prepStatement1.setInt(2, 100) + prepStatement1.setInt(3, 400) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry1-2", prepStatement1.executeQuery, + Array(100, 400, 300), 3) + + val qry2 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? 
" + + s" and ol_int2_id in (?, ?, 800) " + + s" limit 20" + + s"" + prepStatement2 = conn.prepareStatement(qry2) + prepStatement2.setInt(1, 900) + prepStatement2.setInt(2, 600) + prepStatement2.setInt(3, 700) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry2-1", prepStatement2.executeQuery, + Array(600, 700, 800), 3) + + prepStatement2.setInt(1, 900) + prepStatement2.setInt(2, 400) + prepStatement2.setInt(3, 500) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry2-2", prepStatement2.executeQuery, + Array(400, 500, 800), 3) + + val qry3 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? " + + s" and ol_int2_id in (?, ?, ?) " + + s" limit 20" + + s"" + + prepStatement3 = conn.prepareStatement(qry3) + prepStatement3.setInt(1, 500) + prepStatement3.setInt(2, 100) + prepStatement3.setInt(3, 200) + prepStatement3.setInt(4, 300) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry3-1", prepStatement3.executeQuery, + Array(100, 200, 300), 3) + + prepStatement3.setInt(1, 900) + prepStatement3.setInt(2, 600) + prepStatement3.setInt(3, 700) + prepStatement3.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry3-2", prepStatement3.executeQuery, + Array(600, 700, 800), 3) + + val qry4 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where ? > ol_int_id " + + s" and ol_int2_id in (?, ?, ?) 
" + + s" limit 20" + + s"" + + prepStatement4 = conn.prepareStatement(qry4) + prepStatement4.setInt(1, 500) + prepStatement4.setInt(2, 100) + prepStatement4.setInt(3, 200) + prepStatement4.setInt(4, 300) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry4-1", prepStatement4.executeQuery, + Array(100, 200, 300), 4) + + prepStatement4.setInt(1, 900) + prepStatement4.setInt(2, 600) + prepStatement4.setInt(3, 700) + prepStatement4.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry4-2", prepStatement4.executeQuery, + Array(600, 700, 800), 4) + + val qry5 = s"select ol_int_id, ol_int2_id, ol_str_id " + + s" from $tableName " + + s" where cast(ol_int_id as double) < ? " + + s" and ol_int2_id in (?, ?, ?) " + + s" limit 20" + + s"" + + prepStatement5 = conn.prepareStatement(qry5) + prepStatement5.setDouble(1, 500.01) + prepStatement5.setInt(2, 100) + prepStatement5.setInt(3, 200) + prepStatement5.setInt(4, 300) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry5-1", prepStatement5.executeQuery, + Array(100, 200, 300), 5) + + prepStatement5.setDouble(1, 900.01) + prepStatement5.setInt(2, 600) + prepStatement5.setInt(3, 700) + prepStatement5.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("qry5-2", prepStatement5.executeQuery, + Array(600, 700, 800), 5) + + // Thread.sleep(1000000) + } finally { + // close every prepared statement, including prepStatement5 which was previously leaked + Seq(prepStatement, prepStatement1, prepStatement2, prepStatement3, prepStatement4, + prepStatement5).filter(_ != null).foreach(_.close()) + conn.close() + } + } + + test("test Prepared Statement via JDBC") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + val tableName = "order_line_col" + try { + snc.sql(s"create table $tableName (ol_int_id integer," + + s" ol_int2_id integer, 
ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')") + + + val serverHostPort = TestUtil.startNetServer() + // logInfo("network server started") + PreparedQueryRoutingSingleNodeSuite.insertRows(tableName, 1000, serverHostPort) + query0(tableName, serverHostPort) + } finally { + snc.sql(s"drop table $tableName") + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + test("test Metadata for Prepared Statement via JDBC") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + val tableName = "order_line_col" + try { + snc.sql(s"create table $tableName (ol_int_id integer," + + s" ol_int2_id integer, ol_str_id STRING) using column " + + "options( partition_by 'ol_int_id, ol_int2_id', buckets '2')") + + val serverHostPort = TestUtil.startNetServer() + // logInfo("network server started") + PreparedQueryRoutingSingleNodeSuite.insertRows(tableName, 100, serverHostPort) + query6(tableName, serverHostPort) + query7(tableName, serverHostPort) + query8(tableName, serverHostPort) + query9(tableName, serverHostPort) + query10(tableName, serverHostPort) + query11(tableName, serverHostPort) + query12(tableName, serverHostPort) + } finally { + snc.sql(s"drop table $tableName") + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + def query1(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_1_int_id, ol_2_int2_id, ol_1_str_id " + + s" from $tableName1 A inner join $tableName2 B " + + s" on A.ol_1_int_id = B.ol_2_int_id " + + s" where ol_1_int2_id < ? " + + s" and ol_2_int2_id in (?, ?, ?) 
" + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + prepStatement.setInt(2, 100) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 300) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query1-1", prepStatement.executeQuery, + Array(100, 200, 300), 1) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 600) + prepStatement.setInt(3, 700) + prepStatement.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query1-2", prepStatement.executeQuery, + Array(600, 700, 800), 1) + + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query2(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select sum(ol_1_int_id) s, 0, 'a' " + + s" from $tableName1 " + + s" group by ol_1_int2_id having sum(ol_1_int_id) in (?, ?, ?) 
" + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 400) + prepStatement.setInt(2, 300) + prepStatement.setInt(3, 200) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query2-1", prepStatement.executeQuery, + Array(400, 200, 300), 2) + + prepStatement.setInt(1, 600) + prepStatement.setInt(2, 800) + prepStatement.setInt(3, 700) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query2-2", prepStatement.executeQuery, + Array(600, 700, 800), 2) + + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query3(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_1_int_id, ol_2_int2_id, ol_1_str_id " + + s" from $tableName1 A inner join $tableName2 B " + + s" on A.ol_1_int_id = B.ol_2_int_id " + + s" where ol_1_int2_id < ? " + + s" and ol_2_int2_id = case when ol_2_int2_id < ? then ? else ? 
end " + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + prepStatement.setInt(2, 300) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 400) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query3-1", prepStatement.executeQuery, + Array(200, 400), 3) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 700) + prepStatement.setInt(3, 600) + prepStatement.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query3-2", prepStatement.executeQuery, + Array(600, 800), 3) + + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query4(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int2_id = ? " + + s" union " + + s" select ol_2_int_id, ol_2_int2_id, ol_2_str_id " + + s" from $tableName2 " + + s" where ol_2_int2_id in (?, ?, ?) 
" + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 100) + prepStatement.setInt(2, 300) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 400) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query4-1", prepStatement.executeQuery, + Array(100, 200, 300, 400), 4) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 600) + prepStatement.setInt(3, 700) + prepStatement.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query4-2", prepStatement.executeQuery, + Array(900, 600, 700, 800), 4) + + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query5(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int_id < ? " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id in (?, ?, ?) 
" + + s") " + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + prepStatement.setInt(2, 100) + prepStatement.setInt(3, 200) + prepStatement.setInt(4, 300) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query5-1", prepStatement.executeQuery, + Array(100, 200, 300), 4) + + prepStatement.setInt(1, 900) + prepStatement.setInt(2, 600) + prepStatement.setInt(3, 700) + prepStatement.setInt(4, 800) + PreparedQueryRoutingSingleNodeSuite.verifyResults("query5-2", prepStatement.executeQuery, + Array(600, 700, 800), 4) + + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query6(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select count(ol_int_id) as a , sum(ol_int2_id) as b, ol_str_id as c " + + s" from $tableName " + + s" where ol_int_id < ? 
" + + s" group by ol_str_id " + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 20) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query7(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select count(ol_int_id) , sum(ol_int2_id), ol_str_id " + + s" from $tableName " + + s" where ol_int_id < ? 
" + + s" group by ol_str_id " + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 100) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query8(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id as a, ol_int2_id as b, ol_int_id as c" + + s" from $tableName " + + s"" + + prepStatement = conn.prepareStatement(qry) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 100) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query9(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id, ol_int2_id, 
ol_int_id" + + s" from $tableName " + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 20) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query10(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select ol_int_id as a, ol_int2_id as b, ol_int_id as c" + + s" from $tableName " + + s" where ol_int_id < ? 
" + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + prepStatement.setInt(1, 500) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 20) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query11(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = s"select count(distinct ol_int_id) , sum(ol_int2_id), ol_str_id " + + s" from $tableName " + + s" group by ol_str_id " + + s"" + + prepStatement = conn.prepareStatement(qry) + assert(prepStatement.getMetaData().getColumnCount() == 3) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 3) + assert(rs.getMetaData().getColumnCount() == 3) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 100) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + def query12(tableName: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement: java.sql.PreparedStatement = null + try { + val qry = 
s"select distinct(ol_int_id) " + + s" from $tableName " + + s" limit 20" + + s"" + + prepStatement = conn.prepareStatement(qry) + assert(prepStatement.getMetaData().getColumnCount() == 1) + + val rs: ResultSet = prepStatement.executeQuery + assert(prepStatement.getMetaData().getColumnCount() == 1) + assert(rs.getMetaData().getColumnCount() == 1) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + // val j = rs.getInt(2) + // val s = rs.getString(3) + // logInfo(s"row($index) $i $j $s ") + index += 1 + } + assert(index == 20) + + // logInfo(s"$qryName Number of rows read " + index) + rs.close() + // Thread.sleep(1000000) + } finally { + if (prepStatement != null) prepStatement.close() + conn.close() + } + } + + test("test Join, SubQuery and Aggragtes") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + val tableName1 = "order_line_1_col" + val tableName2 = "order_line_2_col" + snc.sql(s"create table $tableName1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int_id, ol_1_int2_id', buckets '2')") + + snc.sql(s"create table $tableName2 (ol_2_int_id integer," + + s" ol_2_int2_id integer, ol_2_str_id STRING) using column " + + "options( partition_by 'ol_2_int_id, ol_2_int2_id', buckets '2')") + + + val serverHostPort = TestUtil.startNetServer() + // logInfo("network server started") + PreparedQueryRoutingSingleNodeSuite.insertRows(tableName1, 1000, serverHostPort) + PreparedQueryRoutingSingleNodeSuite.insertRows(tableName2, 1000, serverHostPort) + query1(tableName1, tableName2, serverHostPort) + query2(tableName1, tableName2, serverHostPort) + query3(tableName1, tableName2, serverHostPort) + query4(tableName1, tableName2, serverHostPort) + query5(tableName1, tableName2, serverHostPort) + } finally { + 
SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + test("update delete on column table") { + val snc = this.snc + val serverHostPort = TestUtil.startNetServer() + // logInfo("network server started") + PreparedQueryRoutingSingleNodeSuite.updateDeleteOnColumnTable(snc, serverHostPort) + } + + test("SNAP-1981: Equality on string columns") { + val snc = this.snc + val serverHostPort = TestUtil.startNetServer() + // logInfo("network server started") + PreparedQueryRoutingSingleNodeSuite.equalityOnStringColumn(snc, serverHostPort) + } + + test("SNAP-1994 Test functions and expressions") { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + testSNAP1994() + } finally { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + private def testSNAP1994(): Unit = { + snc.sql(s"Drop Table if exists double_tab") + snc.sql(s"Create Table double_tab (a INT, d Double, s String) " + + "using column options()") + snc.sql(s"insert into double_tab values(1, 1.111111, '1a'), (2, 2.222222, '2b')," + + s" (3, 3.33333, '3c')") + snc.sql(s"Create Table double_tab_2 (a INT, d Double, s String) " + + "using column options()") + snc.sql(s"insert into double_tab_2 values(1, 1.111111, '1a'), (2, 2.222222, '2b')," + + s" (3, 3.33333, '3c')") + val cacheMap = SnappySession.getPlanCache.asMap() + cacheMap.clear() + assert( cacheMap.size() == 0) + val serverHostPort = TestUtil.startNetServer() + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + try { + def close(prepStatement: java.sql.PreparedStatement): Unit = if (prepStatement != null) { + prepStatement.close() + } + val prepStatement0 = conn.prepareStatement(s"select * from double_tab" + + s" where round(d, 2) < ?") + assert(cacheMap.size() == 0) + prepStatement0.setDouble(1, 3.33) + var update = prepStatement0.executeQuery() + var index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = 
update.getBigDecimal(2) + logInfo(s"1-row($index) $i $j") + index += 1 + assert(i == 1 || i == 2) + } + logInfo(s"1-Number of rows read " + index) + assert(index == 2) + assert(cacheMap.size() == 1) + + prepStatement0.setDouble(1, 4.3333) + update = prepStatement0.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getBigDecimal(2) + logInfo(s"2-row($index) $i $j") + index += 1 + assert(i == 1 || i == 2 || i == 3) + } + logInfo(s"2-Number of rows read " + index) + assert(index == 3) + assert(cacheMap.size() == 1) + close(prepStatement0) + + val prepStatement1 = conn.prepareStatement(s"select a + ?, d from double_tab") + assert(cacheMap.size() == 1) + prepStatement1.setInt(1, 2) + update = prepStatement1.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getBigDecimal(2) + logInfo(s"3-row($index) $i $j") + index += 1 + assert(i > 2 && i < 6) + } + logInfo(s"3-Number of rows read " + index) + assert(index == 3) + assert(cacheMap.size() == 2) + + prepStatement1.setInt(1, 3) + update = prepStatement1.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getBigDecimal(2) + logInfo(s"4-row($index) $i $j") + index += 1 + assert(i > 3 && i < 7) + } + logInfo(s"4-Number of rows read " + index) + assert(index == 3) + assert(cacheMap.size() == 2) + close(prepStatement1) + + val prepStatement2 = conn.prepareStatement(s"select a," + + s" d from double_tab where UPPER(s) = ?") + assert(cacheMap.size() == 2) + prepStatement2.setString(1, "1A") + update = prepStatement2.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getString(2) + logInfo(s"5-row($index) $i $j") + index += 1 + assert(i == 1) + } + logInfo(s"5-Number of rows read " + index) + assert(index == 1) + assert(cacheMap.size() == 3) + + prepStatement2.setString(1, "2B") + update = prepStatement2.executeQuery() + index = 0 + while 
(update.next()) { + val i = update.getInt(1) + val j = update.getString(2) + logInfo(s"6-row($index) $i $j") + index += 1 + assert(i == 2) + } + logInfo(s"6-Number of rows read " + index) + assert(index == 1) + assert(cacheMap.size() == 3) + close(prepStatement2) + + val prepStatement3: PreparedStatement = conn.prepareStatement(s"select * from double_tab t1" + + s" inner join double_tab_2 t2 on t1.a = t2.a where t1.d > ? and " + + s" t1.a in ( select a from double_tab_2 where d < ? )") + assert(cacheMap.size() == 3) + // Anyway TableStatsProviderService.aggregateStats clear stats + // So clearing here for better assertion in testing + cacheMap.clear() + prepStatement3.setInt(1, 1) + prepStatement3.setInt(2, 3) + update = prepStatement3.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + logInfo(s"7-row($index) $i") + index += 1 + assert(i == 1 || i == 2) + } + logInfo(s"7-Number of rows read " + index) + assert(index == 2) + assert(cacheMap.size() == 0) + + prepStatement3.setInt(1, 2) + prepStatement3.setInt(2, 4) + update = prepStatement3.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + logInfo(s"8-row($index) $i") + index += 1 + assert(i == 2 || i == 3) + } + logInfo(s"8-Number of rows read " + index) + assert(index == 2) + assert(cacheMap.size() == 0) + close(prepStatement3) + + val prepStatement4 = conn.prepareStatement(s"select * from double_tab" + + s" where round(d, 2) < round(3.33, 2)") + assert(cacheMap.size() == 0) + + update = prepStatement4.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getBigDecimal(2) + logInfo(s"9-row($index) $i $j") + index += 1 + assert(i == 1 || i == 2) + } + logInfo(s"9-Number of rows read " + index) + assert(index == 2) + assert(cacheMap.size() == 1) + close(prepStatement4) + + val prepStatement5 = conn.prepareStatement(s"select a," + + s" nvl(d, ?) 
from double_tab where UPPER(s) = ?") + assert(cacheMap.size() == 1) + prepStatement5.setInt(1, 2) + prepStatement5.setString(2, "1A") + update = prepStatement5.executeQuery() + index = 0 + while (update.next()) { + val i = update.getInt(1) + val j = update.getBigDecimal(2) + logInfo(s"10-row($index) $i $j") + index += 1 + assert(i == 1) + } + logInfo(s"10-Number of rows read " + index) + assert(index == 1) + assert(cacheMap.size() == 2) + close(prepStatement5) + try { + val faultyPrepStatement = conn.prepareStatement(s"select * from double_tab" + + s" where round(d, ?) < round(?, ?)") + fail("PreparedStatement creation should have failed") + } catch { + case sqle: SQLException + if sqle.getMessage.indexOf("cannot have parameterized argument") != -1 => + case x: Throwable => throw x + } + } finally { + conn.close() + } + } + + test("Test bug SNAP-2446") { + var conn: Connection = null + val ddlStr = s"create table MAP(MAP_CONNECTION_ID BIGINT NOT NULL," + + s" SOURCE_DATA_CONNECTION_CODE INT NOT NULL," + + s" DESTINATION_DATA_CONNECTION_CODE INT NOT NULL," + + s" ACTIVE_FLAG BOOLEAN, PRIMARY KEY(MAP_CONNECTION_ID)) USING ROW OPTIONS()" + + snc.sql(ddlStr) + snc.sql(s"insert into MAP values (-28416, 19375, 424345, true)") + val serverHostPort = TestUtil.startNetServer() + conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort + "/route-query=false/") + + val sqlText = s"SELECT DESTINATION_DATA_CONNECTION_CODE," + + "SOURCE_DATA_CONNECTION_CODE,ACTIVE_FLAG FROM MAP" + + val rs2 = conn.createStatement().executeQuery(sqlText) + assert(rs2.next()) + assert(rs2.getBoolean(3)) + conn.close() + } + + test("Test broadcast hash joins and scalar sub-queries") { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + var conn: Connection = null + try { + val ddlStr = "(YearI INT," + // NOT NULL + "MonthI INT," + // NOT NULL + "DayOfMonth INT," + // NOT NULL + "DayOfWeek INT," + // NOT NULL + "DepTime INT," + + "CRSDepTime INT," + + 
"ArrTime INT," + + "CRSArrTime INT," + + "UniqueCarrier VARCHAR(20)," + // NOT NULL + "FlightNum INT," + + "TailNum VARCHAR(20)," + + "ActualElapsedTime INT," + + "CRSElapsedTime INT," + + "AirTime INT," + + "ArrDelay INT," + + "DepDelay INT," + + "Origin VARCHAR(20)," + + "Dest VARCHAR(20)," + + "Distance INT," + + "TaxiIn INT," + + "TaxiOut INT," + + "Cancelled INT," + + "CancellationCode VARCHAR(20)," + + "Diverted INT," + + "CarrierDelay INT," + + "WeatherDelay INT," + + "NASDelay INT," + + "SecurityDelay INT," + + "LateAircraftDelay INT," + + "ArrDelaySlot INT)" + + val hfile: String = getClass.getResource("/2015.parquet").getPath + val snContext = snc + snContext.sql("set spark.sql.shuffle.partitions=6") + + val airlineDF = snContext.read.load(hfile) + val airlineparquetTable = "airlineparquetTable" + airlineDF.registerTempTable(airlineparquetTable) + + val colTableName = "airlineColTable" + snc.sql(s"CREATE TABLE $colTableName $ddlStr" + + "USING column options()") + + airlineDF.write.insertInto(colTableName) + + def close(prepStatement: java.sql.PreparedStatement): Unit = if (prepStatement != null) { + prepStatement.close() + } + val cacheMap = SnappySession.getPlanCache.asMap() + cacheMap.clear() + assert( cacheMap.size() == 0) + val serverHostPort = TestUtil.startNetServer() + conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val prepStatement1 = conn.prepareStatement("select avg(taxiin + taxiout)avgTaxiTime," + + s" count( * ) numFlights, " + + s" dest, avg(arrDelay) arrivalDelay from $colTableName " + + s" where (taxiin > ? or taxiout > ?) and dest in (select dest from $colTableName " + + s" group by dest having count ( * ) > ?) 
group by dest order " + + s" by avgTaxiTime desc") + assert(cacheMap.size() == 0) + prepStatement1.setInt(1, 10) + prepStatement1.setInt(2, 10) + prepStatement1.setInt(3, 10000) + var update = prepStatement1.executeQuery() + var index = 0 + val result1 = List("ORD", "LAX", "LGA", "MIA", "JFK", "DFW", "CLT", "EWR", "MCO", "ATL", + "DTW", "BOS", "DEN", "CLE", "IAH", "FLL", "PHL", "PHX", "SFO", "IAD", "LAS", "RSW", + "BNA", "MSP", "SEA", "DCA", "MDW", "RDU", "MKE", "HNL", "SLC", "TPA", "BWI", "AUS", + "MCI", "STL", "MSY", "SAT", "SNA", "DAL", "PDX", "SMF", "HOU", "SAN", "OAK", "SJC") + while (update.next()) { + val s = update.getString(3) + // logInfo(s"1-row($index) $s ") + assert(result1.contains(s)) + index += 1 + } + logInfo(s"1-Number of rows read " + index) + assert(index == 46) + assert(cacheMap.size() == 0) + + prepStatement1.setInt(1, 5) + prepStatement1.setInt(2, 5) + prepStatement1.setInt(3, 5000) + update = prepStatement1.executeQuery() + index = 0 + val result2 = List( "ORD", "LGA", "LAX", "MIA", "JFK", "CLT", "EWR", "DFW", "SJU", "CVG", + "ATL", "PBI", "DTW", "BOS", "MCO", "CLE", "PHL", "IAH", "IAD", "RIC", "FLL", "DEN", + "SFO", "MEM", "MSP", "CMH", "JAX", "RSW", "SEA", "DCA", "PHX", "PIT", "MKE", "RDU", + "IND", "SLC", "BNA", "LAS", "BDL", "TUS", "TPA", "BUF", "OMA", "OKC", "AUS", "MDW", + "MCI", "OGG", "TUL", "MSY", "BWI", "STL", "ABQ", "SAT", "PDX", "SNA", "HNL", "SAN", + "SMF", "ONT", "SJC", "OAK", "HOU", "DAL", "BUR") + while (update.next()) { + val s = update.getString(3) + // logInfo(s"2-row($index) $s ") + assert(result2.contains(s)) + index += 1 + } + logInfo(s"2-Number of rows read " + index) + assert(index == 65) + assert(cacheMap.size() == 0) + close(prepStatement1) + } + finally { + if (conn != null) { + conn.close() + } + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } +} + +object PreparedQueryRoutingSingleNodeSuite extends Logging { + + def insertRows(tableName: String, numRows: Int, serverHostPort: 
String): Unit = { + + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + val rows = (1 to numRows).toSeq + val prepareStatement = conn.prepareStatement(s"insert into $tableName values(?, ?, ?)") + try { + var i = 1 + rows.foreach(d => { + prepareStatement.setInt(1, d) + prepareStatement.setInt(2, d) + prepareStatement.setString(3, s"$d") + prepareStatement.addBatch() + i += 1 + if (i % 1000 == 0) { + val ret = prepareStatement.executeBatch() + ret.foreach(r => assert(r == 1)) + assert(ret.length == 999, ret.length) + i = 0 + } + }) + val ret = prepareStatement.executeBatch() + ret.foreach(r => assert(r == 1)) + logInfo(s"committed $numRows rows") + } finally { + prepareStatement.close() + conn.close() + } + } + + def verifyResults(qry: String, rs: ResultSet, results: Array[Int], + cacheMapSize: Int): Unit = { + val cacheMap = SnappySession.getPlanCache.asMap() + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + val j = rs.getInt(2) + val s = rs.getString(3) + logInfo(s"$qry row($index) $i $j $s ") + index += 1 + assert(results.contains(i)) + } + + logInfo(s"$qry Number of rows read " + index) + assert(index == results.length) + rs.close() + + logInfo(s"cachemapsize = $cacheMapSize and .size = ${cacheMap.size()}") + assert( cacheMap.size() == cacheMapSize || -1 == cacheMapSize) + } + + def update_delete_query1(tableName1: String, cacheMapSize: Int, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement0: java.sql.PreparedStatement = null + var prepStatement1: java.sql.PreparedStatement = null + var prepStatement2: java.sql.PreparedStatement = null + var prepStatement3: java.sql.PreparedStatement = null + var prepStatement4: java.sql.PreparedStatement = null + var prepStatement5: java.sql.PreparedStatement = null + try { + prepStatement0 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + 
s" from $tableName1" + + s" where ol_1_int2_id < ? or ol_1_int_id > ? ") + prepStatement0.setInt(1, 3) + prepStatement0.setInt(2, 997) + verifyResults("update_delete_query1-select0", prepStatement0.executeQuery, + Array(1, 2, 998, 999, 1000), cacheMapSize) + prepStatement1 = conn.prepareStatement(s"delete from $tableName1 where ol_1_int2_id < ? ") + prepStatement1.setInt(1, 400) + prepStatement1.addBatch() + prepStatement1.setInt(1, 500) + prepStatement1.addBatch() + val delete1 = prepStatement1.executeBatch() + assert(delete1(0) == 399, delete1(0)) + assert(delete1(1) == 100, delete1(1)) + + prepStatement2 = conn.prepareStatement(s"delete from $tableName1 where ol_1_int2_id > ? ") + prepStatement2.setInt(1, 502) + val delete2 = prepStatement2.executeUpdate + assert(delete2 == 498, delete2) + + prepStatement3 = + conn.prepareStatement(s"update $tableName1 set ol_1_int_id = ? where ol_1_int2_id = ? ") + prepStatement3.setInt(1, 1000) + prepStatement3.setInt(2, 500) + val update1 = prepStatement3.executeUpdate + assert(update1 == 1, update1) + + prepStatement4 = + conn.prepareStatement(s"update $tableName1 set ol_1_int_id = ? where ol_1_int2_id > ? 
") + prepStatement4.setInt(1, 2000) + prepStatement4.setInt(2, 501) + prepStatement4.addBatch() + prepStatement4.setInt(1, 2000) + prepStatement4.setInt(2, 500) + prepStatement4.addBatch() + val update2 = prepStatement4.executeBatch() + assert(update2(0) == 1, update2(0)) + assert(update2(1) == 2, update2(1)) + + prepStatement5 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + s" from $tableName1" + + " where ol_1_int_id < ?") + prepStatement5.setInt(1, 10000) + verifyResults("update_delete_query1-select1", prepStatement5.executeQuery, + Array(1000, 2000, 2000), cacheMapSize + 1) + + prepStatement3.setInt(1, 4000) + prepStatement3.setInt(2, 500) + val update3 = prepStatement3.executeUpdate + assert(update3 == 1, update3) + + prepStatement4.setInt(1, 5000) + prepStatement4.setInt(2, 500) + val update4 = prepStatement4.executeUpdate + assert(update4 == 2, update4) + + verifyResults("update_delete_query1-select2", prepStatement5.executeQuery, + Array(4000, 5000, 5000), cacheMapSize + 1) + // Thread.sleep(1000000) + } finally { + def close(prepStatement: java.sql.PreparedStatement) = + if (prepStatement != null) prepStatement.close() + close(prepStatement0) + close(prepStatement1) + close(prepStatement2) + close(prepStatement3) + close(prepStatement4) + close(prepStatement5) + conn.close() + } + } + + def update_delete_query2(tableName1: String, cacheMapSize: Int, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement0: java.sql.PreparedStatement = null + var prepStatement1: java.sql.PreparedStatement = null + val s = conn.createStatement() + try { + prepStatement0 = + conn.prepareStatement(s"update $tableName1 set ol_1_str_id = ? where ol_1_int2_id = ? 
") + prepStatement0.setString(1, "7777") + prepStatement0.setInt(2, 500) + val update1 = prepStatement0.executeUpdate + assert(update1 == 1, update1) + + prepStatement1 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + s" from $tableName1" + + " where ol_1_str_id like ?") + prepStatement1.setString(1, "7777") + verifyResults("update_delete_query2-select1", prepStatement1.executeQuery, Array(4000), + cacheMapSize) + + prepStatement0.setString(1, "8888") + prepStatement0.setInt(2, 501) + val update2 = prepStatement0.executeUpdate + assert(update2 == 1, update2) + + prepStatement1.setString(1, "8888") + verifyResults("update_delete_query2-select1", prepStatement1.executeQuery, Array(5000), + cacheMapSize) + // Thread.sleep(1000000) + } finally { + if (prepStatement0 != null) prepStatement0.close() + if (prepStatement1 != null) prepStatement1.close() + s.close() + conn.close() + } + } + + def updateDeleteOnColumnTable(snc: SnappyContext, serverHostPort: String): Unit = { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + val tableName1 = "order_line_1_col_ud" + val tableName2 = "order_line_2_row_ud" + snc.sql(s"create table $tableName1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int2_id', buckets '2'," + + " COLUMN_BATCH_SIZE '100')") + + snc.sql(s"create table $tableName2 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using row " + + "options( partition_by 'ol_1_int2_id', buckets '2')") + + + insertRows(tableName1, 1000, serverHostPort) + insertRows(tableName2, 1000, serverHostPort) + update_delete_query1(tableName1, 1, serverHostPort) + update_delete_query1(tableName2, 3, serverHostPort) + update_delete_query2(tableName1, 5, serverHostPort) + update_delete_query2(tableName2, 6, serverHostPort) + } finally { + 
SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + def equalityOnStringColumn_query1(tableName1: String, cacheMapSize: Int, + serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + + var prepStatement0: java.sql.PreparedStatement = null + var prepStatement1: java.sql.PreparedStatement = null + var prepStatement2: java.sql.PreparedStatement = null + try { + prepStatement0 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + s" from $tableName1" + + s" where ol_1_str_id = ? or ol_1_str_id = ? or ol_1_str_id like ? ") + prepStatement0.setString(1, "1") + prepStatement0.setString(2, "2") + prepStatement0.setString(3, "99%") + verifyResults("equalityOnStringColumn_query1-select0", prepStatement0.executeQuery, + Array(1, 2, 99, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999), cacheMapSize) + + prepStatement0.setString(1, "3") + prepStatement0.setString(2, "4") + prepStatement0.setString(3, "94%") + verifyResults("equalityOnStringColumn_query1-select1", prepStatement0.executeQuery, + Array(3, 4, 94, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949), cacheMapSize) + + prepStatement1 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + s" from $tableName1" + + s" where ol_1_str_id = ? or ol_1_str_id = ? or ol_1_str_id like ?" 
+ + s" limit 20") + prepStatement1.setString(1, "1") + prepStatement1.setString(2, "2") + prepStatement1.setString(3, "99%") + verifyResults("equalityOnStringColumn_query2-select0", prepStatement1.executeQuery, + Array(1, 2, 99, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999), cacheMapSize + 1) + + prepStatement1.setString(1, "3") + prepStatement1.setString(2, "4") + prepStatement1.setString(3, "94%") + verifyResults("equalityOnStringColumn_query2-select1", prepStatement1.executeQuery, + Array(3, 4, 94, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949), cacheMapSize + 1) + + prepStatement2 = conn.prepareStatement( s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id" + + s" from $tableName1" + + s" where ol_1_str_id = ? or ol_1_str_id = ?") + prepStatement2.setString(1, "5") + prepStatement2.setString(2, "6") + verifyResults("equalityOnStringColumn_query3-select0", prepStatement2.executeQuery, + Array(5, 6), cacheMapSize + 2) + + prepStatement2.setString(1, "7") + prepStatement2.setString(2, "8") + verifyResults("equalityOnStringColumn_query3-select1", prepStatement2.executeQuery, + Array(7, 8), cacheMapSize + 2) + } finally { + def close(prepStatement: java.sql.PreparedStatement) = + if (prepStatement != null) prepStatement.close() + close(prepStatement0) + close(prepStatement1) + close(prepStatement2) + conn.close() + } + } + + def equalityOnStringColumn(snc: SnappyContext, serverHostPort: String): Unit = { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + val tableName1 = "order_line_1_col_eq" + val tableName2 = "order_line_2_row_eq" + snc.sql(s"create table $tableName1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int2_id', buckets '2'," + + " COLUMN_BATCH_SIZE '100')") + + snc.sql(s"create table $tableName2 (ol_1_int_id integer," + + s" ol_1_int2_id integer, 
ol_1_str_id STRING) using row " + + "options( partition_by 'ol_1_int2_id', buckets '2')") + + insertRows(tableName1, 1000, serverHostPort) + insertRows(tableName2, 1000, serverHostPort) + equalityOnStringColumn_query1(tableName1, 1, serverHostPort) + equalityOnStringColumn_query1(tableName2, 4, serverHostPort) + } finally { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } +} diff --git a/cluster/src/test/scala/io/snappydata/cluster/QueryRoutingSingleNodeSuite.scala b/cluster/src/test/scala/io/snappydata/cluster/QueryRoutingSingleNodeSuite.scala new file mode 100644 index 0000000000..7a9a58065e --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/cluster/QueryRoutingSingleNodeSuite.scala @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package io.snappydata.cluster + +import java.sql.{DriverManager, ResultSet} + +import com.pivotal.gemfirexd.TestUtil +import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils +import io.snappydata.gemxd.SnappySessionPerConnection +import io.snappydata.{SnappyFunSuite, SnappyTableStatsProviderService} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.SnappySession +import org.apache.spark.sql.store.ColumnTableBatchInsertTest +import org.junit.Assert._ +import org.apache.spark.SnappyJavaUtils.snappyJavaUtil + +class QueryRoutingSingleNodeSuite extends SnappyFunSuite with BeforeAndAfterAll { + + val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE + var serverHostPort = "" + val tableName = "order_line_col" + + override def beforeAll(): Unit = { + super.beforeAll() + // reducing DML chunk size size to force lead node to send + // results in multiple batches + setDMLMaxChunkSize(50L) + serverHostPort = TestUtil.startNetServer() + logInfo("network server started") + } + + override def afterAll(): Unit = { + setDMLMaxChunkSize(default_chunk_size) + TestUtil.stopNetServer() + logInfo("network server stopped") + super.afterAll() + } + + def setDMLMaxChunkSize(size: Long): Unit = { + GemFireXDUtils.DML_MAX_CHUNK_SIZE = size + } + + def insertRows(numRows: Int): Unit = { + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val rows = (1 to numRows).toSeq + val stmt = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + stmt.addBatch(s"insert into $tableName values($i, '1')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + logInfo(s"committed $numRows rows") + } finally { + stmt.close() + conn.close() + } + } + + def query(): Unit = { + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val stmt = conn.createStatement() + try { + val rs = stmt.executeQuery( + s"select ol_w_id from $tableName ") 
+ var index = 0 + while (rs.next()) { + rs.getInt(1) + index += 1 + } + logInfo("Number of rows read " + index) + rs.close() + } finally { + stmt.close() + conn.close() + } + } + + test("test serialization with lesser dml chunk size") { + + snc.sql("create table order_line_col (ol_w_id integer,ol_d_id STRING) using column " + + "options( partition_by 'ol_w_id, ol_d_id', buckets '8')") + + insertRows(1000) + + (1 to 5).foreach(d => query()) + } + def insertRows(tableName: String, numRows: Int, serverHostPort: String): Unit = { + + val conn: java.sql.Connection = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val rows = (1 to numRows).toSeq + val stmt: java.sql.Statement = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + stmt.addBatch(s"insert into $tableName values($i, $i, '$i')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + logInfo(s"committed $numRows rows") + } finally { + stmt.close() + conn.close() + } + } + + private def verifyResults(qry: String, rs: ResultSet, results: Array[Int], + cacheMapSize: Int): Unit = { + val cacheMap = SnappySession.getPlanCache.asMap() + assert(cacheMap.size() == cacheMapSize || -1 == cacheMapSize) + + var index = 0 + while (rs.next()) { + val i = rs.getInt(1) + val j = rs.getInt(2) + val s = rs.getString(3) + logInfo(s"$qry row($index) $i $j $s") + index += 1 + + assert(results.contains(i)) + } + + logInfo(s"$qry Number of rows read " + index) + assert(index == results.length) + rs.close() + } + + // TODO: After Fix remove this comment + // This test do not work with 100, 200 but works with 300 + def query1(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn: java.sql.Connection = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + val stmt: java.sql.Statement = conn.createStatement() + try { + val qry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + 
s" from $tableName1 " + + s" where ol_1_int_id < 500 " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id = 100 " + + s") " + + s" limit 20" + + s"" + verifyResults("query1-1", stmt.executeQuery(qry), Array(100), + -1) // TODO pass a number than -1 + } finally { + stmt.close() + conn.close() + } + } + + def query2(tableName1: String, tableName2: String, serverHostPort: String): Unit = { + // sc.setLogLevel("TRACE") + val conn: java.sql.Connection = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + val stmt: java.sql.Statement = conn.createStatement() + try { + val qry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int_id < 500 " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id in (100, 200, 300) " + + s") " + + s" limit 20" + + s"" + verifyResults("query2-1", stmt.executeQuery(qry), Array(100, 200, 300), 0) + + val qry2 = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int_id < 900 " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id in (600, 700, 800) " + + s") " + + s" limit 20" + + s"" + verifyResults("query2-2", stmt.executeQuery(qry2), Array(600, 700, 800), 0) + } finally { + stmt.close() + conn.close() + } + } + + def query2snc(tableName1: String, tableName2: String, serverHostPort: String, iter: Int): Unit = { + val qry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int_id < 500 " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id in (100, 200, 300) " + + s") " + + s" limit 20" + + s"" + logInfo(s"Iter $iter QUERY = $qry") + val df1 = snc.sql(qry) + val res1 = df1.collect() + logInfo(s"Iter $iter with query = $qry") + logInfo(res1.mkString("\n")) + logInfo(s"Iter $iter 
query end and res1 size = ${res1.length}") + assert(res1.length == 3) + + val qry2 = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id " + + s" from $tableName1 " + + s" where ol_1_int_id < 900 " + + s" and ol_1_int2_id in (" + + s"select ol_2_int_id " + + s" from $tableName2 " + + s" where ol_2_int_id in (600, 700, 800) " + + s") " + + s" limit 20" + + s"" + val df2 = snc.sql(qry2) + val res2 = df2.collect() + logInfo(s"Iter $iter with query2 = $qry2") + logInfo(res2.mkString("\n")) + logInfo(s"Iter $iter query2 end with res size = ${res2.length}") + assert(!res1.sameElements(res2)) + assert(res2.length == 3) + } + + test("Tokenization test with IN SubQuery") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + val tableName1 = "order_line_1_col" + val tableName2 = "order_line_2_col" + snc.sql(s"create table $tableName1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int_id, ol_1_int2_id', buckets '2')") + + snc.sql(s"create table $tableName2 (ol_2_int_id integer," + + s" ol_2_int2_id integer, ol_2_str_id STRING) using column " + + "options( partition_by 'ol_2_int_id, ol_2_int2_id', buckets '2')") + + insertRows(tableName1, 1000, serverHostPort) + insertRows(tableName2, 1000, serverHostPort) + query1(tableName1, tableName2, serverHostPort) + (0 to 5).foreach(i => query2snc(tableName1, tableName2, serverHostPort, i)) + query2(tableName1, tableName2, serverHostPort) + } finally { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + test("SNAP-1615") { + val tName = "table1615" + snc.sql(s"create table $tName (id int, price decimal(38,18), name varchar(10)) using column") + + val conn: java.sql.Connection = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + val stmt: java.sql.Statement = conn.createStatement() + 
try { + stmt.addBatch(s"insert into $tName values(1,10.4,'abc')") + stmt.addBatch(s"insert into $tName values(2,112.4,'aaa')") + stmt.addBatch(s"insert into $tName values(3,1452.4,'bbb')") + stmt.addBatch(s"insert into $tName values(4,16552.4,'ccc')") + stmt.addBatch(s"insert into $tName values(5,null,'ddd')") + stmt.addBatch(s"insert into $tName values(6,10.6,'ddd')") + stmt.executeBatch() + logInfo(s"inserted rows") + } finally { + stmt.close() + conn.close() + } + + (1 to 5).foreach(_ => query1615(tName, serverHostPort)) + } + + def query1615(tName: String, sHostPort: String): Unit = { + val conn = DriverManager.getConnection("jdbc:snappydata://" + sHostPort) + val stmt = conn.createStatement() + try { + val rs = stmt.executeQuery(s"select avg(price),name from $tName group by name") + var index = 0 + var sum: BigDecimal = 0 + while (rs.next()) { + sum += rs.getBigDecimal(1) + assert(rs.getString(2) != null) + index += 1 + } + logInfo(s"Number of rows read $index sum=$sum") + assert(index == 5, index) + assert(sum - 18138.2 == 0, sum) + rs.close() + } finally { + stmt.close() + conn.close() + } + } + + def insertBooleanRows(numRows: Int): Unit = { + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val rows = (1 to numRows).toSeq + val stmt = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + stmt.addBatch(s"insert into order_line_row_bool values(${i % 2 == 0}, $i)") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + logInfo(s"committed $numRows rows") + } finally { + stmt.close() + conn.close() + } + } + + def queryBooleanRows(): Unit = { + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val stmt = conn.createStatement() + try { + val query = s"select distinct ol_w_id from order_line_row_bool" + val count = snc.sql(query).collect().length + assert(count == 2) + logInfo("snc: Number of rows read " + count) + + val rs = 
stmt.executeQuery(query) + var index = 0 + while (rs.next()) { + rs.getInt(1) + index += 1 + } + logInfo("jdbc: Number of rows read " + index) + assert(index == 2) + rs.close() + } finally { + stmt.close() + conn.close() + } + } + + test("1655: test Boolean in Row Table") { + + snc.sql("create table order_line_row_bool (ol_w_id Boolean, ol_d_id Long) using row " + + "options( partition_by 'ol_w_id, ol_d_id', buckets '8')") + + insertBooleanRows(1000) + + (1 to 5).foreach(d => queryBooleanRows()) + } + + test("1737: Failure to convert UTF8String error with index") { + + snc.sql("CREATE TABLE IF NOT EXISTS app.ds_property (" + + "ds_name VARCHAR(250) NOT NULL," + + "ds_column VARCHAR(150) NOT NULL," + + "property VARCHAR(150) NOT NULL," + + "ds_class_id CHAR(1) NOT NULL," + + "string_value VARCHAR(1024)," + + "long_value BIGINT," + + "double_value DOUBLE," + + "updated_ts TIMESTAMP NOT NULL," + + "PRIMARY KEY (ds_name, ds_column, property))" + + "USING ROW OPTIONS (" + + "PARTITION_BY 'ds_name'," + + "buckets '2'," + + "PERSISTENT 'SYNCHRONOUS')") + + snc.sql("CREATE INDEX app.ds_property_colprop_idx ON app.ds_property(ds_column, property)") + snc.sql("CREATE INDEX app.ds_property_property_idx ON app.ds_property(property)") + snc.sql("CREATE INDEX app.ds_property_dsnameprop_idx ON app.ds_property(ds_name, property)") + + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val stmt = conn.createStatement() + try { + stmt.execute(s"insert into app.ds_property " + + s"values ('a', 'b', 'c', 'x', 'd', 1, 1.1, '1995-12-30 11:12:30')") + stmt.execute(s"insert into app.ds_property values " + + s"('a', '-', 'FAMILY', 'C', 'FindDatasetTestFamily_1', 1, 0.1, '1995-12-30 11:12:30')") + stmt.execute(s"insert into app.ds_property values " + + s"('b', '-', 'DOUBLE_PROP', 'C', 'FindDatasetTestFamily_1', 1, 0.3," + + s"'1995-12-30 11:12:30')") + + + val query = s"SELECT p.ds_name,p.ds_column,p.property,p.ds_class_id,p.string_value," + + s" 
p.long_value,p.double_value FROM app.ds_property p " + + s" WHERE (p.ds_column = '-' AND p.property = 'FAMILY' AND p.string_value =" + + s" 'FindDatasetTestFamily_1') OR" + + s" (p.ds_column = '-' AND p.property = 'DOUBLE_PROP' AND p.double_value <= 0.2) OR " + + s" (p.ds_class_id = 'C' AND p.property = 'DOUBLE_PROP' AND p.double_value > 0.2) OR " + + s" (p.ds_class_id = 'C' AND p.property = 'DOUBLE_PROP' AND p.double_value < 0.2)" + + val count = snc.sql(query).collect().length + assert(count == 2) + logInfo("snc: Number of rows read " + count) + + val rs = stmt.executeQuery(query) + var index = 0 + while (rs.next()) { + index += 1 + logInfo(s"$index: ${rs.getString(1)} ${rs.getString(2)} ${rs.getString(3)} " + + s"${rs.getString(4)} ${rs.getString(5)} ${rs.getLong(6)} ${rs.getBigDecimal(7)}") + } + logInfo("jdbc: Number of rows read " + index) + assert(index == 2) + rs.close() + } finally { + stmt.close() + conn.close() + } + } + + def insertRows(tableName: String, numRows: Int): Unit = { + + val conn = DriverManager.getConnection( + "jdbc:snappydata://" + serverHostPort) + + val rows = (1 to numRows).toSeq + val stmt = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + stmt.addBatch(s"insert into $tableName values($d, $d, '$d')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + logInfo(s"committed $numRows rows") + } finally { + stmt.close() + conn.close() + } + } + + def update_delete_query1(tableName1: String, cacheMapSize: Int): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val s = conn.createStatement() + try { + val delete1 = s.executeUpdate(s"delete from $tableName1 where ol_1_int2_id < 400 ") + assert(delete1 == 399, delete1) + val delete2 = s.executeUpdate(s"delete from $tableName1 where ol_1_int2_id < 500 ") + assert(delete2 == 100, delete2) + + val delete3 = s.executeUpdate(s"delete from $tableName1 where ol_1_int2_id 
> 502 ") + assert(delete3 == 498, delete3) + + val update1 = + s.executeUpdate(s"update $tableName1 set ol_1_int_id = 1000 where ol_1_int2_id = 500 ") + assert(update1 == 1, update1) + + val update2 = + s.executeUpdate(s"update $tableName1 set ol_1_int_id = 2000 where ol_1_int2_id > 500 ") + assert(update2 == 2, update2) + + val selectQry = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id from $tableName1 limit 20" + verifyResults("update_delete_query1-select1", s.executeQuery(selectQry), + Array(1000, 2000, 2000), cacheMapSize) + + val update3 = + s.executeUpdate(s"update $tableName1 set ol_1_int_id = 4000 where ol_1_int2_id = 500 ") + assert(update3 == 1, update3) + + val update4 = + s.executeUpdate(s"update $tableName1 set ol_1_int_id = 5000 where ol_1_int2_id > 500 ") + assert(update4 == 2, update4) + + verifyResults("update_delete_query1-select2", s.executeQuery(selectQry), + Array(4000, 5000, 5000), cacheMapSize) + + // Thread.sleep(1000000) + } finally { + s.close() + conn.close() + } + } + + def update_delete_query2(tableName1: String, cacheMapSize: Int): Unit = { + // sc.setLogLevel("TRACE") + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val s = conn.createStatement() + try { + val update1 = s.executeUpdate(s"UPDATE $tableName1 SET ol_1_int_id = ol_1_int_id + 1 " + + s" WHERE ol_1_int2_id IN (SELECT max(ol_1_int2_id) from $tableName1)") + assert(update1 == 1, update1) + + val delete1 = s.executeUpdate(s"delete from $tableName1 where ol_1_int2_id in " + + s"(SELECT min(ol_1_int2_id) from $tableName1)") + assert(delete1 == 1, delete1) + + val selectQry1 = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id from $tableName1 limit 20" + verifyResults("update_delete_query2-select1", + s.executeQuery(selectQry1), Array(5000, 5001), cacheMapSize) + } finally { + s.close() + conn.close() + } + } + + def update_delete_query3(tableName1: String, cacheMapSize: Int, numPartition: Int): Unit = { + // sc.setLogLevel("TRACE") + val conn = 
DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val s = conn.createStatement() + try { + val update1 = snc.sql(s"UPDATE $tableName1 SET ol_1_int_id = ol_1_int_id + 1 " + + s" WHERE ol_1_int2_id IN (SELECT max(ol_1_int2_id) from $tableName1)") + val sum_update1 = update1.collect().map(_.get(0).asInstanceOf[Number].longValue).sum + val count_update1 = update1.count() + assert(sum_update1 == 1) + assert(count_update1 == numPartition) + + val delete1 = snc.sql(s"delete from $tableName1 where ol_1_int2_id in " + + s"(SELECT min(ol_1_int2_id) from $tableName1)") + val sum_delete1 = delete1.collect().map(_.get(0).asInstanceOf[Number].longValue).sum + val count_delete1 = delete1.count() + assert(sum_delete1 == 1) + assert(count_delete1 == numPartition) + + val selectQry1 = s"select ol_1_int_id, ol_1_int2_id, ol_1_str_id from $tableName1 limit 20" + verifyResults("update_delete_query3-select1", + s.executeQuery(selectQry1), Array(5002), cacheMapSize) + } finally { + s.close() + conn.close() + } + } + + test("update delete on column table") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + try { + val tableName1 = "order_line_1_col_ud" + val tableName2 = "order_line_2_row_ud" + snc.sql(s"create table $tableName1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int2_id', buckets '2')") + + snc.sql(s"create table $tableName2 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using row " + + "options( partition_by 'ol_1_int2_id', buckets '2')") + + insertRows(tableName1, 1000) + insertRows(tableName2, 1000) + update_delete_query1(tableName1, 1) + update_delete_query2(tableName1, 2) + update_delete_query3(tableName1, 3, 2) + + update_delete_query1(tableName2, 4) + update_delete_query2(tableName2, 5) + update_delete_query3(tableName2, 6, 1) + 
} finally { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + def insertRows2(tableName: String, numRows: Int): Unit = { + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val rows = (1 to numRows).toSeq + val stmt = conn.createStatement() + try { + var i = 1 + rows.foreach(d => { + val d1 = d + 1 + stmt.addBatch(s"insert into $tableName values($d, $d1, '$d1')") + i += 1 + if (i % 1000 == 0) { + stmt.executeBatch() + i = 0 + } + }) + stmt.executeBatch() + logInfo(s"insertRows2: committed $numRows rows") + } finally { + stmt.close() + conn.close() + } + } + + def insertInto(tableName1: String, tableName2: String, rowsExpected: Int): Unit = { + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val stmt = conn.createStatement() + try { + val numRows = stmt.executeUpdate(s"insert into $tableName1 select * from $tableName2") + logInfo(s"insertInto $numRows rows") + assert(numRows == rowsExpected) + } finally { + stmt.close() + conn.close() + } + } + + def putInto(tableName1: String, tableName2: String, rowsExpected: Int): Unit = { + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val stmt = conn.createStatement() + try { + val numRows = stmt.executeUpdate(s"put into $tableName1 select * from $tableName2") + logInfo(s"putInto $numRows rows") + assert(numRows == rowsExpected) + } finally { + stmt.close() + conn.close() + } + } + + test("put into on row table") { + SnappySession.getPlanCache.invalidateAll() + assert(SnappySession.getPlanCache.asMap().size() == 0) + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = true + + def createTable(tableName: String): Unit = + snc.sql(s"create table $tableName (ol_1_int_id integer primary key," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using row " + + "options( partition_by 'ol_1_int_id', buckets '2')" + + // TODO SNAP-1945: This leads to duplicate key value error + // "options( 
partition_by 'ol_1_int2_id', buckets '2')" + + "") + + try { + val tableName1 = "order_line_1_row_pi" + val tableName2 = "order_line_2_row_pi" + val tableName3 = "order_line_3_row_pi" + createTable(tableName1) + createTable(tableName2) + createTable(tableName3) + + insertRows(tableName1, 10) + insertInto(tableName3, tableName1, 10) + + insertRows2(tableName2, 5) + putInto(tableName3, tableName2, 5) + + val df = snc.sql(s"select * from $tableName3") + assert(df.count() == 10) + var assertionNotFailed = true + df.foreach(r => { + val col1 = r.getInt(0) + val col2 = r.getInt(1) + if (col1 < 6) { + assertionNotFailed = assertionNotFailed && (col1 + 1 == col2) + } else { + assertionNotFailed = assertionNotFailed && (col1 == col2) + } + }) + assert(assertionNotFailed) + } finally { + SnappyTableStatsProviderService.TEST_SUSPEND_CACHE_INVALIDATION = false + } + } + + test("Spark caching using SQL") { + // first test using session + val sc = this.sc + val session = this.snc.snappySession + ColumnTableBatchInsertTest.testSparkCachingUsingSQL(sc, session.sql, session.catalog.isCached, + df => session.sharedState.cacheManager.lookupCachedData(df).isDefined) + + // next using JDBC connection + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + try { + val stmt = conn.createStatement() + // dummy query to create session for connection + stmt.executeQuery("show tables") + val allSessions = SnappySessionPerConnection.getAllSessions + // only one connection session should be present + assert(allSessions.length === 1) + val connSession = allSessions.head + // skip the "isCached" checks with JDBC since session is different for JDBC connection + ColumnTableBatchInsertTest.testSparkCachingUsingSQL(sc, + SnappyFunSuite.resultSetToDataset(connSession, stmt), connSession.catalog.isCached, + df => connSession.sharedState.cacheManager.lookupCachedData(df).isDefined) + stmt.close() + } finally { + conn.close() + } + } + + test("Test Bug SNAP-2707 with jdbc 
connection") { + + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val stmt = conn.createStatement() + snc.sql("drop table if exists t") + snc.sql("create table t(id integer primary key, str string) using row") + stmt.execute("put into t values(100, 'aa')") + stmt.execute("put into t (id, str) values (101, 'bb') ") + stmt.execute("put into t values(102, 'cc')") + stmt.execute("put into t values(102, 'dd')") + assertEquals(3, snc.sql("select * from t").count()) + val rs = snc.sql("select str from t where id = 102") + val rows = rs.collect() + for (row <- rows) { + assertEquals("dd", row.getAs[String]("str")) + } + + snc.sql("drop table if exists t1") + snc.sql("create table t1(id integer, id2 string) using column options(key_columns 'id')") + stmt.execute("put into t1 values(100, 'aa') ") + stmt.execute("put into t1 (id, id2) values(101, 'sb') ") + stmt.execute("put into t1 values(102, 'cc')") + stmt.execute("put into t1 values(102, 'dd')") + assertEquals(3, snc.sql("select * from t1").count()) + val rs1 = snc.sql("select id2 from t1 where id = 102") + val rows1 = rs1.collect() + for (row <- rows1) { + assertEquals("dd", row.getAs[String]("id2")) + } + + snc.sql("drop table if exists t2") + snc.sql("create table t2(id integer, id2 string) using column " + + "options(key_columns 'id', COLUMN_MAX_DELTA_ROWS '1', buckets '1')") + for (i <- 1 to 10) { + stmt.execute("insert into t2 values(" + i + ",'test" + i + "')") + } + + for (i <- 1 to 10) { + stmt.execute("put into t2 values(" + i + ",'test" + i + 1 + "')") + } + + val rs2 = snc.sql("select * from t2 order by id") + assertEquals(10, rs2.count()) + val rows2 = rs2.collect() + var i = 1 + for (row <- rows2) { + assertEquals("test" + i + 1, row.getAs[String]("id2")) + i = i + 1 + } + + snc.sql("drop table if exists columntable") + snc.sql("CREATE TABLE columnTable (bigIntCol BIGINT," + + " binaryCol1 BINARY, boolCol BOOLEAN , byteCol BYTE," + + " charCol CHAR( 30 ) , dateCol DATE , 
decimalCol DECIMAL( 10, 2 ) ," + + " doubleCol DOUBLE , floatCol FLOAT , intCol INT , integerCol INTEGER," + + " longVarcharCol LONG , numericCol NUMERIC, numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION, realCol REAL, stringCol STRING," + + " timestampCol TIMESTAMP , varcharCol VARCHAR( 20 ))" + + " using COLUMN options(BUCKETS '8', key_columns 'bigIntcol');") + stmt.execute("put into columntable values(-10, NULL, true, 56, 'ABC456'," + + " current_date, -66, 0.0111, -2.225E-307, -10, 10, 123456, -1, 1," + + " 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY')") + stmt.execute("put into columntable (bigIntCol, binaryCol1, boolCol, byteCol," + + " charCol, dateCol , decimalCol , doubleCol , floatCol , intCol)" + + " values (1000, 1010, FALSE, 97,'1234567890abcdefghij'," + + " date('1970-01-08'), 66, 2.2, 1.0E8, 1000)") + assertEquals(2, snc.sql("select * from columntable").count()) + } + + test("Test Bug SNAP-2707 with snappy session") { + + snc.sql("drop table if exists t") + snc.sql("create table t(id integer primary key, STR string) using row ") + snc.sql("put into t values(100, 'aa')") + snc.sql("put into t (id, str) values (101, 'bb') ") + snc.sql("put into t (id) values (104) ") + snc.sql("put into t values(102, 'cc')") + snc.sql("put into t values(102, 'dd')") + assertEquals(4, snc.sql("select * from t").count()) + val rs = snc.sql("select STR from t where id = 102") + val rows = rs.collect() + for (row <- rows) { + assertEquals("dd", row.getAs[String]("str")) + } + + snc.sql("drop table if exists t1") + snc.sql("create table t1(id integer, ID2 string) using column options(key_columns 'id')") + snc.sql("put into t1 (id, id2) values (101, 'bb') ") + snc.sql("put into t1 values (100, 'aa') ") + snc.sql("put into t1 (id) values (104) ") + snc.sql("put into t1 values(102, 'cc')") + snc.sql("put into t1 values(102, 'dd')") + snc.sql("put into t1 values(103, NULL)") + assertEquals(5, snc.sql("select * from t1").count()) + val rs1 = 
snc.sql("select id2 from t1 where id = 102") + val rows1 = rs1.collect() + for (row <- rows1) { + assertEquals("dd", row.getAs[String]("id2")) + } + + + snc.sql("drop table if exists t2") + snc.sql("create table t2(id integer, ID2 string) using column " + + "options(key_columns 'id', COLUMN_MAX_DELTA_ROWS '1', buckets '1')") + for (i <- 1 to 10) { + snc.sql("insert into t2 values(" + i + ",'test" + i + "')") + } + + for (i <- 1 to 10) { + snc.sql("put into t2 values(" + i + ",'test" + i + 1 + "')") + } + + val rs2 = snc.sql("select * from t2 order by id") + assertEquals(10, rs2.count()) + val rows2 = rs2.collect() + var i = 1 + for (row <- rows2) { + assertEquals("test" + i + 1, row.getAs[String]("id2")) + i = i + 1 + } + + snc.sql("drop table if exists columntable") + snc.sql("CREATE TABLE columnTable (bigIntCol BIGINT," + + " binaryCol1 BINARY, boolCol BOOLEAN , byteCol BYTE," + + " charCol CHAR( 30 ) , dateCol DATE , decimalCol DECIMAL( 10, 2 ) ," + + " doubleCol DOUBLE , floatCol FLOAT , intCol INT , integerCol INTEGER," + + " longVarcharCol LONG , numericCol NUMERIC, numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION, realCol REAL, stringCol STRING," + + " timestampCol TIMESTAMP , varcharCol VARCHAR( 20 ))" + + " using COLUMN options(BUCKETS '8', key_columns 'bigIntcol');") + snc.sql("put into columntable values(-10, NULL, true, 56, 'ABC456'," + + " current_date, -66, 0.0111, -2.225E-307, -10, 10, 123456, -1, 1," + + " 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY')") + snc.sql("put into columntable (bigIntCol, binaryCol1, boolCol, byteCol," + + " charCol, dateCol , decimalCol , doubleCol , floatCol , intCol)" + + " values (1000, 1010, FALSE, 97,'1234567890abcdefghij'," + + " date('1970-01-08'), 66, 2.2, 1.0E8, 1000)") + assertEquals(2, snc.sql("select * from columntable").count()) + } + + test("Test Bug SNAP-3038 with jdbc connection") { + + val conn = DriverManager.getConnection("jdbc:snappydata://" + serverHostPort) + val stmt = 
conn.createStatement() + snc.sql("drop schema if exists std1") + snc.sql("create schema std1") + snc.sql("drop table if exists std1.t") + snc.sql("create table std1.t(id integer primary key, str string) using row") + stmt.execute("put into std1.t values(100, 'aa')") + stmt.execute("put into std1.t (id, str) values (101, 'bb') ") + stmt.execute("put into std1.t values(102, 'cc')") + stmt.execute("put into std1.t values(102, 'dd')") + assertEquals(3, snc.sql("select * from std1.t").count()) + val rs = snc.sql("select str from std1.t where id = 102") + val rows = rs.collect() + for (row <- rows) { + assertEquals("dd", row.getAs[String]("str")) + } + + snc.sql("drop table if exists std1.t1") + snc.sql("create table std1.t1(id integer, id2 string) using column options(key_columns 'id')") + stmt.execute("put into std1.t1 values(100, 'aa') ") + stmt.execute("put into std1.t1 (id, id2) values(101, 'sb') ") + stmt.execute("put into std1.t1 values(102, 'cc')") + stmt.execute("put into std1.t1 values(102, 'dd')") + assertEquals(3, snc.sql("select * from std1.t1").count()) + val rs1 = snc.sql("select id2 from std1.t1 where id = 102") + val rows1 = rs1.collect() + for (row <- rows1) { + assertEquals("dd", row.getAs[String]("id2")) + } + + snc.sql("drop table if exists std1.t2") + snc.sql("create table std1.t2(id integer, id2 string) using column " + + "options(key_columns 'id', COLUMN_MAX_DELTA_ROWS '1', buckets '1')") + for (i <- 1 to 10) { + stmt.execute("insert into std1.t2 values(" + i + ",'test" + i + "')") + } + + for (i <- 1 to 10) { + stmt.execute("put into std1.t2 values(" + i + ",'test" + i + 1 + "')") + } + + val rs2 = snc.sql("select * from std1.t2 order by id") + assertEquals(10, rs2.count()) + val rows2 = rs2.collect() + var i = 1 + for (row <- rows2) { + assertEquals("test" + i + 1, row.getAs[String]("id2")) + i = i + 1 + } + + snc.sql("drop table if exists std1.columntable") + snc.sql("CREATE TABLE std1.columnTable (bigIntCol BIGINT," + + " binaryCol1 BINARY, 
boolCol BOOLEAN , byteCol BYTE," + + " charCol CHAR( 30 ) , dateCol DATE , decimalCol DECIMAL( 10, 2 ) ," + + " doubleCol DOUBLE , floatCol FLOAT , intCol INT , integerCol INTEGER," + + " longVarcharCol LONG , numericCol NUMERIC, numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION, realCol REAL, stringCol STRING," + + " timestampCol TIMESTAMP , varcharCol VARCHAR( 20 ))" + + " using COLUMN options(BUCKETS '8', key_columns 'bigIntcol');") + snc.sql("put into std1.columntable values(-10, NULL, true, 56, 'ABC456'," + + " current_date, -66, 0.0111, -2.225E-307, -10, 10, 123456, -1, 1," + + " 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY')") + snc.sql("put into std1.columntable (bigIntCol, binaryCol1, boolCol, byteCol," + + " charCol, dateCol , decimalCol , doubleCol , floatCol , intCol)" + + " values (1000, 1010, FALSE, 97,'1234567890abcdefghij'," + + " date('1970-01-08'), 66, 2.2, 1.0E8, 1000)") + assertEquals(2, snc.sql("select * from std1.columntable").count()) + } + + test("Test Bug SNAP-3038 with snappy session") { + + snc.sql("drop schema if exists std2") + snc.sql("create schema std2") + snc.sql("drop table if exists std2.t") + snc.sql("create table std2.t(id integer primary key, STR string) using row ") + snc.sql("put into std2.t values(100, 'aa')") + snc.sql("put into std2.t (id, str) values (101, 'bb') ") + snc.sql("put into std2.t (id) values (104) ") + snc.sql("put into std2.t values(102, 'cc')") + snc.sql("put into std2.t values(102, 'dd')") + assertEquals(4, snc.sql("select * from std2.t").count()) + val rs = snc.sql("select STR from std2.t where id = 102") + val rows = rs.collect() + for (row <- rows) { + assertEquals("dd", row.getAs[String]("str")) + } + + snc.sql("drop table if exists std2.t1") + snc.sql("create table std2.t1(id integer, ID2 string) using column options(key_columns 'id')") + snc.sql("put into std2.t1 (id, id2) values (101, 'bb') ") + snc.sql("put into std2.t1 values (100, 'aa') ") + snc.sql("put into std2.t1 (id) 
values (104) ") + snc.sql("put into std2.t1 values(102, 'cc')") + snc.sql("put into std2.t1 values(102, 'dd')") + snc.sql("put into std2.t1 values(103, NULL)") + assertEquals(5, snc.sql("select * from std2.t1").count()) + val rs1 = snc.sql("select id2 from std2.t1 where id = 102") + val rows1 = rs1.collect() + for (row <- rows1) { + assertEquals("dd", row.getAs[String]("id2")) + } + + + snc.sql("drop table if exists std2.t2") + snc.sql("create table std2.t2(id integer, ID2 string) using column " + + "options(key_columns 'id', COLUMN_MAX_DELTA_ROWS '1', buckets '1')") + for (i <- 1 to 10) { + snc.sql("insert into std2.t2 values(" + i + ",'test" + i + "')") + } + + for (i <- 1 to 10) { + snc.sql("put into std2.t2 values(" + i + ",'test" + i + 1 + "')") + } + + val rs2 = snc.sql("select * from std2.t2 order by id") + assertEquals(10, rs2.count()) + val rows2 = rs2.collect() + var i = 1 + for (row <- rows2) { + assertEquals("test" + i + 1, row.getAs[String]("id2")) + i = i + 1 + } + + snc.sql("drop table if exists std2.columntable") + snc.sql("CREATE TABLE std2.columntable (bigIntCol BIGINT," + + " binaryCol1 BINARY, boolCol BOOLEAN , byteCol BYTE," + + " charCol CHAR( 30 ) , dateCol DATE , decimalCol DECIMAL( 10, 2 ) ," + + " doubleCol DOUBLE , floatCol FLOAT , intCol INT , integerCol INTEGER," + + " longVarcharCol LONG , numericCol NUMERIC, numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION, realCol REAL, stringCol STRING," + + " timestampCol TIMESTAMP , varcharCol VARCHAR( 20 ))" + + " using COLUMN options(BUCKETS '8', key_columns 'bigIntcol');") + snc.sql("put into std2.columntable values(-10, NULL, true, 56, 'ABC456'," + + " current_date, -66, 0.0111, -2.225E-307, -10, 10, 123456, -1, 1," + + " 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY')") + snc.sql("put into std2.columntable (bigIntCol, binaryCol1, boolCol, byteCol," + + " charCol, dateCol , decimalCol , doubleCol , floatCol , intCol)" + + " values (1000, 1010, FALSE, 
97,'1234567890abcdefghij'," + + " date('1970-01-08'), 66, 2.2, 1.0E8, 1000)") + assertEquals(2, snc.sql("select * from std2.columntable").count()) + } +} diff --git a/cluster/src/test/scala/io/snappydata/cluster/StringAsClobTestSuite.scala b/cluster/src/test/scala/io/snappydata/cluster/StringAsClobTestSuite.scala new file mode 100644 index 0000000000..93e30f8311 --- /dev/null +++ b/cluster/src/test/scala/io/snappydata/cluster/StringAsClobTestSuite.scala @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+package io.snappydata.cluster
+
+import java.sql.DriverManager
+
+import com.pivotal.gemfirexd.TestUtil
+import com.pivotal.gemfirexd.internal.engine.distributed.utils.GemFireXDUtils
+import io.snappydata.SnappyFunSuite
+import org.scalatest.BeforeAndAfterAll
+
+/** Runs String-column DML over JDBC after lowering DML_MAX_CHUNK_SIZE to 50. */
+class StringAsClobTestSuite extends SnappyFunSuite with BeforeAndAfterAll {
+  private val default_chunk_size = GemFireXDUtils.DML_MAX_CHUNK_SIZE
+  var serverHostPort = ""
+  val tableName = "order_line_col"
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    // reduce the DML chunk size to force the lead node to send results in multiple batches
+    setDMLMaxChunkSize(50L)
+  }
+
+  override def afterAll(): Unit = {
+    setDMLMaxChunkSize(default_chunk_size) // put back the value captured at construction
+    super.afterAll()
+  }
+
+  /** Overwrites the global GemFireXDUtils.DML_MAX_CHUNK_SIZE with `size`. */
+  def setDMLMaxChunkSize(size: Long): Unit = {
+    GemFireXDUtils.DML_MAX_CHUNK_SIZE = size
+  }
+
+  test("Test char") {
+    snc // referenced only for its side effect -- presumably creates the context; confirm
+    val serverHostPort2 = TestUtil.startNetServer()
+    logInfo("network server started")
+    val conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2")
+    try {
+      val s = conn.createStatement()
+      s.executeUpdate(s"create table $tableName (id int not null primary key, " +
+        "name String, address String) USING row OPTIONS(partition_by 'id')")
+      s.executeUpdate(s"insert into $tableName values(111, 'aaa', 'hello')")
+      s.executeUpdate(s"insert into $tableName values(222, 'bbb', 'halo')")
+      s.executeUpdate(s"insert into $tableName values(333, 'aaa', 'hello')")
+      s.executeUpdate(s"update $tableName set name='abc1' where id=111")
+      val rs = s.executeQuery(s"select id, name, address from $tableName")
+      while (rs.next()) {
+        logInfo(s"${rs.getInt(1)} ${rs.getString(2)} ${rs.getString(3)}")
+      }
+      rs.close()
+      val rs2 = s.executeQuery(s"select id from $tableName where name='abc1'")
+      assert(rs2.next(), "updated row with name='abc1' was not returned")
+      assert(rs2.getInt(1) == 111)
+      rs2.close()
+    } finally {
+      conn.close() // runs even when an assertion above fails
+    }
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Column.scala
b/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Column.scala
new file mode 100644
index 0000000000..cb75bd1c26
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Column.scala
@@ -0,0 +1,104 @@
+package io.snappydata.filodb
+
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, Future}
+import scala.util.Random
+
+import org.apache.spark.sql.{DataFrame, SaveMode, SnappyContext}
+import org.apache.spark.{SparkConf, SparkContext}
+
+/**
+ * Micro-benchmark: loads a taxi-trip CSV file into the column table NYCTAXI
+ * of a local[8] SnappyContext, then times batches of concurrently submitted
+ * single-medallion aggregate queries.
+ */
+object FiloDBApp_Column {
+
+  /** Expects the path of the CSV file to ingest as args(0). */
+  def main(args: Array[String]): Unit = {
+    // fail with a readable message instead of ArrayIndexOutOfBoundsException
+    require(args.nonEmpty, "usage: FiloDBApp_Column PATH_TO_TAXI_CSV")
+    val taxiCsvFile = args(0)
+    val numRuns = 50
+
+    // Queries
+    val medallions = Array("23A89BC906FBB8BD110677FBB0B0A6C5",
+      "3F8D8853F7EF89A7630565DDA38E9526",
+      "3FE07421C46041F4801C7711F5971C63",
+      "789B8DC7F3CB06A645B0CDC24591B832",
+      "18E80475A4E491022BC2EF8559DABFD8",
+      "761033F2C6F96EBFA9F578E968FDEDE5",
+      "E4C72E0EE95C31D6B1FEFCF3F876EF90",
+      "AF1421FCAA4AE912BDFC996F8A9B5675",
+      "FB085B55ABF581ADBAD3E16283C78C01",
+      "29CBE2B638D6C9B7239D2CA7A72A70E9")
+
+    // trip info for a single driver within a given time range
+    val allQueries = (1 to 20).map { _ =>
+      val medallion = medallions(Random.nextInt(medallions.length))
+      s"SELECT avg(trip_distance), avg(passenger_count) from nyctaxi where medallion = '$medallion'" +
+        s" AND pickup_datetime > '2013-01-15T00Z' AND pickup_datetime < '2013-01-22T00Z'"
+    }.toArray
+
+    val conf = (new SparkConf).setMaster("local[8]")
+      .setAppName("test")
+      .set("spark.scheduler.mode", "FAIR")
+      .set("spark.ui.enabled", "false") // No need for UI when doing perf stuff
+
+    val sc = new SparkContext(conf)
+    try {
+      val snc = SnappyContext(sc)
+      snc.sql("set spark.sql.shuffle.partitions=4")
+      snc.dropTable("NYCTAXI", ifExists = true)
+
+      // Ingest file - note, this will take several minutes
+      puts("Starting ingestion...")
+      val csvDF = snc.read.format("com.databricks.spark.csv").
+        option("header", "true").option("inferSchema", "true").load(taxiCsvFile)
+
+      val p1 = Map("PARTITION_BY" -> "medallion", "BUCKETS" -> "5")
+      snc.createTable("NYCTAXI", "column", csvDF.schema, p1)
+      csvDF.write.format("column").mode(SaveMode.Append).options(p1).saveAsTable("NYCTAXI")
+      puts("Ingestion done.")
+
+      // run queries
+      snc.sql("select count(*) from NYCTAXI").collect().foreach { row =>
+        puts(s"Total count : $row")
+      }
+
+      import scala.concurrent.ExecutionContext.Implicits.global
+
+      // one DataFrame per distinct query text, reused by every batch
+      val cachedDF = new collection.mutable.HashMap[String, DataFrame]
+
+      def getCachedDF(query: String): DataFrame =
+        cachedDF.getOrElseUpdate(query, snc.sql(query))
+
+      def runQueries(queries: Array[String], numQueries: Int = 1000): Unit = {
+        val startMillis = System.currentTimeMillis
+        val futures = (0 until numQueries).map { _ =>
+          getCachedDF(queries(Random.nextInt(queries.length))).rdd.collectAsync
+        }
+        val fut = Future.sequence(futures.asInstanceOf[Seq[Future[Array[_]]]])
+        Await.result(fut, Duration.Inf)
+        val endMillis = System.currentTimeMillis
+        val qps = numQueries / ((endMillis - startMillis) / 1000.0)
+        puts(s"Ran $numQueries queries in ${endMillis - startMillis} millis. QPS = $qps")
+      }
+
+      puts("Warming up...")
+      runQueries(allQueries, 100)
+      Thread.sleep(2000)
+      puts("Now running queries for real...")
+      (0 until numRuns).foreach { _ => runQueries(allQueries) }
+    } finally {
+      sc.stop() // release the context even when ingestion or a query fails
+    }
+  }
+
+  def puts(s: String): Unit = {
+    //scalastyle:off
+    println(s)
+    //scalastyle:on
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Row.scala b/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Row.scala
new file mode 100644
index 0000000000..e2810bb283
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/filodb/FiloDBApp_Row.scala
@@ -0,0 +1,173 @@
+package io.snappydata.filodb
+
+import java.sql.{DriverManager, PreparedStatement}
+import java.util.concurrent.Executors
+
+import scala.concurrent.duration.Duration
+import scala.util.{Failure, Random, Success}
+
+import org.apache.spark.sql.{SaveMode, SnappyContext}
+import org.apache.spark.{SparkConf, SparkContext}
+
+/**
+ * This application depicts how a Spark cluster can
+ * connect to a Snappy cluster to fetch and query the tables
+ * using Scala APIs in a Spark App.
+ */
+object FiloDBApp_Row {
+
+  /**
+   * Loads the CSV file named by args(0) into the row table NYCTAXI, then times
+   * batches of prepared-statement queries executed on an 8-thread pool.
+   */
+  def main(args: Array[String]): Unit = {
+    // fail with a readable message instead of ArrayIndexOutOfBoundsException
+    require(args.nonEmpty, "usage: FiloDBApp_Row PATH_TO_TAXI_CSV")
+    val taxiCsvFile = args(0)
+    val numRuns = 50
+
+    // Queries
+    val medallions = Array("23A89BC906FBB8BD110677FBB0B0A6C5",
+      "3F8D8853F7EF89A7630565DDA38E9526",
+      "3FE07421C46041F4801C7711F5971C63",
+      "789B8DC7F3CB06A645B0CDC24591B832",
+      "18E80475A4E491022BC2EF8559DABFD8",
+      "761033F2C6F96EBFA9F578E968FDEDE5",
+      "E4C72E0EE95C31D6B1FEFCF3F876EF90",
+      "AF1421FCAA4AE912BDFC996F8A9B5675",
+      "FB085B55ABF581ADBAD3E16283C78C01",
+      "29CBE2B638D6C9B7239D2CA7A72A70E9")
+
+    // trip info for a single driver within a given time range
+    val allQueries = (1 to 20).map { _ =>
+      val medallion = medallions(Random.nextInt(medallions.length))
+      s"SELECT AVG(TRIP_DISTANCE), AVG(PASSENGER_COUNT) FROM NYCTAXI WHERE MEDALLION = '$medallion'" +
+        s" AND PICKUP_DATETIME > '2013-01-15T00Z' AND PICKUP_DATETIME < '2013-01-22T00Z'"
+    }.toArray
+
+    val props = Map(
+      "poolImpl" -> "tomcat",
+      "poolProperties" -> "maxActive=256"
+    )
+
+    val conf = (new SparkConf).setMaster("local[8]")
+      .setAppName("test")
+      .set("spark.scheduler.mode", "FAIR")
+      .set("spark.ui.enabled", "false")
+      .set(io.snappydata.Constant.STORE_PROPERTY_PREFIX + "conserve-sockets", "false")
+
+    val sc = new SparkContext(conf)
+    // dedicated pool for the JDBC queries; shut down in the finally block below
+    val pool = Executors.newFixedThreadPool(8)
+    // one prepared statement (on its own connection) per distinct query text
+    val cachedPS = new collection.mutable.HashMap[String, PreparedStatement]
+    try {
+      val snc = SnappyContext(sc)
+      snc.sql("set spark.sql.shuffle.partitions=4")
+      snc.dropTable("NYCTAXI", ifExists = true)
+
+      // Ingest file - note, this will take several minutes
+      puts("Starting ingestion...")
+      val csvDF = snc.read.format("com.databricks.spark.csv").
+        option("header", "true").option("inferSchema", "false").load(taxiCsvFile)
+      puts(s"csvDF count : ${csvDF.count()}")
+
+      val usingOptionString1 = s" USING row OPTIONS (PARTITION_BY 'MEDALLION', BUCKETS '8')"
+
+      snc.sql(
+        s"""CREATE TABLE NYCTAXI (MEDALLION VARCHAR(100) NOT NULL,
+        HACK_LICENSE VARCHAR(100),
+        VENDOR_ID VARCHAR(100),
+        RATE_CODE INTEGER,
+        STORE_AND_FWD_FLAG VARCHAR(100),
+        PICKUP_DATETIME VARCHAR(100),
+        DROPOFF_DATETIME VARCHAR(100),
+        PASSENGER_COUNT INTEGER,
+        TRIP_TIME_IN_SECS INTEGER,
+        TRIP_DISTANCE DOUBLE,
+        PICKUP_LONGITUDE DOUBLE,
+        PICKUP_LATITUDE DOUBLE,
+        DROPOFF_LONGITUDE DOUBLE,
+        DROPOFF_LATITUDE DOUBLE
+        )
+        """ + usingOptionString1
+      )
+
+      puts("Table Created")
+
+      snc.sql(
+        "CREATE INDEX INDEX_PICKUP_DATETIME ON NYCTAXI (MEDALLION)"
+      )
+      puts("Index Created")
+
+      csvDF.write.format("row").mode(SaveMode.Append).options(props).saveAsTable("NYCTAXI")
+      puts("Ingestion done.")
+
+      Thread.sleep(2000)
+
+      import scala.concurrent._
+
+      implicit val ec: ExecutionContext = ExecutionContext.fromExecutorService(pool)
+
+      def prepareForQuery(query: String): PreparedStatement = {
+        val connection = DriverManager.getConnection("jdbc:snappydata:")
+        connection.setAutoCommit(false)
+        connection.prepareStatement(query)
+      }
+
+      // The returned future completes only after the query has been executed and
+      // its result set drained, so awaiting it measures the query itself.
+      def getFuturePrepStatement(query: String): Future[PreparedStatement] = {
+        val task: Future[PreparedStatement] = Future {
+          // mutable.HashMap is not thread-safe and this body runs on pool threads
+          val ps = cachedPS.synchronized {
+            cachedPS.getOrElseUpdate(query, prepareForQuery(query))
+          }
+          // the same statement can be picked by several tasks; run it one at a time
+          ps.synchronized {
+            val rs = ps.executeQuery()
+            try {
+              while (rs.next()) {}
+            } finally {
+              rs.close()
+            }
+          }
+          ps
+        }
+        task.onComplete {
+          case Failure(e) => puts(s"D'oh! The Future failed: ${e.getMessage}")
+          case Success(_) => // nothing to report
+        }
+        task
+      }
+
+      def runQueries(queries: Array[String], numQueries: Int = 1000): Unit = {
+        val startMillis = System.currentTimeMillis
+        val futures = (0 until numQueries).map { _ =>
+          getFuturePrepStatement(queries(Random.nextInt(queries.length)))
+        }
+        // Await.ready (not result): a failed query is reported above, not rethrown
+        Await.ready(Future.sequence(futures.toList), Duration.Inf)
+        val endMillis = System.currentTimeMillis
+        val qps = numQueries / ((endMillis - startMillis) / 1000.0)
+        puts(s"Ran $numQueries queries in ${endMillis - startMillis} millis. QPS = $qps")
+      }
+
+      puts("Warming up...")
+      runQueries(allQueries, 100)
+      Thread.sleep(2000)
+      puts("Now running queries for real...")
+      (0 until numRuns).foreach { _ => runQueries(allQueries) }
+    } finally {
+      pool.shutdown() // its non-daemon worker threads would otherwise keep the JVM alive
+      try cachedPS.synchronized { cachedPS.values.foreach(_.getConnection.close()) }
+      finally sc.stop()
+    }
+  }
+
+  def puts(s: String): Unit = {
+    //scalastyle:off
+    println(s)
+    //scalastyle:on
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/filodb/FiloDb_SnappyJob.scala b/cluster/src/test/scala/io/snappydata/filodb/FiloDb_SnappyJob.scala
new file mode 100644
index 0000000000..22912cfce4
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/filodb/FiloDb_SnappyJob.scala
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package io.snappydata.filodb
+
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, Future}
+import scala.util.Random
+import scala.concurrent.ExecutionContext.Implicits.global
+
+import com.typesafe.config.Config
+
+import org.apache.spark.sql.{SnappyJobValid, SnappyJobValidation, DataFrame, SaveMode, SnappySession, SnappySQLJob}
+
+/**
+ * Job variant of the taxi benchmark: ingests a CSV file into the column table
+ * NYCTAXI and times batches of concurrently submitted aggregate queries.
+ */
+object FiloDb_SnappyJob extends SnappySQLJob {
+
+  // both fields are assigned by isValidJob and read by runSnappyJob
+  var nycTaxiDataPath: String = _
+  var sqlSparkProperties: Array[String] = _
+
+  // DataFrames keyed by query text, filled lazily by getCachedDF in runSnappyJob
+  val cachedDF = new collection.mutable.HashMap[String, DataFrame]
+
+  /**
+   * Issues a SQL "set" for every entry of sqlSparkProperties, loads the CSV file at
+   * nycTaxiDataPath into NYCTAXI, then runs a warm-up batch of 100 queries followed
+   * by 50 batches of 1000 queries, printing the measured rate of each batch.
+   */
+  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val sc = snSession.sqlContext
+    val taxiCsvFile: String = nycTaxiDataPath
+    val numRuns = 50 // Make this higher when doing performance profiling
+
+    for (prop <- sqlSparkProperties) {
+      sc.sql(s"set $prop")
+    }
+
+    // Queries
+    val medallions = Array("23A89BC906FBB8BD110677FBB0B0A6C5",
+      "3F8D8853F7EF89A7630565DDA38E9526",
+      "3FE07421C46041F4801C7711F5971C63",
+      "789B8DC7F3CB06A645B0CDC24591B832",
+      "18E80475A4E491022BC2EF8559DABFD8",
+      "761033F2C6F96EBFA9F578E968FDEDE5",
+      "E4C72E0EE95C31D6B1FEFCF3F876EF90",
+      "AF1421FCAA4AE912BDFC996F8A9B5675",
+      "FB085B55ABF581ADBAD3E16283C78C01",
+      "29CBE2B638D6C9B7239D2CA7A72A70E9")
+
+    // trip info for a single driver within a given time range
+    val allQueries = (1 to 20).map { _ =>
+      val medallion = medallions(Random.nextInt(medallions.length))
+      s"SELECT avg(trip_distance), avg(passenger_count) " +
+        s"from nyctaxi where medallion = '$medallion'" +
+        s" AND pickup_datetime > '2013-01-15T00Z' AND pickup_datetime < '2013-01-22T00Z'"
+    }.toArray
+
+    // Ingest file - note, this will take several minutes
+    puts("Starting ingestion...")
+
+    val csvDF = sc.read.format("com.databricks.spark.csv").
+      option("header", "true").option("inferSchema", "true").load(taxiCsvFile)
+
+    csvDF.printSchema()
+
+    val p1 = Map(("PARTITION_BY" -> "medallion") /* ,("BUCKETS"-> "5") */)
+    sc.createTable("NYCTAXI", "column", csvDF.schema, p1)
+    csvDF.write.format("column").mode(SaveMode.Append).options(p1).saveAsTable("NYCTAXI")
+    puts("Ingestion done.")
+
+    // count(*) yields one row that holds the count, so print that value rather than
+    // the number of result rows
+    val cnts = sc.sql("select count(*) from NYCTAXI").collect()
+    puts(s"Total data inserted ${cnts.headOption.map(_.get(0)).getOrElse(0)}")
+
+    // sc.sql runs once per distinct query text; later calls reuse the DataFrame
+    def getCachedDF(query: String): DataFrame =
+      cachedDF.getOrElseUpdate(query, sc.sql(query))
+
+    def runQueries(queries: Array[String], numQueries: Int = 1000): Unit = {
+      val startMillis = System.currentTimeMillis
+      val futures = (0 until numQueries).map { _ =>
+        getCachedDF(queries(Random.nextInt(queries.length))).rdd.collectAsync
+      }
+      val fut = Future.sequence(futures.asInstanceOf[Seq[Future[Array[_]]]])
+      Await.result(fut, Duration.Inf)
+      val endMillis = System.currentTimeMillis
+      val qps = numQueries / ((endMillis - startMillis) / 1000.0)
+      puts(s"Ran $numQueries queries in ${endMillis - startMillis} millis. QPS = $qps")
+    }
+
+    puts("Warming up...")
+    runQueries(allQueries, 100)
+    Thread.sleep(2000)
+    puts("Now running queries for real...")
+    (0 until numRuns).foreach { _ => runQueries(allQueries) }
+
+    // the SnappySession handed in is not stopped here
+  }
+
+  /**
+   * Reads the optional settings "dataLocation" and "sparkSqlProps" (a space-separated
+   * list) from the job config into the two fields above; always returns SnappyJobValid.
+   */
+  override def isValidJob(snSession: SnappySession, config: Config): SnappyJobValidation = {
+    nycTaxiDataPath = if (config.hasPath("dataLocation")) {
+      config.getString("dataLocation")
+    } else {
+      "/QASNAPPY/TPCH/DATA/1"
+    }
+
+    val sqlSparkProps =
+      if (config.hasPath("sparkSqlProps")) config.getString("sparkSqlProps") else ""
+
+    // drop empty tokens (missing setting, repeated spaces) so no bare "set" is issued
+    sqlSparkProperties = sqlSparkProps.split(" ").filter(_.nonEmpty)
+
+    SnappyJobValid()
+  }
+
+  def puts(s: String): Unit = {
+    //scalastyle:off
+    println(s)
+    //scalastyle:on
+  }
+}
diff --git a/cluster/src/test/scala/io/snappydata/tools/LeaderLauncherSpec.scala b/cluster/src/test/scala/io/snappydata/tools/LeaderLauncherSpec.scala
new file mode 100644
index 0000000000..360c5b11ba
--- /dev/null
+++ b/cluster/src/test/scala/io/snappydata/tools/LeaderLauncherSpec.scala
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package io.snappydata.tools
+
+import scala.collection.mutable.ArrayBuffer
+
+import io.snappydata.impl.LeadImpl
+import io.snappydata.{Constant, LocalizedMessages, Property}
+import org.scalatest.{Matchers, WordSpec}
+
+import org.apache.spark.sql.SnappyContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+/**
+ * BDD style tests for the startup arguments and job-server config of the lead node.
+ */
+class LeaderLauncherSpec extends WordSpec with Matchers {
+  /** True when the lower-cased launcher argument `param` starts with "-" + `prop`. */
+  private def doExtract(param: String, prop: String) = param.toLowerCase.startsWith("-" + prop)
+
+  "leader" when {
+    "started" should {
+      "have host-data as false" in {
+        {
+          val l = new LeadImpl
+          val opts = l.initStartupArgs((new SparkConf).set(
+            Property.McastPort.name, "4958"))
+
+          val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.GFXD_HOST_DATA)
+
+          assert(hdProp != null)
+          assert(!hdProp.toBoolean)
+        }
+
+        {
+          val l = new LeadImpl
+          val p = (new SparkConf).set(Property.McastPort.name, "4958")
+          p.set("host-data", "true")
+          val opts = l.initStartupArgs(p)
+
+          val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.GFXD_HOST_DATA)
+
+          assert(hdProp != null)
+          assert(!hdProp.toBoolean)
+        }
+      }
+
+      "have host-data true for loner" in {
+        // {
+        //
+        // val l = new LeadImpl
+        // val conf = new SparkConf().
+        // setMaster(s"${Constant.JDBC_URL_PREFIX}${Property.mcastPort}=0").
+        // setAppName("check hostdata true")
+        // val sc = new SparkContext(conf)
+        // try {
+        // val opts = l.initStartupArgs(conf, sc)
+        //
+        // val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+        // com.pivotal.gemfirexd.Attribute.GFXD_HOST_DATA)
+        //
+        // assert(hdProp != null)
+        // assert(hdProp.toBoolean == true)
+        // } finally {
+        // sc.stop()
+        // }
+        // }
+
+        {
+          // Stop if already any present
+          val sparkContext = SnappyContext.globalSparkContext
+          if (sparkContext != null) sparkContext.stop()
+
+          val l = new LeadImpl
+          val conf = (new SparkConf).
+            setMaster("local[3]").setAppName("with local master")
+          conf.set(Property.McastPort.name, "0")
+          conf.set(Constant.STORE_PROPERTY_PREFIX + "host-data", "false")
+          val sc = new SparkContext(conf)
+          try {
+            val opts = l.initStartupArgs(conf, sc)
+
+            val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+              com.pivotal.gemfirexd.Attribute.GFXD_HOST_DATA)
+
+            assert(hdProp != null)
+            assert(hdProp.toBoolean)
+          } finally {
+            sc.stop()
+          }
+        }
+      }
+
+      "always add implicit server group" in {
+        {
+          val l = new LeadImpl
+          val opts = l.initStartupArgs((new SparkConf).set(
+            Property.McastPort.name, "4958"))
+
+          val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.SERVER_GROUPS)
+
+          assert(hdProp != null)
+          assert(hdProp == LeadImpl.LEADER_SERVERGROUP)
+        }
+
+        {
+          val l = new LeadImpl
+          val p = (new SparkConf).set(Property.McastPort.name, "4958")
+          p.set(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.SERVER_GROUPS, "DUMMY,GRP")
+          val opts = l.initStartupArgs(p)
+
+          val hdProp = opts.get(Constant.STORE_PROPERTY_PREFIX +
+            com.pivotal.gemfirexd.Attribute.SERVER_GROUPS)
+
+          assert(hdProp != null)
+          assert(hdProp.endsWith("," + LeadImpl.LEADER_SERVERGROUP))
+        }
+      }
+
+      "not start net server" in {
+        val netServerProp: String = "run-netserver"
+
+        {
+          val l = new LeaderLauncher("Test default net server")
+          val opts = l.initStartupArgs(ArrayBuffer("start"))
+
+          val hdProp = opts.filter(doExtract(_, netServerProp))
+
+          assert(hdProp.length == 1)
+          assert(!hdProp(0).split("=")(1).toBoolean)
+        }
+
+        {
+          val l = new LeaderLauncher("Test overwrite net server")
+          val opts = l.initStartupArgs(ArrayBuffer("start", "-run-netserver=true"))
+
+          val hdProp = opts.filter(doExtract(_, netServerProp))
+
+          assert(hdProp.length == 1)
+          assert(!hdProp(0).split("=")(1).toBoolean)
+        }
+      }
+
+      "assert no zero arg message " in {
+        val error = intercept[AssertionError] {
+          new LeaderLauncher("Test default net server").initStartupArgs(
+            ArrayBuffer(), exitOnEmptyArgs = false)
+        }
+        // assert the message; a bare `.equals(...)` result would simply be discarded
+        assert(error.getMessage === LocalizedMessages.res.getTextMessage("SD_ZERO_ARGS"))
+      }
+
+      // NOTE(review): with the empty literal shown here, indexOf(replaceString) is 0 for
+      // every string, so the `== -1` checks below cannot hold -- confirm the intended marker.
+      val replaceString = ""
+      " have jobserver tmp directory redirected " in {
+        val l = new LeadImpl
+        val conf = l.getConfig(Array.empty)
+        val f = conf.getString("spark.jobserver.filedao.rootdir")
+        assert(f.indexOf(replaceString) == -1)
+        assert(f === "./spark-jobserver/filedao/data")
+        val d = conf.getString("spark.jobserver.datadao.rootdir")
+        assert(d.indexOf(replaceString) == -1)
+        assert(d === "./spark-jobserver/upload")
+        val s = conf.getString("spark.jobserver.sqldao.rootdir")
+        assert(s.indexOf(replaceString) == -1)
+        assert(s === "./spark-jobserver/sqldao/data")
+      }
+
+      " have jobserver tmp directory from syshome" in {
+        val directory = "/dummy"
+        val sysHomeKey =
+          com.pivotal.gemfirexd.internal.iapi.reference.Property.SYSTEM_HOME_PROPERTY
+        System.setProperty(sysHomeKey, directory)
+        try {
+          val l = new LeadImpl
+          val conf = l.getConfig(Array.empty)
+          val f = conf.getString("spark.jobserver.filedao.rootdir")
+          assert(f.indexOf(replaceString) == -1)
+          assert(f startsWith directory)
+          val d = conf.getString("spark.jobserver.datadao.rootdir")
+          assert(d.indexOf(replaceString) == -1)
+          assert(d startsWith directory)
+          val s = conf.getString("spark.jobserver.sqldao.rootdir")
+          assert(s.indexOf(replaceString) == -1)
+          assert(s startsWith directory)
+        } finally {
+          System.clearProperty(sysHomeKey) // undo the JVM-wide property even on failure
+        }
+      }
+    } // end started
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/memory/MemoryFunSuite.scala b/cluster/src/test/scala/org/apache/spark/memory/MemoryFunSuite.scala
new file mode 100644
index 0000000000..c4f22fb532
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/memory/MemoryFunSuite.scala
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.memory
+
+import com.pivotal.gemfirexd.TestUtil
+import io.snappydata.util.TestUtils
+import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll}
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.{SnappyContext, SnappySession, SparkSession}
+
+/** Shared fixture for the memory suites in this package. */
+class MemoryFunSuite extends SparkFunSuite with BeforeAndAfter with BeforeAndAfterAll {
+  override def afterAll(): Unit = {
+    try System.clearProperty("snappydata.umm.memtrace")
+    finally super.afterAll() // let the parent traits run their own cleanup
+  }
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    if (SnappyContext.globalSparkContext != null) {
+      SnappyContext.globalSparkContext.stop()
+    }
+    System.setProperty("snappydata.umm.memtrace", "true")
+  }
+
+  after {
+    if (SnappyContext.globalSparkContext != null) {
+      val snappySession = new SnappySession(SnappyContext.globalSparkContext)
+      TestUtils.dropAllSchemas(snappySession)
+      SnappyContext.globalSparkContext.stop()
+    }
+    TestUtil.stopNetServer()
+  }
+
+  // Throws AssertionError unless value1 differs from value2 by at most `error` percent
+  // (integer arithmetic) of value2. Only use if sure of the problem.
+  def assertApproximate(value1: Long, value2: Long, error: Int = 2): Unit = {
+    val withinLimit = value1 == value2 || Math.abs(value1 - value2) <= (value2 * error) / 100
+    if (!withinLimit) {
+      throw new java.lang.AssertionError(s"assertion " +
+        s"failed $value1 & $value2 are not within permissable limit")
+    }
+  }
+
+  /** Builds (or reuses) a local[*] SparkSession configured for SnappyUnifiedMemoryManager. */
+  private[memory] def createSparkSession(memoryFraction: Double,
+      storageFraction: Double,
+      sparkMemory: Long = 500000,
+      cachedBatchSize: Int = 500): SparkSession = {
+    SparkSession.builder
+      .appName(getClass.getName).master("local[*]")
+      .config(io.snappydata.Property.ColumnBatchSize.name, cachedBatchSize)
+      .config("spark.memory.fraction", memoryFraction)
+      .config("spark.memory.storageFraction", storageFraction)
+      .config("spark.testing.memory", sparkMemory)
+      .config("spark.testing.reservedMemory", "0")
+      .config("snappydata.store.critical-heap-percentage", "90")
+      .config("spark.testing.maxStorageFraction", "0.9")
+      .config("spark.memory.manager", "org.apache.spark.memory.SnappyUnifiedMemoryManager")
+      .config("spark.storage.unrollMemoryThreshold", 500)
+      .getOrCreate
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/memory/MemoryManagerStatsSuite.scala b/cluster/src/test/scala/org/apache/spark/memory/MemoryManagerStatsSuite.scala
new file mode 100644
index 0000000000..ee14cc245c
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/memory/MemoryManagerStatsSuite.scala
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.memory
+
+import io.snappydata.test.dunit.DistributedTestBase.InitializeRun
+
+import org.apache.spark.SparkEnv
+import org.apache.spark.sql.{SnappySession, SparkSession}
+
+/** Exercises the statistics object returned by SnappyUnifiedMemoryManager.wrapperStats. */
+class MemoryManagerStatsSuite extends MemoryFunSuite {
+
+  InitializeRun.setUp()
+
+  test("Test heap stats") {
+    val offHeap = false
+    val sparkSession = createSparkSession(1, 0.5)
+    new SnappySession(sparkSession.sparkContext)
+
+    val memoryManager = SparkEnv.get.memoryManager
+      .asInstanceOf[SnappyUnifiedMemoryManager]
+    val stats = memoryManager.wrapperStats
+    assert(stats.getMaxStorageSize(offHeap) == 450000)
+    assert(stats.getStoragePoolSize(offHeap) >= 200000)
+    val blockId = MemoryManagerCallback.storageBlockId
+    assert(!SparkEnv.get.memoryManager.acquireStorageMemory(blockId, 500000L, MemoryMode.ON_HEAP))
+    // Some other heap allocation from Snappy layer might have failed
+    assert(stats.getNumFailedStorageRequest(offHeap) >= 1)
+    assert(stats.getExecutionPoolSize(offHeap) == (500000 - stats.getStoragePoolSize(offHeap)))
+    memoryManager.dropAllObjects(MemoryMode.ON_HEAP)
+    assert(stats.getStorageMemoryUsed(offHeap) == 0)
+
+    val taskAttemptId = 0L
+    // artificially acquire more memory than is available
+    val numBytes =
+      SparkEnv.get.memoryManager.acquireExecutionMemory(500000L, taskAttemptId, MemoryMode.ON_HEAP)
+    assert(stats.getStoragePoolSize(offHeap) == 250000)
+    // Only can evict till original storage fraction
+    assert(stats.getExecutionPoolSize(offHeap) == numBytes)
+  }
+
+  test("Test off-heap stats") {
+    val offHeap = true
+    // createSparkSession(1, 0.5) settings, but unrollMemoryThreshold 50000 plus a store memory-size
+    val sparkSession = SparkSession
+      .builder
+      .appName(getClass.getName)
+      .master("local[*]")
+      .config(io.snappydata.Property.ColumnBatchSize.name, 500)
+      .config("spark.memory.fraction", 1)
+      .config("spark.memory.storageFraction", 0.5)
+      .config("spark.testing.memory", 500000)
+      .config("spark.testing.reservedMemory", "0")
+      .config("snappydata.store.critical-heap-percentage", "90")
+      .config("spark.testing.maxStorageFraction", "0.9")
+      .config("spark.memory.manager", "org.apache.spark.memory.SnappyUnifiedMemoryManager")
+      .config("spark.storage.unrollMemoryThreshold", 50000)
+      .config("snappydata.store.memory-size", 200000)
+      .getOrCreate
+
+    new SnappySession(sparkSession.sparkContext)
+
+    val memoryManager = SparkEnv.get.memoryManager
+      .asInstanceOf[SnappyUnifiedMemoryManager]
+    val stats = memoryManager.wrapperStats
+    assert(stats.getMaxStorageSize(offHeap) == 190000) // 95%
+    assert(stats.getStoragePoolSize(offHeap) >= 100000)
+    val blockId = MemoryManagerCallback.storageBlockId
+    // NOTE(review): this request uses MemoryMode.ON_HEAP although every surrounding check
+    // reads the off-heap counters -- possibly a copy-paste from the heap test; confirm.
+    assert(!SparkEnv.get.memoryManager.acquireStorageMemory(blockId, 500000L, MemoryMode.ON_HEAP))
+    assert(stats.getNumFailedStorageRequest(offHeap) >= 1)
+    assert(stats.getExecutionPoolSize(offHeap) == (200000 - stats.getStoragePoolSize(offHeap)))
+    memoryManager.dropAllObjects(MemoryMode.OFF_HEAP)
+    assert(stats.getStorageMemoryUsed(offHeap) == 0)
+
+    val taskAttemptId = 0L
+    // artificially acquire memory
+    val numBytes =
+      SparkEnv.get.memoryManager.acquireExecutionMemory(100000L,
+        taskAttemptId, MemoryMode.OFF_HEAP)
+    // Only can evict till original storage fraction
+    assert(stats.getStoragePoolSize(offHeap) == 100000)
+    assert(stats.getExecutionPoolSize(offHeap) == numBytes)
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/memory/SnappyLocalIndexAccountingSuite.scala b/cluster/src/test/scala/org/apache/spark/memory/SnappyLocalIndexAccountingSuite.scala
new file mode 100644
index 0000000000..6d874d90a8
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/memory/SnappyLocalIndexAccountingSuite.scala
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package org.apache.spark.memory
+
+import java.sql.DriverManager
+
+import com.gemstone.gemfire.internal.cache.LocalRegion
+import com.pivotal.gemfirexd.TestUtil
+import io.snappydata.SnappyTableStatsProviderService
+import io.snappydata.test.dunit.DistributedTestBase.InitializeRun
+
+import org.apache.spark.SparkEnv
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
+import org.apache.spark.sql.{Row, SnappyContext, SnappySession}
+
+
+/**
+ * Tests that creating, populating, dropping and recovering local indexes on
+ * row tables is reflected in `storageMemoryUsed` of the active memory manager.
+ *
+ * NOTE(review): the JDBC connections and statements opened by these tests are
+ * never closed in this file - confirm that the MemoryFunSuite cleanup (not
+ * visible here) disposes of them.
+ */
+class SnappyLocalIndexAccountingSuite extends MemoryFunSuite {
+
+  InitializeRun.setUp()
+
+  val struct = (new StructType())
+    .add(StructField("col1", IntegerType, true))
+    .add(StructField("col2", IntegerType, true))
+    .add(StructField("col3", IntegerType, true))
+
+
+  val memoryMode = MemoryMode.ON_HEAP
+
+  /** Storage memory currently reported by the memory manager of the live SparkEnv. */
+  private def currentStorageUsed: Long = SparkEnv.get.memoryManager.storageMemoryUsed
+
+  /** Invokes `dropAllObjects(memoryMode)` on the SnappyUnifiedMemoryManager. */
+  private def dropAllStorageObjects(): Unit = {
+    SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager]
+      .dropAllObjects(memoryMode)
+  }
+
+  /** Creates the five-column row table `t1` with the given OPTIONS clause. */
+  private def createFiveColumnTable(snSession: SnappySession, options: String): Unit = {
+    snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT, col4 INT, col5 INT" +
+      ") " + " USING row " +
+      options
+    )
+  }
+
+  /**
+   * Shared body of the "CreateIndex on ..." tests: creates the three-column row
+   * table `t1` with `options`, inserts ten rows, creates an index on col1 over
+   * JDBC and asserts that storage memory grew at table creation and again at
+   * index creation.
+   */
+  private def assertIndexCreationGrowsStorage(options: Map[String, String]): Unit = {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+    val beforeTableSize = currentStorageUsed
+    snSession.createTable("t1", "row", struct, options)
+    val afterTableSize = currentStorageUsed
+    assert(afterTableSize > beforeTableSize)
+
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i))
+    val afterInsertSize = currentStorageUsed
+
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1)")
+    val afterIndexCreationSize = currentStorageUsed
+    assert(afterIndexCreationSize > afterInsertSize)
+    snSession.dropTable("t1")
+  }
+
+  /**
+   * Shared body of the plain "Index recovery on ..." tests: builds an indexed,
+   * populated table `t1` with `options`, stops the global SparkContext, starts
+   * a fresh session and asserts that storage memory after the restart is
+   * approximately what it was before (tolerance argument 20).
+   */
+  private def assertIndexStorageSurvivesRestart(options: Map[String, String]): Unit = {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+    snSession.createTable("t1", "row", struct, options)
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i))
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1)")
+    val afterIndex = currentStorageUsed
+    SnappyContext.globalSparkContext.stop()
+    val restartedSparkSession = createSparkSession(1, 0, 2000000L)
+    val restartedSession = new SnappySession(restartedSparkSession.sparkContext)
+    val afterRecoverySize = currentStorageUsed
+    assertApproximate(afterIndex, afterRecoverySize, 20)
+    restartedSession.dropTable("t1")
+  }
+
+  test("Test Drop index releases memory"){
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+
+    val options = "OPTIONS (PARTITION_BY 'col1', " +
+      "BUCKETS '1')"
+    createFiveColumnTable(snSession, options)
+    for (i <- 1 to 20) snSession.insert("t1", Row(i, i, i, i, i))
+    // empty the storage accounting first; the test asserts it reads 0 afterwards
+    dropAllStorageObjects()
+    assert(currentStorageUsed == 0)
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1)")
+    val afterCreateIndex = currentStorageUsed
+    assert(afterCreateIndex > 0)
+    stmt.execute("drop index t1_index1")
+    val afterDropIndex = currentStorageUsed
+    assert(afterDropIndex < afterCreateIndex)
+  }
+
+  test("Test Put Overhead on row partitioned table") {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+
+    val options = "OPTIONS (PARTITION_BY 'col1', " +
+      "BUCKETS '1')"
+    createFiveColumnTable(snSession, options)
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i, i, i))
+    // baseline is read after dropAllObjects, i.e. before any index exists
+    dropAllStorageObjects()
+    val afterInsertSize_WithoutIndex = currentStorageUsed
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1)")
+    dropAllStorageObjects()
+    // same ten rows again, this time with the index in place
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i, i, i))
+    val afterIndexCreationSize = currentStorageUsed
+    assert(afterIndexCreationSize > afterInsertSize_WithoutIndex)
+    stmt.execute("drop index t1_index1")
+    snSession.dropTable("t1")
+  }
+
+
+
+  test("Test CreateIndex before insert") {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+    val options = Map("PARTITION_BY" -> "col1",
+      "BUCKETS" -> "1"
+    )
+    val beforeTableSize = currentStorageUsed
+    snSession.createTable("t1", "row", struct, options)
+    val afterTableSize = currentStorageUsed
+    assert(afterTableSize > beforeTableSize)
+
+
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1)")
+
+    // The index already exists for BOTH batches below, so the measurements are
+    // named after the batch rather than "with/without index".
+    dropAllStorageObjects()
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i))
+    val afterFirstBatch = currentStorageUsed
+    dropAllStorageObjects()
+    SnappyTableStatsProviderService.getService.getAggregatedStatsOnDemand
+    for (i <- 1 to 10) snSession.insert("t1", Row(i, i, i))
+    val afterSecondBatch = currentStorageUsed
+    assert(afterSecondBatch > afterFirstBatch)
+    snSession.dropTable("t1")
+  }
+
+
+  test("Test CreateIndex on row persistent partitioned table") {
+    assertIndexCreationGrowsStorage(Map("PARTITION_BY" -> "col1",
+      "BUCKETS" -> "1",
+      "PERSISTENT" -> "SYNCHRONOUS"
+    ))
+  }
+
+  test("Test CreateIndex on row replicated table") {
+    assertIndexCreationGrowsStorage(Map.empty[String, String])
+  }
+
+  test("Test CreateIndex on row persistent replicated table") {
+    assertIndexCreationGrowsStorage(Map("PERSISTENT" -> "SYNCHRONOUS"))
+  }
+
+  test("Test Index recovery on row partitioned table") {
+    assertIndexStorageSurvivesRestart(Map("PARTITION_BY" -> "col1",
+      "BUCKETS" -> "1",
+      "PERSISTENT" -> "SYNCHRONOUS"
+    ))
+  }
+
+  test("Test Index recovery on row replicated table") {
+    assertIndexStorageSurvivesRestart(Map("PERSISTENT" -> "SYNCHRONOUS"))
+  }
+
+  test("Test Index recovery on row partitioned table with overflow") {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+
+    val options = "OPTIONS (BUCKETS '1', " +
+      "PARTITION_BY 'Col1', " +
+      "PERSISTENCE 'none', " +
+      "EVICTION_BY 'LRUCOUNT 30')"
+    createFiveColumnTable(snSession, options)
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1, col2)")
+    dropAllStorageObjects()
+    // first 30 rows: exactly the LRUCOUNT limit
+    for (i <- 1 to 30) snSession.insert("t1", Row(i, i, i, i, i))
+    val afterThirtyEntries = currentStorageUsed
+    // NOTE(review): the divisor 3 is kept from the original although 30 rows are
+    // inserted; both sides use the same divisor, so only the comparison matters.
+    val scaledWithinLimit = afterThirtyEntries / 3
+    dropAllStorageObjects()
+    // 30 more rows go beyond the LRUCOUNT limit
+    for (i <- 31 to 60) snSession.insert("t1", Row(i, i, i, i, i))
+    val withOverflow = currentStorageUsed
+    val scaledWithOverflow = withOverflow / 3
+    assert(scaledWithOverflow > scaledWithinLimit)
+    snSession.dropTable("t1")
+  }
+
+  test("Test Index recovery on row replicated table with overflow") {
+    val sparkSession = createSparkSession(1, 0, 2000000L)
+    val snSession = new SnappySession(sparkSession.sparkContext)
+    val serverHostPort = TestUtil.startNetServer()
+    LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1
+
+    val options = "OPTIONS (EVICTION_BY 'LRUCOUNT 3', " +
+      "PERSISTENCE 'NONE')"
+    createFiveColumnTable(snSession, options)
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+    val stmt = conn.createStatement()
+    stmt.execute("create index t1_index1 on t1 (col1, col2)")
+    dropAllStorageObjects()
+    // first 3 rows: exactly the LRUCOUNT limit
+    for (i <- 1 to 3) snSession.insert("t1", Row(i, i, i, i, i))
+    val afterThreeEntries = currentStorageUsed
+    val avgEntrySize = afterThreeEntries / 3
+    dropAllStorageObjects()
+    // 3 more rows go beyond the LRUCOUNT limit
+    for (i <- 4 to 6) snSession.insert("t1", Row(i, i, i, i, i))
+    val withOverflow = currentStorageUsed
+    val avgEntrySizeWithOverflow = withOverflow / 3
+    assert(avgEntrySizeWithOverflow > avgEntrySize)
+    // all six rows must still be readable
+    assert(snSession.sql("select * from t1").collect().length == 6)
+    snSession.dropTable("t1")
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/memory/SnappyMemoryAccountingSuite.scala b/cluster/src/test/scala/org/apache/spark/memory/SnappyMemoryAccountingSuite.scala
new file mode 100644
index 0000000000..df997aa904
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/memory/SnappyMemoryAccountingSuite.scala
@@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.memory + +import java.nio.charset.StandardCharsets +import java.sql.SQLException +import java.util.Properties + +import scala.actors.Futures._ + +import com.gemstone.gemfire.cache.LowMemoryException +import com.gemstone.gemfire.internal.cache.{GemFireCacheImpl, LocalRegion} +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.cluster.ClusterManagerTestBase +import io.snappydata.externalstore.Data +import io.snappydata.test.dunit.DistributedTestBase.InitializeRun + +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{CachedDataFrame, Row, SnappyContext, SnappySession} +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.{SparkEnv, TaskContextImpl} + + +class SnappyMemoryAccountingSuite extends MemoryFunSuite { + + InitializeRun.setUp() + + + val struct = (new StructType()) + .add(StructField("col1", IntegerType, true)) + .add(StructField("col2", IntegerType, true)) + .add(StructField("col3", IntegerType, true)) + + val options = Map("PARTITION_BY" -> "col1", "EVICTION_BY" -> + "LRUHEAPPERCENT") + val coptions = Map("PARTITION_BY" -> "col1", "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT") + val cwoptions = Map("BUCKETS" -> "1", "EVICTION_BY" -> "LRUHEAPPERCENT") + val roptions = Map("EVICTION_BY" -> "LRUHEAPPERCENT", + "PERSISTENCE" -> "NONE") + val poptions = Map("PARTITION_BY" -> "col1", "BUCKETS" -> "1", "PERSISTENCE" -> 
"SYNCHRONOUS") + val memoryMode = MemoryMode.ON_HEAP + + test("Test drop table accounting for column partitioned table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "column", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.dropTable("t1") + val afterDropSize = SparkEnv.get.memoryManager.storageMemoryUsed + // For less number of rows in table the below assertion might + // fail as some of hive table store dropped table entries. + assert(afterDropSize < afterInsertSize) + } + + test("Test drop table accounting for replicated table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map.empty[String, String] + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.dropTable("t1") + val afterDropSize = SparkEnv.get.memoryManager.storageMemoryUsed + // For less number of rows in table the below assertion might + // fail as some of hive table store dropped table entries. 
+ assert(afterDropSize < afterInsertSize) + } + + test("Test truncate table accounting for replicated table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map.empty[String, String] + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.truncateTable("t1") + val afterTruncateSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTruncateSize < afterInsertSize) + } + + test("Test truncate table accounting for PR table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.truncateTable("t1") + val afterTruncateSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTruncateSize < afterInsertSize) + } + + test("Test delete all accounting for replicated table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = 
Map.empty[String, String] + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.sql("delete from t1") + val afetrDeleteSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afetrDeleteSize < afterInsertSize) + } + + test("Test delete all accounting for PR table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.sql("delete from t1") + val afetrDeleteSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afetrDeleteSize < afterInsertSize) + } + + test("Test drop table accounting for row partitioned table") { + val sparkSession = createSparkSession(1, 0, 2000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + + val beforeTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, options) + val afterTableSize = SparkEnv.get.memoryManager.storageMemoryUsed + 
assert(afterTableSize > beforeTableSize) + + val row = Row(100000000, 10000000, 10000000) + (1 to 10).map(i => snSession.insert("t1", row)) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.dropTable("t1") + val afterDropSize = SparkEnv.get.memoryManager.storageMemoryUsed + // For less number of rows in table the below assertion might + // fail as some of hive table store dropped table entries. + assert(afterDropSize < afterInsertSize) + } + + + test("Test accounting for column table with eviction") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + snSession.createTable("t1", "column", struct, options) + SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + assert(SparkEnv.get.memoryManager.storageMemoryUsed == 0) + val taskAttemptId = 0L + // artificially acquire memory + SparkEnv.get.memoryManager.acquireExecutionMemory(5000L, taskAttemptId, memoryMode) + + var totalEvictedBytes = 0L + + val memoryEventListener = new MemoryEventListener { + override def onEviction(objectName: String, evictedBytes: Long): Unit = { + totalEvictedBytes += evictedBytes + } + } + SnappyUnifiedMemoryManager.addMemoryEventListener(memoryEventListener) + + // 208 *10. 208 is the row size + memory overhead + + var rows = 0 + try { + for (i <- 1 to 100) { + val row = Row(100000000, 10000000, 10000000) + logInfo(s"RowCount1 = $rows") + snSession.insert("t1", row) + rows += 1 + logInfo(s"RowCount2 = $rows") + } + } catch { + case sqle: SQLException if sqle.getSQLState == "XCL54" => + logInfo(s"RowCount3 in exception = $rows") + assert(totalEvictedBytes > 0) + } + SparkEnv.get.memoryManager. 
+ asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + val count = snSession.sql("select * from t1").count() + assert(count >= rows) + snSession.dropTable("t1") + } + + test("Test accounting for recovery of row partitioned tables with lru count & no persistent") { + var sparkSession = createSparkSession(1, 0) + var snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = "OPTIONS (BUCKETS '1', " + + "PARTITION_BY 'Col1', " + + "PERSISTENCE 'none', " + + "EVICTION_BY 'LRUCOUNT 3')" + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + val beforeInsertMem = SparkEnv.get.memoryManager.storageMemoryUsed + + val row = Row(100000000, 10000000, 10000000) + (1 to 5).map(i => snSession.insert("t1", row)) + + SnappyContext.globalSparkContext.stop() + assert(SparkEnv.get == null) + sparkSession = createSparkSession(1, 0) + snSession = new SnappySession(sparkSession.sparkContext) + + assert(snSession.sql("select * from t1").collect().length == 0) + val afterRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + assert(beforeInsertMem == afterRebootMemory) // 4 bytes for hashmap. 
Need to check + snSession.dropTable("t1") + } + + test("Test accounting for recovery of row partitioned tables with lru count & persistent") { + assert(GemFireCacheImpl.getInstance == null) + var sparkSession = createSparkSession(1, 0) + var snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = "OPTIONS (BUCKETS '1', " + + "PARTITION_BY 'Col1', " + + "PERSISTENCE 'SYNCHRONOUS', " + + "EVICTION_BY 'LRUCOUNT 3', " + + "OVERFLOW 'true')" + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + + val row = Row(100000000, 10000000, 10000000) + (1 to 5).map(i => snSession.insert("t1", row)) + val beforeRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + SnappyContext.globalSparkContext.stop() + + assert(SparkEnv.get == null) + sparkSession = createSparkSession(1, 0) + snSession = new SnappySession(sparkSession.sparkContext) + + assert(snSession.sql("select * from t1").collect().length == 5) + + val afterRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + // Due to a design flaw in recovery we always recover one more value than the LRU limit. 
+ assertApproximate(beforeRebootMemory, afterRebootMemory) + snSession.dropTable("t1") + } + + test("Test accounting for recovery of row replicate tables with lru count & no persistent") { + + var sparkSession = createSparkSession(1, 0) + var snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = "OPTIONS (EVICTION_BY 'LRUCOUNT 3', OVERFLOW 'true', PERSISTENCE 'none')" + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) USING row " + + options) + val beforeInsertMem = SparkEnv.get.memoryManager.storageMemoryUsed + + val row = Row(100000000, 10000000, 10000000) + (1 to 5).map(i => snSession.insert("t1", row)) + + SnappyContext.globalSparkContext.stop() + assert(SparkEnv.get == null) + sparkSession = createSparkSession(1, 0) + snSession = new SnappySession(sparkSession.sparkContext) + + assert(snSession.sql("select * from t1").collect().length == 0) + val afterRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + assert(beforeInsertMem == afterRebootMemory) // 4 bytes for hashmap. 
Need to check + snSession.dropTable("t1") + } + + test("Test accounting for recovery of row replicate tables with lru count & persistent") { + assert(GemFireCacheImpl.getInstance == null) + var sparkSession = createSparkSession(1, 0) + var snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = "OPTIONS (EVICTION_BY 'LRUCOUNT 3', PERSISTENCE 'SYNCHRONOUS')" + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + + val row = Row(100000000, 10000000, 10000000) + (1 to 5).map(i => snSession.insert("t1", row)) + + val beforeRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + SnappyContext.globalSparkContext.stop() + assert(SparkEnv.get == null) + sparkSession = createSparkSession(1, 0) + snSession = new SnappySession(sparkSession.sparkContext) + + assert(snSession.sql("select * from t1").collect().length == 5) + val afterRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + // Due to a design flaw in recovery we always recover one more value than the LRU limit. 
+ assertApproximate(beforeRebootMemory, afterRebootMemory) + snSession.dropTable("t1") + } + + + test("Test Recovery column partitioned table") { + var sparkSession = createSparkSession(1, 0, 100000000L) + + var snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = "OPTIONS (BUCKETS '1', PARTITION_BY 'Col1', PERSISTENCE 'SYNCHRONOUS')" + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) " + " USING column " + + options + ) + + (1 to 10).map(i => snSession.insert("t1", Row(i, 10000000, 10000000))) + + val beforeRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + SnappyContext.globalSparkContext.stop() + assert(SparkEnv.get == null) + sparkSession = createSparkSession(1, 0, 1000000L) + snSession = new SnappySession(sparkSession.sparkContext) + + assert(snSession.sql("select * from t1").collect().length == 10) + val afterRebootMemory = SparkEnv.get.memoryManager.storageMemoryUsed + assertApproximate(beforeRebootMemory, afterRebootMemory, 4) + snSession.dropTable("t1") + } + + + test("Test accounting of eviction for row partitioned table with lru heap percent") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val options = Map("PARTITION_BY" -> "col1", + "PERSISTENCE" -> "none", + "BUCKETS" -> "1", + "EVICTION_BY" -> "LRUHEAPPERCENT" + ) + snSession.createTable("t1", "row", struct, options) + SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + + val taskAttemptId = 0L + // artificially acquire memory + SparkEnv.get.memoryManager.acquireExecutionMemory(4000L, taskAttemptId, memoryMode) + var memoryIncreaseDuetoEviction = 0L + val memoryEventListener = new MemoryEventListener { + override def onPositiveMemoryIncreaseDueToEviction(objectName: String, bytes: Long): Unit = { + memoryIncreaseDuetoEviction += bytes + } + } + 
SnappyUnifiedMemoryManager.addMemoryEventListener(memoryEventListener) + + // 208 *10. 208 is the row size + memory overhead + import scala.util.control.Breaks._ + + var rows = 0 + try { + breakable { + for (i <- 1 to 20) { + val row = Row(100000000, 10000000, 10000000) + snSession.insert("t1", row) + rows += 1 + } + } + } catch { + case e: Exception => { + assert(memoryIncreaseDuetoEviction > 0) + } + } + SparkEnv.get.memoryManager. + asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + val count = snSession.sql("select * from t1").count() + assert(count == rows) + snSession.dropTable("t1") + } + + test("Test accounting of delete for row partitioned tables") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "row", struct, poptions) + val afterCreateTable = SparkEnv.get.memoryManager.storageMemoryUsed + val region = GemFireCacheImpl.getExisting.getRegion("/APP/T1").asInstanceOf[LocalRegion] + val row = Row(1, 1, 1) + snSession.insert("t1", row) + assert(SparkEnv.get.memoryManager.storageMemoryUsed > 0) // borrowed from execution memory + snSession.delete("t1", "col1=1") + // we need to wait for atleast OLD_ENTRIES_CLEANER_TIME_INTERVAL + ClusterManagerTestBase.waitForCriterion( + (SparkEnv.get.memoryManager.storageMemoryUsed == afterCreateTable), + s"The memory after delete is not same even after waiting for oldEntryRemoval", + 4 * Misc.getGemFireCache.getOldEntryRemovalPeriod, 500, true) + snSession.dropTable("t1") + } + + + test("Test Spark Cache") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + SparkEnv.get.memoryManager. 
+ asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + val beforeCache = SparkEnv.get.memoryManager.storageMemoryUsed + val data = Seq(Seq(1, 2, 3), Seq(7, 8, 9), Seq(9, 2, 3), Seq(4, 2, 3), Seq(5, 6, 7)) + val rdd = sparkSession.sparkContext.parallelize(data, 2).map(s => new Data(s(0), s(1), s(2))) + val dataDF = snSession.createDataFrame(rdd) + dataDF.cache() + dataDF.count + val afterCache = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterCache > beforeCache) + } + + test("Test accounting of delete for replicated tables") { + val sparkSession = createSparkSession(1, 0, sparkMemory = 1200000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "row", struct, Map.empty[String, String]) + val afterCreateTable = SparkEnv.get.memoryManager.storageMemoryUsed + val row = Row(1, 1, 1) + snSession.insert("t1", row) + assert(SparkEnv.get.memoryManager.storageMemoryUsed > 0) // borrowed from execution memory + snSession.delete("t1", "col1=1") + // we need to wait for atleast OLD_ENTRIES_CLEANER_TIME_INTERVAL + ClusterManagerTestBase.waitForCriterion( + (SparkEnv.get.memoryManager.storageMemoryUsed == afterCreateTable), + s"The memory after delete is not same even after waiting for oldEntryRemoval", + 4 * Misc.getGemFireCache.getOldEntryRemovalPeriod, 500, true) + // assert(afterDelete == afterCreateTable) + snSession.dropTable("t1") + } + + test("Test accounting of update for replicated tables") { + val sparkSession = createSparkSession(1, 0, 1000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val struct = (new StructType()) + .add(StructField("col1", IntegerType, true)) + .add(StructField("col2", IntegerType, true)) + .add(StructField("col3", StringType, true)) + + snSession.createTable("t1", "row", struct, Map.empty[String, String]) + val row = Row(1, 1, "1") + snSession.insert("t1", row) + val 
afterInsert = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.update("t1", "COL1=1", Row("XXXXXXXXXX"), "COL3") + val afterUpdate = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterUpdate > afterInsert) + snSession.dropTable("t1") + } + + test("Test accounting of update for row partitioned tables") { + val sparkSession = createSparkSession(1, 0, 1000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val struct = (new StructType()) + .add(StructField("col1", IntegerType, true)) + .add(StructField("col2", IntegerType, true)) + .add(StructField("col3", StringType, true)) + + snSession.createTable("t1", "row", struct, roptions) + val row = Row(1, 1, "1") + snSession.insert("t1", row) + val afterInsert = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.update("t1", "COL1=1", Row("XXXXXXXXXX"), "COL3") + val afterUpdate = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterUpdate > afterInsert) + snSession.dropTable("t1") + } + + test("Test accounting of drop table for replicated tables") { + val sparkSession = createSparkSession(1, 0, sparkMemory = 1200000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val beforeCreateTable = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.createTable("t1", "row", struct, roptions) + val row = Row(1, 1, 1) + snSession.insert("t1", row) + snSession.dropTable("t1") + val afterDropTable = SparkEnv.get.memoryManager.storageMemoryUsed + // Approximate because drop table adds entry in system table which causes memory to grow a bit + assertApproximate(afterDropTable, beforeCreateTable, error = 10) + } + + test("Test storage for column tables with df inserts") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "column", struct, cwoptions) + val 
afterCreate = SparkEnv.get.memoryManager.storageMemoryUsed + val data = (1 to 10).toSeq + + val rdd = sparkSession.sparkContext.parallelize(data, 2) + .map(s => Data1(s, s + 1, s + 2)) + val dataDF = snSession.createDataFrame(rdd) + dataDF.write.insertInto("t1") + assert(SparkEnv.get.memoryManager.storageMemoryUsed > afterCreate) + val count = snSession.sql("select * from t1").count() + assert(count == 10) + snSession.dropTable("t1") + + } + + test("Concurrent query mem-check"){ + val sparkSession = createSparkSession(1, 0, 1000000) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 120 * 100 + + val options = "OPTIONS (BUCKETS '8', " + + "PARTITION_BY 'Col1')" + + snSession.sql("CREATE TABLE t1 (Col1 INT, Col2 INT, Col3 INT) " + " USING row " + + options + ) + val rowCount = 100 + + def runQueries(i : Int): Unit = { + for (_ <- 0 until rowCount) { + snSession.insert("t1", Row(1, 1, 1)) + } + } + + val tasks = for (i <- 1 to 5) yield future { + runQueries(i) + } + + // wait a lot + awaitAll(20000000L, tasks: _*) + + // Rough estimation of 120 bytes per row + assert(SparkEnv.get.memoryManager.storageMemoryUsed >= 120 * 100 * 5 ) + val count = snSession.sql("select * from t1").count() + assert(count == 500) + snSession.dropTable("t1") + } + + test("CachedDataFrame accounting") { + val sparkSession = createSparkSession(1, 1) + // create SnappySession to boot GemFireCache which is required for SnappyUMM + new SnappySession(sparkSession.sparkContext) + val fieldTypes: Array[DataType] = Array(LongType, StringType, BinaryType) + val converter = UnsafeProjection.create(fieldTypes) + + val row = new SpecificInternalRow(fieldTypes) + row.setLong(0, 0) + row.update(1, UTF8String.fromString("Hello")) + row.update(2, "World".getBytes(StandardCharsets.UTF_8)) + + val unsafeRow: UnsafeRow = converter.apply(row) + + SparkEnv.get.memoryManager + .acquireStorageMemory(MemoryManagerCallback.storageBlockId, 300, memoryMode) + + val 
taskMemoryManager = + new TaskMemoryManager(sparkSession.sparkContext.env.memoryManager, 0L) + val taskContext = + new TaskContextImpl(0, 0, taskAttemptId = 1, 0, taskMemoryManager, new Properties, null) + try { + CachedDataFrame(taskContext, Seq(unsafeRow).iterator) + assert(false , "Should not have obtained memory") + } catch { + case lme : LowMemoryException => // Success + } + } + + // @TODO Place holder for column partitioned tables. Enable them after Sumedh's changes + + // Enable test after Sumedh's checkin + ignore("Test accounting of delete for column partitioned tables") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "column", struct, poptions) + val afterCreateTable = SparkEnv.get.memoryManager.storageMemoryUsed + val row = Row(1, 1, 1) + snSession.insert("t1", row) + assert(SparkEnv.get.memoryManager.storageMemoryUsed > 0) // borrowed from execution memory + snSession.delete("t1", "col1=1") + val afterDelete = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterDelete == afterCreateTable) + snSession.dropTable("t1") + } + + ignore("Test accounting of update for column partitioned tables") { + val sparkSession = createSparkSession(1, 0, 1000000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + val struct = (new StructType()) + .add(StructField("col1", IntegerType, true)) + .add(StructField("col2", IntegerType, true)) + .add(StructField("col3", StringType, true)) + + snSession.createTable("t1", "column", struct, roptions) + val row = Row(1, 1, "1") + snSession.insert("t1", row) + val afterInsert = SparkEnv.get.memoryManager.storageMemoryUsed + snSession.update("t1", "COL1=1", Row("XXXXXXXXXX"), "COL3") + val afterUpdate = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterUpdate > afterInsert) + snSession.dropTable("t1") + } +} diff --git 
a/cluster/src/test/scala/org/apache/spark/memory/SnappyStorageEvictorSuite.scala b/cluster/src/test/scala/org/apache/spark/memory/SnappyStorageEvictorSuite.scala new file mode 100644 index 0000000000..b9f960895b --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/memory/SnappyStorageEvictorSuite.scala @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.memory + + +import com.gemstone.gemfire.internal.cache.LocalRegion +import io.snappydata.test.dunit.DistributedTestBase.InitializeRun + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.apache.spark.sql.{Row, SnappySession} +import org.apache.spark.storage.TestBlockId + +case class Data1(col1: Int, col2: Int, col3: Int) + +class SnappyStorageEvictorSuite extends MemoryFunSuite { + + InitializeRun.setUp() + + private val struct = new StructType() + .add(StructField("col1", IntegerType)) + .add(StructField("col2", IntegerType)) + .add(StructField("col3", IntegerType)) + + val options = Map("PARTITION_BY" -> "col1", + "EVICTION_BY" -> "LRUHEAPPERCENT", + "PERSISTENCE" -> "none") + val coptions = Map("PARTITION_BY" -> "col1", + "BUCKETS" -> "1", "EVICTION_BY" -> "LRUHEAPPERCENT") + val cwoptions = Map("EVICTION_BY" -> "LRUHEAPPERCENT") + val roptions = Map("EVICTION_BY" -> "LRUHEAPPERCENT") + 
val memoryMode = MemoryMode.ON_HEAP + + test("Test UnRollMemory") { + val sparkSession = createSparkSession(1, 0) + new SnappySession(sparkSession.sparkContext) // initialize SnappyData components + val memoryManager = SparkEnv.get.memoryManager + .asInstanceOf[SnappyUnifiedMemoryManager] + memoryManager.dropAllObjects(memoryMode) + assert(memoryManager.storageMemoryUsed == 0) + val blockId = TestBlockId(s"SNAPPY_STORAGE_BLOCK_ID_test") + memoryManager.acquireUnrollMemory(blockId, 500, memoryMode) + + assert(memoryManager.storageMemoryUsed == 500) + val key = new MemoryOwner("_SPARK_CACHE_", memoryMode) + assert(memoryManager.memoryForObject.get(key) == 500) + memoryManager.releaseUnrollMemory(500, memoryMode) + + assert(memoryManager.getStoragePoolMemoryUsed(MemoryMode.OFF_HEAP) + + memoryManager.getStoragePoolMemoryUsed(MemoryMode.ON_HEAP) == 0) + assert(memoryManager.memoryForObject.get(key) == 0) + } + + + test("Test storage when storage can borrow from execution memory") { + val sparkSession = createSparkSession(1, 0) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "row", struct, options) + SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + assert(SparkEnv.get.memoryManager.storageMemoryUsed == 0) + val row = Row(1, 1, 1) + snSession.insert("t1", row) + val afterInsertSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert( afterInsertSize > 0) // borrowed from execution memory + snSession.delete("t1", "col1=1") + val afterDeleteSize = SparkEnv.get.memoryManager.storageMemoryUsed + assert(afterDeleteSize < afterInsertSize) + snSession.dropTable("t1") + } + + test("Test storage when storage can not borrow from execution memory") { + val sparkSession = createSparkSession(1, 0.5, sparkMemory = 1500000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + 
snSession.createTable("t1", "row", struct, options) + var memoryIncreaseDuetoEviction = 0L + + val memoryEventListener = new MemoryEventListener { + override def onPositiveMemoryIncreaseDueToEviction(objectName: String, bytes: Long): Unit = { + memoryIncreaseDuetoEviction += bytes + } + } + SnappyUnifiedMemoryManager.addMemoryEventListener(memoryEventListener) + + val snappyMemoryManager = SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager] + + snappyMemoryManager.dropAllObjects(memoryMode) + assert(SparkEnv.get.memoryManager.storageMemoryUsed == 0) + val taskAttemptId = 0L + // artificially acquire memory + SparkEnv.get.memoryManager.acquireExecutionMemory(500L, taskAttemptId, memoryMode) + assert(SparkEnv.get.memoryManager.executionMemoryUsed == 500) + + import scala.util.control.Breaks._ + + var numRows = 0 + try { + breakable { + for (i <- 1 to 20) { + val rows = (1 to 1000).map(j => Row(i * 1000 + j, i, j)) + snSession.insert("t1", rows: _*) + numRows += 1000 + } + fail("Should not have reached here due to LowMemory") + } + } catch { + case _: Exception => + assert(memoryIncreaseDuetoEviction > 0) + assert(snappyMemoryManager.wrapperStats.getNumFailedEvictionRequest(false) > 1) + } + snappyMemoryManager.dropAllObjects(memoryMode) + SparkEnv.get.memoryManager.releaseExecutionMemory(500L, taskAttemptId, memoryMode) + val count = snSession.sql("select * from t1").count() + assert(count >= numRows) + snSession.dropTable("t1") + } + + test("Test eviction when storage memory has borrowed some memory from execution") { + val sparkSession = createSparkSession(1, 0.5, 1500000L) + val snSession = new SnappySession(sparkSession.sparkContext) + LocalRegion.MAX_VALUE_BEFORE_ACQUIRE = 1 + snSession.createTable("t1", "row", struct, options) + SparkEnv.get.memoryManager.asInstanceOf[SnappyUnifiedMemoryManager].dropAllObjects(memoryMode) + assert(SparkEnv.get.memoryManager.storageMemoryUsed == 0) + + (1 to 6).map(i => { + val row = Row(i, i, i) + 
snSession.insert("t1", row) + }) + assert(SparkEnv.get.memoryManager.storageMemoryUsed > 500L) + // based on 32 bytes value and 88 bytes entry overhead + val count = snSession.sql("select * from t1").count() + assert(count == 6) + + // @TODO Uncomment this assertion up once we set per + // region entry overhead and put a check before eviction + // assert(SparkEnv.get.memoryManager.storageMemoryUsed == 500L) + val otherExecutorThread = new Thread(new Runnable { + def run() { + // This should not hang as we are dropping the table after + // this thread is executed. + SparkEnv.get.memoryManager.acquireExecutionMemory(750L, 1L, memoryMode) + } + }) + otherExecutorThread.start() + snSession.dropTable("t1") + + if (otherExecutorThread.isAlive) { + otherExecutorThread.join(2000) // bounded join; Object.wait here would throw IllegalMonitorStateException (monitor not held) + } + + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/IndexTest.scala b/cluster/src/test/scala/org/apache/spark/sql/IndexTest.scala new file mode 100644 index 0000000000..4501ede1c9 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/IndexTest.scala @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file.
+ */ +package org.apache.spark.sql + +import java.util.TimeZone + +import io.snappydata.benchmark.TPCH_Queries +import io.snappydata.benchmark.snappy.tpch.QueryExecutor +import io.snappydata.benchmark.snappy.{SnappyAdapter, TPCH} +import io.snappydata.{PlanTest, Property, SnappyFunSuite} +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Sort} +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.apache.spark.util.Benchmark + +class IndexTest extends SnappyFunSuite with PlanTest with BeforeAndAfterEach { + + override def beforeAll(): Unit = { + // System.setProperty("org.codehaus.janino.source_debugging.enable", "true") + System.setProperty("spark.sql.codegen.comments", "true") + System.setProperty("spark.testing", "true") + super.beforeAll() + } + + override def afterAll(): Unit = { + // System.clearProperty("org.codehaus.janino.source_debugging.enable") + System.clearProperty("spark.sql.codegen.comments") + System.clearProperty("spark.testing") + Property.PartitionPruning.set(snc.conf, true) + super.afterAll() + } + + test("test PutInto and DeleteFrom") { + + snc.sql("create table checko (col1 Integer primary key, col2 Integer) using row options " + + "(partition_by 'col1') ") + + val data = sc.parallelize(Seq(Row(1, 1), Row(2, 2), Row(3, 3), Row(4, 4), Row(5, 5), + Row(6, 6))) + + val struct = StructType( + StructField("i", IntegerType, true) :: + StructField("b", IntegerType, false) :: Nil) + + val df = snc.createDataFrame(data, struct) + import snappy._ + df.write.putInto("APP.CHECKO") + + assert(snc.sql("select * from checko").count() == 6) + + df.selectExpr("i as col1", "b as col2").where("i > 4").write.deleteFrom("APP.CHECKO") + + assert(snc.sql("select * from checko").count() == 4) 
+ + df.filter("b < 2").selectExpr("i as col1").write.deleteFrom("APP.CHECKO") + + assert(snc.sql("select * from checko").count() == 3) + } + + test("check varchar index") { + /* + snc.sql("Create table ODS.ORGANIZATIONS(" + + "org_id bigint GENERATED BY DEFAULT AS IDENTITY NOT NULL," + + "ver bigint NOT NULL," + + "client_id bigint NOT NULL," + + "org_nm varchar(80), " + + "org_typ_ref_id bigint NOT NULL," + + "descr LONG VARCHAR," + + "empr_tax_id varchar(25)," + + "web_site varchar(100)," + + "eff_dt DATE," + + "expr_dt DATE," + + "vld_frm_dt " + + "TIMESTAMP NOT NULL," + + "vld_to_dt TIMESTAMP," + + "src_sys_ref_id LONG VARCHAR NOT NULL," + + "src_sys_rec_id LONG VARCHAR," + + "PRIMARY KEY (client_id,org_id)" + + ")" + + "using row options (partition_by 'org_id')" + + "") + */ + snc.sql("Create table ODS.ORGANIZATIONS(" + + "org_id bigint GENERATED BY DEFAULT AS IDENTITY NOT NULL," + + "client_id bigint NOT NULL," + + "descr LONG VARCHAR," + + "PRIMARY KEY (client_id,org_id)" + + ") " + + "using row options (partition_by 'org_id')" + + "") + + snc.sql("create index ods.idx_org on ODS.ORGANIZATIONS (CLIENT_ID, DESCR)") + + snc.sql("insert into ods.organizations(client_id, descr) values(8006, 'EL')") + snc.sql("update ods.organizations set descr = 'EL " + + " " + + " ' where client_id = 8006") + snc.sql("select * from ods.organizations").collect() + snc.sql("select client_id, descr from ods.organizations where client_id = 8006").collect() + } + + test("tpch queries") { + val qryProvider = new TPCH with SnappyAdapter + + val queries = Array("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", + "12", "13", "14", "15", "16", "17", "18", "19", + "20", "21", "22") + + TPCHUtils.createAndLoadTables(snc, true) + + val existing = snc.getConf(io.snappydata.Property.EnableExperimentalFeatures.name) + snc.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, "true") + + for ((q, i) <- queries.zipWithIndex) { + val qNum = i + 1 + val (expectedAnswer, _) = 
qryProvider.execute(qNum, str => { + snc.sql(str) + }) + var queryToBeExecuted = TPCH_Queries.getQuery(q, false, true) + val (newAnswer, df) = QueryExecutor.queryExecution(q, queryToBeExecuted, snc, false) + val isSorted = df.logicalPlan.collect { case s: Sort => s }.nonEmpty + QueryTest.sameRows(expectedAnswer, newAnswer, isSorted).foreach { results => // a mismatch message must fail the test, not be dropped + fail(s""" + |Results do not match for query: $qNum + |Timezone: ${TimeZone.getDefault} + |Timezone Env: ${sys.env.getOrElse("TZ", "")} + | + |${df.queryExecution} + |== Results == + |$results + """.stripMargin) + } + logInfo(s"Done $qNum") + } + snc.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, existing) + + } + + ignore("Benchmark tpch") { + + try { + val queries = Array("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", + "12", "13", "14", "15", "16", "17", "18", "19", + "20", "21", "22") + + sc(c => c.set("spark.local.dir", "/data/temp")) + + TPCHUtils.createAndLoadTables(snc, true) + + snc.sql( + s"""CREATE INDEX idx_orders_cust ON orders(o_custkey) + options (COLOCATE_WITH 'customer') + """) + + snc.sql( + s"""CREATE INDEX idx_lineitem_part ON lineitem(l_partkey) + options (COLOCATE_WITH 'part') + """) + + val tables = Seq("nation", "region", "supplier", "customer", "orders", "lineitem", "part", + "partsupp") + + val tableSizes = tables.map { tableName => + (tableName, snc.table(tableName).count()) + }.toMap + + logInfo(tableSizes.mkString("\n")) + runBenchmark("select o_orderkey from orders where o_orderkey = 1", tableSizes, 2) + runBenchmark("select o_orderkey from orders where o_orderkey = 32", tableSizes) + runBenchmark("select o_orderkey from orders where o_orderkey = 801", tableSizes) + runBenchmark("select o_orderkey from orders where o_orderkey = 1409", tableSizes) + // queries.foreach(q => benchmark(q, tableSizes)) + + } finally { + snc.sql(s"DROP INDEX if exists idx_orders_cust") + snc.sql(s"DROP INDEX if exists 
idx_lineitem_part") + } + } + + private def togglePruning(onOff: Boolean, 
snc: SnappyContext) = + Property.PartitionPruning.set(snc.conf, onOff) + + def runBenchmark(queryString: String, tableSizes: Map[String, Long], numSecs: Int = 0): Unit = { + + // This is an indirect hack to estimate the size of each query's input by traversing the + // logical plan and adding up the sizes of all tables that appear in the plan. Note that this + // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate + // per-row processing time for those cases. + val queryRelations = scala.collection.mutable.HashSet[String]() + snc.sql(queryString).queryExecution.logical.map { + case ur@UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table.toLowerCase) + case lp: LogicalPlan => + lp.expressions.foreach { + _ foreach { + case subquery: SubqueryExpression => + subquery.plan.foreach { + case ur@UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table.toLowerCase) + case _ => + } + case _ => + } + } + case _ => + } + val size = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum + + import scala.concurrent.duration._ + val b = new Benchmark(s"JoinOrder optimization", size, + warmupTime = numSecs.seconds) + b.addCase("WithOut Partition Pruning", numIters = 0, + prepare = () => togglePruning(onOff = false, snc), + cleanup = () => {})(_ => snc.sql(queryString).collect()) + b.addCase("With Partition Pruning", numIters = 0, + prepare = () => togglePruning(onOff = true, snc), + cleanup = () => {})(_ => snc.sql(queryString).collect()) + b.run() + } + + def benchmark(qNum: String, tableSizes: Map[String, Long]): Unit = { + + val qryProvider = new TPCH with SnappyAdapter + val query = qNum.toInt + + def executor(str: String) = snc.sql(str) + + val size = qryProvider.estimateSizes(query, tableSizes, executor) + logInfo(s"$qNum size $size") + val b = new Benchmark(s"JoinOrder optimization", size, minNumIters = 10) + + def case1(): Unit = snc.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, + 
"false") + + def case2(): Unit = snc.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, + "true") + + def case3(): Unit = { + snc.setConf(io.snappydata.Property.EnableExperimentalFeatures.name, + "true") + } + +// def evalSnappyMods(genPlan: Boolean) = TPCH_Snappy.queryExecution(qNum, snc, useIndex = false, +// genPlan = genPlan)._1.foreach(_ => ()) + + var queryToBeExecuted = TPCH_Queries.getQuery(qNum, false, true) + def evalSnappyMods(genPlan: Boolean) = QueryExecutor.queryExecution( + qNum, queryToBeExecuted, snc, false)._1.foreach(_ => ()) + + def evalBaseTPCH = qryProvider.execute(query, executor)._1.foreach(_ => ()) + + // b.addCase(s"$qNum baseTPCH index = F", prepare = case1)(i => evalBaseTPCH) + // b.addCase(s"$qNum baseTPCH joinOrder = T", prepare = case2)(i => evalBaseTPCH) + b.addCase(s"$qNum without PartitionPruning", numIters = 0, + prepare = () => togglePruning(onOff = false, snc), + cleanup = () => {})(_ => evalSnappyMods(false)) + b.addCase(s"$qNum with PartitionPruning", numIters = 0, + prepare = () => togglePruning(onOff = true, snc), + cleanup = () => {})(_ => evalSnappyMods(false)) + /* + b.addCase(s"$qNum snappyMods joinOrder = T", prepare = case2)(i => evalSnappyMods(false)) + b.addCase(s"$qNum baseTPCH index = T", prepare = case3)(i => evalBaseTPCH) + */ + b.run() + + } + + test("northwind queries") { + // val sctx = sc(c => c.set("spark.sql.inMemoryColumnarStorage.batchSize", "40000")) + // val snc = getOrCreate(sctx) + // NorthWindDUnitTest.createAndLoadColumnTables(snc) + // val s = "select distinct shipcountry from orders" + // snc.sql(s).collect() + // NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 1, classOf[LocalJoin]) + /* + Thread.sleep(1000 * 60 * 60) + NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 1, classOf[LocalJoin]) + */ + } + +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/LikeEscapeSimplificationSuite.scala b/cluster/src/test/scala/org/apache/spark/sql/LikeEscapeSimplificationSuite.scala
new file mode 100644 index 0000000000..8cffb6e57c --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/LikeEscapeSimplificationSuite.scala @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +/* + * Adapted from Spark's LikeSimplificationSuite having the license below. + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Contains, EndsWith, Length, StartsWith} +import org.apache.spark.sql.catalyst.optimizer.LikeSimplification +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.internal.LikeEscapeSimplification + +/** + * Tests for checking of like simplification with escaped wildcard characters. + */ +class LikeEscapeSimplificationSuite extends PlanTest { + + private val testRelation = LocalRelation('a.string) + + test("simplify Like into StartsWith") { + val originalQuery = testRelation.where(('a like "abc%") || ('a like "abc\\%")) + + val optimized = LikeEscapeSimplification(originalQuery.analyze) + val optimized2 = LikeSimplification(originalQuery.analyze) + val correctAnswer = testRelation + .where(StartsWith('a, "abc") || ('a === "abc%")).analyze + val correctAnswer2 = testRelation + .where(StartsWith('a, "abc") || ('a like "abc\\%")).analyze + + comparePlans(optimized, correctAnswer) + comparePlans(optimized2, correctAnswer2) + } + + test("simplify Like into EndsWith") { + val originalQuery = testRelation.where(('a like "%xyz") || ('a like "\\%xyz")) + + val optimized = LikeEscapeSimplification(originalQuery.analyze) + val optimized2 = LikeSimplification(originalQuery.analyze) + val correctAnswer = testRelation + .where(EndsWith('a, "xyz") || ('a === "%xyz")).analyze + val correctAnswer2 = testRelation + .where(EndsWith('a, "xyz") || ('a like "\\%xyz")).analyze + + comparePlans(optimized, correctAnswer) + comparePlans(optimized2, correctAnswer2) + } + + test("simplify Like into startsWith and EndsWith") { + val originalQuery = testRelation.where(('a like "abc\\%def") || + ('a like "abc%def") || ('a like "abc\\%%def")) + + // no optimization in LikeEscapeSimplification yet for this + val 
optimized = LikeEscapeSimplification(originalQuery.analyze) + val optimized2 = LikeSimplification(originalQuery.analyze) + val correctAnswer = testRelation + .where(('a === "abc%def") || ('a like "abc%def") || ('a like "abc\\%%def")).analyze + val correctAnswer2 = testRelation + .where(('a like "abc\\%def") || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def"))) || + ('a like "abc\\%%def")).analyze + + comparePlans(optimized, correctAnswer) + comparePlans(optimized2, correctAnswer2) + } + + test("simplify Like into Contains") { + val originalQuery = testRelation + .where(('a like "%mn%") || ('a like "%mn\\%") || + ('a like "%\\%mn%") || ('a like "%mn\\%%") || + ('a like "%\\%mn\\%%") || ('a like "%\\_mn\\_%") || + ('a like "%%mn\\%%") || ('a like "%\\%_mn\\_%")) + + val optimized = LikeEscapeSimplification(originalQuery.analyze) + val optimized2 = LikeSimplification(originalQuery.analyze) + val correctAnswer = testRelation + .where(Contains('a, "mn") || EndsWith('a, "mn%") || + Contains('a, "%mn") || Contains('a, "mn%") || + Contains('a, "%mn%") || Contains('a, "_mn_") || + ('a like "%%mn\\%%") || ('a like "%\\%_mn\\_%")).analyze + val correctAnswer2 = testRelation + .where(Contains('a, "mn") || ('a like "%mn\\%") || + ('a like "%\\%mn%") || ('a like "%mn\\%%") || + ('a like "%\\%mn\\%%") || ('a like "%\\_mn\\_%") || + ('a like "%%mn\\%%") || ('a like "%\\%_mn\\_%")).analyze + + comparePlans(optimized, correctAnswer) + comparePlans(optimized2, correctAnswer2) + } + + test("simplify Like into EqualTo") { + val originalQuery = testRelation.where(('a like "") || ('a like "abc") || + ('a like "a\\%b\\_c") || ('a like "\\%abc\\_") || ('a like "\\%abc_")) + + val optimized = LikeEscapeSimplification(originalQuery.analyze) + val optimized2 = LikeSimplification(originalQuery.analyze) + val correctAnswer = testRelation + .where(('a === "") || ('a === "abc") || ('a === "a%b_c") || + ('a === "%abc_") || ('a like "\\%abc_")).analyze + val correctAnswer2 = 
testRelation + .where(('a === "") || ('a === "abc") || ('a like "a\\%b\\_c") || + ('a like "\\%abc\\_") || ('a like "\\%abc_")).analyze + + comparePlans(optimized, correctAnswer) + comparePlans(optimized2, correctAnswer2) + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/MiscTest.scala b/cluster/src/test/scala/org/apache/spark/sql/MiscTest.scala new file mode 100644 index 0000000000..84ff19dfaa --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/MiscTest.scala @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.sql + +import scala.util.control.NonFatal +import io.snappydata.SnappyFunSuite +import org.apache.spark.Logging +import org.apache.spark.scheduler._ + +/** + * Tests that don't fall under any other category + */ +class MiscTest extends SnappyFunSuite with Logging { + + test("With Clause") { + snc.sql("drop table if exists nulls_table") + snc.sql(s"create table table1 (ol_1_int_id integer," + + s" ol_1_int2_id integer, ol_1_str_id STRING) using column " + + "options( partition_by 'ol_1_int2_id', buckets '2')") + + snc.sql("WITH temp_table AS ( SELECT ol_1_int2_id as col1," + + " sum(ol_1_int_id) AS col2 FROM table1 GROUP BY ol_1_int2_id)" + + " SELECT ol_1_int2_id FROM temp_table ," + + " table1 WHERE ol_1_int2_id = col1 LIMIT 100 ").collect() + } + + test("Pool test") { + // create a dummy pool + val rootPool = new Pool("lowlatency", SchedulingMode.FAIR, 0, 0) + sc.taskScheduler.rootPool.addSchedulable(rootPool) + + try { + snc.sql("set snappydata.scheduler.pool=xyz") + fail("unknown spark scheduler cannot be set") + } catch { + case _: IllegalArgumentException => // do nothing + case NonFatal(e) => + fail("setting unknown spark scheduler with a different error", e) + } + + snc.sql("set snappydata.scheduler.pool=lowlatency") + snc.sql("select 1").count + assert(sc.getLocalProperty("spark.scheduler.pool") === "lowlatency") + } + + test("SNAP-2434") { + val sqlstrs = Seq(s"select app.test.* from app.test", + s"select test.* from test", s"select * from test") + sqlstrs.foreach(sqlstr => + try { + snc.sql(sqlstr) + fail(s"this should have given TableNotFoundException") + } catch { + case tnfe: TableNotFoundException => + case ae: AnalysisException => if (!ae.getMessage().contains("Table or view not found")) { + throw ae + } + case t: Throwable => fail(s"unexpected exception $t") + } + ) + } + + test("SNAP-2438") { + try { + snc.sql(s"create table good(dept string, sal int) using column options()") + snc.sql(s"create table test.good(dept 
string, sal int) using column options()") + snc.sql(s"insert into test.good values('IT', 10000), ('HR', 9000), ('ADMIN', 4000)") + var arr = snc.sql(s"select * from good").collect() + assert(arr.size === 0) + snc.sql(s"set schema test") + arr = snc.sql(s"select * from good").collect() + assert(arr.size === 3) + } finally { + snc.sql(s"set schema app") + } + } + + test("SNAP-2440") { + snc.sql("create table test(col1 int not null, col2 int not null) using column") + snc.sql("create table emp.test1(col1 int not null, col2 int not null) using column") + snc.sql("insert into test values (1, 2), (4, 5), (6, 7)") + snc.sql("insert into emp.test1 values (1, 2), (4, 5), (6, 7)") + val sz = snc.sql(s"select * from app.test").collect().length + val sqlstrs = Seq("select app.test.* from app.test", + "select app.test.col1, app.test.col2 from app.test", + "select col1, col2 from app.test", + "select * from app.test", + "select test.* from test", + "select emp.test1.* from emp.test1", + "select emp.test1.col1, emp.test1.col2 from emp.test1", + "select col1, col2 from emp.test1", + "select * from emp.test1", + "select test1.* from emp.test1") + sqlstrs.foreach(sqlstr => { + val res = snc.sql(sqlstr).collect() + assert(res.length === 3) + assert(res(0).get(0) != res(0).get(1)) + }) + + val badsqls = Seq("select apppp.test.* from app.test", + "select app.test.col99, app.test.col2 from app.test", + "select testt.* from app.test", + "select apppp.test.* from emp.test1", + "select emp.test1.col99, emp.test1.col2 from emp.test1", + "select testt.* from emp.test1") + badsqls.foreach(sqlstr => + try { + snc.sql(sqlstr) + fail(s"expected analysis exception for $sqlstr") + } catch { + case ae: AnalysisException => // expected ... 
ignore + }) + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/NWQueries.scala b/cluster/src/test/scala/org/apache/spark/sql/NWQueries.scala new file mode 100644 index 0000000000..52290215d2 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/NWQueries.scala @@ -0,0 +1,1161 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql + +import io.snappydata.SnappyFunSuite + +import org.apache.spark.sql.execution._ + +object NWQueries extends SnappyFunSuite { + + val Q1: String = "SELECT CategoryID,CategoryName,Description FROM Categories" + + val Q2: String = "SELECT * FROM Customers" + + val Q3: String = "SELECT * FROM Orders" + + // SELECTing Specific Columns + val Q4: String = "SELECT FirstName, LastName FROM Employees" + + // Sorting By Multiple Columns + val Q5: String = "SELECT FirstName, LastName" + + " FROM Employees" + + " ORDER BY LastName" + + // Sorting By Column Position + val Q6: String = "SELECT Title, FirstName, LastName" + + " FROM Employees" + + " ORDER BY 1,3" + + // Ascending and Descending Sorts + val Q7: String = "SELECT Title, FirstName, LastName" + + " FROM Employees " + + " ORDER BY Title ASC, LastName DESC" + + // Checking for Equality + val Q8: String = "SELECT Title, FirstName, LastName" + + " FROM Employees " + + " WHERE Title = 'Sales Representative'" + + // Checking for 
Inequality + val Q9: String = "SELECT FirstName, LastName" + + " FROM Employees" + + " WHERE Title <> 'Sales Representative'" + + // Checking for Greater or Less Than + val Q10: String = "SELECT FirstName, LastName" + + " FROM Employees " + + " WHERE LastName >= 'N'" + + // Checking for NULL + val Q11: String = "SELECT FirstName, LastName" + + " FROM Employees " + + " WHERE Region IS NULL" + + // WHERE and ORDER BY + val Q12: String = "SELECT FirstName, LastName" + + " FROM Employees" + + " WHERE LastName >= 'N'" + + " ORDER BY LastName DESC" + + // Using the WHERE clause to check for equality or inequality + val Q13: String = "SELECT OrderDate, ShippedDate, CustomerID, Freight" + + " FROM Orders " + + " WHERE OrderDate = '1997-05-19'" + + // Using WHERE and ORDER BY Together + val Q14: String = "SELECT CompanyName, ContactName, Fax" + + " FROM Customers" + + " WHERE Fax IS NOT NULL" + + " ORDER BY CompanyName" + + // The IN Operator + val Q15: String = "SELECT TitleOfCourtesy, FirstName, LastName" + + " FROM Employees" + + " WHERE TitleOfCourtesy IN ('Ms.','Mrs.')" + + // The LIKE Operator + val Q16: String = "SELECT TitleOfCourtesy, FirstName, LastName" + + " FROM Employees" + + " WHERE TitleOfCourtesy LIKE 'M%'" + + val Q17: String = "SELECT FirstName, LastName, BirthDate" + + " FROM Employees" + + " WHERE BirthDate BETWEEN '1950-01-01' AND '1959-12-31 23:59:59'" + + val Q18: String = "SELECT CONCAT(FirstName, ' ', LastName)" + + " FROM Employees" + + val Q19: String = "SELECT OrderID, Freight, Freight * 1.1 AS FreightTotal" + + " FROM Orders" + + " WHERE Freight >= 500" + + val Q20: String = "SELECT SUM(Quantity) AS TotalUnits" + + " FROM Order_Details" + + " WHERE ProductID=3" + + val Q21: String = "SELECT MIN(HireDate) AS FirstHireDate," + + " MAX(HireDate) AS LastHireDate" + + " FROM Employees" + + val Q22: String = "SELECT City, COUNT(EmployeeID) AS NumEmployees" + + " FROM Employees " + + " WHERE Title = 'Sales Representative'" + + " GROUP BY City" + + " 
HAVING COUNT(EmployeeID) > 1" + + " ORDER BY NumEmployees" + + val Q23: String = "SELECT COUNT(DISTINCT City) AS NumCities" + + " FROM Employees" + + val Q24: String = "SELECT ProductID, AVG(UnitPrice) AS AveragePrice" + + " FROM Products " + + " GROUP BY ProductID " + + " HAVING AVG(UnitPrice) > 70" + + " ORDER BY AveragePrice" + + val Q25: String = "SELECT CompanyName FROM Customers WHERE CustomerID = " + + "(SELECT CustomerID FROM Orders WHERE OrderID = 10290)" + + val Q25_1: String = "SELECT CompanyName FROM Customers WHERE CustomerID = " + + "(SELECT CustomerID FROM Orders WHERE OrderID = 10295)" + + val Q25_2: String = "SELECT CompanyName FROM Customers WHERE CustomerID = " + + "(SELECT CustomerID FROM Orders WHERE OrderID = 10391)" + + val Q26: String = "SELECT CompanyName FROM Customers WHERE CustomerID IN (SELECT CustomerID " + + "FROM Orders WHERE OrderDate BETWEEN '1997-01-01' AND '1997-12-31')" + + val Q26_1: String = "SELECT CompanyName FROM Customers WHERE CustomerID IN (SELECT CustomerID " + + "FROM Orders WHERE OrderDate BETWEEN Cast('1997-09-30' as TIMESTAMP) AND " + + "Cast('1997-12-24' as TIMESTAMP))" + + val Q26_2: String = "SELECT CompanyName FROM Customers WHERE CustomerID IN (SELECT CustomerID " + + "FROM Orders WHERE OrderDate BETWEEN Cast('1997-10-01' as TIMESTAMP) AND " + + "Cast('1997-12-31' as TIMESTAMP))" + + val Q27: String = "SELECT ProductName, SupplierID FROM Products WHERE SupplierID" + + " IN (SELECT SupplierID FROM Suppliers WHERE CompanyName IN" + + "('Exotic Liquids', 'Grandma Kellys Homestead', 'Tokyo Traders'))" + + val Q27_1: String = "SELECT ProductName, SupplierID FROM Products WHERE SupplierID" + + " IN (SELECT SupplierID FROM Suppliers WHERE CompanyName IN" + + "('Pavlova Ltd.'))" + + val Q27_2: String = "SELECT ProductName, SupplierID FROM Products WHERE SupplierID" + + " IN (SELECT SupplierID FROM Suppliers WHERE CompanyName IN" + + "('Pavlova Ltd.', 'Karkki Oy'))" + + val Q27_3: String = "SELECT ProductName, 
SupplierID FROM Products WHERE SupplierID" + + " IN (SELECT SupplierID FROM Suppliers WHERE CompanyName IN" + + "('Grandma Kellys Homestead'))" + + val Q27_4: String = "SELECT ProductName, SupplierID FROM Products WHERE SupplierID" + + " IN (SELECT SupplierID FROM Suppliers WHERE CompanyName IN" + + "('Exotic Liquids', 'Karkki Oy'))" + + val Q28: String = "SELECT ProductName FROM Products WHERE CategoryID = (SELECT " + + "CategoryID FROM Categories WHERE CategoryName = 'Seafood')" + + val Q28_1: String = "SELECT ProductName FROM Products WHERE CategoryID = (SELECT " + + "CategoryID FROM Categories WHERE CategoryName = 'Condiments')" + + val Q28_2: String = "SELECT ProductName FROM Products WHERE CategoryID = (SELECT " + + "CategoryID FROM Categories WHERE CategoryName = 'Produce')" + + val Q29: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN " + + "(SELECT SupplierID FROM Products WHERE CategoryID = 8)" + + val Q29_1: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN " + + "(SELECT SupplierID FROM Products WHERE CategoryID = 5)" + + val Q29_2: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN " + + "(SELECT SupplierID FROM Products WHERE CategoryID = 3)" + + val Q30: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN (SELECT SupplierID" + + " FROM Products WHERE CategoryID = (SELECT CategoryID FROM Categories" + + " WHERE CategoryName = 'Seafood'))" + + val Q30_1: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN (SELECT SupplierID" + + " FROM Products WHERE CategoryID = (SELECT CategoryID FROM Categories" + + " WHERE CategoryName = 'Condiments'))" + + val Q30_2: String = "SELECT CompanyName FROM Suppliers WHERE SupplierID IN (SELECT SupplierID" + + " FROM Products WHERE CategoryID = (SELECT CategoryID FROM Categories" + + " WHERE CategoryName = 'Confections'))" + + val Q31: String = "SELECT Employees.EmployeeID, Employees.FirstName," + + " Employees.LastName, Orders.OrderID, Orders.OrderDate" + 
+ " FROM Employees JOIN Orders ON" + + " (Employees.EmployeeID = Orders.EmployeeID)" + + " ORDER BY Orders.OrderDate" + + val Q31_1: String = "SELECT Employees.EmployeeID, Employees.FirstName," + + " Employees.LastName, Orders.OrderID, Orders.OrderDate" + + " FROM Employees JOIN Orders ON" + + " (Employees.EmployeeID = Orders.EmployeeID)" + + " where Orders.EmployeeID < 5" + + " ORDER BY Orders.OrderDate" + + val Q31_2: String = "SELECT Employees.EmployeeID, Employees.FirstName," + + " Employees.LastName, Orders.OrderID, Orders.OrderDate" + + " FROM Employees JOIN Orders ON" + + " (Employees.EmployeeID = Orders.EmployeeID)" + + " where Orders.EmployeeID > 5" + + " ORDER BY Orders.OrderDate" + + val Q31_3: String = "SELECT Employees.EmployeeID, Employees.FirstName," + + " Employees.LastName, Orders.OrderID, Orders.OrderDate" + + " FROM Employees JOIN Orders ON" + + " (Employees.EmployeeID = Orders.EmployeeID)" + + " where Orders.EmployeeID < 3" + + " ORDER BY Orders.OrderDate" + + val Q31_4: String = "SELECT Employees.EmployeeID, Employees.FirstName," + + " Employees.LastName, Orders.OrderID, Orders.OrderDate" + + " FROM Employees JOIN Orders ON" + + " (Employees.EmployeeID = Orders.EmployeeID)" + + " where Orders.EmployeeID > 3" + + " ORDER BY Orders.OrderDate" + + val Q32: String = "SELECT o.OrderID, c.CompanyName, e.FirstName, e.LastName" + + " FROM Orders o" + + " JOIN Employees e ON (e.EmployeeID = o.EmployeeID)" + + " JOIN Customers c ON (c.CustomerID = o.CustomerID)" + + " WHERE o.ShippedDate > o.RequiredDate AND o.OrderDate > '1998-01-01'" + + " ORDER BY c.CompanyName" + + val Q32_1: String = "SELECT o.OrderID, c.CompanyName, e.FirstName, e.LastName" + + " FROM Orders o" + + " JOIN Employees e ON (e.EmployeeID = o.EmployeeID)" + + " JOIN Customers c ON (c.CustomerID = o.CustomerID)" + + " WHERE o.ShippedDate < o.RequiredDate AND o.OrderDate > Cast('1997-12-01' as TIMESTAMP)" + + " ORDER BY c.CompanyName" + + val Q33: String = "SELECT e.FirstName, e.LastName, 
o.OrderID" + + " FROM Employees e JOIN Orders o ON" + + " (e.EmployeeID = o.EmployeeID)" + + " WHERE o.RequiredDate < o.ShippedDate" + + " ORDER BY e.LastName, e.FirstName" + + val Q33_1: String = "SELECT e.FirstName, e.LastName, o.OrderID" + + " FROM Employees e JOIN Orders o ON" + + " (e.EmployeeID = o.EmployeeID)" + + " WHERE o.RequiredDate > o.ShippedDate" + + " ORDER BY e.LastName, e.FirstName" + + val Q34: String = "SELECT p.ProductName, SUM(od.Quantity) AS TotalUnits" + + " FROM Order_Details od JOIN Products p ON" + + " (p.ProductID = od.ProductID)" + + " GROUP BY p.ProductName" + + " HAVING SUM(Quantity) < 200" + + val Q34_1: String = "SELECT p.ProductName, SUM(od.Quantity) AS TotalUnits" + + " FROM Order_Details od JOIN Products p ON" + + " (p.ProductID = od.ProductID)" + + " GROUP BY p.ProductName" + + " HAVING SUM(Quantity) >10 and SUM(Quantity) <100" + + val Q34_2: String = "SELECT p.ProductName, SUM(od.Quantity) AS TotalUnits" + + " FROM Order_Details od JOIN Products p ON" + + " (p.ProductID = od.ProductID)" + + " GROUP BY p.ProductName" + + " HAVING SUM(Quantity) >100 and SUM(Quantity) <200" + + val Q35: String = "SELECT COUNT(DISTINCT e.EmployeeID) AS numEmployees," + + " COUNT(DISTINCT c.CustomerID) AS numCompanies," + + " e.City as employeeCity, c.City as customerCity" + + " FROM Employees e JOIN Customers c ON" + + " (e.City = c.City)" + + " GROUP BY e.City, c.City " + + " ORDER BY numEmployees DESC" + + val Q35_1: String = "SELECT COUNT(DISTINCT e.EmployeeID) AS numEmployees," + + " COUNT(DISTINCT c.CustomerID) AS numCompanies," + + " e.City as employeeCity, c.City as customerCity" + + " FROM Employees e JOIN Customers c ON" + + " (e.City = c.City)" + + " where e.EmployeeID > 5 " + + " GROUP BY e.City, c.City " + + " ORDER BY numEmployees DESC" + + val Q35_2: String = "SELECT COUNT(DISTINCT e.EmployeeID) AS numEmployees," + + " COUNT(DISTINCT c.CustomerID) AS numCompanies," + + " e.City as employeeCity, c.City as customerCity" + + " FROM 
Employees e JOIN Customers c ON" + + " (e.City = c.City)" + + " where e.EmployeeID > 1 " + + " GROUP BY e.City, c.City " + + " ORDER BY numEmployees DESC" + + val Q36: String = "select distinct (a.ShippedDate) as ShippedDate," + + " a.OrderID," + + " b.Subtotal," + + " year(a.ShippedDate) as Year" + + " from Orders a" + + " inner join" + + " (" + + " select distinct OrderID," + + " sum(UnitPrice * Quantity * (1 - Discount)) as Subtotal" + + " from order_details" + + " group by OrderID" + + " ) b on a.OrderID = b.OrderID" + + " where a.ShippedDate is not null" + + " and a.ShippedDate > '1996-12-24' and a.ShippedDate < '1997-09-30'" + + " order by ShippedDate" + + val Q36_1: String = "select distinct (a.ShippedDate) as ShippedDate," + + " a.OrderID," + + " b.Subtotal," + + " year(a.ShippedDate) as Year" + + " from Orders a" + + " inner join" + + " (" + + " select distinct OrderID," + + " sum(UnitPrice * Quantity * (1 - Discount)) as Subtotal" + + " from order_details" + + " group by OrderID" + + " ) b on a.OrderID = b.OrderID" + + " where a.ShippedDate is not null" + + " and a.ShippedDate > Cast('1997-02-24' as TIMESTAMP) and " + + " a.ShippedDate < Cast('1997-09-30' as TIMESTAMP)" + + " order by ShippedDate" + + val Q36_2: String = "select distinct (a.ShippedDate) as ShippedDate," + + " a.OrderID," + + " b.Subtotal," + + " year(a.ShippedDate) as Year" + + " from Orders a" + + " inner join" + + " (" + + " select distinct OrderID," + + " sum(UnitPrice * Quantity * (2 - Discount)) as Subtotal" + + " from order_details" + + " group by OrderID" + + " ) b on a.OrderID = b.OrderID" + + " where a.ShippedDate is not null" + + " and a.ShippedDate > Cast('1996-02-24' as TIMESTAMP) and " + + " a.ShippedDate < Cast('1996-09-30' as TIMESTAMP)" + + " order by ShippedDate" + + val Q37: String = "select distinct a.CategoryID," + + " a.CategoryName," + + " b.ProductName," + + " sum(c.ExtendedPrice) as ProductSales" + + " from Categories a " + + " inner join Products b on a.CategoryID 
= b.CategoryID" + + " inner join" + + " ( select distinct y.OrderID," + + " y.ProductID," + + " x.ProductName," + + " y.UnitPrice," + + " y.Quantity," + + " y.Discount," + + " round(y.UnitPrice * y.Quantity * (1 - y.Discount), 2) as ExtendedPrice" + + " from Products x" + + " inner join Order_Details y on x.ProductID = y.ProductID" + + " order by y.OrderID" + + " ) c on c.ProductID = b.ProductID" + + " inner join Orders d on d.OrderID = c.OrderID" + + " where d.OrderDate > '1997-01-01' and d.OrderDate < '1997-12-31'" + + " group by a.CategoryID, a.CategoryName, b.ProductName" + + " order by a.CategoryName, b.ProductName, ProductSales" + + /* + org.apache.spark.sql.AnalysisException: The correlated scalar subquery can only contain + equality predicates: (UNITPRICE#976#1042 >= UNITPRICE#976); + val Q38: String = "select distinct ProductName as Ten_Most_Expensive_Products," + + " UnitPrice" + + " from Products as a" + + " where 10 >= (select count(distinct UnitPrice)" + + " from Products as b" + + " where b.UnitPrice == a.UnitPrice)" + + "order by UnitPrice desc" + */ + + // A simple query to get detailed information for each sale so that invoice can be issued. 
+ val Q38: String = "select distinct b.ShipName," + + " b.ShipAddress," + + " b.ShipCity," + + " b.ShipRegion," + + " b.ShipPostalCode," + + " b.ShipCountry," + + " b.CustomerID," + + " c.CompanyName as custCompanyName," + + " c.Address," + + " c.City," + + " c.Region," + + " c.PostalCode," + + " c.Country, " + + " concat(d.FirstName, ' ', d.LastName) as Salesperson," + + " b.OrderID," + + " b.OrderDate," + + " b.RequiredDate," + + " b.ShippedDate," + + " a.CompanyName as shippersCompanyName," + + " e.ProductID," + + " f.ProductName," + + " e.UnitPrice," + + " e.Quantity," + + " e.Discount," + + " e.UnitPrice * e.Quantity * (1 - e.Discount) as ExtendedPrice," + + " b.Freight" + + " from Shippers a " + + " inner join Orders b on a.ShipperID = b.ShipVia" + + " inner join Customers c on c.CustomerID = b.CustomerID" + + " inner join Employees d on d.EmployeeID = b.EmployeeID" + + " inner join Order_Details e on b.OrderID = e.OrderID" + + " inner join Products f on f.ProductID = e.ProductID" + + " order by b.ShipName" + + val Q38_1: String = "select distinct b.ShipName," + + " b.ShipAddress," + + " b.ShipCity," + + " b.ShipRegion," + + " b.ShipPostalCode," + + " b.ShipCountry," + + " b.CustomerID," + + " c.CompanyName as custCompanyName," + + " c.Address," + + " c.City," + + " c.Region," + + " c.PostalCode," + + " c.Country, " + + " concat(d.FirstName, ' ', d.LastName) as Salesperson," + + " b.OrderID," + + " b.OrderDate," + + " b.RequiredDate," + + " b.ShippedDate," + + " a.CompanyName as shippersCompanyName," + + " e.ProductID," + + " f.ProductName," + + " e.UnitPrice," + + " e.Quantity," + + " e.Discount," + + " e.UnitPrice * e.Quantity * (1 - e.Discount) as ExtendedPrice," + + " b.Freight" + + " from Shippers a " + + " inner join Orders b on a.ShipperID = b.ShipVia" + + " inner join Customers c on c.CustomerID = b.CustomerID" + + " inner join Employees d on d.EmployeeID = b.EmployeeID" + + " inner join Order_Details e on b.OrderID = e.OrderID" + + " inner join 
Products f on f.ProductID = e.ProductID" + + " where b.ShippedDate > Cast('1996-07-10' as TIMESTAMP)" + + " order by b.ShipName" + + val Q38_2: String = "select distinct b.ShipName," + + " b.ShipAddress," + + " b.ShipCity," + + " b.ShipRegion," + + " b.ShipPostalCode," + + " b.ShipCountry," + + " b.CustomerID," + + " c.CompanyName as custCompanyName," + + " c.Address," + + " c.City," + + " c.Region," + + " c.PostalCode," + + " c.Country, " + + " concat(d.FirstName, ' ', d.LastName) as Salesperson," + + " b.OrderID," + + " b.OrderDate," + + " b.RequiredDate," + + " b.ShippedDate," + + " a.CompanyName as shippersCompanyName," + + " e.ProductID," + + " f.ProductName," + + " e.UnitPrice," + + " e.Quantity," + + " e.Discount," + + " e.UnitPrice * e.Quantity * (1 - e.Discount) as ExtendedPrice," + + " b.Freight" + + " from Shippers a " + + " inner join Orders b on a.ShipperID = b.ShipVia" + + " inner join Customers c on c.CustomerID = b.CustomerID" + + " inner join Employees d on d.EmployeeID = b.EmployeeID" + + " inner join Order_Details e on b.OrderID = e.OrderID" + + " inner join Products f on f.ProductID = e.ProductID" + + " where b.ShippedDate > Cast('1996-07-29' as TIMESTAMP)" + + " order by b.ShipName" + + val Q39: String = "select s.supplierid,s.companyname,p.productid,p.productname " + + "from suppliers s join products p on(s.supplierid= p.supplierid) and" + + " s.companyname IN('Grandma Kellys Homestead','Tokyo Traders','Exotic Liquids')" + + val Q40: String = "SELECT c.customerID, o.orderID FROM customers c INNER JOIN orders o " + + "ON c.CustomerID = o.CustomerID" + + val Q40_1: String = "SELECT c.customerID, o.orderID FROM customers c INNER JOIN orders o " + + "ON c.CustomerID = o.CustomerID where c.CustomerID='LINOD'" + + val Q40_2: String = "SELECT c.customerID, o.orderID FROM customers c INNER JOIN orders o " + + "ON c.CustomerID = o.CustomerID where c.CustomerID='SEVES'" + + val Q41: String = "SELECT 
order_details.OrderID,ShipCountry,UnitPrice,Quantity,Discount" + + " FROM orders INNER JOIN Order_Details ON Orders.OrderID = Order_Details.OrderID" + + val Q42: String = "SELECT ShipCountry," + + " Sum(Order_Details.UnitPrice * Quantity * Discount)" + + " AS ProductSales FROM Orders INNER JOIN Order_Details ON" + + " Orders.OrderID = Order_Details.OrderID GROUP BY ShipCountry" + + val Q42_1: String = "SELECT ShipCountry," + + " Sum(Order_Details.UnitPrice * Quantity * Discount)" + + " AS ProductSales FROM Orders INNER JOIN Order_Details ON" + + " Orders.OrderID = Order_Details.OrderID where orders.OrderID > 11000 GROUP BY ShipCountry" + + val Q42_2: String = "SELECT ShipCountry," + + " Sum(Order_Details.UnitPrice * Quantity * Discount)" + + " AS ProductSales FROM Orders INNER JOIN Order_Details ON" + + " Orders.OrderID = Order_Details.OrderID where orders.OrderID > 11070 GROUP BY ShipCountry" + + val Q43: String = "SELECT * FROM orders LEFT SEMI JOIN order_details " + + "ON orders.OrderID = order_details.OrderId" + + val Q43_1: String = "SELECT * FROM orders LEFT SEMI JOIN order_details " + + "ON orders.OrderID = order_details.OrderId where orders.OrderID > 11067" + + val Q43_2: String = "SELECT * FROM orders LEFT SEMI JOIN order_details " + + "ON orders.OrderID = order_details.OrderId where orders.OrderID > 11075" + + val Q44: String = "SELECT * FROM orders LEFT SEMI JOIN order_details" + + val Q45: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders JOIN order_details" + val Q46: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders LEFT JOIN order_details" + val Q47: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + 
"ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders RIGHT JOIN order_details" + val Q48: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders FULL OUTER JOIN order_details" + val Q49: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders FULL JOIN order_details" + val Q49_1: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate," + + "RequiredDate," + + " ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode, " + + "ShipCountry FROM orders FULL JOIN order_details" + + " where orders.ShippedDate > Cast('1996-07-29' as TIMESTAMP)" + + val Q49_2: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate," + + "RequiredDate," + + " ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode, " + + "ShipCountry FROM orders FULL JOIN order_details" + + " where orders.ShippedDate > Cast('1996-07-10' as TIMESTAMP)" + + val Q50: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID" + + val Q51: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders LEFT JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID" + val Q51_1: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate," + + "RequiredDate," + + " 
ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode, " + + "ShipCountry FROM orders LEFT JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID " + + " where orders.ShippedDate > Cast('1996-07-10' as TIMESTAMP)" + val Q51_2: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate," + + "RequiredDate," + + " ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode, " + + "ShipCountry FROM orders LEFT JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID " + + " where orders.ShippedDate > Cast('1996-07-29' as TIMESTAMP)" + val Q52: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders RIGHT JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID" + val Q53: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders FULL OUTER JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID" + val Q54: String = "SELECT orders.OrderID as OOID, CustomerID,EmployeeID,OrderDate,RequiredDate," + + "ShippedDate,ShipVia,Freight,ShipName,ShipAddress,ShipCity,ShipRegion,ShipPostalCode," + + "ShipCountry FROM orders FULL JOIN order_details" + + " ON Orders.OrderID = Order_Details.OrderID" + + // Number of units in stock by category and supplier continent + val Q55: String = "select c.CategoryName as Product_Category," + + " case when s.Country in" + + " ('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end as Supplier_Continent," + + " sum(p.UnitsInStock) as UnitsInStock" + + " from 
Suppliers s " + + " inner join Products p on p.SupplierID=s.SupplierID" + + " inner join Categories c on c.CategoryID=p.CategoryID" + + " group by c.CategoryName," + + " case when s.Country in" + + " ('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end" + + val Q55_1: String = "select c.CategoryName as Product_Category," + + " case when s.Country in" + + " ('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end as Supplier_Continent," + + " sum(p.UnitsInStock) as UnitsInStock" + + " from Suppliers s " + + " inner join Products p on p.SupplierID=s.SupplierID" + + " inner join Categories c on c.CategoryID=p.CategoryID" + + " where s.Country IN ('USA','UK')" + + " group by c.CategoryName," + + " case when s.Country in" + + " ('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end" + + val Q55_2: String = "select c.CategoryName as Product_Category," + + " case when s.Country in" + + " ('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end as Supplier_Continent," + + " sum(p.UnitsInStock) as UnitsInStock" + + " from Suppliers s " + + " inner join Products p on p.SupplierID=s.SupplierID" + + " inner join Categories c on c.CategoryID=p.CategoryID" + + " where s.Country IN ('Canada','France')" + + " group by c.CategoryName," + + " case when s.Country in" + + " 
('UK','Spain','Sweden','Germany','Norway'," + + " 'Denmark','Netherlands','Finland','Italy','France')" + + " then 'Europe'" + + " when s.Country in ('USA','Canada','Brazil')" + + " then 'America'" + + " else 'Asia-Pacific'" + + " end" + + // This query shows sales figures by categories - mainly just aggregation with sub-query. + // The inner query aggregates to product level, and the outer query further aggregates + // the result set from inner-query to category level. + val Q56: String = "select CategoryName, format_number(sum(ProductSales), 2) as CategorySales" + + " from" + + " (" + + " select distinct a.CategoryName," + + " b.ProductName," + + " format_number(sum(c.UnitPrice * c.Quantity * (1 - c.Discount)), 2) as ProductSales," + + " concat('Qtr ', quarter(d.ShippedDate)) as ShippedQuarter" + + " from Categories as a" + + " inner join Products as b on a.CategoryID = b.CategoryID" + + " inner join Order_Details as c on b.ProductID = c.ProductID" + + " inner join Orders as d on d.OrderID = c.OrderID" + + " where d.ShippedDate > '1997-01-01' and d.ShippedDate < '1997-12-31'" + + " group by a.CategoryName," + + " b.ProductName," + + " concat('Qtr ', quarter(d.ShippedDate))" + + " order by a.CategoryName," + + " b.ProductName," + + " ShippedQuarter" + + " ) as x" + + " group by CategoryName" + + " order by CategoryName" + + val Q56_1: String = "select CategoryName, format_number(sum(ProductSales), 2) as CategorySales" + + " from" + + " (" + + " select distinct a.CategoryName," + + " b.ProductName," + + " format_number(sum(c.UnitPrice * c.Quantity * (1 - c.Discount)), 2) as ProductSales," + + " concat('Qtr ', quarter(d.ShippedDate)) as ShippedQuarter" + + " from Categories as a" + + " inner join Products as b on a.CategoryID = b.CategoryID" + + " inner join Order_Details as c on b.ProductID = c.ProductID" + + " inner join Orders as d on d.OrderID = c.OrderID" + + " where d.ShippedDate < Cast('1997-12-01' as TIMESTAMP) and " + + "d.ShippedDate > Cast('1996-07-10' as 
TIMESTAMP)" + + " group by a.CategoryName," + + " b.ProductName," + + " concat('Qtr ', quarter(d.ShippedDate))" + + " order by a.CategoryName," + + " b.ProductName," + + " ShippedQuarter" + + " ) as x" + + " group by CategoryName" + + " order by CategoryName" + + val Q56_2: String = "select CategoryName, format_number(sum(ProductSales), 2) as CategorySales" + + " from" + + " (" + + " select distinct a.CategoryName," + + " b.ProductName," + + " format_number(sum(c.UnitPrice * c.Quantity * (1 - c.Discount)), 2) as ProductSales," + + " concat('Qtr ', quarter(d.ShippedDate)) as ShippedQuarter" + + " from Categories as a" + + " inner join Products as b on a.CategoryID = b.CategoryID" + + " inner join Order_Details as c on b.ProductID = c.ProductID" + + " inner join Orders as d on d.OrderID = c.OrderID" + + " where d.ShippedDate < Cast('1998-01-01' as TIMESTAMP) and " + + "d.ShippedDate > Cast('1996-07-29' as TIMESTAMP)" + + " group by a.CategoryName," + + " b.ProductName," + + " concat('Qtr ', quarter(d.ShippedDate))" + + " order by a.CategoryName," + + " b.ProductName," + + " ShippedQuarter" + + " ) as x" + + " group by CategoryName" + + " order by CategoryName" + + val Q56_3: String = "select CategoryName, format_number(sum(ProductSales), 2) as CategorySales" + + " from" + + " (" + + " select distinct a.CategoryName," + + " b.ProductName," + + " format_number(sum(c.UnitPrice * c.Quantity * (1 - c.Discount)), 2) as ProductSales," + + " concat('Qtr ', quarter(d.ShippedDate)) as ShippedQuarter" + + " from Categories as a" + + " inner join Products as b on a.CategoryID = b.CategoryID" + + " inner join Order_Details as c on b.ProductID = c.ProductID" + + " inner join Orders as d on d.OrderID = c.OrderID" + + " where d.ShippedDate < Cast('1998-12-01' as TIMESTAMP) and " + + "d.ShippedDate > Cast('1996-07-10' as TIMESTAMP)" + + " group by a.CategoryName," + + " b.ProductName," + + " concat('Qtr ', quarter(d.ShippedDate))" + + " order by a.CategoryName," + + " b.ProductName," 
+ + " ShippedQuarter" + + " ) as x" + + " group by CategoryName" + + " order by CategoryName" + + val queries = List( + "Q1" -> Q1, + "Q2" -> Q2, + "Q3" -> Q3, + "Q4" -> Q4, + "Q5" -> Q5, + "Q6" -> Q6, + "Q7" -> Q7, + "Q8" -> Q8, + "Q9" -> Q9, + "Q10" -> Q10, + "Q11" -> Q11, + "Q12" -> Q12, + "Q13" -> Q13, + "Q14" -> Q14, + "Q15" -> Q15, + "Q16" -> Q16, + "Q17" -> Q17, + "Q18" -> Q18, + "Q19" -> Q19, + "Q20" -> Q20, + "Q21" -> Q21, + "Q22" -> Q22, + "Q23" -> Q23, + "Q24" -> Q24, + "Q25" -> Q25, + "Q25_1" -> Q25_1, + "Q25_2" -> Q25_2, + "Q26" -> Q26, + "Q26_1" -> Q26_1, + "Q26_2" -> Q26_2, + "Q27" -> Q27, + "Q27_1" -> Q27_1, + "Q27_2" -> Q27_2, + "Q27_3" -> Q27_3, + "Q27_4" -> Q27_4, + "Q28" -> Q28, + "Q28_1" -> Q28_1, + "Q28_2" -> Q28_2, + "Q29" -> Q29, + "Q29_1" -> Q29_1, + "Q29_2" -> Q29_2, + "Q30" -> Q30, + "Q30_1" -> Q30_1, + "Q30_2" -> Q30_2, + "Q31" -> Q31, + "Q31_1" -> Q31_1, + "Q31_2" -> Q31_2, + "Q31_3" -> Q31_3, + "Q31_4" -> Q31_4, + "Q32" -> Q32, + "Q32_1" -> Q32_1, + "Q33" -> Q33, + "Q33_1" -> Q33_1, + "Q34" -> Q34, + "Q34_1" -> Q34_1, + "Q34_2" -> Q34_2, + "Q35" -> Q35, + "Q35_1" -> Q35_1, + "Q35_2" -> Q35_2, + "Q36" -> Q36, + "Q36_1" -> Q36_1, + "Q36_2" -> Q36_2, + "Q37" -> Q37, + "Q38" -> Q38, + "Q38_1" -> Q38_1, + "Q38_2" -> Q38_2, + "Q39" -> Q39, + "Q40" -> Q40, + "Q40_1" -> Q40_1, + "Q40_2" -> Q40_2, + "Q41" -> Q41, + "Q42" -> Q42, + "Q42_1" -> Q42_1, + "Q42_2" -> Q42_2, + "Q43" -> Q43, + "Q43_1" -> Q43_1, + "Q43_2" -> Q43_2, + "Q44" -> Q44, + "Q45" -> Q45, + "Q46" -> Q46, + "Q47" -> Q47, + "Q48" -> Q48, + "Q49" -> Q49, + "Q49_1" -> Q49_1, + "Q49_2" -> Q49_2, + "Q50" -> Q50, + "Q51" -> Q51, + "Q51_1" -> Q51_1, + "Q51_2" -> Q51_2, + "Q52" -> Q52, + "Q53" -> Q53, + "Q54" -> Q54, + "Q55" -> Q55, + "Q55_1" -> Q55_1, + "Q55_2" -> Q55_2, + "Q56" -> Q56, + "Q56_1" -> Q56_1, + "Q56_2" -> Q56_2, + "Q56_3" -> Q56_3 + ) + + def regions(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + 
.option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/regions.csv").getPath)) + + val regions_table = "create table regions (" + + "RegionID int, " + + "RegionDescription string)" + + def categories(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/categories.csv").getPath)) + + val categories_table = "create table categories (" + + "CategoryID int, " + + "CategoryName string, " + + "Description string, " + + "Picture blob)" + + def shippers(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/shippers.csv").getPath)) + + val shippers_table = "create table shippers (" + + "ShipperID int not null, " + + "CompanyName string not null, " + + "Phone string)" + + def employees(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/employees.csv").getPath)) + + val employees_table = "create table employees(" + + "EmployeeID int, " + + "LastName string, " + + "FirstName string, " + + "Title string, " + + "TitleOfCourtesy string, " + + "BirthDate timestamp, " + + "HireDate timestamp, " + + "Address string, " + + "City string, " + + "Region string, " + + "PostalCode string, " + + "Country string, " + + "HomePhone string, " + + "Extension string, " + + "Photo blob, " + + "Notes string, " + + "ReportsTo int, " + + "PhotoPath string)" + + def customers(sqlContext: 
SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/customers.csv").getPath)) + + val customers_table = "create table customers(" + + "CustomerID string, " + + "CompanyName string, " + + "ContactName string, " + + "ContactTitle string, " + + "Address string, " + + "City string, " + + "Region string, " + + "PostalCode string, " + + "Country string, " + + "Phone string, " + + "Fax string)" + + def orders(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/orders.csv").getPath)) + + val orders_table = "create table orders (" + + "OrderID int, " + + "CustomerID string, " + + "EmployeeID int, " + + "OrderDate timestamp, " + + "RequiredDate timestamp, " + + "ShippedDate timestamp, " + + "ShipVia int, " + + "Freight double, " + + "ShipName string, " + + "ShipAddress string, " + + "ShipCity string, " + + "ShipRegion string, " + + "ShipPostalCode string, " + + "ShipCountry string)" + + def order_details(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("nullValue", "NULL") + .option("maxCharsPerColumn", "4096") + .csv((getClass.getResource("/northwind/order-details.csv").getPath)) + + val order_details_table = "create table order_details (" + + "OrderID int, " + + "ProductID int, " + + "UnitPrice double, " + + "Quantity smallint, " + + "Discount double)" + + def products(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + 
.option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/products.csv").getPath)) + + val products_table = "create table products(" + + // "ProductID int not null, " + + "ProductID int, " + + "ProductName string, " + + "SupplierID int, " + + "CategoryID int," + + "QuantityPerUnit string, " + + "UnitPrice double, " + + "UnitsInStock smallint, " + + "UnitsOnOrder smallint," + + "ReorderLevel smallint, " + + "Discontinued smallint) " + + def suppliers(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/suppliers.csv").getPath)) + + val suppliers_table = "create table suppliers(" + + "SupplierID int, " + + "CompanyName string, " + + "ContactName string, " + + "ContactTitle string, " + + "Address string, " + + "City string, " + + "Region string, " + + "PostalCode string, " + + "Country string, " + + "Phone string, " + + "Fax string, " + + "HomePage string) " + + def territories(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/territories.csv").getPath)) + + val territories_table = "create table territories(" + + "TerritoryID string, " + + "TerritoryDescription string, " + + "RegionID string)" + + def employee_territories(sqlContext: SQLContext): DataFrame = + sqlContext.read.format("com.databricks.spark.csv") + .option("header", "true") + .option("inferSchema", "true") + .option("maxCharsPerColumn", "4096") + .option("nullValue", "NULL") + .csv((getClass.getResource("/northwind/employee-territories.csv").getPath)) + + val employee_territories_table = "create table employee_territories(" + + "EmployeeID int, " + + "TerritoryID string)" + + def 
dropTables(snc: SnappyContext): Unit = { + snc.sql("drop table if exists regions") + snc.sql("drop table if exists categories") + snc.sql("drop table if exists products") + snc.sql("drop table if exists order_details") + snc.sql("drop table if exists orders") + snc.sql("drop table if exists customers") + snc.sql("drop table if exists employees") + snc.sql("drop table if exists employee_territories") + snc.sql("drop table if exists shippers") + snc.sql("drop table if exists suppliers") + snc.sql("drop table if exists territories") + } + + /** + * Enable this flag for local testing in case a change causes multiple cases + * of mismatches to be introduced. + */ + private val WARN_FOR_PARTITION_MISMATCH = false + + def assertJoin(snc: SnappyContext, sqlString: String, queryNum: String, numRows: Int, + numPartitions: Int, c: Class[_]): Any = { + snc.sql("set spark.sql.crossJoin.enabled = true") + val df = snc.sql(sqlString) + val count = df.count() + assert(count == numRows, + "Mismatch got df.count -> " + count + " but expected numRows -> " + + numRows + " for queryNum = " + queryNum) + val expectedPartitions = (numPartitions - 4) to (numPartitions + 4) + if (!expectedPartitions.contains(df.rdd.partitions.length)) { + logWarning("Mismatch got df.rdd.partitions.length -> " + df.rdd.partitions.length + + " but expected numPartitions -> " + numPartitions + + " for queryNum = " + queryNum) + } + } + + private def assertQueryCommon(df: DataFrame, sqlString: String, + queryNum: String, numRows: Int, c: Class[_]): Any = { + val physical = df.queryExecution.sparkPlan + val operators = physical.collect { + case j: ProjectExec => j + case j: PartitionedDataSourceScan => j + case j: PartitionedPhysicalScan => j + case j: LocalTableScanExec => j + case j: CoalesceExec => j + case j: FilterExec => j + case j: OutputFakerExec => j + case j: RangeExec => j + case j: SampleExec => j + case j: SubqueryExec => j + case j: UnionExec => j + } + if (operators.head.getClass != c) { + throw 
new IllegalStateException(s"$sqlString expected operator: $c," + + s" but got ${operators.head}\n physical: \n$physical") + } + val count = df.count() + assert(count == numRows, + "Mismatch got df.count -> " + count + " but expected numRows -> " + + numRows + " for queryNum = " + queryNum) + } + + def assertQuery(snc: SnappyContext, sqlString: String, queryNum: String, + numRows: Int, numPartitions: Int, c: Class[_]): Any = { + val df = snc.sql(sqlString) + assertQueryCommon(df, sqlString, queryNum, numRows, c) + + if (WARN_FOR_PARTITION_MISMATCH) { + if (df.rdd.partitions.length != numPartitions) { + logWarning("Mismatch got df.rdd.partitions.length -> " + df.rdd.partitions.length + + " but expected numPartitions -> " + numPartitions + + " for queryNum = " + queryNum) + } + } else { + assert(df.rdd.partitions.length == numPartitions, + "Mismatch got df.rdd.partitions.length -> " + df.rdd.partitions.length + + " but expected numPartitions -> " + numPartitions + + " for queryNum = " + queryNum) + } + } + + def assertQuery(snc: SnappyContext, sqlString: String, queryNum: String, + numRows: Int, numPartitions: Array[Int], c: Class[_]): Any = { + val df = snc.sql(sqlString) + assertQueryCommon(df, sqlString, queryNum, numRows, c) + + val rddNumPartitions = df.rdd.partitions.length + if (WARN_FOR_PARTITION_MISMATCH) { + if (!numPartitions.contains(rddNumPartitions)) { + logWarning("Mismatch got df.rdd.partitions.length -> " + rddNumPartitions + + " but expected one of numPartitions -> " + numPartitions.toSeq + + " for queryNum=" + queryNum) + } + } else { + assert(numPartitions.contains(rddNumPartitions), + "Mismatch got df.rdd.partitions.length -> " + rddNumPartitions + + " but expected one of numPartitions -> " + numPartitions.toSeq + + " for queryNum=" + queryNum) + } + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/NorthWindTest.scala b/cluster/src/test/scala/org/apache/spark/sql/NorthWindTest.scala new file mode 100644 index 0000000000..69ad197a5a 
--- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/NorthWindTest.scala @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql + +import io.snappydata.Property.PlanCaching +import io.snappydata.SnappyFunSuite +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} + +import org.apache.spark.Logging +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.columnar.ColumnTableScan +import org.apache.spark.sql.execution.joins._ +import org.apache.spark.sql.execution.row.RowTableScan + +class NorthWindTest + extends SnappyFunSuite + with Logging + with BeforeAndAfter + with BeforeAndAfterAll { + + after { + NWQueries.dropTables(snc) + } + + test("Test replicated row tables queries") { + createAndLoadReplicatedTables(snc) + validateReplicatedTableQueries(snc) + + // test SNAP-1152 + val df = snc.sql("SELECT COUNT(DISTINCT e.EmployeeID) AS numEmployees," + + " COUNT(DISTINCT c.CustomerID) AS numCompanies, e.City, c.City" + + " FROM Employees e LEFT JOIN Customers c ON (e.City = c.City)" + + " GROUP BY e.City, c.City ORDER BY numEmployees DESC;") + df.count() + } + + test("Test partitioned row tables queries") { + createAndLoadPartitionedTables(snc) + validatePartitionedRowTableQueries(snc) + } + + test("Test column tables queries") { + createAndLoadColumnTables(snc) + 
validatePartitionedColumnTableQueries(snc) + } + + // enable if transformations are supported in plan-caching. + test("SNAP-2451"){ + val planCaching = PlanCaching.get(snc.sessionState.conf) + PlanCaching.set(snc.sessionState.conf, false) + try { + createAndLoadColumnTables(snc) + + val df1 = snc.sql("SELECT ShipCountry, Sum(Order_Details.UnitPrice * Quantity * Discount)" + + " AS ProductSales FROM Orders INNER JOIN Order_Details ON" + + " Orders.OrderID = Order_Details.OrderID" + + " where orders.OrderID > 11000 GROUP BY ShipCountry") + + val result1 = df1.repartition(1).collect() + assert(result1.length == 22) + + + val df2 = snc.sql("SELECT ShipCountry, Sum(Order_Details.UnitPrice * Quantity * Discount)" + + " AS ProductSales FROM Orders INNER JOIN Order_Details ON" + + " Orders.OrderID = Order_Details.OrderID" + + " where orders.OrderID > 11070 GROUP BY ShipCountry") + + val result2 = df2.repartition(1).collect() + assert(result2.length == 7) + } finally { + PlanCaching.set(snc.sessionState.conf, planCaching) + } + + } + + test("Test colocated tables queries") { + createAndLoadColocatedTables(snc) + validateColocatedTableQueries(snc) + } + + def createAndLoadReplicatedTables(snc: SnappyContext): Unit = { + snc.sql(NWQueries.regions_table) + NWQueries.regions(snc).write.insertInto("regions") + + snc.sql(NWQueries.categories_table) + NWQueries.categories(snc).write.insertInto("categories") + + snc.sql(NWQueries.shippers_table) + NWQueries.shippers(snc).write.insertInto("shippers") + + snc.sql(NWQueries.employees_table) + NWQueries.employees(snc).write.insertInto("employees") + + snc.sql(NWQueries.customers_table) + NWQueries.customers(snc).write.insertInto("customers") + + snc.sql(NWQueries.orders_table) + NWQueries.orders(snc).write.insertInto("orders") + + snc.sql(NWQueries.order_details_table) + NWQueries.order_details(snc).write.insertInto("order_details") + + snc.sql(NWQueries.products_table) + NWQueries.products(snc).write.insertInto("products") + + 
snc.sql(NWQueries.suppliers_table) + NWQueries.suppliers(snc).write.insertInto("suppliers") + + snc.sql(NWQueries.territories_table) + NWQueries.territories(snc).write.insertInto("territories") + + snc.sql(NWQueries.employee_territories_table) + NWQueries.employee_territories(snc).write.insertInto("employee_territories") + } + + private def validateReplicatedTableQueries(snc: SnappyContext): Unit = { + // TODO fix the for scala test as well + for (q <- NWQueries.queries.filter(q => !q._1.contains("_"))) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, 1, classOf[RowTableScan]) + case "Q3" => NWQueries.assertQuery(snc, NWQueries.Q3, "Q3", 830, 1, classOf[RowTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, 1, classOf[RowTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 1, classOf[RowTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 1, classOf[RowTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 1, classOf[RowTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, 1, classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, 1, classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, 1, classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, 1 , classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 1 , classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, 1, classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 1 , classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 1 , classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 1 , classOf[FilterExec]) + 
case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 1 , classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 1, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, 1, classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 1, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 1, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 1, + classOf[SortMergeJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 1, + classOf[SortMergeJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, 1, classOf[RowTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 1, + classOf[SortMergeJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 1, + classOf[SortMergeJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, 1, classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 1, classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 1, classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, 1, classOf[HashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, 1, classOf[HashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, 4, classOf[HashJoinExec]) + case 
"Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, 1, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 1, classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 1, classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 1, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 1, classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 1, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 1, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 1, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 5, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 5, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 1, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 1, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, 1, classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 1, classOf[HashJoinExec]) + } + } + } + + private def 
createAndLoadPartitionedTables(snc: SnappyContext): Unit = { + + snc.sql(NWQueries.regions_table) + NWQueries.regions(snc).write.insertInto("regions") + + snc.sql(NWQueries.categories_table) + NWQueries.categories(snc).write.insertInto("categories") + + snc.sql(NWQueries.shippers_table) + NWQueries.shippers(snc).write.insertInto("shippers") + + snc.sql(NWQueries.employees_table) + NWQueries.employees(snc).write.insertInto("employees") + + snc.sql(NWQueries.customers_table) + NWQueries.customers(snc).write.insertInto("customers") + + snc.sql(NWQueries.orders_table + " using row options (partition_by 'OrderId', buckets '16')") + NWQueries.orders(snc).write.insertInto("orders") + + snc.sql(NWQueries.order_details_table + + " using row options (partition_by 'OrderId', buckets '16', COLOCATE_WITH 'orders')") + NWQueries.order_details(snc).write.insertInto("order_details") + + snc.sql(NWQueries.products_table + + " using row options ( partition_by 'ProductID', buckets '32')") + NWQueries.products(snc).write.insertInto("products") + + snc.sql(NWQueries.suppliers_table + + " USING row options (PARTITION_BY 'SupplierID', buckets '8' )") + NWQueries.suppliers(snc).write.insertInto("suppliers") + + snc.sql(NWQueries.territories_table + + " using row options (partition_by 'TerritoryID', buckets '8')") + NWQueries.territories(snc).write.insertInto("territories") + + snc.sql(NWQueries.employee_territories_table + + " using row options(partition_by 'EmployeeID', buckets '4')") + NWQueries.employee_territories(snc).write.insertInto("employee_territories") + + } + + private def validatePartitionedRowTableQueries(snc: SnappyContext): Unit = { + // TODO fix the for scala test as well + for (q <- NWQueries.queries.filter(q => !q._1.contains("_"))) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, 1, classOf[RowTableScan]) + case "Q3" => NWQueries.assertQuery(snc, 
NWQueries.Q3, "Q3", 830, 4, classOf[RowTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, 1, classOf[RowTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 1, classOf[RowTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 1, classOf[RowTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 1, classOf[RowTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, 1, classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, 1, classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, 1, classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, 1 , classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 1 , classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, 4, classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 1 , classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 1 , classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 1 , classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 1 , classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 1, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, 4, classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 1, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case 
"Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 1, + classOf[BroadcastHashJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 32, + classOf[BroadcastHashJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, 4, classOf[RowTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 8, + classOf[BroadcastHashJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 8, + classOf[BroadcastHashJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, 4, classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 4, classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 4, classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, 32, + classOf[BroadcastHashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, 16, + classOf[BroadcastHashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, 32, + classOf[BroadcastHashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, 32, + classOf[SortMergeJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 32, + classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 4, classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 13, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 16, + classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 13, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 13, + classOf[BroadcastNestedLoopJoinExec]) + 
case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 13, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 13, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 13, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 17, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 17, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 13, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, 32, + classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 9, classOf[HashJoinExec]) + } + } + } + + private def createAndLoadColumnTables(snc: SnappyContext): Unit = { + + snc.sql(NWQueries.regions_table) + NWQueries.regions(snc).write.insertInto("regions") + + snc.sql(NWQueries.categories_table) + NWQueries.categories(snc).write.insertInto("categories") + + snc.sql(NWQueries.shippers_table) + NWQueries.shippers(snc).write.insertInto("shippers") + + snc.sql(NWQueries.employees_table + " using column options()") + NWQueries.employees(snc).write.insertInto("employees") + + snc.sql(NWQueries.customers_table) + NWQueries.customers(snc).write.insertInto("customers") + + snc.sql(NWQueries.orders_table + " using column options (partition_by 'OrderId', buckets '16')") + 
NWQueries.orders(snc).write.insertInto("orders") + + snc.sql(NWQueries.order_details_table + + " using column options (partition_by 'OrderId', buckets '16', COLOCATE_WITH 'orders')") + NWQueries.order_details(snc).write.insertInto("order_details") + + snc.sql(NWQueries.products_table + + " USING column options ( partition_by 'ProductID,SupplierID', buckets '16')") + NWQueries.products(snc).write.insertInto("products") + + snc.sql(NWQueries.suppliers_table + + " USING column options (PARTITION_BY 'SupplierID', buckets '16' )") + NWQueries.suppliers(snc).write.insertInto("suppliers") + + snc.sql(NWQueries.territories_table + + " using column options (partition_by 'TerritoryID', buckets '8')") + NWQueries.territories(snc).write.insertInto("territories") + + snc.sql(NWQueries.employee_territories_table + + " using row options(partition_by 'EmployeeID', buckets '4')") + NWQueries.employee_territories(snc).write.insertInto("employee_territories") + } + + private def validatePartitionedColumnTableQueries(snc: SnappyContext): Unit = { + + // TODO fix the for scala test as well + for (q <- NWQueries.queries.filter(q => !q._1.contains("_"))) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, 1, classOf[RowTableScan]) + case "Q3" => NWQueries.assertQuery(snc, NWQueries.Q3, "Q3", 830, 4, + classOf[ColumnTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, 4, classOf[ColumnTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 4, classOf[ColumnTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 4, classOf[ColumnTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 4, classOf[ColumnTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, 4, classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, 4, 
classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, 4, classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, 4, classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 3, classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, 4, classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 1, classOf[FilterExec]) + case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 4, classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 4, classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 4, classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 4, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, 4, classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, + classOf[ColumnTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 2, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, + classOf[ColumnTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 1, classOf[RowTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 4, + classOf[SortMergeJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 16, + classOf[BroadcastHashJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, 4, + classOf[ColumnTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 16, + classOf[SortMergeJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 16, + 
classOf[SortMergeJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, 16, + classOf[HashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 9, + classOf[HashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 9, + classOf[HashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, 16, + classOf[HashJoinExec]) + case "Q35" => NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, classOf[HashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, 16, + classOf[HashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, 16, + classOf[HashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, 16, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 16, + classOf[HashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 4, classOf[HashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 13, + classOf[HashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 16, + classOf[HashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 13, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 13, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 13, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 13, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 13, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 17, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 17, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => 
NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 13, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 13, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, 16, + classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 9, classOf[HashJoinExec]) + } + } + } + + private def createAndLoadColocatedTables(snc: SnappyContext): Unit = { + + snc.sql(NWQueries.regions_table) + NWQueries.regions(snc).write.insertInto("regions") + + snc.sql(NWQueries.categories_table) + NWQueries.categories(snc).write.insertInto("categories") + + snc.sql(NWQueries.shippers_table) + NWQueries.shippers(snc).write.insertInto("shippers") + + snc.sql(NWQueries.employees_table + + " using row options( partition_by 'EmployeeID', buckets '8')") + NWQueries.employees(snc).write.insertInto("employees") + + snc.sql(NWQueries.customers_table + + " using column options( partition_by 'CustomerID', buckets '16')") + NWQueries.customers(snc).write.insertInto("customers") + + snc.sql(NWQueries.orders_table + + " using row options (partition_by 'CustomerID', buckets '16', colocate_with 'customers')") + NWQueries.orders(snc).write.insertInto("orders") + + snc.sql(NWQueries.order_details_table + + " using row options ( partition_by 'ProductID', buckets '16')") + NWQueries.order_details(snc).write.insertInto("order_details") + + snc.sql(NWQueries.products_table + + " USING column options ( partition_by 'ProductID', buckets '16'," + + " colocate_with 'order_details')") + NWQueries.products(snc).write.insertInto("products") + + snc.sql(NWQueries.suppliers_table + + " USING column 
options (PARTITION_BY 'SupplierID', buckets '16')") + NWQueries.suppliers(snc).write.insertInto("suppliers") + + snc.sql(NWQueries.territories_table + + " using column options (partition_by 'TerritoryID', buckets '8')") + NWQueries.territories(snc).write.insertInto("territories") + + snc.sql(NWQueries.employee_territories_table + + " using row options(partition_by 'TerritoryID', buckets '8', colocate_with 'territories') ") + NWQueries.employee_territories(snc).write.insertInto("employee_territories") + + } + + + private def validateColocatedTableQueries(snc: SnappyContext): Unit = { + // TODO fix the for scala test as well + for (q <- NWQueries.queries.filter(q => !q._1.contains("_"))) { + q._1 match { + case "Q1" => NWQueries.assertQuery(snc, NWQueries.Q1, "Q1", 8, 1, classOf[RowTableScan]) + case "Q2" => NWQueries.assertQuery(snc, NWQueries.Q2, "Q2", 91, 4, classOf[ColumnTableScan]) + case "Q3" => NWQueries.assertQuery(snc, NWQueries.Q3, "Q3", 830, 4, classOf[RowTableScan]) + case "Q4" => NWQueries.assertQuery(snc, NWQueries.Q4, "Q4", 9, 4, classOf[RowTableScan]) + case "Q5" => NWQueries.assertQuery(snc, NWQueries.Q5, "Q5", 9, 4, classOf[RowTableScan]) + case "Q6" => NWQueries.assertQuery(snc, NWQueries.Q6, "Q6", 9, 4, classOf[RowTableScan]) + case "Q7" => NWQueries.assertQuery(snc, NWQueries.Q7, "Q7", 9, 4, classOf[RowTableScan]) + case "Q8" => NWQueries.assertQuery(snc, NWQueries.Q8, "Q8", 6, 4, classOf[FilterExec]) + case "Q9" => NWQueries.assertQuery(snc, NWQueries.Q9, "Q9", 3, 4, classOf[ProjectExec]) + case "Q10" => NWQueries.assertQuery(snc, NWQueries.Q10, "Q10", 2, 4, classOf[FilterExec]) + case "Q11" => NWQueries.assertQuery(snc, NWQueries.Q11, "Q11", 4, 4, classOf[ProjectExec]) + case "Q12" => NWQueries.assertQuery(snc, NWQueries.Q12, "Q12", 2, 3, classOf[FilterExec]) + case "Q13" => NWQueries.assertQuery(snc, NWQueries.Q13, "Q13", 2, 4, classOf[FilterExec]) + case "Q14" => NWQueries.assertQuery(snc, NWQueries.Q14, "Q14", 69, 4, classOf[FilterExec]) + 
case "Q15" => NWQueries.assertQuery(snc, NWQueries.Q15, "Q15", 5, 4, classOf[FilterExec]) + case "Q16" => NWQueries.assertQuery(snc, NWQueries.Q16, "Q16", 8, 4, classOf[FilterExec]) + case "Q17" => NWQueries.assertQuery(snc, NWQueries.Q17, "Q17", 3, 4, classOf[FilterExec]) + case "Q18" => NWQueries.assertQuery(snc, NWQueries.Q18, "Q18", 9, 4, classOf[ProjectExec]) + case "Q19" => NWQueries.assertQuery(snc, NWQueries.Q19, "Q19", 13, 4, classOf[ProjectExec]) + case "Q20" => NWQueries.assertQuery(snc, NWQueries.Q20, "Q20", 1, 1, classOf[ProjectExec]) + case "Q21" => NWQueries.assertQuery(snc, NWQueries.Q21, "Q21", 1, 1, classOf[RowTableScan]) + case "Q22" => NWQueries.assertQuery(snc, NWQueries.Q22, "Q22", 1, 2, classOf[ProjectExec]) + case "Q23" => NWQueries.assertQuery(snc, NWQueries.Q23, "Q23", 1, 1, classOf[RowTableScan]) + case "Q24" => NWQueries.assertQuery(snc, NWQueries.Q24, "Q24", 4, 4, classOf[ProjectExec]) + case "Q25" => NWQueries.assertJoin(snc, NWQueries.Q25, "Q25", 1, 4, + classOf[ColumnTableScan]) + case "Q26" => NWQueries.assertJoin(snc, NWQueries.Q26, "Q26", 86, 19, + classOf[BroadcastHashJoinExec]) + case "Q27" => NWQueries.assertJoin(snc, NWQueries.Q27, "Q27", 9, 16, + classOf[SortMergeJoinExec]) + case "Q28" => NWQueries.assertJoin(snc, NWQueries.Q28, "Q28", 12, 4, + classOf[ColumnTableScan]) + case "Q29" => NWQueries.assertJoin(snc, NWQueries.Q29, "Q29", 8, 16, + classOf[BroadcastHashJoinExec]) + case "Q30" => NWQueries.assertJoin(snc, NWQueries.Q30, "Q30", 8, 16, + classOf[BroadcastHashJoinExec]) + case "Q31" => NWQueries.assertJoin(snc, NWQueries.Q31, "Q31", 830, 16, + classOf[BroadcastHashJoinExec]) + case "Q32" => NWQueries.assertJoin(snc, NWQueries.Q32, "Q32", 8, 9, + classOf[BroadcastHashJoinExec]) + case "Q33" => NWQueries.assertJoin(snc, NWQueries.Q33, "Q33", 37, 9, + classOf[BroadcastHashJoinExec]) + case "Q34" => NWQueries.assertJoin(snc, NWQueries.Q34, "Q34", 5, 16, + classOf[BroadcastHashJoinExec]) + case "Q35" => 
NWQueries.assertJoin(snc, NWQueries.Q35, "Q35", 3, 4, + classOf[BroadcastHashJoinExec]) + case "Q36" => NWQueries.assertJoin(snc, NWQueries.Q36, "Q36", 290, 16, + classOf[BroadcastHashJoinExec]) + case "Q37" => NWQueries.assertJoin(snc, NWQueries.Q37, "Q37", 77, 16, + classOf[BroadcastHashJoinExec]) + case "Q38" => NWQueries.assertJoin(snc, NWQueries.Q38, "Q38", 2155, 16, + classOf[HashJoinExec]) + case "Q39" => NWQueries.assertJoin(snc, NWQueries.Q39, "Q39", 9, 16, + classOf[BroadcastHashJoinExec]) + case "Q40" => NWQueries.assertJoin(snc, NWQueries.Q40, "Q40", 830, 19, + classOf[BroadcastHashJoinExec]) + case "Q41" => NWQueries.assertJoin(snc, NWQueries.Q41, "Q41", 2155, 16, + classOf[BroadcastHashJoinExec]) + case "Q42" => NWQueries.assertJoin(snc, NWQueries.Q42, "Q42", 22, 16, + classOf[BroadcastHashJoinExec]) + case "Q43" => NWQueries.assertJoin(snc, NWQueries.Q43, "Q43", 830, 16, + classOf[SortMergeJoinExec]) + case "Q44" => NWQueries.assertJoin(snc, NWQueries.Q44, "Q44", 830, 19, + classOf[BroadcastNestedLoopJoinExec]) + case "Q45" => NWQueries.assertJoin(snc, NWQueries.Q45, "Q45", 1788650, 19, + classOf[CartesianProductExec]) + case "Q46" => NWQueries.assertJoin(snc, NWQueries.Q46, "Q46", 1788650, 19, + classOf[BroadcastNestedLoopJoinExec]) + case "Q47" => NWQueries.assertJoin(snc, NWQueries.Q47, "Q47", 1788650, 16, + classOf[BroadcastNestedLoopJoinExec]) + case "Q48" => NWQueries.assertJoin(snc, NWQueries.Q48, "Q48", 1788650, 23, + classOf[BroadcastNestedLoopJoinExec]) + case "Q49" => NWQueries.assertJoin(snc, NWQueries.Q49, "Q49", 1788650, 23, + classOf[BroadcastNestedLoopJoinExec]) + case "Q50" => NWQueries.assertJoin(snc, NWQueries.Q50, "Q50", 2155, 16, + classOf[HashJoinExec]) + case "Q51" => NWQueries.assertJoin(snc, NWQueries.Q51, "Q51", 2155, 16, + classOf[SortMergeJoinExec]) + case "Q52" => NWQueries.assertJoin(snc, NWQueries.Q52, "Q52", 2155, 16, + classOf[SortMergeJoinExec]) + case "Q53" => NWQueries.assertJoin(snc, NWQueries.Q53, "Q53", 2155, 
16, + classOf[SortMergeJoinExec]) + case "Q54" => NWQueries.assertJoin(snc, NWQueries.Q54, "Q54", 2155, 16, + classOf[SortMergeJoinExec]) + case "Q55" => NWQueries.assertJoin(snc, NWQueries.Q55, "Q55", 21, 16, + classOf[HashJoinExec]) + case "Q56" => NWQueries.assertJoin(snc, NWQueries.Q56, "Q56", 8, 9, classOf[HashJoinExec]) + } + } + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/SQLFunctionsTestSuite.scala b/cluster/src/test/scala/org/apache/spark/sql/SQLFunctionsTestSuite.scala new file mode 100644 index 0000000000..0748b68d6d --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/SQLFunctionsTestSuite.scala @@ -0,0 +1,2949 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.sql + +import java.io.{File, FileOutputStream, PrintWriter} +import java.math.BigDecimal +import java.nio.file.{Files, Paths} +import java.sql.{Date, Timestamp} +import java.text.SimpleDateFormat +import java.util.Calendar + +import scala.io.Source +import scala.language.postfixOps + +import io.snappydata.SnappyFunSuite +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} + +import org.apache.spark.Logging +import org.apache.spark.sql.NorthWindDUnitTest.writeToFile +import org.apache.spark.sql.types._ +import org.junit.Assert._ + +class SQLFunctionsTestSuite extends SnappyFunSuite + with Logging + with BeforeAndAfter + with BeforeAndAfterAll { + + // scalastyle:off println + + val sparkSession = SparkSession.builder().master("local[*]").getOrCreate() + // snc.sql("set snappydata.sql.tokenize=true") + // snc.sql("set snappydata.sql.planCaching=true") + + val pw = new PrintWriter(new FileOutputStream( + new File("SQLFunctionTestSuite.out"), true)) + + var query = "" + + override def beforeAll(): Unit = { + super.beforeAll() + createRowTable() + createColumnTable() + createSparkTable() + } + + override def afterAll(): Unit = { + super.afterAll() + dropTables() + } + + def createRowTable(): Unit = { + snc.sql("CREATE TABLE rowTable (bigIntCol BIGINT," + + " binaryCol1 BINARY," + + " boolCol BOOLEAN ," + + " byteCol BYTE," + + " charCol CHAR( 30 )," + + " dateCol DATE ," + + " decimalCol DECIMAL( 11) ," + + " doubleCol DOUBLE ," + + " floatCol FLOAT ," + + " intCol INT," + + " integerCol INTEGER ," + + " longVarcharCol LONG VARCHAR," + + " numericCol NUMERIC," + + " numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION," + + " realCol REAL," + + " stringCol STRING," + + " timestampCol TIMESTAMP," + + " varcharCol VARCHAR( 20 ))") + + snc.sql("insert into rowtable values (1000, NULL, NULL, NULL," + + " '1234567890abcdefghij', date('1970-01-08'), 66, 2.2, 1.0E8, 1000, 1000," + + " '1234567890abcdefghij', 100000.0, 
100000.0, 2.2, null, 'abcd'," + + " timestamp('1997-01-01 03:03:03'), 'abcd')") + + snc.sql(s"insert into rowtable values (-10, NULL, true, NULL," + + " 'ABC@#', current_date, -66, 0.0111, -2.225E-307, -10, 10," + + " 'ABC@#', -1, 1, 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY')") + } + + def createColumnTable(): Unit = { + snc.sql("CREATE TABLE columnTable (bigIntCol BIGINT," + + " binaryCol1 BINARY," + + " boolCol BOOLEAN ," + + " byteCol BYTE," + + " charCol CHAR( 30 ) ," + + " dateCol DATE ," + + " decimalCol DECIMAL( 10, 2 ) ," + + " doubleCol DOUBLE ," + + " floatCol FLOAT ," + + " intCol INT ," + + " integerCol INTEGER," + + " longVarcharCol LONG ," + + " numericCol NUMERIC," + + " numeric1Col NUMERIC(10,2)," + + " doublePrecisionCol DOUBLE PRECISION," + + " realCol REAL," + + " stringCol STRING," + + " timestampCol TIMESTAMP ," + + " varcharCol VARCHAR( 20 )," + + " arrayStringCol ARRAY," + + " arrayIntCol ARRAY," + + " mapCol MAP," + + " structCol STRUCT) using COLUMN options(BUCKETS '8')") + + snc.sql("insert into columntable select 1000, NULL, NULL, NULL," + + " '1234567890abcdefghij', date('1970-01-08'), 66, 2.2, 1.0E8, 1000, 1000," + + " '1234567890abcdefghij', 100000.0, 100000.0, 2.2, NULL," + + " 'abcd', timestamp('1997-01-01 03:03:03'), 'abcd', NULL, NULL, NULL, NULL") + + snc.sql(s"insert into columntable select -10, NULL, true, NULL," + + " 'ABC@#', current_date, -66, 0.0111, -2.225E-307, -10, 10," + + " 'ABC@#', -1, 1, 123.56, 0.089, 'abcd', current_timestamp, 'SNAPPY'," + + " Array('abc','def','efg'), Array(1,2,3), Map(1,'abc'), Struct('abc',123)") + + } + + def createSparkTable(): Unit = { + + val DecimalType = DataTypes.createDecimalType(10, 2) + val now = Calendar.getInstance().getTime() + val dateFormat = new SimpleDateFormat("yyyy-MM-dd") + val date1 = java.sql.Date.valueOf(dateFormat.format(Date.valueOf("1970-01-08"))) + val current_date = java.sql.Date.valueOf(dateFormat.format(now)) + val timeFormat = new 
SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val time1 = java.sql.Timestamp.valueOf( + timeFormat.format(Timestamp.valueOf("9999-12-31 23:59:59.999999"))) + val current_timestamp = java.sql.Timestamp.valueOf(timeFormat.format(now)) + + val schema = List( + StructField("bigIntCol", IntegerType, true), + StructField("binaryCol1", BinaryType, true), + StructField("boolCol", BooleanType, true), + StructField("byteCol", ByteType, true), + StructField("charCol", StringType, true), + StructField("dateCol", DateType, true), + StructField("decimalCol", DecimalType, true), + StructField("doubleCol", DoubleType, true), + StructField("floatCol", FloatType, true), + StructField("intCol", IntegerType, true), + StructField("integerCol", IntegerType, true), + StructField("longVarcharCol", StringType, true), + StructField("numericCol", DecimalType, true), + StructField("numeric1Col", DecimalType, true), + StructField("doublePrecisionCol", DoubleType, true), + StructField("realCol", FloatType, true), + StructField("stringCol", StringType, true), + StructField("timestampCol", TimestampType, true), + StructField("varcharCol", StringType, true), + StructField("arrayStringCol", ArrayType(StringType), true), + StructField("arrayIntCol", ArrayType(IntegerType), true), + StructField("mapCol", MapType(IntegerType, StringType), true), + StructField("structCol", StructType(Seq(StructField("c1", StringType, false), + StructField("c2", IntegerType, false))), true) + ) + + val data = Seq( + Row(1000, null, null, null, "1234567890abcdefghij", + date1, new BigDecimal(66), 2.2, 1.0E8f, + 1000, 1000, "1234567890abcdefghij", new BigDecimal(100000.0), + new BigDecimal(100000.0), 2.2, null, "abcd", + time1, "abcd'", null, null, null, null), + Row(-10, null, true, null, "ABC@#", + current_date, new BigDecimal(-66), 0.0111, -2.225E-307f, + -10, 10, "ABC@#", new BigDecimal(-1), + new BigDecimal(1), 123.56, 0.089f, "abcd", + current_timestamp, "SNAPPY'", Array("abc", "def", "efg"), + Array(1, 2, 3), 
scala.collection.immutable.Map(1 -> "abc"),
        Row("abc", 123))
    )

    val someDF = sparkSession.createDataFrame(
      sparkSession.sparkContext.parallelize(data),
      StructType(schema)
    )
    someDF.createTempView("sparkTable")
  }

  /** Drops the row/column tables from the snappy context and the Spark temp view, if present. */
  def dropTables(): Unit = {
    snc.sql("DROP TABLE IF EXISTS rowTable")
    snc.sql("DROP TABLE IF EXISTS columnTable")
    sparkSession.sql("DROP TABLE IF EXISTS sparkTable")
  }

  /**
   * Returns the absolute path of directory `dirName`, creating it when it does not exist.
   *
   * The directory is created under the current working directory. When `onlyOnce` is true it is
   * instead created under the grand-parent of the working directory, or that directory's parent,
   * whichever is the first to contain an entry named "output.txt"; if neither does, the working
   * directory is used.
   *
   * @param dirName  name of the directory to locate or create
   * @param onlyOnce when true, look for an ancestor directory holding "output.txt" first
   * @return absolute path of the (possibly newly created) directory
   */
  protected def getTempDir(dirName: String, onlyOnce: Boolean): String = {
    // File.list() returns null for a non-directory or on an I/O error, and getParentFile
    // returns null at the filesystem root, so both are guarded instead of dereferenced.
    def hasOutputTxt(dir: File): Boolean =
      dir != null && Option(dir.list()).exists(_.contains("output.txt"))

    val cwd = new File(".")
    val log: File =
      if (onlyOnce) {
        val logParent = cwd.getAbsoluteFile.getParentFile.getParentFile
        if (hasOutputTxt(logParent)) {
          logParent
        } else if (logParent != null && hasOutputTxt(logParent.getParentFile)) {
          logParent.getParentFile
        } else {
          cwd
        }
      } else {
        cwd
      }

    val tempDir = new File(log.getCanonicalPath + File.separator + dirName)
    if (!tempDir.exists) tempDir.mkdir()
    tempDir.getAbsolutePath
  }


  /**
   * Lists the siblings of `file` whose names start with `file`'s name, ordered by the integer
   * that follows the last '.' in each name.
   *
   * Throws NumberFormatException when a matching name has a non-numeric suffix.
   */
  private def getSortedFiles(file: File): Array[File] = {
    file.getParentFile.listFiles.filter(_.getName.startsWith(file.getName)).sortBy { f =>
      val n = f.getName
      val i = n.lastIndexOf('.')
      n.substring(i + 1).toInt
    }
  }

  /**
   * Value equality used when comparing a Spark field with a SnappyData field.
   *
   * Plain `==` is not enough for query results: NaN never equals NaN, and JVM arrays compare by
   * reference. Two NaNs of the same floating type are treated as equal here and arrays are
   * compared element-wise; everything else (including null) falls back to `==`.
   */
  private def sameFieldValue(expected: Any, actual: Any): Boolean = (expected, actual) match {
    case (e: Double, a: Double) => e == a || (e.isNaN && a.isNaN)
    case (e: Float, a: Float) => e == a || (e.isNaN && a.isNaN)
    case (e: Array[_], a: Array[_]) => e.sameElements(a)
    case _ => expected == actual
  }

  /**
   * Asserts that two DataFrames have the same schema, the same number of rows and, row by row,
   * the same field values.
   *
   * Rows are compared positionally, so both results are expected to be sorted on some common key.
   *
   * @param sparkDf  result obtained from the plain Spark session
   * @param snappyDf result obtained from the snappy context
   */
  def validateResult(sparkDf: DataFrame, snappyDf: DataFrame): Unit = {
    val sparkSchema = sparkDf.schema
    val snappySchema = snappyDf.schema
    assert(sparkSchema == snappySchema, "schemas from spark and snappy are not equal")

    // Collect once and compare the collected sizes rather than running two extra count() jobs.
    val sparkResult = sparkDf.collect()
    val snappyResult = snappyDf.collect()
    assert(sparkResult.length == snappyResult.length,
      "counts from spark and snappy are not equal")

    // Every row is compared, including the last one: the previous bound of `size - 1` skipped
    // the final row, which made the check a no-op for single-row results.
    // Schemas are equal at this point, so fields are matched by position.
    val schemaFields = sparkSchema.fields
    for (i <- sparkResult.indices) {
      val snRow = snappyResult(i)
      val spRow = sparkResult(i)
      schemaFields.indices.foreach { fieldIndex =>
        val f = schemaFields(fieldIndex)
        val snField: Any = snRow.get(fieldIndex)
        val spField: Any = spRow.get(fieldIndex)
        assert(sameFieldValue(spField, snField),
          s"field from spark row and snappy row are not equal. field name=${f.name} at index=${i}")
      })
    }
  }

  test("abs") {

    query = "select abs(-1)"
    var snappyDf = snc.sql(s"$query")
    var sparkDf = sparkSession.sql(s"$query")
    validateResult(sparkDf, snappyDf)

    query = "select abs(1)"
    var snappyDf1 = snc.sql(s"$query")
    var sparkDf1 = sparkSession.sql(s"$query")
    validateResult(sparkDf1, snappyDf1)

    // The two queries differ only in their literal, so their result column names must differ.
    // NOTE(review): presumably guards against literal tokenization / plan caching reusing the
    // first query's output names - confirm.
    val c1s = snappyDf.columns
    val c2s = snappyDf1.columns
    assert(!c1s.sameElements(c2s))

    query = "select abs(1.1)"
    sparkDf = sparkSession.sql(s"$query")
    snappyDf = snc.sql(s"$query")
    validateResult(sparkDf, snappyDf)

    query = "select abs(-1.1)"
    sparkDf = sparkSession.sql(s"$query")
    snappyDf = snc.sql(s"$query")
    validateResult(sparkDf, snappyDf)

    query = "select abs(0.0)"
    sparkDf = sparkSession.sql(s"$query")
    snappyDf = snc.sql(s"$query")
    validateResult(sparkDf, snappyDf)

  }

  test("coalesce") {

    query = "SELECT COALESCE(NULL,NULL,NULL,'abc',NULL,'Example.com')"
    var snappyDf = snc.sql(s"$query")
    var sparkDf = sparkSession.sql(s"$query")
    validateResult(sparkDf, snappyDf)

    query = "SELECT COALESCE(NULL, 1, 2, 'abc')"
    var snappyDf1 = snc.sql(s"$query")
    var sparkDf1 = sparkSession.sql(s"$query")
    validateResult(sparkDf1, snappyDf1)

    // Different argument lists must yield different result column names.
    val c1s = snappyDf.columns
    val c2s = snappyDf1.columns
    assert(!c1s.sameElements(c2s))

    query = "SELECT COALESCE(1, 2)"
    sparkDf = sparkSession.sql(s"$query")
    snappyDf = snc.sql(s"$query")
    validateResult(sparkDf, snappyDf)

    query = "SELECT COALESCE(NULL, NULL)"
    sparkDf = sparkSession.sql(s"$query")
    snappyDf = snc.sql(s"$query")
    validateResult(sparkDf, snappyDf)
  }

  test("cast") {

    // On snappy shell for below query throws error
    // snappy> select cast('NaN' as double);
// ERROR 22003: (SQLState=22003 Severity=20000) + // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0) + // The resulting value is outside the range for data type 'DOUBLE' column 'null'. + query = "select cast('NaN' as double)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT CAST(25.65 AS varchar(12))" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT cast('10' as int)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT CAST('2017-08-25' AS date)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + } + + test("explode") { + + query = "SELECT explode(array(10, 20))" + var sparkDf = sparkSession.sql(s"$query") + var snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT explode(array(0))" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT explode(array(NULL,1))" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("greatest") { + + query = "SELECT greatest(10, 9, 2, 4, 3)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT greatest(0, NULL)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("if") { + + query = "SELECT if(1 < 2, 'a', 'b')" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + 
validateResult(sparkDf, snappyDf) + + query = "SELECT if(0 < NULL, 'a', 'b')" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("inline") { + + query = "SELECT inline(array(struct(1, 'a'), struct(2, 'b')))" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT inline(array(struct(1), struct(2)))" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("isnan") { + + query = "SELECT isnan(cast('NaN' as double))" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT isnan(123)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("ifnull") { + + query = "SELECT ifnull(NULL, array('2'))" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT ifnull(2, 3)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("isnull") { + + query = "SELECT isnull(1)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT isnull('abc')" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns 
+ val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT isnull(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("isnotnull") { + + query = "SELECT isnotnull(1)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT isnotnull('abc')" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT isnotnull(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("least") { + + query = "SELECT least(10, 9, 2, 4, 3)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT least(null, 9, 3)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } + + test("nanvl") { + + query = "SELECT nanvl(cast('NaN' as double), 123)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + // On snappy shell throws error for below query + // snappy> SELECT nanvl(cast('NaN' as double), cast('NaN' as double)); + // ERROR 22003: (SQLState=22003 Severity=20000) + // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0) + // The resulting value is outside the range for data type 'DOUBLE' column 'null'. 
+ query = "SELECT nanvl(cast('NaN' as double), cast('NaN' as double))" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + // snappy> SELECT nanvl('NaN','NaN'); + // ERROR 22003: (SQLState=22003 Severity=20000) + // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0) + // The resulting value is outside the range for data type 'DOUBLE' column 'null'. + query = "SELECT nanvl('NaN','NaN')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + } + + test("nullif") { + + query = "SELECT nullif(2, 2)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT nullif( 9, 3)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT nullif( 9, 9, 4)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + // Below query fails to run with snappysession. 
+    // Test passing individually but fails to run in precheckin
+
+    // query = "SELECT nullif( 9, 9, 9)"
+    // sparkDf = sparkSession.sql(s"$query")
+    // snappyDf = snc.sql(s"$query")
+    // validateResult(sparkDf, snappyDf)
+
+  }
+
+  /**
+   * Runs `sql` on the SnappyData session (`snc`) and on the plain Spark
+   * session (`sparkSession`) and hands both results to `validateResult`.
+   *
+   * The suite-level `query` variable is still updated, exactly as the
+   * hand-written tests did, in case other members of the suite read it.
+   *
+   * @param sql the statement to execute on both engines
+   * @return the SnappyData result, so callers can inspect its column names
+   */
+  private def compareWithSpark(sql: String) = {
+    query = sql
+    val snappyResult = snc.sql(s"$query")
+    val sparkResult = sparkSession.sql(s"$query")
+    validateResult(sparkResult, snappyResult)
+    snappyResult
+  }
+
+  /**
+   * Runs `sql` on the SnappyData session only and expects a single row, but
+   * tolerates an `Exception` from the engine by printing its stack trace.
+   *
+   * Used for statements that are documented below as currently failing on
+   * SnappyData; this is deliberately best-effort. Only `Exception` is caught,
+   * so anything that is not an `Exception` still propagates.
+   *
+   * @param sql the statement to execute
+   */
+  private def runToleratingFailure(sql: String): Unit = {
+    try {
+      query = sql
+      assertEquals(1, snc.sql(s"$query").count())
+    } catch {
+      case e: Exception => e.printStackTrace()
+    }
+  }
+
+  test("nvl") {
+    val first = compareWithSpark("SELECT nvl(NULL, array('2'))")
+    val second = compareWithSpark("SELECT nvl( 9, 3)")
+
+    // Different arguments must not produce identical column names.
+    assert(!first.columns.sameElements(second.columns))
+  }
+
+  test("nvl2") {
+    val first = compareWithSpark("SELECT nvl2(NULL, 2, 1)")
+    val second = compareWithSpark("SELECT nvl2( 9, 3, 1)")
+
+    assert(!first.columns.sameElements(second.columns))
+  }
+
+  test("posexplode") {
+    compareWithSpark("SELECT posexplode(array(10,20))")
+    compareWithSpark("SELECT posexplode(array(10,0,null))")
+  }
+
+  test("rand") {
+    // Results are random, so only the row count is checked (no Spark comparison).
+    query = "select rand()"
+    val unseeded = snc.sql(s"$query")
+    assertEquals(1, unseeded.count())
+
+    query = "select rand(null)"
+    val nullSeeded = snc.sql(s"$query")
+    assertEquals(1, nullSeeded.count())
+
+    assert(!unseeded.columns.sameElements(nullSeeded.columns))
+
+    // Throws error on snappy shell as well as in test
+    // snappy> select rand(0);
+    // ERROR 42000: (SQLState=42000 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // Syntax error or analysis exception: Input argument
+    // to rand must be an integer, long or null literal.;
+    runToleratingFailure("select rand(0)")
+
+    // Throws error on snappy shell as well as in test
+    // snappy> select rand(2);
+    // ERROR 42000: (SQLState=42000 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // Syntax error or analysis exception: Input argument
+    // to rand must be an integer, long or null literal.;
+    runToleratingFailure("select rand(2)")
+  }
+
+  test("randn") {
+    // Results are random, so only the row count is checked (no Spark comparison).
+    query = "select randn()"
+    val unseeded = snc.sql(s"$query")
+    assertEquals(1, unseeded.count())
+
+    query = "select randn(null)"
+    val nullSeeded = snc.sql(s"$query")
+    assertEquals(1, nullSeeded.count())
+
+    assert(!unseeded.columns.sameElements(nullSeeded.columns))
+
+    // Throws error on snappy shell as well as in test
+    // snappy> select randn(0);
+    // ERROR 42000: (SQLState=42000 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // Syntax error or analysis exception: Input argument
+    // to randn must be an integer, long or null literal.;
+    runToleratingFailure("select randn(0)")
+
+    // Throws error on snappy shell as well as in test
+    // snappy> select randn(2);
+    // ERROR 42000: (SQLState=42000 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // Syntax error or analysis exception: Input argument
+    // to randn must be an integer, long or null literal.;
+    runToleratingFailure("select randn(2)")
+  }
+
+  test("stack") {
+    compareWithSpark("SELECT stack(2, 1, 2, 3)")
+    compareWithSpark("SELECT stack(2, 1, 2, 3, 4)")
+  }
+
+  test("when") {
+    val first = compareWithSpark("SELECT case when 2>1 then 2 else 1 end")
+    val second = compareWithSpark("SELECT case when 2<1 then 1 else 2 end")
+
+    assert(!first.columns.sameElements(second.columns))
+  }
+
+  test("acos") {
+    // On snappy shell throws below error
+    // snappy> select acos(2);
+    // ERROR 22003: (SQLState=22003 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // The resulting value is outside the range for data type 'DOUBLE' column 'null'.
+    val first = compareWithSpark("select acos(2)")
+    val second = compareWithSpark("SELECT acos(1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT acos(-1)")
+    compareWithSpark("SELECT acos(0)")
+    compareWithSpark("SELECT acos(null)")
+    compareWithSpark("SELECT acos(2.2)")
+  }
+
+  test("asin") {
+    val first = compareWithSpark("SELECT asin(0)")
+
+    // On snappy shell throws below error
+    // snappy> SELECT asin(2);
+    // ERROR 22003: (SQLState=22003 Severity=20000)
+    // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0)
+    // The resulting value is outside the range for data type 'DOUBLE' column 'null'.
+    val second = compareWithSpark("SELECT asin(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT asin(-2)")
+    compareWithSpark("SELECT asin(null)")
+    compareWithSpark("SELECT asin(2.2)")
+  }
+
+  test("atan") {
+    val first = compareWithSpark("SELECT atan(0)")
+    val second = compareWithSpark("SELECT atan(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT atan(-2)")
+    compareWithSpark("SELECT atan(null)")
+    compareWithSpark("SELECT atan(2.2)")
+  }
+
+  test("atan2") {
+    val first = compareWithSpark("SELECT atan2(0, 0)")
+    val second = compareWithSpark("SELECT atan2(2, 3)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT atan2(2, null)")
+    compareWithSpark("SELECT atan2(2.2, 3)")
+  }
+
+  test("bin") {
+    val first = compareWithSpark("SELECT bin(13)")
+    val second = compareWithSpark("SELECT bin(-13)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT bin(null)")
+    compareWithSpark("SELECT bin(13.3)")
+  }
+
+  test("bround") {
+    val first = compareWithSpark("SELECT bround(2.5, 0)")
+    val second = compareWithSpark("SELECT bround(2.5, 3) as col2")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT bround(2.5, null)")
+    // Previously queried round(0, null), duplicating the "round" test and
+    // leaving this bround case untested.
+    compareWithSpark("SELECT bround(0, null)")
+  }
+
+  test("cbrt") {
+    val first = compareWithSpark("SELECT cbrt(25)")
+    val second = compareWithSpark("SELECT cbrt(0)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT cbrt(null)")
+    compareWithSpark("SELECT cbrt(27.0)")
+  }
+
+  test("ceil") {
+    val first = compareWithSpark("SELECT ceil(-0.1)")
+    val second = compareWithSpark("SELECT ceil(5)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT ceil(0)")
+    compareWithSpark("SELECT ceil(null)")
+  }
+
+  test("ceiling") {
+    val first = compareWithSpark("SELECT ceiling(-0.1)")
+    val second = compareWithSpark("SELECT ceiling(5)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT ceiling(0)")
+    compareWithSpark("SELECT ceiling(null)")
+  }
+
+  test("cos") {
+    val first = compareWithSpark("SELECT cos(0)")
+    val second = compareWithSpark("SELECT cos(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT cos(-2)")
+    compareWithSpark("SELECT cos(null)")
+    compareWithSpark("SELECT cos(2.2)")
+  }
+
+  test("cosh") {
+    val first = compareWithSpark("SELECT cosh(0)")
+    val second = compareWithSpark("SELECT cosh(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT cosh(-2)")
+    compareWithSpark("SELECT cosh(null)")
+    compareWithSpark("SELECT cosh(2.2)")
+  }
+
+  test("conv") {
+    val first = compareWithSpark("SELECT conv('100', 2, 10)")
+    val second = compareWithSpark("SELECT conv(-10, 16, -10)")
+
+    assert(!first.columns.sameElements(second.columns))
+  }
+
+  test("degrees") {
+    val first = compareWithSpark("SELECT degrees(3.141592653589793)")
+    val second = compareWithSpark("SELECT degrees(6.283185307179586 )")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT degrees(null)")
+    compareWithSpark("SELECT degrees(0)")
+  }
+
+  test("e") {
+    compareWithSpark("SELECT e()")
+  }
+
+  test("exp") {
+    val first = compareWithSpark("SELECT exp(0)")
+    val second = compareWithSpark("SELECT exp(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT exp(null)")
+  }
+
+  test("expm1") {
+    val first = compareWithSpark("SELECT expm1(0)")
+    val second = compareWithSpark("SELECT expm1(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT expm1(null)")
+  }
+
+  test("floor") {
+    val first = compareWithSpark("SELECT floor(5)")
+    val second = compareWithSpark("SELECT floor(null)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT floor(0)")
+    compareWithSpark("SELECT floor(-0.1)")
+  }
+
+  test("factorial") {
+    val first = compareWithSpark("SELECT factorial(5)")
+    val second = compareWithSpark("SELECT factorial(-5)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT factorial(0)")
+    compareWithSpark("SELECT factorial(null)")
+  }
+
+  test("hex") {
+    val first = compareWithSpark("SELECT hex(17)")
+    val second = compareWithSpark("SELECT hex(0)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT hex(null)")
+    compareWithSpark("SELECT hex('Spark SQL')")
+  }
+
+  test("hypot") {
+    val first = compareWithSpark("SELECT hypot(3, 4)")
+    val second = compareWithSpark("SELECT hypot(7,8)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT hypot(0,0)")
+    compareWithSpark("SELECT hypot(0,null)")
+    compareWithSpark("SELECT hypot(null,null)")
+  }
+
+  test("log") {
+    val first = compareWithSpark("SELECT log(10, 100)")
+    val second = compareWithSpark("SELECT log(10,1000)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT log(10,0)")
+    compareWithSpark("SELECT log(10,null)")
+    compareWithSpark("SELECT log(10, 1000.234)")
+  }
+
+  test("log10") {
+    val first = compareWithSpark("SELECT log10(10)")
+    val second = compareWithSpark("SELECT log10(0)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT log10(-2)")
+    compareWithSpark("SELECT log10(null)")
+    compareWithSpark("SELECT log10(1.2)")
+  }
+
+  test("log1p") {
+    val first = compareWithSpark("SELECT log1p(0)")
+    val second = compareWithSpark("SELECT log1p(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT log1p(-2)")
+    compareWithSpark("SELECT log1p(1.2)")
+    compareWithSpark("SELECT log1p(null)")
+  }
+
+  test("log2") {
+    val first = compareWithSpark("SELECT log2(0)")
+    val second = compareWithSpark("SELECT log2(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT log2(-2)")
+    compareWithSpark("SELECT log2(1.2)")
+    compareWithSpark("SELECT log2(null)")
+  }
+
+  test("ln") {
+    val first = compareWithSpark("SELECT ln(0)")
+    val second = compareWithSpark("SELECT ln(1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT ln(-1)")
+    compareWithSpark("SELECT ln(1.2)")
+    compareWithSpark("SELECT ln(null)")
+  }
+
+  test("negative") {
+    val first = compareWithSpark("SELECT negative(0)")
+    val second = compareWithSpark("SELECT negative(1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT negative(-1)")
+    compareWithSpark("SELECT negative(null)")
+    compareWithSpark("SELECT negative(1.2)")
+    compareWithSpark("SELECT negative(-1.2)")
+  }
+
+  test("pi") {
+    compareWithSpark("SELECT pi()")
+  }
+
+  test("pmod") {
+    val first = compareWithSpark("SELECT pmod(10,3)")
+    val second = compareWithSpark("SELECT pmod(-10,3)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT pmod(0,3)")
+    compareWithSpark("SELECT pmod(null,3)")
+    compareWithSpark("SELECT pmod(1.2,3)")
+  }
+
+  test("positive") {
+    val first = compareWithSpark("SELECT positive(0)")
+    val second = compareWithSpark("SELECT positive(1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT positive(-1)")
+    compareWithSpark("SELECT positive(null)")
+    compareWithSpark("SELECT positive(1.2)")
+    compareWithSpark("SELECT positive(-1.2)")
+  }
+
+  test("pow") {
+    val first = compareWithSpark("SELECT pow(3,2)")
+    val second = compareWithSpark("SELECT pow(-10,3)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT pow(0,3)")
+    compareWithSpark("SELECT pow(null,3)")
+    compareWithSpark("SELECT pow(1.2,3)")
+  }
+
+  test("power") {
+    val first = compareWithSpark("SELECT power(3,2)")
+    val second = compareWithSpark("SELECT power(-10,3)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT power(0,3)")
+    compareWithSpark("SELECT power(null,3)")
+    compareWithSpark("SELECT power(1.2,3)")
+  }
+
+  test("radians") {
+    val first = compareWithSpark("SELECT radians(360.0)")
+    val second = compareWithSpark("SELECT radians(180)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT radians(0)")
+    compareWithSpark("SELECT radians(null)")
+  }
+
+  test("rint") {
+    val first = compareWithSpark("SELECT rint(12.3456)")
+    val second = compareWithSpark("SELECT rint(-12.3456)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT rint(180)")
+    compareWithSpark("SELECT rint(0)")
+    compareWithSpark("SELECT rint(null)")
+  }
+
+  test("round") {
+    val first = compareWithSpark("SELECT round(2.5, 0)")
+    val second = compareWithSpark("SELECT round(2.5, 3)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT round(2.5, null)")
+    compareWithSpark("SELECT round(0, null)")
+  }
+
+  test("shiftleft") {
+    val first = compareWithSpark("SELECT shiftleft(4, 1)")
+    val second = compareWithSpark("SELECT shiftleft(0, 1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT shiftleft(null, null)")
+    compareWithSpark("SELECT shiftleft(2.2, 2)")
+    compareWithSpark("SELECT shiftleft(2.2, 0)")
+  }
+
+  test("shiftright") {
+    val first = compareWithSpark("SELECT shiftright(4, 1)")
+    val second = compareWithSpark("SELECT shiftright(0, 1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT shiftright(null, null)")
+    compareWithSpark("SELECT shiftright(2.2, 2)")
+    compareWithSpark("SELECT shiftright(2.2, 0)")
+  }
+
+  test("shiftrightunsigned") {
+    val first = compareWithSpark("SELECT shiftrightunsigned(4, 1)")
+    val second = compareWithSpark("SELECT shiftrightunsigned(0, 1)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT shiftrightunsigned(null, null)")
+    compareWithSpark("SELECT shiftrightunsigned(2.2, 2)")
+    compareWithSpark("SELECT shiftrightunsigned(2.2, 0)")
+  }
+
+  test("sign") {
+    val first = compareWithSpark("SELECT sign(40)")
+    val second = compareWithSpark("SELECT sign(-40)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT sign(0)")
+    compareWithSpark("SELECT sign(null)")
+    compareWithSpark("SELECT sign(-4.20)")
+  }
+
+  test("signum") {
+    val first = compareWithSpark("SELECT signum(40)")
+    val second = compareWithSpark("SELECT signum(-40)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT signum(0)")
+    compareWithSpark("SELECT signum(null)")
+    compareWithSpark("SELECT signum(-4.20)")
+  }
+
+  test("sin") {
+    val first = compareWithSpark("SELECT sin(0)")
+    val second = compareWithSpark("SELECT sin(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT sin(-2)")
+    compareWithSpark("SELECT sin(null)")
+    compareWithSpark("SELECT sin(2.2)")
+  }
+
+  test("sinh") {
+    val first = compareWithSpark("SELECT sinh(0)")
+    val second = compareWithSpark("SELECT sinh(2)")
+
+    assert(!first.columns.sameElements(second.columns))
+
+    compareWithSpark("SELECT sinh(-2)")
+    compareWithSpark("SELECT sinh(null)")
+    compareWithSpark("SELECT sinh(2.2)")
+  }
+
+  test("str_to_map") {
+
+ query = "SELECT str_to_map(null)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + // throws below error + // org.apache.spark.sql.AnalysisException: + // Cannot have map type columns in DataFrame which calls set + // operations(intersect, except, etc.), but the type of + // column str_to_map(CAST(NULL AS STRING), ,, :) is map;; + query = "SELECT str_to_map('a:1,b:2,c:3', ',', ':')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT str_to_map('a')" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT str_to_map('-1.2:a')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT str_to_map(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("sqrt") { + + query = "SELECT sqrt(4)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + // On snappy shell throws below error for this query + // snappy> select sqrt(-4); + // ERROR 22003: (SQLState=22003 Severity=20000) + // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-1) + // The resulting value is outside the range for data type 'DOUBLE' column 'null'. 
+ query = "SELECT sqrt(-4)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT sqrt(0)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT sqrt(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT sqrt(4.4)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("tan") { + + query = "SELECT tan(0)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tan(2)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT tan(-2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tan(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tan(2.2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tan(-2.2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("tanh") { + + query = "SELECT tanh(0)" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tanh(2)" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + 
assert(!c1s.sameElements(c2s)) + + query = "SELECT tanh(-2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tanh(null)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tanh(2.2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT tanh(-2.2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("+") { + + query = "SELECT (1+1)+3" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 1.2+3+(4.5+2)" + + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT 0+0" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 0+null" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("-") { + + query = "SELECT 1-1-1" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 0-0" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT 0-null" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 1.2-3-(4.5-2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("*") { + + query = "SELECT 4*2" 
+ var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 0*0" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT 0*null" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 1.2*3*(4.5*2)" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("/") { + + query = "SELECT 4/2" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 0/0" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT 0/null" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 4.5/2" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("%") { + + query = "SELECT 4%2" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 0%0" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + query = "SELECT 0%null" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT 4.5%2" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("avg") { + 
+ var sparkQuery = "SELECT avg(intcol) from sparktable" + var snappyQuery = "SELECT avg(intcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT avg(intcol) from sparktable" + snappyQuery = "SELECT avg(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("count") { + + var sparkQuery = "SELECT count(*) from sparktable" + var snappyQuery = "SELECT count(*) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT count(intcol) from sparktable" + var snappyQuery1 = "SELECT count(intcol) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf1 = snc.sql(s"$snappyQuery1") + validateResult(sparkDf, snappyDf1) + + var snappyDF = snc.sql(s"$snappyQuery") + var snappyDF1 = snc.sql(s"$snappyQuery1") + + val c1s = snappyDF.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + + sparkQuery = "SELECT count(distinct(intcol)) from sparktable" + snappyQuery = "SELECT count(distinct(intcol)) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT count(*) from sparktable" + snappyQuery = "SELECT count(*) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT count(intcol) from sparktable" + snappyQuery = "SELECT count(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT count(distinct(intcol)) from sparktable" + snappyQuery = "SELECT count(distinct(intcol)) from columnTable" + sparkDf = 
sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("first") { + + var sparkQuery = "SELECT first(stringcol) from sparktable" + var snappyQuery = "SELECT first(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT first(stringcol, true) from sparktable" + var snappyQuery1 = "SELECT first(stringcol, true) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf1 = snc.sql(s"$snappyQuery1") + validateResult(sparkDf, snappyDf1) + + var snappyDF = snc.sql(s"$snappyQuery") + var snappyDF1 = snc.sql(s"$snappyQuery1") + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + + sparkQuery = "SELECT first(stringcol) from sparktable" + snappyQuery = "SELECT first(stringcol) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT first(stringcol, true) from sparktable" + snappyQuery = "SELECT first(stringcol, true) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("first_value") { + + var sparkQuery = "SELECT first_value(stringcol) from sparktable" + var snappyQuery = "SELECT first_value(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + // throws below error + // org.apache.spark.sql.AnalysisException: + // The second argument of First should be a boolean literal.;; + + try { + sparkQuery = "SELECT first_value(stringcol, true) from sparktable" + var snappyQuery1 = "SELECT first_value(stringcol, true) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf1 = snc.sql(s"$snappyQuery1") + 
validateResult(sparkDf, snappyDf1) + + var snappyDF = snc.sql(s"$snappyQuery") + var snappyDF1 = snc.sql(s"$snappyQuery1") + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } catch { + case e: org.apache.spark.sql.AnalysisException => { // tolerate only the known failure + e.printStackTrace() + } + } + + sparkQuery = "SELECT first_value(stringcol) from sparktable" + snappyQuery = "SELECT first_value(stringcol) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + // throws below error (only this AnalysisException is tolerated) + // org.apache.spark.sql.AnalysisException: + // The second argument of First should be a boolean literal.;; + try { + sparkQuery = "SELECT first_value(stringcol, true) from sparktable" + snappyQuery = "SELECT first_value(stringcol, true) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } catch { + case e: org.apache.spark.sql.AnalysisException => { // tolerate only the known failure + e.printStackTrace() + } + } + } + + test("last") { + + var sparkQuery = "SELECT last(stringcol) from sparktable" + var snappyQuery = "SELECT last(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT last(stringcol, true) from sparktable" + var snappyQuery1 = "SELECT last(stringcol, true) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf1 = snc.sql(s"$snappyQuery1") + validateResult(sparkDf, snappyDf1) + + var snappyDF = snc.sql(s"$snappyQuery") + var snappyDF1 = snc.sql(s"$snappyQuery1") + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + + sparkQuery = "SELECT last(stringcol) from sparktable" + snappyQuery = "SELECT last(stringcol) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT last(stringcol, true) from 
sparktable" + snappyQuery = "SELECT last(stringcol, true) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("last_value") { + + var sparkQuery = "SELECT last_value(stringcol) from sparktable" + var snappyQuery = "SELECT last_value(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + // throws below error (only this analysis failure is tolerated) + // snappy> SELECT last_value(stringcol, true) from columntable; + // ERROR 42000: (SQLState=42000 Severity=20000) + // (Server=localhost/127.0.0.1[1528] Thread=ThriftProcessor-0) + // Syntax error or analysis exception: + // The second argument of First should be a boolean literal.;; + + try { + sparkQuery = "SELECT last_value(stringcol, true) from sparktable" + var snappyQuery1 = "SELECT last_value(stringcol, true) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf1 = snc.sql(s"$snappyQuery1") + validateResult(sparkDf, snappyDf1) + + var snappyDF = snc.sql(s"$snappyQuery") + var snappyDF1 = snc.sql(s"$snappyQuery1") + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } catch { + case e: org.apache.spark.sql.AnalysisException => { // tolerate only the known failure + e.printStackTrace() + } + } + + sparkQuery = "SELECT last_value(stringcol) from sparktable" + snappyQuery = "SELECT last_value(stringcol) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + // throws below error (only this AnalysisException is tolerated) + // org.apache.spark.sql.AnalysisException: + // The second argument of last should be a boolean literal.;; + + try { + sparkQuery = "SELECT last_value(stringcol, true) from sparktable" + snappyQuery = "SELECT last_value(stringcol, true) from columntable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } catch { + case e: 
org.apache.spark.sql.AnalysisException => { // tolerate only the known failure + e.printStackTrace() + } + } + } + + test("max") { + + var sparkQuery = "SELECT max(intcol) from sparktable" + var snappyQuery = "SELECT max(intcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT max(intcol) from sparktable" + snappyQuery = "SELECT max(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("min") { + + var sparkQuery = "SELECT min(intcol) from sparktable" + var snappyQuery = "SELECT min(intcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT min(intcol) from sparktable" + snappyQuery = "SELECT min(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("sum") { + + var sparkQuery = "SELECT sum(intcol) from sparktable" + var snappyQuery = "SELECT sum(intcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT sum(intcol) from sparktable" + snappyQuery = "SELECT sum(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("length") { + + var sparkQuery = "SELECT length(stringcol) from sparktable" + var snappyQuery = "SELECT length(stringcol) from columntable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT length(stringcol) from sparktable" + snappyQuery = "SELECT length(stringcol) from rowTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + 
validateResult(sparkDf, snappyDf) + + query = "SELECT length('Spark SQL')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("lower") { + + var sparkQuery = "SELECT lower(stringcol) from sparktable" + var snappyQuery = "SELECT lower(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT lower(stringcol) from sparktable" + snappyQuery = "SELECT lower(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT lower('Spark SQL')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT lower('abcABC123@#$%^&')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + } + + test("lcase") { + + var sparkQuery = "SELECT lcase(stringcol) from sparktable" + var snappyQuery = "SELECT lcase(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT lcase(stringcol) from sparktable" + snappyQuery = "SELECT lcase(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT lcase('Spark SQL')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT lcase('abcABC123@#$%^&')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("upper") { + + var sparkQuery = "SELECT upper(stringcol) from sparktable" + var snappyQuery = "SELECT upper(stringcol) from rowtable" + var sparkDf = 
sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT upper(stringcol) from sparktable" + snappyQuery = "SELECT upper(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT upper('Spark SQL')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT upper('abcABC123@#$%^&')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + } + + test("ucase") { + + var sparkQuery = "SELECT ucase(stringcol) from sparktable" + var snappyQuery = "SELECT ucase(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT ucase(stringcol) from sparktable" + snappyQuery = "SELECT ucase(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT ucase('Spark SQL')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + query = "SELECT ucase('abcABC123@#$%^&')" + sparkDf = sparkSession.sql(s"$query") + snappyDf = snc.sql(s"$query") + validateResult(sparkDf, snappyDf) + + } + + test("sort_array") { + + + query = "SELECT sort_array(array('b', 'd', 'c', 'a'))" + var snappyDf = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDf) + + // ERROR : org.apache.spark.sql.AnalysisException: cannot resolve + // 'sort_array(array('b', 'd', 'c', 'a'), true)' due to data type + // mismatch: Sort order in second argument requires a boolean literal.;; + // 'Project [sort_array(array(ParamLiteral:0,468#1,b, + // ParamLiteral:1,468#1,d, ParamLiteral:2,468#1,c, + // 
ParamLiteral:3,468#1,a), ParamLiteral:4,468#4,true) AS RES#7890] + try { + query = "SELECT sort_array(array('b', 'd', 'c', 'a'), true) as res1" + var snappyDf1 = snc.sql(s"$query") + var sparkDf1 = sparkSession.sql(s"$query") + validateResult(sparkDf1, snappyDf1) + + val c1s = snappyDf.columns + val c2s = snappyDf1.columns + assert(!c1s.sameElements(c2s)) + } catch { + case e: org.apache.spark.sql.AnalysisException => { // tolerate only the known failure + e.printStackTrace() + } + } + + } + + test("collect_list") { + + var sparkQuery = "SELECT collect_list(stringcol) from sparktable" + var snappyQuery = "SELECT collect_list(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT collect_list(stringcol) from sparktable" + snappyQuery = "SELECT collect_list(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + } + + test("collect_set") { + + var sparkQuery = "SELECT collect_set(stringcol) from sparktable" + var snappyQuery = "SELECT collect_set(stringcol) from rowtable" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT collect_set(stringcol) from sparktable" + snappyQuery = "SELECT collect_set(stringcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT collect_set(intcol) from sparktable" + snappyQuery = "SELECT collect_set(intcol) from rowtable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT collect_set(intcol) from sparktable" + snappyQuery = "SELECT collect_set(intcol) from columnTable" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + 
+ } + + test("concat") { + + var sparkQuery = "SELECT concat(stringcol,intcol) from sparktable order by intcol asc" + var snappyQuery = "SELECT concat(stringcol,intcol) from rowtable order by intcol asc" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT concat(stringcol,intcol) from sparktable order by intcol asc" + snappyQuery = "SELECT concat(stringcol,intcol) from columnTable order by intcol asc" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT concat('Spark', 'SQL')" + var snappyDF = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF) + + query = "SELECT concat('Spark', 123)" + var snappyDF1 = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("concat_ws") { + + var sparkQuery = "SELECT concat_ws(' ',stringcol,intcol)" + + " from sparktable order by intcol asc" + var snappyQuery = "SELECT concat_ws(' ',stringcol,intcol) from rowtable order by intcol asc" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT concat_ws(' ',stringcol,intcol) from sparktable order by intcol asc" + snappyQuery = "SELECT concat_ws(' ',stringcol,intcol) from columnTable order by intcol asc" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT concat_ws(' ','Spark', 'SQL')" + var snappyDF = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF) + + query = "SELECT concat_ws(' ','Spark', 123)" + var snappyDF1 = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + 
validateResult(sparkDf, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("elt") { + + query = "SELECT elt(1,'Spark','sql')" + var snappyDF = snc.sql(s"$query") + var sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF) + + query = "SELECT elt(2,'Spark', 123)" + var snappyDF1 = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("find_in_set") { + + query = "SELECT find_in_set('c','abc,b,ab,c,def')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT find_in_set(1, '2,3,1')" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("format_number") { + + query = "SELECT format_number(12332.123456, 4)" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT format_number(12332.123456, 1)" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("format_string") { + + query = "SELECT format_string('Hello World %d %s', 100, 'days')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT format_string('Hello World %d', 10)" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("initcap") { + 
+ query = "SELECT initcap('sPark sql')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT initcap('ssssPark sql')" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("instr") { + + query = "SELECT instr('SparkSQL', 'SQL')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT instr('123abcABC', 'ab')" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("levenshtein") { + + query = "SELECT levenshtein('kitten', 'sitting')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT levenshtein('Snappy', 'Spark')" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("locate") { + + query = "SELECT locate('bar', 'foobarbar', 5)" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT locate('abc', 'defghrih', 2)" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("lpad") { + + query = "SELECT lpad('hi', 5, '??')" + var snappyDF = snc.sql(s"$query") + var sparkDF = sparkSession.sql(s"$query") + validateResult(sparkDF, snappyDF) + + query = "SELECT lpad('hi', 1, 
'??')" + var snappyDF1 = snc.sql(s"$query") + var sparkDF1 = sparkSession.sql(s"$query") + validateResult(sparkDF1, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + test("add_months") { + + var sparkQuery = "SELECT add_months(datecol,1) from sparktable order by datecol asc" + var snappyQuery = "SELECT add_months(datecol,1) from rowtable order by datecol asc" + var sparkDf = sparkSession.sql(s"$sparkQuery") + var snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + sparkQuery = "SELECT add_months(datecol,1) from sparktable order by datecol asc" + snappyQuery = "SELECT add_months(datecol,1) from columnTable order by datecol asc" + sparkDf = sparkSession.sql(s"$sparkQuery") + snappyDf = snc.sql(s"$snappyQuery") + validateResult(sparkDf, snappyDf) + + query = "SELECT add_months('2016-08-31', 1)" + var snappyDF = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF) + + query = "SELECT add_months('2016-08-31', 0)" + var snappyDF1 = snc.sql(s"$query") + sparkDf = sparkSession.sql(s"$query") + validateResult(sparkDf, snappyDF1) + + val c1s = snappyDF.columns + val c2s = snappyDF1.columns + assert(!c1s.sameElements(c2s)) + } + + +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/SingleNodeTest.scala b/cluster/src/test/scala/org/apache/spark/sql/SingleNodeTest.scala new file mode 100644 index 0000000000..ab55f944dd --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/SingleNodeTest.scala @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql
+
+import io.snappydata.benchmark.TPCHColumnPartitionedTable
+import io.snappydata.{PlanTest, SnappyFunSuite}
+import org.scalatest.BeforeAndAfterEach
+import org.apache.spark.rdd.ZippedPartitionsPartition
+import org.apache.spark.sql.catalyst.plans.physical.SinglePartition
+import org.apache.spark.sql.collection.MultiBucketExecutorPartition
+import org.apache.spark.sql.execution.columnar.ColumnTableScan
+import org.apache.spark.sql.execution.row.RowTableScan
+
+/**
+ * Pruning tests for point queries on the TPCH orders table (column and row
+ * providers) plus a CASE WHEN query grouped by select-list position.
+ */
+class SingleNodeTest extends SnappyFunSuite with PlanTest with BeforeAndAfterEach {
+
+  /** Sets the system properties used by these tests before the suite starts. */
+  override def beforeAll(): Unit = {
+    // System.setProperty("org.codehaus.janino.source_debugging.enable", "true")
+    System.setProperty("spark.sql.codegen.comments", "true")
+    System.setProperty("spark.testing", "true")
+    super.beforeAll()
+  }
+
+  /** Clears the system properties that were set in beforeAll. */
+  override def afterAll(): Unit = {
+    // System.clearProperty("org.codehaus.janino.source_debugging.enable")
+    System.clearProperty("spark.sql.codegen.comments")
+    System.clearProperty("spark.testing")
+    super.afterAll()
+  }
+
+  test("Nodes Pruning for column table") {
+    // run with ColumnBatchSize "1000" and put the earlier value back afterwards
+    val earlierValue = io.snappydata.Property.ColumnBatchSize.get(snc.sessionState.conf)
+    try {
+      io.snappydata.Property.ColumnBatchSize.set(snc.sessionState.conf, "1000")
+      SingleNodeTest.testNodesPruning(snc, "column")
+    } finally {
+      io.snappydata.Property.ColumnBatchSize.set(snc.sessionState.conf, earlierValue)
+    }
+  }
+
+  test("Nodes Pruning for row table") {
+    SingleNodeTest.testNodesPruning(snc, "row")
+  }
+
+  test("case when generation") {
+
+    snc.sql("create table czec1(c1 varchar(10), c2 integer) using column")
+    (1 until 10).foreach(v => snc.sql(s"insert into czec1 values('$v', $v)"))
+
+    // rows are compared through their toString form
+    val expected = Set("[3,3,3]",
+      "[2,Other,2]",
+      "[5,Other,5]"
+    )
+
+    val found = snc.sql(
+      "SELECT \"CZECB\".\"C1\" AS \"CB\"," +
+          " (CASE \"CZECB\".\"C1\" WHEN '3' THEN '3' ELSE 'Other' END) AS \"CB__group_\"," +
+          " SUM(\"CZECB\".\"C2\") AS \"sum_CC_ok\" " +
+          "FROM \"APP\".\"CZEC1\" \"CZECB\"" +
+          "WHERE ((\"CZECB\".\"C1\" IN ('2', '3', '5')) AND " +
+          "((\"CZECB\".\"C1\" IS NULL) OR " +
+          "(NOT ({fn LOCATE('1',{fn LCASE(\"CZECB\".\"C1\")},1)} > 0)))) " +
+          "GROUP BY 2, 1"
+    ).collect().map(_.toString).toSet
+
+    assert(expected.equals(found))
+  }
+}
+
+
+/** Queries and pruning checks shared by the tests above; testNodesPruning is public. */
+object SingleNodeTest {
+
+  val query0 = "select * from orders where o_orderkey = 1 or o_orderkey = 1"
+
+  // prefix only: callers append the order key to look up
+  val query1 = "select * from orders where o_orderkey = "
+
+  val query2 = "select * from orders where o_orderkey = {fn substring('d1xxd2', 2, 1)} "
+
+  val query3 = "select * from orders where o_orderkey = substring('acbc801xx', 5, 3) "
+
+  val query4 = "select * from orders where o_orderkey = {fn trim(" +
+      "substring(' acbc801xx', length(' 12345'), length('801'))) }"
+
+  val query5 = "select * from orders where o_orderkey = trim(" +
+      "substring(' acbc1410xx', length(' 12345'), length('1410'))) "
+
+  val query6 = "select O_ORDERDATE, {fn TIMESTAMPADD(SQL_TSI_DAY," +
+      " {fn FLOOR((-1 * {fn DAYOFYEAR(O_ORDERDATE)} - 1))}, O_ORDERDATE)}" +
+      " from orders where O_ORDERKEY = 32"
+
+  /**
+   * Creates and populates the orders table from the bundled "/TPCH" resource
+   * using the given provider and then runs the pruning checks for it.
+   *
+   * @param snc context used to create the table and run the queries
+   * @param tblProvider "row" or "column"; any other value fails the match
+   *                    below with a MatchError after the table was created
+   */
+  def testNodesPruning(snc: SnappyContext, tblProvider: String = "column"): Unit = {
+    // scalastyle:off println
+    val tpchDataPath = TPCHColumnPartitionedTable.getClass.getResource("/TPCH").getPath
+    val buckets_Order_Lineitem = "5"
+    TPCHColumnPartitionedTable.createPopulateOrderTable(snc, tpchDataPath,
+      isSnappy = true, buckets_Order_Lineitem, null, provider = tblProvider)
+
+    tblProvider match {
+      case "row" => SmartConnectorFunctions.verifyRowTablePruning(snc)
+      case "column" => testColumnTablePruning(snc)
+    }
+  }
+
+  /** Runs the point queries on the column table and checks the scanned bucket of each. */
+  private def testColumnTablePruning(snc: SnappyContext): Unit = {
+
+    /**
+     * Asserts that `df` is executed by a ColumnTableScan over exactly one
+     * partition that only reports bucket `bucketId`, and that exactly one
+     * more column batch was seen than skipped.
+     */
+    def validateSinglePartition(df: DataFrame, bucketId: Int): Unit = {
+      val plan = df.queryExecution.executedPlan.collectFirst {
+        case c: ColumnTableScan => c
+      }
+
+      val scanRDD = plan.map(_.dataRDD).
+          getOrElse(throw new AssertionError("Expecting ColumnTable Scan"))
+      val partitions = scanRDD.partitions
+      assert(plan.get.outputPartitioning == SinglePartition)
+      assert(partitions.length == 1, {
+        val sb = new StringBuilder("Pruning not in effect ? partitions found ")
+        partitions.foreach(p => sb.append(p.index).append(","))
+        sb.toString
+      })
+      val bstr = partitions(0) match {
+        case zp: ZippedPartitionsPartition => zp.partitionValues.map {
+          case mb: MultiBucketExecutorPartition => mb.bucketsString
+        }
+        case _ => Nil
+      }
+
+      // each BucketExecutor must have only one bucket.
+      // there are 2 BucketExecutor entries due to ZipPartion of RowBuffer.
+      assert(bstr.forall(_.toInt == bucketId), s"Expected $bucketId, found $bstr")
+
+      // read both counters by name instead of by position so that the check
+      // does not depend on the iteration order of the metrics map
+      val scanMetrics = df.queryExecution.executedPlan.collectLeaves().head.metrics
+      val batchesSeen = scanMetrics("columnBatchesSeen").value
+      val batchesSkipped = scanMetrics("columnBatchesSkipped").value
+
+      assert(batchesSeen - batchesSkipped == 1,
+        s"Stats Predicate filter not applied during scan ? \n" +
+            s" difference between" +
+            s" $batchesSeen (columnBatchesSeen) and $batchesSkipped (columnBatchesSkipped)" +
+            s" is expected to be exactly 1.")
+    }
+
+    validateSinglePartition(executeQuery(snc, query1 + 1, 1), 4)
+    validateSinglePartition(executeQuery(snc, query1 + 32, 32), 0)
+    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
+    // repeating the query deliberately
+    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
+    validateSinglePartition(executeQuery(snc, query1 + 1408, 1408), 0)
+    validateSinglePartition(executeQuery(snc, query1 + 1409, 1409), 2)
+    validateSinglePartition(executeQuery(snc, query1 + 1410, 1410), 0)
+    validateSinglePartition(executeQuery(snc, query1 + 1796, 1796), 4)
+    validateSinglePartition(executeQuery(snc, query1 + 801, 801), 4)
+    // string literals and function calls as the key: only the result is checked
+    executeQuery(snc, query1 + "'1'", 1)
+    executeQuery(snc, query1 + "'32'", 32)
+    executeQuery(snc, query2, 1)
+    executeQuery(snc, query3, 801)
+    executeQuery(snc, query4, 801)
+    executeQuery(snc, query5, 1410)
+
+    // query6 does not select the order key first, so the key assertion is off
+    val df = executeQuery(snc, query6, 32, false)
+    val r = df.collect()(0)
+    assert(r.getDate(0).toString.equals("1995-07-16"))
+    assert(r.getDate(1).toString.equals("1994-12-30"))
+  }
+
+  /**
+   * Runs `sql` and returns its DataFrame.
+   *
+   * @param orderKey expected value of the first column of the first row
+   * @param doAssert when true, the first column of the first row is read as a
+   *                 long and asserted to equal `orderKey`
+   */
+  private def executeQuery(snc: SnappyContext, sql: String, orderKey: Int,
+      doAssert: Boolean = true) : DataFrame = {
+    val df = snc.sql(sql)
+    if(doAssert) assert(df.collect()(0).getLong(0) == orderKey)
+    df
+  }
+
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/SnappySQLQuerySuite.scala b/cluster/src/test/scala/org/apache/spark/sql/SnappySQLQuerySuite.scala
new file mode 100644
index 0000000000..b676cf311e
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/SnappySQLQuerySuite.scala
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +/* + * Test for SPARK-10316 taken from Spark's DataFrameSuite having license as below. + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql
+
+import io.snappydata.Property.PlanCaching
+import io.snappydata.{Property, SnappyFunSuite}
+import org.scalatest.Matchers._
+
+import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, SnappyHashAggregateExec}
+import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark
+import org.apache.spark.sql.execution.joins.HashJoinExec
+import org.apache.spark.sql.functions.{bround, rand, round}
+import org.apache.spark.sql.test.SQLTestData.TestData2
+
+/**
+ * SQL query tests run through a SnappySession: semi joins, joins with
+ * temporary views, scalar and EXISTS sub-queries, aggregation plans, updates
+ * with sub-queries, grouping on expressions with constants and filter push
+ * down with plan caching.
+ */
+class SnappySQLQuerySuite extends SnappyFunSuite {
+
+  private lazy val session: SnappySession = snc.snappySession
+  // matches expression ids such as "#12L" (with an optional ",<n>" prefix) so
+  // that plan strings can be compared after replacing them with "#0"
+  private val idPattern = "(,[0-9]+)?#[0-9L]+".r
+
+  // Ported test from Spark
+  test("SNAP-1885 : left semi greater than predicate and equal operator") {
+    val df = snc.createDataFrame(snc.sparkContext.parallelize(
+      TestData2(1, 1) ::
+          TestData2(1, 2) ::
+          TestData2(2, 1) ::
+          TestData2(2, 2) ::
+          TestData2(3, 1) ::
+          TestData2(3, 2) :: Nil, 2))
+    df.write.format("row").saveAsTable("testData2")
+
+    checkAnswer(
+      snc.sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y " +
+          "ON x.b = y.b and x.a >= y.a + 2"),
+      Seq(Row(3, 1), Row(3, 2))
+    )
+
+    checkAnswer(
+      snc.sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y " +
+          "ON x.b = y.a and x.a >= y.b + 1"),
+      Seq(Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2))
+    )
+  }
+
+  test("SNAP-1884 Join with temporary table not returning rows") {
+    val df = snc.createDataFrame(snc.sparkContext.parallelize(
+      LowerCaseData(1, "a") ::
+          LowerCaseData(2, "b") ::
+          LowerCaseData(3, "c") ::
+          LowerCaseData(4, "d") :: Nil))
+    // first pass: row table joined with temporary views over it
+    df.write.format("row").saveAsTable("lowerCaseData")
+    snc.sql("SELECT DISTINCT n FROM lowerCaseData ORDER BY n DESC")
+        .limit(2)
+        .createOrReplaceTempView("subset1")
+    snc.sql("SELECT DISTINCT n FROM lowerCaseData ORDER BY n ASC")
+        .limit(2)
+        .createOrReplaceTempView("subset2")
+    checkAnswer(
+      snc.sql("SELECT * FROM lowerCaseData INNER JOIN subset1 ON " +
+          "subset1.n = lowerCaseData.n ORDER BY lowerCaseData.n"),
+      Row(3, "c", 3) ::
+          Row(4, "d", 4) :: Nil)
+
+    checkAnswer(
+      snc.sql("SELECT * FROM lowerCaseData INNER JOIN subset2 " +
+          "ON subset2.n = lowerCaseData.n ORDER BY lowerCaseData.n"),
+      Row(1, "a", 1) ::
+          Row(2, "b", 2) :: Nil)
+
+    // second pass: same checks on a column table with the broadcast threshold set to -1
+    snc.sql("set spark.sql.autoBroadcastJoinThreshold=-1")
+    df.write.format("column").saveAsTable("collowerCaseData")
+
+    snc.sql("SELECT DISTINCT n FROM collowerCaseData ORDER BY n DESC")
+        .limit(2)
+        .createOrReplaceTempView("colsubset1")
+    snc.sql("SELECT DISTINCT n FROM collowerCaseData ORDER BY n ASC")
+        .limit(2)
+        .createOrReplaceTempView("colsubset2")
+    checkAnswer(
+      snc.sql("SELECT * FROM collowerCaseData INNER JOIN colsubset1 ON " +
+          "colsubset1.n = collowerCaseData.n ORDER BY collowerCaseData.n"),
+      Row(3, "c", 3) ::
+          Row(4, "d", 4) :: Nil)
+
+    checkAnswer(
+      snc.sql("SELECT * FROM collowerCaseData INNER JOIN colsubset2 " +
+          "ON colsubset2.n = collowerCaseData.n ORDER BY collowerCaseData.n"),
+      Row(1, "a", 1) ::
+          Row(2, "b", 2) :: Nil)
+  }
+
+  import session.implicits._
+
+  test("SNAP-1840 -> uncorrelated scalar subquery") {
+
+    val df = Seq((1, "one"), (2, "two"), (3, "three")).toDF("key", "value")
+    df.write.format("row").saveAsTable("subqueryData")
+
+    checkAnswer(
+      session.sql("select -(select max(key) from subqueryData)"),
+      Array(Row(-3))
+    )
+
+    checkAnswer(
+      session.sql("select (select key from subqueryData where key > 2 order by key limit 1) + 1"),
+      Array(Row(4))
+    )
+
+    checkAnswer(
+      session.sql("select (select value from subqueryData limit 0)"),
+      Array(Row(null))
+    )
+
+    checkAnswer(
+      session.sql("select (select min(value) from subqueryData" +
+          " where key = (select max(key) from subqueryData) - 1)"),
+      Array(Row("two"))
+    )
+    session.dropTable("subqueryData", ifExists = true)
+  }
+
+  test("NOT EXISTS predicate subquery") {
+    // boxed tuple constructor so that null can be used for either column
+    val row = identity[(java.lang.Integer, java.lang.Double)] _
+
+    lazy val l = Seq(
+      row(1, 2.0),
+      row(1, 2.0),
+      row(2, 1.0),
+      row(2, 1.0),
+      row(3, 3.0),
+      row(null, null),
+      row(null, 5.0),
+      row(6, null)).toDF("a", "b")
+
+    lazy val r = Seq(
+      row(2, 3.0),
+      row(2, 3.0),
+      row(3, 2.0),
+      row(4, 1.0),
+      row(null, null),
+      row(null, 5.0),
+      row(6, null)).toDF("c", "d")
+
+    l.write.format("row").saveAsTable("l")
+    r.write.format("row").saveAsTable("r")
+
+    checkAnswer(
+      session.sql("select * from l where not exists (select * from r where l.a = r.c)"),
+      Row(1, 2.0) :: Row(1, 2.0) :: Row(null, null) :: Row(null, 5.0) :: Nil)
+    checkAnswer(
+      session.sql("select * from l where not exists " +
+          "(select * from r where l.a = r.c and l.b < r.d)"),
+      Row(1, 2.0) :: Row(1, 2.0) :: Row(3, 3.0) ::
+          Row(null, null) :: Row(null, 5.0) :: Row(6, null) :: Nil)
+
+    session.dropTable("l", ifExists = true)
+    session.dropTable("r", ifExists = true)
+  }
+
+  // taken from same test in Spark's DataFrameSuite
+  test("SPARK-10316: allow non-deterministic expressions to project in PhysicalScan") {
+    session.sql("create table rowTable (id long, id2 long) using row")
+    session.range(1, 11).select($"id", $"id" * 2).write.insertInto("rowTable")
+    val input = session.table("rowTable")
+
+    val df = input.select($"id", rand(0).as('r))
+    val result = df.as("a").join(df.filter($"r" < 0.5).as("b"), $"a.id" === $"b.id").collect()
+    result.foreach { row =>
+      assert(row.getDouble(1) - row.getDouble(3) === 0.0 +- 0.001)
+    }
+    session.sql("drop table rowTable")
+  }
+
+  test("AQP-292 snappy plan generation failure for aggregation on group by column") {
+    session.sql("create table testTable (id long, tag string) using column options (buckets '2')")
+    session.range(100000).selectExpr(
+      "id", "concat('tag', cast ((id >> 6) as string)) as tag").write.insertInto("testTable")
+    val query = "select tag, count(tag) c from testTable group by tag order by c desc"
+    val rs = session.sql(query)
+    // snappy aggregation should have been used for this query
+    val plan = rs.queryExecution.executedPlan
+    assert(plan.find(_.isInstanceOf[SnappyHashAggregateExec]).isDefined)
+    assert(plan.find(_.isInstanceOf[HashAggregateExec]).isEmpty)
+    // collect the result to force default hashAggregateSize property take effect
+    implicit val encoder: ExpressionEncoder[Row] = RowEncoder(rs.schema)
+    val result = session.createDataset(rs.collect().toSeq)
+    session.sql(s"set ${Property.HashAggregateSize} = -1")
+    try {
+      // with the size set to -1 the plan is expected to use HashAggregateExec only
+      val ds = session.sql(query)
+      val plan = ds.queryExecution.executedPlan
+      assert(plan.find(_.isInstanceOf[SnappyHashAggregateExec]).isEmpty)
+      assert(plan.find(_.isInstanceOf[HashAggregateExec]).isDefined)
+      checkAnswer(result, ds.collect())
+    } finally {
+      session.sql(s"set ${Property.HashAggregateSize} = 0")
+    }
+  }
+
+  /**
+   * Sums the first column of all rows returned by an update statement.
+   *
+   * @param tableType "row" reads the counts as Int, anything else as Long
+   */
+  private def getUpdateCount(df: DataFrame, tableType: String): Long = {
+    // row table execution without keys is done directly on store that returns integer counts
+    if (tableType == "row") df.collect().map(_.getInt(0)).sum
+    else df.collect().map(_.getLong(0)).sum
+  }
+
+  test("Double exists and update exists sub-query") {
+    val snc = new SnappySession(sc)
+    for (tableType <- Seq("row", "column")) {
+      snc.sql("create table r1(col1 INT, col2 STRING, col3 String, col4 Int)" +
+          s" using $tableType")
+      snc.sql("create table r2(col1 INT, col2 STRING, col3 String, col4 Int)" +
+          s" using $tableType")
+      snc.sql("create table r3(col1 INT, col2 STRING, col3 String, col4 Int)" +
+          s" using $tableType")
+
+      snc.insert("r1", Row(1, "1", "1", 100))
+      snc.insert("r1", Row(2, "2", "2", 2))
+      snc.insert("r1", Row(4, "4", "4", 4))
+      snc.insert("r1", Row(7, "7", "7", 4))
+
+      snc.insert("r2", Row(1, "1", "1", 1))
+      snc.insert("r2", Row(2, "2", "2", 2))
+      snc.insert("r2", Row(3, "3", "3", 3))
+
+      snc.insert("r3", Row(1, "1", "1", 1))
+      snc.insert("r3", Row(2, "2", "2", 2))
+      snc.insert("r3", Row(4, "4", "4", 4))
+
+      val df = snc.sql("select * from r1 where " +
+          "(exists (select col1 from r2 where r2.col1=r1.col1) " +
+          "or exists(select col1 from r3 where r3.col1=r1.col1))")
+
+      df.collect()
+      checkAnswer(df, Seq(Row(1, "1", "1", 100),
+        Row(2, "2", "2", 2), Row(4, "4", "4", 4)))
+
+      // each update is run twice: the second run must report no updated rows
+      var updateSql = "update r1 set col1 = 100 where exists " +
+          s"(select 1 from r1 t where t.col1 = r1.col1 and t.col1 = 4)"
+      assert(getUpdateCount(snc.sql(updateSql), tableType) == 1)
+      assert(getUpdateCount(snc.sql(updateSql), tableType) == 0)
+
+      updateSql = "update r1 set col1 = 200 where exists " +
+          s"(select 1 from r2 t where t.col1 = r1.col1 and t.col1 = 2)"
+      assert(getUpdateCount(snc.sql(updateSql), tableType) == 1)
+      assert(getUpdateCount(snc.sql(updateSql), tableType) == 0)
+
+      checkAnswer(snc.sql("select * from r1"), Seq(Row(1, "1", "1", 100),
+        Row(200, "2", "2", 2), Row(100, "4", "4", 4), Row(7, "7", "7", 4)))
+
+      snc.sql("drop table r1")
+      snc.sql("drop table r2")
+      snc.sql("drop table r3")
+    }
+  }
+
+  test("SNAP-2387") {
+    val numRows = 400000
+    val snappy = this.snc.snappySession
+    val spark = new SparkSession(snappy.sparkContext)
+    // the same data is registered as "taxi_trip_fare" in both sessions
+    var ds = snappy.range(numRows).selectExpr(
+      "id as fare_amount", "(rand() * 1000.0) as tip_amount")
+    ds.createOrReplaceTempView("taxi_trip_fare")
+    ds.cache()
+    assert(ds.count() === numRows)
+    ds = spark.internalCreateDataFrame(ds.queryExecution.toRdd, ds.schema)
+    ds.createOrReplaceTempView("taxi_trip_fare")
+    ds.cache()
+    assert(ds.count() === numRows)
+
+    val q1 = "SELECT (ROUND( (tip_amount / fare_amount) * 100)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 50 " +
+        "GROUP BY (ROUND( tip_amount / fare_amount * 100 ))"
+    val q2 = "SELECT (ROUND( (tip_amount / fare_amount) * (90 + 10))) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 50 " +
+        "GROUP BY (ROUND( tip_amount / fare_amount * (90 + 10)))"
+    val q3 = "SELECT (ROUND((tip_amount / fare_amount) * 100)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 50 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * 100)) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * 100)) limit 30"
+    val q4 = "SELECT (ROUND((tip_amount / fare_amount) * (90 + 10))) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 50 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * (90 + 10))) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * (90 + 10))) limit 10"
+    ColumnCacheBenchmark.collect(snappy.sql(q1), spark.sql(q1).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q2), spark.sql(q2).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q3), spark.sql(q3).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q4), spark.sql(q4).collect())
+
+    // check with different values of constants
+    val q5 = "SELECT (ROUND( (tip_amount / fare_amount) * 99)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 40 " +
+        "GROUP BY (ROUND( tip_amount / fare_amount * 99 ))"
+    val q6 = "SELECT (ROUND( (tip_amount / fare_amount) * (40 + 68))) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 55 " +
+        "GROUP BY (ROUND( tip_amount / fare_amount * (40 + 68)))"
+    val q7 = "SELECT (ROUND((tip_amount / fare_amount) * 98)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 60 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * 98)) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * 98)) limit 30"
+    val q8 = "SELECT (ROUND((tip_amount / fare_amount) * (32 + 60))) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 45 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * (32 + 60))) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * (32 + 60))) limit 10"
+    ColumnCacheBenchmark.collect(snappy.sql(q5), spark.sql(q5).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q6), spark.sql(q6).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q7), spark.sql(q7).collect())
+    ColumnCacheBenchmark.collect(snappy.sql(q8), spark.sql(q8).collect())
+
+    // check error cases
+    val q9 = "SELECT (ROUND( (tip_amount / fare_amount) * 108)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 55 " +
+        "GROUP BY (ROUND( tip_amount / fare_amount * (40 + 68)))"
+    val q10 = "SELECT (ROUND((tip_amount / fare_amount) * 98)) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 60 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * 100)) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * 100)) limit 30"
+    // the GROUP BY parentheses are balanced here so that the statement is
+    // rejected for its ORDER BY not matching the GROUP BY expression and not
+    // merely because it cannot be parsed
+    val q11 = "SELECT (ROUND((tip_amount / fare_amount) * (40 + 68))) tip_pct " +
+        "FROM taxi_trip_fare WHERE fare_amount > 0.00 and tip_amount < 60 " +
+        "GROUP BY (ROUND(tip_amount / fare_amount * (40 + 68))) " +
+        "ORDER BY (ROUND(tip_amount / fare_amount * 108)) limit 30"
+    intercept[AnalysisException](snappy.sql(q9))
+    intercept[AnalysisException](snappy.sql(q10))
+    intercept[AnalysisException](snappy.sql(q11))
+  }
+
+  test("test SNAP-2508") {
+
+    snc.sql("create table asif_test.gl_account (glAccountNumber clob)")
+    snc.sql("create table asif_test.gl_account_text (glAccountNumber clob)")
+
+    snc.sql("insert into asif_test.gl_account values('0660130001')")
+    snc.sql("insert into asif_test.gl_account_text values('0660130001')")
+
+    snc.sql(s"create table leaf1 (glaccountnumber clob, parentnodeid clob)")
+    snc.sql(s"insert into leaf1 values('0660130001', '6010100'), ('ABCDEF', '50000')")
+
+    snc.sql("create table asif_test.node_hierarchy(nodeid clob)")
+    snc.sql("insert into asif_test.node_hierarchy values('6010100')")
+
+
+    snc.sql("create table asif_test.node_object_mapping(nodeid clob, " +
+        "glaccountnumber clob, fromvalue clob)")
+
+    snc.sql("insert into asif_test.node_object_mapping values('6010100', '0660130001', '8')")
+
+
+    snc.sql("CREATE OR REPLACE VIEW suranjan_t2 as SELECT" +
+        " a.glaccountnumber" +
+        " FROM asif_test.gl_account a " +
+        " LEFT OUTER JOIN asif_test.gl_account_text b " +
+        " ON a.glAccountNumber = b.glAccountNumber")
+
+    snc.sql("create or replace view suranjan_t as SELECT" +
+        " a.nodeid" +
+        ", c.glaccountnumber" +
+        " FROM asif_test.node_hierarchy a" +
+        " LEFT JOIN asif_test.node_object_mapping b" +
+        " ON a.nodeid = b.nodeid" +
+        " LEFT JOIN suranjan_t2 c" +
+        " ON c.glaccountnumber between b.fromvalue and b.fromvalue")
+
+
+    snc.sql("set spark.sql.autoBroadcastJoinThreshold=-1")
+    snc.sql("set snappydata.sql.disableHashJoin=false")
+
+    val result = snc.sql("select " +
+        " LeafLevel.glaccountnumber" +
+        " , HLVL06.nodeid AS HIER_LEVEL_06_NODE_ID" +
+        " FROM leaf1 LeafLevel" +
+        " JOIN suranjan_t HLVL06" +
+        " On (HLVL06.nodeid = LeafLevel.parentnodeid)")
+
+    // scalastyle:off
+    // println(result.queryExecution.optimizedPlan)
+    // println(result.queryExecution.executedPlan)
+    // scalastyle:on
+
+    // result.show()
+    assert(result.count == 1)
+  }
+
+  test("IS DISTINCT, IS NOT DISTINCT and <=> expressions") {
+    val snappy = this.snc.snappySession
+    val df = snappy.sql("select (case when id & 1 = 0 then null else id end) as a, " +
+        "(case when id & 2 = 0 then null else id end) b from range(1000)")
+    val rs1 = df.selectExpr("a is not distinct from b").collect()
+    val rs2 = df.selectExpr(
+      "(a is not null AND b is not null AND a = b) OR (a is null AND b is null)").collect()
+    assert(rs1.length === 1000)
+    assert(rs2.length === 1000)
+    assert(rs1 === rs2)
+    assert(df.selectExpr("a is not distinct from b").collect() === df.selectExpr(
+      "a <=> b").collect())
+
+    assert(df.selectExpr("a is distinct from b").collect() === df.selectExpr(
+      "(a is not null AND b is not null AND a <> b) OR (a is null AND b is not null) OR " +
+          "(a is not null AND b is null)").collect())
+    assert(df.selectExpr("a IS DISTINCT FROM b").collect() === df.selectExpr(
+      "NOT (a <=> b)").collect())
+  }
+
+  test("Push down TPCH Q19") {
+    // read the plan caching value before anything below changes it; the SQL
+    // "set" statement appears to write the same property, so reading it
+    // afterwards would make the finally block restore the overridden value
+    val planCaching = PlanCaching.get(snc.sessionState.conf)
+    session.sql("set spark.sql.autoBroadcastJoinThreshold=-1")
+    session.sql("set snappydata.sql.planCaching=true").collect()
+    PlanCaching.set(snc.sessionState.conf, true)
+    try {
+      // this loop exists because initial implementation had a problem
+      // in RefParamLiteral.hashCode() that caused it to fail once in 2-3 runs
+      for (_ <- 1 to 4) {
+        testTPCHQ19()
+      }
+    } finally {
+      session.sql(s"set spark.sql.autoBroadcastJoinThreshold=${10L * 1024 * 1024}")
+      PlanCaching.set(snc.sessionState.conf, planCaching)
+    }
+  }
+
+  /**
+   * Creates ct1 and ct2, checks the analyzed plan, the placement of filters
+   * and the join condition for OR-ed predicates with equal and with different
+   * constants, drops the tables and finally checks round/bround with repeated
+   * constants.
+   */
+  private def testTPCHQ19(): Unit = {
+    // check common sub-expression elimination in query leading to push down
+    // of filters should not be inhibited due to ParamLiterals
+    import session.implicits._
+
+    session.sql("create table ct1 (id long, data string) using column")
+    session.sql("create table ct2 (id long, data string) using column")
+    session.sql("insert into ct1 select id, 'data' || id from range(100000)")
+    session.sql("insert into ct2 select id, 'data' || id from range(100000)")
+
+    var ds = session.sql("select ct1.id, ct2.data from ct1 join ct2 on (ct1.id = ct2.id) where " +
+        "(ct1.id < 1000 and ct2.data = 'data100') or (ct1.id < 1000 and ct1.data = 'data100')")
+    var analyzedFilter = "Filter (((id#0 < cast(ParamLiteral:0#0,1000 as bigint)) && " +
+        "(data#0 = ParamLiteral:1#0,data100)) || ((id#0 < cast(ParamLiteral:2#0,1000 as " +
+        "bigint)) && (data#0 = ParamLiteral:3#0,data100)))"
+
+    // a def so that it picks up the current value of analyzedFilter on each use
+    def expectedTree: String =
+      s"""Project [id#0, data#0]
+         |+- $analyzedFilter
+         |   +- Join Inner, (id#0 = id#0)
+         |      :- SubqueryAlias ct1
+         |      :  +- Relation[id#0,data#0] ColumnFormatRelation[app.ct1]
+         |      +- SubqueryAlias ct2
+         |         +- Relation[id#0,data#0] ColumnFormatRelation[app.ct2]
+         |""".stripMargin
+    assert(idPattern.replaceAllIn(ds.queryExecution.analyzed.treeString, "#0") === expectedTree)
+    assert(ds.collect() === Array(Row(100L, "data100")))
+
+    // check filter push down in the plan
+    var filters = ds.queryExecution.executedPlan.collect {
+      case f: FilterExec => assert(f.child.nodeName === "ColumnTableScan"); f
+    }
+    assert(filters.length === 2)
+    assert(filters.forall(_.toString.contains("<")))
+    // check pushed down filters should not be in HashJoin
+    var joins = ds.queryExecution.executedPlan.collect {
+      case j: HashJoinExec => j
+    }
+    assert(joins.length === 1)
+    assert(joins.head.condition.isDefined)
+    var condString = joins.head.condition.get.toString()
+    assert(condString.contains("data#"))
+    assert(!condString.contains("id#"))
+
+    // similar query but different values in the two positions should lead to a different
+    // plan with no filter push down
+    ds = session.sql("select ct1.id, ct2.data from ct1 join ct2 on (ct1.id = ct2.id) where " +
+        "(ct1.id < 1000 and ct2.data = 'data100') or (ct1.id < 20 and ct1.data = 'data100')")
+    analyzedFilter = "Filter (((id#0 < cast(ParamLiteral:0#0,1000 as bigint)) && " +
+        "(data#0 = ParamLiteral:1#0,data100)) || ((id#0 < cast(ParamLiteral:2#0,20 as " +
+        "bigint)) && (data#0 = ParamLiteral:3#0,data100)))"
+    assert(idPattern.replaceAllIn(ds.queryExecution.analyzed.treeString, "#0") === expectedTree)
+    assert(ds.collect() === Array(Row(100L, "data100")))
+
+    // check no filter push down in the plan
+    filters = ds.queryExecution.executedPlan.collect {
+      case f: FilterExec => assert(f.child.nodeName === "ColumnTableScan"); f
+    }
+    assert(filters.length === 2)
+    assert(filters.forall(!_.toString.contains("<")))
+    // check all filters should be in HashJoin
+    joins = ds.queryExecution.executedPlan.collect {
+      case j: HashJoinExec => j
+    }
+    assert(joins.length === 1)
+    assert(joins.head.condition.isDefined)
+    condString = joins.head.condition.get.toString()
+    assert(condString.contains("data#"))
+    assert(condString.contains("id#"))
+
+    ds = session.sql("select ct1.id, ct2.data from ct1 join ct2 on (ct1.id = ct2.id) where " +
+        "(ct1.id < 10 and ct2.data = 'data100') or (ct1.id < 10 and ct1.data = 'data100')")
+    assert(ds.collect().length === 0)
+    // check filter push down in the plan
+    filters = ds.queryExecution.executedPlan.collect {
+      case f: FilterExec => assert(f.child.nodeName === "ColumnTableScan"); f
+    }
+    assert(filters.length === 2)
+    assert(filters.forall(_.toString.contains("<")))
+    ds = session.sql("select ct1.id, ct2.data from ct1 join ct2 on (ct1.id = ct2.id) where " +
+        "(ct1.id < 10 and ct2.data = 'data100') or (ct1.id < 20 and ct1.data = 'data100')")
+    assert(ds.collect().length === 0)
+    // check no filter push down in the plan
+    filters = ds.queryExecution.executedPlan.collect {
+      case f: FilterExec => assert(f.child.nodeName === "ColumnTableScan"); f
+    }
+    assert(filters.length === 2)
+    assert(filters.forall(!_.toString.contains("<")))
+
+    session.sql("drop table ct1")
+    session.sql("drop table ct2")
+
+    // check for some combinations of repeated constants
+    val df = Seq(5, 55, 555).map(Tuple1(_)).toDF("a")
+    checkAnswer(
+      df.select(round('a), round('a, -1), round('a, -2)),
+      Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600))
+    )
+    checkAnswer(
+      df.select(bround('a), bround('a, -1), bround('a, -2)),
+      Seq(Row(5, 0, 0), Row(55, 60, 100), Row(555, 560, 600))
+    )
+
+    val pi = BigDecimal("3.1415")
+    checkAnswer(
+      session.sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " +
+          s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"),
+      Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3),
+        BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142")))
+    )
+    checkAnswer(
+      session.sql(s"SELECT bround($pi, -3), bround($pi, -2), bround($pi, -1), " +
+          s"bround($pi, 0), bround($pi, 1), bround($pi, 2), bround($pi, 3)"),
+      Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3),
+        BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142")))
+    )
+
+    // more than 4 constants to replace
+    val pi1 = pi + 1
+    val pi2 = pi + 2
+    val pi3 = pi + 3
+    val pi4 = pi + 4
+    val pi5 = pi + 5
+    val pi6 = pi + 6
+    checkAnswer(
+      session.sql(s"SELECT round($pi, -3), round($pi6, -2), round($pi, -1), " +
+          s"round($pi, 0), round($pi1, 1), round($pi, 2), round($pi2, 3), " +
+          s"round($pi3, 1), round($pi1, 2), round($pi4, 2), round($pi, 3), " +
+          s"round($pi5, 3), round($pi4, 0), round($pi, 1), round($pi6, 2)"),
+      Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"),
+        BigDecimal(3), BigDecimal("4.1"), BigDecimal("3.14"), BigDecimal("5.142"),
+        BigDecimal("6.1"), BigDecimal("4.14"), BigDecimal("7.14"), BigDecimal("3.142"),
+        BigDecimal("8.142"), BigDecimal(7), BigDecimal("3.1"), BigDecimal("9.14")))
+    )
+    checkAnswer(
+      session.sql(s"SELECT bround($pi, -3), bround($pi6, -2), bround($pi, -1), " +
+          s"bround($pi, 0), bround($pi1, 1), bround($pi, 2), bround($pi2, 3), " +
+          s"bround($pi3, 1), bround($pi1, 2), bround($pi4, 2), bround($pi, 3), " +
+          s"bround($pi5, 3), bround($pi4, 0), bround($pi, 1), bround($pi6, 2)"),
+      Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"),
+        BigDecimal(3), BigDecimal("4.1"), BigDecimal("3.14"), BigDecimal("5.142"),
+        BigDecimal("6.1"), BigDecimal("4.14"), BigDecimal("7.14"), BigDecimal("3.142"),
+        BigDecimal("8.142"), BigDecimal(7), BigDecimal("3.1"), BigDecimal("9.14")))
+    )
+  }
+}
+
+
+case class LowerCaseData(n: Int, l: String)
diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/DataGenerator.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/DataGenerator.scala
new file mode 100644
index 0000000000..36f20728e0
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/execution/DataGenerator.scala
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */
package org.apache.spark.sql.execution

import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Utility to generate data given a schema. This is a very primitive version.
 * Can be enhanced to cater to joins/group by etc.
 *
 * Every generated column is a SQL expression string over the `id` column of a
 * `range` dataset; the strings are handed to `selectExpr`. Output columns are
 * named `<kind>val<index>` (for example `intval0`, `strval3`), where `index`
 * is the position of the field in the schema, not the schema's field name.
 */
object DataGenerator {

  val numVals = 100
  val numTypes = 100
  val numRoles = 100
  val numGroups = 100
  val numNames = 1000
  val numIds = 5000

  val numIters = 10

  val numElems1 = 12 * numVals * numIds
  val numElems2 = 4 * numIds
  val numElems3 = numTypes * numTypes

  /**
   * Builds a DataFrame with `numRows` rows and one generated column per field
   * of `schema`.
   *
   * @param sc      session used to create the underlying `range` dataset
   * @param schema  schema whose field data types decide the generated expressions
   * @param numRows number of rows passed to `range`
   * @return DataFrame whose columns are the expressions from [[randomValue]]
   * @throws UnsupportedOperationException if a field has a data type that
   *                                       [[randomValue]] does not handle
   */
  def generateDataFrame(sc: SparkSession, schema: StructType, numRows: Long): DataFrame = {
    val columnExprs = schema.fields.zipWithIndex.map { case (field, position) =>
      randomValue(field.dataType, position)
    }
    sc.range(numRows).selectExpr(columnExprs: _*)
  }

  /**
   * Returns the SQL select expression used to populate a column of the given type.
   *
   * Integer values cycle modulo `numIds`, byte/long/double values modulo
   * `numTypes` and string values modulo `numNames`; decimals use `rand()`.
   *
   * @param fieldType data type of the column to generate
   * @param index     position of the column, used as suffix of the column alias
   * @return expression string of the form `<expr> as <alias>`
   * @throws UnsupportedOperationException for any data type not listed below
   */
  def randomValue(fieldType: DataType, index: Int): String = {
    fieldType match {
      case IntegerType => s"(id % $numIds) as intval$index"
      case ByteType => s"cast((id % $numTypes) as byte) as byteval$index"
      case LongType => s"cast((id % $numTypes) as long) as longval$index"
      case _: DecimalType => randomDecimalExpr(index)
      case DoubleType => s"cast((id % $numTypes) as double) as doubleval$index"
      // NOTE(review): this is a numeric expression (2014 or 2015), not a
      // timestamp value — confirm callers rely on that.
      case TimestampType => s"((id % 2) + 2014) as timeval$index"
      case BooleanType => s"((id % 2) == 0) as boolval$index"
      case StringType => s"cast((id % $numNames) as string) as strval$index"
      case other: DataType =>
        throw new UnsupportedOperationException(s"Unexpected data type $other")
    }
  }

  /**
   * Builds a DataFrame in which the fields named in `uniqueFields` are derived
   * from `id` shifted by the lower bound of `uniqueRange`.
   *
   * The number of rows is `uniqueRange._2 - uniqueRange._1`.
   *
   * @param sc           session used to create the underlying `range` dataset
   * @param schema       schema whose field data types decide the generated expressions
   * @param uniqueRange  (lower, upper) bounds; lower is the offset added to `id`
   *                     for unique fields, upper - lower is the row count
   * @param uniqueFields names of the schema fields to treat as unique
   * @return DataFrame whose columns are the expressions from [[randomValueWithUnique]]
   * @throws UnsupportedOperationException if a field has a data type that
   *                                       [[randomValueWithUnique]] does not handle
   */
  def generateDataFrameWithUnique(sc: SparkSession,
      schema: StructType,
      uniqueRange: (Long, Long),
      uniqueFields: Seq[String]): DataFrame = {
    val columnExprs = schema.fields.zipWithIndex.map { case (field, position) =>
      randomValueWithUnique(field.dataType, position,
        uniqueFields.contains(field.name), uniqueRange)
    }
    val rowCount = uniqueRange._2 - uniqueRange._1
    sc.range(rowCount).selectExpr(columnExprs: _*)
  }

  /**
   * Returns the SQL select expression used to populate a column of the given
   * type, optionally offset so that values start at the lower bound of
   * `uniqueRange`.
   *
   * `isUnique` is only consulted for integer, byte and long columns; all other
   * types produce the same expression regardless of the flag.
   *
   * @param fieldType   data type of the column to generate
   * @param index       position of the column, used as suffix of the column alias
   * @param isUnique    whether to add the lower bound of `uniqueRange` to `id`
   * @param uniqueRange (lower, upper) bounds; only the lower bound is read here
   * @return expression string of the form `<expr> as <alias>`
   * @throws UnsupportedOperationException for any data type not listed below
   */
  def randomValueWithUnique(fieldType: DataType, index: Int,
      isUnique: Boolean, uniqueRange: (Long, Long)): String = {
    val lowerRangeVal = uniqueRange._1
    fieldType match {
      case IntegerType =>
        if (isUnique) s"(id + ${lowerRangeVal}) as intval$index"
        else s"id as intval$index"
      case ByteType =>
        if (isUnique) s"cast((id + $lowerRangeVal) as byte) as byteval$index"
        else s"cast(id as byte) as byteval$index"
      case LongType =>
        if (isUnique) s"cast((id + $lowerRangeVal) as long) as longval$index"
        else s"cast(id as long) as longval$index"
      case _: DecimalType => randomDecimalExpr(index)
      case DoubleType => s"cast(id as double) as doubleval$index"
      // NOTE(review): this is a numeric expression (2014 or 2015), not a
      // timestamp value — confirm callers rely on that.
      case TimestampType => s"((id % 2) + 2014) as timeval$index"
      case BooleanType => s"((id % 2) == 0) as boolval$index"
      case StringType => s"cast(id as string) as strval$index"
      case other: DataType =>
        throw new UnsupportedOperationException(s"Unexpected data type $other")
    }
  }

  /**
   * Expression for a decimal column: `rand() * 100.0` cast to `decimal(28, 10)`.
   *
   * The closing parenthesis of `cast(` was missing in the original string,
   * which left the expression unbalanced; it is added here and shared by both
   * public generators so the two cannot drift apart.
   */
  private def randomDecimalExpr(index: Int): String =
    s"cast ((rand() * 100.0) as decimal(28, 10)) as decval$index"
}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/SnappyTableMutableAPISuite.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/SnappyTableMutableAPISuite.scala new file mode 100644 index 0000000000..33e8c3d0b6 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/execution/SnappyTableMutableAPISuite.scala @@ -0,0 +1,1212 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.execution + +import java.sql.DriverManager + +import com.pivotal.gemfirexd.TestUtil +import io.snappydata.SnappyFunSuite +import io.snappydata.core.Data +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} + +import org.apache.spark.Logging +import org.apache.spark.sql.snappy._ +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, Row, SnappyContext, SnappySession} + +case class DataWithMultipleKeys(pk1: Int, col1: Int, pk2: String, col2: Long) +case class DataDiffCol(column1: Int, column2: Int, column3: Int) + +case class DataDiffColMultipleKeys(pk1: Int, col1: Int, pk2: String, col2: Long) + +case class DataWithMatchingKeyColumns(column1: Int, column2: Int, column3: Int) + +case class DataStrCol(column1: Int, column2: String, column3: Int) + + +case class RData(C_CustKey: Int, C_Name: String, + C_Address: String, C_NationKey: Int, + C_Phone: String, C_AcctBal: Decimal, + C_MktSegment: String, C_Comment: String, skip: String) + +class SnappyTableMutableAPISuite extends SnappyFunSuite with Logging with BeforeAndAfter + with BeforeAndAfterAll { + + val data1 = Seq(Seq(1, 22, 3), Seq(7, 81, 9), Seq(9, 23, 3), Seq(4, 24, 3), + Seq(5, 6, 7), Seq(88, 88, 88)) + + val data2 = Seq(Seq(1, 22, 3), Seq(7, 81, 9), Seq(9, 23, 3), Seq(4, 24, 3), + Seq(5, 6, 7), Seq(8, 8, 8), Seq(88, 88, 90)) + + val data3 = Seq(Seq(1, "22", 3), Seq(7, "81", 9), Seq(9, "23", 3), Seq(4, null, 3), + Seq(5, "6", 7), Seq(88, "88", 88)) + + val data4 = Seq(Seq(1, "22", 3), Seq(7, "81", 9), Seq(9, "23", 3), Seq(4, null, 3), + Seq(5, "6", 7), Seq(8, "8", 8), Seq(88, "88", 90)) + + val data5 = Seq(Seq(1, 22, "str1", 3L), Seq(7, 81, "str7", 9L), Seq(9, 23, "str9", 3L), + Seq(4, 24, "str4", 3L), Seq(5, 6, "str5", 7L), Seq(8, 8, "str8", 8L), Seq(88, 88, "str88", 88L)) + + val data6 = Seq(Seq(1, 22, 
"str1", 3L), Seq(7, 81, "str7", 9L), Seq(9, 23, "str9", 3L), + Seq(4, 24, "str4", 3L), Seq(5, 6, "str5", 7L), Seq(88, 88, "str88", 88L)) + + private var netPort = 0 + + after { + snc.dropTable("col_table", ifExists = true) + snc.dropTable("row_table", ifExists = true) + } + + override def beforeAll(): Unit = { + super.beforeAll() + assert(this.snc !== null) + // start a local network server + netPort = TestUtil.startNetserverAndReturnPort() + } + + override def afterAll(): Unit = { + super.afterAll() + TestUtil.stopNetServer() + } + + test("PutInto with sql") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + + val rdd2 = sc.parallelize(data2, 2).map(s => Data(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + + val props = Map("BUCKETS" -> "2", "PARTITION_BY" -> "col1", "key_columns" -> "col2") + val props1 = Map.empty[String, String] + + snc.createTable("col_table", "column", df1.schema, props) + snc.createTable("row_table", "row", df2.schema, props1) + + df1.write.insertInto("col_table") + df2.write.insertInto("row_table") + + snc.sql("put into table col_table" + + " select * from row_table") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 7) + assert(resultdf.contains(Row(8, 8, 8))) + assert(resultdf.contains(Row(88, 88, 90))) + } + + ignore("Multiple update with correlated subquery") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + + val rdd2 = sc.parallelize(data2, 2).map(s => Data(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + + val props = Map("BUCKETS" -> "2", "PARTITION_BY" -> "col1", "key_columns" -> "col2") + val props1 = Map.empty[String, String] + + snc.createTable("col_table", "column", df1.schema, props) + snc.createTable("row_table", "row", df2.schema, props1) + + df1.write.insertInto("col_table") + 
df2.write.insertInto("row_table") + + snc.sql("update col_table set col3 = " + + " (select col3 from row_table where col_table.col2 = row_table.col2 )") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 7) + assert(resultdf.contains(Row(8, 8, 8))) + assert(resultdf.contains(Row(88, 88, 90))) + } + + test("Single column update with join") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + + val rdd2 = sc.parallelize(data2, 2).map(s => Data(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + + val props = Map("BUCKETS" -> "2", "PARTITION_BY" -> "col1", "key_columns" -> "col2") + val props1 = Map.empty[String, String] + + snc.createTable("col_table", "column", df1.schema, props) + snc.createTable("row_table", "row", df2.schema, props1) + + df1.write.insertInto("col_table") + df2.write.insertInto("row_table") + + snc.sql("update col_table set a.col3 = b.col3 from " + + "col_table a join row_table b on a.col2 = b.col2") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 6) + assert(resultdf.contains(Row(88, 88, 90))) + } + + test("Multiple columns update with join") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "100", 100)) + snc.insert("row_table", Row(2, "2", "200", 200)) + snc.insert("row_table", Row(4, "4", "400", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(4, "4", "3", 3)) + + snc.sql("update col_table set a.col3 = b.col3, a.col4 = b.col4 from " + + "col_table a join row_table b on a.col2 = b.col2") + + val 
resultdf = snc.table("col_table").collect() + assert(resultdf.length == 3) + assert(resultdf.contains(Row(1, "1", "100", 100))) + assert(resultdf.contains(Row(2, "2", "200", 200))) + assert(resultdf.contains(Row(4, "4", "400", 4))) + } + + test("Multiple columns update with join syntax simpler-sybase") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "100", 100)) + snc.insert("row_table", Row(2, "2", "200", 200)) + snc.insert("row_table", Row(4, "4", "400", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(4, "4", "3", 3)) + + snc.sql("update col_table set a.col3 = b.col3, a.col4 = b.col4 from " + + "col_table a, row_table b where a.col2 = b.col2") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 3) + assert(resultdf.contains(Row(1, "1", "100", 100))) + assert(resultdf.contains(Row(2, "2", "200", 200))) + assert(resultdf.contains(Row(4, "4", "400", 4))) + } + + + ignore("Multiple columns update with join : Row PR tables") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("col_table", Row(1, "1", "100", 100)) + snc.insert("col_table", Row(2, "2", "200", 200)) + snc.insert("col_table", Row(4, "4", "400", 4)) + + snc.insert("row_table", Row(1, "1", "1", 1)) + snc.insert("row_table", Row(1, "5", "30", 30)) + snc.insert("row_table", Row(2, "2", null, 2)) + 
snc.insert("row_table", Row(4, "4", "3", 3)) + + snc.sql("update row_table set a.col3 = b.col3, a.col4 = b.col4 from " + + "row_table a join col_table b on a.col2 = b.col2") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(1, "1", "100", 100))) + assert(resultdf.contains(Row(2, "2", "200", 200))) + assert(resultdf.contains(Row(4, "4", "400", 4))) + assert(resultdf.contains(Row(1, "5", "30", 30))) // Unchanged + } + + test("Multiple columns update with join : Column tables") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "100", 100)) + snc.insert("row_table", Row(2, "2", "200", 200)) + snc.insert("row_table", Row(4, "4", "400", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(1, "5", "30", 30)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(4, "4", "3", 3)) + + snc.sql("update col_table set a.col3 = b.col3, a.col4 = b.col4 from " + + "col_table a join row_table b on a.col2 = b.col2") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(1, "1", "100", 100))) + assert(resultdf.contains(Row(2, "2", "200", 200))) + assert(resultdf.contains(Row(4, "4", "400", 4))) + assert(resultdf.contains(Row(1, "5", "30", 30))) // Unchanged + } + + + ignore("Multiple columns update with join : Row RR tables") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row ") + 
snc.insert("col_table", Row(1, "1", "100", 100)) + snc.insert("col_table", Row(2, "2", "200", 200)) + snc.insert("col_table", Row(4, "4", "400", 4)) + + snc.insert("row_table", Row(1, "1", "1", 1)) + snc.insert("row_table", Row(1, "5", "30", 30)) + snc.insert("row_table", Row(2, "2", null, 2)) + snc.insert("row_table", Row(4, "4", "3", 3)) + + snc.sql("update row_table set a.col3 = b.col3, a.col4 = b.col4 from " + + "row_table a join col_table b on a.col2 = b.col2") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(1, "1", "100", 100))) + assert(resultdf.contains(Row(2, "2", "200", 200))) + assert(resultdf.contains(Row(4, "4", "400", 4))) + assert(resultdf.contains(Row(1, "5", "30", 30))) // Unchanged + } + + test("Single column update with subquery : Row RR tables") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row ") + snc.insert("col_table", Row(1, "1", "100", 100)) + snc.insert("col_table", Row(2, "2", "200", 200)) + snc.insert("col_table", Row(4, "4", "400", 4)) + + snc.insert("row_table", Row(1, "1", "1", 1)) + snc.insert("row_table", Row(1, "5", "30", 30)) + snc.insert("row_table", Row(2, "2", null, 2)) + snc.insert("row_table", Row(4, "4", "3", 3)) + + val df = snc.sql("update row_table set col3 = '5' where col2 in (select col2 from col_table)") + df.collect() + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(1, "1", "5", 1))) + assert(resultdf.contains(Row(2, "2", "5", 2))) + assert(resultdf.contains(Row(4, "4", "5", 3))) + assert(resultdf.contains(Row(1, "5", "30", 30))) // Unchanged + } + + ignore("Single column update with subquery with avg : Row RR tables") { + val snc = new SnappySession(sc) + 
snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row ") + snc.insert("col_table", Row(1, "2", "100", 100)) + snc.insert("col_table", Row(2, "2", "200", 200)) + snc.insert("col_table", Row(4, "2", "400", 4)) + + snc.insert("row_table", Row(1, "1", "1", 1)) + snc.insert("row_table", Row(1, "5", "30", 30)) + snc.insert("row_table", Row(2, "2", null, 2)) + snc.insert("row_table", Row(4, "4", "3", 3)) + + snc.sql("update row_table set col3 = '5' where col2 > (select avg(col2) from col_table)") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(4, "4", "5", 3))) + assert(resultdf.contains(Row(1, "5", "5", 30))) // Unchanged + } + + test("row table without child") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("col_table", Row(1, "1", "100", 100)) + snc.insert("col_table", Row(2, "2", "200", 200)) + snc.insert("col_table", Row(4, "4", "400", 4)) + + snc.insert("row_table", Row(1, "1", "1", 1)) + snc.insert("row_table", Row(2, "2", null, 2)) + snc.insert("row_table", Row(4, "4", "3", 3)) + + snc.sql("update row_table set col3 = '4' ") + + } + + + test("PutInto with API") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data2, 2).map(s => Data(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + + snc.createTable("col_table", "column", df1.schema, Map("key_columns" -> "col2")) + + df1.write.insertInto("col_table") // 
insert initial data + df2.cache() + df2.count() + df2.write.putInto("col_table") // update & insert subsequent data + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 7) + assert(resultdf.contains(Row(8, 8, 8))) + assert(resultdf.contains(Row(88, 88, 90))) + } + + test("PutInto Cache Test") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data2, 2).map(s => Data(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + snc.sql("set spark.sql.defaultSizeInBytes=1000") + snc.createTable("col_table", "column", df1.schema, Map("key_columns" -> "col2")) + + df1.write.insertInto("col_table") // insert initial data + df2.write.putInto("col_table") // update & insert subsequent data + df2.write.putInto("col_table") + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 7) + assert(resultdf.contains(Row(8, 8, 8))) + assert(resultdf.contains(Row(88, 88, 90))) + } + + test("PutInto with API for pure inserts") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + val df2 = snc.range(100, 110).selectExpr("id", "id", "id") + snc.sql("set spark.sql.defaultSizeInBytes=1000") + snc.createTable("col_table", "column", df1.schema, + Map("key_columns" -> "col2", "buckets" -> "4")) + + df1.write.insertInto("col_table") // insert initial data + df2.write.putInto("col_table") // update & insert subsequent data + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 16) + assert(resultdf.contains(Row(100, 100, 100))) + } + + test("PutInto Cache test for intermediate joins") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + val df2 = snc.range(100, 110).selectExpr("id", "id", "id") + snc.sql("set 
spark.sql.defaultSizeInBytes=1000") + snc.createTable("col_table", "column", df1.schema, + Map("key_columns" -> "col2", "buckets" -> "4")) + + df1.write.insertInto("col_table") // insert initial data + snc.sharedState.cacheManager.clearCache() + df2.write.putInto("col_table") // update & insert subsequent data + assert(snc.sharedState.cacheManager.isEmpty) + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 16) + assert(resultdf.contains(Row(100, 100, 100))) + } + + test("PutInto with different column names") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data1, 2).map(s => Data(s(0), s(1), s(2))) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data2, 2).map(s => DataDiffCol(s(0), s(1), s(2))) + val df2 = snc.createDataFrame(rdd2) + + snc.createTable("col_table", "column", df1.schema, Map("key_columns" -> "col2")) + + df1.write.insertInto("col_table") // insert initial data + df2.write.putInto("col_table") // update & insert subsequent data + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 7) + assert(resultdf.contains(Row(8, 8, 8))) + assert(resultdf.contains(Row(88, 88, 90))) + } + + test("PutInto with null key values") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 INT)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1', key_columns 'col2') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 INT)") + + snc.insert("row_table", Row(1, "1", 1)) + snc.insert("row_table", Row(2, "2", 2)) + snc.insert("row_table", Row(3, null, 3)) + + snc.insert("col_table", Row(1, "1", 1)) + snc.insert("col_table", Row(2, "2", 2)) + snc.insert("col_table", Row(3, null, 3)) + + snc.sql("put into table col_table" + + " select * from row_table") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 4) + // TODO What should be the behaviour ? 
+ assert(resultdf.filter(r => r.equals(Row(3, null, 3))).size == 2) + } + + test("PutInto op changed row count validation") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 INT)" + + " using column options(BUCKETS '4', PARTITION_BY 'col1', key_columns 'col2') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 INT)") + + snc.insert("row_table", Row(1, "1", 11)) + snc.insert("row_table", Row(9, "9", 99)) + snc.insert("row_table", Row(2, "2", 22)) + snc.insert("row_table", Row(3, "4", 3)) + + snc.insert("col_table", Row(1, "1", 1)) + snc.insert("col_table", Row(9, "9", 9)) + snc.insert("col_table", Row(2, "2", 2)) + snc.insert("col_table", Row(3, "5", 3)) + + val df = snc.sql("put into table col_table" + + " select * from row_table") + + assert(df.collect()(0)(0) == 4) + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 5) + } + + test("PutInto with only key values") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1', key_columns 'col1') ") + snc.sql("create table row_table(col1 INT)") + + snc.insert("row_table", Row(1)) + snc.insert("row_table", Row(2)) + snc.insert("row_table", Row(3)) + + snc.insert("col_table", Row(1)) + snc.insert("col_table", Row(2)) + snc.insert("col_table", Row(3)) + + intercept[AnalysisException]{ + snc.sql("put into table col_table" + + " select * from row_table") + } + + } + + test("Bug - Incorrect updateExpresion") { + val snc = new SnappySession(sc) + snc.sql("create table col_table (col1 int, col2 int, col3 int)" + + " using column options(partition_by 'col2', key_columns 'col2') ") + snc.sql("create table row_table " + + "(col1 int, col2 int, col3 int) using row options(partition_by 'col2')") + + snc.insert("row_table", Row(1, 1, 1)) + snc.insert("row_table", Row(2, 2, 2)) + snc.insert("row_table", Row(3, 3, 3)) + snc.sql("put into table col_table" + + " select 
* from row_table") + + snc.sql("put into table col_table" + + " select * from row_table") + + val df = snc.table("col_table").collect() + + assert(df.contains(Row(1, 1, 1))) + assert(df.contains(Row(2, 2, 2))) + assert(df.contains(Row(3, 3, 3))) + } + + + test("PutInto with multiple column key") { + val snc = new SnappySession(sc) + + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1', key_columns 'col2, col3') ") + + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)") + + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", "2", 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + + snc.sql("put into table col_table" + + " select * from row_table") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 4) + assert(resultdf.contains(Row(1, "1", "1", 100))) + assert(resultdf.contains(Row(4, "4", "4", 4))) + } + + test("PutInto with multiple column key and null values") { + val snc = new SnappySession(sc) + + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1', key_columns 'col2, col3') ") + + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)") + + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + + snc.sql("put into table col_table" + + " select * from row_table") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 5) + assert(resultdf.contains(Row(2, "2", null, 2))) + 
assert(resultdf.contains(Row(2, "2", "2", 2))) + } + + test("PutInto selecting from same table") { + val snc = new SnappySession(sc) + + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1', key_columns 'col2, col3') ") + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", "2", 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + + intercept[AnalysisException]{ + snc.sql("put into table col_table" + + " select * from col_table") + } + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 3) + } + + test("PutInto Key columns validation") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + + intercept[AnalysisException]{ + snc.sql("put into table col_table" + + " select * from row_table") + } + } + + test("deleteFrom table exists syntax with alias") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + 
snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + snc.sql("delete from col_table a where exists (select 1 from row_table b where a.col2 = " + + "b.col2 and a.col3 = b.col3)") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 2) + assert(resultdf.contains(Row(3, "3", "3", 3))) + assert(resultdf.contains(Row(2, "2", null, 2))) + } + + test("deleteFrom table exists syntax without alias") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + snc.sql("delete from col_table where exists (select 1 from row_table where col_table.col2 = " + + "row_table.col2 and col_table.col3 = row_table.col3)") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 2) + assert(resultdf.contains(Row(3, "3", "3", 3))) + assert(resultdf.contains(Row(2, "2", null, 2))) + } + + test("deleteFrom table where(a,b) select a,b syntax") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(1, "1", "1", 1)) + 
snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + snc.sql("delete from col_table where (col2, col3) in (select col2, col3 from row_table)") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 2) + assert(resultdf.contains(Row(3, "3", "3", 3))) + assert(resultdf.contains(Row(2, "2", null, 2))) + } + + ignore("deleteFrom table where(a,b) select a,b syntax Row table") { + val snc = new SnappySession(sc) + snc.sql("create table col_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using column options(BUCKETS '2', PARTITION_BY 'col1') ") + snc.sql("create table row_table(col1 INT, col2 STRING, col3 String, col4 Int)" + + " using row options(BUCKETS '2', PARTITION_BY 'col1') ") + + snc.insert("row_table", Row(1, "5", "5", 100)) + snc.insert("row_table", Row(1, "1", "1", 100)) + snc.insert("row_table", Row(2, "2", "2", 2)) + snc.insert("row_table", Row(4, "4", "4", 4)) + + snc.insert("col_table", Row(9, "1", "1", 1)) + snc.insert("col_table", Row(2, "2", null, 2)) + snc.insert("col_table", Row(3, "3", "3", 3)) + snc.sql("delete from row_table where (col2, col3) in (select col2, col3 from col_table)") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 3) + assert(resultdf.contains(Row(1, "5", "5", 100))) + assert(resultdf.contains(Row(4, "4", "4", 4))) + assert(resultdf.contains(Row(2, "2", "2", 2))) + } + + test("DeleteFrom dataframe API: column tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.createTable("col_table", "column", + df1.schema, 
Map("key_columns" -> "pk2,pk1")) + + df1.write.insertInto("col_table") + df2.write.deleteFrom("col_table") + + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + test("DeleteFrom empty Key columns validation: column tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.createTable("col_table", "column", + df1.schema, Map.empty[String, String]) + + df1.write.insertInto("col_table") + + val message = intercept[AnalysisException] { + df2.write.deleteFrom("col_table") + }.getMessage + assert(message.contains("DeleteFrom operation requires key " + + "columns(s) or primary key defined on table.")) + } + + test("DeleteFrom missing Key columns validation: column tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table col_table(pk1 int, col1 int, pk3 varchar(50), col2 int)" + + " using column options(key_columns 'PK3,PK1')") + + df1.write.insertInto("col_table") + + val message = intercept[AnalysisException] { + df2.write.deleteFrom("col_table") + }.getMessage + assert(message.contains("column `pk3` cannot be resolved on the right side of the 
operation.")) + } + + test("Bug - SNAP-2157") { + snc.sql("CREATE TABLE app.customer (C_CustKey int NOT NULL,C_Name varchar(64)," + + "C_Address varchar(64),C_NationKey int,C_Phone varchar(64),C_AcctBal decimal(13,2)," + + "C_MktSegment varchar(64),C_Comment varchar(120)," + + " skip varchar(64), PRIMARY KEY (C_CustKey))") + + val data = (1 to 10).map(i => RData(i, s"$i name", + s"$i addr", + i, + s"$i phone", + Decimal(1), + s"$i mktsegment", + s"$i comment", + s"$i skip")) + + val rdd = sc.parallelize(data, 2) + val df1 = snc.createDataFrame(rdd) + df1.write.insertInto("app.customer") + + df1.write.deleteFrom("app.customer") + val df0 = snc.table("app.customer") + assert(df0.count() == 0) + } + + test("DeleteFrom dataframe API: row tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table row_table(pk1 int , col1 int, pk2 varchar(50), col2 int," + + " primary key (pk2,pk1)) using row options(partition_by 'pk2,col1')") + df1.write.insertInto("row_table") + df2.write.deleteFrom("row_table") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + test("DeleteFrom dataframe API Row replicated tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], 
s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table row_table(pk1 int, col1 int, pk2 varchar(50), col2 int," + + " primary key(pk2,pk1)) using row") + df1.write.insertInto("row_table") + df2.write.deleteFrom("row_table") + + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + test("DeleteFrom empty Key columns validation: row tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.createTable("row_table", "row", + df1.schema, Map.empty[String, String]) + + df1.write.insertInto("row_table") + + val message = intercept[AnalysisException]{ + df2.write.deleteFrom("row_table") + }.getMessage + + assert(message.contains("DeleteFrom operation requires " + + "key columns(s) or primary key defined on table.")) + } + + + test("DeleteFrom missing Key columns validation: row tables") { + val snc = new SnappySession(sc) + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table row_table(pk1 int, col1 int, pk3 varchar(50), col2 int" + + ", primary key(PK1, PK3)) using row") + + df1.write.insertInto("row_table") + + val message = 
intercept[AnalysisException]{ + df2.write.deleteFrom("row_table") + }.getMessage + + assert(message.contains("column `pk3` cannot be resolved on the right side of the operation.")) + } + + // Runs "DELETE FROM row_table SELECT ..." through a JDBC connection, taking the keys + // from the delete_df view, and expects exactly one row to remain in row_table. + test("Delete From SQL using JDBC: row tables") { + val snc = this.snc + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table row_table(pk1 int, col1 int, pk2 varchar(50), col2 int," + + " primary key(pk2,pk1)) using row") + df1.write.insertInto("row_table") + val conn = DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + df2.createGlobalTempView("delete_df") + val stmt = conn.createStatement() + try { + stmt.execute("DELETE FROM row_table SELECT pk1, pk2 from delete_df") + } finally { + // close the statement even when the DELETE itself fails + stmt.close() + } + } finally { + // close the connection before dropping the view (same order as the sibling JDBC + // tests) and still drop the view if close() throws + try { + conn.close() + } finally { + snc.dropTable("delete_df") + } + } + val resultdf = snc.table("row_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + test("Delete From SQL using JDBC: column tables") { + val snc = this.snc + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table col_table(pk1 int, col1 int, pk2 varchar(50), col2 int) " + + "using column options(key_columns 'pk2,pk1')") + + df1.write.insertInto("col_table") + + val conn = 
DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + df2.createGlobalTempView("delete_df") + val stmt = conn.createStatement() + stmt.execute("DELETE FROM col_table SELECT pk1, pk2 from delete_df") + stmt.close() + } finally { + conn.close() + snc.dropTable("delete_df") + } + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + test("Delete From SQL with key column aliasing") { + val snc = this.snc + val rdd = sc.parallelize(data5, 2).map(s => DataWithMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df1 = snc.createDataFrame(rdd) + val rdd2 = sc.parallelize(data6, 2).map(s => DataDiffColMultipleKeys(s(0).asInstanceOf[Int], + s(1).asInstanceOf[Int], s(2).asInstanceOf[String], s(3).asInstanceOf[Long])) + val df2 = snc.createDataFrame(rdd2) + + snc.sql("create table col_table(pk1 int, col1 int, pk3 varchar(50), col2 int) " + + "using column options(key_columns 'pk3,pk1')") + + df1.write.insertInto("col_table") + + val conn = DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + df2.createGlobalTempView("delete_df") + val stmt = conn.createStatement() + stmt.execute("DELETE FROM col_table SELECT pk1, pk2 as pk3 from delete_df") + stmt.close() + } finally { + conn.close() + snc.dropTable("delete_df") + } + val resultdf = snc.table("col_table").collect() + assert(resultdf.length == 1) + assert(resultdf.contains(Row(8, 8, "str8", 8))) + } + + private def bug2348Test(): Unit = { + var snc = new SnappySession(sc) + snc.sql("create table t1(id long," + + "datekey int," + + "checkin_date int," + + "checkout_date int," + + "crawl_time int," + + "batch tinyint," + + "source tinyint," + + "is_high_star tinyint," + + "mt_poi_id bigint," + + "mt_room_id bigint," + + "mt_breakfast tinyint," + + "mt_goods_id bigint," + + "mt_bd_id int," + + "mt_goods_vendor_id long," + + 
"mt_business_type tinyint," + + "mt_room_status tinyint," + + "mt_poi_uv int," + + "mt_price1 int," + + "mt_price2 int," + + "mt_price3 int," + + "mt_price4 int," + + "mt_price5 int," + + "mt_price6 int," + + "mt_price7 int," + + "mt_price8 int," + + "mt_flag1 tinyint," + + "mt_flag2 tinyint," + + "mt_flag3 tinyint," + + "comp_site_id int," + + "comp_poi_id varchar(200)," + + "comp_room_id long," + + "comp_breakfast tinyint," + + "comp_goods_id varchar(200)," + + "comp_goods_vendor varchar(200)," + + "comp_room_status tinyint," + + "comp_is_promotion tinyint," + + "comp_pay_type tinyint," + + "comp_goods_type tinyint," + + "comp_price1 int," + + "comp_price2 int," + + "comp_price3 int," + + "comp_price4 int," + + "comp_price5 int," + + "comp_price6 int," + + "comp_price7 int," + + "comp_price8 int," + + "comp_flag1 tinyint," + + "comp_flag2 tinyint," + + "comp_flag3 tinyint," + + "valid_status tinyint," + + "gmt_time timestamp," + + "version timestamp," + + "interval_days int," + + "real_batch bigint," + + "start_time_long bigint," + + "end_time_long bigint," + + "start_time bigint," + + "end_time bigint," + + "start_real_batch bigint," + + "end_real_batch bigint," + + "flag int," + + "insert_time bigint) " + + "USING column OPTIONS (PARTITION_BY 'mt_poi_id'," + + "REDUNDANCY '0',BUCKETS '1'," + + "PERSISTENCE 'ASYNC', OVERFLOW 'true')") + + snc.sql("create table t2(id bigint," + + "datekey int," + + "checkin_date int," + + "checkout_date int," + + "crawl_time bigint," + + "batch int," + + "source int," + + "is_high_star int," + + "mt_poi_id bigint," + + "mr_room_id bigint," + + "mt_breakfast int," + + "mt_goods_id bigint," + + "mt_bd_id int," + + "mt_goods_vendor_id bigint," + + "mr_business_type int," + + "mt_room_status int," + + "mt_poi_uv int," + + "mt_price1 int," + + "mt_price2 int," + + "mt_price3 int," + + "mt_price4 int," + + "mt_price5 int," + + "mt_price6 int," + + "mt_price7 int," + + "mt_price8 int," + + "mt_flag1 int," + + "mt_flag2 int," + + 
"mt_flag3 int," + + "comp_site_id int," + + "comp_poi_id varchar(200)," + + "comp_room_id long," + + "comp_breakfast tinyint," + + "comp_goods_id varchar(200)," + + "comp_goods_vendor varchar(200)," + + "comp_room_status tinyint," + + "comp_is_promotion tinyint," + + "comp_pay_type tinyint," + + "comp_goods_type tinyint," + + "comp_price1 int," + + "comp_price2 int," + + "comp_price3 int," + + "comp_price4 int," + + "comp_price5 int," + + "comp_price6 int," + + "comp_price7 int," + + "comp_price8 int," + + "comp_flag1 tinyint," + + "comp_flag2 tinyint," + + "comp_flag3 tinyint," + + "valid_status tinyint," + + "gmt_time timestamp," + + "version timestamp," + + "interval_days int," + + "real_batch bigint," + + "start_time_long bigint," + + "end_time_long bigint," + + "start_time bigint," + + "end_time bigint," + + "start_real_batch bigint," + + "end_real_batch bigint," + + "flag int," + + "insert_time bigint) USING " + + "column OPTIONS (PARTITION_BY 'mt_poi_id'," + + "REDUNDANCY '0',BUCKETS '1'," + + "PERSISTENCE 'ASYNC',OVERFLOW 'true')") + + val table1 = snc.table("t1") + val df1 = DataGenerator.generateDataFrame(snc, table1.schema, 20000) + df1.write.insertInto("t1") + val table2 = snc.table("t2") + val df2 = DataGenerator.generateDataFrame(snc, table2.schema, 20000) + df2.write.insertInto("t2") + snc.sql("update t1 set a.flag = 0 from t1 a join" + + " t2 b on a.mt_poi_id = b.mt_poi_id and a.comp_goods_id = b.comp_goods_id " + + "and a.mt_goods_id = b.mt_goods_id and a.datekey = b.datekey and" + + " a.CHECKIN_DATE = b.CHECKIN_DATE and b.start_time_long > a.end_time_long where a.flag = 1") + + snc.sql("update t1 set a.flag = 0,a.end_time_long = b.start_time_long," + + " a.end_time = b.start_time,a.end_real_batch = b.start_real_batch" + + " from t1 a join t2 b on a.mt_poi_id = b.mt_poi_id" + + " and a.comp_goods_id = b.comp_goods_id and a.mt_goods_id = b.mt_goods_id" + + " and a.datekey = b.datekey and a.CHECKIN_DATE = b.CHECKIN_DATE" + + " and b.start_time_long 
<= a.end_time_long AND" + + " (a.comp_price1 <> b.comp_price1 or a.comp_price3 <> b.comp_price3" + + " or a.mt_price1 <> b.mt_price1 or a.mt_price3 <> b.mt_price3 or" + + " a.mt_price4 <> b.mt_price4 or a.mt_price5 <> b.mt_price5 or" + + " a.mt_room_status <> b.mt_room_status or" + + " a.comp_room_status <> b.comp_room_status) where a.flag = 1") + + snc.sql("update t1 set a.end_time_long = b.start_time_long + 2 * 60 * 60 * 1000," + + "a.end_time= b.start_time,a.end_real_batch = b.start_real_batch" + + " from t1 a join t2 b on a.mt_poi_id = b.mt_poi_id and" + + " a.comp_goods_id = b.comp_goods_id and a.mt_goods_id = b.mt_goods_id" + + " and a.datekey = b.datekey and a.CHECKIN_DATE = b.CHECKIN_DATE and" + + " b.start_time_long <= a.end_time_long AND a.comp_price1 = b.comp_price1" + + " and a.comp_price3 = b.comp_price3 and a.mt_price1 = b.mt_price1" + + " and a.mt_price3 = b.mt_price3 and a.mt_price4 = b.mt_price4 and" + + " a.mt_price5 = b.mt_price5 and a.mt_room_status = b.mt_room_status" + + " and a.comp_room_status = b.comp_room_status where a.flag = 1") + + SnappyContext.globalSparkContext.stop() + + snc = new SnappySession(sc) + snc.sql("select count(1) from t1").collect() + } + + // Runs the bug2348Test scenario; t1 and t2 are dropped afterwards whether or not it fails. + test("Bug-2348 : Invalid stats bitmap") { + try { + bug2348Test() + } finally { + // any failure from bug2348Test propagates unchanged once the tables are dropped + val snc = new SnappySession(sc) + snc.dropTable("t1", ifExists = true) + snc.dropTable("t2", ifExists = true) + } + + } + + test("Bug-2369 : Incorrect Filtering on join predicate") { + var snc = new SnappySession(sc) + snc.sql("CREATE TABLE SNAPPY_COL_TABLE3(r1 Integer, r2 Integer) " + + "USING COLUMN OPTIONS(PARTITION_BY 'R1');") + snc.sql("CREATE TABLE SNAPPY_COL_TABLE4(r1 Integer, r2 Integer) " + + "USING COLUMN OPTIONS(PARTITION_BY 'R1');") + + snc.insert("SNAPPY_COL_TABLE3", Row(1, 1)) + snc.insert("SNAPPY_COL_TABLE3", Row(2, 2)) + snc.insert("SNAPPY_COL_TABLE3", Row(3, 3)) + snc.insert("SNAPPY_COL_TABLE3", Row(5, 5)) + + snc.insert("SNAPPY_COL_TABLE4", 
Row(1, 1)) + snc.insert("SNAPPY_COL_TABLE4", Row(2, 2)) + snc.insert("SNAPPY_COL_TABLE4", Row(3, 3)) + snc.insert("SNAPPY_COL_TABLE4", Row(4, 4)) + snc.insert("SNAPPY_COL_TABLE4", Row(5, 5)) + + val dfLeftJoin = snc.sql("SELECT * FROM " + + "SNAPPY_COL_TABLE4 t LEFT OUTER JOIN SNAPPY_COL_TABLE3 tt " + + "ON tt.R1 = t.R2").collect() + assert(dfLeftJoin.length == 5) + assert(dfLeftJoin.contains(Row(4, 4, null, null))) + + val df = snc.sql("SELECT * FROM SNAPPY_COL_TABLE4 t INNER JOIN SNAPPY_COL_TABLE3 tt " + + "ON tt.R1 = t.R2").collect() + assert(df.length == 4) + + val df1 = snc.sql("SELECT * FROM SNAPPY_COL_TABLE4 t INNER JOIN SNAPPY_COL_TABLE3 tt " + + "ON tt.R1 = t.R2 " + + "WHERE t.R2 >= 3 AND t.R2 < 5").collect() + assert(df1.length == 1) + assert(df1.contains(Row(3, 3, 3, 3))) + + val df2 = snc.sql("SELECT * FROM SNAPPY_COL_TABLE4 t INNER JOIN SNAPPY_COL_TABLE3 tt " + + "ON tt.R1 = t.R2 " + + "WHERE t.R2 >= 3 AND t.R2 < 5 " + + " AND " + + "tt.R1 >= 3 AND tt.R1 < 5;").collect() + assert(df2.length == 1) + assert(df2.contains(Row(3, 3, 3, 3))) + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnCacheBenchmark.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnCacheBenchmark.scala new file mode 100644 index 0000000000..8a01a308ba --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnCacheBenchmark.scala @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +/* + * Some initial code adapted from https://github.com/apache/spark/pull/13899 having + * the below license. + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.benchmark + +import com.gemstone.gemfire.internal.cache.GemFireCacheImpl +import io.snappydata.SnappyFunSuite + +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark.addCaseWithCleanup +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Benchmark + +class ColumnCacheBenchmark extends SnappyFunSuite { + + private val cores = math.min(16, Runtime.getRuntime.availableProcessors()) + + override def beforeAll(): Unit = { + super.beforeAll() + stopAll() + } + + override def afterAll(): Unit = { + super.afterAll() + stopAll() + } + + override protected def newSparkConf( + addOn: SparkConf => SparkConf = null): SparkConf = + TAQTest.newSparkConf(addOn) + + private lazy val sparkSession = new SparkSession(sc) + private lazy val snappySession = snc.snappySession + + ignore("cache with randomized keys - insert") { + benchmarkRandomizedKeys(size = 50000000, queryPath = false) + } + + test("cache with randomized keys - query") { + benchmarkRandomizedKeys(size = 50000000, queryPath = true) + } + + ignore("PutInto Vs Insert") { + benchMarkForPutIntoColumnTable(size = 50000000) + } + + test("Performance and validity check for SNAP-2118") { + val snappy = this.snappySession + snappy.sql("DROP TABLE IF EXISTS TABLE1") + snappy.sql( + """ + |create table TABLE1( + | id integer, + | month integer, + | val1 decimal(28,10), + | name varchar(200), + | val_name varchar(200), + | year integer, + | type_id integer, + | val_id integer) + |using column options (partition_by 'id') + """.stripMargin) + + snappy.sql("DROP TABLE IF EXISTS TABLE2") + snappy.sql( + """ + |CREATE TABLE TABLE2( + | role_id INTEGER, + | id INTEGER, + | group_id INTEGER, + | group_name VARCHAR(200), + | name2 VARCHAR(200)) + |USING COLUMN OPTIONS (PARTITION_BY 'id', COLOCATE_WITH 'TABLE1') + """.stripMargin) + + 
snappy.sql("DROP TABLE IF EXISTS TABLE3") + snappy.sql( + """ + |CREATE TABLE TABLE3( + | type_id INTEGER, + | target_name VARCHAR(200), + | factor DECIMAL(32,16)) + |USING COLUMN OPTIONS (PARTITION_BY 'type_id'); + """.stripMargin) + + val numVals = 100 + val numTypes = 100 + val numRoles = 100 + val numGroups = 100 + val numNames = 1000 + val numIds = 5000 + + val numIters = 10 + + val numElems1 = 12 * numVals * numIds + val numElems2 = 4 * numIds + val numElems3 = numTypes * numTypes + + var ds1 = snappy.range(numElems1).selectExpr(s"(id % $numIds) as id", + s"cast((id / ($numVals * $numIds)) as int) as month", + "cast ((rand() * 100.0) as decimal(28, 10)) as val1", + s"concat('cmd_', cast((id % $numNames) as string)) as name", + s"concat('val_', cast(cast((id / (12 * $numIds)) as int) as string)) as val_name", + "((id % 2) + 2014) as year", s"(id % $numTypes) type_id", s"(id % $numVals) val_id") + ds1.cache() + ds1.count() + ds1.write.insertInto("TABLE1") + + val ds2 = snappy.range(numElems2).selectExpr(s"cast((rand() * $numRoles) as int)", + s"id % $numIds", s"id % $numGroups", "concat('grp_', cast((id % 100) as string))", + "concat('site_', cast((id % 1000) as string))") + ds2.write.insertInto("TABLE2") + + val ds3 = snappy.range(numElems3).selectExpr(s"id % $numTypes", + s"concat('type_', cast(cast((id / $numTypes) as int) as string))", "rand() * 100.0") + ds3.write.insertInto("TABLE3") + + val sql = "select b.group_name, a.name, " + + "sum(a.val1 * c.factor) " + + "from TABLE1 a, TABLE2 b, TABLE3 c " + + "where a.id = b.id and a.year = 2015 and " + + "a.val_name like 'val\\_42%' and b.role_id = 99 and c.type_id = a.type_id and " + + "c.target_name = 'type_36' group by b.group_name, a.name" + + val benchmark = new Benchmark("SNAP-2118 with random data", numElems1) + + var expectedResult: Array[Row] = null + benchmark.addCase("smj", numIters, () => snappy.sql("set snappydata.hashJoinSize=-1"), + () => {}) { i => + if (i == 1) expectedResult = 
snappy.sql(sql).collect() + else snappy.sql(sql).collect() + } + benchmark.addCase("hash", numIters, () => snappy.sql("set snappydata.hashJoinSize=1g"), + () => {}) { i => + if (i == 1) ColumnCacheBenchmark.collect(snappy.sql(sql), expectedResult) + else snappy.sql(sql).collect() + } + benchmark.run() + + // also check with null values and updates (SNAP-2088) + + snappy.truncateTable("table1") + // null values every 8th row + ds1 = ds1.selectExpr("id", "month", "(case when (id & 7) = 0 then null else val1 end) val1", + "name", "(case when (id & 7) = 0 then null else val_name end) as val_name", + "year", "type_id", "(case when (id & 7) = 0 then null else val_id end) val_id") + ds1.createOrReplaceTempView("TABLE1_TEMP1") + ds1.write.insertInto("table1") + + expectedResult = snappy.sql(sql.replace("TABLE1", "TABLE1_TEMP1")).collect() + ColumnCacheBenchmark.collect(snappy.sql(sql), expectedResult) + + // even more null values every 4th row but on TABLE1 these are set using update + ds1 = ds1.selectExpr("id", "month", "(case when (id & 3) = 0 then null else val1 end) val1", + "name", "(case when (id & 3) = 0 then null else val_name end) as val_name", + "year", "type_id", "(case when (id & 3) = 0 then null else val_id end) val_id") + ds1.createOrReplaceTempView("TABLE1_TEMP2") + + expectedResult = snappy.sql(sql.replace("TABLE1", "TABLE1_TEMP2")).collect() + snappy.sql("update table1 set val1 = null, val_name = null, val_id = null where (id & 3) = 0") + ColumnCacheBenchmark.collect(snappy.sql(sql), expectedResult) + + // more update statements but these don't change anything rather change to same value + snappy.sql("update table1 set val1 = case when (id & 3) = 0 then null else val1 end, " + + "val_name = case when (id & 3) = 0 then null else val_name end, " + + "val_id = case when (id & 3) = 0 then null else val_id end where (id % 10) = 0") + ColumnCacheBenchmark.collect(snappy.sql(sql), expectedResult) + + snappy.sql("DROP TABLE IF EXISTS TABLE3") + snappy.sql("DROP 
TABLE IF EXISTS TABLE2") + snappy.sql("DROP TABLE IF EXISTS TABLE1") + } + + // Runs the wide-table and null-heavy table checks with the auto-broadcast join + // threshold set to -1. + test("insert more than 64K data") { + snc.conf.setConfString(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + try { + createAndTestBigTable() + + createAndTestTableWithNulls(size = 20000, numCols = 300) + createAndTestTableWithNulls(size = 100000, numCols = 20) + } finally { + // restore the default even when one of the checks fails + snc.conf.setConfString(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.defaultValueString) + } + } + + // Runs the wide-table putInto check with the auto-broadcast join threshold set to -1. + test("PutInto wide column table") { + snc.conf.setConfString(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + try { + createAndTestPutIntoInBigTable() + } finally { + // do not leak the -1 threshold into whatever uses this conf afterwards + snc.conf.setConfString(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.defaultValueString) + } + } + + private def doGC(): Unit = { + System.gc() + System.runFinalization() + System.gc() + System.runFinalization() + } + + private def benchMarkForPutIntoColumnTable(size: Int, numIters: Int = 10): Unit = { + val benchmark = new Benchmark("PutInto Vs Insert", size) + val sparkSession = this.sparkSession + val snappySession = this.snappySession + import org.apache.spark.sql.snappy._ + if (GemFireCacheImpl.getCurrentBufferAllocator.isDirect) { + logInfo("ColumnCacheBenchmark: using off-heap for performance comparison") + } else { + logInfo("ColumnCacheBenchmark: using heap for performance comparison") + } + + val testDF2 = snappySession.range(size) + .selectExpr("id", "(rand() * 1000.0) as k") + + def prepare(): Unit = { + doGC() + sparkSession.sql("drop table if exists test") + snappySession.sql("create table test (id bigint not null, k double not null) " + + s"using column options(partition_by 'id', buckets '$cores', key_columns 'id')") + } + + def cleanup(): Unit = { + snappySession.sql("drop table if exists test") + doGC() + } + + def testCleanup(): Unit = { + snappySession.sql("truncate table if exists test") + } + + // As expected putInto is two times slower than a simple insert + addCaseWithCleanup(benchmark, "Insert", numIters, prepare, cleanup, testCleanup) { _ => + testDF2.write.insertInto("test") + } + addCaseWithCleanup(benchmark, "PutInto", numIters, 
prepare, cleanup, testCleanup) { _ => + testDF2.write.putInto("test") + } + benchmark.run() + } + + /** + * Benchmark caching randomized keys created from a range. + */ + private def benchmarkRandomizedKeys(size: Int, queryPath: Boolean, + numIters: Int = 10, runSparkCaching: Boolean = true): Unit = { + val benchmark = new Benchmark("Cache random keys", size) + val sparkSession = this.sparkSession + val snappySession = this.snappySession + if (GemFireCacheImpl.getCurrentBufferAllocator.isDirect) { + logInfo("ColumnCacheBenchmark: using off-heap for performance comparison") + } else { + logInfo("ColumnCacheBenchmark: using heap for performance comparison") + } + sparkSession.sql("drop table if exists test") + snappySession.sql("drop table if exists test") + val testDF = sparkSession.range(size) + .selectExpr("id", "(rand() * 1000.0) as k") + val testDF2 = snappySession.range(size) + .selectExpr("id", "(rand() * 1000.0) as k") + testDF.createOrReplaceTempView("test") + + val query = "select avg(k), avg(id) from test" + val expectedAnswer = sparkSession.sql(query).collect().toSeq + val expectedAnswer2 = testDF2.selectExpr("avg(k)", "avg(id)").collect().toSeq + + /** + * Add a benchmark case, optionally specifying whether to cache the dataset. 
+ */ + def addBenchmark(name: String, cache: Boolean, params: Map[String, String] = Map(), + snappy: Boolean = false): Unit = { + val defaults = params.keys.flatMap { k => sparkSession.conf.getOption(k).map((k, _)) } + val defaults2 = params.keys.flatMap { k => snappySession.conf.getOption(k).map((k, _)) } + + def prepare(): Unit = { + params.foreach { case (k, v) => sparkSession.conf.set(k, v) } + params.foreach { case (k, v) => snappySession.conf.set(k, v) } + sparkSession.catalog.clearCache() + doGC() + if (cache) { + testDF.createOrReplaceTempView("test") + sparkSession.catalog.cacheTable("test") + } else if (snappy) { + snappySession.sql("drop table if exists test") + snappySession.sql("create table test (id bigint not null, k double not null) " + + s"using column options(buckets '$cores')") + testDF2.write.insertInto("test") + } + if (snappy) { + snappySession.sql("set snappydata.linkPartitionsToBuckets=true") + val results = snappySession.sql("select count(*), spark_partition_id() " + + "from test group by spark_partition_id()").collect().toSeq + snappySession.sql("set snappydata.linkPartitionsToBuckets=false") + val counts = results.map(_.getLong(0)) + // expect the counts to not vary by more than 800k (max 200k per batch) + val min = counts.min + val max = counts.max + assert(max - min <= 800000, "Unexpectedly large data skew: " + + results.map(r => s"${r.getInt(1)}=${r.getLong(0)}").mkString(",")) + // check for SNAP-2200 by forcing overflow with updates + snappySession.sql("update test set id = id + 1") + snappySession.sql("update test set k = k + 1.0") + ColumnCacheBenchmark.collect(snappySession.sql( + "select max(id), min(id) from test"), Seq(Row(size, 1L))) + // repopulate for the benchmark test + snappySession.sql("truncate table test") + testDF2.write.insertInto("test") + ColumnCacheBenchmark.collect(snappySession.sql(query), expectedAnswer2) + } else { + ColumnCacheBenchmark.collect(sparkSession.sql(query), expectedAnswer) + } + testCleanup() + } 
+ + def cleanup(): Unit = { + defaults.foreach { case (k, v) => sparkSession.conf.set(k, v) } + defaults2.foreach { case (k, v) => snappySession.conf.set(k, v) } + sparkSession.catalog.clearCache() + snappySession.sql("drop table if exists test") + doGC() + } + + def testCleanup(): Unit = { + if (!queryPath) { + if (snappy) { + snappySession.sql("truncate table if exists test") + } else { + sparkSession.catalog.clearCache() + } + doGC() + } + } + + addCaseWithCleanup(benchmark, name, numIters, prepare, cleanup, testCleanup) { _ => + if (queryPath) { + if (snappy) { + ColumnCacheBenchmark.collect(snappySession.sql(query), expectedAnswer2) + } else { + ColumnCacheBenchmark.collect(sparkSession.sql(query), expectedAnswer) + } + } else { + // also benchmark the time it takes to build the column buffers + if (snappy) { + testDF2.write.insertInto("test") + } else { + if (cache) { + sparkSession.catalog.cacheTable("test") + sparkSession.sql("select count(*) from test").collect() + } + } + } + } + } + + // Benchmark cases: + // (1) Caching with defaults + // (2) Caching with SnappyData column batches with defaults + + if (runSparkCaching) { + addBenchmark("cache = T", cache = true, Map.empty) + } + addBenchmark("snappy = T", cache = false, Map.empty, snappy = true) + + benchmark.run() + } + + private def createAndTestPutIntoInBigTable(): Unit = { + snappySession.sql("drop table if exists wide_table") + snappySession.sql("drop table if exists wide_table1") + import org.apache.spark.sql.snappy._ + val size = 100 + val num_col = 300 + val str = (1 to num_col).map(i => s" '$i' as C$i") + val testDF = snappySession.range(size).select(str.map { expr => + Column(snappySession.sessionState.sqlParser.parseExpression(expr)) + }: _*) + + + testDF.collect() + val sql = (1 to num_col).map(i => s"C$i STRING").mkString(",") + snappySession.sql(s"create table wide_table($sql) " + + s" using column options(key_columns 'C2,C3')") + snappySession.sql(s"create table wide_table1($sql) " + + s" 
using column options()")
+    // Creating another table for Range related issue SNAP-2142
+    testDF.write.insertInto("wide_table")
+    testDF.write.insertInto("wide_table1")
+    val tableDF = snappySession.table("wide_table1")
+    tableDF.write.putInto("wide_table")
+  }
+
+  /**
+   * Create two 300-column string tables ("wide_table" and "wide_table1"), fill each
+   * with 100 rows and run a set of queries over them: dropDuplicates through the
+   * Dataset API, a group-by with last() on every column, a self-join style join,
+   * a full scan and AVG projections with and without a filter.
+   */
+  private def createAndTestBigTable(): Unit = {
+    // Both tables are created below, so both must be dropped first: an earlier test
+    // (e.g. createAndTestPutIntoInBigTable) may have left wide_table1 behind, which
+    // would make the second "create table" fail.
+    snappySession.sql("drop table if exists wide_table")
+    snappySession.sql("drop table if exists wide_table1")
+
+    val size = 100
+    val num_col = 300
+    val str = (1 to num_col).map(i => s" '$i' as C$i")
+    val testDF = snappySession.range(size).select(str.map { expr =>
+      Column(snappySession.sessionState.sqlParser.parseExpression(expr))
+    }: _*)
+
+
+    testDF.collect()
+    val sql = (1 to num_col).map(i => s"C$i STRING").mkString(",")
+    snappySession.sql(s"create table wide_table($sql) using column")
+    snappySession.sql(s"create table wide_table1($sql) using column")
+    testDF.write.insertInto("wide_table")
+    testDF.write.insertInto("wide_table1")
+
+    val uniqDf = snappySession.table("wide_table").dropDuplicates(Array("C1"))
+    logInfo("Number of unique rows in wide_table = " + uniqDf.count())
+    // check fallback plans being invoked via API
+    logInfo(uniqDf.collect().mkString("\n"))
+    // and also via SQL
+    val s = (2 to num_col).map(i => s"last(C$i)").mkString(",")
+    snappySession.sql(s"select C1, $s from wide_table group by C1").collect()
+
+    val df = snappySession.sql("select *" +
+      " from wide_table a , wide_table1 b where a.c1 = b.c1 and a.c1 = '1'")
+    df.collect()
+
+    val df0 = snappySession.sql("select * from wide_table")
+    df0.collect()
+
+    val avgProjections = (1 to num_col).map(i => s"AVG(C$i)").mkString(",")
+    val df1 = snappySession.sql(s"select $avgProjections from wide_table")
+    df1.collect()
+
+    val df2 = snappySession.sql(s"select $avgProjections from wide_table where C1 = '1' ")
+    df2.collect()
+  }
+
+  private def createAndTestTableWithNulls(size: Int, numCols: Int): Unit = {
+    snappySession.sql("drop table if exists nulls_table")
+
+    val str = (1 to numCols).map(i =>
+      s" (case when rand() < 0.5 then 
null else '$i' end) as C$i")
+    val testDF = snappySession.range(size).select(str.map { expr =>
+      Column(snappySession.sessionState.sqlParser.parseExpression(expr))
+    }: _*)
+
+    val sql = (1 to numCols).map(i => s"C$i STRING").mkString(",")
+    snappySession.sql(s"create table nulls_table($sql) using column")
+    testDF.write.insertInto("nulls_table")
+
+    assert(snappySession.sql(s"select count(*) from nulls_table")
+      .collect()(0).getLong(0) == size)
+  }
+}
+
+object ColumnCacheBenchmark {
+
+  /**
+   * Collect a `Dataset[Row]` and check whether the collected result matches
+   * the expected answer.
+   *
+   * @param df             the query result to collect and verify
+   * @param expectedAnswer the rows the query is expected to produce
+   * @throws RuntimeException with the message returned by `QueryTest.checkAnswer`
+   *                          when the answer does not match
+   */
+  def collect(df: Dataset[Row], expectedAnswer: Seq[Row]): Unit = {
+    QueryTest.checkAnswer(df, expectedAnswer, checkToRDD = false) match {
+      case Some(errMessage) => throw new RuntimeException(errMessage)
+      case None => // all good
+    }
+  }
+
+  /**
+   * Create a new DataFrame over the internal row RDD of `df` (its
+   * `queryExecution.toRdd`) using `newSchema` as the schema.
+   */
+  def applySchema(df: Dataset[Row], newSchema: StructType): Dataset[Row] = {
+    df.sqlContext.internalCreateDataFrame(df.queryExecution.toRdd, newSchema)
+  }
+
+  /**
+   * Register a benchmark case whose per-run setup and teardown are kept out of
+   * the measured time.
+   *
+   * For every timed run the registered function calls `testPrepare`, starts the
+   * timer, invokes `f` with the timer's iteration number, stops the timer and
+   * finally calls `testCleanup`.
+   *
+   * @param benchmark   the benchmark to which the case is appended
+   * @param name        display name of the case
+   * @param numIters    iteration count handed to `Benchmark.Case`
+   * @param prepare     handed to `Benchmark.Case` as its prepare callback
+   * @param cleanup     handed to `Benchmark.Case` as its cleanup callback
+   * @param testCleanup run after each timed invocation of `f`, outside the timing
+   * @param testPrepare run before each timed invocation of `f`, outside the timing;
+   *                    does nothing by default
+   * @param f           the code being measured
+   */
+  def addCaseWithCleanup(
+      benchmark: Benchmark,
+      name: String,
+      numIters: Int = 0,
+      prepare: () => Unit,
+      cleanup: () => Unit,
+      testCleanup: () => Unit,
+      // "() => Unit" would evaluate to the Unit companion object and only work through
+      // value discarding; "() => ()" is the actual no-op
+      testPrepare: () => Unit = () => ())(f: Int => Unit): Unit = {
+    val timedF = (timer: Benchmark.Timer) => {
+      testPrepare()
+      timer.startTiming()
+      try {
+        f(timer.iteration)
+      } finally {
+        // stop the timer and release per-run state even when f throws so that a
+        // failed run does not leave data behind for whatever executes next
+        timer.stopTiming()
+        testCleanup()
+      }
+    }
+    benchmark.benchmarks += Benchmark.Case(name, timedF, numIters, prepare, cleanup)
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/MapTest.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/MapTest.scala
new file mode 100644
index 0000000000..1c357974af
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/MapTest.scala
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.execution.benchmark + +import scala.collection.mutable + +import com.gemstone.gnu.trove.{THashMap, THashSet} +import io.snappydata.SnappyFunSuite +import it.unimi.dsi.fastutil.objects.{Object2ObjectOpenHashMap, ObjectOpenHashSet} +import org.eclipse.collections.impl.map.mutable.UnifiedMap +import org.eclipse.collections.impl.set.mutable.UnifiedSet + +import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark.addCaseWithCleanup +import org.apache.spark.util.Benchmark +import org.apache.spark.util.random.XORShiftRandom + +/** + * Some tests for basic map/set structures used by SnappyData and store. 
+ */
+class MapTest extends SnappyFunSuite {
+  // operation codes stored in Item.operation by the "mixed ops" benchmarks
+  private val GET = 1
+  private val INSERT = 2
+  private val DELETE = 3
+
+  /**
+   * Build the randomized operation stream used by the "mixed ops" benchmarks:
+   * 6 in 10 entries are gets, 3 in 10 are inserts and 1 in 10 are deletes.
+   *
+   * Gets and deletes pick from both `data` and `dataI`, inserts only from `dataI`.
+   * Every entry is a private copy of the picked item: the operation code lives in
+   * a mutable field of Item, so tagging the shared instances directly would let a
+   * later pick of the same item overwrite the operation recorded for an earlier
+   * entry and skew the intended mix. A copy is equal to (and hashes like) the
+   * item it was made from.
+   */
+  private def generateOperations(rnd: XORShiftRandom, data: Array[Item],
+      dataI: Array[Item], numOperations: Int): Array[Item] = {
+    def pickAny(): Item = {
+      val index = rnd.nextInt(data.length + dataI.length)
+      if (index >= data.length) dataI(index - data.length) else data(index)
+    }
+
+    def tagged(item: Item, operation: Int): Item = {
+      val op = item.copy()
+      op.operation = operation
+      op
+    }
+
+    Array.fill(numOperations)(rnd.nextInt(10) match {
+      case 0 | 1 | 2 | 3 | 4 | 5 => tagged(pickAny(), GET)
+      case 6 | 7 | 8 => tagged(dataI(rnd.nextInt(dataI.length)), INSERT)
+      case 9 => tagged(pickAny(), DELETE)
+    })
+  }
+
+  // Compares set-like usage (mixed operations, iteration, gets, inserts) across the
+  // collection libraries; every case of a benchmark must produce the same checksum.
+  ignore("hash set comparison") {
+    val numEntries = 1000000
+    val numOperations = 1000000
+    val numIterations = 10
+
+    val oset1 = new THashSet(numEntries, 0.7f)
+    val omap2 = new java.util.HashMap[Item, Item](numEntries)
+    var imap3: Map[Item, Item] = null
+    val omap3 = new mutable.HashMap[Item, Item]
+    val oset4 = new ObjectOpenHashSet[Item](numEntries)
+    val oset5 = new UnifiedSet[Item](numEntries)
+
+    val rnd = new XORShiftRandom()
+    val data = Array.fill(numEntries)(Item(rnd.nextLong(),
+      s"str${rnd.nextInt(100)}", rnd.nextDouble()))
+    val dataI = Array.fill(numEntries)(Item(rnd.nextLong(),
+      s"str${rnd.nextInt(100)}", rnd.nextDouble()))
+    val operations = generateOperations(rnd, data, dataI, numOperations)
+
+    var benchmark = new Benchmark("hashing mixed ops", numOperations)
+
+    // one checksum per timed run; all of them must agree
+    val results = new mutable.ArrayBuffer[Long]()
+
+    addCaseWithCleanup(benchmark, "THashSet", numIterations,
+      () => (), () => (), () => (), () => {
+        oset1.clear()
+        data.foreach(oset1.add)
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => oset1.get(op).asInstanceOf[Item]
+          case INSERT =>
+            oset1.add(op)
+            op
+          case DELETE => if (oset1.remove(op)) op else null
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => (), () => (), () => (), () => {
+        omap2.clear()
+        data.foreach(d => omap2.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap2.get(op)
+          case INSERT =>
+            omap2.put(op, op)
+            op
+          case DELETE => omap2.remove(op)
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => (), () => (), () => (), () => {
+        oset4.clear()
+        data.foreach(oset4.add)
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => oset4.get(op)
+          case INSERT =>
+            oset4.add(op)
+            op
+          case DELETE => if (oset4.remove(op)) op else null
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections", numIterations,
+      () => (), () => (), () => (), () => {
+        oset5.clear()
+        data.foreach(oset5.add)
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => oset5.get(op)
+          case INSERT =>
+            oset5.add(op)
+            op
+          case DELETE => if (oset5.remove(op)) op else null
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    var expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing iteration", numEntries)
+    results.clear()
+
+    // cleanup clears the set like every other case of this benchmark does
+    addCaseWithCleanup(benchmark, "THashSet", numIterations,
+      () => data.foreach(oset1.add), oset1.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = oset1.iterator()
+      while (iter.hasNext) {
+        val item = iter.next().asInstanceOf[Item]
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => data.foreach(d => omap2.put(d, d)), omap2.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = omap2.keySet().iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => data.foreach(oset4.add), oset4.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = oset4.iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections", numIterations,
+      () => data.foreach(oset5.add), oset5.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = oset5.iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing gets", numEntries)
+    results.clear()
+
+    addCaseWithCleanup(benchmark, "Scala Immutable HashMap", numIterations,
+      () => {
+        data.foreach(d => omap3.put(d, d))
+        imap3 = omap3.toMap
+        omap3.clear
+      }, () => imap3 = null, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = imap3(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Scala HashMap", numIterations,
+      () => data.foreach(d => omap3.put(d, d)), omap3.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap3(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "THashSet", numIterations,
+      () => data.foreach(oset1.add), oset1.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = oset1.get(data(i)).asInstanceOf[Item]
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => data.foreach(d => omap2.put(d, d)), omap2.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap2.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => data.foreach(oset4.add), oset4.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = oset4.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections", numIterations,
+      () => data.foreach(oset5.add), oset5.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = oset5.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing inserts", numEntries)
+    results.clear()
+
+    addCaseWithCleanup(benchmark, "THashSet", numIterations,
+      oset1.clear, () => (), oset1.clear)(
+      _ => data.foreach(oset1.add))
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => omap2.clear(), () => (), () => omap2.clear())(
+      _ => data.foreach(d => omap2.put(d, d)))
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      oset4.clear, () => (), oset4.clear)(
+      _ => data.foreach(oset4.add))
+    addCaseWithCleanup(benchmark, "Eclipse Collections", numIterations,
+      oset5.clear, () => (), oset5.clear)(
+      _ => data.foreach(oset5.add))
+
+    benchmark.run()
+  }
+
+  // Same four benchmarks as the set comparison, run against the map implementations.
+  ignore("hash map comparison") {
+    val numEntries = 1000000
+    val numOperations = 1000000
+    val numIterations = 10
+
+    val omap1 = new THashMap(numEntries, 0.7f)
+    val omap2 = new java.util.HashMap[Item, Item](numEntries)
+    var imap3: Map[Item, Item] = null
+    val omap3 = new scala.collection.mutable.HashMap[Item, Item]()
+    val omap4 = new Object2ObjectOpenHashMap[Item, Item](numEntries)
+    val omap5 = new UnifiedMap[Item, Item](numEntries)
+
+    val rnd = new XORShiftRandom()
+    val data = Array.fill(numEntries)(Item(rnd.nextLong(),
+      s"str${rnd.nextInt(100)}", rnd.nextDouble()))
+    val dataI = Array.fill(numEntries)(Item(rnd.nextLong(),
+      s"str${rnd.nextInt(100)}", rnd.nextDouble()))
+    val operations = generateOperations(rnd, data, dataI, numOperations)
+
+    var benchmark = new Benchmark("hashing mixed ops", numOperations)
+
+    // one checksum per timed run; all of them must agree
+    val results = new mutable.ArrayBuffer[Long]()
+
+    addCaseWithCleanup(benchmark, "THashMap", numIterations,
+      () => (), () => (), () => (), () => {
+        omap1.clear()
+        data.foreach(d => omap1.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap1.get(op).asInstanceOf[Item]
+          case INSERT =>
+            omap1.put(op, op)
+            op
+          case DELETE => omap1.remove(op).asInstanceOf[Item]
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => (), () => (), () => (), () => {
+        omap2.clear()
+        data.foreach(d => omap2.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap2.get(op)
+          case INSERT =>
+            omap2.put(op, op)
+            op
+          case DELETE => omap2.remove(op)
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Scala HashMap", numIterations,
+      () => (), () => (), () => (), () => {
+        omap3.clear()
+        data.foreach(d => omap3.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap3.get(op).orNull
+          case INSERT =>
+            omap3.put(op, op)
+            op
+          case DELETE => omap3.remove(op).orNull
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => (), () => (), () => (), () => {
+        omap4.clear()
+        data.foreach(d => omap4.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap4.get(op)
+          case INSERT =>
+            omap4.put(op, op)
+            op
+          case DELETE => omap4.remove(op)
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections Map", numIterations,
+      () => (), () => (), () => (), () => {
+        omap5.clear()
+        data.foreach(d => omap5.put(d, d))
+      })(_ => {
+      var sum = 0L
+      for (op <- operations) {
+        val item = op.operation match {
+          case GET => omap5.get(op)
+          case INSERT =>
+            omap5.put(op, op)
+            op
+          case DELETE => omap5.remove(op)
+        }
+        if (item ne null) {
+          sum += item.l + item.s.length + item.d.toLong
+        }
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    var expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing iteration", numEntries)
+    results.clear()
+
+    addCaseWithCleanup(benchmark, "THashMap", numIterations,
+      () => data.foreach(d => omap1.put(d, d)), omap1.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = omap1.keySet().iterator()
+      while (iter.hasNext) {
+        val item = iter.next().asInstanceOf[Item]
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => data.foreach(d => omap2.put(d, d)), omap2.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = omap2.keySet().iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Scala HashMap", numIterations,
+      () => data.foreach(d => omap3.put(d, d)), omap3.clear, () => ())(_ => {
+      results += omap3.keysIterator.foldLeft(0L)((sum, item) =>
+        sum + item.l + item.s.length + item.d.toLong)
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => data.foreach(d => omap4.put(d, d)), omap4.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = omap4.keySet().iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections Map", numIterations,
+      () => data.foreach(d => omap5.put(d, d)), omap5.clear, () => ())(_ => {
+      var sum = 0L
+      val iter = omap5.keySet().iterator()
+      while (iter.hasNext) {
+        val item = iter.next()
+        sum += item.l + item.s.length + item.d.toLong
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing gets", numEntries)
+    results.clear()
+
+    addCaseWithCleanup(benchmark, "Scala Immutable HashMap", numIterations,
+      () => {
+        data.foreach(d => omap3.put(d, d))
+        imap3 = omap3.toMap
+        omap3.clear
+      }, () => imap3 = null, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = imap3(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Scala HashMap", numIterations,
+      () => data.foreach(d => omap3.put(d, d)), omap3.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap3(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "THashMap", numIterations,
+      () => data.foreach(d => omap1.put(d, d)), omap1.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap1.get(data(i)).asInstanceOf[Item]
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      () => data.foreach(d => omap2.put(d, d)), omap2.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap2.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      () => data.foreach(d => omap4.put(d, d)), omap4.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap4.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+    addCaseWithCleanup(benchmark, "Eclipse Collections Map", numIterations,
+      () => data.foreach(d => omap5.put(d, d)), omap5.clear, () => ())(_ => {
+      var sum = 0L
+      var i = 0
+      while (i < numEntries) {
+        val item = omap5.get(data(i))
+        sum += item.l + item.s.length + item.d.toLong
+        i += 1
+      }
+      results += sum
+    })
+
+    benchmark.run()
+
+    expected = results.head
+    results.indices.foreach { index =>
+      val r = results(index)
+      assert(r === expected, s"Mismatch at index = $index")
+    }
+
+    benchmark = new Benchmark("hashing inserts", numEntries)
+    results.clear()
+
+    addCaseWithCleanup(benchmark, "THashMap", numIterations,
+      omap1.clear, () => (), omap1.clear)(
+      _ => data.foreach(d => omap1.put(d, d)))
+    addCaseWithCleanup(benchmark, "Java HashMap", numIterations,
+      omap2.clear, () => (), omap2.clear)(
+      _ => data.foreach(d => omap2.put(d, d)))
+    addCaseWithCleanup(benchmark, "Scala HashMap", numIterations,
+      omap3.clear, () => (), omap3.clear)(
+      _ => data.foreach(d => omap3.put(d, d)))
+    addCaseWithCleanup(benchmark, "FastUtil", numIterations,
+      omap4.clear, () => (), omap4.clear)(
+      _ => data.foreach(d => omap4.put(d, d)))
+    addCaseWithCleanup(benchmark, "Eclipse Collections Map", numIterations,
+      omap5.clear, () => (), omap5.clear)(
+      _ => data.foreach(d => omap5.put(d, d)))
+
+    benchmark.run()
+  }
+
+  // Repeated lookups of every key in a 20-entry string map, for each implementation.
+  ignore("compare small map gets") {
+    val numEntries = 20
+    val numLoops = 1000000
+    val numIterations = 10
+
+    val omap1 = new THashMap(numEntries, 0.7f)
+    val omap2 = new java.util.HashMap[String, String](numEntries)
+    var imap3: Map[String, String] = null
+    val omap3 = new scala.collection.mutable.HashMap[String, String]()
+    val omap4 = new java.util.concurrent.ConcurrentHashMap[String, String](32, 0.7f, 1)
+    val omap5 = new scala.collection.concurrent.TrieMap[String, String]
+    val omap6 = new Object2ObjectOpenHashMap[String, String](numEntries)
+    val omap7 = new UnifiedMap[String, String](numEntries)
+
+    val rnd = new XORShiftRandom()
+    val data = Array.fill(numEntries)(s"str${rnd.nextInt(100)}")
+
+    val benchmark = new Benchmark("hashing gets", numEntries * numLoops)
+
+    benchmark.addCase("Scala Immutable HashMap", numIterations,
+      () => {
+        data.foreach(d => omap3.put(d, d))
+        imap3 = omap3.toMap
+        omap3.clear
+      }, () => imap3 = null)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == imap3(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("Scala HashMap", numIterations,
+      () => data.foreach(d => omap3.put(d, d)), omap3.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap3(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("THashMap", numIterations,
+      () => data.foreach(d => omap1.put(d, d)), omap1.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap1.get(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("Java HashMap", numIterations,
+      () => data.foreach(d => omap2.put(d, d)), omap2.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap2.get(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("Java ConcurrentHashMap", numIterations,
+      () => data.foreach(d => omap4.put(d, d)), omap4.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap4.get(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("Scala TrieMap", numIterations,
+      () => data.foreach(d => omap5.put(d, d)), omap5.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap5(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("FastUtil Map", numIterations,
+      () => data.foreach(d => omap6.put(d, d)), omap6.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap6.get(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+    benchmark.addCase("Eclipse Collections Map", numIterations,
+      () => data.foreach(d => omap7.put(d, d)), omap7.clear)(_ => {
+      var loop = 0
+      while (loop < numLoops) {
+        var i = 0
+        while (i < numEntries) {
+          assert(data(i) == omap7.get(data(i)))
+          i += 1
+        }
+        loop += 1
+      }
+    })
+
+    benchmark.run()
+  }
+}
+
+/**
+ * Key/value element used by the map and set benchmarks.
+ *
+ * Equality is the case-class equality over (l, s, d) while the hash code is
+ * derived from `l` alone. `operation` is a mutable tag that takes no part in
+ * equals/hashCode and is not carried over by `copy`.
+ */
+final case class Item(l: Long, s: String, d: Double) {
+  var operation: Int = _
+
+  // folds the upper and lower 32 bits of l together
+  override def hashCode(): Int = (l ^ (l >>> 32)).toInt
+
+  override def toString: String = s"Item($l, $s, $d, op=$operation)"
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/StringBenchmark.scala 
b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/StringBenchmark.scala new file mode 100644 index 0000000000..2bba333a41 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/StringBenchmark.scala @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.sql.execution.benchmark + +import java.nio.charset.StandardCharsets +import java.util.UUID + +import scala.io.Source + +import io.snappydata.SnappyFunSuite +import it.unimi.dsi.fastutil.longs.LongArrayList + +import org.apache.spark.unsafe.array.ByteArrayMethods +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.{Native, Platform} +import org.apache.spark.util.Benchmark +import org.apache.spark.util.random.XORShiftRandom + +/** + * Comparisons for UTF8String optimizations. 
+ */ +class StringBenchmark extends SnappyFunSuite { + + private val allocatedMemoryList: LongArrayList = new LongArrayList + + override def afterAll(): Unit = { + super.afterAll() + if (allocatedMemoryList.size() > 0) { + val iter = allocatedMemoryList.iterator() + while (iter.hasNext) { + Platform.freeMemory(iter.nextLong()) + } + allocatedMemoryList.clear() + } + } + + private def toDirectUTF8String(s: String): UTF8String = { + val b = s.getBytes(StandardCharsets.UTF_8) + val numBytes = b.length + val ub = Platform.allocateMemory(numBytes) + allocatedMemoryList.add(ub) + Platform.copyMemory(b, Platform.BYTE_ARRAY_OFFSET, null, ub, numBytes) + UTF8String.fromAddress(null, ub, numBytes) + } + + private def doGC(): Unit = { + System.gc() + System.runFinalization() + System.gc() + System.runFinalization() + } + + private def runUTF8StringCompareTo(numElements: Int, numDistinct: Int, + numIters: Int = 10, preSorted: Boolean = false): Unit = { + val rnd = new XORShiftRandom + + def randomSuffix: String = { + (1 to rnd.nextInt(6)).map(_ => rnd.nextInt(10)).mkString("") + } + + val randData = Array.fill(numDistinct)(s"${UUID.randomUUID().toString}-$randomSuffix") + val sdata = Array.fill(numElements)(randData(rnd.nextInt(numDistinct))) + val data = sdata.map(UTF8String.fromString) + val udata = sdata.map(toDirectUTF8String) + + if (preSorted) { + java.util.Arrays.sort(data, null) + java.util.Arrays.sort(udata, null) + } + var cdata: Array[UTF8String] = null + var cdata2: Array[UTF8String] = null + var cdata3: Array[UTF8String] = null + + def displayNumber(num: Int): String = { + if (num % 1000000 == 0) s"${num / 1000000}M" + else if (num % 1000 == 0) s"${num / 1000}K" + else num.toString + } + + val benchmark = new Benchmark(s"Sort${if (preSorted) "(pre-sorted)" else ""} " + + s"num=${displayNumber(numElements)} distinct=${displayNumber(numDistinct)}", numElements) + + ColumnCacheBenchmark.addCaseWithCleanup(benchmark, "Spark", numIters, () => Unit, + doGC, () => Unit, 
() => cdata = data.clone()) { _ => + java.util.Arrays.sort(cdata, new java.util.Comparator[UTF8String] { + override def compare(o1: UTF8String, o2: UTF8String): Int = { + StringBenchmark.sparkCompare(o1, o2) + } + }) + } + ColumnCacheBenchmark.addCaseWithCleanup(benchmark, "Snappy", numIters, () => Unit, + doGC, () => Unit, () => cdata2 = data.clone()) { _ => + java.util.Arrays.sort(cdata2, null) + } + ColumnCacheBenchmark.addCaseWithCleanup(benchmark, "Snappy (off-heap)", numIters, () => Unit, + doGC, () => Unit, () => cdata3 = udata.clone()) { _ => + java.util.Arrays.sort(cdata3, null) + } + + benchmark.run() + + // compare the results + assert(cdata.toSeq === cdata2.toSeq) + assert(cdata.toSeq === cdata3.toSeq) + } + + ignore("UTF8String optimized compareTo") { + runUTF8StringCompareTo(1000000, 1000) + runUTF8StringCompareTo(1000000, 1000000) + runUTF8StringCompareTo(1000000, 1000, preSorted = true) + runUTF8StringCompareTo(1000000, 1000000, preSorted = true) + } + + ignore("UTF8String optimized contains") { + val customerFile = getClass.getResource("/customer.csv").getPath + val numLoads = 1500 + val numIters = 20 + + val sdata = (1 to numLoads).flatMap(_ => Source.fromFile(customerFile).getLines()).toArray + val numElements = sdata.length + val data = sdata.map(UTF8String.fromString) + val udata = sdata.map(toDirectUTF8String) + val search = "71,HOUSEHOLD" + val expectedMatches = 3 * numLoads + val searchStr = UTF8String.fromString(search) + val usearchStr = toDirectUTF8String(search) + val pattern = java.util.regex.Pattern.compile(search) + + if (Native.isLoaded) { + // scalastyle:off + println("Using native JNI calls") + // scalastyle:on + } + + val benchmark = new Benchmark("compare contains", numElements) + + benchmark.addCase("UTF8String (orig)", numIters) { _ => + var i = 0 + var matched = 0 + while (i < numElements) { + if (StringBenchmark.sparkContains(data(i), searchStr)) { + matched += 1 + } + i += 1 + } + assert(matched === expectedMatches) + } + 
benchmark.addCase("UTF8String (opt heap)", numIters) { _ => + var i = 0 + var matched = 0 + while (i < numElements) { + if (data(i).contains(searchStr)) { + matched += 1 + } + i += 1 + } + assert(matched === expectedMatches) + } + benchmark.addCase("UTF8String (opt off-heap)", numIters) { _ => + var i = 0 + var matched = 0 + while (i < numElements) { + if (udata(i).contains(usearchStr)) { + matched += 1 + } + i += 1 + } + assert(matched === expectedMatches) + } + benchmark.addCase("String", numIters) { _ => + var i = 0 + var matched = 0 + while (i < numElements) { + if (sdata(i).contains(search)) { + matched += 1 + } + i += 1 + } + assert(matched === expectedMatches) + } + benchmark.addCase("Regex", numIters) { _ => + var i = 0 + var matched = 0 + while (i < numElements) { + if (pattern.matcher(sdata(i)).find(0)) { + matched += 1 + } + i += 1 + } + assert(matched === expectedMatches) + } + + benchmark.run() + } +} + +object StringBenchmark { + + /** + * This is the equivalent of original upstream Apache Spark UTF8String.compare + * having the exact same performance profile (and byte code). + */ + def sparkCompare(o1: UTF8String, o2: UTF8String): Int = { + val len = Math.min(o1.numBytes(), o2.numBytes()) + var i = 0 + while (i < len) { + val res = (Platform.getByte(o1.getBaseObject, o1.getBaseOffset + i) & 0xFF) - + (Platform.getByte(o2.getBaseObject, o2.getBaseOffset + i) & 0xFF) + if (res != 0) return res + i += 1 + } + o1.numBytes() - o2.numBytes() + } + + /** + * This is the equivalent of original upstream Apache Spark UTF8String.contains + * having the exact same performance profile (and byte code). 
+   */
+  def sparkContains(source: UTF8String, target: UTF8String): Boolean = {
+    // an empty target matches any source
+    if (target.numBytes == 0) return true
+    val first = target.getByte(0)
+    var i = 0
+    // try every start offset at which the whole target still fits inside the source
+    while (i <= source.numBytes - target.numBytes) {
+      // compare the first byte before paying for the full region comparison
+      if (source.getByte(i) == first && matchAt(source, target, i)) return true
+      i += 1
+    }
+    false
+  }
+
+  /**
+   * Returns true when the `target.numBytes` bytes of `source` starting at byte offset
+   * `pos` equal the bytes of `target`; false when `pos` is negative or the target
+   * would extend past the end of `source`.
+   */
+  private def matchAt(source: UTF8String, target: UTF8String, pos: Int): Boolean = {
+    if (target.numBytes + pos > source.numBytes || pos < 0) return false
+    ByteArrayMethods.arrayEquals(source.getBaseObject, source.getBaseOffset + pos,
+      target.getBaseObject, target.getBaseOffset, target.numBytes)
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TAQTest.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TAQTest.scala
new file mode 100644
index 0000000000..6281c7520e
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TAQTest.scala
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */ +package org.apache.spark.sql.execution.benchmark + +import java.sql.{Date, DriverManager, Timestamp} +import java.time.{ZoneId, ZonedDateTime} + +import scala.util.Random + +import com.typesafe.config.Config +import io.snappydata.SnappyFunSuite +import org.scalatest.Assertions + +import org.apache.spark.memory.SnappyUnifiedMemoryManager +import org.apache.spark.sql._ +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.execution.benchmark.TAQTest.CreateOp +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{Decimal, DecimalType, StringType, StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Benchmark +import org.apache.spark.util.random.XORShiftRandom +import org.apache.spark.{Logging, SparkConf, SparkContext} + +class TAQTest extends SnappyFunSuite { + + override protected def newSparkConf( + addOn: SparkConf => SparkConf = null): SparkConf = + TAQTest.newSparkConf(addOn) + + override def beforeAll(): Unit = { + super.beforeAll() + stopAll() + } + + override def afterAll(): Unit = { + super.afterAll() + stopAll() + } + + test("select queries with random data (eviction) - insert") { + val quoteSize = 34000000L + val tradeSize = 5000000L + val numDays = 1 + val numIters = 3 + TAQTest.benchmarkRandomizedKeys(sc, quoteSize, tradeSize, + quoteSize, numDays, queryNumber = 1, numIters, doInit = true, + op = CreateOp.Quote, runSparkCaching = false) + TAQTest.benchmarkRandomizedKeys(sc, quoteSize, tradeSize, + tradeSize, numDays, queryNumber = 2, numIters, doInit = false, + op = CreateOp.Trade, runSparkCaching = false) + } + + test("select queries with random data - query") { + val quoteSize = 3400000L + val tradeSize = 500000L + val numDays = 1 + val numIters = 10 + TAQTest.benchmarkRandomizedKeys(sc, quoteSize, tradeSize, + quoteSize, numDays, queryNumber = 1, numIters, doInit = true) + TAQTest.benchmarkRandomizedKeys(sc, quoteSize, tradeSize, + tradeSize, 
numDays, queryNumber = 2, numIters, doInit = false)
+    TAQTest.benchmarkRandomizedKeys(sc, quoteSize, tradeSize,
+      tradeSize, numDays, queryNumber = 3, numIters, doInit = false)
+  }
+
+  // Times numRuns batches of numIters JDBC queries against a server that must already be
+  // listening on localhost:1527, logging total and average time per batch.
+  ignore("basic query performance with JDBC") {
+    val numRuns = 1000
+    val numIters = 1000
+    val conn = DriverManager.getConnection("jdbc:snappydata://localhost:1527")
+    // release the connection and statement even when a query or assertion fails
+    try {
+      val stmt = conn.createStatement()
+      try {
+        val idRs = stmt.executeQuery("values dsid()")
+        try {
+          assert(idRs.next(), "no row returned by 'values dsid()'")
+          logInfo(s"Connected to server ${idRs.getString(1)}")
+        } finally {
+          idRs.close()
+        }
+        for (_ <- 1 to numRuns) {
+          val start = System.nanoTime()
+          for (_ <- 1 to numIters) {
+            // val rs = stmt.executeQuery("select * from citi_order where id=1000")
+            val rs = stmt.executeQuery("select count(*) from citi_order")
+            try {
+              var count = 0
+              while (rs.next()) {
+                count += 1
+              }
+              assert(count == 1)
+            } finally {
+              rs.close()
+            }
+          }
+          val end = System.nanoTime()
+          val millis = (end - start) / 1000000.0
+          logInfo(s"Time taken for $numIters runs = ${millis}ms, " +
+              s"average = ${millis / numIters}ms")
+        }
+      } finally {
+        stmt.close()
+      }
+    } finally {
+      conn.close()
+    }
+  }
+}
+
+class TAQTestJob extends SnappySQLJob with Logging {
+
+  override def runSnappyJob(snSession: SnappySession, jobConfig: Config): Any = {
+    val sc = snSession.sparkContext
+    // SCALE OUT case with 10 billion rows
+    val quoteSize = 8500000000L
+    val tradeSize = 1250000000L
+    val numDays = 16
+    val numIters = 10
+    TAQTest.benchmarkRandomizedKeys(sc,
+      quoteSize, tradeSize, quoteSize, numDays, queryNumber = 1, numIters,
+      doInit = true, runSparkCaching = false)
+    TAQTest.benchmarkRandomizedKeys(sc,
+      quoteSize, tradeSize, tradeSize, numDays, queryNumber = 2, numIters,
+      doInit = false, runSparkCaching = false)
+    TAQTest.benchmarkRandomizedKeys(sc,
+      quoteSize, tradeSize, tradeSize, numDays, queryNumber = 3, numIters,
+      doInit = false, runSparkCaching = false)
+    Boolean.box(true)
+  }
+
+  def runSnappyJob2(sc: SnappyContext, jobConfig: Config): Any = {
+    val numRuns = 1000
+    val numIters = 1000
+    val session = sc.snappySession
+    for (_ <- 1 to numRuns) {
+
val start = System.nanoTime() + for (_ <- 1 to numIters) { + Utils.sqlInternal(session, "select * from citi_order where id=1000 " + + "--GEMFIREXD-PROPERTIES executionEngine=Spark").collectInternal() + } + val end = System.nanoTime() + val millis = (end - start) / 1000000.0 + logInfo(s"Time taken for $numIters runs = ${millis}ms, " + + s"average = ${millis / numIters}ms") + } + Boolean.box(true) + } + + override def isValidJob(snSession: SnappySession, + config: Config): SnappyJobValidation = SnappyJobValid() +} + +case class Quote(sym: UTF8String, ex: UTF8String, bid: Double, + time: Timestamp, date: Date) + +case class Trade(sym: UTF8String, ex: UTF8String, price: Decimal, + time: Timestamp, date: Date, size: Double, c1: Array[UTF8String], + c2: Map[UTF8String, Double]) + +object TAQTest extends Logging with Assertions { + + private[benchmark] var COLUMN_TABLE = true + + val EXCHANGES: Array[String] = Array("NYSE", "NASDAQ", "AMEX", "TSE", + "LON", "BSE", "BER", "EPA", "TYO") + val ALL_SYMBOLS: Array[String] = { + val syms = new Array[String](400) + for (i <- 0 until 10) { + syms(i) = s"SY0$i" + } + for (i <- 10 until 100) { + syms(i) = s"SY$i" + } + for (i <- 100 until 400) { + syms(i) = s"S$i" + } + syms + } + val SYMBOLS: Array[String] = ALL_SYMBOLS.take(100) + + val sqlQuote: String = + s""" + |CREATE TABLE quote ( + | sym CHAR(4) NOT NULL, + | ex VARCHAR(64) NOT NULL, + | bid DOUBLE NOT NULL, + | time TIMESTAMP NOT NULL, + | date DATE NOT NULL + |) + """.stripMargin + val sqlTrade: String = + s""" + |CREATE TABLE trade ( + | sym CHAR(4) NOT NULL, + | ex VARCHAR(64) NOT NULL, + | price DECIMAL(10,4) NOT NULL, + | time TIMESTAMP NOT NULL, + | date DATE NOT NULL, + | size DOUBLE NOT NULL, + | c1 ARRAY NOT NULL, + | c2 MAP NOT NULL + |) + """.stripMargin + + private val d = "2016-06-06" + // private val s = "SY23" + val cacheQueries2 = Array( + "select avg(bid) from cQuote", + "select sym, avg(bid) from cQuote group by sym", + "select sym, last(price) from 
cTrade group by sym", + "select cQuote.sym, last(bid) from cQuote join cS " + + s"on (cQuote.sym = cS.sym) where date='$d' group by cQuote.sym" + ) + val cacheQueries = Array( + "select cQuote.sym, last(bid) from cQuote join cS " + + s"on (cQuote.sym = cS.sym) where date='$d' group by cQuote.sym", + "select cTrade.sym, ex, last(price) from cTrade join cS " + + s"on (cTrade.sym = cS.sym) where date='$d' group by cTrade.sym, ex", + "select cTrade.sym, hour(time), avg(size) from cTrade join cS " + + s"on (cTrade.sym = cS.sym) where date='$d' group by cTrade.sym, hour(time)" /* , + "select * from (select time, price, sym from cTrade where " + + s"date='$d' and sym='$s') t " + + "left outer join (select time, bid, sym from cQuote where " + + s"date='$d' and sym='$s') q " + + s"on q.time=(select max(time) from q where time<=t.time and sym='$s') " + + "where price SparkConf = null): SparkConf = { + val cores = math.min(16, Runtime.getRuntime.availableProcessors()) + val conf = new SparkConf() + .setIfMissing("spark.master", s"local[$cores]") + .setAppName("microbenchmark") + conf.set("snappydata.store.critical-heap-percentage", "95") + if (SnappySession.isEnterpriseEdition) { + conf.set("snappydata.store.memory-size", "1200m") + } + conf.set("spark.memory.manager", classOf[SnappyUnifiedMemoryManager].getName) + .set("spark.serializer", "org.apache.spark.serializer.PooledKryoSerializer") + .set("spark.closure.serializer", "org.apache.spark.serializer.PooledKryoSerializer") + .set("snappydata.sql.planCaching", random.nextBoolean().toString) + if (addOn != null) { + addOn(conf) + } + conf + } + + /** + * Benchmark caching randomized keys created from a range. 
+ */ + def benchmarkRandomizedKeys(sc: SparkContext, quoteSize: Long, + tradeSize: Long, size: Long, numDays: Int, queryNumber: Int, + numIters: Int, doInit: Boolean, op: CreateOp.Type = CreateOp.Read, + runSparkCaching: Boolean = true): Unit = { + + val spark = new SparkSession(sc) + val session = new SnappySession(sc) + + import session.implicits._ + + val benchmark = new Benchmark("Cache random data", size) + val quoteRDD = sc.range(0, quoteSize).mapPartitions { itr => + val rnd = new XORShiftRandom + val syms = ALL_SYMBOLS.map(UTF8String.fromString) + val numSyms = syms.length + val exs = EXCHANGES.map(UTF8String.fromString) + val numExs = exs.length + var day = 0 + val zoneId = ZoneId.systemDefault() + var cal = ZonedDateTime.of(2016, 6, day + 6, 0, 0, 0, 0, zoneId) + var millisTime = cal.toInstant.toEpochMilli + var date = new Date(millisTime) + var dayCounter = 0 + itr.map { id => + val sym = syms(math.abs(rnd.nextInt() % numSyms)) + val ex = exs(math.abs(rnd.nextInt() % numExs)) + if (numDays > 1) { + dayCounter += 1 + // change date after some number of iterations + if (dayCounter == 10000) { + day = (day + 1) % numDays + cal = ZonedDateTime.of(2016, 6, day + 6, 0, 0, 0, 0, zoneId) + millisTime = cal.toInstant.toEpochMilli + date = new Date(millisTime) + dayCounter = 0 + } + } + val gid = (id % 400).toInt + // reset the timestamp every once in a while + if (gid == 0) { + // seconds < 59 so that millis+gid does not overflow into next hour + cal = ZonedDateTime.of(2016, 6, day + 6, rnd.nextInt() & 0x07, + math.abs(rnd.nextInt() % 60), math.abs(rnd.nextInt() % 59), + math.abs(rnd.nextInt() % 1000000000), zoneId) + millisTime = cal.toInstant.toEpochMilli + } + val time = new Timestamp(millisTime + gid) + Quote(sym, ex, rnd.nextDouble() * 1000.0, time, date) + } + } + val tradeRDD = sc.range(0, tradeSize).mapPartitions { itr => + val rnd = new XORShiftRandom + val syms = ALL_SYMBOLS.map(UTF8String.fromString) + val numSyms = syms.length + val exs = 
EXCHANGES.map(UTF8String.fromString) + val numExs = exs.length + var day = 0 + val zoneId = ZoneId.systemDefault() + var cal = ZonedDateTime.of(2016, 6, day + 6, 0, 0, 0, 0, zoneId) + var millisTime = cal.toInstant.toEpochMilli + var date = new Date(millisTime) + var dayCounter = 0 + itr.map { id => + val sym = syms(math.abs(rnd.nextInt() % numSyms)) + val ex = exs(math.abs(rnd.nextInt() % numExs)) + if (numDays > 1) { + dayCounter += 1 + // change date after some number of iterations + if (dayCounter == 10000) { + // change date + day = (day + 1) % numDays + cal = ZonedDateTime.of(2016, 6, day + 6, 0, 0, 0, 0, zoneId) + millisTime = cal.toInstant.toEpochMilli + date = new Date(millisTime) + dayCounter = 0 + } + } + val gid = (id % 400).toInt + // reset the timestamp every once in a while + if (gid == 0) { + // seconds < 59 so that millis+gid does not overflow into next hour + cal = ZonedDateTime.of(2016, 6, day + 6, rnd.nextInt() & 0x07, + math.abs(rnd.nextInt() % 60), math.abs(rnd.nextInt() % 59), + math.abs(rnd.nextInt() % 1000000000), zoneId) + millisTime = cal.toInstant.toEpochMilli + } + val time = new Timestamp(millisTime + gid) + val dec = Decimal(math.abs(rnd.nextInt() % 100000000), 10, 4) + val c1 = Array(sym, ex, sym) + val bid = rnd.nextDouble() * 1000 + val c2 = Map(sym -> bid, ex -> bid) + Trade(sym, ex, dec, time, date, rnd.nextDouble() * 1000, c1, c2) + } + } + + val quoteDF = spark.createDataset(quoteRDD) + val quoteDataDF = spark.internalCreateDataFrame( + quoteDF.queryExecution.toRdd, + StructType(quoteDF.schema.fields.map(_.copy(nullable = false)))) + val tradeDF = spark.createDataset(tradeRDD) + val tradeDataDF = spark.internalCreateDataFrame( + tradeDF.queryExecution.toRdd, + StructType(tradeDF.schema.fields.map { + case f if f.dataType.isInstanceOf[DecimalType] => + f.copy(dataType = DecimalType(10, 4), nullable = false) + case f => f.copy(nullable = false) + })) + + val qDF = session.createDataset(quoteRDD) + val tDF = 
session.createDataset(tradeRDD) + val sDF = session.createDataset(SYMBOLS) + val symDF = spark.internalCreateDataFrame( + spark.createDataset(SYMBOLS).queryExecution.toRdd, + StructType(Array(StructField("SYM", StringType, nullable = false)))) + + quoteDataDF.createOrReplaceTempView("cQuote") + tradeDataDF.createOrReplaceTempView("cTrade") + symDF.createOrReplaceTempView("cS") + + def cacheTable(spark: SparkSession, table: String): Unit = { + spark.catalog.cacheTable(table) + spark.sql(s"select count(*) from $table").collect() + } + + /** + * Add a benchmark case, optionally specifying whether to cache the DataSet. + */ + def addBenchmark(name: String, cache: Boolean, + params: Map[String, String] = Map(), query: String, + expectedNumResults: Int, snappy: Boolean, init: Boolean): Unit = { + val defaults = params.keys.flatMap { + k => session.conf.getOption(k).map((k, _)) + } + + def prepare(): Unit = { + params.foreach { case (k, v) => + session.conf.set(k, v); spark.conf.set(k, v) + } + doGC() + if (cache) { + spark.catalog.clearCache() + cacheTable(spark, "cQuote") + cacheTable(spark, "cTrade") + cacheTable(spark, "cS") + spark.sql(query).collect() + } else { + assert(snappy, "Only cache=T or snappy=T supported") + if (init) { + session.sql("drop table if exists quote") + session.sql("drop table if exists trade") + session.sql("drop table if exists S") + val partitioning = if (op == CreateOp.Read) { + " options (partition_by 'sym')" + } else "" + if (COLUMN_TABLE) { + session.sql(s"$sqlQuote using column$partitioning") + session.sql(s"$sqlTrade using column$partitioning") + } else { + session.sql(s"$sqlQuote using row$partitioning") + session.sql(s"$sqlTrade using row$partitioning") + } + session.sql(s"CREATE TABLE S (sym CHAR(4) NOT NULL)") + qDF.write.insertInto("quote") + tDF.write.insertInto("trade") + sDF.write.insertInto("S") + } + session.sql(query).collect() + } + testCleanup() + doGC() + } + + def cleanup(): Unit = { + SnappySession.clearAllCache() + 
defaults.foreach { case (k, v) => + session.conf.set(k, v); spark.conf.set(k, v) + } + doGC() + } + + def testCleanup(): Unit = { + if (op != CreateOp.Read) { + if (snappy) { + session.sql("truncate table quote") + session.sql("truncate table trade") + } else { + spark.catalog.clearCache() + } + doGC() + } + } + + ColumnCacheBenchmark.addCaseWithCleanup(benchmark, name, numIters, + prepare, cleanup, testCleanup) { _ => + op match { + case CreateOp.Read => + if (snappy) { + collect(session.sql(query), expectedNumResults) + } else { + collect(spark.sql(query), expectedNumResults) + } + case CreateOp.Quote if snappy => + qDF.write.insertInto("quote") + case CreateOp.Quote => + cacheTable(spark, "cQuote") + case CreateOp.Trade if snappy => + tDF.write.insertInto("trade") + case CreateOp.Trade => + cacheTable(spark, "cTrade") + } + } + } + + session.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") + session.conf.set(SQLConf.WHOLESTAGE_FALLBACK.key, "false") + spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") + spark.conf.set(SQLConf.WHOLESTAGE_FALLBACK.key, "false") + + // Benchmark cases: + // (1) Spark caching with column batch compression + // (2) SnappyData Column table with plan optimizations + + var init = doInit + + if (runSparkCaching) { + addBenchmark(s"Q$queryNumber: cache = T", cache = true, + Map.empty, query = cacheQueries(queryNumber - 1), + expectedNumResults = expectedResultSizes(queryNumber - 1), + snappy = false, init) + } + + addBenchmark(s"Q$queryNumber: cache = F snappy = T", cache = false, + Map.empty, query = queries(queryNumber - 1), + expectedNumResults = expectedResultSizes(queryNumber - 1), + snappy = true, init) + init = false + + benchmark.run() + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQuerySnappyBenchmark.scala b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQuerySnappyBenchmark.scala new file mode 100644 index 0000000000..c263479d4a --- /dev/null 
+++ b/cluster/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQuerySnappyBenchmark.scala @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.sql.execution.benchmark + +import java.io.{File, FileOutputStream, PrintStream} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.fileToString +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Benchmark +import org.apache.spark.{SparkConf, SparkContext} + +import scala.collection.mutable.ArrayBuffer + + +object TPCDSQuerySnappyBenchmark { + + var spark: SparkSession = _ + var snappy: SnappySession = _ + var ds: DataFrame = _ + + val tables = Seq("catalog_page", "catalog_returns", "customer", "customer_address", + "customer_demographics", "date_dim", "household_demographics", "inventory", "item", + "promotion", "store", "store_returns", "catalog_sales", "web_sales", "store_sales", + "web_returns", "web_site", "reason", "call_center", "warehouse", "ship_mode", "income_band", + "time_dim", "web_page") + + var snappyRS: FileOutputStream = new FileOutputStream( + new 
File(s"Snappy_Results.out"))
+  var sparkRS : FileOutputStream = new FileOutputStream(
+      new File(s"Spark_Results.out"))
+
+  var snappyPS: PrintStream = new PrintStream(snappyRS)
+  var sparkPS: PrintStream = new PrintStream(sparkRS)
+
+  /**
+   * Loads every table listed in `tables` from the parquet data under
+   * `dataLocation/<tableName>`.
+   *
+   * @param dataLocation directory containing one parquet data set per table
+   * @param isSnappy     when true, creates and populates a column table through `snappy`;
+   *                     otherwise registers and caches a temporary view through `spark`
+   * @return the row count of each table, keyed by table name
+   */
+  def setupTables(dataLocation: String, isSnappy: Boolean): Map[String, Long] = {
+    val props = Map("BUCKETS" -> "7")
+
+    tables.map { tableName =>
+      if (isSnappy) {
+        val df = snappy.read.parquet(s"$dataLocation/$tableName")
+        snappy.createTable(tableName, "column",
+          new StructType(df.schema.map(_.copy(nullable = false)).toArray), props)
+        df.write.insertInto(tableName)
+
+        // scalastyle:off println
+        println("Table Created..." + tableName)
+        // scalastyle:on println
+        tableName -> snappy.table(tableName).count()
+      } else {
+        spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName)
+        spark.sqlContext.cacheTable(tableName)
+        tableName -> spark.table(tableName).count()
+      }
+    }.toMap
+  }
+
+  /**
+   * Loads the tables and then runs each named query as its own single-case benchmark.
+   * A query that fails is reported on stdout/stderr and the remaining queries still run.
+   *
+   * @param dataLocation directory with the parquet data; must be non-empty
+   * @param queries      query names; the text of each is read from `queryPath/<name>.sql`
+   * @param isSnappy     run through `snappy` when true, through `spark` otherwise
+   * @param queryPath    directory containing the query files
+   */
+  def execute(dataLocation: String, queries: Seq[String], isSnappy: Boolean = false,
+    queryPath: String = ""): Unit = {
+
+    require(dataLocation.nonEmpty,
+      "please modify the value of dataLocation to point to your local TPCDS data")
+    val tableSizes = setupTables(dataLocation, isSnappy)
+
+    queries.foreach { name =>
+
+      val path: String = s"$queryPath/$name.sql"
+      val queryString = fileToString(new File(path))
+
+      // This is an indirect hack to estimate the size of each query's input by traversing the
+      // logical plan and adding up the sizes of all tables that appear in the plan. Note that this
+      // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate
+      // per-row processing time for those cases.
+      try {
+        val queryRelations = scala.collection.mutable.HashSet[String]()
+
+        ds = if (isSnappy) snappy.sqlContext.sql(queryString) else spark.sql(queryString)
+
+        // traversal is only for its side effect on queryRelations, hence foreach
+        ds.queryExecution.logical.foreach {
+          case UnresolvedRelation(t: TableIdentifier, _) =>
+            queryRelations.add(t.table)
+          case lp: LogicalPlan =>
+            lp.expressions.foreach {
+              _ foreach {
+                case subquery: SubqueryExpression =>
+                  subquery.plan.foreach {
+                    case UnresolvedRelation(t: TableIdentifier, _) =>
+                      queryRelations.add(t.table)
+                    case _ =>
+                  }
+                case _ =>
+              }
+            }
+          case _ =>
+        }
+
+        // Convert to a Seq before mapping: mapping the set directly yields a Set[Long],
+        // which silently drops tables that happen to have the same row count.
+        val numRows = queryRelations.toSeq.map(tableSizes.getOrElse(_, 0L)).sum
+        val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 5)
+
+        benchmark.addCase(name) { _ =>
+          // Rows are collected only to force complete execution of the query.
+          // To dump them for comparison, pass the collected rows to normalizeRows
+          // together with snappyPS / sparkPS.
+          if (isSnappy) {
+            snappy.sqlContext.sql(queryString).collect()
+          } else {
+            spark.sql(queryString).collect()
+          }
+        }
+        benchmark.run()
+
+      } catch {
+        case e: Exception =>
+          // printStackTrace() returns Unit, so it must not be concatenated into the message
+          // scalastyle:off println
+          println(s"Failed $name: $e")
+          // scalastyle:on println
+          e.printStackTrace()
+      }
+    }
+  }
+
+  /**
+   * Writes each row to `printStream` as one line with its column values joined by '|'.
+   */
+  private def normalizeRows(resultSet: Array[Row], printStream: PrintStream): Unit = {
+    for (row <- resultSet) {
+      printStream.println(row.toSeq.map {
+        // case d: Double => "%18.4f".format(d).trim()
+        case v => v
+      }.mkString("|"))
+    }
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/kafka010/SnappyStructuredKafkaSuite.scala b/cluster/src/test/scala/org/apache/spark/sql/kafka010/SnappyStructuredKafkaSuite.scala
new file mode 100644
index 0000000000..27754afb50
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/kafka010/SnappyStructuredKafkaSuite.scala
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package org.apache.spark.sql.kafka010 + +import java.util.concurrent.atomic.AtomicInteger + +import io.snappydata.SnappyFunSuite +import org.apache.kafka.common.TopicPartition +import org.scalatest.concurrent.Eventually +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.functions.{count, window} +import org.apache.spark.sql.streaming.ProcessingTime + +case class Account(accountName: String) + +class SnappyStructuredKafkaSuite extends SnappyFunSuite with Eventually + with BeforeAndAfter with BeforeAndAfterAll { + + private lazy val session = snc.sparkSession + + private var kafkaTestUtils: KafkaTestUtils = _ + + override def beforeAll() { + super.beforeAll() + kafkaTestUtils = new KafkaTestUtils + kafkaTestUtils.setup() + } + + override def afterAll() { + super.afterAll() + if (kafkaTestUtils != null) { + kafkaTestUtils.teardown() + kafkaTestUtils = null + } + } + + def framework: String = this.getClass.getSimpleName + + private val topicId = new AtomicInteger(0) + + private def newTopic(): String = s"topic-${topicId.getAndIncrement()}" + + test("SnappyData Structured Streaming with Kafka") { + import session.implicits._ + + snc.sql("drop table if exists users") + snc.sql("create table users (id int, name string) using column 
options(key_columns 'id')")
+
+    val topic = newTopic()
+    kafkaTestUtils.createTopic(topic, partitions = 3)
+    // 101 + 11 + 1 = 113 messages spread over the three partitions
+    kafkaTestUtils.sendMessages(topic,
+      (100 to 200).map(i => i.toString + ",name_" + i).toArray, Some(0))
+    kafkaTestUtils.sendMessages(topic,
+      (10 to 20).map(i => i.toString + ",name_" + i).toArray, Some(1))
+    kafkaTestUtils.sendMessages(topic, Array("1,name_1"), Some(2))
+
+    val streamingDF = session
+      .readStream
+      .format("kafka")
+      .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress)
+      .option("subscribe", topic)
+      .option("startingOffsets", "earliest")
+      .load
+
+    implicit val encoder = RowEncoder(snc.table("users").schema)
+
+    // Use a fresh checkpoint directory per run: a fixed path keeps the offsets of an
+    // earlier run, so a re-run would resume from them instead of reading from "earliest".
+    val checkpointDir = java.nio.file.Files.createTempDirectory("snappyTable").toString
+
+    val streamingQuery = streamingDF
+      .selectExpr("CAST(value AS STRING)")
+      .as[String]
+      .map(_.split(","))
+      .map(r => {
+        Row(r(0).toInt, r(1))
+      })
+      .writeStream
+      .format("snappysink")
+      .queryName("simple")
+      .outputMode("append")
+      .trigger(ProcessingTime("1 seconds"))
+      .option("tablename", "APP.USERS").option("streamqueryid", "abc")
+      .option("checkpointLocation", checkpointDir)
+      .start
+
+    // always stop the query so it does not keep running while later tests execute
+    try {
+      streamingQuery.processAllAvailable()
+      assert(113 == session.sql("select * from APP.USERS").count)
+    } finally {
+      streamingQuery.stop()
+    }
+  }
+
+
+  test("ETL Job") {
+    import session.implicits._
+
+    val topic = newTopic()
+    kafkaTestUtils.createTopic(topic, partitions = 3)
+
+    val partitions = Map(
+      new TopicPartition(topic, 0) -> 0L,
+      new TopicPartition(topic, 1) -> 0L,
+      new TopicPartition(topic, 2) -> 0L
+    )
+
+    val startingOffsets = JsonUtils.partitionOffsets(partitions)
+
+    val streamingDF = session
+      .readStream
+      .format("kafka")
+      .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress)
+      .option("kafka.metadata.max.age.ms", "1")
+      .option("maxOffsetsPerTrigger", 10)
+      .option("subscribe", topic)
+      .option("startingOffsets", startingOffsets)
+      .load
+
+    val streamingQuery = streamingDF
+      .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
+      .as[(String, String)]
+      .writeStream
+      .format("memory")
+      // .option("checkpointLocation",
"/tmp/etl") + .queryName("snappyTable") + .outputMode("append") + .trigger(ProcessingTime("1 seconds")) + .start + + kafkaTestUtils.sendMessages(topic, (100 to 200).map(_.toString).toArray, Some(0)) + kafkaTestUtils.sendMessages(topic, (10 to 20).map(_.toString).toArray, Some(1)) + kafkaTestUtils.sendMessages(topic, Array("1"), Some(2)) + + streamingQuery.processAllAvailable() + assert(113 == session.sql("select * from snappyTable").count) + } + + test("infinite streaming aggregation") { + import session.implicits._ + + val topic = newTopic() + kafkaTestUtils.createTopic(topic, partitions = 3) + + val partitions = Map( + new TopicPartition(topic, 0) -> 0L, + new TopicPartition(topic, 1) -> 0L, + new TopicPartition(topic, 2) -> 0L + ) + + val startingOffsets = JsonUtils.partitionOffsets(partitions) + + val streamingDF = session + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("maxOffsetsPerTrigger", 10) + .option("subscribe", topic) + .option("startingOffsets", startingOffsets) + .option("failOnDataLoss", "false") + .load + + val streamingQuery = streamingDF + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").groupBy("value").count() + .as[(String, String)] + .writeStream + .format("memory") + .option("checkpointLocation", "/tmp/infinite-" + System.currentTimeMillis()) + .queryName("snappyAggrTable") + .outputMode("complete") + .trigger(ProcessingTime("1 seconds")) + .start + + kafkaTestUtils.sendMessages(topic, (100 to 150).map(_.toString).toArray, Some(0)) + kafkaTestUtils.sendMessages(topic, (125 to 150).map(_.toString).toArray, Some(1)) + kafkaTestUtils.sendMessages(topic, (100 to 124).map(_.toString).toArray, Some(2)) + + streamingQuery.processAllAvailable() + + assert(51 == session.sql("select * from snappyAggrTable").count) + assert(2.0 == session.sql("select avg(count) from snappyAggrTable").collect()(0).getDouble(0)) + } + + test("sliding 
window aggregation") {
+    import session.implicits._
+
+    val topic = newTopic()
+    kafkaTestUtils.createTopic(topic, partitions = 3)
+
+    val partitions = Map(
+      new TopicPartition(topic, 0) -> 0L,
+      new TopicPartition(topic, 1) -> 0L,
+      new TopicPartition(topic, 2) -> 0L
+    )
+
+    val startingOffsets = JsonUtils.partitionOffsets(partitions)
+
+    kafkaTestUtils.sendMessages(topic, (100 to 150).map(_.toString).toArray, Some(0))
+    kafkaTestUtils.sendMessages(topic, (125 to 150).map(_.toString).toArray, Some(1))
+    kafkaTestUtils.sendMessages(topic, (100 to 124).map(_.toString).toArray, Some(2))
+
+    val streamingDF = session
+      .readStream
+      .format("kafka")
+      .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress)
+      .option("kafka.metadata.max.age.ms", "1")
+      .option("maxOffsetsPerTrigger", 10)
+      .option("subscribe", topic)
+      .option("startingOffsets", startingOffsets)
+      .option("failOnDataLoss", "false")
+      .load
+
+    // count the messages per one-second window of the Kafka record timestamp
+    val windowedAggregation = streamingDF
+      .groupBy(window($"timestamp", "1 seconds") as 'window)
+      .agg(count("*") as 'count)
+      .select($"window".getField("start") as 'window, $"count")
+
+    // Use a fresh checkpoint directory per run so that state left behind by an
+    // earlier run is never picked up.
+    val checkpointDir =
+      java.nio.file.Files.createTempDirectory("snappyWindowAggrTable").toString
+
+    val streamingQuery = windowedAggregation
+      .writeStream
+      .format("memory")
+      .option("checkpointLocation", checkpointDir)
+      .outputMode("complete")
+      .queryName("snappyWindowAggrTable")
+      .start()
+
+    // stop the query even when processing or the result query throws
+    try {
+      streamingQuery.processAllAvailable()
+      logInfo(session.sql("select * from snappyWindowAggrTable").limit(200).collect().mkString("\n"))
+    } finally {
+      streamingQuery.stop()
+    }
+  }
+
+  test("streaming join to snappy table") {
+    import session.implicits._
+
+    val rdd = snc.sparkContext.parallelize((15 to 25).map(i => Account(i.toString)))
+    val dfBlackList = snc.createDataFrame(rdd)
+    // create a SnappyData table
+    snc.createTable("blacklist", "row", dfBlackList.schema, Map.empty[String, String])
+
+    import org.apache.spark.sql.snappy._
+    dfBlackList.write.putInto("blacklist") // populate the table 'blacklist'.
+ + val topic = newTopic() + kafkaTestUtils.createTopic(topic, partitions = 3) + + // Read the accounts from Kafka source + val acctStreamingDF = session + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", kafkaTestUtils.brokerAddress) + .option("subscribe", topic) + .option("startingOffsets", "earliest").load + .selectExpr("CAST(value AS STRING) accountName").as[(String)] + + val streamingQuery = acctStreamingDF.join(session.table("blacklist"), "accountName") + .writeStream + .outputMode("append") + .format("memory") + .queryName("snappyResultTable") + .trigger(ProcessingTime("1 seconds")) + .start + + kafkaTestUtils.sendMessages(topic, (10 to 18).map(_.toString).toArray, Some(1)) + kafkaTestUtils.sendMessages(topic, (20 to 30).map(_.toString).toArray, Some(2)) + + streamingQuery.processAllAvailable() + assert(10 == session.sql("select * from snappyResultTable").count) + } + + // Unsupported operations with streaming DataFrames/Datasets - + + // Multiple streaming aggregations (i.e. a chain of aggregations on a + // streaming DF) are not yet supported on streaming Datasets. + // Limit and take first N rows are not supported on streaming Datasets. + // Distinct operations on streaming Datasets are not supported. + // Sorting operations are supported on streaming Datasets only after + // an aggregation and in Complete Output Mode. + // Outer joins between a streaming and a static Datasets are conditionally supported. + // Full outer join with a streaming Dataset is not supported + // Left outer join with a streaming Dataset on the right is not supported + // Right outer join with a streaming Dataset on the left is not supported + // Any kind of joins between two streaming Datasets are not yet supported. + // They are actions that will immediately run queries and return results, + // which does not make sense on a streaming Dataset. + + // count() - Cannot return a single count from a streaming Dataset. 
+ // Instead, use ds.groupBy.count() which returns a streaming Dataset containing a running count. + // foreach() - Instead use ds.writeStream.foreach(...). + // show() - Instead use the console sink. + + // sorting on the input stream is not supported, as it requires keeping + // track of all the data received in the stream. + // This is therefore fundamentally hard to execute efficiently. +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/AlterTableRowLevelSecurityEnableTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/AlterTableRowLevelSecurityEnableTest.scala new file mode 100644 index 0000000000..2bd5798e37 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/policy/AlterTableRowLevelSecurityEnableTest.scala @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.sql.policy + +import java.sql.{Connection, DriverManager} + +import com.pivotal.gemfirexd.{Attribute, TestUtil} +import org.junit.Assert._ + +import org.apache.spark.sql.SnappyContext + +class AlterTableRowLevelSecurityEnableTest extends PolicyTestBase { + + var serverHostPort: String = _ + + val props = Map.empty[String, String] + val tableOwner = "ashahid" + val colTable = "ColumnTable" + val rowTable = "RowTable" + val numElements = 100 + val colTableName: String = s"$tableOwner.$colTable" + val rowTableName: String = s"$tableOwner.$rowTable" + + var ownerContext: SnappyContext = _ + + override def beforeAll(): Unit = { + super.beforeAll() + + val seq = for (i <- 0 until numElements) yield { + (s"name_$i", i) + } + val rdd = sc.parallelize(seq) + + ownerContext = snc.newSession() + serverHostPort = TestUtil.startNetServer() + ownerContext.snappySession.conf.set(Attribute.USERNAME_ATTR, tableOwner) + + val dataDF = ownerContext.createDataFrame(rdd) + + ownerContext.sql(s"CREATE TABLE $colTableName (name String, id Int) " + + s" USING column ") + + ownerContext.sql(s"CREATE TABLE $rowTableName (name String, id Int) " + + s" USING row ") + dataDF.write.insertInto(colTableName) + dataDF.write.insertInto(rowTableName) + + } + + override def afterAll(): Unit = { + ownerContext.dropTable(colTableName, ifExists = true) + ownerContext.dropTable(rowTableName, ifExists = true) + // stop the network server started in beforeAll so it does not leak into later suites + TestUtil.stopNetServer() + super.afterAll() + } + + test("check rls enable/disable for jdbc client") { + val conn = getConnection + try { + val stmt = conn.createStatement() + + var rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = 
'${rowTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + + stmt.execute(s"alter table $rowTableName enable row level security") + + stmt.execute(s"alter table $colTableName enable row level security") + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${colTable.toUpperCase}'") + + assert(rs.next()) + assertTrue(rs.getBoolean(1)) + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${rowTable.toUpperCase}'") + + assert(rs.next()) + assertTrue(rs.getBoolean(1)) + + stmt.execute(s"alter table $rowTableName disable row level security") + + stmt.execute(s"alter table $colTableName disable row level security") + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${colTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${rowTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + } finally { + conn.close() + } + + } + + test("check rls enable/disable for snappy context") { + val conn = getConnection + try { + val stmt = conn.createStatement() + + var rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${colTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${rowTable.toUpperCase}'") + + assert(rs.next()) + 
assertFalse(rs.getBoolean(1)) + + ownerContext.sql(s"alter table $rowTableName enable row level security") + + ownerContext.sql(s"alter table $colTableName enable row level security") + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${colTable.toUpperCase}'") + + assert(rs.next()) + assertTrue(rs.getBoolean(1)) + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${rowTable.toUpperCase}'") + + assert(rs.next()) + assertTrue(rs.getBoolean(1)) + + ownerContext.sql(s"alter table $rowTableName disable row level security") + + ownerContext.sql(s"alter table $colTableName disable row level security") + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${colTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + + rs = stmt.executeQuery(s"select ROWLEVELSECURITYENABLED from sys.systables " + + s"where tableschemaname = '${tableOwner.toUpperCase}' and " + + s"tablename = '${rowTable.toUpperCase}'") + + assert(rs.next()) + assertFalse(rs.getBoolean(1)) + } finally { + conn.close() + } + } + + + private def getConnection: Connection = { + DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort") + } + +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyJdbcClientTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyJdbcClientTest.scala new file mode 100644 index 0000000000..e58fb5065a --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyJdbcClientTest.scala @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.policy + +import java.sql.{Connection, DriverManager, Statement} +import java.util.Properties + +import com.pivotal.gemfirexd.internal.engine.Misc +import com.pivotal.gemfirexd.{Attribute, TestUtil} +import org.junit.Assert.{assertEquals, assertFalse} + +import org.apache.spark.sql.{Row, SnappySession} + +class PolicyJdbcClientTest extends PolicyTestBase { + + var serverHostPort: String = _ + + val props = Map.empty[String, String] + val tableOwner = "ashahid" + val numElements = 100 + val colTableName: String = s"$tableOwner.ColumnTable" + val rowTableName: String = s"$tableOwner.RowTable" + var ownerSession: SnappySession = _ + + override def beforeAll(): Unit = { + super.beforeAll() + val seq = for (i <- 0 until numElements) yield { + (s"name_$i", i) + } + val rdd = sc.parallelize(seq) + ownerSession = snc.snappySession.newSession() + serverHostPort = TestUtil.startNetServer() + ownerSession.conf.set(Attribute.USERNAME_ATTR, tableOwner) + + val dataDF = ownerSession.createDataFrame(rdd) + + ownerSession.sql(s"CREATE TABLE $colTableName (name String, id Int) " + + s" USING column ") + + ownerSession.sql(s"CREATE TABLE $rowTableName (name String, id Int) " + + s" USING row ") + dataDF.write.insertInto(colTableName) + dataDF.write.insertInto(rowTableName) + val conn = getConnection() + try { + val stmt = conn.createStatement() + 
stmt.execute(s"alter table $colTableName enable row level security") + stmt.execute(s"alter table $rowTableName enable row level security") + } finally { + conn.close() + } + + } + + override def afterAll(): Unit = { + ownerSession.dropTable(colTableName, ifExists = true) + ownerSession.dropTable(rowTableName, ifExists = true) + TestUtil.stopNetServer() + super.afterAll() + } + + test("Policy creation on a column table using jdbc client") { + this.testPolicy(colTableName) + } + + test("Policy creation on a row table using jdbc client") { + this.testPolicy(rowTableName) + } + + private def testPolicy(tableName: String) { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + try { + stmt.execute(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id < 0") + var rs = stmt.executeQuery(s"select * from $tableName") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements, rsSize) + rsSize = 0 + val stmt1 = conn1.createStatement() + rs = stmt1.executeQuery(s"select * from $tableName") + while (rs.next()) rsSize += 1 + assertEquals(0, rsSize) + stmt.execute("drop policy testPolicy1") + } finally { + conn.close() + conn1.close() + } + } + + test("Policy application on views on table with policy created before view creation") { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + try { + stmt.execute(s"create policy testPolicy1 on " + + s"$colTableName for select to current_user using id < 0") + stmt.execute(s"create policy testPolicy2 on " + + s"$rowTableName for select to current_user using id < 0") + stmt.execute(s"CREATE VIEW col_view AS SELECT id FROM $colTableName") + stmt.execute(s"CREATE VIEW row_view AS SELECT id FROM $rowTableName") + var rs = stmt.executeQuery(s"select * from col_view") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements, rsSize) + 
rsSize = 0 + + rs = stmt.executeQuery(s"select * from row_view") + rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements, rsSize) + rsSize = 0 + + val stmt1 = conn1.createStatement() + rs = stmt1.executeQuery(s"select * from $tableOwner.col_view") + while (rs.next()) rsSize += 1 + assertEquals(0, rsSize) + + rs = stmt1.executeQuery(s"select * from $tableOwner.row_view") + while (rs.next()) rsSize += 1 + assertEquals(0, rsSize) + + stmt.execute("drop policy testPolicy1") + stmt.execute("drop policy testPolicy2") + } finally { + stmt.execute("drop view col_view") + stmt.execute("drop view row_view") + conn.close() + conn1.close() + } + } + + test("Policy application on views on table with policy created after view creation") { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + try { + stmt.execute(s"CREATE VIEW col_view AS SELECT id FROM $colTableName") + stmt.execute(s"CREATE VIEW row_view AS SELECT id FROM $rowTableName") + stmt.execute(s"create policy testPolicy1 on " + + s"$colTableName for select to current_user using id < 0") + stmt.execute(s"create policy testPolicy2 on " + + s"$rowTableName for select to current_user using id < 0") + var rs = stmt.executeQuery(s"select * from col_view") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements, rsSize) + rsSize = 0 + + rs = stmt.executeQuery(s"select * from row_view") + rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements, rsSize) + rsSize = 0 + + val stmt1 = conn1.createStatement() + rs = stmt1.executeQuery(s"select * from $tableOwner.col_view") + while (rs.next()) rsSize += 1 + assertEquals(0, rsSize) + + rs = stmt1.executeQuery(s"select * from $tableOwner.row_view") + while (rs.next()) rsSize += 1 + assertEquals(0, rsSize) + + stmt.execute("drop policy testPolicy1") + stmt.execute("drop policy testPolicy2") + } finally { + stmt.execute("drop view col_view") + stmt.execute("drop view 
row_view") + conn.close() + conn1.close() + } + } + + test("test policy not applied for update | delete on row table - SNAP-2576") { + this.updateOrDeleteOntableWithPolicy("row") + } + + test("test policy not applied for update | delete on column table - SNAP-2576") { + this.updateOrDeleteOntableWithPolicy("column") + } + + // Creates a scratch table `temp` of the given type ("row" or "column") with a + // select-only policy, then checks that a non-owner sees no rows on select but + // can still update and delete rows (the policy is declared "for select" only). + private def updateOrDeleteOntableWithPolicy(tableType: String): Unit = { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + val stmt1 = conn1.createStatement() + try { + ownerSession.sql(s"CREATE TABLE temp (username String, id Int) " + + s" USING $tableType ") + val seq = Seq("USERX" -> 4, "USERX" -> 5, "USERX" -> 6, "USERY" -> 7, + "USERY" -> 8, "USERY" -> 9) + val rdd = sc.parallelize(seq) + + val dataDF = ownerSession.createDataFrame(rdd) + + dataDF.write.insertInto("temp") + + stmt.execute(s"create policy testPolicy1 on " + + s" temp for select to current_user using " + + s" id < 0") + + stmt.execute("alter table temp enable row level security") + + + val q1 = s"select * from $tableOwner.temp" + var rs = stmt1.executeQuery(q1) + assertFalse(rs.next()) + + var n = stmt1.executeUpdate(s"update $tableOwner.temp set " + + s"username = 'USERZ' where username = 'USERX'") + + assertEquals(3, n) + + rs = stmt.executeQuery(s"select * from temp where username = 'USERZ'") + n = 0 + while (rs.next()) { + n += 1 + } + assertEquals(3, n) + + n = stmt1.executeUpdate(s"delete from $tableOwner.temp where username = 'USERZ'") + assertEquals(3, n) + rs = stmt.executeQuery(s"select * from temp where username = 'USERZ'") + assertFalse(rs.next()) + + ownerSession.sql("drop policy testPolicy1") + ownerSession.sql(s"drop table temp") + } finally { + // runs on failure too: release both JDBC connections, then make sure the + // scratch table is gone so the other table-type variant can recreate it + // (no-op on the success path; presumably the drop also removes a leftover policy) + conn.close() + conn1.close() + ownerSession.dropTable("temp", ifExists = true) + } + } + + test("test multiple policies application using snappy context on column table") { + this.testMultiplePolicy(colTableName) + } + + test("test multiple policies application using snappy context on row table") { + this.testMultiplePolicy(rowTableName) + } + + 
test("Test plan invalidation when queries & policy creation are mixed on column table") { + this.testMultiplePolicyCreationWithQuery(colTableName) + } + + test("Test plan invalidation when queries & policy creation are mixed on row table") { + this.testMultiplePolicyCreationWithQuery(rowTableName) + } + + test("test policy recreation on column table ENT:38") { + this.testPolicyRecreation(colTableName) + } + + test("test policy recreation on row table ENT:38") { + // must target the row table; the column table is covered by the test above + this.testPolicyRecreation(rowTableName) + } + + def testPolicyRecreation(tableName: String): Unit = { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + val stmt1 = conn1.createStatement() + try { + + var rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + + rsSize = 0 + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + + + stmt.execute(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id > 10") + + rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + rsSize = 0 + + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25, rsSize) + + stmt.execute(s"alter table $tableName disable row level security") + stmt.execute("drop policy testPolicy1") + rsSize = 0 + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + + stmt.execute(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id > 10") + + stmt.execute(s"alter table 
$tableName enable row level security") + rsSize = 0 + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25, rsSize) + + stmt.execute("drop policy testPolicy1") + + } finally { + conn.close() + conn1.close() + } + } + + + def testMultiplePolicyCreationWithQuery(tableName: String): Unit = { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + val stmt1 = conn1.createStatement() + try { + + var rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + + rsSize = 0 + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + + + stmt.execute(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id > 10") + + rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + rsSize = 0 + + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25, rsSize) + + + stmt.execute(s"create policy testPolicy2 on " + + s"$tableName for select to current_user using id < 30") + + rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + rsSize = 0 + + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(4, rsSize) + stmt.execute("drop policy testPolicy1") + stmt.execute("drop policy testPolicy2") + } finally { + conn.close() + conn1.close() + } + } + + private def 
testMultiplePolicy(tableName: String) { + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + val conn1 = getConnection(Some("UserX")) + try { + stmt.execute(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id > 10") + + stmt.execute(s"create policy testPolicy2 on " + + s"$tableName for select to current_user using id < 30") + + var rs = stmt.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + var rsSize = 0 + while (rs.next()) rsSize += 1 + assertEquals(numElements - 1 - 25 + 10, rsSize) + rsSize = 0 + val stmt1 = conn1.createStatement() + rs = stmt1.executeQuery(s"select * from $tableName where id > 25 or id < 10 ") + while (rs.next()) rsSize += 1 + assertEquals(4, rsSize) + stmt.execute("drop policy testPolicy1") + stmt.execute("drop policy testPolicy2") + } finally { + conn.close() + conn1.close() + } + } + + + test("old query plan invalidation on enabling rls on column table using jdbc client") { + this.testQueryPlanInvalidationOnRLSEnbaling(colTableName) + } + + test("old query plan invalidation on enabling rls on row table using jdbc client") { + this.testQueryPlanInvalidationOnRLSEnbaling(rowTableName) + } + + test("syspolicies table/vti") { + // create some policies on column & row tables + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + try { + stmt.execute(s"create policy testPolicy1 on " + + s"$colTableName for select to current_user using id > 10") + + stmt.execute(s"create policy testPolicy2 on " + + s"$rowTableName for select to current_user using id < 30") + + stmt.execute(s"create policy testPolicy3 on " + + s"$rowTableName for select to current_user using id < 70") + + val expectedColumns = List("NAME", "SCHEMANAME", "TABLENAME", + "POLICYFOR", "APPLYTO", "FILTER", "OWNER") + val expectedResults = Map("TESTPOLICY1" -> (tableOwner.toUpperCase, + colTableName.toUpperCase.substring(colTableName.indexOf('.') + 1), + "select", 
"current_user", "id > 10", + tableOwner.toUpperCase), + "TESTPOLICY2" -> (tableOwner.toUpperCase, + rowTableName.toUpperCase.substring(rowTableName.indexOf('.') + 1), + "select", "current_user", "id < 30", + tableOwner.toUpperCase), + "TESTPOLICY3" -> (tableOwner.toUpperCase, + rowTableName.toUpperCase.substring(rowTableName.indexOf('.') + 1), + "select", "current_user", "id < 70", + tableOwner.toUpperCase) + ) + + // check using session + val ds = ownerSession.sql("select * from sys.syspolicies") + val rows = ds.collect() + assert(expectedColumns === ds.schema.map(_.name.toUpperCase)) + assert(expectedResults.toSeq.sortBy(_._1).map(p => Row(p._1, p._2._1, p._2._2, + p._2._3, p._2._4, p._2._5, p._2._6)) === rows.toSeq.sortBy(_.getString(0))) + + val rs = stmt.executeQuery("select * from sys.syspolicies") + val rsmd = rs.getMetaData + assertEquals(expectedColumns.size, rsmd.getColumnCount) + for (i <- 1 to rsmd.getColumnCount) { + assert(expectedColumns.contains(rsmd.getColumnName(i))) + } + + var actualNumRows = 0 + while (rs.next()) { + actualNumRows += 1 + assert(expectedResults.contains(rs.getString("NAME"))) + val expectedRow = expectedResults(rs.getString("NAME")) + assertEquals(expectedRow._1, rs.getString("SCHEMANAME")) + assertEquals(expectedRow._2, rs.getString("TABLENAME")) + assertEquals(expectedRow._3, rs.getString("POLICYFOR")) + assertEquals(expectedRow._4, rs.getString("APPLYTO")) + assertEquals(expectedRow._5, rs.getString("FILTER")) + assertEquals(expectedRow._6, rs.getString("OWNER")) + } + assertEquals(expectedResults.size, actualNumRows) + + // check the connection metadata apis are not getting polluted + // with policies + val md = conn.getMetaData + val tableTypes = md.getTableTypes + // table type should not include policy + while (tableTypes.next()) { + val tt = tableTypes.getString(1) + assert(tt.toLowerCase.indexOf("policy") == -1) + assert(tt.toLowerCase.indexOf("policies") == -1) + } + + val rs1 = md.getTables(null, null, "%", null) + // 
should find the SYS.SYSPOLICIES table in meta-data + var foundSysPolicies = false + while (rs1.next()) { + if (rs1.getString("TABLE_NAME") == "SYSPOLICIES") { + foundSysPolicies = true + assert(rs1.getString("TABLE_SCHEM") === "SYS") + assert(rs1.getString("TABLE_TYPE") === "VIRTUAL TABLE") + } + } + assert(foundSysPolicies, "Failed to find SYS.SYSPOLICIES table in meta-data") + + stmt.execute("drop policy testPolicy1") + stmt.execute("drop policy testPolicy2") + stmt.execute("drop policy testPolicy3") + } finally { + conn.close() + } + } + + private def testQueryPlanInvalidationOnRLSEnbaling(tableName: String): Unit = { + // first disable RLS + ownerSession.sql(s"alter table $tableName disable row level security") + // now create a policy + ownerSession.sql(s"create policy testPolicy1 on " + + s"$tableName for select to current_user using id < 30") + val conn = getConnection(Some(tableOwner)) + + val conn1 = getConnection(Some("UserX")) + try { + + val q = s"select * from $tableName where id > 70" + val stmt1 = conn1.createStatement() + var rs = stmt1.executeQuery(q) + var numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + // fire again + rs = stmt1.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + // fire again + rs = stmt1.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + + val stmt = conn.createStatement() + rs = stmt.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + + // fire again + rs = stmt1.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + + // Now enable RLS + + stmt.execute(s"alter table $tableName enable row level security") + rs = stmt.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(29, numRows) + + rs = stmt1.executeQuery(q) + numRows = 0 + while (rs.next()) numRows += 1 + assertEquals(0, numRows) + ownerSession.sql("drop policy 
testPolicy1") + } finally { + conn1.close() + conn.close() + } + + } + + test("Drop table with policies using JDBC client") { + val seq2 = for (i <- 0 until numElements) yield { + (s"name_$i", i) + } + val rdd2 = sc.parallelize(seq2) + + val dataDF2 = ownerSession.createDataFrame(rdd2) + + val colTableName2: String = s"$tableOwner.ColumnTable2" + val rowTableName2: String = s"$tableOwner.RowTable2" + val colTableName3: String = s"$tableOwner.ColumnTable3" + + ownerSession.sql(s"CREATE TABLE $colTableName2 (name String, id Int) " + + s" USING column ") + ownerSession.sql(s"CREATE TABLE $rowTableName2 (name String, id Int) " + + s" USING row ") + ownerSession.sql(s"CREATE TABLE $colTableName3 (name String, id Int) " + + s" USING column ") + + dataDF2.write.insertInto(colTableName2) + dataDF2.write.insertInto(rowTableName2) + dataDF2.write.insertInto(colTableName3) + + val conn = getConnection(Some(tableOwner)) + val stmt = conn.createStatement() + try { + stmt.execute(s"alter table $colTableName2 enable row level security") + stmt.execute(s"alter table $rowTableName2 enable row level security") + stmt.execute(s"alter table $colTableName3 enable row level security") + + stmt.execute(s"create policy testPolicy1_for_ColumnTable3 on " + + s"$colTableName3 for select to current_user using id > 11") + stmt.execute(s"create policy testPolicy2_for_ColumnTable3 on " + + s"$colTableName3 for select to current_user using id < 22") + + testDropTable(colTableName2, stmt) + testDropTable(rowTableName2, stmt) + + // colTableName3 was not dropped, so policies should exist + assert(checkIfPoliciesOnTableExist(colTableName3)) + + testDropTable(colTableName3, stmt) + } finally { + conn.close() + } + + } + + private def testDropTable(tableName: String, stmt: Statement) { + stmt.execute(s"create policy testPolicy11 on " + + s"$tableName for select to current_user using id > 11") + stmt.execute(s"create policy testPolicy22 on " + + s"$tableName for select to current_user using id < 22") + 
stmt.execute(s"drop table $tableName") + assert(!checkIfPoliciesOnTableExist(tableName), s"Policy for $tableName should not be present") + } + + // return true if a policy exists for a table else false + private def checkIfPoliciesOnTableExist(tableName: String): Boolean = { + val policies = Misc.getMemStore.getExternalCatalog.getPolicies() + val it = policies.listIterator() + while (it.hasNext) { + val p = it.next() + // println("Actual tablename:" + tableName + ", tableName in policy:" + p.tableName) + if ((p.schemaName + "." + p.tableName).equalsIgnoreCase(tableName)) { + return true + } + } + false + } + + private def getConnection(user: Option[String] = None): Connection = { + val props = new Properties() + if (user.isDefined) { + props.put(Attribute.USERNAME_ATTR, user.get) + } + DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort", props) + } + +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTest.scala new file mode 100644 index 0000000000..d68930b6d2 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTest.scala @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */
+package org.apache.spark.sql.policy
+
+import scala.util.control.NonFatal
+
+import com.pivotal.gemfirexd.Attribute
+import com.pivotal.gemfirexd.internal.engine.Misc
+import org.junit.Assert.{assertEquals, assertTrue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SnappyContext
+import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal}
+import org.apache.spark.sql.catalyst.plans.logical.Filter
+import org.apache.spark.sql.types.StringType
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Row level security policy tests that run through [[SnappyContext]] sessions
+ * against one column table and one row table owned by `tableOwner`.
+ *
+ * The policy and table names used here (`testPolicy1`, `mapping`, `temp`, ...) are
+ * shared between tests, so every helper performs its drops through
+ * `runWithCleanup`, which also runs them when an assertion fails.
+ */
+class PolicyTest extends PolicyTestBase {
+
+  val props = Map.empty[String, String]
+  val tableOwner = "ashahid"
+  val numElements = 100
+  val colTableName: String = s"$tableOwner.ColumnTable"
+  val rowTableName: String = s"$tableOwner.RowTable"
+  // session of the table owner; assigned in beforeAll
+  var ownerContext: SnappyContext = _
+
+  /** Builds the local[4] conf used by this suite and applies `addOn` when it is non-null. */
+  protected override def newSparkConf(addOn: SparkConf => SparkConf): SparkConf = {
+    val conf = new org.apache.spark.SparkConf()
+        .setAppName("PolicyTest")
+        .setMaster("local[4]")
+        .set("spark.sql.crossJoin.enabled", "true")
+    if (addOn != null) addOn(conf) else conf
+  }
+
+  /**
+   * Creates the column and row tables as `tableOwner`, loads `numElements` rows
+   * ("name_i", i) into each and enables row level security on both.
+   */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    val seq = for (i <- 0 until numElements) yield {
+      (s"name_$i", i)
+    }
+    val rdd = sc.parallelize(seq)
+    ownerContext = snc.newSession()
+    ownerContext.snappySession.conf.set(Attribute.USERNAME_ATTR, tableOwner)
+
+    val dataDF = ownerContext.createDataFrame(rdd)
+
+    ownerContext.sql(s"CREATE TABLE $colTableName (name String, id Int) " +
+        s" USING column ")
+
+    ownerContext.sql(s"CREATE TABLE $rowTableName (name String, id Int) " +
+        s" USING row ")
+    dataDF.write.insertInto(colTableName)
+    dataDF.write.insertInto(rowTableName)
+    ownerContext.sql(s"alter table $colTableName enable row level security")
+    ownerContext.sql(s"alter table $rowTableName enable row level security")
+  }
+
+  /** Drops both tables; the base class cleanup runs even if a drop fails. */
+  override def afterAll(): Unit = {
+    try {
+      ownerContext.dropTable(colTableName, ifExists = true)
+      ownerContext.dropTable(rowTableName, ifExists = true)
+    } finally {
+      super.afterAll()
+    }
+  }
+
+  /** Opens a new session whose user name attribute is set to `user`. */
+  private def sessionFor(user: String): SnappyContext = {
+    val session = snc.newSession()
+    session.snappySession.conf.set(Attribute.USERNAME_ATTR, user)
+    session
+  }
+
+  /** Drops the named policies, in the given order, through the owner's session. */
+  private def dropPolicies(names: String*): Unit = {
+    names.foreach(name => ownerContext.sql(s"drop policy $name"))
+  }
+
+  /**
+   * Runs `body` followed by `cleanup`. When `body` throws, `cleanup` is still
+   * attempted but its own non-fatal errors are ignored so that the original
+   * failure is the one reported.
+   */
+  private def runWithCleanup(cleanup: => Unit)(body: => Unit): Unit = {
+    var succeeded = false
+    try {
+      body
+      succeeded = true
+    } finally {
+      if (succeeded) {
+        cleanup
+      } else {
+        try cleanup catch { case NonFatal(_) => }
+      }
+    }
+  }
+
+  test("Policy creation on a column table using snappy context") {
+    this.testPolicy(colTableName)
+  }
+
+  test("Policy creation on a row table using snappy context") {
+    this.testPolicy(rowTableName)
+  }
+
+  /** With policy `id < 0` the owner must see every row and "UserX" none. */
+  private def testPolicy(tableName: String): Unit = {
+    runWithCleanup(dropPolicies("testPolicy1")) {
+      ownerContext.sql(s"create policy testPolicy1 on " +
+          s"$tableName for select to current_user using id < 0")
+      var rs = ownerContext.sql(s"select * from $tableName").collect()
+      assertEquals(numElements, rs.length)
+
+      val snc2 = sessionFor("UserX")
+
+      rs = snc2.sql(s"select * from $tableName").collect()
+      assertEquals(0, rs.length)
+    }
+  }
+
+  test("Check Policy Filter applied to the plan only once") {
+    runWithCleanup(dropPolicies("testPolicy2")) {
+      ownerContext.sql(s"create policy testPolicy2 on " +
+          s"$colTableName for select to current_user using id > 0")
+
+      val snc2 = sessionFor("UserX")
+
+      val df = snc2.sql(s"select * from $colTableName")
+      val allFilters = df.queryExecution.analyzed.collect {
+        case f: Filter => f
+      }
+      // count the EqualTo(marker, marker) conditions, where marker is
+      // PolicyProperties.rlsConditionString, over all filters of the analyzed plan
+      val rlsMarker = UTF8String.fromString(PolicyProperties.rlsConditionString)
+      assertEquals(1, allFilters.map(_.condition).flatMap(ex => {
+        ex.collect {
+          case x@EqualTo(Literal(l1, StringType), Literal(l2, StringType))
+            if l1.equals(rlsMarker) && l2.equals(rlsMarker) => x
+        }
+      }).length)
+    }
+  }
+
+  test("test multiple policies application using snappy context on column table") {
+    this.testMultiplePolicy(colTableName)
+  }
+
+  test("test multiple policies application using snappy context on row table") {
+    this.testMultiplePolicy(rowTableName)
+  }
+
+  test("old query plan invalidation on creation of policy on column table using snappy context") {
+    this.testQueryPlanInvalidation(colTableName)
+  }
+
+  test("old query plan invalidation on creation of policy on row table using snappy context") {
+    this.testQueryPlanInvalidation(rowTableName)
+  }
+
+  /**
+   * Runs the same query text several times before any policy exists, then creates
+   * a policy and checks that "UserX" gets the filtered result for that same text.
+   */
+  private def testQueryPlanInvalidation(tableName: String): Unit = {
+    val snc2 = sessionFor("UserX")
+    val q = s"select * from $tableName where id > 70"
+    def rowCount(ctx: SnappyContext): Int = ctx.sql(q).collect().length
+
+    assertEquals(29, rowCount(snc2))
+    // fire again
+    assertEquals(29, rowCount(snc2))
+    // fire again
+    assertEquals(29, rowCount(ownerContext))
+    // fire again
+    assertEquals(29, rowCount(snc2))
+
+    runWithCleanup(dropPolicies("testPolicy1")) {
+      // now create a policy
+      ownerContext.sql(s"create policy testPolicy1 on " +
+          s"$tableName for select to current_user using id < 30")
+
+      assertEquals(29, rowCount(ownerContext))
+      assertEquals(0, rowCount(snc2))
+    }
+  }
+
+  test("old query plan invalidation on enabling rls on column table using snappy context") {
+    this.testQueryPlanInvalidationOnRLSEnabling(colTableName)
+  }
+
+  test("old query plan invalidation on enabling rls on row table using snappy context") {
+    this.testQueryPlanInvalidationOnRLSEnabling(rowTableName)
+  }
+
+  /**
+   * Creates a policy while row level security is disabled, runs the query a few
+   * times, then enables row level security and checks that "UserX" gets the
+   * filtered result for the same query text.
+   */
+  private def testQueryPlanInvalidationOnRLSEnabling(tableName: String): Unit = {
+    val enableRls = s"alter table $tableName enable row level security"
+    var rlsDisabled = false
+    // beforeAll enabled RLS and the other tests rely on it, so switch it back on
+    // if this test fails while it is off
+    runWithCleanup {
+      if (rlsDisabled) ownerContext.sql(enableRls)
+    } {
+      runWithCleanup(dropPolicies("testPolicy1")) {
+        // first disable RLS
+        ownerContext.sql(s"alter table $tableName disable row level security")
+        rlsDisabled = true
+        // now create a policy
+        ownerContext.sql(s"create policy testPolicy1 on " +
+            s"$tableName for select to current_user using id < 30")
+
+        val snc2 = sessionFor("UserX")
+        val q = s"select * from $tableName where id > 70"
+        def rowCount(ctx: SnappyContext): Int = ctx.sql(q).collect().length
+
+        assertEquals(29, rowCount(snc2))
+        // fire again
+        assertEquals(29, rowCount(snc2))
+        // fire again
+        assertEquals(29, rowCount(ownerContext))
+
+        assertEquals(29, rowCount(ownerContext))
+
+        // fire again
+        assertEquals(29, rowCount(snc2))
+
+        // Now enable RLS
+        ownerContext.sql(enableRls)
+        rlsDisabled = false
+
+        assertEquals(29, rowCount(ownerContext))
+        assertEquals(0, rowCount(snc2))
+      }
+    }
+  }
+
+  test("test bug causing recursion with query having filter using col table - ENT-40") {
+    this.testRecursionBug(colTableName)
+  }
+
+  test("test bug causing recursion with query having filter using row table - ENT-40") {
+    this.testRecursionBug(rowTableName)
+  }
+
+
+  test("test policy filter with subquery for row table") {
+    this.whereClauseWithExistsCondition(rowTableName)
+  }
+
+  test("test policy filter with subquery for col table") {
+    this.whereClauseWithExistsCondition(colTableName)
+  }
+
+  /**
+   * Creates the `mapping` row table of (username, hisid) pairs and a policy on
+   * `tableName` whose condition is an EXISTS sub-query over that table, runs `body`
+   * with a new "UserX" session, and finally drops the policy and the mapping table.
+   */
+  private def withMappingPolicy(tableName: String)(body: SnappyContext => Unit): Unit = {
+    val mappingTable = "mapping"
+    runWithCleanup {
+      ownerContext.sql(s"drop table $mappingTable")
+    } {
+      ownerContext.sql(s"CREATE TABLE $mappingTable (username String, hisid Int) " +
+          s" USING row ")
+      val seq = Seq("USERX" -> 4, "USERX" -> 5, "USERX" -> 6, "USERY" -> 7,
+        "USERY" -> 8, "USERY" -> 9)
+      val rdd = sc.parallelize(seq)
+
+      val dataDF = ownerContext.createDataFrame(rdd)
+
+      dataDF.write.insertInto(mappingTable)
+
+      runWithCleanup(dropPolicies("testPolicy1")) {
+        ownerContext.sql(s"create policy testPolicy1 on " +
+            s"$tableName for select to current_user using " +
+            s"exists( select 1 from $tableOwner.$mappingTable " +
+            s" where username = current_user() and id = hisid)")
+
+        body(sessionFor("UserX"))
+      }
+    }
+  }
+
+  /** "UserX" must only see the ids mapped to it (4, 5, 6), with and without an extra filter. */
+  private def whereClauseWithExistsCondition(tableName: String): Unit = {
+    withMappingPolicy(tableName) { snc2 =>
+      val q1 = s"select * from $tableName "
+      var rs = snc2.sql(q1).collect()
+      assertEquals(3, rs.length)
+      var idResults = rs.map(_.getInt(1))
+      assertTrue(idResults.contains(4))
+      assertTrue(idResults.contains(5))
+      assertTrue(idResults.contains(6))
+
+      // fire the query but use table alias and a filter
+      val q2 = s"select * from $tableName x where x.id < 6 "
+      rs = snc2.sql(q2).collect()
+      assertEquals(2, rs.length)
+      idResults = rs.map(_.getInt(1))
+      assertTrue(idResults.contains(4))
+      assertTrue(idResults.contains(5))
+    }
+  }
+
+  test("test policy filter with subquery for row table with row table joined to itself") {
+    this.tableWithPolicyJoinedToItself(rowTableName)
+  }
+
+  test("test policy filter with subquery for col table with col table joined to itself") {
+    this.tableWithPolicyJoinedToItself(colTableName)
+  }
+
+  test("test policy not applied for update | delete on row table - SNAP-2576") {
+    this.updateOrDeleteOntableWithPolicy("row")
+  }
+
+  test("test policy not applied for update | delete on column table - SNAP-2576") {
+    this.updateOrDeleteOntableWithPolicy("column")
+  }
+
+  /**
+   * Creates table `temp` of the given type with a select policy that hides every
+   * row from "UserX", then checks that update and delete issued by "UserX" still
+   * take effect (verified through the owner's session).
+   */
+  private def updateOrDeleteOntableWithPolicy(tableType: String): Unit = {
+    runWithCleanup {
+      ownerContext.sql(s"drop table temp")
+    } {
+      ownerContext.sql(s"CREATE TABLE temp (username String, id Int) " +
+          s" USING $tableType ")
+      val seq = Seq("USERX" -> 4, "USERX" -> 5, "USERX" -> 6, "USERY" -> 7,
+        "USERY" -> 8, "USERY" -> 9)
+      val rdd = sc.parallelize(seq)
+
+      val dataDF = ownerContext.createDataFrame(rdd)
+
+      dataDF.write.insertInto("temp")
+
+      runWithCleanup(dropPolicies("testPolicy1")) {
+        ownerContext.sql(s"create policy testPolicy1 on " +
+            s" temp for select to current_user using " +
+            s" id < 0")
+
+        ownerContext.sql("alter table temp enable row level security")
+
+        val snc2 = sessionFor("UserX")
+        val q1 = s"select * from $tableOwner.temp"
+        var rs = snc2.sql(q1).collect()
+        assertEquals(0, rs.length)
+
+        snc2.sql(s"update $tableOwner.temp set username = 'USERZ' where username = 'USERX'")
+
+        rs = ownerContext.sql(s"select * from temp where username = 'USERZ'").collect()
+        assertEquals(3, rs.length)
+
+        snc2.sql(s"delete from $tableOwner.temp where username = 'USERZ'")
+
+        rs = ownerContext.sql(s"select * from temp where username = 'USERZ'").collect()
+        assertEquals(0, rs.length)
+      }
+    }
+  }
+
+  /** Self-join of a table carrying the mapping policy: both sides must be filtered. */
+  private def tableWithPolicyJoinedToItself(tableName: String): Unit = {
+    withMappingPolicy(tableName) { snc2 =>
+      val q1 = s"select * from $tableName tab1 , $tableName tab2 " +
+          s"where tab1.id < 6 and tab2.id < 6 "
+      val rs = snc2.sql(q1).collect()
+      assertEquals(4, rs.length)
+      val idResults = rs.map(x => x.getInt(1) -> x.get(3))
+      assertTrue(idResults.contains((4, 4)))
+      assertTrue(idResults.contains((4, 5)))
+      assertTrue(idResults.contains((5, 4)))
+      assertTrue(idResults.contains((5, 5)))
+    }
+  }
+
+  /**
+   * Stacks up to four policies for userX on `tableName` and runs filtered queries
+   * as "UserX" after the first, second and fourth policy.
+   */
+  private def testRecursionBug(tableName: String): Unit = {
+    // policies are created in this order, so a partial failure still drops
+    // exactly the ones that exist
+    runWithCleanup(dropPolicies("testPolicy1", "testPolicy2", "testPolicy3", "testPolicy4")) {
+      ownerContext.sql(s"create policy testPolicy1 on " +
+          s"$tableName for select to userX using id < 30 and name = 'name_1'")
+      val snc2 = sessionFor("UserX")
+      var q = s"select * from $tableName where id < 20 and name = 'name_1'"
+
+      var rs = snc2.sql(q).collect()
+      assertEquals(1, rs.length)
+      // now create another policy
+      ownerContext.sql(s"create policy testPolicy2 on " +
+          s"$tableName for select to userX using id < 20 and name = 'name_2'")
+      rs = snc2.sql(q).collect()
+      assertEquals(0, rs.length)
+
+      ownerContext.sql(s"create policy testPolicy3 on " +
+          s"$tableName for select to userX using id < 10 and name = 'name_4'")
+
+      ownerContext.sql(s"create policy testPolicy4 on " +
+          s"$tableName for select to userX using id < 5 and name = 'name_5'")
+
+      q = s"select * from $tableName where id < 20 and name = 'name_1' and id > 10 " +
+          s"and name = 'name7'"
+
+      rs = snc2.sql(q).collect()
+      assertEquals(0, rs.length)
+    }
+  }
+
+  /** Two policies (`id > 10`, `id < 20`) must both apply to "UserX": ids 11..19 remain. */
+  private def testMultiplePolicy(tableName: String): Unit = {
+    runWithCleanup(dropPolicies("testPolicy1", "testPolicy2")) {
+      ownerContext.sql(s"create policy testPolicy1 on " +
+          s"$tableName for select to current_user using id > 10")
+      ownerContext.sql(s"create policy testPolicy2 on " +
+          s"$tableName for select to current_user using id < 20")
+      var rs = ownerContext.sql(s"select * from $tableName").collect()
+      assertEquals(numElements, rs.length)
+
+      val snc2 = sessionFor("UserX")
+
+      rs = snc2.sql(s"select * from $tableName").collect()
+      assertEquals(9, rs.length)
+    }
+  }
+
+  test("Policy creation & dropping allowed by all users if security is disabled") {
+    val snc2 = sessionFor("UserX")
+    snc2.sql(s"create policy testPolicy2 on " +
+        s"$colTableName for select to current_user using id > 10")
+    snc2.sql("drop policy testPolicy2")
+
+  }
+
+  test("Drop table with policies") {
+    val seq2 = for (i <- 0 until numElements) yield {
+      (s"name_$i", i)
+    }
+    val rdd2 = sc.parallelize(seq2)
+
+    val dataDF2 = ownerContext.createDataFrame(rdd2)
+
+    val colTableName2: String = s"$tableOwner.ColumnTable2"
+    val rowTableName2: String = s"$tableOwner.RowTable2"
+    val colTableName3: String = s"$tableOwner.ColumnTable3"
+
+    ownerContext.sql(s"CREATE TABLE $colTableName2 (name String, id Int) " +
+        s" USING column ")
+    ownerContext.sql(s"CREATE TABLE $rowTableName2 (name String, id Int) " +
+        s" USING row ")
+    ownerContext.sql(s"CREATE TABLE $colTableName3 (name String, id Int) " +
+        s" USING column ")
+
+    dataDF2.write.insertInto(colTableName2)
+    dataDF2.write.insertInto(rowTableName2)
+    dataDF2.write.insertInto(colTableName3)
+    ownerContext.sql(s"alter table $colTableName2 enable row level security")
+    ownerContext.sql(s"alter table $rowTableName2 enable row level security")
+    ownerContext.sql(s"alter table $colTableName3 enable row level security")
+
+    ownerContext.sql(s"create policy testPolicy1_for_ColumnTable3 on " +
+        s"$colTableName3 for select to current_user using id > 11")
+    ownerContext.sql(s"create policy testPolicy2_for_ColumnTable3 on " +
+        s"$colTableName3 for select to current_user using id < 22")
+
+    testDropTable(colTableName2)
+    testDropTable(rowTableName2)
+
+    // colTableName3 was not dropped, so policies should exist
+    assert(checkIfPoliciesOnTableExist(colTableName3))
+
+    testDropTable(colTableName3)
+  }
+
+  /** Creates two policies on `tableName`, drops the table and checks no policy is left for it. */
+  private def testDropTable(tableName: String): Unit = {
+    ownerContext.sql(s"create policy testPolicy11 on " +
+        s"$tableName for select to current_user using id > 11")
+    ownerContext.sql(s"create policy testPolicy22 on " +
+        s"$tableName for select to current_user using id < 22")
+    ownerContext.sql(s"drop table $tableName")
+    assert(!checkIfPoliciesOnTableExist(tableName), s"Policy for $tableName should not be present")
+  }
+
+  // return true if a policy exists for a table else false
+  private def checkIfPoliciesOnTableExist(tableName: String): Boolean = {
+    val it = Misc.getMemStore.getExternalCatalog.getPolicies.listIterator()
+    var found = false
+    // stop at the first policy whose "schema.table" matches, ignoring case
+    while (!found && it.hasNext) {
+      val p = it.next()
+      found = (p.schemaName + "." + p.tableName).equalsIgnoreCase(tableName)
+    }
+    found
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTestBase.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTestBase.scala
new file mode 100644
index 0000000000..20be3394b6
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/policy/PolicyTestBase.scala
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.policy
+
+import com.pivotal.gemfirexd.Attribute
+import com.pivotal.gemfirexd.Property.{AUTH_LDAP_SEARCH_BASE, AUTH_LDAP_SERVER}
+import com.pivotal.gemfirexd.internal.engine.store.GemFireStore
+import com.pivotal.gemfirexd.internal.iapi.reference.Property
+import com.pivotal.gemfirexd.security.{LdapTestServer, SecurityTestUtils}
+import io.snappydata.{Constant, SnappyFunSuite}
+import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll}
+
+import org.apache.spark.{Logging, SparkConf}
+
+/**
+ * Common base of the policy test suites. It switches row level security on for
+ * the duration of a suite and offers an LDAP-backed SparkConf to the suites
+ * that need authentication.
+ */
+abstract class PolicyTestBase extends SnappyFunSuite
+    with Logging
+    with BeforeAndAfter
+    with BeforeAndAfterAll {
+
+  protected val sysUser = "gemfire10"
+
+  /** Keys copied from the LDAP boot properties into system properties (and cleared again). */
+  private def ldapPropertyKeys: List[String] =
+    List(Attribute.AUTH_PROVIDER, AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE)
+
+  /** Stops whatever is running, then sets the RLS system property and store flag. */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    stopAll()
+
+    System.setProperty(Property.SNAPPY_ENABLE_RLS, "true")
+    GemFireStore.ALLOW_RLS_WITHOUT_SECURITY = true
+  }
+
+  /**
+   * Starts the LDAP test server from `/auth.ldif`, publishes its boot properties
+   * and the `sysUser` credentials as system properties, and returns a local[4]
+   * conf carrying the same credentials. `addOn` is applied when it is non-null.
+   */
+  protected def newLDAPSparkConf(addOn: (SparkConf) => SparkConf): SparkConf = {
+    val ldifPath = getClass.getResource("/auth.ldif").getPath
+    val bootProps = SecurityTestUtils.startLdapServerAndGetBootProperties(0, 0, sysUser,
+      ldifPath)
+    ldapPropertyKeys.foreach { key =>
+      System.setProperty(key, bootProps.getProperty(key))
+    }
+
+    val storeUserKey = Constant.STORE_PROPERTY_PREFIX + Attribute.USERNAME_ATTR
+    val storePasswordKey = Constant.STORE_PROPERTY_PREFIX + Attribute.PASSWORD_ATTR
+    System.setProperty(storeUserKey, sysUser)
+    System.setProperty(storePasswordKey, sysUser)
+
+    val ldapConf = new org.apache.spark.SparkConf()
+        .setAppName("PolicyTest")
+        .setMaster("local[4]")
+        .set("spark.sql.crossJoin.enabled", "true")
+        .set(Attribute.AUTH_PROVIDER, bootProps.getProperty(Attribute.AUTH_PROVIDER))
+        .set(storeUserKey, sysUser)
+        .set(storePasswordKey, sysUser)
+
+    // a null addOn means "no customisation"
+    Option(addOn).fold(ldapConf)(customize => customize(ldapConf))
+  }
+
+  /**
+   * Stops everything including the LDAP test server if it was started, and in all
+   * cases resets the system properties and the store flag touched by this class.
+   */
+  override def afterAll(): Unit = {
+    try {
+      super.afterAll()
+      stopAll()
+
+      val server = LdapTestServer.getInstance()
+      if (server.isServerStarted) server.stopService()
+    } finally {
+      ldapPropertyKeys.foreach { key =>
+        System.clearProperty(key)
+        System.clearProperty("gemfirexd." + key)
+        System.clearProperty(Constant.STORE_PROPERTY_PREFIX + key)
+      }
+      System.clearProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.USERNAME_ATTR)
+      System.clearProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.PASSWORD_ATTR)
+      System.setProperty("gemfirexd.authentication.required", "false")
+      GemFireStore.ALLOW_RLS_WITHOUT_SECURITY = false
+      System.clearProperty(Property.SNAPPY_ENABLE_RLS)
+    }
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/RestrictTableCreationPolicyTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/RestrictTableCreationPolicyTest.scala
new file mode 100644
index 0000000000..288e4997c3
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/policy/RestrictTableCreationPolicyTest.scala
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.policy
+
+import java.sql.{Connection, DriverManager, SQLException}
+import java.util.Properties
+
+import com.pivotal.gemfirexd.internal.iapi.error.StandardException
+import com.pivotal.gemfirexd.internal.iapi.reference.Property
+import com.pivotal.gemfirexd.{Attribute, TestUtil}
+import org.junit.Assert.assertEquals
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SnappySession
+
+/**
+ * Policy tests over JDBC connections with LDAP security on and
+ * `Property.SNAPPY_RESTRICT_TABLE_CREATE` set to "true". The tables live in
+ * schema `tax`, which is created with authorization for LDAP group
+ * `ownerLdapGroup`.
+ */
+class RestrictTableCreationPolicyTest extends PolicyTestBase {
+
+  val user1 = "gemfire1"
+  val user2 = "gemfire2"
+  val user3 = "gemfire3"
+  val tableCreator: String = user1
+  val ownerLdapGroup = "gemGroup1" // users gem1, gem2, gem3
+  val otherLdapGroup = "gemGroup3" // users gem6, gem7, gem8
+  val otherUser = "gemfire6"
+  val props = Map.empty[String, String]
+  val schema = "tax"
+  val numElements = 100
+  val colTableName: String = s"$schema.ColumnTable"
+  val rowTableName: String = s"$schema.RowTable"
+  // session of `tableCreator`; assigned in beforeAll
+  var ownerSession: SnappySession = _
+
+  // host:port returned by TestUtil.startNetServer(); assigned in beforeAll
+  var serverHostPort: String = _
+
+  /**
+   * Starts a network server, checks that a plain user cannot run CREATE SCHEMA ...
+   * AUTHORIZATION, creates the schema as `sysUser`, then creates, grants on,
+   * enables RLS for and loads the two tables as `tableCreator`.
+   */
+  override def beforeAll(): Unit = {
+    System.setProperty(Property.SNAPPY_RESTRICT_TABLE_CREATE, "true")
+    super.beforeAll()
+    val seq = for (i <- 0 until numElements) yield {
+      (s"name_$i", i)
+    }
+    val rdd = sc.parallelize(seq)
+    ownerSession = snc.snappySession.newSession()
+    serverHostPort = TestUtil.startNetServer()
+    ownerSession.conf.set(Attribute.USERNAME_ATTR, tableCreator)
+    ownerSession.conf.set(Attribute.PASSWORD_ATTR, tableCreator)
+    val dataDF = ownerSession.createDataFrame(rdd)
+
+    // check failure in CREATE SCHEMA with authorization when using user session
+    try {
+      ownerSession.sql(s"create schema $schema authorization ldapgroup:$ownerLdapGroup")
+      fail("Expected security failure")
+    } catch {
+      case se: SQLException if se.getSQLState == "42508" => // expected
+    }
+    // CREATE SCHEMA with authorization should work with admin privileges
+    val adminSession = snc.snappySession.newSession()
+    adminSession.conf.set(Attribute.USERNAME_ATTR, sysUser)
+    adminSession.conf.set(Attribute.PASSWORD_ATTR, sysUser)
+    adminSession.sql(s"create schema $schema authorization ldapgroup:$ownerLdapGroup")
+
+    ownerSession.sql(s"CREATE TABLE $colTableName (name String, id Int) " +
+        s" USING column ")
+
+    ownerSession.sql(s"CREATE TABLE $rowTableName (name String, id Int) " +
+        s" USING row ")
+    ownerSession.sql(s"grant select on table $colTableName to ldapgroup:$otherLdapGroup")
+    ownerSession.sql(s"grant select on table $rowTableName to ldapgroup:$otherLdapGroup")
+
+    ownerSession.sql(s"alter table $colTableName enable row level security")
+    ownerSession.sql(s"alter table $rowTableName enable row level security")
+
+    dataDF.write.insertInto(colTableName)
+    dataDF.write.insertInto(rowTableName)
+  }
+
+  protected override def newSparkConf(addOn: (SparkConf) => SparkConf): SparkConf = {
+    newLDAPSparkConf(addOn)
+  }
+
+  /**
+   * Drops both tables. The base class cleanup and the reset of
+   * `SNAPPY_RESTRICT_TABLE_CREATE` run even when a drop fails, so the property
+   * cannot leak into suites that run later in the same JVM.
+   */
+  override def afterAll(): Unit = {
+    try {
+      ownerSession.dropTable(colTableName, ifExists = true)
+      ownerSession.dropTable(rowTableName, ifExists = true)
+    } finally {
+      try {
+        super.afterAll()
+      } finally {
+        System.clearProperty(Property.SNAPPY_RESTRICT_TABLE_CREATE)
+      }
+    }
+  }
+
+  /**
+   * Opens one JDBC connection per user, in the given order, passes them to `body`
+   * and closes every connection that was opened, including when opening a later
+   * one or `body` itself fails.
+   */
+  private def withConnections(users: String*)(body: Seq[Connection] => Unit): Unit = {
+    var opened = Vector.empty[Connection]
+    try {
+      users.foreach(user => opened :+= getConnection(Some(user)))
+      body(opened)
+    } finally {
+      opened.foreach(_.close())
+    }
+  }
+
+  /** Number of rows `select * from tableName` returns on `conn`. */
+  private def countRows(conn: Connection, tableName: String): Int = {
+    val stmt = conn.createStatement()
+    try {
+      val rs = stmt.executeQuery(s"select * from $tableName")
+      var count = 0
+      while (rs.next()) count += 1
+      count
+    } finally {
+      stmt.close()
+    }
+  }
+
+  test("Policy creation on a column table using jdbc client") {
+    this.testPolicy(colTableName)
+  }
+
+  test("Policy creation on a row table using jdbc client") {
+    this.testPolicy(rowTableName)
+  }
+
+  /**
+   * The creator sets policy `id < 0`; users of the owner LDAP group still see all
+   * rows while `otherUser` sees none. The policy is then dropped by `user2`.
+   */
+  private def testPolicy(tableName: String): Unit = {
+    withConnections(tableCreator, otherUser, user2, user3) {
+      case Seq(conn, conn1, conn2, conn3) =>
+        conn.createStatement().execute(s"create policy $schema.testPolicy1 on " +
+            s"$tableName for select to current_user using id < 0")
+        assertEquals(numElements, countRows(conn, tableName))
+        assertEquals(0, countRows(conn1, tableName))
+
+        // users gemfire2 & gemfire3 should also not get policy applied on them
+        assertEquals(numElements, countRows(conn2, tableName))
+        assertEquals(numElements, countRows(conn3, tableName))
+        // let user2 drop the policy
+        conn2.createStatement().execute(s"drop policy $schema.testPolicy1")
+    }
+  }
+
+  test("users of other ldap group not allowed to create or drop policies") {
+    withConnections(tableCreator, otherUser, user2, user3) {
+      case Seq(conn, conn1, conn2, _) =>
+        val stmt = conn.createStatement()
+        val stmt1 = conn1.createStatement()
+        // anything other than the two expected exception types, including the
+        // failure raised by fail(), propagates and fails the test
+        try {
+          stmt1.execute(s"create policy $schema.testPolicy1 on " +
+              s"$colTableName for select to current_user using id < 0")
+          fail("other user cannot create policy in other's schema")
+        } catch {
+          case _: SQLException =>
+          case _: StandardException =>
+        }
+
+        stmt.execute(s"create policy $schema.testPolicy1 on " +
+            s"$colTableName for select to current_user using id < 0")
+        // let other user drop the policy
+        try {
+          stmt1.execute(s"drop policy $schema.testPolicy1")
+          fail("other user cannot drop policy in other's schema")
+        } catch {
+          case _: SQLException =>
+          case _: StandardException =>
+        }
+        val stmt2 = conn2.createStatement()
+        stmt2.execute(s"drop policy $schema.testPolicy1")
+    }
+  }
+
+  test("check toggle row level security behaviour for ldap groups") {
+    withConnections(tableCreator, otherUser, user2, user3) {
+      case Seq(_, conn1, conn2, _) =>
+        val stmt1 = conn1.createStatement()
+        // NOTE(review): unlike the create/drop checks above there is no fail() after
+        // this statement, so the test also passes when the other group's user is
+        // allowed to disable RLS - confirm whether that is intended
+        try {
+          stmt1.execute(s"alter table $colTableName disable row level security")
+        } catch {
+          case _: SQLException =>
+          case _: StandardException =>
+        }
+        val stmt2 = conn2.createStatement()
+        stmt2.execute(s"alter table $colTableName enable row level security")
+    }
+  }
+
+  /**
+   * Opens a JDBC connection to the network server started in beforeAll. When
+   * `user` is given it is used as both user name and password.
+   */
+  private def getConnection(user: Option[String] = None): Connection = {
+    val connProps = new Properties()
+    user.foreach { name =>
+      connProps.put(Attribute.USERNAME_ATTR, name)
+      connProps.put(Attribute.PASSWORD_ATTR, name)
+    }
+    DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort", connProps)
+  }
+
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledJdbcClientPolicyTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledJdbcClientPolicyTest.scala
new file mode 100644
index 0000000000..f5ac707041
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledJdbcClientPolicyTest.scala
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.policy
+
+import java.sql.{Connection, DriverManager}
+import java.util.Properties
+
+import com.pivotal.gemfirexd.{Attribute, TestUtil}
+import org.junit.Assert.{assertFalse, assertTrue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SnappyContext
+
+/**
+ * Policy test that queries through a JDBC client connection with LDAP
+ * security enabled. Tables are owned by `user1` and readable by `user2`.
+ */
+class SecurityEnabledJdbcClientPolicyTest extends PolicyTestBase {
+
+  val user1 = "gemfire1"
+  val user2 = "gemfire2"
+
+  val props = Map.empty[String, String]
+  val tableOwner: String = user1
+  val numElements = 100
+  val colTableName: String = s"$tableOwner.ColumnTable"
+  val rowTableName: String = s"$tableOwner.RowTable"
+  // session of the table owner; assigned in beforeAll
+  var ownerContext: SnappyContext = _
+
+  // host:port returned by TestUtil.startNetServer(); assigned in beforeAll
+  var serverHostPort: String = _
+
+
+  /**
+   * Starts a network server, creates the column and row tables as `tableOwner`,
+   * grants select on both to `user2` and loads `numElements` rows into each.
+   */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    val seq = for (i <- 0 until numElements) yield {
+      (s"name_$i", i)
+    }
+    val rdd = sc.parallelize(seq)
+    ownerContext = snc.newSession()
+    serverHostPort = TestUtil.startNetServer()
+    ownerContext.snappySession.conf.set(Attribute.USERNAME_ATTR, tableOwner)
+    ownerContext.snappySession.conf.set(Attribute.PASSWORD_ATTR, tableOwner)
+    val dataDF = ownerContext.createDataFrame(rdd)
+
+    ownerContext.sql(s"CREATE TABLE $colTableName (name String, id Int) " +
+        s" USING column ")
+
+    ownerContext.sql(s"CREATE TABLE $rowTableName (name String, id Int) " +
+        s" USING row ")
+    ownerContext.sql(s"grant select on table $colTableName to $user2")
+    ownerContext.sql(s"grant select on table $rowTableName to $user2")
+    dataDF.write.insertInto(colTableName)
+    dataDF.write.insertInto(rowTableName)
+  }
+
+  protected override def newSparkConf(addOn: (SparkConf) => SparkConf): SparkConf = {
+    newLDAPSparkConf(addOn)
+  }
+
+  /** Drops both tables; the base class cleanup runs even if a drop fails. */
+  override def afterAll(): Unit = {
+    try {
+      ownerContext.dropTable(colTableName, ifExists = true)
+      ownerContext.dropTable(rowTableName, ifExists = true)
+    } finally {
+      super.afterAll()
+    }
+  }
+
+
+  test("test bug causing recursion with query having filter using col table - ENT-40") {
+    this.testRecursionBug(colTableName)
+  }
+
+  test("test bug causing recursion with query having filter using row table - ENT-40") {
+    this.testRecursionBug(rowTableName)
+  }
+
+  /**
+   * Creates policy `id < 30` for `user2` on `tableName` and checks that a filtered
+   * query issued by `user2` over JDBC returns exactly one row.
+   */
+  private def testRecursionBug(tableName: String): Unit = {
+    ownerContext.sql(s"create policy testPolicy1 on " +
+        s"$tableName for select to $user2 using id < 30")
+    try {
+      val conn1 = getConnection(Some(user2))
+      try {
+        val q = s"select * from $tableName where id < 20 and name = 'name_3'"
+        val rs = conn1.createStatement().executeQuery(q)
+        assertTrue(rs.next())
+        assertFalse(rs.next())
+      } finally {
+        conn1.close()
+      }
+    } finally {
+      // both tests of this suite use the name testPolicy1, so drop it even when an
+      // assertion failed; otherwise the next "create policy" fails as well
+      ownerContext.sql("drop policy testPolicy1")
+    }
+  }
+
+  /**
+   * Opens a JDBC connection to the network server started in beforeAll. When
+   * `user` is given it is used as both user name and password.
+   */
+  private def getConnection(user: Option[String] = None): Connection = {
+    val connProps = new Properties()
+    user.foreach { name =>
+      connProps.put(Attribute.USERNAME_ATTR, name)
+      connProps.put(Attribute.PASSWORD_ATTR, name)
+    }
+    DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort", connProps)
+  }
+
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledPolicyTest.scala b/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledPolicyTest.scala
new file mode 100644
index 0000000000..72a71c70e2
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/policy/SecurityEnabledPolicyTest.scala
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.policy
+
+import java.sql.SQLException
+
+import com.pivotal.gemfirexd.Attribute
+import com.pivotal.gemfirexd.internal.iapi.error.StandardException
+import org.junit.Assert.assertEquals
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SnappyContext
+
+/**
+ * Policy tests through SnappyContext sessions with LDAP security enabled:
+ * ownership checks for create/drop policy, policies targeted at an LDAP group,
+ * and the CURRENT_USER_LDAP_GROUPS() SQL function.
+ */
+class SecurityEnabledPolicyTest extends PolicyTestBase {
+
+  val user1 = "gemfire1"
+  val user2 = "gemfire2"
+
+  val props = Map.empty[String, String]
+  val tableOwner: String = user1
+  val numElements = 100
+  val colTableName: String = s"$tableOwner.ColumnTable"
+  val rowTableName: String = s"$tableOwner.RowTable"
+  // session of the table owner; assigned in beforeAll
+  var ownerContext: SnappyContext = _
+
+  /** Creates both tables as `tableOwner` and loads `numElements` rows into each. */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    val rows = (0 until numElements).map(i => (s"name_$i", i))
+    val rdd = sc.parallelize(rows)
+    ownerContext = sessionFor(tableOwner)
+    val dataDF = ownerContext.createDataFrame(rdd)
+
+    ownerContext.sql(s"CREATE TABLE $colTableName (name String, id Int) " +
+        s" USING column ")
+
+    ownerContext.sql(s"CREATE TABLE $rowTableName (name String, id Int) " +
+        s" USING row ")
+    Seq(colTableName, rowTableName).foreach(table => dataDF.write.insertInto(table))
+  }
+
+  protected override def newSparkConf(addOn: (SparkConf) => SparkConf): SparkConf = {
+    newLDAPSparkConf(addOn)
+  }
+
+  override def afterAll(): Unit = {
+    Seq(colTableName, rowTableName).foreach { table =>
+      ownerContext.dropTable(table, ifExists = true)
+    }
+    super.afterAll()
+  }
+
+  /** New session that uses `user` as both user name and password. */
+  private def sessionFor(user: String): SnappyContext = {
+    val session = snc.newSession()
+    session.snappySession.conf.set(Attribute.USERNAME_ATTR, user)
+    session.snappySession.conf.set(Attribute.PASSWORD_ATTR, user)
+    session
+  }
+
+  /** Number of rows of the column table that are visible through `session`. */
+  private def visibleRows(session: SnappyContext): Int = {
+    session.sql(s"select * from $colTableName").collect().length
+  }
+
+  /**
+   * Runs `action` and fails with `failureMessage` unless it throws an
+   * SQLException or a StandardException.
+   */
+  private def assertDenied(failureMessage: String)(action: => Unit): Unit = {
+    try {
+      action
+      fail(failureMessage)
+    } catch {
+      case _: SQLException =>
+      case _: StandardException =>
+    }
+  }
+
+
+  test("Check only owner of the table can create policy and drop it") {
+    val nonOwner = sessionFor(user2)
+    val createPolicy = s"create policy testPolicy2 on " +
+        s"$colTableName for select to current_user using id > 10"
+
+    assertDenied("Only owner of the table should be allowed to create policy on it") {
+      nonOwner.sql(createPolicy)
+    }
+
+    ownerContext.sql(createPolicy)
+
+    assertDenied("Only owner of the Policy can drop the policy") {
+      nonOwner.sql(s"drop policy $tableOwner.testPolicy2")
+    }
+
+    ownerContext.sql("drop policy testPolicy2")
+  }
+
+  test("check policy applied to ldap group") {
+    // the ldap group gemGroup2 contains gemfire3, gemfire4, gemfire5
+    ownerContext.sql(s"create policy testPolicy1 on " +
+        s"$colTableName for select to ldapGroup:gemGroup2, gemfire6 using id > 90")
+
+    ownerContext.sql(s"alter table $colTableName enable row level security")
+
+    ownerContext.sql(s"GRANT select ON TABLE $colTableName TO ldapGroup:gemGroup2," +
+        s" gemfire6, gemfire7, gemfire2")
+
+    // gemfire2 is not a target of the policy
+    assertEquals(numElements, visibleRows(sessionFor(user2)))
+
+    // members of gemGroup2 plus gemfire6 only see ids 91..99
+    Seq("gemfire3", "gemfire4", "gemfire5", "gemfire6").foreach { user =>
+      assertEquals(9, visibleRows(sessionFor(user)))
+    }
+
+    // gemfire7 has the grant but is not a target of the policy
+    assertEquals(numElements, visibleRows(sessionFor("gemfire7")))
+
+    assertEquals(numElements, visibleRows(ownerContext))
+  }
+
+  test("test sql function CURRENT_USER_LDAP_GROUPS()") {
+    val gemfire3 = sessionFor("gemfire3")
+    gemfire3.sql(s"CREATE TABLE temp (grp String) " +
+        s" USING row ")
+    gemfire3.sql("insert into temp values ('gemGroup1')," +
+        "('gemGroup2'), ('gemGroup4'), ('gemGroup6')")
+
+    // one "not a member" clause per group, in the order the query lists them
+    val excludedGroups = Seq("GEMGROUP3", "GEMGROUP5", "GEMGROUP7", "GEMGROUP8",
+      "GEMGROUP9", "GEMGROUP10")
+    val notMemberClauses = excludedGroups.map { group =>
+      s" and array_contains(current_user_ldap_groups(), '$group') == false "
+    }.mkString
+
+    val matching = gemfire3.sql("select * from temp where " +
+        "array_contains(current_user_ldap_groups(), upper(grp)) " +
+        notMemberClauses)
+    assertEquals(4, matching.collect().length)
+    gemfire3.sql("drop table if exists temp")
+
+  }
+
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/BitSetTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/BitSetTest.scala
new file mode 100644
index 0000000000..648844b47e
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/BitSetTest.scala
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 SnappyData,
Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + * + * Some of the code taken from Spark's BitSetSuite having the below license. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.store + +import io.snappydata.SnappyFunSuite + +import org.apache.spark.sql.execution.columnar.encoding.BitSet +import org.apache.spark.unsafe.Platform + +/** + * Tests for the static methods of [[BitSet]]. + * + * Some parts taken from Spark's BitSetSuite. 
+ */ +class BitSetTest extends SnappyFunSuite { + + private val baseAddress = Platform.LONG_ARRAY_OFFSET + + private var bitsetSize = 0 + + private def get(bitset: Array[Long], index: Int): Boolean = + BitSet.isSet(bitset, baseAddress, index) + + private def set(bitset: Array[Long], index: Int): Unit = + BitSet.set(bitset, baseAddress, index) + + private def clear(bitset: Array[Long], index: Int): Unit = + BitSet.clear(bitset, baseAddress, index) + + private def anySet(bitset: Array[Long], index: Int): Boolean = + BitSet.anySet(bitset, baseAddress + ((index + 7) >> 3), + (((bitsetSize << 6) - index) + 63) >> 6) + + private def nextSetBit(bitset: Array[Long], index: Int): Int = + BitSet.nextSetBit(bitset, baseAddress, index, bitsetSize) + + private def cardinality(bitset: Array[Long], index: Int): Int = + BitSet.cardinality(bitset, baseAddress, index, bitsetSize) + + test("basic set, get and clear") { + val maxSetBit = 96 + val setBits = Seq(0, 9, 1, 10, 90, maxSetBit) + val bitset = new Array[Long](4) + bitsetSize = 2 + + for (i <- 0 until 100) { + assert(!get(bitset, i)) + } + + setBits.foreach(i => set(bitset, i)) + + for (i <- 0 until 100) { + assert(get(bitset, i) === setBits.contains(i)) + } + for (i <- 0 until 100) { + assert(anySet(bitset, i) === (i <= maxSetBit), "failed for " + i) + } + + // clear the bits and check after each clear + for (i <- 0 until 100) { + if (setBits.contains(i)) { + clear(bitset, i) + } + for (j <- 0 until 100) { + assert(get(bitset, j) === (j > i && setBits.contains(j))) + assert(anySet(bitset, j) === (j <= maxSetBit && i < maxSetBit)) + } + } + + for (i <- 0 until 100) { + assert(!get(bitset, i)) + assert(!anySet(bitset, i)) + } + + setBits.foreach(i => clear(bitset, i)) + + for (i <- 0 until 100) { + assert(!get(bitset, i)) + assert(!anySet(bitset, i)) + } + } + + test("100% full bit set then clear all") { + val bitset = new Array[Long](200) + bitsetSize = 157 + + for (i <- 0 until 10000) { + assert(!get(bitset, i)) + set(bitset, 
i) + } + for (i <- 0 until 10000) { + assert(get(bitset, i)) + } + // clear the bits and check after each clear + for (i <- 0 until 10000) { + clear(bitset, i) + for (j <- 0 until 10000) { + assert(get(bitset, j) === (j > i)) + } + } + for (i <- 0 until 10000) { + assert(!get(bitset, i)) + assert(!anySet(bitset, i)) + } + } + + test("nextSetBit") { + val setBits = Seq(0, 9, 1, 10, 90, 96) + val bitset = new Array[Long](4) + bitsetSize = 2 + + setBits.foreach(i => set(bitset, i)) + + assert(nextSetBit(bitset, 0) === 0) + assert(nextSetBit(bitset, 1) === 1) + assert(nextSetBit(bitset, 2) === 9) + assert(nextSetBit(bitset, 9) === 9) + assert(nextSetBit(bitset, 10) === 10) + assert(nextSetBit(bitset, 11) === 90) + assert(nextSetBit(bitset, 80) === 90) + assert(nextSetBit(bitset, 91) === 96) + assert(nextSetBit(bitset, 96) === 96) + assert(nextSetBit(bitset, 97) === Int.MaxValue) + } + + test("cardinality") { + val setBits = Seq(0, 9, 1, 10, 100, 90, 34, 108, 130, 127, 128, 96, 123, 180, 191) + val bitset = new Array[Long](3) + bitsetSize = 2 + + setBits.foreach(set(bitset, _)) + + assert(cardinality(bitset, 0) === 0) + assert(cardinality(bitset, 1) === 1) + assert(cardinality(bitset, 2) === 2) + assert(cardinality(bitset, 9) === 2) + assert(cardinality(bitset, 10) === 3) + assert(cardinality(bitset, 11) === 4) + assert(cardinality(bitset, 80) === 5) + assert(cardinality(bitset, 91) === 6) + assert(cardinality(bitset, 96) === 6) + assert(cardinality(bitset, 97) === 7) + assert(cardinality(bitset, 100) === 7) + assert(cardinality(bitset, 101) === 8) + assert(cardinality(bitset, 107) === 8) + assert(cardinality(bitset, 108) === 8) + assert(cardinality(bitset, 109) === 9) + assert(cardinality(bitset, 123) === 9) + assert(cardinality(bitset, 124) === 10) + assert(cardinality(bitset, 127) === 10) + assert(cardinality(bitset, 128) === 11) + assert(cardinality(bitset, 130) === 11) + assert(cardinality(bitset, 131) === 11) + assert(cardinality(bitset, 150) === 11) + 
assert(cardinality(bitset, 180) === 11) + assert(cardinality(bitset, 181) === 11) + assert(cardinality(bitset, 190) === 11) + assert(cardinality(bitset, 191) === 11) + assert(cardinality(bitset, 192) === 11) + assert(cardinality(bitset, 193) === 11) + assert(cardinality(bitset, 298989839) === 11) + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/BugTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/BugTest.scala new file mode 100644 index 0000000000..b5051032c5 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/store/BugTest.scala @@ -0,0 +1,1066 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.sql.store + +import java.io.{BufferedReader, FileReader} +import java.lang +import java.sql.{Connection, DriverManager, SQLException, Statement} +import java.util.Properties + +import com.pivotal.gemfirexd.TestUtil +import io.snappydata.SnappyFunSuite.resultSetToDataset +import io.snappydata.{Property, SnappyFunSuite} +import org.junit.Assert._ +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} +import org.apache.spark.sql.catalog.Column +import org.apache.spark.sql.collection.Utils +import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.{Row, SaveMode, SparkSession} + +class BugTest extends SnappyFunSuite with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + } + + test("SNAP-2342 nested query involving joins & union throws Exception") { + snc.sql(s"create table tab1 ( " + + "field1 string," + + "field2 string," + + "field3 string," + + "field4 string," + + "field5 string," + + "field6 string," + + "field7 string," + + "field8 string," + + "field9 string," + + "field10 string," + + "field11 string," + + "field12 string," + + "field13 string," + + "field14 string," + + "field15 string," + + "field16 string," + + "field17 string," + + "localfield8 string," + + "localfield9 string," + + "localfield10 string," + + "localfield11 string," + + "localfield15 string," + + "localfield16 string," + + "field18 string," + + "field19 string," + + "sfield13 string," + + "field20 string," + + "field21 string," + + "field22 string," + + "field23 string )") + + snc.sql("create table tab2 (" + + "field24 string," + + "field9 string," + + "field25 string," + + "field10 string," + + "field11 string," + + "field12 string," + + "field13 string," + + "field14 string," + + "field15 string," + + "field16 string," + + "field17 string," + + "localfield9 string," 
+ + "localfield10 string," + + "localfield11 string," + + "localfield15 string," + + "localfield16 string," + + "field18 string," + + "localfield23 string," + + "field19 string," + + "sfield13 string," + + "field26 string," + + "field20 string," + + "field23 string)") + + snc.sql("create table tab3 (" + + "field27 string, " + + " field27description string, " + + " field28 string )") + + snc.sql("create table tab4 (" + + " field29 string," + + "field29description string," + + " field27 string, " + + " field27description string)") + + snc.sql("create table tab5 (" + + "field27 string," + + "field27description string," + + "field30 string," + + "field30description string)") + + snc.sql("create table tab6 (" + + "field1 string," + + "field27 string," + + "field27description string," + + "field28 string," + + "field31 string," + + "field32 string," + + "field32description string)") + + snc.sql(s"create or replace view view1 as " + + s"( select field33,field27,first(field27Description) as field27Description, " + + s"first(field29) as field29, " + + s"first(field29Description) as field29Description, first(field30) as field30, " + + s"first(field30Description) as field30Description, " + + s"first(field28) as field28," + + s"format_number(sum(field34),2) as field34," + + s" format_number(sum(field34),2) as field35,format_number(sum(total),2) as total from" + + s" ( select a.field14 as field33,a.field17 as leLocal," + + s" a.field19 as field36," + + s" a.field20 as field37,a.field11 as field27," + + s"a.localfield11 as field32," + + s" SUM(field12) as field35,SUM(sfield13) as field34,SUM(field13) as total," + + s" first(b.field27Description) as field27Description," + + s" first(b.field28) as field28," + + s" first((case when a.field20='x1' then e.field32Description " + + s" when a.field20='b1' then b.field27Description else '' end)) " + + s" as field32Description ," + + s" first(c.field29Description) as field29Description," + + s"first(d.field30Description) as 
field30Description, " + + s" first(c.field29) as field29, first(d.field30) as field30 from ( select field16," + + s"field14," + + s" field17,field19,field9,field20,localfield11," + + s" field11,last(localfield10),SUM(field13) as field13," + + s" SUM(field12) as field12,SUM(sfield13) as sfield13, field11 ," + + s" 'Y1' as field1,localfield11 as field32 from " + + s" ( select field16,field14,field17,field19,field9,field20," + + s" localfield11,field11,localfield10,field13,field12,sfield13" + + s" from tab1 where field16='0L' and field14='7600' " + + s" AND field9='2017' and field8<=3 AND field20='b1' union all" + + s" select field16,field14,field17,field19,field9,field20," + + s" localfield11,field11,localfield10,field13,field12," + + s" sfield13 from tab2 where field16='0L' and field14='7600'" + + s" AND field9='2017' AND field20='btb_latam' ) group by field16," + + s" field14,field17,field19,field9,field20," + + s" localfield11,field11 ) a" + + s" left join tab3 b on (a.field11=b.field27) left join " + + s" tab4 c " + + s" on (a.field11=c.field27) left join tab5 d on (a.field11=d.field27)" + + s" left join tab6 e on(a.field1=e.field1 and " + + s" a.field11 = e.field27 and a.field32 = e.field32 ) group by a.field14," + + s"a.field17," + + s" a.field19,a.field20,a.field11,a.localfield11," + + s"c.field29,d.field30) group by field33,field27)") + + snc.sql("drop view view1") + snc.sql("drop table if exists tab1") + snc.sql("drop table if exists tab2") + snc.sql("drop table if exists tab3") + snc.sql("drop table if exists tab4") + snc.sql("drop table if exists tab5") + snc.sql("drop table if exists tab6") + + } +///////// + test("Bug SNAP-2332 . 
ParamLiteral found in First/Last Aggregate Function") { + snc + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + var stmt = conn.createStatement() + val snappy = snc.snappySession + + val insertDF = snappy.range(50).selectExpr("id", "(id * 12) as k", + "concat('val', cast(100 as string)) as s") + + snappy.sql("drop table if exists test") + snappy.sql("create table test (id bigint, k bigint, s varchar(10)) " + + "using column options(buckets '8')") + insertDF.write.insertInto("test") + val query1 = "select sum(id) as summ, first(s, true) as firstt, last(s, true) as lastt" + + " from test having (first(s, true) = 'val100' or last(s, true) = 'val100' )" + + + var ps = conn.prepareStatement(query1) + var resultset = ps.executeQuery() + while (resultset.next()) { + resultset.getDouble(1) + } + + var rs = snappy.sql(query1) + rs.collect() + rs = snappy.sql(query1) + rs.collect() + + resultset = stmt.executeQuery(query1) + while (resultset.next()) { + resultset.getDouble(1) + } + + val query2 = "select sum(id) summ , first(s) firstt, last(s) lastt " + + " from test having (first(s) = 'val100' or last(s) = 'val100' )" + + ps = conn.prepareStatement(query2) + resultset = ps.executeQuery() + while (resultset.next()) { + resultset.getDouble(1) + } + + resultset = stmt.executeQuery(query2) + while (resultset.next()) { + resultset.getDouble(1) + } + + rs = snappy.sql(query2) + rs.collect() + + + stmt.execute(s"create or replace view X as ($query2)") + val query3 = "select * from X where summ > 0" + ps = conn.prepareStatement(query3) + resultset = ps.executeQuery() + while (resultset.next()) { + resultset.getDouble(1) + } + + rs = snappy.sql(query3) + rs.collect() + rs = snappy.sql(query3) + rs.collect() + resultset = stmt.executeQuery(query3) + while (resultset.next()) { + resultset.getDouble(1) + } + + val table1 = s"create table tab1 ( " + + "field1 string," + + "field2 string," + + "field3 
string," + + "field4 string," + + "field5 string," + + "field6 string," + + "field7 string," + + "field8 string," + + "field9 string," + + "field10 string," + + "field11 string," + + "field12 string," + + "field13 string," + + "field14 string," + + "field15 string," + + "field16 string," + + "field17 string," + + "localfield8 string," + + "localfield9 string," + + "localfield10 string," + + "localfield11 string," + + "localfield15 string," + + "localfield16 string," + + "field18 string," + + "field19 string," + + "sfield13 string," + + "field20 string," + + "field21 string," + + "field22 string," + + "field23 string )" + + val table2 = "create table tab7 (" + + "globalfield16 string," + + "globalfield16description string," + + "localfield16 string, " + + "localfield16description string," + + "field20 string)" + + val table3 = "create table field29_hier (" + + "field38 string," + + "field39 string," + + "field40 string," + + "field41 string," + + "field42 string," + + "field43 string," + + "subfield38 string," + + "subfield39 string," + + "field44 string," + + "field45 string," + + "field46 string)" + + stmt.execute(table1) + stmt.execute(table2) + stmt.execute(table3) + + val view = "CREATE or replace view view2 as (SELECT " + + "A.field9,first(A.field8) as field8,A.field14, A.localfield16," + + "A.field20, A.field11,A.field3, A.field19, " + + " first(A.field15) as field15," + + " first(A.field23) as field23, first(A.field10) as field10," + + "SUM(A.field13 ) as field13, " + + "first(B.globalfield16Description) as globalfield16Description," + + " first(C.field44) as field44," + + " first(C.field38) as field38, " + + "first(C.field45) as field45," + + " first(C.subfield38) as subfield38, " + + "first(C.field40) as field40" + + " FROM tab1 A LEFT JOIN tab7 B " + + " ON A.field20 = B.field20 AND " + + " B.globalfield16 = A.localfield16 LEFT JOIN field29_hier C ON " + + " A.field3 = field42 WHERE A.localfield16 ='0L' " + + " GROUP BY A.field14, A.field19, " + + 
"A.field9, A.field20, A.field11, A.field3, A.localfield16 " + + "having ( SUM(A.field13 ) > 0.001F or SUM(A.field13 ) < -0.001F) );" + + stmt.execute(view) + + val q = "SELECT field14 FROM view2 GROUP BY 1" + ps = conn.prepareStatement(q) + + resultset = ps.executeQuery() + while (resultset.next()) { + resultset.getString(1) + } + stmt.execute("drop view view2") + snc.sql("drop table if exists tab1") + snc.sql("drop table if exists tab7") + snc.sql("drop table if exists field29_hier") + conn.close() + TestUtil.stopNetServer() + + } + + test("big view") { + val snc = this.snc + val serverHostPort2 = TestUtil.startNetServer() + val conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + val session = this.snc.snappySession + + // check temporary view with USING and its meta-data + val hfile: String = getClass.getResource("/2015.parquet").getPath + val stagingDF = snc.read.load(hfile) + snc.createTable("airline", "column", stagingDF.schema, + Map.empty[String, String]) + + // create a big view on it + val viewFile = getClass.getResource("/bigviewcase.sql") + val br = new BufferedReader(new FileReader(viewFile.getFile)) + var viewSql = "" + var keepGoing = true + while(keepGoing) { + val x = br.readLine() + if (x != null) { + viewSql += x + } else { + keepGoing = false + } + } + val viewname = "AIRLINEBOGUSVIEW" + + // check catalog cache is cleared for VIEWs + val cstmt = conn.prepareCall(s"call SYS.GET_COLUMN_TABLE_SCHEMA(?, ?, ?)") + cstmt.setString(1, "APP") + cstmt.setString(2, viewname) + cstmt.registerOutParameter(3, java.sql.Types.CLOB) + try { + cstmt.execute() + assert(cstmt.getString(3) === "") + fail("expected to fail") + } catch { + case se: SQLException if se.getSQLState == "XIE0M" => + } + + // create view + session.sql(viewSql) + + // meta-data lookup should not fail now + cstmt.execute() + assert(cstmt.getString(3).contains( + "CASE WHEN (yeari > 0) THEN CAST(1 AS DECIMAL(11,1)) ELSE CAST(1.1 AS DECIMAL(11,1)) END")) + + // query on 
view + session.sql(s"select count(*) from $viewname").collect() + // check column names + val rs = conn.getMetaData.getColumns(null, null, viewname, "%") + var foundValidColumnName = false + while(rs.next() && !foundValidColumnName) { + val colName = rs.getString("COLUMN_NAME") + if (colName == "yeari") { + foundValidColumnName = true + } + } + assert(foundValidColumnName) + + snc.sql(s"drop view $viewname") + snc.sql("drop table airline") + conn.close() + TestUtil.stopNetServer() + } + + test("Column table creation test - SNAP-2577") { + snc + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + var stmt = conn.createStatement() + val session = this.snc.snappySession + stmt.execute(s"CREATE TABLE temp (username String, id Int) " + + s" USING column ") + val seq = Seq("USERX" -> 4, "USERX" -> 5, "USERX" -> 6, "USERY" -> 7, + "USERY" -> 8, "USERY" -> 9) + val rdd = sc.parallelize(seq) + + val dataDF = session.createDataFrame(rdd) + + dataDF.write.insertInto("temp") + snc.sql("drop table temp") + conn.close() + TestUtil.stopNetServer() + } + + test("Bug SNAP-2758 . 
view containing aggregate function & join throws error") { + snc + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + var stmt = conn.createStatement() + val snappy = snc.snappySession + snappy.sql("drop table if exists test1") + snappy.sql("create table test1 (col1_1 int, col1_2 int, col1_3 int, col1_4 string) " + + "using column ") + + snappy.sql("create table test2 (col2_1 int, col2_2 int, col2_3 int, col2_5 string) " + + "using column ") + + snappy.sql(" CREATE OR REPLACE VIEW v1 as select col2_1, col2_2, " + + "col2_5 as longtext from test2 where col2_3 > 10") + + val q1 = "select a.col1_1, a.col1_2, " + + " CASE WHEN a.col1_4 = '' THEN '#' ELSE a.col1_4 END functionalAreaCode," + + "b.longtext as name, " + + " sum(a.col1_3)" + + "from test1 a left outer join v1 as b on a.col1_1 = b.col2_1" + + " group by a.col1_1, a.col1_2, " + + " CASE WHEN a.col1_4 = '' THEN '#' ELSE a.col1_4 END," + + " b.longtext " + snappy.sql(q1) + snappy.sql(s" CREATE OR REPLACE VIEW v3 as $q1") + + val q = "select a.col1_1, a.col1_2, " + + " CASE WHEN a.col1_4 = '' THEN '#' ELSE a.col1_4 END functionalAreaCode," + + "'#' as fsid, " + + "b.longtext as name, " + + " sum(a.col1_3)" + + "from test1 a left outer join v1 as b on a.col1_1 = b.col2_1" + + " group by a.col1_1, a.col1_2, " + + " CASE WHEN a.col1_4 = '' THEN '#' ELSE a.col1_4 END," + + " '#'," + + " b.longtext " + snappy.sql(q) + snappy.sql(s" CREATE OR REPLACE VIEW v2 as $q") + snappy.sql("select count(*) from v2").collect() + + stmt.execute("drop view v3") + stmt.execute("drop view v2") + stmt.execute("drop view v1") + snc.sql("drop table if exists test1") + snc.sql("drop table if exists test2") + + conn.close() + TestUtil.stopNetServer() + + } + + test("Bug SNAP-2887") { + snc + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + var stmt = conn.createStatement() + val 
snappy = snc.snappySession + snappy.sql("drop table if exists portfolio") + snappy.sql(s"create table portfolio (cid int not null, sid int not null, " + + s"qty int not null,availQty int not null, subTotal int, tid int, " + + s"constraint portf_pk primary key (cid, sid))") + + val insertStr = s"insert into portfolio values (?, ?, ?, ?, ? , ?)" + val ps = conn.prepareStatement(insertStr) + for (i <- 1 until 101) { + ps.setInt(1, i % 10) + ps.setInt(2, i * 10) + ps.setInt(3, i) + ps.setInt(4, i) + ps.setInt(5, i) + ps.setInt(6, 10) + ps.executeUpdate() + } + val query = s"select * from portfolio where cid = ? and Sid = ? and tid = ?" + val qps = conn.prepareStatement(query) + for (i <- 0 until 11) { + qps.setInt(1, 8) + qps.setInt(2, 20) + qps.setInt(3, 10) + val rs = qps.executeQuery() + var count = 0 + while (rs.next()) { + count += 1 + } + assert(count == 0) + } + snappy.sql(s"create index portfolio_sid on portfolio (sId )") + + for (i <- 0 until 11) { + qps.setInt(1, 8) + qps.setInt(2, 20) + qps.setInt(3, 10) + val rs = qps.executeQuery() + var count = 0 + while (rs.next()) { + + count += 1 + } + assert(count == 0) + } + stmt.execute("drop index if exists portfolio_sid") + stmt.execute("drop table if exists portfolio") + } + + test("Bug SNAP-2890") { + snc + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + var stmt = conn.createStatement() + val snappy = snc.snappySession + val numCols = 132 + snappy.conf.set(Property.ColumnBatchSize.name, "256") + snappy.sql("drop table if exists test1") + val sb = new StringBuilder + for(i <- 1 until numCols + 1) { + sb.append(s"col$i string,") + } + sb.deleteCharAt(sb.length -1) + + snappy.sql(s"create table test1 (${sb.toString()}) " + + "using column ") + val params = Array.fill(numCols)('?').mkString(",") + val insertStr = s"insert into test1 values (${params})" + val ps = conn.prepareStatement(insertStr) + for (i <- 0 until 1000) { + for (j <- 
1 until numCols + 1) { + ps.setString(j, j.toString) + } + ps.addBatch() + } + ps.executeBatch() + snappy.sql(s"create table test2 using column as ( select * from test1)") + for(i <- 1 until numCols + 1) { + snappy.sql(s"select col${i} from test1").collect().foreach(r => + assert(r.getString(0).toInt == i) ) + } + for(i <- 1 until (numCols + 1)/2) { + snappy.sql(s"select col${i}, col${numCols - i + 1} from test1").collect().foreach(r => + { + assert(r.getString(0).toInt == i) + assert(r.getString(1).toInt == numCols -i + 1) + } ) + } + for(i <- 1 until numCols + 1) { + val projSeq = for (j <- 1 until i + 1) yield { + s"col${j}" + } + val projectionStr = projSeq.mkString(",") + + snappy.sql(s"select $projectionStr from test1 limit 10").collect().foreach(r => + for (j <- 1 until i + 1 ) { + assert(r.getString(j -1).toInt == j) + }) + } + snappy.sql("select col126 from test2").collect() + snappy.sql("select col128 from test2").collect() + snappy.sql("select col127 from test2").collect() + snappy.sql("select col130 from test2").collect() + snappy.sql("select col129 from test2").collect() + snc.sql("drop table if exists test1") + snc.sql("drop table if exists test2") + conn.close() + TestUtil.stopNetServer() + } + + test("SNAP-2718") { + snc + val path1 = getClass.getResource("/patients1000.csv").getPath + val df1 = snc.read.format("csv").option("header", "true").load(path1) + df1.registerTempTable("patients") + val path2 = getClass.getResource("/careplans1000.csv").getPath + val df2 = snc.read.format("csv").option("header", "true").load(path2) + df2.registerTempTable("careplans") + + snc.sql("select p.first, p.last from (select patient from ( select *, " + + "case when description in ('Anti-suicide psychotherapy', 'Psychiatry care plan', " + + "'Major depressive disorder clinical management plan') then 1 else 0 end as coverage " + + "from careplans )c group by patient having sum(coverage) = 0)q " + + "join patients p on id = patient ").collect + + 
df1.createOrReplaceTempView("patients_v") + df2.createOrReplaceTempView("careplans_v") + + snc.sql("select p.first, p.last from (select patient from ( select *, " + + "case when description in ('Anti-suicide psychotherapy', 'Psychiatry care plan', " + + "'Major depressive disorder clinical management plan') then 1 else 0 end as coverage " + + "from careplans_v )c group by patient having sum(coverage) = 0)q " + + "join patients_v p on id = patient ").collect + + snc.dropTempTable("patients") + snc.dropTempTable("careplans") + snc.sql("drop view patients_v") + snc.sql("drop view careplans_v") + } + + test("SNAP-2368") { + snc + try { + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + val schema = StructType(List(StructField("name", StringType, nullable = true))) + val data = Seq( + Row("abc"), + Row("def") + ) + val stmt = conn.createStatement() + val sparkSession = SparkSession.builder.appName("test"). + sparkContext(snc.sparkContext).getOrCreate() + val namesDF = sparkSession.createDataFrame(snc.sparkContext.parallelize(data), schema) + namesDF.createOrReplaceTempView("names") + sparkSession.table("names"). + write.mode(SaveMode.Overwrite).jdbc( + s"jdbc:snappydata://$serverHostPort2/", "names", new Properties()) + var rs = stmt.executeQuery("select tabletype from sys.systables where tablename = 'NAMES'") + rs.next() + var tableType = rs.getString(1) + assertEquals("T", tableType) + stmt.execute("drop table names") + rs = stmt.executeQuery("select tabletype from sys.systables where tablename = 'NAMES'") + assertFalse(rs.next()) + val props = new Properties() + props.put("createTableOptions", " using column options( buckets '13')") + props.put("isolationLevel", "NONE") + sparkSession.table("names"). 
+ write.mode(SaveMode.Overwrite).jdbc( + s"jdbc:snappydata://$serverHostPort2/", "names", props) + + rs = stmt.executeQuery("select tabletype from sys.systables where tablename = 'NAMES'") + rs.next() + tableType = rs.getString(1) + assertEquals("C", tableType) + stmt.execute("drop table if exists test") + } finally { + TestUtil.stopNetServer + } + } + + ignore("SNAP-2910") { + snc + try { + var serverHostPort2 = TestUtil.startNetServer() + var conn = DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort2") + val schema = StructType(List(StructField("name", StringType, nullable = true))) + val data = Seq( + Row("abc"), + Row("def") + ) + + val stmt = conn.createStatement() + + val sparkSession = SparkSession.builder.appName("test"). + sparkContext(snc.sparkContext).getOrCreate() + val namesDF = sparkSession.createDataFrame(snc.sparkContext.parallelize(data), schema) + namesDF.createOrReplaceTempView("names") + + val props = new Properties() + props.put("createTableOptions", " using column options( buckets '13')") + sparkSession.table("names"). 
+ write.mode(SaveMode.Overwrite).jdbc( + s"jdbc:snappydata://$serverHostPort2/", "names", props) + + val rs = stmt.executeQuery("select tabletype from sys.systables where tablename = 'NAMES'") + rs.next() + val tableType = rs.getString(1) + assertEquals("C", tableType) + stmt.execute("drop table if exists test") + } finally { + TestUtil.stopNetServer + } + } + + test("SNAP-2237") { + snc + snc.sql("drop table if exists test1") + snc.sql("create table test1 (col1_1 int, col1_2 int, col1_3 int, col1_4 string) " + + "using column ") + val insertDF = snc.range(50).selectExpr("id", "id*2", "id * 3", + "cast (id as string)") + insertDF.write.insertInto("test1") + snc.sql("select col1_2, sum(col1_1) as summ from test1 group by col1_2 " + + "order by sum(col1_1)").collect + snc.sql("select col1_2, sum(col1_1) as summ from test1 " + + "group by col1_2 order by summ").collect + snc.sql("select lower(col1_2) as x, " + + "sum(col1_1) as summ from test1 group by lower(col1_2) ").collect + snc.sql("select lower(col1_2) as x, sum(col1_1) as summ from test1 " + + "group by x").collect + snc.dropTable("test1") + } + + test("Verify number of tasks for limit query") { + val hfile: String = getClass.getResource("/2015.parquet").getPath + snc.sql(s"CREATE EXTERNAL TABLE STAGING_AIRLINE USING parquet options(path '$hfile')") + var numTasks = 0 + snc.sparkContext.addSparkListener( new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = numTasks += 1 + }) + snc.sql(s"select * from STAGING_AIRLINE limit 1").collect() + // num tasks should be 1 as only 1 partition needs to be scanned to get 1 row + assert(numTasks == 1, s"numTasks should be 1. 
numTasks is $numTasks")
+
+    snc.sql(s"CREATE TABLE T1 (COL1 INT) using column options ('buckets' '3')")
+    snc.sql(s"INSERT INTO T1 VALUES (1)")
+    snc.sql(s"INSERT INTO T1 VALUES (2)")
+    snc.sql(s"INSERT INTO T1 VALUES (3)")
+    snc.sql(s"INSERT INTO T1 VALUES (4)")
+    snc.sql(s"INSERT INTO T1 VALUES (5)")
+    snc.sql(s"INSERT INTO T1 VALUES (6)")
+    numTasks = 0
+    snc.sql(s"select * from T1 limit 1").collect()
+    // num tasks should be 1 as only 1 partition needs to be scanned to get 1 row
+    assert(numTasks == 1, s"numTasks should be 1. numTasks is $numTasks")
+    snc.sql(s"drop table STAGING_AIRLINE")
+    snc.sql(s"drop table T1")
+  }
+
+  // Checks LIMIT on a 128-bucket column table through JDBC (5000 of 10000 rows) and
+  // compares "limit 10000" against the full table through both the session and JDBC.
+  test("multi-partition limit") {
+    val snappy = snc.snappySession
+    snappy.sql("create table testLimit (id long, data string, data2 string) using column " +
+        "options (partition_by 'id', buckets '128') as " +
+        "select id, 'someTestData_' || id, 'someOtherData_' || id from range(10000)")
+    val schema = snappy.table("testLimit").schema
+    val port = TestUtil.startNetserverAndReturnPort()
+    try {
+      val conn = DriverManager.getConnection(s"jdbc:snappydata://localhost:$port")
+      try {
+        val stmt = conn.createStatement()
+        val rows = Utils.resultSetToSparkInternalRows(
+          stmt.executeQuery("select * from testLimit limit 5000"), schema)
+        assert(rows.length === 5000)
+        val res = snappy.sql("select * from testLimit limit 10000")
+        val expected = snappy.sql("select * from testLimit").collect()
+        checkAnswer(res, expected)
+        val res2 = SnappyFunSuite.resultSetToDataset(
+          snappy, stmt)("select * from testLimit limit 10000")
+        checkAnswer(res2, expected)
+      } finally {
+        conn.close()
+      }
+    } finally {
+      // run the cleanup even when an assertion above fails, so later tests
+      // do not see a leftover table or a still-running net server
+      snappy.sql("drop table testLimit")
+      TestUtil.stopNetServer()
+    }
+  }
+
+  test("support for 'default' schema without explicit quotes") {
+    val session = snc.snappySession
+    val serverHostPort = TestUtil.startNetServer()
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+
+    session.sql("create table default.t1(id bigint primary key, name varchar(10))")
+    var keys = 
session.sessionCatalog.getKeyColumns("default.t1")
+    assert(keys.length === 1)
+    assert(keys.head.toString === new Column("id", null, "bigint", false, false, false).toString)
+
+    // also test from JDBC
+    val stmt = conn.createStatement()
+    stmt.execute("create table default.t2(id bigint not null primary key, name varchar(10))")
+    keys = session.sessionCatalog.getKeyColumns("default.t2")
+    assert(keys.length === 1)
+    assert(keys.head.toString === new Column("id", null, "bigint", false, false, false).toString)
+
+    // t1 through the session API: the schema is written unquoted (default),
+    // back-quoted upper case (`DEFAULT`) and back-quoted lower case (`default`)
+    session.sql("insert into default.t1 values (1, 'name1'), (2, 'name2')")
+    var res = session.sql("select * from default.t1 order by id").collect()
+    assert(res === Array(Row(1L, "name1"), Row(2L, "name2")))
+    res = session.sql("select * from default.t1 where id = 1").collect()
+    assert(res === Array(Row(1L, "name1")))
+    res = session.sql("select * from `DEFAULT`.t1 where id = 2").collect()
+    assert(res === Array(Row(2L, "name2")))
+    session.sql("insert into `default`.`t1` values (3, 'name3'), (4, 'name4')")
+    res = session.sql("select * from `default`.`t1` order by id").collect()
+    assert(res === Array(Row(1L, "name1"), Row(2L, "name2"), Row(3L, "name3"), Row(4L, "name4")))
+    res = session.sql("select * from default.t1 where id = 3").collect()
+    assert(res === Array(Row(3L, "name3")))
+    res = session.sql("select * from `DEFAULT`.t1 where id = 4").collect()
+    assert(res === Array(Row(4L, "name4")))
+
+    // same checks for t2, this time issued over the JDBC statement
+    stmt.execute("insert into default.t2 values (1, 'name1'), (2, 'name2')")
+    res = resultSetToDataset(session, stmt)("select * from default.t2 order by id").collect()
+    assert(res === Array(Row(1L, "name1"), Row(2L, "name2")))
+    res = resultSetToDataset(session, stmt)("select * from default.t2 where id = 1").collect()
+    assert(res === Array(Row(1L, "name1")))
+    res = resultSetToDataset(session, stmt)("select * from `default`.t2 where id = 2").collect()
+    assert(res === Array(Row(2L, "name2")))
+    stmt.execute("insert into `DEFAULT`.`T2` values (3, 'name3'), (4, 
'name4')")
+    res = resultSetToDataset(session, stmt)("select * from default.t2 order by id").collect()
+    assert(res === Array(Row(1L, "name1"), Row(2L, "name2"), Row(3L, "name3"), Row(4L, "name4")))
+    res = resultSetToDataset(session, stmt)("select * from default.t2 where id = 3").collect()
+    assert(res === Array(Row(3L, "name3")))
+    res = resultSetToDataset(session, stmt)("select * from `DEFAULT`.`t2` where id = 4").collect()
+    assert(res === Array(Row(4L, "name4")))
+
+    // check ALTER TABLE
+    session.sql("alter table default.t1 set eviction maxsize 1000")
+    session.sql("alter table `DEFAULT`.t2 set eviction maxsize 1000")
+    stmt.execute("alter table default.t1 set eviction maxsize 500")
+    stmt.execute("alter table \"default\".\"t2\" set eviction maxsize 500")
+
+    stmt.close()
+    conn.close()
+
+    TestUtil.stopNetServer()
+  }
+
+  // SNAP3007: a batched prepared INSERT followed by a batched prepared DELETE without
+  // a WHERE clause must leave app.application empty.
+  test("SNAP3007") {
+    val session = snc.snappySession
+    val serverHostPort = TestUtil.startNetServer()
+    try {
+      val conn = DriverManager.getConnection(
+        "jdbc:snappydata://" + serverHostPort)
+      try {
+        // scalastyle:off println
+        println(s"testSNAP3007: Connected to $serverHostPort")
+        // scalastyle:on println
+        val stmt = conn.createStatement()
+        stmt.execute("CREATE TABLE app.application(application VARCHAR(64), " +
+            "content CLOB, active BOOLEAN, configuration CLOB)")
+
+        val sql = "INSERT INTO app.application VALUES (?, ?, ?, ?)"
+        val pstmt1 = conn.prepareStatement(sql)
+        pstmt1.setString(1, "a")
+        pstmt1.setString(2, "b")
+        pstmt1.setBoolean(3, true)
+        pstmt1.setString(4, "c")
+        pstmt1.addBatch()
+        pstmt1.executeBatch
+        pstmt1.close()
+
+        val sql2 = "DELETE FROM app.application"
+        val pstmt2 = conn.prepareStatement(sql2)
+        pstmt2.addBatch()
+        pstmt2.executeBatch
+        pstmt2.close()
+
+        val sql3 = "select count(*) from app.application"
+        // reuse the existing statement instead of leaking an anonymous one
+        val rs = stmt.executeQuery(sql3)
+        assert(rs.next())
+        assert(rs.getInt(1) == 0, "Table should not contain any data after delete statement")
+        rs.close()
+
+        // leave no table behind so a re-run of CREATE TABLE does not fail
+        stmt.execute("DROP TABLE IF EXISTS app.application")
+        stmt.close()
+      } finally {
+        conn.close()
+      }
+    } finally {
+      TestUtil.stopNetServer()
+    }
+  }
+
+  test("SNAP-2730 - support NAN 
values") {
+    // acos(30) is outside the function's domain; the JDBC result must come back as NaN
+    val session = snc.snappySession
+    val serverHostPort = TestUtil.startNetServer()
+    try {
+      val conn = DriverManager.getConnection(
+        "jdbc:snappydata://" + serverHostPort)
+      try {
+        val stmt = conn.createStatement()
+        val result = stmt.executeQuery("select acos(30)")
+        assert(result.next(), "result set should have 1 record")
+        assert(result.getDouble(1).isNaN, "result is not NaN value")
+        result.close()
+        stmt.close()
+      } finally {
+        conn.close()
+      }
+    } finally {
+      // pair the startNetServer() above, as the other JDBC tests in this suite do
+      TestUtil.stopNetServer()
+    }
+  }
+
+  // SNAP2765: statements prepared after "alter table ... drop col2" must fail with the
+  // asserted SQL states when they still refer to col2 or use the old column count.
+  test("SNAP2765") {
+    val session = snc.snappySession
+    val serverHostPort = TestUtil.startNetServer()
+    val conn = DriverManager.getConnection(
+      "jdbc:snappydata://" + serverHostPort)
+
+    // scalastyle:off println
+    println(s"testSNAP2765: Connected to $serverHostPort")
+    val stmt = conn.createStatement()
+    stmt.execute("create table t1(col1 int, col2 int, col3 int, col4 int) using row")
+    val ps1 = conn.prepareStatement("insert into t1(col1, col2, col3, col4) values(?, ?, ?, ?)")
+    ps1.setInt(1, 1)
+    ps1.setInt(2, 1)
+    ps1.setInt(3, 1)
+    ps1.setInt(4, 1)
+    ps1.execute()
+
+    val ps2 = conn.prepareStatement("select * from t1 where col4=?")
+    ps2.setInt(1, 1)
+    val rs2 = ps2.executeQuery()
+    assert(rs2.next())
+    assert(rs2.getInt(1) == 1)
+    rs2.close()
+
+    stmt.execute("alter table t1 drop col2 restrict")
+    val se0 = intercept[SQLException] {
+      conn.prepareStatement("insert into t1(col1, col2, col3, col4) values(?, ?, ?, ?)")
+    }
+    assert(se0.getSQLState.equals("42X14"))
+
+    val se1 = intercept[SQLException] {
+      conn.prepareStatement("insert into t1 values(?, ?, ?, ?)")
+    }
+    assert(se1.getSQLState.equals("42802"))
+
+    val ps3 = conn.prepareStatement("insert into t1(col1, col3, col4) values(?, ?, ?)")
+    ps3.setInt(1, 1)
+    ps3.setInt(2, 1)
+    ps3.setInt(3, 1)
+    ps3.execute()
+
+    val ps4 = conn.prepareStatement("select count(*) from t1 where col4=?")
+    ps4.setInt(1, 1)
+    val rs4 = ps4.executeQuery()
+    assert(rs4.next())
+    assert(rs4.getInt(1) == 2)
+    rs4.close()
+
+    val se2 = intercept[SQLException] {
+      conn.prepareStatement("update t1 set col2 = ?")
+    }
+    
assert(se2.getSQLState.equals("42X14"))
+
+    val se3 = intercept[SQLException] {
+      conn.prepareStatement("insert into t1(col1, col1, col4) values(?, ?, ?)")
+    }
+    assert(se3.getSQLState.equals("42X13"))
+
+    val se4 = intercept[SQLException] {
+      conn.prepareStatement("delete from t1 where col2 = ?")
+    }
+    assert(se4.getSQLState.equals("42X04"))
+    // scalastyle:on println
+
+    // release the JDBC resources and the net server started by this test, and
+    // remove t1 so it cannot collide with tables created by other tests
+    stmt.close()
+    conn.close()
+    snc.sql("drop table if exists t1")
+    TestUtil.stopNetServer()
+  }
+
+
+  // SNAP3082: runs the prepared-vs-unprepared comparison once per parameter type.
+  test("SNAP3082") {
+    val session = snc.snappySession
+    val serverHostPort = TestUtil.startNetServer()
+    try {
+      val conn = DriverManager.getConnection(
+        "jdbc:snappydata://" + serverHostPort)
+      try {
+        // scalastyle:off println
+        println(s"SNAP3082: Connected to $serverHostPort")
+        val stmt = conn.createStatement()
+        insertDataAndTestSNAP3082(conn, stmt, "DOUBLE")
+        insertDataAndTestSNAP3082(conn, stmt, "STRING")
+        insertDataAndTestSNAP3082(conn, stmt, "FLOAT")
+        insertDataAndTestSNAP3082(conn, stmt, "DECIMAL")
+        // scalastyle:on println
+        stmt.close()
+      } finally {
+        conn.close()
+      }
+    } finally {
+      TestUtil.stopNetServer()
+    }
+  }
+
+  /**
+   * Recreates `column_table`, inserts ten rows binding the decimal columns with the
+   * setter selected by `dataTypeForSetParams` ("DOUBLE", "STRING", "FLOAT" or "DECIMAL"),
+   * then compares a prepared select against the equivalent literal-cast select.
+   * Any other value of `dataTypeForSetParams` ends in a MatchError.
+   */
+  private def insertDataAndTestSNAP3082(conn: Connection, stmt: Statement,
+      dataTypeForSetParams: String): Unit = {
+    // scalastyle:off println
+    println(s"Setting prepared statement parameters as $dataTypeForSetParams")
+    stmt.execute("drop table if exists column_table")
+    stmt.execute("create table column_table (col1 int, col2 decimal," +
+        " col3 decimal(10, 5)) using column")
+    val ps1 = conn.prepareStatement("insert into column_table values (?, ?, ?)")
+    val numRows = 10
+    for (i <- 0 until numRows) {
+      ps1.setInt(1, i)
+      dataTypeForSetParams match {
+        case "DOUBLE" =>
+          ps1.setDouble(2, java.lang.Double.valueOf(i * 0.1))
+          ps1.setDouble(3, java.lang.Double.valueOf(i * 0.1))
+        case "STRING" =>
+          // note: string concatenation, i.e. the text of i followed by "0.1"
+          ps1.setString(2, s"$i" + 0.1)
+          ps1.setString(3, s"$i" + 0.1)
+        case "FLOAT" =>
+          ps1.setFloat(2, java.lang.Float.valueOf(new lang.Float(i*0.1)))
+          ps1.setFloat(3, java.lang.Float.valueOf(new lang.Float(i*0.1)))
+        case "DECIMAL" =>
+          ps1.setBigDecimal(2, new java.math.BigDecimal(s"$i" + 0.1))
+          ps1.setBigDecimal(3, new java.math.BigDecimal(s"$i" + 0.1))
+      }
+      ps1.executeUpdate()
+    }
+    // the insert statement is not used past this point
+    ps1.close()
+
+    
println("executing prepared select statement")
+    // slot j holds the (col2, col3) pair returned for the j-th probe value; it stays
+    // null when the query returns no row
+    val result1: Array[(java.math.BigDecimal, java.math.BigDecimal)] = new Array(numRows)
+    val ps2 = conn.prepareStatement("select * from column_table where col2 = ? order by col1")
+    for (j <- 0 until numRows) {
+      dataTypeForSetParams match {
+        case "DOUBLE" =>
+          ps2.setDouble(1, java.lang.Double.valueOf(j * 0.1))
+        case "STRING" =>
+          ps2.setString(1, s"$j" + 0.1)
+        case "FLOAT" =>
+          ps2.setFloat(1, java.lang.Float.valueOf(new lang.Float(j * 0.1)))
+        case "DECIMAL" =>
+          ps2.setBigDecimal(1, new java.math.BigDecimal(s"$j" + 0.1))
+      }
+
+      val rs2 = ps2.executeQuery()
+
+      while (rs2.next()) {
+        val columnValue1 = rs2.getBigDecimal(2)
+        val columnValue2 = rs2.getBigDecimal(3)
+        result1(j) = (columnValue1, columnValue2)
+        // debug statement
+//        println(s"rowNumber = $j (columnVale1, columnVale2) = ($columnValue1, $columnValue2) " +
+//            s" columnVale1 precision = ${columnValue1.precision()} " +
+//            s" columnVale1 scale = ${columnValue1.scale ()} " +
+//            s" columnVale2 precision = ${columnValue2.precision()} " +
+//            s" columnVale2 scale = ${columnValue2.scale ()}")
+      }
+      rs2.close()
+    }
+    ps2.close()
+
+    println("executing unprepared select statement")
+    val result2: Array[(java.math.BigDecimal, java.math.BigDecimal)] = new Array(numRows)
+    for (j <- 0 until numRows) {
+      // same predicate as the prepared query, with the probe value inlined as a cast literal
+      val rs3: java.sql.ResultSet = dataTypeForSetParams match {
+        case "DOUBLE" =>
+          val v = j * 0.1
+          stmt.executeQuery(s"select * from column_table" +
+              s" where col2 = cast($v as double) order by col1")
+        case "STRING" =>
+          val v = s"$j" + 0.1
+          stmt.executeQuery(s"select * from column_table" +
+              s" where col2 = cast($v as string) order by col1")
+        case "FLOAT" =>
+          val v = j * 0.1
+          stmt.executeQuery(s"select * from column_table" +
+              s" where col2 = cast($v as float) order by col1")
+        case "DECIMAL" =>
+          val v = new java.math.BigDecimal(s"$j" + 0.1)
+          stmt.executeQuery(s"select * from column_table" +
+              s" where col2 = cast($v as decimal) order by col1")
+      }
+      while (rs3.next()) {
+        val columnValue1 = rs3.getBigDecimal(2)
+        val columnValue2 = rs3.getBigDecimal(3)
+        result2(j) = (columnValue1, columnValue2)
+        // debug statement
+//        println(s"rowNumber = $j (columnVale1, columnVale2) = ($columnValue1, $columnValue2) " +
+//            s" columnVale1 precision = ${columnValue1.precision()} " +
+//            s" columnVale1 scale = ${columnValue1.scale ()} " +
+//            s" columnVale2 precision = ${columnValue2.precision()} " +
+//            s" columnVale2 scale = ${columnValue2.scale ()}")
+      }
+    }
+
+    // element-wise comparison of the per-probe (col2, col3) pairs from both code paths
+    assert(result1.sameElements(result2),
+      "results of prepared and unprepared statements do not match")
+    // scalastyle:on println
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/ColumnEncodersTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnEncodersTest.scala
new file mode 100644
index 0000000000..4342dbe4d1
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnEncodersTest.scala
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.store
+
+import io.snappydata.Property
+
+import org.apache.spark.sql.SnappySession
+
+/**
+ * Tests for ColumnEncoder and ColumnDecoder implementations. 
+ */
+class ColumnEncodersTest extends ColumnTablesTestBase {
+
+  // Runs the shared all-types round trip on a fresh session whose column batch
+  // size property is set to "8k".
+  test("Type encoders/decoders test") {
+    val snappy = new SnappySession(sc)
+    val batchSizeKey = Property.ColumnBatchSize.name
+    snappy.conf.set(batchSizeKey, "8k")
+    runAllTypesTest(snappy, numRowsLower = 10000, numRowsUpper = 20000, numIterations = 5)
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/ColumnMutableTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnMutableTest.scala
new file mode 100644
index 0000000000..c1bffd2402
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnMutableTest.scala
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+package org.apache.spark.sql.store
+
+import io.snappydata.SnappyFunSuite
+
+import org.apache.spark.sql.AnalysisException
+
+/**
+ * Update, delete tests for column tables. 
+ */
+class ColumnMutableTest extends SnappyFunSuite {
+
+  /**
+   * Shared scenario for single-row UPDATEs against a table created with the given
+   * provider ("column" or "row"; the row variant declares CODE as primary key).
+   * First checks that updating the partitioning column is rejected, then updates one
+   * row in a table and in a CTAS copy of it and verifies the visible values.
+   */
+  def singleRowUpdates(provider: String): Unit = {
+    val session = snc.snappySession
+
+    val pk = if (provider == "row") " primary key" else ""
+
+    // check that partitioning column cannot be updated
+    session.sql(s"CREATE TABLE TableUpdate(CODE INT$pk, " +
+        s"DESCRIPTION varchar(100)) USING $provider " +
+        s"options (partition_by 'DESCRIPTION')")
+    // intercept fails the test by itself when no AnalysisException is thrown
+    intercept[AnalysisException] {
+      session.sql("update TableUpdate set DESCRIPTION ='No#complaints' " +
+          "where CODE = 5")
+    }
+    session.sql("drop table TableUpdate")
+
+    session.sql(s"CREATE TABLE TableUpdate(CODE INT$pk, " +
+        s"DESCRIPTION varchar(100)) USING $provider " +
+        s"options (partition_by 'CODE')")
+
+    session.sql("insert into TableUpdate values (5,'test')")
+    session.sql("insert into TableUpdate values (6,'test1')")
+
+    val df1 = session.sql("select DESCRIPTION from TableUpdate " +
+        "where DESCRIPTION = 'test'")
+    assert(df1.count() == 1)
+
+    val d1 = session.sql("select * from TableUpdate")
+    assert(d1.count() == 2)
+
+    session.sql(s"CREATE TABLE TableUpdate2 USING $provider AS " +
+        "(select * from TableUpdate)")
+
+    val d2 = session.sql("select * from TableUpdate2")
+    assert(d2.count() == 2)
+
+    session.sql("update TableUpdate set DESCRIPTION ='No#complaints' " +
+        "where CODE = 5")
+
+    var df2 = session.sql("select DESCRIPTION from TableUpdate " +
+        "where DESCRIPTION = 'No#complaints' ")
+    assert(df2.count() == 1)
+
+    var df3 = session.sql("select DESCRIPTION from TableUpdate " +
+        "where DESCRIPTION in ('No#complaints', 'test1') ")
+    assert(df3.count() == 2)
+
+    // repeat the same update and checks on the CTAS copy
+    session.sql("update TableUpdate2 set DESCRIPTION ='No#complaints' " +
+        "where CODE = 5")
+
+    df2 = session.sql("select DESCRIPTION from TableUpdate2 " +
+        "where DESCRIPTION = 'No#complaints' ")
+    assert(df2.count() == 1)
+
+    df3 = session.sql("select DESCRIPTION from TableUpdate2 " +
+        "where 
DESCRIPTION in ('No#complaints', 'test1') ")
+    assert(df3.count() == 2)
+
+    session.dropTable("TableUpdate")
+    session.dropTable("TableUpdate2")
+  }
+
+  // the shared scenario against column tables
+  test("Simple single row updates") {
+    singleRowUpdates("column")
+  }
+
+  // the shared scenario against row tables
+  test("Simple single row updates (row table)") {
+    singleRowUpdates("row")
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/ColumnTablesTestBase.scala b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnTablesTestBase.scala
new file mode 100644
index 0000000000..61a8dc3b9d
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnTablesTestBase.scala
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package org.apache.spark.sql.store
+
+import java.sql.{Date, Timestamp}
+
+import com.pivotal.gemfirexd.internal.engine.Misc
+import com.pivotal.gemfirexd.internal.iapi.util.ReuseFactory
+import com.pivotal.gemfirexd.internal.shared.common.reference.Limits
+import io.snappydata.SnappyFunSuite
+
+import org.apache.spark.sql.SnappySession
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.types.Decimal
+import org.apache.spark.util.random.XORShiftRandom
+
+/**
+ * Base class for common methods for column table tests. 
+ */
+abstract class ColumnTablesTestBase extends SnappyFunSuite {
+
+  /**
+   * Maps NaN and infinities to 0 and clamps every other value into the range described
+   * by the `Limits.DB2_*_REAL` constants; values already inside are returned unchanged.
+   */
+  protected def normalizeFloat(f: Float): Float = f match {
+    case _ if java.lang.Float.isNaN(f) || java.lang.Float.isInfinite(f) => 0
+    case _ if f < Limits.DB2_SMALLEST_REAL => Limits.DB2_SMALLEST_REAL
+    case _ if f > Limits.DB2_LARGEST_REAL => Limits.DB2_LARGEST_REAL
+    case _ if f > 0 && f < Limits.DB2_SMALLEST_POSITIVE_REAL =>
+      Limits.DB2_SMALLEST_POSITIVE_REAL
+    case _ if f < 0 && f > Limits.DB2_LARGEST_NEGATIVE_REAL =>
+      Limits.DB2_LARGEST_NEGATIVE_REAL
+    case _ => f
+  }
+
+  /**
+   * Double counterpart of `normalizeFloat`, using the `Limits.DB2_*_DOUBLE` constants.
+   */
+  protected def normalizeDouble(d: Double): Double = d match {
+    case _ if java.lang.Double.isNaN(d) || java.lang.Double.isInfinite(d) => 0
+    case _ if d < Limits.DB2_SMALLEST_DOUBLE => Limits.DB2_SMALLEST_DOUBLE
+    case _ if d > Limits.DB2_LARGEST_DOUBLE => Limits.DB2_LARGEST_DOUBLE
+    case _ if d > 0 && d < Limits.DB2_SMALLEST_POSITIVE_DOUBLE =>
+      Limits.DB2_SMALLEST_POSITIVE_DOUBLE
+    case _ if d < 0 && d > Limits.DB2_LARGEST_NEGATIVE_DOUBLE =>
+      Limits.DB2_LARGEST_NEGATIVE_DOUBLE
+    case _ => d
+  }
+
+  protected def runAllTypesTest(session: SnappySession,
+      numRowsLower: Int = 32000, numRowsUpper: Int = 64760,
+      numIterations: Int = 3, process: IndexedSeq[AllTypes] => Unit = _ => ()): Unit = {
+    import session.implicits._
+
+    session.sql("CREATE TABLE TypesTable (Index Int not null, T1 Boolean, " +
+        "T2 Byte, T3 Short, T4 Int, T5 Long, T6 FLOAT, T7 Double, T8 String, " +
+        "T9 Decimal(10, 4), T10 Decimal(35, 15), T11 Date, T12 Timestamp, " +
+        "T13 Binary) USING column options (buckets '8')")
+    session.sql("CREATE TABLE TypesTable2 (index Int, T1 Boolean NOT NULL, " +
+        "T2 Byte NOT NULL, T3 Short NOT NULL, T4 Int NOT NULL, " +
+        "T5 Long not null, T6 FLOAT NOT NULL, T7 Double not null, " +
+        "T8 String NOT NULL, T9 Decimal(10, 4) NOT NULL, " +
+        "T10 Decimal(35, 15) NOT NULL, T11 Date not null, " +
+        "T12 Timestamp not null, T13 Binary not null) " +
+        "USING column options (buckets '8')")
+    session.sql("CREATE TABLE TypesTable3 
(Index Int not null, T1 Boolean, " +
+        "T2 Integer, T3 smallint, T4 Int, T5 bigint, T6 REAL, T7 Double, T8 varchar(100), " +
+        "T9 Decimal(10, 4), T10 Decimal(35, 15), T11 Date, T12 Timestamp, " +
+        "T13 blob) USING row")
+
+    val rnd = new XORShiftRandom
+    var nonZeroRowBuffer = false
+    var c = 1
+    // keep iterating past numIterations until one pass observed rows in the row buffer,
+    // giving up after 100 passes
+    while (c <= numIterations || !nonZeroRowBuffer) {
+      assert(c <= 100, s"failed to get any data in row buffer in $c tries")
+      c += 1
+      // draw the row count from [numRowsLower, numRowsUpper); an empty or inverted range
+      // falls back to numRowsLower instead of failing inside nextInt
+      val span = numRowsUpper - numRowsLower
+      val numItems = if (span > 0) numRowsLower + rnd.nextInt(span) else numRowsLower
+      val items = (0 until numItems).map { index =>
+        // every generator below also produces null for part of its input range
+        val t1 = rnd.nextInt(3) match {
+          case 0 => java.lang.Boolean.FALSE
+          case 1 => java.lang.Boolean.TRUE
+          case 2 => null
+        }
+
+        val t2 = rnd.nextInt(150) match {
+          case b if b < 128 => Byte.box(b.toByte)
+          case _ => null
+        }
+
+        val t3 = rnd.nextInt(40000) match {
+          case s if s < 32768 => Short.box(s.toShort)
+          case _ => null
+        }
+
+        val t4 = rnd.nextInt() match {
+          case i if i < 1500000000 => Int.box(i)
+          case _ => null
+        }
+
+        val t5 = rnd.nextLong() match {
+          case l if l < 7500000000000000000L => Long.box(l)
+          case _ => null
+        }
+
+        // float/double values reuse the bit patterns of t4/t5, normalized into range
+        val t6 = if (t4 ne null) {
+          Float.box(normalizeFloat(java.lang.Float.intBitsToFloat(t4)))
+        } else null
+
+        val t7 = if (t5 ne null) {
+          Double.box(normalizeDouble(java.lang.Double.longBitsToDouble(t5)))
+        } else null
+
+        val t8 = if (t7 ne null) t7.toString else null
+
+        val t9 = if ((t3 ne null) && (t2 ne null)) {
+          Decimal(math.abs(t3.toInt).toString + '.' + math.abs(t2.toInt).toString)
+        } else null
+
+        val t10 = if ((t5 ne null) && (t4 ne null)) {
+          Decimal(math.abs(t5).toString + '.' 
+ math.abs(t4).toString)
+        } else null
+
+        val t11 = if (t3 ne null) DateTimeUtils.toJavaDate(t3.toInt) else null
+
+        val t12 = if (t4 ne null) DateTimeUtils.toJavaTimestamp(t4.toLong) else null
+
+        val t13 = if (t8 ne null) t8.getBytes else null
+
+        AllTypes(index, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
+      }
+
+      val ds = session.createDataset(items)
+      ds.write.insertInto("TypesTable")
+      ds.write.insertInto("TypesTable2")
+      ds.write.insertInto("TypesTable3")
+
+      val df = session.sql("select * from TypesTable order by index")
+      assert(items === df.as[AllTypes].collect().toSeq)
+
+      val df2 = session.sql("select * from TypesTable2 order by index")
+      // TypesTable2 declares its T columns NOT NULL, so switch AllTypes.equals to the
+      // variant that skips fields which are null on the expected (left) side
+      ColumnTablesTestBase.hasNulls = false
+      try {
+        assert(items === df2.as[AllTypes].collect().toSeq)
+      } finally {
+        ColumnTablesTestBase.hasNulls = true
+      }
+
+      val df3 = session.sql("select * from TypesTable3 order by index")
+      assert(items === df3.as[AllTypes].collect().toSeq)
+
+      process(items)
+
+      // remember whether both column tables' regions held entries in at least one pass
+      if (!nonZeroRowBuffer) {
+        nonZeroRowBuffer = Misc.getRegion("/APP/TYPESTABLE", true, false).size() > 0 &&
+            Misc.getRegion("/APP/TYPESTABLE2", true, false).size() > 0
+      }
+
+      session.truncateTable("TypesTable")
+      session.truncateTable("TypesTable2")
+      session.truncateTable("TypesTable3")
+    }
+
+    session.dropTable("TypesTable")
+    session.dropTable("TypesTable2")
+    session.dropTable("TypesTable3")
+  }
+}
+
+object ColumnTablesTestBase {
+  // global switch read by AllTypes.equals; runAllTypesTest sets it to false only
+  // while comparing against TypesTable2
+  var hasNulls = true
+}
+
+// Row shape used by runAllTypesTest; all fields except `index` are nullable boxed values.
+case class AllTypes(index: Int, t1: java.lang.Boolean, t2: java.lang.Byte,
+    t3: java.lang.Short, t4: java.lang.Integer, t5: java.lang.Long,
+    t6: java.lang.Float, t7: java.lang.Double, t8: String, t9: Decimal,
+    t10: Decimal, t11: Date, t12: Timestamp, t13: Array[Byte]) {
+
+  override def equals(obj: Any): Boolean = obj match {
+    // strict comparison; the byte array is compared by content
+    case a: AllTypes if ColumnTablesTestBase.hasNulls =>
+      index == a.index && t1 == a.t1 && t2 == a.t2 && t3 == a.t3 &&
+          t4 == a.t4 && t5 == a.t5 && t6 == a.t6 && t7 == a.t7 && t8 == a.t8 &&
+          t9 == a.t9 && t10 == a.t10 && t11 
== a.t11 && t12 == a.t12 &&
+          java.util.Arrays.equals(t13, a.t13)
+    case a: AllTypes =>
+      // handle nulls on left side
+      // (a null string/binary on the left is compared as ""/empty array, any other
+      // null field on the left matches whatever the right side holds)
+      val st8 = if (t8 ne null) t8 else ""
+      val at13 = if (t13 ne null) t13 else ReuseFactory.getZeroLenByteArray
+      index == a.index && ((t1 eq null) || t1 == a.t1) &&
+          ((t2 eq null) || t2 == a.t2) && ((t3 eq null) || t3 == a.t3) &&
+          ((t4 eq null) || t4 == a.t4) && ((t5 eq null) || t5 == a.t5) &&
+          ((t6 eq null) || t6 == a.t6) && ((t7 eq null) || t7 == a.t7) &&
+          (st8 == a.t8) && ((t9 eq null) || t9 == a.t9) &&
+          ((t10 eq null) || t10 == a.t10) && ((t11 eq null) || t11 == a.t11) &&
+          ((t12 eq null) || t12 == a.t12) && java.util.Arrays.equals(at13, a.t13)
+    case _ => false
+  }
+
+  // Both equals branches require the same `index`, so hashing on it alone keeps
+  // hashCode consistent with equals. The generated case-class hash would include
+  // the byte array's identity hash and the fields the lenient branch ignores.
+  override def hashCode(): Int = index
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/ColumnUpdateDeleteTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnUpdateDeleteTest.scala
new file mode 100644
index 0000000000..27ec9a649f
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/ColumnUpdateDeleteTest.scala
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file. 
+ */
+
+package org.apache.spark.sql.store
+
+import com.pivotal.gemfirexd.TestUtil
+import io.snappydata.ColumnUpdateDeleteTests
+import io.snappydata.cluster.PreparedQueryRoutingSingleNodeSuite
+
+import org.apache.spark.SparkConf
+import org.apache.spark.memory.SnappyUnifiedMemoryManager
+import org.apache.spark.sql.SnappySession
+
+/**
+ * Tests for updates/deletes on column table.
+ */
+class ColumnUpdateDeleteTest extends ColumnTablesTestBase {
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    stopAll()
+  }
+
+  override def afterAll(): Unit = {
+    super.afterAll()
+    stopAll()
+  }
+
+  /**
+   * Builds the configuration for this suite from scratch.
+   * NOTE(review): `addOn` is not applied to the returned conf - confirm that is intended.
+   */
+  override protected def newSparkConf(addOn: SparkConf => SparkConf): SparkConf = {
+    val conf = new SparkConf()
+    conf.setIfMissing("spark.master", "local[*]")
+        .setAppName(getClass.getName)
+    conf.set("snappydata.store.critical-heap-percentage", "95")
+    if (SnappySession.isEnterpriseEdition) {
+      conf.set("snappydata.store.memory-size", "1200m")
+    }
+    conf.set("spark.memory.manager", classOf[SnappyUnifiedMemoryManager].getName)
+    conf.set("spark.serializer", "org.apache.spark.serializer.PooledKryoSerializer")
+    conf.set("spark.closure.serializer", "org.apache.spark.serializer.PooledKryoSerializer")
+    conf
+  }
+
+  test("basic update") {
+    ColumnUpdateDeleteTests.testBasicUpdate(this.snc.snappySession)
+  }
+
+  test("stats check after updates") {
+    ColumnUpdateDeleteTests.testDeltaStats(this.snc.snappySession)
+  }
+
+  test("basic delete") {
+    ColumnUpdateDeleteTests.testBasicDelete(this.snc.snappySession)
+  }
+
+  test("SNAP-1925") {
+    ColumnUpdateDeleteTests.testSNAP1925(this.snc.snappySession)
+  }
+
+  test("SNAP-1926") {
+    ColumnUpdateDeleteTests.testSNAP1926(this.snc.snappySession)
+  }
+
+  test("concurrent ops") {
+    ColumnUpdateDeleteTests.testConcurrentOps(this.snc.snappySession)
+  }
+
+  test("SNAP-2124 update missed") {
+    ColumnUpdateDeleteTests.testSNAP2124(this.snc.snappySession)
+  }
+
+  // Applies two successive string updates to the same key on a row table and on a
+  // column table; the test passes when none of the statements throws.
+  test("SNAP-1985: update delete on string type") {
+    val tableName1 = 
"order_line_1_col_str"
+    val tableName2 = "order_line_2_ud_str"
+
+    snc.sql(s"create table $tableName1 (ol_1_int_id integer," +
+        s" ol_1_int2_id integer, ol_1_str_id STRING) using column " +
+        "options( partition_by 'ol_1_int2_id', buckets '2'," +
+        " COLUMN_BATCH_SIZE '100')")
+    snc.sql(s"create table $tableName2 (ol_1_int_id integer," +
+        s" ol_1_int2_id integer, ol_1_str_id STRING) using row " +
+        "options( partition_by 'ol_1_int2_id', buckets '2')")
+
+    // println("network server started")
+    val serverHostPort = TestUtil.startNetServer()
+    try {
+      PreparedQueryRoutingSingleNodeSuite.insertRows(tableName1, 1000, serverHostPort)
+      PreparedQueryRoutingSingleNodeSuite.insertRows(tableName2, 1000, serverHostPort)
+
+      snc.sql(s"update $tableName2 set ol_1_str_id = '7777_a_1' where ol_1_int2_id = 500 ")
+      snc.sql(s"update $tableName2 set ol_1_str_id = '7777_b_2' where ol_1_int2_id = 500 ")
+
+      snc.sql(s"update $tableName1 set ol_1_str_id = '7777_a_1' where ol_1_int2_id = 500 ")
+      snc.sql(s"update $tableName1 set ol_1_str_id = '7777_b_2' where ol_1_int2_id = 500 ")
+    } finally {
+      // stop the server started above and remove both tables so that a later
+      // "create table" with the same names does not fail
+      TestUtil.stopNetServer()
+      snc.sql(s"drop table if exists $tableName1")
+      snc.sql(s"drop table if exists $tableName2")
+    }
+  }
+}
diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/ComplexTypesTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/ComplexTypesTest.scala
new file mode 100644
index 0000000000..1095620833
--- /dev/null
+++ b/cluster/src/test/scala/org/apache/spark/sql/store/ComplexTypesTest.scala
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2018 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. 
See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+package org.apache.spark.sql.store
+
+import java.sql.{Date, Timestamp}
+import java.util.{Calendar, GregorianCalendar}
+
+import io.snappydata.SnappyFunSuite
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SnappySession
+import org.apache.spark.sql.execution.benchmark.{ColumnCacheBenchmark, TAQTest}
+import org.apache.spark.sql.types.Decimal
+import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.random.XORShiftRandom
+
+// Round-trips generated rows containing array, map and struct columns through the
+// `trade` table and compares the ordered select against the cached input dataset.
+class ComplexTypesTest extends SnappyFunSuite {
+
+  test("check complex types insert/select") {
+    val tradeSize = 50000L
+    val numDays = 1
+    val session = new SnappySession(sc)
+
+    import session.implicits._
+
+    val tradeRDD = ComplexTypesTest.createTradeRDD(sc, tradeSize, numDays)
+
+    session.sql("drop table if exists trade")
+    // same DDL prefix for both flavours; only the provider clause differs
+    if (COLUMN_TABLE) {
+      session.sql(s"${ComplexTypesTest.sqlTrade} using column " +
+          s"options (buckets '8')")
+    } else {
+      session.sql(s"${ComplexTypesTest.sqlTrade} using row options " +
+          s"(partition_by 'sym', buckets '8', overflow 'true')")
+    }
+
+    val tradeDF = session.createDataset(tradeRDD)
+    // cache and materialize first: the generator is random, so the inserted rows and
+    // the expected rows must come from the same evaluation
+    tradeDF.cache()
+    tradeDF.count()
+    tradeDF.write.insertInto("trade")
+
+    val query = "select * from trade order by id"
+    val result = session.sql(query)
+    // val tDF = tradeDF.toDF().as(RowEncoder(session.table("trade").schema))
+    val expected = tradeDF.toDF().sort("id").collect().toSeq
+
+    ColumnCacheBenchmark.collect(result, expected)
+
+    session.sql("drop table trade")
+    session.catalog.clearCache()
+  }
+
+  // selects whether the test creates `trade` as a column table (true) or a row table
+  private[sql] var COLUMN_TABLE = true
+}
+
+object ComplexTypesTest {
+
+  def createTradeRDD(sc: SparkContext, tradeSize: Long,
+      numDays: Int): RDD[TradeData] = {
+    sc.range(0, tradeSize).mapPartitions { itr =>
+      val rnd = new XORShiftRandom
+      val syms = TAQTest.ALL_SYMBOLS.map(UTF8String.fromString)
+      
val numSyms = syms.length + val exs = TAQTest.EXCHANGES.map(UTF8String.fromString) + val numExs = exs.length + var day = 0 + // month is 0 based + var cal = new GregorianCalendar(2016, 5, day + 6) + var date = new Date(cal.getTimeInMillis) + var dayCounter = 0 + itr.map { id => + val sym = syms(math.abs(rnd.nextInt() % numSyms)) + val ex = exs(math.abs(rnd.nextInt() % numExs)) + if (numDays > 1) { + dayCounter += 1 + // change date after some number of iterations + if (dayCounter == 10000) { + // change date + day = (day + 1) % numDays + cal = new GregorianCalendar(2016, 5, day + 6) + date = new Date(cal.getTimeInMillis) + dayCounter = 0 + } + } + val gid = (id % 400).toInt + // reset the timestamp every once in a while + if (gid == 0) { + cal.set(Calendar.HOUR, rnd.nextInt() & 0x07) + cal.set(Calendar.MINUTE, math.abs(rnd.nextInt() % 60)) + cal.set(Calendar.SECOND, math.abs(rnd.nextInt() % 60)) + cal.set(Calendar.MILLISECOND, math.abs(rnd.nextInt() % 1000)) + } + val time = new Timestamp(cal.getTimeInMillis + gid) + val dec = Decimal(math.abs(rnd.nextInt() % 1000000)) + val dec2 = Decimal(math.abs(rnd.nextInt() % 1000000000)) + val dec3 = Decimal(math.abs(rnd.nextInt() % 100000000)) + val c1 = Array(id, id + 1, id + 2, id + 3) + val c2 = Array(sym, ex, sym) + val c3 = Map(sym -> dec2, ex -> dec2) + val tradeB = TradeB(dec3, c2) + val c4 = Map(sym -> tradeB, ex -> tradeB) + val idInt = id.toInt + val c5 = TradeC(sym, gid, dec, Map(sym -> idInt, ex -> idInt)) + TradeData(id, sym, ex, dec, time, date, rnd.nextDouble() * 1000, + c1, c2, c3, c4, c5) + } + } + } + + val sqlTrade: String = + s""" + |CREATE TABLE trade ( + | id BIGINT NOT NULL, + | sym CHAR(4) NOT NULL, + | ex VARCHAR(64) NOT NULL, + | price DECIMAL(10,4) NOT NULL, + | time TIMESTAMP NOT NULL, + | date DATE NOT NULL, + | size DOUBLE NOT NULL, + | c1 ARRAY, + | c2 ARRAY NOT NULL, + | c3 MAP NOT NULL, + | c4 MAP>>, + | c5 STRUCT> + |) + """.stripMargin +} + +case class TradeB(dec: Decimal, syms: 
Array[UTF8String]) + +case class TradeC(sym: UTF8String, gid: Int, dec: Decimal, + map: Map[UTF8String, Int]) + +case class TradeData(id: Long, sym: UTF8String, ex: UTF8String, price: Decimal, + time: Timestamp, date: Date, size: Double, c1: Array[Long], + c2: Array[UTF8String], c3: Map[UTF8String, Decimal], + c4: Map[UTF8String, TradeB], c5: TradeC) diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/RowMutableTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/RowMutableTest.scala new file mode 100644 index 0000000000..382a59d22b --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/store/RowMutableTest.scala @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.store + +import io.snappydata.SnappyFunSuite + +/** + * Update, delete tests for row tables. 
+ */ +class RowMutableTest extends SnappyFunSuite { + + test("Simple key updates") { + val session = snc.snappySession + + session.sql("CREATE TABLE RowTableUpdate(CODE INT, " + + "DESCRIPTION varchar(100)) USING row") + + session.sql("insert into RowTableUpdate values (5,'test')") + session.sql("insert into RowTableUpdate values (6,'test1')") + + val df1 = session.sql("select DESCRIPTION from RowTableUpdate " + + "where DESCRIPTION = 'test'") + assert(df1.count() == 1) + + val d1 = session.sql("select * from RowTableUpdate") + assert(d1.count() == 2) + + session.sql("CREATE TABLE RowTableUpdate2 (CODE INT PRIMARY KEY, " + + "DESCRIPTION varchar(100)) USING row AS (select * from RowTableUpdate)") + + val d2 = session.sql("select * from RowTableUpdate2") + assert(d2.count() == 2) + + session.sql("update RowTableUpdate2 set DESCRIPTION ='No#complaints' " + + "where CODE = 5") + + val df2 = session.sql("select DESCRIPTION from RowTableUpdate2 " + + "where DESCRIPTION = 'No#complaints' ") + assert(df2.count() == 1) + + val df3 = session.sql("select DESCRIPTION from RowTableUpdate2 " + + "where DESCRIPTION in ('No#complaints', 'test1') ") + assert(df3.count() == 2) + + session.dropTable("RowTableUpdate") + session.dropTable("RowTableUpdate2") + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/SQLMetadataTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/SQLMetadataTest.scala new file mode 100644 index 0000000000..8fac8798a9 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/store/SQLMetadataTest.scala @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.store + +import java.sql.DriverManager + +import com.pivotal.gemfirexd.TestUtil +import io.snappydata.Property.PlanCaching +import io.snappydata.SnappyFunSuite + +/** + * Same as core [[MetadataTest]] but using JDBC connection. + */ +class SQLMetadataTest extends SnappyFunSuite { + + private var netPort = 0 + + override def beforeAll(): Unit = { + super.beforeAll() + assert(this.snc !== null) + // start a local network server + netPort = TestUtil.startNetserverAndReturnPort() + } + + override def afterAll(): Unit = { + super.afterAll() + TestUtil.stopNetServer() + } + + test("SYS tables/VTIs") { + val session = this.snc.snappySession + val conn = DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + val stmt = conn.createStatement() + MetadataTest.testSYSTablesAndVTIs(SnappyFunSuite.resultSetToDataset(session, stmt), + netServers = Seq(s"localhost/127.0.0.1[$netPort]")) + stmt.close() + } finally { + conn.close() + } + } + + test("DESCRIBE, SHOW and EXPLAIN") { + val session = this.snc.snappySession + val conn = DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + val stmt = conn.createStatement() + MetadataTest.testDescribeShowAndExplain(SnappyFunSuite.resultSetToDataset(session, stmt), + usingJDBC = true, PlanCaching.get(session.sessionState.conf)) + stmt.close() + } finally { + conn.close() + } + } + + test("DSID joins with SYS tables") { + val session = this.snc.snappySession + val conn = 
DriverManager.getConnection(s"jdbc:snappydata://localhost:$netPort") + try { + val stmt = conn.createStatement() + MetadataTest.testDSIDWithSYSTables(SnappyFunSuite.resultSetToDataset(session, stmt), + Seq(s"localhost/127.0.0.1[$netPort]")) + stmt.close() + } finally { + conn.close() + } + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/SecurityBugTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/SecurityBugTest.scala new file mode 100644 index 0000000000..916366b114 --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/store/SecurityBugTest.scala @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. 
+ */ +package org.apache.spark.sql.store + +import java.sql.{Connection, DriverManager} +import java.util.Properties + +import com.pivotal.gemfirexd.{Attribute, TestUtil} +import com.pivotal.gemfirexd.security.{LdapTestServer, SecurityTestUtils} +import io.snappydata.util.TestUtils +import io.snappydata.{Constant, PlanTest, Property, SnappyFunSuite} +import org.scalatest.BeforeAndAfterAll +import org.junit.Assert.{assertEquals, assertFalse, assertTrue} + +import org.apache.spark.SparkConf + +class SecurityBugTest extends SnappyFunSuite with BeforeAndAfterAll { + private val sysUser = "gemfire10" + var serverHostPort: String = _ + override def beforeAll(): Unit = { + this.stopAll() + super.beforeAll() + snc + serverHostPort = TestUtil.startNetServer() + } + + protected override def newSparkConf(addOn: (SparkConf) => SparkConf): SparkConf = { + val ldapProperties = SecurityTestUtils.startLdapServerAndGetBootProperties(0, 0, sysUser, + getClass.getResource("/auth.ldif").getPath) + import com.pivotal.gemfirexd.Property.{AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE} + for (k <- List(Attribute.AUTH_PROVIDER, AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE)) { + System.setProperty(k, ldapProperties.getProperty(k)) + } + System.setProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.USERNAME_ATTR, sysUser) + System.setProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.PASSWORD_ATTR, sysUser) + val conf = new org.apache.spark.SparkConf() + .setAppName("BugTest") + .setMaster("local[3]") + .set(Attribute.AUTH_PROVIDER, ldapProperties.getProperty(Attribute.AUTH_PROVIDER)) + .set(Constant.STORE_PROPERTY_PREFIX + Attribute.USERNAME_ATTR, sysUser) + .set(Constant.STORE_PROPERTY_PREFIX + Attribute.PASSWORD_ATTR, sysUser) + + if (addOn != null) { + addOn(conf) + } else { + conf + } + } + + override def afterAll(): Unit = { + this.stopAll() + TestUtil.stopNetServer() + val ldapServer = LdapTestServer.getInstance() + if (ldapServer.isServerStarted) { + ldapServer.stopService() + } + import 
com.pivotal.gemfirexd.Property.{AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE} + for (k <- List(Attribute.AUTH_PROVIDER, AUTH_LDAP_SERVER, AUTH_LDAP_SEARCH_BASE)) { + System.clearProperty(k) + System.clearProperty("gemfirexd." + k) + System.clearProperty(Constant.STORE_PROPERTY_PREFIX + k) + } + System.clearProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.USERNAME_ATTR) + System.clearProperty(Constant.STORE_PROPERTY_PREFIX + Attribute.PASSWORD_ATTR) + System.setProperty("gemfirexd.authentication.required", "false") + } + + test("Bug SNAP-2255 connection pool exhaustion") { + val user1 = "gemfire1" + val user2 = "gemfire2" + + val snc1 = snc.newSession() + snc1.snappySession.conf.set(Attribute.USERNAME_ATTR, user1) + snc1.snappySession.conf.set(Attribute.PASSWORD_ATTR, user1) + + snc1.sql(s"create table test (id integer," + + s" name STRING) using column") + snc1.sql("insert into test values (1, 'name1')") + snc1.sql(s"GRANT select ON TABLE test TO $user2") + + // TODO : Use the actual connection pool limit + val limit = 500 + + for (i <- 1 to limit) { + val snc2 = snc.newSession() + snc2.snappySession.conf.set(Attribute.USERNAME_ATTR, user2) + snc2.snappySession.conf.set(Attribute.PASSWORD_ATTR, user2) + + + val rs = snc2.sql(s"select * from $user1.test").collect() + assertEquals(1, rs.length) + } + } + + test("Bug SNAP-2827 admin is unable to drop schema") { + val user1 = "gemfire1" + val adminSnc = snc.newSession() + adminSnc.snappySession.conf.set(Attribute.USERNAME_ATTR, sysUser) + adminSnc.snappySession.conf.set(Attribute.PASSWORD_ATTR, sysUser) + adminSnc.sql("create schema test_schema1 authorization ldapgroup:gemGroup1") + val snc1 = snc.newSession() + snc1.snappySession.conf.set(Attribute.USERNAME_ATTR, user1) + snc1.snappySession.conf.set(Attribute.PASSWORD_ATTR, user1) + snc1.sql(s"create table test_schema1.test (id integer," + + s" name STRING) using column") + adminSnc.sql("drop table test_schema1.test") + adminSnc.sql("drop schema test_schema1") + 
adminSnc.sql("create schema test_schema2 authorization ldapgroup:gemGroup1") + adminSnc.sql("drop schema test_schema2") + + val adminConn = getConnection(Some(sysUser)) + val adminStmt = adminConn.createStatement + adminStmt.execute("create schema test_schema3 authorization ldapgroup:gemGroup1") + val userConn = getConnection(Some(user1)) + val userStmt = userConn.createStatement + userStmt.execute("create table test_schema3.test (id integer, name STRING) using column") + adminStmt.execute("drop table test_schema3.test") + adminStmt.execute("drop schema test_schema3") + adminStmt.execute("create schema test_schema4 authorization ldapgroup:gemGroup1") + adminStmt.execute("drop schema test_schema4") + adminStmt.execute("create schema test_schema5 authorization ldapgroup:gemGroup1") + adminSnc.sql("drop schema test_schema5") + adminSnc.sql("create schema test_schema6 authorization ldapgroup:gemGroup1") + adminStmt.execute("drop schema test_schema6") + } + + private def getConnection(user: Option[String] = None): Connection = { + val props = new Properties() + if (user.isDefined) { + props.put(Attribute.USERNAME_ATTR, user.get) + props.put(Attribute.PASSWORD_ATTR, user.get) + } + DriverManager.getConnection(s"jdbc:snappydata://$serverHostPort", props) + } +} diff --git a/cluster/src/test/scala/org/apache/spark/sql/store/SnappyUDFTest.scala b/cluster/src/test/scala/org/apache/spark/sql/store/SnappyUDFTest.scala new file mode 100644 index 0000000000..9ca0e2061a --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/sql/store/SnappyUDFTest.scala @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ +package org.apache.spark.sql.store + + +import java.math + +import com.pivotal.gemfirexd.internal.engine.Misc +import io.snappydata.SnappyFunSuite +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.jdbc.{ConnectionConfBuilder, ConnectionUtil} +import org.apache.spark.sql.udf.UserDefinedFunctionsDUnitTest._ + +case class OrderData(ref: Int, description: String, price: Long, + tax : BigDecimal, surcharge: Float, date: java.sql.Date, time : String) + +class SnappyUDFTest extends SnappyFunSuite with BeforeAndAfterAll { + + + + override def beforeAll: Unit = { + val rdd = sc.parallelize((1 to 5).map(i => OrderData(i, s"some $i", i, i/2, + i/2 , java.sql.Date.valueOf("2012-12-12"), "2000-02-03 12:23:04"))) + val refDf = snc.createDataFrame(rdd) + refDf.createTempView("tempTable") + + snc.sql("DROP TABLE IF EXISTS RR_TABLE") + snc.sql("DROP TABLE IF EXISTS COL_TABLE") + + snc.sql("CREATE TABLE RR_TABLE(OrderRef INT NOT NULL, description String, " + + "price BIGINT, serviceTax DECIMAL, surcharge Float, purchase_date DATE, time Timestamp)") + + snc.sql("CREATE TABLE COL_TABLE(OrderRef INT NOT NULL, description String, price " + + "LONG, serviceTax DECIMAL, surcharge Float, purchase_date DATE, time Timestamp) " + + "using column options(PARTITION_BY 'OrderRef')") + + refDf.write.insertInto("RR_TABLE") + refDf.write.insertInto("COL_TABLE") + } + + override def afterAll: Unit = { + snc.sql("DROP TABLE IF EXISTS RR_TABLE") + snc.sql("DROP TABLE IF EXISTS COL_TABLE") + } + + private def dropUdf(udfName: 
String): Unit = { + snc.sql(s"drop function $udfName") + snc.sql(s"drop function if exists $udfName") + } + + private def showDescribe(udfName : String): Unit = { + assert(snc.snappySession.sessionCatalog.listFunctions("app", + s"${udfName.substring(0, udfName.length - 2)}*"). + find(f => (f._1.toString().contains(udfName))).size == 1) + + assert(snc.snappySession.sql(s"DESCRIBE FUNCTION $udfName").collect().length == 3) + assert(snc.snappySession.sql(s"DESCRIBE FUNCTION EXTENDED $udfName").collect().length == 4) + assert(snc.snappySession.sql(s"DESCRIBE FUNCTION $udfName").collect().length == 3) + assert(snc.snappySession.sql(s"DESCRIBE FUNCTION EXTENDED $udfName").collect().length == 4) + assert(snc.snappySession.sql(s"SHOW FUNCTIONS $udfName").collect().length == 1) + assert(snc.snappySession.sql(s"SHOW FUNCTIONS $udfName").collect().length == 1) + } + + test("Test UDF with Byte Return type with schema") { + val udfText: String = "public class ByteUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Byte call(String s){ " + + " return new java.lang.Byte((byte)122); " + + "}" + + "}" + val file = createUDFClass("ByteUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.byteudf AS ByteUDF " + + s"RETURNS BYTE USING JAR " + + s"'$jar'") + snc.sql("select app.byteudf(description) from col_table a").collect() + snc.sql("select APP.byteudf(description) from rr_table").collect() + showDescribe("byteudf") + dropUdf("byteudf") + } + + test("Test Nested UDF with schema") { + var udfText: String = "public class NestedUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.String call(String s){ " + + " return s; " + + "}" + + "}" + var file = createUDFClass("NestedUDF", udfText) + var jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.nestedudf AS NestedUDF " + + s"RETURNS STRING USING JAR " + + s"'$jar'") + + udfText = "public class SubUDF implements" + + 
" org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.String call(String s){ " + + " return s; " + + "}" + + "}" + file = createUDFClass("SubUDF", udfText) + jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.subudf AS SubUDF " + + s"RETURNS STRING USING JAR " + + s"'$jar'") + + snc.sql(s"""select app.SubUDF(nvl(description,'some')) from col_table""").collect() + snc.sql("select app.SubUDF(nvl(description,'some')) from rr_table").collect() + snc.sql(s"""select app.SubUDF(app.nestedudf(description)) from col_table""").collect() + snc.sql("select app.SubUDF(app.nestedudf(description)) from rr_table").collect() + + snc.sql("select {FN app.SubUDF(nvl(description,'some')) } from col_table a").collect() + snc.sql("select {FN app.SubUDF(nvl(description,'some')) } from rr_table").collect() + snc.sql("select {FN app.SubUDF(app.nestedudf(description)) } from col_table a").collect() + snc.sql("select {FN app.SubUDF(app.nestedudf(description)) } from rr_table").collect() + + dropUdf("nestedudf") + dropUdf("subudf") + } + + test("Test Count(*) ") { + snc.sql("select count(*) from col_table a").collect() + snc.sql("select count(*) from rr_table").collect() + } + + test("Test UDF fn syntax without schema") { + val udfText: String = "public class ByteUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Byte call(String s){ " + + " return new java.lang.Byte((byte)122); " + + "}" + + "}" + val file = createUDFClass("ByteUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.byteudf AS ByteUDF " + + s"RETURNS BYTE USING JAR " + + s"'$jar'") + snc.sql("select description, {fn byteudf(description) } from col_table a").collect() + snc.sql("select description, {fn byteudf(description) } from rr_table").collect() + showDescribe("byteudf") + dropUdf("byteudf") + } + + test("Test UDF fn syntax with schema") { + val udfText: String = "public class ByteUDF implements" + + " 
org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Byte call(String s){ " + + " return new java.lang.Byte((byte)122); " + + "}" + + "}" + val file = createUDFClass("ByteUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.byteudf AS ByteUDF " + + s"RETURNS BYTE USING JAR " + + s"'$jar'") + snc.sql("select {FN app.byteudf(description) } from col_table a").collect() + snc.sql("select {FN APP.byteudf(description) } from rr_table").collect() + showDescribe("byteudf") + dropUdf("byteudf") + } + + test("Test UDF with Byte Return type") { + val udfText: String = "public class ByteUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Byte call(String s){ " + + " return new java.lang.Byte((byte)122); " + + "}" + + "}" + val file = createUDFClass("ByteUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.byteudf AS ByteUDF " + + s"RETURNS BYTE USING JAR " + + s"'$jar'") + snc.sql("select byteudf(description) from col_table").collect() + snc.sql("select byteudf(description) from rr_table").collect() + showDescribe("byteudf") + dropUdf("byteudf") + } + + test("Test UDF with Short Return type") { + val udfText: String = "public class ShortUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Short call(String s){ " + + " return new java.lang.Short((short)122); " + + "}" + + "}" + val file = createUDFClass("ShortUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.shortudf AS ShortUDF " + + s"RETURNS SHORT USING JAR " + + s"'$jar'") + snc.sql("select shortudf(description) from col_table").collect() + snc.sql("select shortudf(description) from rr_table").collect() + showDescribe("shortudf") + dropUdf("shortudf") + } + + test("Test UDF with TIMESTAMP Return type") { + val udfText: String = "public class TimeUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public 
java.sql.Timestamp call(java.sql.Timestamp s){ " + + " return s; " + + "}" + + "}" + val file = createUDFClass("TimeUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.timeudf AS TimeUDF " + + s"RETURNS Timestamp USING JAR " + + s"'$jar'") + snc.sql("select timeudf(time) from col_table").collect() + snc.sql("select timeudf(time) from rr_table").collect() + showDescribe("timeudf") + dropUdf("timeudf") + } + + test("Test UDF with Double Return type") { + val udfText: String = "public class DoubleUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Double call(String s){ " + + " return new java.lang.Double(12223.678); " + + "}" + + "}" + val file = createUDFClass("DoubleUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.doubleudf AS DoubleUDF " + + s"RETURNS Double USING JAR " + + s"'$jar'") + snc.sql("select doubleudf(description) from col_table").collect() + snc.sql("select doubleudf(description) from rr_table").collect() + showDescribe("doubleudf") + dropUdf("doubleudf") + } + + test("Test UDF with Boolean Return type") { + val udfText: String = "public class BooleanUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.lang.Boolean call(String s){ " + + " return new java.lang.Boolean(true); " + + "}" + + "}" + val file = createUDFClass("BooleanUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.booludf AS BooleanUDF " + + s"RETURNS Boolean USING JAR " + + s"'$jar'") + snc.sql("select booludf(description) from col_table").collect() + snc.sql("select booludf(description) from rr_table").collect() + dropUdf("booludf") + } + + test("Test UDF with Date Return type") { + val udfText: String = "public class DateUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.sql.Date call(java.sql.Date s){ " + + " return s; " + + "}" + + "}" + val file = createUDFClass("DateUDF", 
udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.dateudf AS DateUDF " + + s"RETURNS Date USING JAR " + + s"'$jar'") + snc.sql("select dateudf(purchase_date) from col_table").collect() + snc.sql("select dateudf(purchase_date) from rr_table").collect() + dropUdf("dateudf") + } + + test("Test UDF with float Return type") { + // Intentionally used double types for row tables + val udfText: String = "public class FloatUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Float call(Float s){ " + + " return s; " + + "}" + + "}" + + val udfText1: String = "public class DoubleUDF1 implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Double call(Double s){ " + + " return s; " + + "}" + + "}" + val file1 = createUDFClass("FloatUDF", udfText) + val file2 = createUDFClass("DoubleUDF1", udfText1) + val jar = createJarFile(Seq(file1, file2)) + + snc.sql(s"CREATE FUNCTION APP.floatudf AS FloatUDF " + + s"RETURNS Float USING JAR " + + s"'$jar'") + + snc.sql(s"CREATE FUNCTION APP.doubleudf1 AS DoubleUDF1 " + + s"RETURNS Double USING JAR " + + s"'$jar'") + snc.sql("select floatudf(surcharge) from col_table").collect() + snc.sql("select doubleudf1(surcharge) from rr_table").collect() + dropUdf("floatudf") + dropUdf("doubleudf1") + } + + + test("Test UDF with decimal Return type") { + val udfText: String = "public class DecimalUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public java.math.BigDecimal call(java.math.BigDecimal s){ " + + " return s; " + + "}" + + "}" + val file = createUDFClass("DecimalUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.decimaludf AS DecimalUDF " + + s"RETURNS DECIMAL USING JAR " + + s"'$jar'") + snc.sql("select decimaludf(serviceTax) from col_table").collect() + snc.sql("select decimaludf(serviceTax) from rr_table").collect() + dropUdf("decimaludf") + } + + test("Test UDF with Integer Return type") { + val 
udfText: String = "public class IntegerUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Integer call(String s){ " + + " return s.length(); " + + "}" + + "}" + val file = createUDFClass("IntegerUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.intudf AS IntegerUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + snc.sql("select intudf(description) from col_table").collect() + snc.sql("select intudf(description) from rr_table").collect() + dropUdf("intudf") + } + + test("Test UDF with Long Return type") { + val udfText: String = "public class LongUDF implements" + + " org.apache.spark.sql.api.java.UDF1 {" + + " @Override public Long call(Long s){ " + + " return s; " + + "}" + + "}" + val file = createUDFClass("LongUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.longudf AS LongUDF " + + s"RETURNS Long USING JAR " + + s"'$jar'") + snc.sql("select longudf(PRICE) from col_table").collect() + snc.sql("select longudf(PRICE) from rr_table").collect() + dropUdf("longudf") + } + + + test("Test UDF with Multiple interface") { + val udfText: String = "public class MultUDF implements" + + " org.apache.spark.sql.api.java.UDF1," + + " org.apache.spark.sql.api.java.UDF2 {" + + " @Override public Integer call(Integer s){ " + + " return s; " + + "}" + + " @Override public Integer call(Integer s1, Integer s2){ " + + " return s1 + s2; " + + "}" + + "}" + val file = createUDFClass("MultUDF", udfText) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.multudf AS MultUDF " + + s"RETURNS Integer USING JAR " + + s"'$jar'") + snc.sql("select multudf(OrderRef) from col_table").collect() + snc.sql("select multudf(OrderRef, OrderRef) from col_table") + + snc.sql("select multudf(OrderRef) from rr_table").collect() + snc.sql("select multudf(OrderRef, OrderRef) from rr_table").collect() + dropUdf("multudf") + } + + + test("Test UDAFs") { + + val udafTest : String = "import 
org.apache.spark.sql.Row;" + + "import org.apache.spark.sql.expressions.MutableAggregationBuffer;" + + "import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;" + + "import org.apache.spark.sql.types.DataType;" + + "import org.apache.spark.sql.types.DataTypes;" + + "import org.apache.spark.sql.types.StructType;" + + "" + + "public class LongProductSum extends UserDefinedAggregateFunction " + + "{ " + + "public StructType inputSchema() {" + + " return new StructType()" + + " .add(\"a\", DataTypes.LongType)" + + " .add(\"b\", DataTypes.LongType);" + + " }" + + " " + + " public StructType bufferSchema() {" + + " return new StructType()" + + " .add(\"product\", DataTypes.LongType);" + + " }" + + " public DataType dataType() {" + + " return DataTypes.LongType;" + + " }" + + " public boolean deterministic() {" + + " return true;" + + " }" + + " public void initialize(MutableAggregationBuffer buffer) {" + + " buffer.update(0, 0L);" + + " }" + + " public void update(MutableAggregationBuffer buffer, Row input) {" + + " if (!(input.isNullAt(0) || input.isNullAt(1))) {" + + " buffer.update(0, buffer.getLong(0) + input.getLong(0) * input.getLong(1));" + + " }" + + " }" + + " public void merge(MutableAggregationBuffer buffer1, Row buffer2) {" + + " buffer1.update(0, buffer1.getLong(0) + buffer2.getLong(0));" + + " }" + + " public Object evaluate(Row buffer) {" + + " return buffer.getLong(0);" + + " }" + + "}" + val file = createUDFClass("LongProductSum", udafTest) + val jar = createJarFile(Seq(file)) + snc.sql(s"CREATE FUNCTION APP.longproductsum AS LongProductSum " + + s" RETURNS LONG USING JAR " + + s"'$jar'") + snc.sql("select longproductsum(price, price) from col_table").collect() + dropUdf("longproductsum") + }

  // Compiles a Java UDF whose call(String) returns the input concatenated with
  // itself, registers it as APP.strudf from the generated jar, runs it against
  // col_table and rr_table, and finally drops the function.
  test("Test UDF with String Return type") {
    // NOTE(review): as shown, UDF1 carries no type arguments; with a raw UDF1 the
    // @Override on call(String) would presumably not compile. Confirm the literal
    // was not truncated (e.g. a lost "<String,String>") before relying on it.
    val udfText: String = "public class StringUDF implements" +
        " org.apache.spark.sql.api.java.UDF1 {" +
        " @Override public String call(String s){ " +
        " return s + s; " +
        "}" +
        "}"
    val file = createUDFClass("StringUDF", udfText)
    val jar = createJarFile(Seq(file))
    snc.sql(s"CREATE FUNCTION APP.strudf AS StringUDF " +
        s"RETURNS STRING USING JAR " +
        s"'$jar'")
    snc.sql("select strudf(description) from col_table").collect()
    snc.sql("select strudf(description) from rr_table").collect()
    dropUdf("strudf")
  }

  // Registers a Scala closure through the session's UDF registry (no jar involved)
  // and invokes it from SQL.
  test("Test Spark UDF") {
    snc.udf.register("decudf",
      (n: java.math.BigDecimal) => n.multiply(new math.BigDecimal(2)))
    snc.sql("select decudf(tax) from tempTable").collect()
  }

  // Every row returned by DSID() must equal the id reported by Misc.getMyId().
  test("test dsid function") {
    try {
      snc.sql("create table test123( a integer,b integer, c int) using column options()")
      snc.sql("insert into test123 values(1,2,3)")
      snc.sql("insert into test123 values(31,42,53)")
      snc.sql("insert into test123 values(87,76,63)")
      snc.sql("insert into test123 values(12,24,53)")

      val myId = Misc.getMyId().getId()
      snc.sql("select DSID() from test123").collect().foreach { row =>
        assert(row.getString(0) === myId)
      }
    } finally {
      // Clean up even when an assertion above fails, so a failed run does not
      // leave the table behind.
      snc.sql("drop table if exists test123")
    }
  }

  // Creates a UDF inside the TRADE schema and checks that it can be invoked with
  // differently-cased schema/function qualifiers against a row table and a
  // column table that live in the same schema.
  test("Test UDF other schema") {
    val conf = new ConnectionConfBuilder(snc.snappySession).build()
    val conn = ConnectionUtil.getPooledConnection("test default conf", conf)
    try {
      // The JDBC statement is only needed for the schema DDL; close it right away.
      val st = conn.createStatement
      try {
        st.execute("create schema trade")
      } finally {
        st.close()
      }

      // NOTE(review): same remark as in "Test UDF with String Return type" about
      // the raw UDF1 in this literal. Confirm it was not truncated.
      val udfText: String = "public class StringUDF implements" +
          " org.apache.spark.sql.api.java.UDF1 {" +
          " @Override public String call(String s){ " +
          " return s + s; " +
          "}" +
          "}"
      val file = createUDFClass("StringUDF", udfText)
      val jar = createJarFile(Seq(file))

      snc.sql(s"CREATE FUNCTION TRADE.STRUDF AS StringUDF " +
          s"RETURNS STRING USING JAR " +
          s"'$jar'")

      snc.sql("CREATE TABLE trade.rr_test_table(OrderRef INT NOT NULL, description String, " +
          "price BIGINT, serviceTax DECIMAL, surcharge Float, purchase_date DATE, time Timestamp)")

      snc.sql("CREATE TABLE trade.col_test_table(OrderRef INT NOT NULL," +
          " description String, price " +
          "LONG, serviceTax DECIMAL, surcharge Float, purchase_date DATE, time Timestamp) " +
          "using column options(PARTITION_BY 'OrderRef')")

      snc.sql("select TRADE.strudf(description) from trade.col_test_table").collect()
      snc.sql("select trade.STRUDF(description) from trade.rr_test_table").collect()
      snc.sql("select TRADE.STRUDF(description) from trade.rr_test_table").collect()
      dropUdf("trade.strudf")
    } finally {
      try {
        snc.sql("DROP TABLE IF EXISTS trade.col_test_table")
        snc.sql("DROP TABLE IF EXISTS trade.rr_test_table")
      } finally {
        // Close the connection obtained from the pool; previously it was never
        // closed.
        conn.close()
      }
    }
  }
}
diff --git a/cluster/src/test/scala/org/apache/spark/unsafe/NativeUTF8StringPropertyCheckSuite.scala b/cluster/src/test/scala/org/apache/spark/unsafe/NativeUTF8StringPropertyCheckSuite.scala new file mode 100644 index 0000000000..7d34f1510c --- /dev/null +++ b/cluster/src/test/scala/org/apache/spark/unsafe/NativeUTF8StringPropertyCheckSuite.scala @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Changes for SnappyData data platform. + * + * Portions Copyright (c) 2018 SnappyData, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License.
You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */

package org.apache.spark.unsafe

import java.nio.charset.StandardCharsets

import io.snappydata.SnappyFunSuite
import it.unimi.dsi.fastutil.longs.LongArrayList
import org.apache.commons.lang3.StringUtils
import org.scalacheck.{Arbitrary, Gen}
import org.scalatest.prop.GeneratorDrivenPropertyChecks
import org.scalatest.{BeforeAndAfter, Matchers}

import org.apache.spark.unsafe.types.UTF8String

/**
 * This test suite uses ScalaCheck to generate randomized inputs for UTF8String testing.
 *
 * Every UTF8String under test is created by `toUTF8`, which copies the string's
 * UTF-8 bytes into memory obtained from `Platform.allocateMemory` and wraps that
 * address with `UTF8String.fromAddress`. The allocated addresses are recorded and
 * freed after each test.
 */
class NativeUTF8StringPropertyCheckSuite extends SnappyFunSuite
    with GeneratorDrivenPropertyChecks with Matchers with BeforeAndAfter {

  // Addresses returned by Platform.allocateMemory in toUTF8; freed in `after`.
  private val allocatedMemoryList: LongArrayList = new LongArrayList

  // scalastyle:off println
  if (Native.isLoaded) {
    println("NATIVE: using native JNI library")
  } else {
    println("NATIVE: failed to load native JNI library")
  }
  // scalastyle:on println

  // Release everything toUTF8 allocated while the previous test was running.
  after {
    val iter = allocatedMemoryList.iterator()
    while (iter.hasNext) {
      Platform.freeMemory(iter.nextLong())
    }
    allocatedMemoryList.clear()
  }

  /**
   * Builds a UTF8String over a freshly allocated copy of the UTF-8 bytes of `s`.
   *
   * @param s the string to convert; may be null
   * @return null when `s` is null, otherwise a UTF8String created with
   *         `UTF8String.fromAddress(null, address, numBytes)`
   */
  private def toUTF8(s: String): UTF8String = {
    if (s eq null) {
      null
    } else {
      val bytes = s.getBytes(StandardCharsets.UTF_8)
      val numBytes = bytes.length
      val address = Platform.allocateMemory(numBytes)
      // Remember the address so the `after` hook can free it.
      allocatedMemoryList.add(address)
      Platform.copyMemory(bytes, Platform.BYTE_ARRAY_OFFSET, null, address, numBytes)
      UTF8String.fromAddress(null, address, numBytes)
    }
  }

  test("toString") {
    forAll { (s: String) =>
      assert(toUTF8(s).toString === s)
    }
  }

  test("numChars") {
    forAll { (s: String) =>
      assert(toUTF8(s).numChars() === s.length)
    }
  }

  test("startsWith") {
    forAll { (s: String) =>
      val utf8 = toUTF8(s)
      assert(utf8.startsWith(utf8))
      // Every proper prefix of s, down to the empty string.
      for (i <- 1 to s.length) {
        assert(utf8.startsWith(toUTF8(s.dropRight(i))))
      }
    }
  }

  test("endsWith") {
    forAll { (s: String) =>
      val utf8 = toUTF8(s)
      assert(utf8.endsWith(utf8))
      // Every proper suffix of s, down to the empty string.
      for (i <- 1 to s.length) {
        assert(utf8.endsWith(toUTF8(s.drop(i))))
      }
    }
  }

  test("toUpperCase") {
    forAll { (s: String) =>
      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
    }
  }

  test("toLowerCase") {
    forAll { (s: String) =>
      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
    }
  }

  test("compare") {
    // Only the sign of the comparison is expected to agree with String.compareTo.
    forAll { (s1: String, s2: String) =>
      assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2)))
    }
  }

  test("substring") {
    forAll { (s: String) =>
      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
        assert(toUTF8(s).substring(start, end).toString === s.substring(start, end))
      }
    }
  }

  test("contains") {
    forAll { (s: String) =>
      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
        val substring = s.substring(start, end)
        assert(toUTF8(s).contains(toUTF8(substring)) === s.contains(substring))
      }
    }
  }

  val whitespaceChar: Gen[Char] = Gen.const(0x20.toChar)
  val whitespaceString: Gen[String] = Gen.listOf(whitespaceChar).map(_.mkString)
  val randomString: Gen[String] = Arbitrary.arbString.arbitrary

  test("trim, trimLeft, trimRight") {
    // Reference implementations that strip only the space character (0x20),
    // which is the only character whitespaceString generates.
    def lTrim(s: String): String = s.dropWhile(_ == ' ')

    // lastIndexWhere yields -1 for an all-space (or empty) string, giving "".
    def rTrim(s: String): String = s.substring(0, s.lastIndexWhere(_ != ' ') + 1)

    forAll(
      whitespaceString,
      randomString,
      whitespaceString
    ) { (start: String, middle: String, end: String) =>
      val s = start + middle + end
      assert(toUTF8(s).trim() === toUTF8(rTrim(lTrim(s))))
      assert(toUTF8(s).trimLeft() === toUTF8(lTrim(s)))
      assert(toUTF8(s).trimRight() === toUTF8(rTrim(s)))
    }
  }

  test("reverse") {
    forAll { (s: String) =>
      assert(toUTF8(s).reverse === toUTF8(s.reverse))
    }
  }

  test("indexOf") {
    forAll { (s: String) =>
      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
        val substring = s.substring(start, end)
        assert(toUTF8(s).indexOf(toUTF8(substring), 0) === s.indexOf(substring))
      }
    }
  }

  // Bounded integers used for repeat counts and pad lengths.
  private val randomInt = Gen.choose(-100, 100)

  test("repeat") {
    def repeat(str: String, times: Int): String = {
      if (times > 0) str * times else ""
    }

    // ScalaCheck's default generator produces repeat counts that are too large and
    // might hang the test forever, so a bounded generator is used instead.
    forAll(randomString, randomInt) { (s: String, times: Int) =>
      assert(toUTF8(s).repeat(times) === toUTF8(repeat(s, times)))
    }
  }

  test("lpad, rpad") {
    // Reference padding: truncates `origin` when it is longer than `length`,
    // returns it unchanged when there is nothing to pad with, and otherwise fills
    // the missing characters by cycling through `pad` on the requested side.
    def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = {
      if (length <= 0) {
        ""
      } else if (length <= origin.length) {
        origin.substring(0, length)
      } else if (pad.isEmpty) {
        origin
      } else {
        val toPad = length - origin.length
        // Leading part of `pad` needed after the whole repetitions (may be empty).
        val partPad = pad.substring(0, toPad % pad.length)
        val fullPad = pad * (toPad / pad.length) + partPad
        if (isLPad) fullPad + origin else origin + fullPad
      }
    }

    forAll(
      randomString,
      randomString,
      randomInt
    ) { (s: String, pad: String, length: Int) =>
      assert(toUTF8(s).lpad(length, toUTF8(pad)) ===
        toUTF8(padding(s, pad, length, isLPad = true)))
      assert(toUTF8(s).rpad(length, toUTF8(pad)) ===
        toUTF8(padding(s, pad, length, isLPad = false)))
    }
  }

  // Lists whose elements are either null or a random string.
  private val nullableSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString))

  test("concat") {
    // Expected result: null as soon as any input is null.
    def concat(origin: Seq[String]): String =
      if (origin.contains(null)) null else origin.mkString

    forAll { (inputs: Seq[String]) =>
      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString))
    }
    forAll(nullableSeq) { (inputs: Seq[String]) =>
      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs)))
    }
  }

  test("concatWs") {
    // Expected result: null for a null separator; null inputs are skipped.
    def concatWs(sep: String, inputs: Seq[String]): String = {
      if (sep == null) null else inputs.filter(_ != null).mkString(sep)
    }

    forAll { (sep: String, inputs: Seq[String]) =>
      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
        toUTF8(inputs.mkString(sep)))
    }
    forAll(randomString, nullableSeq) { (sep: String, inputs: Seq[String]) =>
      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
        toUTF8(concatWs(sep, inputs)))
    }
  }

  // TODO: enable this when we find a proper way to generate valid patterns
  ignore("split") {
    forAll { (s: String, pattern: String, limit: Int) =>
      assert(toUTF8(s).split(toUTF8(pattern), limit) ===
        s.split(pattern, limit).map(toUTF8))
    }
  }

  test("levenshteinDistance") {
    forAll { (one: String, another: String) =>
      assert(toUTF8(one).levenshteinDistance(toUTF8(another)) ===
        StringUtils.getLevenshteinDistance(one, another))
    }
  }

  test("hashCode") {
    // Two separately allocated copies of the same string must hash identically.
    forAll { (s: String) =>
      assert(toUTF8(s).hashCode() === toUTF8(s).hashCode())
    }
  }

  test("equals") {
    forAll { (one: String, another: String) =>
      assert(toUTF8(one).equals(toUTF8(another)) === one.equals(another))
    }
  }
}
diff --git a/codeStyleSettings.xml b/codeStyleSettings.xml index ff18271923..a6adc4e8ef 100644 --- a/codeStyleSettings.xml +++ b/codeStyleSettings.xml @@ -27,6 +27,7 @@ +