mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
Support configurable quote character parameter in Select (#8955)
This commit is contained in:
21
mint/run/core/s3select/README.md
Normal file
21
mint/run/core/s3select/README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
## `s3select` tests
|
||||
This directory serves as the location for Mint tests for s3select features. The top-level `mint.sh` calls `run.sh` to execute the tests.
|
||||
|
||||
## Adding new tests
|
||||
New tests are added into `s3select/tests.py` as new functions.
|
||||
|
||||
## Running tests manually
|
||||
- Set environment variables `MINT_DATA_DIR`, `MINT_MODE`, `SERVER_ENDPOINT`, `ACCESS_KEY`, `SECRET_KEY`, `SERVER_REGION` and `ENABLE_HTTPS`
|
||||
- Call `run.sh` with the output log file and the error log file as arguments. For example:
|
||||
|
||||
```bash
|
||||
export MINT_DATA_DIR=~/my-mint-dir
|
||||
export MINT_MODE=core
|
||||
export SERVER_ENDPOINT="play.min.io"
|
||||
export ACCESS_KEY="Q3AM3UQ867SPQQA43P2F"
|
||||
export SECRET_KEY="zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
|
||||
export ENABLE_HTTPS=1
|
||||
export SERVER_REGION=us-east-1
|
||||
./run.sh /tmp/output.log /tmp/error.log
|
||||
```
|
||||
28
mint/run/core/s3select/run.sh
Executable file
28
mint/run/core/s3select/run.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Mint (C) 2020 Minio, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Handle command line arguments: exactly two log file paths are required.
if [ $# -ne 2 ]; then
    echo "usage: run.sh <OUTPUT-LOG-FILE> <ERROR-LOG-FILE>"
    # Exit statuses are limited to 0-255. bash wraps "-1" to 255, so state
    # that value explicitly instead of relying on the wrap-around.
    exit 255
fi

output_log_file="$1"
error_log_file="$2"

# Run the s3select tests. stdout is appended to the output log, while the
# error log is overwritten with this run's stderr.
python "./tests.py" 1>>"$output_log_file" 2>"$error_log_file"
|
||||
304
mint/run/core/s3select/tests.py
Normal file
304
mint/run/core/s3select/tests.py
Normal file
@@ -0,0 +1,304 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# MinIO Python Library for Amazon S3 Compatible Cloud Storage,
|
||||
# (C) 2015-2020 MinIO, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# from __future__ import division
|
||||
# from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
import io
|
||||
from sys import exit
|
||||
import uuid
|
||||
import inspect
|
||||
import json
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from minio import Minio
|
||||
from minio.select.options import (SelectObjectOptions, CSVInput,
|
||||
RequestProgress, InputSerialization,
|
||||
OutputSerialization, CSVOutput, JsonOutput)
|
||||
|
||||
class LogOutput(object):
    """
    Build the JSON log entry that mint expects from every SDK test.

    The entry produced by json_report() can contain these keys (keys whose
    value is empty or falsy are omitted from the output):

    'name': 'minio-py:' followed by the test name given to the constructor
    'function': name of the method/api under test with its signature,
                built as <method>.__name__+'('+', '.join(args_list)+')'
                e.g. 'remove_object(bucket_name, object_name)'
    'args': method/api arguments with their values, in
            dictionary form: {'arg1': val1, 'arg2': val2, ...}
    'duration': milliseconds elapsed between construction of this object
                and the call to json_report()
    'alert': any extra information the user needs to be alerted about,
             like whether this is a Blocker/Gateway/Server related
             issue, etc.
    'message': descriptive error message
    'error': stack trace of the exception being handled (only when an
             error message is supplied)
    'status': exit status, possible values are 'PASS', 'FAIL', 'NA',
              defaults to 'PASS'
    """

    PASS = 'PASS'
    FAIL = 'FAIL'
    NA = 'NA'

    def __init__(self, meth, test_name):
        """
        :param meth: method/api under test; its argument names (minus the
                     first one) are used to build the 'function' entry.
        :param test_name: name of the test, reported as 'minio-py:<name>'.
        """
        # inspect.getargspec() was removed in Python 3.11. Prefer
        # getfullargspec() and only fall back on interpreters lacking it.
        argspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
        # The first argument (self) is not part of the reported signature.
        self.__args_list = argspec(meth).args[1:]
        self.__name = 'minio-py:'+test_name
        self.__function = meth.__name__+'('+', '.join(self.__args_list)+')'
        self.__args = {}
        # The attributes below are initialised but never read in this class;
        # json_report() derives the reported values from its own arguments.
        self.__duration = 0
        self.__alert = ''
        self.__message = None
        self.__error = None
        self.__status = self.PASS
        # Reference point for the 'duration' entry.
        self.__start_time = time.time()

    @property
    def name(self):
        """Value reported under 'name'."""
        return self.__name

    @name.setter
    def name(self, val):
        self.__name = val

    @property
    def function(self):
        """Value reported under 'function'."""
        return self.__function

    @function.setter
    def function(self, val):
        self.__function = val

    @property
    def args(self):
        """Dictionary reported under 'args'; tests fill it in as they run."""
        return self.__args

    @args.setter
    def args(self, val):
        self.__args = val

    def json_report(self, err_msg='', alert='', status=''):
        """
        Return the log entry as a JSON string.

        :param err_msg: error message or exception; when given, the entry
                        also carries the current stack trace and the status
                        defaults to 'FAIL'.
        :param alert: extra information to report under 'alert'.
        :param status: explicit status; overrides the PASS/FAIL default.
        :return: JSON document containing only the non-empty entries.
        """
        # Arguments without a value are not worth reporting.
        self.__args = {k: v for k, v in self.__args.items() if v}

        if status:
            final_status = status
        elif err_msg:
            final_status = self.FAIL
        else:
            final_status = self.PASS

        entry = {'name': self.__name,
                 'function': self.__function,
                 'args': self.__args,
                 'duration': int(round((time.time() - self.__start_time)*1000)),
                 'alert': str(alert),
                 'message': str(err_msg),
                 'error': traceback.format_exc() if err_msg else '',
                 'status': final_status
                 }
        # Empty/falsy entries (including a zero duration) are left out.
        return json.dumps({k: v for k, v in entry.items() if v})
|
||||
|
||||
def generate_bucket_name():
    """Return a bucket name made of 's3select-test-' plus a random UUID4."""
    return "s3select-test-{}".format(uuid.uuid4())
|
||||
|
||||
|
||||
def test_csv_input_quote_char(client, log_output):
    """
    Run 'select * from s3object' over small CSV objects, varying the input
    quote character, and compare the JSON output with the expected bytes.

    A temporary bucket is created for the run and removed afterwards. On
    success the JSON report of log_output is printed; any failure is
    re-raised as an Exception.
    """
    # Get a unique bucket_name
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    # Each case is (quote character, object content, expected result).
    # An Exception instance as expected result means the select must fail.
    tests = [
        # Invalid quote character, should fail
        ('""', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع', b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        ('"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1","col2","col3"\n', b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
    ]

    try:
        client.make_bucket(bucket_name)

        # Cases are numbered from 1 in the failure messages.
        for case_no, (quote_char, object_content, expected_output) in enumerate(tests, 1):
            csv_input = CSVInput(
                FileHeaderInfo="NONE",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=quote_char,
                QuoteEscapeCharacter=quote_char,
                Comments="#",
                AllowQuotedRecordDelimiter="FALSE",
            )
            input_format = InputSerialization(
                compression_type="NONE",
                csv=csv_input,
            )
            output_format = OutputSerialization(
                json=JsonOutput(RecordDelimiter="\n"),
            )
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=input_format,
                output_serialization=output_format,
                request_progress=RequestProgress(enabled="False"),
            )

            failure_expected = isinstance(expected_output, Exception)

            try:
                got_output = exec_select(client, bucket_name, object_content, options, log_output)
            except Exception as select_err:
                # A failing select is fine only when the case asks for it.
                if not failure_expected:
                    raise ValueError('Test {} unexpectedly failed with: {}'.format(case_no, select_err))
            else:
                if failure_expected:
                    raise ValueError('Test {}: expected an exception, got {}'.format(case_no, got_output))
                if got_output != expected_output:
                    raise ValueError('Test {}: data mismatch. Expected : {}, Received {}'.format(case_no, expected_output, got_output))

    except Exception as failure:
        raise Exception(failure)
    finally:
        # Always try to remove the temporary bucket.
        try:
            client.remove_bucket(bucket_name)
        except Exception as cleanup_failure:
            raise Exception(cleanup_failure)

    # Test passes
    print(log_output.json_report())
|
||||
|
||||
def test_csv_output_quote_char(client, log_output):
    """
    Run 'select * from s3object' over small CSV objects, varying the output
    quote character, and compare the CSV output with the expected bytes.

    A temporary bucket is created for the run and removed afterwards. On
    success the JSON report of log_output is printed; any failure is
    re-raised as an Exception.
    """
    # Get a unique bucket_name
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    # Each case is (output quote character, object content, expected result).
    # An Exception instance as expected result means the select must fail.
    tests = [
        # Two-character quote value, expected to be rejected
        ("''", b'col1,col2,col3\n', Exception()),
        ("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', b'\n', b''),
    ]

    try:
        client.make_bucket(bucket_name)

        # Cases are numbered from 1 in the failure messages.
        for case_no, (quote_char, object_content, expected_output) in enumerate(tests, 1):
            csv_input = CSVInput(
                FileHeaderInfo="NONE",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter='"',
                QuoteEscapeCharacter='"',
                Comments="#",
                AllowQuotedRecordDelimiter="FALSE",
            )
            input_format = InputSerialization(
                compression_type="NONE",
                csv=csv_input,
            )
            csv_output = CSVOutput(
                QuoteFields="ALWAYS",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=quote_char,
                QuoteEscapeCharacter=quote_char,
            )
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=input_format,
                output_serialization=OutputSerialization(csv=csv_output),
                request_progress=RequestProgress(enabled="False"),
            )

            failure_expected = isinstance(expected_output, Exception)

            try:
                got_output = exec_select(client, bucket_name, object_content, options, log_output)
            except Exception as select_err:
                # A failing select is fine only when the case asks for it.
                if not failure_expected:
                    raise ValueError('Test {} unexpectedly failed with: {}'.format(case_no, select_err))
            else:
                if failure_expected:
                    raise ValueError('Test {}: expected an exception, got {}'.format(case_no, got_output))
                if got_output != expected_output:
                    raise ValueError('Test {}: data mismatch. Expected : {}. Received: {}.'.format(case_no, expected_output, got_output))

    except Exception as failure:
        raise Exception(failure)
    finally:
        # Always try to remove the temporary bucket.
        try:
            client.remove_bucket(bucket_name)
        except Exception as cleanup_failure:
            raise Exception(cleanup_failure)

    # Test passes
    print(log_output.json_report())
|
||||
|
||||
|
||||
def exec_select(client, bucket_name, object_content, options, log_output):
    """
    Upload object_content under a random object name, run a select query on
    it and return the selected records.

    :param client: object providing put_object, select_object_content and
                   remove_object.
    :param bucket_name: existing bucket used for the temporary object.
    :param object_content: bytes to upload as the object body.
    :param options: select options passed through to select_object_content.
    :param log_output: log entry; its args['object_name'] is set to the
                       generated object name.
    :return: bytes made of the streamed records, each encoded as UTF-8.
    :raises: whatever the client raises; the original exception propagates
             unchanged so its type and traceback stay available.
    """
    object_name = str(uuid.uuid4())
    log_output.args['object_name'] = object_name

    try:
        client.put_object(bucket_name, object_name,
                          io.BytesIO(object_content), len(object_content))

        data = client.select_object_content(bucket_name, object_name, options)

        # Get the records
        return b''.join(chunk.encode('utf-8')
                        for chunk in data.stream(10*1024))
    finally:
        # Remove the temporary object whether or not the select succeeded.
        client.remove_object(bucket_name, object_name)
|
||||
|
||||
|
||||
def main():
    """
    Functional testing for S3 select.

    Connection settings are read from the ACCESS_KEY, SECRET_KEY,
    SERVER_ENDPOINT and ENABLE_HTTPS environment variables. When the
    endpoint is 'play.min.io' the built-in credentials and HTTPS are always
    used. On any failure a JSON report is printed and the process exits
    with status 1.
    """
    # Stays None until the first test starts, so the error handler can still
    # produce a report when the setup itself fails (for example when the
    # Minio client cannot be constructed).
    log_output = None

    try:
        access_key = os.getenv('ACCESS_KEY', 'Q3AM3UQ867SPQQA43P2F')
        secret_key = os.getenv('SECRET_KEY',
                               'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG')
        server_endpoint = os.getenv('SERVER_ENDPOINT', 'play.min.io')
        secure = os.getenv('ENABLE_HTTPS', '1') == '1'
        if server_endpoint == 'play.min.io':
            access_key = 'Q3AM3UQ867SPQQA43P2F'
            secret_key = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
            secure = True

        client = Minio(server_endpoint, access_key, secret_key, secure=secure)

        # Each test gets its own log entry, named after the test function.
        for test in (test_csv_input_quote_char, test_csv_output_quote_char):
            log_output = LogOutput(client.select_object_content, test.__name__)
            test(client, log_output)

    except Exception as err:
        if log_output is None:
            # Failure happened before any test was started.
            log_output = LogOutput(Minio.select_object_content, 'setup')
        print(log_output.json_report(err))
        exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Run the test suite only when this file is executed as a script,
    # not when it is imported as a module.
    main()
|
||||
Reference in New Issue
Block a user