Module ksqldb_confluent.client

Expand source code
# Copyright 2022 Confluent Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Refer to LICENSE for more information.

import asyncio
from asyncio import Future, Queue
from typing import List

import httpx
from httpx import URL, Headers, BasicAuth

from ksqldb_confluent.exceptions import KsqlDbClientException
from ksqldb_confluent.batched_query_result import BatchedQueryResult
from ksqldb_confluent.row import Row
from ksqldb_confluent.__schema_parser import SchemaParser
from ksqldb_confluent.streamed_query_result import StreamedQueryResult
from ksqldb_confluent.__streamed_row import StreamedRow
from ksqldb_confluent.types.schema import Schema
from ksqldb_confluent.__utils import _canonicalize_json_string
from ksqldb_confluent.__utils import get_client_version


async def _handle_streamed_row(
        streamed_row: StreamedRow,
        row_queue: Queue,
        completion_source: Future):
    """
    Dispatch one parsed line of a query response.

    A header line resolves ``completion_source`` with a ``StreamedQueryResult`` built
    from the header's query id, the parsed schema and ``row_queue``. A row line is
    wrapped in a ``Row`` and put on ``row_queue``.

    Args:
        streamed_row: The parsed response line; its ``header`` and ``row`` attributes
            decide how it is handled
        row_queue: The queue that receives the rows of the query
        completion_source: The Future resolved when the header is seen

    Raises:
        KsqlDbClientException: If the line carries neither a header nor a row
    """
    header = streamed_row.header
    if header:
        schema_text: str = header.schema
        # A schema that cannot be parsed is not fatal: the result is still
        # delivered, just with its schema set to None.
        try:
            parsed_schema = SchemaParser(schema_text).parse_schema()
        except Exception:
            parsed_schema = None
        completion_source.set_result(
            StreamedQueryResult(header.query_id, parsed_schema, row_queue))
        return

    payload = streamed_row.row
    if not payload:
        raise KsqlDbClientException('Received response line of unknown type.')

    # A tombstone is forwarded as a Row with an empty column list.
    if payload.tombstone:
        await row_queue.put(Row([]))
        return
    await row_queue.put(Row(payload.columns))


class Client:
    """
    A client object for issuing commands to ksqlDB.

    Queries are POSTed to the server's ``query-stream`` endpoint through a single
    ``httpx.AsyncClient`` configured for HTTP/2 only. Call ``close`` once the client
    is no longer needed.
    """
    _header_accept: str = 'Accept'
    _header_user_agent: str = 'User-agent'
    _user_agent_name: str = 'ksqlDB Python Client'

    def __init__(self,
                 host: str,
                 port: int,
                 api_key: str = '',
                 secret: str = '',
                 use_tls: bool = False,
                 timeout: float = None):
        """
        Constructor for the client.

        Args:
            host: The host of the ksqlDB server
            port: The port of the ksqlDB server. Usually 8088
            api_key: The api_key to authenticate with the ksqlDB server
            secret: The secret to authenticate with the ksqlDB server
            use_tls: Whether to connect with the https scheme instead of http
            timeout: Default timeout in seconds, used by every query that does not
                pass its own timeout
        """
        self.host = host
        self.port = port
        self.scheme = 'https' if use_tls else 'http'
        self.api_key = api_key
        self.secret = secret
        self.timeout = timeout

        # Strong references to in-flight request tasks. asyncio itself only keeps
        # weak references to tasks, so an otherwise unreferenced task could be
        # garbage-collected before it finishes.
        self._background_tasks: set = set()

        self.client = httpx.AsyncClient(
            http1=False,
            http2=True,
            timeout=self.timeout)

    async def execute_query_async(self, sql: str, timeout: float = None) -> BatchedQueryResult:
        """
        Executes a query. Note that this is limited to pull and push queries and does not cover
        other statement type commands.

        Also, this is appropriate for queries which can run to completion in a batch, namely pull
        queries and push queries with limit statements. Push queries without limit statements will
        never complete.

        Args:
            sql: The sql query to run
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used

        Returns:
            BatchedQueryResult: Contains the results from the query executed
        """
        streamed_query_result: StreamedQueryResult = await self.stream_query_async(sql, timeout)
        query_id: str = streamed_query_result.query_id
        schema: Schema = streamed_query_result.schema
        rows: List[Row] = []
        # Drain the queue until the None sentinel that marks the end of the rows.
        row = await streamed_query_result.row_queue.get()
        while row is not None:
            rows.append(row)
            row = await streamed_query_result.row_queue.get()
        return BatchedQueryResult(query_id=query_id, schema=schema, rows=rows)

    async def stream_query_async(self, sql: str, timeout: float = None) -> StreamedQueryResult:
        """
        Executes a query. Note that this is limited to pull and push queries and does not cover
        other statement type commands. This is appropriate for queries which may not run to
        immediate completion, so this works well for both pull and push queries.

        Args:
            sql: The sql query to run
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used

        Returns:
            StreamedQueryResult: Contains the results from the query executed

        Raises:
            KsqlDbClientException: If the request fails before the response header
                has been received
        """
        # The Future that indicates if the query result is available or not.
        # The Future will be set once we receive the Header of the query response.
        completion_source: Future = asyncio.Future()
        task = asyncio.create_task(self._execute_request(sql, completion_source, timeout))
        # Hold a reference until the task finishes so it cannot disappear
        # mid-execution; the callback drops the reference again.
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        await completion_source
        return completion_source.result()

    # Executes the given sql
    async def _execute_request(self,
                               sql: str,
                               completion_source: Future,
                               timeout: float = None):
        """
        Sends ``sql`` to the ``query-stream`` endpoint and processes the streamed response.

        The header line resolves ``completion_source``; every row line is put on the
        row queue handed to the result, followed by a ``None`` sentinel once the
        response is exhausted. Failures are reported through ``completion_source``
        or, once it is already resolved, through the ``status`` of its result.

        Args:
            sql: The sql query to run
            completion_source: The Future resolved with the StreamedQueryResult
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used
        """
        # Build the URL we'll use for the request.
        path: str = 'query-stream'
        url: URL = URL(f"{self.scheme}://{self.host}:{self.port}/{path}")

        # Construct the request
        data: dict = {'sql': sql}
        headers: dict = {
            self._header_accept: 'application/vnd.ksql.v1+json',
            self._header_user_agent: f'{Client._user_agent_name} v{get_client_version()}'
        }

        # Construct the auth
        auth: BasicAuth = BasicAuth(self.api_key, self.secret)

        # An explicit timeout=None tells httpx to disable timeouts for this request,
        # which would silently override the timeout configured in the constructor.
        effective_timeout = self.timeout if timeout is None else timeout

        try:
            # Issue the request and handle the response
            async with self.client.stream(method='POST', url=url, headers=Headers(headers),
                                          json=data, auth=auth,
                                          timeout=effective_timeout) as response:
                try:
                    response.raise_for_status()
                except httpx.RequestError as ex:
                    return completion_source.set_exception(KsqlDbClientException(
                        f"An error occurred while requesting {ex.request.url!r}."))
                except httpx.HTTPStatusError as ex:
                    return completion_source.set_exception(
                        KsqlDbClientException(f"Error response {ex.response.status_code} "
                                              f"while requesting {ex.request.url!r}."))

                # Construct the queue that will hold the rows
                row_queue: Queue = Queue()
                async for row in response.aiter_lines():
                    streamed_row: StreamedRow = \
                        StreamedRow.from_json(_canonicalize_json_string(row))
                    if streamed_row.header or streamed_row.row:
                        await _handle_streamed_row(streamed_row, row_queue, completion_source)

                # Add sentinel None after adding all rows to indicate the end
                await row_queue.put(None)

                # A response without a header would otherwise leave the caller
                # waiting on completion_source forever.
                if not completion_source.done():
                    completion_source.set_exception(KsqlDbClientException(
                        'Query response ended before a header was received.'))
        except Exception as ex:
            exception: KsqlDbClientException = KsqlDbClientException('Unexpected Client Error',
                                                                     KsqlDbClientException(str(ex)))
            if not completion_source.done():
                # The header never arrived: fail the caller awaiting the result.
                return completion_source.set_exception(exception)
            if completion_source.cancelled() or completion_source.exception() is not None:
                # Already cancelled or failed; there is no result to report through.
                return None
            # The caller already holds the result, and a resolved Future cannot be
            # given an exception, so the failure is reported on the result's status.
            # NOTE(review): no end-of-stream sentinel is queued on this path, so a
            # consumer blocked on row_queue is not woken up - confirm intended.
            result: StreamedQueryResult = completion_source.result()
            result.status.set_exception(exception)
            return None

    async def close(self):
        """
        Closes the client and disposes of any resources that it uses,
        including the underlying httpx AsyncClient and its resources.
        """
        await self.client.aclose()

Classes

class Client (host: str, port: int, api_key: str = '', secret: str = '', use_tls: bool = False, timeout: float = None)

A client object for issuing commands to ksqlDB.

Constructor for the client.

Args

host
The host of the ksqlDB server
port
The port of the ksqlDB server. Usually 8088
api_key
The api_key to authenticate with the ksqlDB server
secret
The secret to authenticate with the ksqlDB server
use_tls
Whether to connect over TLS (https) instead of plain http
timeout
timeout in seconds
Expand source code
class Client:
    """
    A client object for issuing commands to ksqlDB.

    Queries are POSTed to the server's ``query-stream`` endpoint through a single
    ``httpx.AsyncClient`` configured for HTTP/2 only. Call ``close`` once the client
    is no longer needed.
    """
    _header_accept: str = 'Accept'
    _header_user_agent: str = 'User-agent'
    _user_agent_name: str = 'ksqlDB Python Client'

    def __init__(self,
                 host: str,
                 port: int,
                 api_key: str = '',
                 secret: str = '',
                 use_tls: bool = False,
                 timeout: float = None):
        """
        Constructor for the client.

        Args:
            host: The host of the ksqlDB server
            port: The port of the ksqlDB server. Usually 8088
            api_key: The api_key to authenticate with the ksqlDB server
            secret: The secret to authenticate with the ksqlDB server
            use_tls: Whether to connect with the https scheme instead of http
            timeout: Default timeout in seconds, used by every query that does not
                pass its own timeout
        """
        self.host = host
        self.port = port
        self.scheme = 'https' if use_tls else 'http'
        self.api_key = api_key
        self.secret = secret
        self.timeout = timeout

        # Strong references to in-flight request tasks. asyncio itself only keeps
        # weak references to tasks, so an otherwise unreferenced task could be
        # garbage-collected before it finishes.
        self._background_tasks: set = set()

        self.client = httpx.AsyncClient(
            http1=False,
            http2=True,
            timeout=self.timeout)

    async def execute_query_async(self, sql: str, timeout: float = None) -> BatchedQueryResult:
        """
        Executes a query. Note that this is limited to pull and push queries and does not cover
        other statement type commands.

        Also, this is appropriate for queries which can run to completion in a batch, namely pull
        queries and push queries with limit statements. Push queries without limit statements will
        never complete.

        Args:
            sql: The sql query to run
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used

        Returns:
            BatchedQueryResult: Contains the results from the query executed
        """
        streamed_query_result: StreamedQueryResult = await self.stream_query_async(sql, timeout)
        query_id: str = streamed_query_result.query_id
        schema: Schema = streamed_query_result.schema
        rows: List[Row] = []
        # Drain the queue until the None sentinel that marks the end of the rows.
        row = await streamed_query_result.row_queue.get()
        while row is not None:
            rows.append(row)
            row = await streamed_query_result.row_queue.get()
        return BatchedQueryResult(query_id=query_id, schema=schema, rows=rows)

    async def stream_query_async(self, sql: str, timeout: float = None) -> StreamedQueryResult:
        """
        Executes a query. Note that this is limited to pull and push queries and does not cover
        other statement type commands. This is appropriate for queries which may not run to
        immediate completion, so this works well for both pull and push queries.

        Args:
            sql: The sql query to run
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used

        Returns:
            StreamedQueryResult: Contains the results from the query executed

        Raises:
            KsqlDbClientException: If the request fails before the response header
                has been received
        """
        # The Future that indicates if the query result is available or not.
        # The Future will be set once we receive the Header of the query response.
        completion_source: Future = asyncio.Future()
        task = asyncio.create_task(self._execute_request(sql, completion_source, timeout))
        # Hold a reference until the task finishes so it cannot disappear
        # mid-execution; the callback drops the reference again.
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        await completion_source
        return completion_source.result()

    # Executes the given sql
    async def _execute_request(self,
                               sql: str,
                               completion_source: Future,
                               timeout: float = None):
        """
        Sends ``sql`` to the ``query-stream`` endpoint and processes the streamed response.

        The header line resolves ``completion_source``; every row line is put on the
        row queue handed to the result, followed by a ``None`` sentinel once the
        response is exhausted. Failures are reported through ``completion_source``
        or, once it is already resolved, through the ``status`` of its result.

        Args:
            sql: The sql query to run
            completion_source: The Future resolved with the StreamedQueryResult
            timeout: timeout in seconds; when None, the timeout given to the
                constructor is used
        """
        # Build the URL we'll use for the request.
        path: str = 'query-stream'
        url: URL = URL(f"{self.scheme}://{self.host}:{self.port}/{path}")

        # Construct the request
        data: dict = {'sql': sql}
        headers: dict = {
            self._header_accept: 'application/vnd.ksql.v1+json',
            self._header_user_agent: f'{Client._user_agent_name} v{get_client_version()}'
        }

        # Construct the auth
        auth: BasicAuth = BasicAuth(self.api_key, self.secret)

        # An explicit timeout=None tells httpx to disable timeouts for this request,
        # which would silently override the timeout configured in the constructor.
        effective_timeout = self.timeout if timeout is None else timeout

        try:
            # Issue the request and handle the response
            async with self.client.stream(method='POST', url=url, headers=Headers(headers),
                                          json=data, auth=auth,
                                          timeout=effective_timeout) as response:
                try:
                    response.raise_for_status()
                except httpx.RequestError as ex:
                    return completion_source.set_exception(KsqlDbClientException(
                        f"An error occurred while requesting {ex.request.url!r}."))
                except httpx.HTTPStatusError as ex:
                    return completion_source.set_exception(
                        KsqlDbClientException(f"Error response {ex.response.status_code} "
                                              f"while requesting {ex.request.url!r}."))

                # Construct the queue that will hold the rows
                row_queue: Queue = Queue()
                async for row in response.aiter_lines():
                    streamed_row: StreamedRow = \
                        StreamedRow.from_json(_canonicalize_json_string(row))
                    if streamed_row.header or streamed_row.row:
                        await _handle_streamed_row(streamed_row, row_queue, completion_source)

                # Add sentinel None after adding all rows to indicate the end
                await row_queue.put(None)

                # A response without a header would otherwise leave the caller
                # waiting on completion_source forever.
                if not completion_source.done():
                    completion_source.set_exception(KsqlDbClientException(
                        'Query response ended before a header was received.'))
        except Exception as ex:
            exception: KsqlDbClientException = KsqlDbClientException('Unexpected Client Error',
                                                                     KsqlDbClientException(str(ex)))
            if not completion_source.done():
                # The header never arrived: fail the caller awaiting the result.
                return completion_source.set_exception(exception)
            if completion_source.cancelled() or completion_source.exception() is not None:
                # Already cancelled or failed; there is no result to report through.
                return None
            # The caller already holds the result, and a resolved Future cannot be
            # given an exception, so the failure is reported on the result's status.
            # NOTE(review): no end-of-stream sentinel is queued on this path, so a
            # consumer blocked on row_queue is not woken up - confirm intended.
            result: StreamedQueryResult = completion_source.result()
            result.status.set_exception(exception)
            return None

    async def close(self):
        """
        Closes the client and disposes of any resources that it uses,
        including the underlying httpx AsyncClient and its resources.
        """
        await self.client.aclose()

Methods

async def close(self)

Closes the client and disposes of any resources that it uses, including the underlying httpx AsyncClient and its resources.

Expand source code
async def close(self):
    """
    Shuts the client down.

    Awaits ``aclose()`` on the HTTP client held in ``self.client``, which disposes
    of that client and the resources it uses.
    """
    http_client = self.client
    await http_client.aclose()
async def execute_query_async(self, sql: str, timeout: float = None) ‑> BatchedQueryResult

Executes a query. Note that this is limited to pull and push queries and does not cover other statement type commands.

Also, this is appropriate for queries which can run to completion in a batch, namely pull queries and push queries with limit statements. Push queries without limit statements will never complete.

Args

sql
The sql query to run
timeout
timeout in seconds

Returns

BatchedQueryResult
Contains the results from the query executed
Expand source code
async def execute_query_async(self, sql: str, timeout: float = None) -> BatchedQueryResult:
    """
    Runs a query and gathers all of its rows into a single batch.

    Only pull and push queries are supported; other statement types are not. The
    call returns once the row stream has ended, so it suits pull queries and push
    queries with a limit. A push query without a limit never completes.

    Args:
        sql: The sql query to run
        timeout: timeout in seconds

    Returns:
        BatchedQueryResult: The query id, schema and all rows of the executed query
    """
    stream = await self.stream_query_async(sql, timeout)
    result_id: str = stream.query_id
    result_schema = stream.schema

    collected: List[Row] = []
    while True:
        item = await stream.row_queue.get()
        # None is the sentinel marking the end of the rows.
        if item is None:
            break
        collected.append(item)

    return BatchedQueryResult(query_id=result_id, schema=result_schema, rows=collected)
async def stream_query_async(self, sql: str, timeout: float = None) ‑> StreamedQueryResult

Executes a query. Note that this is limited to pull and push queries and does not cover other statement type commands. This is appropriate for queries which may not run to immediate completion, so this works well for both pull and push queries.

Args

sql
The sql query to run
timeout
timeout in seconds

Returns

StreamedQueryResult
Contains the results from the query executed
Expand source code
async def stream_query_async(self, sql: str, timeout: float = None) -> StreamedQueryResult:
    """
    Starts a query and returns as soon as its result object is available.

    Only pull and push queries are supported; other statement types are not. The
    request keeps running in a separate task after this call returns, so this
    works for queries that do not complete immediately, pull and push alike.

    Args:
        sql: The sql query to run
        timeout: timeout in seconds

    Returns:
        StreamedQueryResult: Contains the results from the query executed
    """
    # Resolved by the request task once the header of the query response
    # has been received.
    header_received: Future = asyncio.Future()
    request = self._execute_request(sql, header_received, timeout)
    asyncio.create_task(request)
    return await header_received