11#include " LocalChdb.h"
2+ #include " AIQueryProcessor.h"
23#include " chdb-internal.h"
34#include " PandasDataFrameBuilder.h"
45#include " ChunkCollectorOutputFormat.h"
910#include < pybind11/pybind11.h>
1011#include < Poco/String.h>
1112#include < Common/logger_useful.h>
13+ #include < Common/quoteString.h>
1214#if USE_JEMALLOC
1315# include < Common/memory.h>
1416#endif
1517
18+ #include < iostream>
19+ #include < sstream>
20+ #include < stdexcept>
21+
1622namespace py = pybind11;
1723
1824extern bool inside_main = true ;
@@ -94,6 +100,27 @@ memoryview_wrapper * query_result::get_memview()
94100 return new memoryview_wrapper (this ->result_wrapper );
95101}
96102
103+ #if USE_CLIENT_AI
104+ namespace
105+ {
106+
107+ bool extractAIPrompt (const std::string & query, std::string & prompt_out)
108+ {
109+ auto trimmed = Poco::trimLeft (query);
110+ if (!trimmed.starts_with (" ??" ))
111+ return false ;
112+
113+ auto prompt = Poco::trimLeft (trimmed.substr (2 ));
114+ if (prompt.empty ())
115+ throw std::runtime_error (" Please provide a natural language query after ??" );
116+
117+ prompt_out = prompt;
118+ return true ;
119+ }
120+
121+ }
122+ #endif
123+
97124
98125// Parse SQLite-style connection string
99126std::pair<std::string, std::map<std::string, std::string>> connection_wrapper::parse_connection_string (const std::string & conn_str)
@@ -220,6 +247,27 @@ connection_wrapper::build_clickhouse_args(const std::string & path, const std::m
220247 return argv;
221248}
222249
250+ std::string connection_wrapper::preprocessQuery (const std::string & query_str)
251+ {
252+ #if USE_CLIENT_AI
253+ try
254+ {
255+ if (!ai_processor)
256+ ai_processor = std::make_unique<AIQueryProcessor>(conn);
257+ return ai_processor->preprocess (query_str);
258+ }
259+ catch (const std::exception & e)
260+ {
261+ throw std::runtime_error (std::string (" AI SQL generation failed: " ) + e.what ());
262+ }
263+ #else
264+ auto trimmed = Poco::trimLeft (query_str);
265+ if (trimmed.starts_with (" ??" ))
266+ throw std::runtime_error (" AI SQL generation is not available in this build. Rebuild with USE_CLIENT_AI enabled." );
267+ return query_str;
268+ #endif
269+ }
270+
223271connection_wrapper::connection_wrapper (const std::string & conn_str)
224272{
225273 auto [path, params] = parse_connection_string (conn_str);
@@ -263,15 +311,37 @@ void connection_wrapper::commit()
263311 // do nothing
264312}
265313
/// Tells whether `processed` is SQL that preprocessing produced from a `??` prompt.
///
/// @param original   Query text as supplied by the caller.
/// @param processed  Query text after preprocessing.
/// @return true when `original` starts with `??` (after leading whitespace) and `processed`
///         differs from it; always false in builds without USE_CLIENT_AI.
///
/// The function and both parameters are marked [[maybe_unused]]: without USE_CLIENT_AI the
/// parameters are not read, and the function may have no remaining call sites, which would
/// otherwise raise unused-parameter / unused-function diagnostics.
[[maybe_unused]] static bool isAIGenSqlQuery(
    [[maybe_unused]] const std::string & original,
    [[maybe_unused]] const std::string & processed)
{
#if USE_CLIENT_AI
    const auto trimmed = Poco::trimLeft(original);
    return trimmed.starts_with("??") && processed != original;
#else
    return false;
#endif
}
323+
266324query_result * connection_wrapper::query (const std::string & query_str, const std::string & format)
267325{
268326 if (Poco::toLower (format) == " dataframe" )
269327 throw std::runtime_error (" Unsupported output format dataframe, please use 'query_df' function" );
270328
271- CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query_str);
329+ auto query = preprocessQuery (query_str);
330+
331+ #if USE_CLIENT_AI
332+ if (isAIGenSqlQuery (query_str, query))
333+ {
334+ // Return generated SQL as plain text without executing
335+ auto sql_literal = " SELECT " + DB::quoteString (query) + " AS query FORMAT Raw" ;
336+ auto * result = chdb_query_n (*conn, sql_literal.data (), sql_literal.size (), " Raw" , 3 );
337+ return new query_result (result, false );
338+ }
339+ #endif
340+
341+ CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query);
272342 py::gil_scoped_release release;
273343
274- auto * result = chdb_query_n (*conn, query_str .data (), query_str .size (), format.data (), format.size ());
344+ auto * result = chdb_query_n (*conn, query .data (), query .size (), format.data (), format.size ());
275345
276346 const auto & error_msg = CHDB::chdb_result_error_string (result);
277347 if (!error_msg.empty ())
@@ -291,12 +361,14 @@ py::object connection_wrapper::query_df(const std::string & query_str)
291361 chdb_result * result = nullptr ;
292362 CHDB::ChunkQueryResult * chunk_result = nullptr ;
293363
294- CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query_str);
364+ auto query = preprocessQuery (query_str);
365+
366+ CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query);
295367
296368 {
297369 py::gil_scoped_release release;
298370
299- result = chdb_query_n (*conn, query_str .data (), query_str .size (), format.data (), format.size ());
371+ result = chdb_query_n (*conn, query .data (), query .size (), format.data (), format.size ());
300372
301373 const auto & error_msg = CHDB::chdb_result_error_string (result);
302374 if (!error_msg.empty ())
@@ -319,9 +391,10 @@ py::object connection_wrapper::query_df(const std::string & query_str)
319391
320392streaming_query_result * connection_wrapper::send_query (const std::string & query_str, const std::string & format)
321393{
322- CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query_str);
394+ auto query = preprocessQuery (query_str);
395+ CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(*conn), query);
323396 py::gil_scoped_release release;
324- auto * result = chdb_stream_query_n (*conn, query_str .data (), query_str .size (), format.data (), format.size ());
397+ auto * result = chdb_stream_query_n (*conn, query .data (), query .size (), format.data (), format.size ());
325398 const auto & error_msg = CHDB::chdb_result_error_string (result);
326399 if (!error_msg.empty ())
327400 {
@@ -397,10 +470,11 @@ void connection_wrapper::streaming_cancel_query(streaming_query_result * streami
397470void cursor_wrapper::execute (const std::string & query_str)
398471{
399472 release_result ();
400- CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(conn->get_conn ()), query_str);
473+ auto query = conn->preprocessQuery (query_str);
474+ CHDB::cachePythonTablesFromQuery (reinterpret_cast <chdb_conn *>(conn->get_conn ()), query);
401475 // Use JSONCompactEachRowWithNamesAndTypes format for better type support
402476 py::gil_scoped_release release;
403- current_result = chdb_query_n (conn->get_conn (), query_str .data (), query_str .size (), CURSOR_DEFAULT_FORMAT, CURSOR_DEFAULT_FORMAT_LEN);
477+ current_result = chdb_query_n (conn->get_conn (), query .data (), query .size (), CURSOR_DEFAULT_FORMAT, CURSOR_DEFAULT_FORMAT_LEN);
404478}
405479
406480
0 commit comments