71 examples
SQL injection
Malicious input injected into database queries, posing security risks.
[ FAQ1 ]
What is SQL injection?
SQL injection occurs when untrusted input provided by a user is directly incorporated into SQL queries without proper sanitization or validation. Attackers exploit this vulnerability by entering specially crafted SQL code into input fields, manipulating the database logic to execute unintended commands. Potential outcomes include unauthorized access, data leaks, database corruption, or even server compromise. SQL injection vulnerabilities often result from inadequate input validation and insecure query construction methods, making them one of the most common and dangerous security risks.
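For a concrete illustration, here is a minimal Python sketch (using the standard sqlite3 module; the table, data, and function names are made up for this example) of how string-built SQL lets crafted input change the query's logic:

```python
import sqlite3

# Minimal in-memory database for the demonstration (schema is illustrative).
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (username TEXT, password TEXT)")
conn.execute("INSERT INTO users VALUES ('alice', 'secret')")

def vulnerable_login(username: str, password: str) -> bool:
    # Untrusted input is concatenated straight into the SQL text.
    query = (
        "SELECT * FROM users WHERE username = '" + username + "'"
        " AND password = '" + password + "'"
    )
    return conn.execute(query).fetchone() is not None

# A crafted password turns the WHERE clause into a tautology,
# so the check passes without knowing any real credentials.
print(vulnerable_login("alice", "wrong' OR '1'='1"))  # True
```

The parameterized version shown in the next answer closes exactly this hole.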
[ FAQ2 ]
How to prevent SQL injection
To prevent SQL injection, always use parameterized queries or prepared statements in your database interactions. These techniques separate user input from the executable SQL text, so malicious input cannot run as part of the query. Add robust input validation to enforce strict rules on what values are accepted, further reducing exposure. Follow OWASP best practices by consistently sanitizing and encoding input data before processing it. Additionally, maintain regular security audits, keep software components up to date, and train developers on secure coding standards to proactively protect your databases against SQL injection attacks.
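As a minimal sketch of the parameterized approach described above (Python's sqlite3 `?` placeholders; other drivers use `%s` or named parameters, but the principle is the same):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (username TEXT, password TEXT)")
conn.execute("INSERT INTO users VALUES ('alice', 'secret')")

def safe_login(username: str, password: str) -> bool:
    # Placeholders keep user input strictly as data; the SQL text never changes.
    query = "SELECT * FROM users WHERE username = ? AND password = ?"
    return conn.execute(query, (username, password)).fetchone() is not None

print(safe_login("alice", "secret"))            # True
print(safe_login("alice", "wrong' OR '1'='1"))  # False: the payload stays a literal string
```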
diff block
text("DROP SCHEMA IF EXISTS %(schema_name)s CASCADE"),
{"schema_name": tenant_id},
)
+
+
+def get_current_alembic_version(tenant_id: str) -> str:
+ """Get the current Alembic version for a tenant."""
+ from alembic.runtime.migration import MigrationContext
+ from sqlalchemy import text
+
+ engine = get_sqlalchemy_engine()
+
+ # Set the search path to the tenant's schema
+ with engine.connect() as connection:
+ connection.execute(text(f"SET search_path TO {tenant_id}"))
greptile
logic: SQL injection vulnerability: tenant_id is directly interpolated into SQL string. Use parameterized query instead.
suggested fix
+ connection.execute(text("SET search_path TO :schema_name"), {"schema_name": tenant_id})
diff block
]
+class ExclusionFilter(filters.BaseFilterBackend):
+ """
+ We often wish to exclude patterns 'posthog-s3-datawarehouse' in their URL pattern.
+ This insures that only externally added tables are returned by this api
+ """
+
+ def filter_queryset(self, request, queryset, view):
+ exclusion_pattern = request.query_params.get("exclusion_pattern", None)
+ if exclusion_pattern:
+ queryset = queryset.exclude(url_pattern__startswith=exclusion_pattern)
+ return queryset
greptile
style: Consider adding input validation for exclusion_pattern to prevent SQL injection or other malicious patterns
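The comment stops short of a concrete check; one possible shape for it (a sketch, not the project's actual code — the regex and limits are assumptions) is to reject patterns outside an expected character set before they reach the queryset:

```python
import re

# Hypothetical allow-list: URL-pattern prefixes built from common URL characters only.
_EXCLUSION_PATTERN_RE = re.compile(r"^[A-Za-z0-9_\-./:]{1,200}$")

def validate_exclusion_pattern(pattern: str) -> str:
    """Return the pattern if it looks sane, otherwise raise before it is used in a filter."""
    if not _EXCLUSION_PATTERN_RE.fullmatch(pattern):
        raise ValueError("invalid exclusion_pattern")
    return pattern
```

Note that Django's `.exclude(...__startswith=...)` already parameterizes the value, so a check like this is defense in depth against unexpected patterns rather than a strict injection fix.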
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.sqlinjection.introduction;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.is;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.owasp.webgoat.container.plugins.LessonTest;
+import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
+
+public class SqlInjectionLesson5aTest extends LessonTest {
+
+ @Test
+ public void knownAccountShouldDisplayData() throws Exception {
+ mockMvc
+ .perform(
+ MockMvcRequestBuilders.post("/SqlInjection/assignment5a")
+ .param("account", "Smith")
+ .param("operator", "")
+ .param("injection", ""))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("lessonCompleted", is(false)))
+ .andExpect(jsonPath("$.feedback", is(messages.getMessage("assignment.not.solved"))))
+ .andExpect(jsonPath("$.output", containsString("<p>USERID, FIRST_NAME")));
+ }
+
+ @Disabled
+ @Test
+ public void unknownAccount() throws Exception {
+ mockMvc
+ .perform(
+ MockMvcRequestBuilders.post("/SqlInjection/assignment5a")
+ .param("account", "Smith")
+ .param("operator", "")
+ .param("injection", ""))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("lessonCompleted", is(false)))
+ .andExpect(jsonPath("$.feedback", is(messages.getMessage("NoResultsMatched"))))
+ .andExpect(jsonPath("$.output").doesNotExist());
+ }
+
+ @Test
+ public void sqlInjection() throws Exception {
+ mockMvc
+ .perform(
+ MockMvcRequestBuilders.post("/SqlInjection/assignment5a")
+ .param("account", "'")
+ .param("operator", "OR")
+ .param("injection", "'1' = '1"))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("lessonCompleted", is(true)))
+ .andExpect(jsonPath("$.feedback", containsString("You have succeed")))
+ .andExpect(jsonPath("$.output").exists());
+ }
greptile
style: Test verifies successful SQL injection without checking the actual injected query result, only its existence
diff block
dataset_name: &String,
schema_name: &String,
credentials: &Credential,
+ database: Option<String>,
) -> Result<Vec<DatasetColumnRecord>> {
- let cols = match credentials {
+ let cols_result = match credentials {
Credential::Postgres(credentials) => {
- match get_postgres_columns(dataset_name, schema_name, credentials).await {
+ match get_postgres_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::MySQL(credentials) => {
- match get_mysql_columns(dataset_name, credentials).await {
+ match get_mysql_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Bigquery(credentials) => {
- match get_bigquery_columns(dataset_name, credentials).await {
+ match get_bigquery_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Snowflake(credentials) => {
- match get_snowflake_columns(dataset_name, credentials).await {
+ match get_snowflake_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ database,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
_ => return Err(anyhow!("Unsupported data source type")),
};
- Ok(cols)
+ Ok(cols_result)
}
-async fn get_postgres_columns(
- dataset_name: &String,
- schema_name: &String,
+pub async fn retrieve_dataset_columns_batch(
+ datasets: &[(String, String)], // Vec of (dataset_name, schema_name)
+ credentials: &Credential,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ match credentials {
+ Credential::Postgres(credentials) => {
+ get_postgres_columns_batch(datasets, credentials).await
+ }
+ Credential::MySQL(credentials) => get_mysql_columns_batch(datasets, credentials).await,
+ Credential::Bigquery(credentials) => {
+ get_bigquery_columns_batch(datasets, credentials).await
+ }
+ Credential::Snowflake(credentials) => {
+ get_snowflake_columns_batch(datasets, credentials, database).await
+ }
+ _ => Err(anyhow!("Unsupported data source type")),
+ }
+}
+
+async fn get_snowflake_columns_batch(
+ datasets: &[(String, String)],
+ credentials: &SnowflakeCredentials,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ let snowflake_client = get_snowflake_client(credentials).await?;
+
+ // Build the IN clause for (schema, table) pairs
+ let table_pairs: Vec<String> = datasets
+ .iter()
+ .map(|(table, schema)| format!("('{}', '{}')", schema.to_uppercase(), table.to_uppercase()))
+ .collect();
greptile
logic: SQL injection vulnerability in table_pairs formatting. Use parameterized queries or proper escaping for schema and table names
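Schema and table names generally cannot be bound as query parameters, so the practical options are to validate them against a strict pattern or escape them before interpolation. A language-agnostic sketch of that idea in Python (the code above is Rust; the helper names here are illustrative):

```python
import re

# Plain unquoted identifiers only: letters, digits, underscore, dollar sign.
_IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_$]*$")

def checked_name(name: str) -> str:
    """Reject anything that is not a plain identifier before it is
    formatted into a SQL literal."""
    if not _IDENTIFIER_RE.fullmatch(name):
        raise ValueError(f"unsafe identifier: {name!r}")
    return name.upper()

def build_table_pairs(datasets):
    # datasets: iterable of (table, schema) tuples, mirroring the code above.
    return ", ".join(
        f"('{checked_name(schema)}', '{checked_name(table)}')"
        for table, schema in datasets
    )
```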
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.sqlinjection.introduction;
+
+import static java.sql.ResultSet.CONCUR_UPDATABLE;
+import static java.sql.ResultSet.TYPE_SCROLL_SENSITIVE;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.failed;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.success;
+
+import java.sql.*;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import org.owasp.webgoat.container.LessonDataSource;
+import org.owasp.webgoat.container.assignments.AssignmentEndpoint;
+import org.owasp.webgoat.container.assignments.AssignmentHints;
+import org.owasp.webgoat.container.assignments.AttackResult;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.ResponseBody;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@AssignmentHints(
+ value = {
+ "SqlStringInjectionHint.8.1",
+ "SqlStringInjectionHint.8.2",
+ "SqlStringInjectionHint.8.3",
+ "SqlStringInjectionHint.8.4",
+ "SqlStringInjectionHint.8.5"
+ })
+public class SqlInjectionLesson8 implements AssignmentEndpoint {
+
+ private final LessonDataSource dataSource;
+
+ public SqlInjectionLesson8(LessonDataSource dataSource) {
+ this.dataSource = dataSource;
+ }
+
+ @PostMapping("/SqlInjection/attack8")
+ @ResponseBody
+ public AttackResult completed(@RequestParam String name, @RequestParam String auth_tan) {
+ return injectableQueryConfidentiality(name, auth_tan);
+ }
+
+ protected AttackResult injectableQueryConfidentiality(String name, String auth_tan) {
+ StringBuilder output = new StringBuilder();
+ String query =
+ "SELECT * FROM employees WHERE last_name = '"
+ + name
+ + "' AND auth_tan = '"
+ + auth_tan
+ + "'";
+
+ try (Connection connection = dataSource.getConnection()) {
+ try {
+ Statement statement =
+ connection.createStatement(
+ ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_UPDATABLE);
+ log(connection, query);
+ ResultSet results = statement.executeQuery(query);
+
+ if (results.getStatement() != null) {
+ if (results.first()) {
+ output.append(generateTable(results));
+ results.last();
+
+ if (results.getRow() > 1) {
+ // more than one record, the user succeeded
+ return success(this)
+ .feedback("sql-injection.8.success")
+ .output(output.toString())
+ .build();
+ } else {
+ // only one record
+ return failed(this).feedback("sql-injection.8.one").output(output.toString()).build();
+ }
+
+ } else {
+ // no results
+ return failed(this).feedback("sql-injection.8.no.results").build();
+ }
+ } else {
+ return failed(this).build();
+ }
+ } catch (SQLException e) {
+ return failed(this)
+ .output("<br><span class='feedback-negative'>" + e.getMessage() + "</span>")
+ .build();
+ }
+
+ } catch (Exception e) {
+ return failed(this)
+ .output("<br><span class='feedback-negative'>" + e.getMessage() + "</span>")
+ .build();
+ }
+ }
+
+ public static String generateTable(ResultSet results) throws SQLException {
+ ResultSetMetaData resultsMetaData = results.getMetaData();
+ int numColumns = resultsMetaData.getColumnCount();
+ results.beforeFirst();
+ StringBuilder table = new StringBuilder();
+ table.append("<table>");
+
+ if (results.next()) {
+ table.append("<tr>");
+ for (int i = 1; i < (numColumns + 1); i++) {
+ table.append("<th>" + resultsMetaData.getColumnName(i) + "</th>");
+ }
+ table.append("</tr>");
+
+ results.beforeFirst();
+ while (results.next()) {
+ table.append("<tr>");
+ for (int i = 1; i < (numColumns + 1); i++) {
+ table.append("<td>" + results.getString(i) + "</td>");
+ }
+ table.append("</tr>");
+ }
+
+ } else {
+ table.append("Query Successful; however no data was returned from this query.");
+ }
+
+ table.append("</table>");
+ return (table.toString());
+ }
+
+ public static void log(Connection connection, String action) {
+ action = action.replace('\'', '"');
+ Calendar cal = Calendar.getInstance();
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ String time = sdf.format(cal.getTime());
+
+ String logQuery =
+ "INSERT INTO access_log (time, action) VALUES ('" + time + "', '" + action + "')";
greptile
logic: Log query is vulnerable to SQL injection despite quote replacement. Use PreparedStatement here as well.
suggested fix
+ String logQuery = "INSERT INTO access_log (time, action) VALUES (?, ?)";
+ PreparedStatement statement = connection.prepareStatement(logQuery, TYPE_SCROLL_SENSITIVE, CONCUR_UPDATABLE);
+ statement.setString(1, time);
+ statement.setString(2, action);
diff block
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package mysql
import (
- "errors"
"fmt"
- "strings"
-
"github.com/clidey/whodb/core/src/engine"
+ "strings"
)
-func (p *MySQLPlugin) AddStorageUnit(config *engine.PluginConfig, schema string, storageUnit string, fields map[string]string) (bool, error) {
- db, err := DB(config)
- if err != nil {
- return false, err
- }
-
- sqlDb, err := db.DB()
- if err != nil {
- return false, err
- }
- defer sqlDb.Close()
-
- if len(fields) == 0 {
- return false, errors.New("no fields provided for table creation")
- }
-
- var columns []string
- for field, fieldType := range fields {
- columns = append(columns, fmt.Sprintf("`%s` %s", field, fieldType))
- }
-
- createTableSQL := fmt.Sprintf("CREATE TABLE `%s`.`%s` (%s);", schema, storageUnit, strings.Join(columns, ", "))
-
- if err := db.Exec(createTableSQL).Error; err != nil {
- return false, err
- }
-
- return true, nil
-}
-func (p *MySQLPlugin) AddRow(config *engine.PluginConfig, schema string, storageUnit string, values []engine.Record) (bool, error) {
- db, err := DB(config)
- if err != nil {
- return false, err
- }
-
- sqlDb, err := db.DB()
- if err != nil {
- return false, err
- }
- defer sqlDb.Close()
-
- if len(values) == 0 {
- return false, errors.New("no values provided to insert into the table")
- }
-
- columns := make([]string, 0, len(values))
- placeholders := make([]string, 0, len(values))
- args := make([]interface{}, 0, len(values))
-
- for _, value := range values {
- columns = append(columns, fmt.Sprintf("`%s`", value.Key))
- if value.Extra["Config"] == "sql" {
- placeholders = append(placeholders, value.Value)
+func (p *MySQLPlugin) GetCreateTableQuery(schema string, storageUnit string, columns []engine.Record) string {
+ var columnDefs []string
+ for _, column := range columns {
+ columnDef := fmt.Sprintf("%s %s", column.Key, column.Value)
greptile
logic: Column identifiers are not escaped, potentially allowing SQL injection. Use backticks: `%s` %s
```suggestion
+ columnDef := fmt.Sprintf("`%s` %s", column.Key, column.Value)
```
diff block
return databases, nil
}
-func (p *Sqlite3Plugin) GetSchema(config *engine.PluginConfig) ([]string, error) {
+func (p *Sqlite3Plugin) GetAllSchemas(config *engine.PluginConfig) ([]string, error) {
return nil, errors.ErrUnsupported
}
-func (p *Sqlite3Plugin) GetStorageUnits(config *engine.PluginConfig, schema string) ([]engine.StorageUnit, error) {
- db, err := DB(config)
- if err != nil {
- return nil, err
- }
- sqlDb, err := db.DB()
- if err != nil {
- return nil, err
- }
- defer sqlDb.Close()
-
- storageUnits := []engine.StorageUnit{}
- rows, err := db.Raw(`
+func (p *Sqlite3Plugin) GetTableInfoQuery() string {
+ return `
SELECT
name AS table_name,
type AS table_type
FROM
sqlite_master
WHERE
type='table' AND name NOT LIKE 'sqlite_%'
- `).Rows()
- if err != nil {
- return nil, err
- }
- defer rows.Close()
-
- allTablesWithColumns, err := getTableSchema(db)
- if err != nil {
- return nil, err
- }
-
- for rows.Next() {
- var tableName, tableType string
- if err := rows.Scan(&tableName, &tableType); err != nil {
- log.Fatal(err)
- }
-
- var rowCount int64
- rowCountRow := db.Raw(fmt.Sprintf("SELECT COUNT(*) FROM '%s'", tableName)).Row()
- rowCountRow.Scan(&rowCount)
-
- attributes := []engine.Record{
- {Key: "Table Type", Value: tableType},
- {Key: "Count", Value: fmt.Sprintf("%d", rowCount)},
- }
-
- attributes = append(attributes, allTablesWithColumns[tableName]...)
-
- storageUnits = append(storageUnits, engine.StorageUnit{
- Name: tableName,
- Attributes: attributes,
- })
- }
- return storageUnits, nil
+ `
}
-func getTableSchema(db *gorm.DB) (map[string][]engine.Record, error) {
- var tables []struct {
- TableName string `gorm:"column:table_name"`
+func (p *Sqlite3Plugin) GetTableNameAndAttributes(rows *sql.Rows, db *gorm.DB) (string, []engine.Record) {
+ var tableName, tableType string
+ if err := rows.Scan(&tableName, &tableType); err != nil {
+ log.Fatal(err)
}
- query := `
- SELECT name AS table_name
- FROM sqlite_master
- WHERE type='table'
- `
- if err := db.Raw(query).Scan(&tables).Error; err != nil {
- return nil, err
+ var rowCount int64
+ rowCountRow := db.Raw(fmt.Sprintf("SELECT COUNT(*) FROM '%s'", tableName)).Row()
greptile
logic: potential SQL injection vulnerability in table name formatting. Use parameterized query instead
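Table names cannot be bound as parameters, so one common mitigation is to accept only names that actually exist in sqlite_master (via a parameterized lookup) and quote them as identifiers, instead of interpolating the raw string. A Python/sqlite3 sketch of that check (the plugin above is Go, so this shows only the shape of the idea):

```python
import sqlite3

def safe_row_count(conn: sqlite3.Connection, table_name: str) -> int:
    # Confirm the name refers to a real table; this comparison is parameterized.
    exists = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        (table_name,),
    ).fetchone()
    if exists is None:
        raise ValueError(f"unknown table: {table_name!r}")
    # Identifier is now known-good; quote it and escape any embedded double quotes.
    quoted = '"' + table_name.replace('"', '""') + '"'
    return conn.execute(f"SELECT COUNT(*) FROM {quoted}").fetchone()[0]
```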
diff block
Ok(())
}
+ /// Keeps the trigger alive
+ async fn keep_alive(&self, ids: &[i64], alert_timeout: i64, report_timeout: i64) -> Result<()> {
+ let now = now_micros();
+ let report_max_time = now
+ + Duration::try_seconds(report_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+ let alert_max_time = now
+ + Duration::try_seconds(alert_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+
+ let sql = format!(
+ "UPDATE scheduled_jobs SET end_time = CASE WHEN module = $1 THEN $2 ELSE $3 END WHERE id IN ({});",
+ ids.iter()
+ .map(|id| id.to_string())
+ .collect::<Vec<_>>()
+ .join(",")
+ );
greptile
logic: SQL injection vulnerability - the IDs array is directly interpolated into the SQL string. Should use parameterized queries with multiple bindings instead of string formatting.
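A sketch of the parameterized shape the comment asks for, written in Python with DB-API style placeholders (the original is Rust/sqlx, where the same idea is expressed by pushing one bind per id; function and parameter names here are illustrative):

```python
def build_keep_alive_query(ids, alert_module, alert_max_time, report_max_time):
    """Build the UPDATE with one placeholder per id instead of interpolating the ids."""
    if not ids:
        raise ValueError("ids must not be empty")
    placeholders = ", ".join(["%s"] * len(ids))
    sql = (
        "UPDATE scheduled_jobs "
        "SET end_time = CASE WHEN module = %s THEN %s ELSE %s END "
        f"WHERE id IN ({placeholders})"
    )
    # Every value, including each id, is passed to the driver as a bound parameter.
    params = [alert_module, alert_max_time, report_max_time, *ids]
    return sql, params
```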
diff block
Ok(())
}
+ /// Keeps the trigger alive
+ async fn keep_alive(&self, ids: &[i64], alert_timeout: i64, report_timeout: i64) -> Result<()> {
+ let now = now_micros();
+ let report_max_time = now
+ + Duration::try_seconds(report_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+ let alert_max_time = now
+ + Duration::try_seconds(alert_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+
+ let sql = format!(
+ "UPDATE scheduled_jobs SET end_time = CASE WHEN module = $1 THEN $2 ELSE $3 END WHERE id IN ({});",
+ ids.iter()
+ .map(|id| id.to_string())
+ .collect::<Vec<_>>()
+ .join(",")
+ );
greptile
logic: Potential SQL injection risk - the ids are being directly interpolated into the SQL string. Use parameterized queries with `ANY` or `UNNEST` instead.
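The `ANY` variant mentioned here keeps the SQL text fixed no matter how many ids there are. With psycopg2, for example, a Python list is adapted to a PostgreSQL array (a sketch under that assumption; in the Rust/sqlx code above, `= ANY($4)` with an array bind plays the same role):

```python
SQL = (
    "UPDATE scheduled_jobs "
    "SET end_time = CASE WHEN module = %s THEN %s ELSE %s END "
    "WHERE id = ANY(%s)"
)

def keep_alive(cursor, ids, alert_module, alert_max_time, report_max_time):
    # psycopg2 adapts the Python list to a PostgreSQL array, so the statement
    # text never changes and no id value is spliced into the SQL string.
    cursor.execute(SQL, (alert_module, alert_max_time, report_max_time, list(ids)))
```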
diff block
if let Some(when_to_use) = &dataset.when_to_use {
ddl.push_str(&format!(" -- Description: {}\n", when_to_use));
}
+
+ let schema_identifier = if let Some(db_id) = &dataset.database_identifier {
+ format!("{}.{}", db_id, dataset.schema)
+ } else {
+ dataset.schema.clone()
+ };
greptile
style: Consider adding validation for database_identifier to ensure it doesn't contain invalid characters or SQL injection risks
suggested fix
let schema_identifier = if let Some(db_id) = &dataset.database_identifier {
+ // Validate and escape database identifier
+ let sanitized_db = db_id.replace(|c: char| !c.is_alphanumeric() && c != '_', "");
+ format!("{}.{}", sanitized_db, dataset.schema)
} else {
dataset.schema.clone()
};
diff block
=== Injectable Stored Procedure (Microsoft SQL Server)
-------------------------------------------------------
-CREATE PROEDURE getUser(@lastName nvarchar(25))
-AS
+CREATE PROCEDURE getUser(@lastName nvarchar(25))
+AS
declare @sql nvarchar(255)
set @sql = 'SELECT * FROM users WHERE
lastname = + @LastName + '
greptile
logic: This stored procedure is vulnerable to SQL injection due to string concatenation. The @LastName parameter should be properly parameterized instead of concatenated.
suggested fix
+set @sql = 'SELECT * FROM users WHERE lastname = @LastName'
diff block
SqlStringInjectionHint1=\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0431\u0435\u0440\u0451\u0442 \u0442\u043e \u0447\u0442\u043e \u0432\u044b \u0432\u0432\u043e\u0434\u0438\u0442\u0435 \u0438 \u0432\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0432 \u043a\u043e\u043d\u0435\u0446 \u0437\u0430\u0440\u0430\u043d\u0435\u0435 \u0441\u0444\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0433\u043e SQL-\u0437\u0430\u043f\u0440\u043e\u0441\u0430.
SqlStringInjectionHint2=\u0412\u043e\u0442 \u043a\u043e\u0434 \u0437\u0430\u043f\u0440\u043e\u0441\u0430, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0441\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0438 \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0435\u0442\u0441\u044f WebGoat`\u043e\u043c:<br><br> "SELECT * FROM user_data WHERE last_name = "accountName"
SqlStringInjectionHint3=\u0426\u0435\u043b\u043e\u0441\u0442\u043d\u043e\u0441\u0442\u044c SQL-\u0437\u0430\u043f\u0440\u043e\u0441\u0430 \u043c\u043e\u0436\u043d\u043e \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0438\u0442\u044c \u043f\u0440\u043e\u0432\u0435\u0434\u044f \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043f\u0440\u043e\u0432\u0435\u0440\u043e\u043a \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c \u0442\u0430\u043a\u0438\u0445 \u043a\u043b\u044e\u0447\u0435\u0432\u044b\u0445 \u0441\u043b\u043e\u0432 \u043a\u0430\u043a AND \u0438 OR. \u041f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0441\u043e\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u0442\u0430\u043a\u043e\u0435 SQL-\u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435, \u043a\u043e\u0442\u043e\u0440\u043e\u0435 \u0432\u0441\u0435\u0433\u0434\u0430 \u0431\u0443\u0434\u0435\u0442 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0442\u044c \u0438\u0441\u0442\u0438\u043d\u0443.
-SqlStringInjectionHint4=\u041f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0432\u0432\u0435\u0441\u0442\u0438 [ smith' OR '1' = '1 ].
\ No newline at end of file
+SqlStringInjectionHint4=\u041f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0432\u0432\u0435\u0441\u0442\u0438 [ smith' OR '1' = '1 ].
greptile
style: The example SQL injection payload is shown in square brackets which may confuse users about whether the brackets are part of the payload.
suggested fix
+SqlStringInjectionHint4=\u041f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0432\u0432\u0435\u0441\u0442\u0438 smith' OR '1' = '1.
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.sqlinjection.introduction;
+
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.failed;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.success;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import org.owasp.webgoat.container.LessonDataSource;
+import org.owasp.webgoat.container.assignments.AssignmentEndpoint;
+import org.owasp.webgoat.container.assignments.AssignmentHints;
+import org.owasp.webgoat.container.assignments.AttackResult;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.ResponseBody;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@AssignmentHints(
+ value = {
+ "SqlStringInjectionHint.10.1",
+ "SqlStringInjectionHint.10.2",
+ "SqlStringInjectionHint.10.3",
+ "SqlStringInjectionHint.10.4",
+ "SqlStringInjectionHint.10.5",
+ "SqlStringInjectionHint.10.6"
+ })
+public class SqlInjectionLesson10 implements AssignmentEndpoint {
+
+ private final LessonDataSource dataSource;
+
+ public SqlInjectionLesson10(LessonDataSource dataSource) {
+ this.dataSource = dataSource;
+ }
+
+ @PostMapping("/SqlInjection/attack10")
+ @ResponseBody
+ public AttackResult completed(@RequestParam String action_string) {
+ return injectableQueryAvailability(action_string);
+ }
+
+ protected AttackResult injectableQueryAvailability(String action) {
+ StringBuilder output = new StringBuilder();
+ String query = "SELECT * FROM access_log WHERE action LIKE '%" + action + "%'";
greptile
logic: Direct string concatenation of user input into SQL query creates a critical SQL injection vulnerability. Use PreparedStatement instead.
suggested fix
+ String query = "SELECT * FROM access_log WHERE action LIKE ?";
diff block
public List<Server> sort(@RequestParam String column) throws Exception {
List<Server> servers = new ArrayList<>();
- try (Connection connection = dataSource.getConnection();
- PreparedStatement preparedStatement = connection.prepareStatement("select id, hostname, ip, mac, status, description from servers where status <> 'out of order' order by " + column)) {
- ResultSet rs = preparedStatement.executeQuery();
- while (rs.next()) {
- Server server = new Server(rs.getString(1), rs.getString(2), rs.getString(3), rs.getString(4), rs.getString(5), rs.getString(6));
- servers.add(server);
+ try (var connection = dataSource.getConnection()) {
+ try (var statement = connection.prepareStatement("select id, hostname, ip, mac, status, description from SERVERS where status <> 'out of order' order by " + column)) {
greptile
logic: SQL injection vulnerability: the 'column' parameter is directly concatenated into the query without validation or parameterization. Consider using a whitelist of allowed column names or a prepared statement with parameters.
suggested fix
+ var allowedColumns = List.of("id", "hostname", "ip", "mac", "status", "description");
+ if (!allowedColumns.contains(column.toLowerCase())) {
+ throw new IllegalArgumentException("Invalid column name");
+ }
try (var statement = connection.prepareStatement("select id, hostname, ip, mac, status, description from SERVERS where status <> 'out of order' order by " + column)) {
diff block
+import { Prisma } from '@prisma/client';
+
+import { NodesUserUsageHistoryEntity } from '@modules/nodes-user-usage-history/entities/nodes-user-usage-history.entity';
+
+export class BulkUpsertHistoryEntryBuilder {
+ public query: Prisma.Sql;
+
+ constructor(usageHistoryList: NodesUserUsageHistoryEntity[]) {
+ this.query = this.getQuery(usageHistoryList);
+ return this;
+ }
+
+ public getQuery(usageHistoryList: NodesUserUsageHistoryEntity[]): Prisma.Sql {
+ const date = new Date(new Date().setMinutes(0, 0, 0));
+
+ const query = `
+ INSERT INTO nodes_user_usage_history (
+ "node_uuid",
+ "user_uuid",
+ "download_bytes",
+ "upload_bytes",
+ "total_bytes",
+ "created_at",
+ "updated_at"
+ ) VALUES ${usageHistoryList.map((usageHistory) => `('${usageHistory.nodeUuid}', '${usageHistory.userUuid}', ${usageHistory.downloadBytes}, ${usageHistory.uploadBytes}, ${usageHistory.totalBytes}, '${date.toISOString()}', NOW())`).join(',')}
greptile
logic: Direct string interpolation of user input creates SQL injection vulnerability. Use Prisma's parameterized queries instead.
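The same bulk insert can be built without splicing values into the SQL text by generating one placeholder group per row and binding every value. A generic Python sketch of the pattern (the project above uses Prisma, where `Prisma.sql` and `Prisma.join` serve the same purpose; the helper name is illustrative):

```python
def build_bulk_insert(rows, created_at):
    """rows: list of dicts with node_uuid, user_uuid, download_bytes,
    upload_bytes, total_bytes. Returns (sql, params) with every value bound."""
    if not rows:
        raise ValueError("rows must not be empty")
    row_placeholder = "(%s, %s, %s, %s, %s, %s, NOW())"
    values_sql = ", ".join([row_placeholder] * len(rows))
    sql = (
        "INSERT INTO nodes_user_usage_history "
        "(node_uuid, user_uuid, download_bytes, upload_bytes, total_bytes, created_at, updated_at) "
        f"VALUES {values_sql}"
    )
    params = []
    for r in rows:
        params.extend([
            r["node_uuid"], r["user_uuid"],
            r["download_bytes"], r["upload_bytes"], r["total_bytes"],
            created_at,
        ])
    return sql, params
```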
diff block
*self._get_metric_time_window(left=ast.Field(chain=["events", "timestamp"])),
event_filter,
*self._get_test_accounts_filter(),
- *self._get_metric_property_filters(),
],
),
)
+ case ExperimentFunnelMetricConfig() as metric_config:
+ return ast.SelectQuery(
+ select=[
+ ast.Field(chain=["events", "timestamp"]),
+ ast.Alias(alias="entity_id", expr=ast.Field(chain=["events", "person_id"])),
+ ast.Field(chain=["exposure_data", "variant"]),
+ ast.Field(chain=["events", "event"]),
+ ],
+ select_from=ast.JoinExpr(
+ table=ast.Field(chain=["events"]),
+ next_join=ast.JoinExpr(
+ table=exposure_query,
+ join_type="INNER JOIN",
+ alias="exposure_data",
+ constraint=ast.JoinConstraint(
+ expr=ast.CompareOperation(
+ left=ast.Field(chain=["events", "person_id"]),
+ right=ast.Field(chain=["exposure_data", "entity_id"]),
+ op=ast.CompareOperationOp.Eq,
+ ),
+ constraint_type="ON",
+ ),
+ ),
+ ),
+ where=ast.And(
+ exprs=[
+ *self._get_metric_time_window(left=ast.Field(chain=["events", "timestamp"])),
+ *self._get_test_accounts_filter(),
+ self._funnel_steps_to_filter(metric_config.funnel),
+ ],
+ ),
+ )
+
+ case _:
+ raise ValueError(f"Unsupported metric config: {self.metric.metric_config}")
+
+ def _funnel_steps_to_window_funnel_expr(self, funnel_config: ExperimentFunnelMetricConfig) -> ast.Expr:
+ """
+ Returns the expression for the window funnel. The expression returns 1 if the user completed the whole funnel, 0 if they didn't.
+ """
+ # TODO: get conversion time window from funnel config
+ num_steps = len(funnel_config.funnel)
+ conversion_time_window = 6048000000000000
+ funnel_steps_str = ", ".join(
+ [f"event = '{step.event}'" for step in sorted(funnel_config.funnel, key=lambda x: x.order)]
+ )
greptile
logic: SQL injection vulnerability - funnel step events need to be properly escaped
suggested fix
funnel_steps_str = ", ".join(
+ [f"event = {ast.Constant(value=step.event).sql()}" for step in sorted(funnel_config.funnel, key=lambda x: x.order)]
)
diff block
}
}
+ // Add HAVING clause if y-axis has operator and value
+ const valueForMapsFields =
+ dashboardPanelData.data.queries[
+ dashboardPanelData.layout.currentQueryIndex
+ ].fields.value_for_maps;
+
+ const havingClauses: any = [];
+
+ if (
+ valueForMapsFields?.havingConditions?.[0]?.operator &&
+ valueForMapsFields?.havingConditions?.[0]?.value !== undefined &&
+ valueForMapsFields?.havingConditions?.[0]?.value !== null
+ ) {
+ const columnName = valueForMapsFields.alias;
+ havingClauses.push(
+ `${columnName} ${valueForMapsFields.havingConditions[0].operator} ${valueForMapsFields.havingConditions[0].value}`,
+ );
+ }
greptile
logic: Potential SQL injection risk - value from havingConditions should be sanitized/escaped before being used in query
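Since a comparison operator cannot be bound as a parameter, the usual fix is an explicit allow-list for the operator plus a numeric check on the value before either reaches the query string. A minimal sketch in Python (the code above is TypeScript; names here are illustrative):

```python
import re

ALLOWED_OPERATORS = {"=", "!=", "<", "<=", ">", ">="}
_ALIAS_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

def build_having_clause(column_alias: str, operator: str, value) -> str:
    if not _ALIAS_RE.fullmatch(column_alias):
        raise ValueError(f"invalid column alias: {column_alias!r}")
    if operator not in ALLOWED_OPERATORS:
        raise ValueError(f"unsupported operator: {operator!r}")
    # Force the value to a number so arbitrary SQL cannot ride in through it.
    return f"{column_alias} {operator} {float(value)}"
```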
diff block
assert response.status_code == 200
assert res_json['data'][0][0] == str(pid)
+def test_compare_valid():
+ Tests.import_demo_data()
+
+ response = requests.get(f"{API_URL}/v1/compare?ids={RUN_3},{RUN_1}", timeout=15)
+ res_json = response.json()
+ assert response.status_code == 200
+
+ with open(f"{CURRENT_DIR}/../data/json/compare-{RUN_3},{RUN_1}.json", 'r', encoding='utf-8') as file:
+ data = json.load(file)
+
+ assert res_json['data'] == data
+
+def test_compare_fails():
+ Tests.import_demo_data()
+
+ DB().query(f"UPDATE runs SET commit_hash = 'test' WHERE id = '{RUN_1}' ")
greptile
logic: SQL injection vulnerability in f-string. Use parameterized query instead.
suggested fix
+ DB().query("UPDATE runs SET commit_hash = 'test' WHERE id = %s", (RUN_1,))
diff block
+import { Prisma } from '@prisma/client';
+
+export class RemoveInboundFromUsersBuilder {
+ public query: Prisma.Sql;
+
+ constructor(inboundUuid: string) {
+ this.query = this.getQuery(inboundUuid);
+ return this;
+ }
+
+ public getQuery(inboundUuid: string): Prisma.Sql {
+ const query = `
+DELETE FROM "public"."active_user_inbounds"
+WHERE "inbound_uuid" = '${inboundUuid}';
+ `;
greptile
logic: SQL injection risk: inboundUuid is interpolated directly. Use parameterized queries (e.g., Prisma.sql with bound parameters) for safer handling.
```suggestion
public getQuery(inboundUuid: string): Prisma.Sql {
+ const query = Prisma.sql`
DELETE FROM "public"."active_user_inbounds"
+WHERE "inbound_uuid" = ${inboundUuid};
`;
```
diff block
+import datetime as dt
+from typing import Any
+from posthog.clickhouse.client import sync_execute
+
+
+def get_sampled_session_ids(
+ started_after: dt.datetime,
+ started_before: dt.datetime,
+ sample_size: int,
+) -> list[tuple[str, int]]: # [(session_id, team_id), ...]
+ """Get a random sample of session IDs from the specified time range."""
+ query = """
+ SELECT DISTINCT session_id, team_id
+ FROM session_replay_events
+ WHERE min_first_timestamp >= %(started_after)s
+ AND max_last_timestamp <= %(started_before)s
+ ORDER BY rand() -- Random sampling
+ LIMIT %(sample_size)s
+ """
+
+ results = sync_execute(
+ query,
+ {
+ "started_after": started_after.strftime("%Y-%m-%d %H:%M:%S"),
+ "started_before": started_before.strftime("%Y-%m-%d %H:%M:%S"),
+ "sample_size": sample_size,
+ },
+ )
+ return [(str(row[0]), int(row[1])) for row in results]
+
+
+def get_session_metadata(team_id: int, session_id: str, table_name: str) -> dict[str, Any]:
+ """Get metadata counts for a specific session from the specified table."""
+ query = """
+ SELECT
+ session_id,
+ team_id,
+ any(distinct_id) as distinct_id,
+ min(min_first_timestamp) as min_first_timestamp_agg,
+ max(max_last_timestamp) as max_last_timestamp_agg,
+ argMinMerge(first_url) as first_url,
+ groupUniqArrayArray(all_urls) as all_urls,
+ sum(click_count) as click_count,
+ sum(keypress_count) as keypress_count,
+ sum(mouse_activity_count) as mouse_activity_count,
+ sum(active_milliseconds) as active_milliseconds,
+ sum(console_log_count) as console_log_count,
+ sum(console_warn_count) as console_warn_count,
+ sum(console_error_count) as console_error_count,
+ sum(event_count) as event_count,
+ argMinMerge(snapshot_source) as snapshot_source,
+ argMinMerge(snapshot_library) as snapshot_library
+ FROM {table}
+ WHERE team_id = %(team_id)s
+ AND session_id = %(session_id)s
+ GROUP BY session_id, team_id
+ LIMIT 1
+ """
+ result = sync_execute(
+ query.format(table=table_name),
greptile
logic: Direct string formatting of table_name is unsafe. Use query parameters instead to prevent SQL injection.
suggested fix
+ query.replace("{table}", table_name),
diff block
try {
return await db
.replicaNode()(TableName.Users)
+ .whereRaw('lower("username") = :username', { username: username.toLowerCase() })
.where({
greptile
logic: Same SQL injection risk as above. Also, this query now returns an array instead of a single result since .first() was removed.
suggested fix
.whereRaw('lower("username") = :username', { username: username.toLowerCase() })
+ .where({
+ isGhost: false
+ })
+ .first()
diff block
+import sqlite3
+
+conn = sqlite3.connect('database.db')
+cursor = conn.cursor()
+
+# Hardcoded credentials
+USERNAME = "admin"
+PASSWORD = "password123"
+
+def login(user, pw):
+ query = f"SELECT * FROM users WHERE username = '{user}' AND password = '{pw}'" # SQL Injection risk!
greptile
logic: Critical SQL injection vulnerability. Use parameterized queries instead:
suggested fix
+ query = "SELECT * FROM users WHERE username = ? AND password = ?"
+ result = cursor.execute(query, (user, pw)).fetchall()
diff block
parser_classes = [MultiPartParser, FileUploadParser]
def safely_get_queryset(self, queryset):
- return queryset.filter(team_id=self.team.id)
+ queryset = queryset.filter(team_id=self.team.id)
+ params = self.request.GET.dict()
+ status = params.get("status")
+ order_by = params.get("order_by")
+
+ if status == "valid":
+ queryset = queryset.filter(storage_ptr__isnull=False)
+ elif status == "invalid":
+ queryset = queryset.filter(storage_ptr__isnull=True)
+
+ if order_by:
+ queryset = queryset.order_by(order_by)
greptile
logic: Direct use of order_by parameter without validation could allow SQL injection. Validate against allowed fields.
suggested fix
if order_by:
+ allowed_fields = ['created_at', '-created_at', 'ref', '-ref']
+ if order_by in allowed_fields:
queryset = queryset.order_by(order_by)
diff block
JOIN pg_class c ON i.indexrelid = c.oid
JOIN pg_am am ON c.relam = am.oid
JOIN pg_opclass opclass ON i.indclass[0] = opclass.oid
- WHERE c.relname = '${tableName}_vector_idx';
+ JOIN pg_namespace n ON c.relnamespace = n.oid
+ WHERE c.relname = '${indexName}_vector_idx'
+ AND n.nspname = '${this.schema || 'public'}';
greptile
logic: Use parameterized queries instead of string interpolation for indexName and schema to prevent SQL injection
suggested fix
+ WHERE c.relname = $1
+ AND n.nspname = $2;
diff block
if (filter.id) {
void qb.where(`${TableName.ProjectMembership}.id`, filter.id);
}
+ if (filter.roles && filter.roles.length > 0) {
+ void qb.whereExists((subQuery) => {
+ void subQuery
+ .select("role")
+ .from(TableName.ProjectUserMembershipRole)
+ .leftJoin(
+ TableName.ProjectRoles,
+ `${TableName.ProjectRoles}.id`,
+ `${TableName.ProjectUserMembershipRole}.customRoleId`
+ )
+ .whereRaw(
+ `"${TableName.ProjectUserMembershipRole}"."projectMembershipId" = "${TableName.ProjectMembership}"."id"`
+ )
greptile
logic: SQL injection risk: Using raw SQL with string interpolation. Use parameterized queries instead.
suggested fix
.whereRaw(
+ '??.?? = ??.??',
+ [TableName.ProjectUserMembershipRole, 'projectMembershipId', TableName.ProjectMembership, 'id']
)
diff block
+---
+title: API Column Type Inference
+author: Gemini Assistant
+date: 2024-07-26
+status: Draft
+parent_prd: semantic_layer_refactor_overview.md
+ticket: N/A
+---
+
+# API Column Type Inference
+
+## Parent Project
+
+This is a sub-PRD of the [Semantic Layer and Deployment Refactor](semantic_layer_refactor_overview.md) project. Please refer to the parent PRD for the overall project context, goals, and implementation plan.
+
+## Problem Statement
+
+When semantic models are deployed, the `type_` for dimensions and measures might not always be explicitly specified in the YAML definition. In such cases, the API needs a robust mechanism to infer these types by querying the actual data source. This ensures that the `DatasetColumn` entries in the database have accurate type information, which is crucial for query generation, UI display, and other downstream processes.
+
+Current behavior:
+- The `DatasetColumn.type_` field is populated from `DeployDatasetsColumnsRequest.type_`.
+- If this is `None` or missing, the column type in the database might be inaccurate or default to a generic type like "text", as seen in `deploy_datasets.rs`: `type_: col_req.type_.clone().unwrap_or_else(|| "text".to_string())`.
+- There isn't a standardized, dynamic type inference process by querying the data source schema information.
+
+Expected behavior:
+- Within the `deploy_datasets_handler` (after a `SemanticModel` is mapped to `Dataset` and its `DatasetColumn` shells), if a `DatasetColumn.type_` is missing, unknown, or explicitly marked for inference (e.g., `"UNKNOWN"` or `None`):
+ - The API will use the model's `data_source_name`, `database` (identifier), and `schema`, along with the model's `name` (as table/view name) and the dimension/measure's `name` (as column name) or `expr` to query the data source's information schema (or equivalent metadata endpoint).
+ - The actual data type (e.g., `VARCHAR`, `INTEGER`, `TIMESTAMP`, `BOOLEAN`) will be retrieved from the data source.
+ - This retrieved data source specific type will be mapped to a standardized Buster type (if such a mapping exists/is needed) or stored directly.
+ - The `DatasetColumn.type_` field will be populated with this inferred type before being persisted.
+- If type inference fails (e.g., column not found in the source, data source connection error), the deployment of that specific column/model should fail with a clear error message, or a warning should be logged and a default type used, depending on desired strictness.
+
+## Goals
+
+1. Implement a type inference service/module that can connect to a data source (given its ID or connection details) and query its information schema.
+2. Integrate this service into `deploy_datasets_handler` to infer types for `DatasetColumn` entries where the type is not provided or is marked as unknown.
+3. Successfully query common data sources (e.g., PostgreSQL, Snowflake, BigQuery - to the extent connection logic exists) for column type information.
+4. Map data source-specific types to a set of standardized types used within Buster (if applicable) or store the native type.
+5. Handle errors gracefully during the type inference process (e.g., connection issues, column not found, unsupported data source for inference).
+
+## Non-Goals
+
+1. Implementing new data source connectors if they don't already exist. The inference will rely on existing connectivity.
+2. Inferring complex semantic types (e.g., "latitude", "currency_code") – this PRD focuses on fundamental data types (string, number, date, boolean, etc.).
+3. Type inference for deeply nested or highly complex `expr` fields if they don't directly map to a source column. Initial focus on direct column lookups or simple expressions that can be introspected.
+
+## Implementation Plan
+
+### Phase 1: Design and Implement Type Inference Service
+
+#### Technical Design
+
+**1. `TypeInferenceService` (Conceptual):**
+ - This could be a new module/struct, e.g., `crate::services::type_inference`.
+ - It will need access to database connection capabilities, potentially reusing parts of the existing `DataSource` handling or `query_runner` logic if suitable.
+
+```rust
+// Conceptual structure for the service
+// In, e.g., api/server/src/services/type_inference.rs
+
+pub mod type_inference_service {
+ use crate::database::models::DataSource;
+ use anyhow::Result;
+ use sqlx::any::AnyPool; // Or specific pool types
+ // May need access to a shared connection pool manager or a way to create connections.
+
+ // Simplified function signature for illustration
+ pub async fn infer_column_type(
+ data_source: &DataSource, // Contains connection details or type
+ database_name: &str, // The actual database/catalog name
+ schema_name: &str,
+ table_name: &str, // This would be semantic_model.name
+ column_name: &str, // This would be dimension.name or measure.name (if simple)
+ // OR column_expr: &str, // If type is inferred from an expression
+ ) -> Result<Option<String>> { // Returns inferred type string or None if not found/error
+
+ // 1. Get a database connection based on data_source.type (Postgres, Snowflake, etc.)
+ // This is a major dependency: how to get a live connection to the source DB.
+ // If we already have a query execution engine for data sources, leverage that.
+ // For example, using the query_runner logic:
+ // let mut runner = crate::query_runner::get_runner(&data_source).await?;
+
+ // 2. Construct the appropriate information schema query based on data_source.type
+ let info_schema_query: String;
+ match data_source.type_.as_str() { // Assuming DataSource struct has a type_ field like "POSTGRES", "SNOWFLAKE"
+ "POSTGRES" => {
+ info_schema_query = format!(
+ "SELECT data_type FROM information_schema.columns \
+ WHERE table_catalog = $1 AND table_schema = $2 AND table_name = $3 AND column_name = $4",
+ );
+ // Execute query with runner.run_query_with_params(info_schema_query, vec![database_name, schema_name, table_name, column_name])
+ // Parse result (e.g., using sqlx)
+ // let row: Option<(String,)> = sqlx::query_as(&info_schema_query)
+ // .bind(database_name)
+ // .bind(schema_name)
+ // .bind(table_name)
+ // .bind(column_name)
+ // .fetch_optional(get_source_db_pool_for_ds(data_source)?).await?;
+ // return Ok(row.map(|r| r.0));
+ }
+ "SNOWFLAKE" => {
+ // Similar query for Snowflake information_schema
+ // May need to handle case sensitivity for Snowflake identifiers
+ info_schema_query = format!(
+ "SELECT DATA_TYPE FROM \"{}.INFORMATION_SCHEMA\".COLUMNS \
+ WHERE TABLE_SCHEMA = '{}' AND TABLE_NAME = '{}' AND COLUMN_NAME = '{}'",
+ database_name.to_uppercase(), // Snowflake often uses uppercase
+ schema_name.to_uppercase(),
+ table_name.to_uppercase(),
+ column_name.to_uppercase()
+ );
greptile
logic: Snowflake query is vulnerable to SQL injection since it uses string interpolation. Should use parameterized queries like the Postgres example.
suggested fix
info_schema_query = format!(
+ "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS \
+ WHERE TABLE_SCHEMA = $1 AND TABLE_NAME = $2 AND COLUMN_NAME = $3"
);
+ // Execute with params: database_name.to_uppercase(), table_name.to_uppercase(), column_name.to_uppercase()
diff block
+---
+title: API Column Type Inference
+author: Gemini Assistant
+date: 2024-07-26
+status: Draft
+parent_prd: semantic_layer_refactor_overview.md
+ticket: N/A
+---
+
+# API Column Type Inference
+
+## Parent Project
+
+This is a sub-PRD of the [Semantic Layer and Deployment Refactor](semantic_layer_refactor_overview.md) project. Please refer to the parent PRD for the overall project context, goals, and implementation plan.
+
+## Problem Statement
+
+When semantic models are deployed, the `type_` for dimensions and measures might not always be explicitly specified in the YAML definition. In such cases, the API needs a robust mechanism to infer these types by querying the actual data source. This ensures that the `DatasetColumn` entries in the database have accurate type information, which is crucial for query generation, UI display, and other downstream processes.
+
+Current behavior:
+- The `DatasetColumn.type_` field is populated from `DeployDatasetsColumnsRequest.type_`.
+- If this is `None` or missing, the column type in the database might be inaccurate or default to a generic type like "text", as seen in `deploy_datasets.rs`: `type_: col_req.type_.clone().unwrap_or_else(|| "text".to_string())`.
+- There isn't a standardized, dynamic type inference process by querying the data source schema information.
+
+Expected behavior:
+- Within the `deploy_datasets_handler` (after a `SemanticModel` is mapped to `Dataset` and its `DatasetColumn` shells), if a `DatasetColumn.type_` is missing, unknown, or explicitly marked for inference (e.g., `"UNKNOWN"` or `None`):
+ - The API will use the model's `data_source_name`, `database` (identifier), and `schema`, along with the model's `name` (as table/view name) and the dimension/measure's `name` (as column name) or `expr` to query the data source's information schema (or equivalent metadata endpoint).
+ - The actual data type (e.g., `VARCHAR`, `INTEGER`, `TIMESTAMP`, `BOOLEAN`) will be retrieved from the data source.
+ - This retrieved data source specific type will be mapped to a standardized Buster type (if such a mapping exists/is needed) or stored directly.
+ - The `DatasetColumn.type_` field will be populated with this inferred type before being persisted.
+- If type inference fails (e.g., column not found in the source, data source connection error), the deployment of that specific column/model should fail with a clear error message, or a warning should be logged and a default type used, depending on desired strictness.
+
+## Goals
+
+1. Implement a type inference service/module that can connect to a data source (given its ID or connection details) and query its information schema.
+2. Integrate this service into `deploy_datasets_handler` to infer types for `DatasetColumn` entries where the type is not provided or is marked as unknown.
+3. Successfully query common data sources (e.g., PostgreSQL, Snowflake, BigQuery - to the extent connection logic exists) for column type information.
+4. Map data source-specific types to a set of standardized types used within Buster (if applicable) or store the native type.
+5. Handle errors gracefully during the type inference process (e.g., connection issues, column not found, unsupported data source for inference).
+
+## Non-Goals
+
+1. Implementing new data source connectors if they don't already exist. The inference will rely on existing connectivity.
+2. Inferring complex semantic types (e.g., "latitude", "currency_code") – this PRD focuses on fundamental data types (string, number, date, boolean, etc.).
+3. Type inference for deeply nested or highly complex `expr` fields if they don't directly map to a source column. Initial focus on direct column lookups or simple expressions that can be introspected.
+
+## Implementation Plan
+
+### Phase 1: Design and Implement Type Inference Service
+
+#### Technical Design
+
+**1. `TypeInferenceService` (Conceptual):**
+ - This could be a new module/struct, e.g., `crate::services::type_inference`.
+ - It will need access to database connection capabilities, potentially reusing parts of the existing `DataSource` handling or `query_runner` logic if suitable.
+
+```rust
+// Conceptual structure for the service
+// In, e.g., api/server/src/services/type_inference.rs
+
+pub mod type_inference_service {
+ use crate::database::models::DataSource;
+ use anyhow::Result;
+ use sqlx::any::AnyPool; // Or specific pool types
+ // May need access to a shared connection pool manager or a way to create connections.
+
+ // Simplified function signature for illustration
+ pub async fn infer_column_type(
+ data_source: &DataSource, // Contains connection details or type
+ database_name: &str, // The actual database/catalog name
+ schema_name: &str,
+ table_name: &str, // This would be semantic_model.name
+ column_name: &str, // This would be dimension.name or measure.name (if simple)
+ // OR column_expr: &str, // If type is inferred from an expression
+ ) -> Result<Option<String>> { // Returns inferred type string or None if not found/error
+
+ // 1. Get a database connection based on data_source.type (Postgres, Snowflake, etc.)
+ // This is a major dependency: how to get a live connection to the source DB.
+ // If we already have a query execution engine for data sources, leverage that.
+ // For example, using the query_runner logic:
+ // let mut runner = crate::query_runner::get_runner(&data_source).await?;
+
+ // 2. Construct the appropriate information schema query based on data_source.type
+ let info_schema_query: String;
+ match data_source.type_.as_str() { // Assuming DataSource struct has a type_ field like "POSTGRES", "SNOWFLAKE"
+ "POSTGRES" => {
+ info_schema_query = format!(
+ "SELECT data_type FROM information_schema.columns \
+ WHERE table_catalog = $1 AND table_schema = $2 AND table_name = $3 AND column_name = $4",
+ );
+ // Execute query with runner.run_query_with_params(info_schema_query, vec![database_name, schema_name, table_name, column_name])
+ // Parse result (e.g., using sqlx)
+ // let row: Option<(String,)> = sqlx::query_as(&info_schema_query)
+ // .bind(database_name)
+ // .bind(schema_name)
+ // .bind(table_name)
+ // .bind(column_name)
+ // .fetch_optional(get_source_db_pool_for_ds(data_source)?).await?;
+ // return Ok(row.map(|r| r.0));
+ }
+ "SNOWFLAKE" => {
+ // Similar query for Snowflake information_schema
+ // May need to handle case sensitivity for Snowflake identifiers
+ info_schema_query = format!(
+ "SELECT DATA_TYPE FROM \"{}.INFORMATION_SCHEMA\".COLUMNS \
+ WHERE TABLE_SCHEMA = '{}' AND TABLE_NAME = '{}' AND COLUMN_NAME = '{}'",
+ database_name.to_uppercase(), // Snowflake often uses uppercase
+ schema_name.to_uppercase(),
+ table_name.to_uppercase(),
+ column_name.to_uppercase()
+ );
greptile
logic: SQL injection vulnerability - use parameterized queries instead of string interpolation for Snowflake query
suggested fix
+ info_schema_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ? AND COLUMN_NAME = ?".to_string();
+ // Execute with bind parameters:
+ // .bind(schema_name.to_uppercase())
+ // .bind(table_name.to_uppercase())
+ // .bind(column_name.to_uppercase())
diff block
https://github.com/WebGoat/WebGoat/wiki/(Almost)-Fully-Documented-Solution-(en)
-### SQLi ###
+### SQLi ###
-Basic
-Smith - to show it returns smith's records.
-To show exploit; `1=1` can be any true clause:
+Basic
+Smith - to show it returns smith's records.
+To show exploit; `1=1` can be any true clause:
-```sql
-Smith' or '1'='1
+```sql
+Smith' or '1'='1
```
-**Bender Login**
+**Bender Login**
```sql
-bender@juice-sh.op' --
+bender@juice-sh.op' --
```
-```sql
-[2:19 PM]
+```sql
+[2:19 PM]
101
101 or 1=1
-```
-```sql
+```
+```sql
Smith' union select userid,user_name, password,cookie,cookie, cookie,userid from user_system_data --
greptile
style: This SQL injection example exposes sensitive user data including passwords. Consider using a less sensitive example that doesn't reveal actual database structure.
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.jwt.claimmisuse;
+
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.failed;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.success;
+
+import io.jsonwebtoken.Claims;
+import io.jsonwebtoken.JwsHeader;
+import io.jsonwebtoken.Jwt;
+import io.jsonwebtoken.JwtException;
+import io.jsonwebtoken.Jwts;
+import io.jsonwebtoken.SigningKeyResolverAdapter;
+import io.jsonwebtoken.impl.TextCodec;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import org.apache.commons.lang3.StringUtils;
+import org.owasp.webgoat.container.LessonDataSource;
+import org.owasp.webgoat.container.assignments.AssignmentEndpoint;
+import org.owasp.webgoat.container.assignments.AssignmentHints;
+import org.owasp.webgoat.container.assignments.AttackResult;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.ResponseBody;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@AssignmentHints({
+ "jwt-kid-hint1",
+ "jwt-kid-hint2",
+ "jwt-kid-hint3",
+ "jwt-kid-hint4",
+ "jwt-kid-hint5",
+ "jwt-kid-hint6"
+})
+@RequestMapping("/JWT/")
+public class JWTHeaderKIDEndpoint implements AssignmentEndpoint {
+ private final LessonDataSource dataSource;
+
+ private JWTHeaderKIDEndpoint(LessonDataSource dataSource) {
+ this.dataSource = dataSource;
+ }
+
+ @PostMapping("kid/follow/{user}")
+ public @ResponseBody String follow(@PathVariable("user") String user) {
+ if ("Jerry".equals(user)) {
+ return "Following yourself seems redundant";
+ } else {
+ return "You are now following Tom";
+ }
+ }
+
+ @PostMapping("kid/delete")
+ public @ResponseBody AttackResult resetVotes(@RequestParam("token") String token) {
+ if (StringUtils.isEmpty(token)) {
+ return failed(this).feedback("jwt-invalid-token").build();
+ } else {
+ try {
+ final String[] errorMessage = {null};
+ Jwt jwt =
+ Jwts.parser()
+ .setSigningKeyResolver(
+ new SigningKeyResolverAdapter() {
+ @Override
+ public byte[] resolveSigningKeyBytes(JwsHeader header, Claims claims) {
+ final String kid = (String) header.get("kid");
+ try (var connection = dataSource.getConnection()) {
+ ResultSet rs =
+ connection
+ .createStatement()
+ .executeQuery(
+ "SELECT key FROM jwt_keys WHERE id = '" + kid + "'");
greptile
logic: SQL injection vulnerability - kid parameter is directly concatenated into query without sanitization. Use prepared statements instead.
suggested fix
+ "SELECT key FROM jwt_keys WHERE id = ?", kid);
diff block
@Slf4j
public class Servers {
- private final LessonDataSource dataSource;
+ private final LessonDataSource dataSource;
- @AllArgsConstructor
- @Getter
- private class Server {
+ @AllArgsConstructor
+ @Getter
+ private class Server {
- private String id;
- private String hostname;
- private String ip;
- private String mac;
- private String status;
- private String description;
- }
+ private String id;
+ private String hostname;
+ private String ip;
+ private String mac;
+ private String status;
+ private String description;
+ }
- public Servers(LessonDataSource dataSource) {
- this.dataSource = dataSource;
- }
+ public Servers(LessonDataSource dataSource) {
+ this.dataSource = dataSource;
+ }
- @GetMapping(produces = MediaType.APPLICATION_JSON_VALUE)
- @ResponseBody
- public List<Server> sort(@RequestParam String column) throws Exception {
- List<Server> servers = new ArrayList<>();
+ @GetMapping(produces = MediaType.APPLICATION_JSON_VALUE)
+ @ResponseBody
+ public List<Server> sort(@RequestParam String column) throws Exception {
+ List<Server> servers = new ArrayList<>();
- try (var connection = dataSource.getConnection()) {
- try (var statement = connection.prepareStatement("select id, hostname, ip, mac, status, description from SERVERS where status <> 'out of order' order by " + column)) {
- try (var rs = statement.executeQuery()) {
- while (rs.next()) {
- Server server = new Server(rs.getString(1), rs.getString(2), rs.getString(3), rs.getString(4), rs.getString(5), rs.getString(6));
- servers.add(server);
- }
- }
- }
+ try (var connection = dataSource.getConnection()) {
+ try (var statement =
+ connection.prepareStatement(
+ "select id, hostname, ip, mac, status, description from SERVERS where status <> 'out"
+ + " of order' order by "
+ + column)) {
greptile
logic: Direct concatenation of user input 'column' into the ORDER BY clause creates a SQL injection vulnerability. Bind parameters cannot be used for column names, so validate the value against a whitelist of allowed columns instead.
suggested fix
public List<Server> sort(@RequestParam String column) throws Exception {
List<Server> servers = new ArrayList<>();
+ // Validate column name against whitelist to prevent SQL injection
+ List<String> validColumns = List.of("id", "hostname", "ip", "mac", "status", "description");
+ if (!validColumns.contains(column.toLowerCase())) {
+ throw new IllegalArgumentException("Invalid column name");
}
try (var connection = dataSource.getConnection()) {
try (var statement =
connection.prepareStatement(
"select id, hostname, ip, mac, status, description from SERVERS where status <> 'out"
+ " of order' order by "
+ column)) {
diff block
@AllArgsConstructor
public class UserService implements UserDetailsService {
- private final UserRepository userRepository;
- private final UserTrackerRepository userTrackerRepository;
- private final JdbcTemplate jdbcTemplate;
- private final Function<String, Flyway> flywayLessons;
- private final List<Initializeable> lessonInitializables;
-
- @Override
- public WebGoatUser loadUserByUsername(String username) throws UsernameNotFoundException {
- WebGoatUser webGoatUser = userRepository.findByUsername(username);
- if (webGoatUser == null) {
- throw new UsernameNotFoundException("User not found");
- } else {
- webGoatUser.createUser();
- lessonInitializables.forEach(l -> l.initialize(webGoatUser));
- }
- return webGoatUser;
+ private final UserRepository userRepository;
+ private final UserProgressRepository userTrackerRepository;
+ private final JdbcTemplate jdbcTemplate;
+ private final Function<String, Flyway> flywayLessons;
+ private final List<Initializable> lessonInitializables;
+
+ @Override
+ public WebGoatUser loadUserByUsername(String username) throws UsernameNotFoundException {
+ WebGoatUser webGoatUser = userRepository.findByUsername(username);
+ if (webGoatUser == null) {
+ throw new UsernameNotFoundException("User not found");
+ } else {
+ webGoatUser.createUser();
+ // TODO maybe better to use an event to initialize lessons to keep dependencies low
+ lessonInitializables.forEach(l -> l.initialize(webGoatUser));
}
-
- public void addUser(String username, String password) {
- //get user if there exists one by the name
- var userAlreadyExists = userRepository.existsByUsername(username);
- var webGoatUser = userRepository.save(new WebGoatUser(username, password));
-
- if (!userAlreadyExists) {
- userTrackerRepository.save(new UserTracker(username)); //if user previously existed it will not get another tracker
- createLessonsForUser(webGoatUser);
- }
+ return webGoatUser;
+ }
+
+ public void addUser(String username, String password) {
+ // get user if there exists one by the name
+ var userAlreadyExists = userRepository.existsByUsername(username);
+ var webGoatUser = userRepository.save(new WebGoatUser(username, password));
+
+ if (!userAlreadyExists) {
+ userTrackerRepository.save(
+ new UserProgress(username)); // if user previously existed it will not get another tracker
+ createLessonsForUser(webGoatUser);
}
+ }
- private void createLessonsForUser(WebGoatUser webGoatUser) {
- jdbcTemplate.execute("CREATE SCHEMA \"" + webGoatUser.getUsername() + "\" authorization dba");
- flywayLessons.apply(webGoatUser.getUsername()).migrate();
- }
-
- public List<WebGoatUser> getAllUsers() {
- return userRepository.findAll();
- }
+ private void createLessonsForUser(WebGoatUser webGoatUser) {
+ jdbcTemplate.execute("CREATE SCHEMA \"" + webGoatUser.getUsername() + "\" authorization dba");
greptile
logic: SQL injection vulnerability - username is directly concatenated into SQL query without proper escaping or parameterization
suggested fix
+ jdbcTemplate.execute(String.format("CREATE SCHEMA \"%s\" authorization dba", webGoatUser.getUsername().replaceAll("[^a-zA-Z0-9_]", "")));
diff block
== Examples
-SQL injection can be used for far more than reading the data of a single of user. The following are just a few examples of data a hacker could input to a form field (or anywhere user input is accepted) in an attempt to exploit a SQL injection vulnerability:
+SQL injection can be used for far more than reading the data of a single of user. The following are just a few examples of data a hacker could input to a form field (or anywhere user input is accepted) in an attempt to exploit a SQL injection vulnerability:
greptile
syntax: There's a grammatical error in 'data of a single of user' - should be 'data of a single user'
suggested fix
+SQL injection can be used for far more than reading the data of a single user. The following are just a few examples of data a hacker could input to a form field (or anywhere user input is accepted) in an attempt to exploit a SQL injection vulnerability:
diff block
* @param value The value to match
*/
jsonLike(column: string, key: string, value: string): SqlBuilder {
+ validateColumnName(column);
const jsonPattern = `%"${key}":"${value}"%`;
if (this.whereAdded) {
greptile
logic: SQL injection vulnerability: key and value are directly interpolated into the pattern string. Should use parameterized values instead.
```suggestion
+ jsonLike(column: string, key: string, value: string): SqlBuilder {
validateColumnName(column);
+ this.sql += this.whereAdded ? ' AND ' : ' WHERE ';
+ this.sql += `${column} LIKE ?`;
+ this.whereAdded = true;
+ this.params.push(`%"${key}":"${value}"%`);
+ if (this.whereAdded) {
```
diff block
folder.zmarkedfordeletion != 1
ORDER BY
note.zmodificationdate1 DESC
+ LIMIT ${maxQueryResults}
greptile
logic: SQL injection vulnerability - maxQueryResults should be parameterized or validated as a positive integer
suggested fix
+ LIMIT ?
diff block
+import sqlite3
+
+conn = sqlite3.connect('database.db')
+cursor = conn.cursor()
+
+# Hardcoded credentials
+USERNAME = "admin"
+PASSWORD = "password123"
+
+def login(user, pw):
+ query = f"SELECT * FROM users WHERE username = '{user}' AND password = '{pw}'" # SQL Injection risk!
greptile
logic: Critical SQL injection vulnerability. Use parameterized queries instead:
suggested fix
+ query = "SELECT * FROM users WHERE username = ? AND password = ?"
+ result = cursor.execute(query, (user, pw)).fetchall()
diff block
}
} else {
// Default text search for ID
- whereConditions.push(sql`${evaluationResults.id}::text ILIKE ${'%' + value + '%'}`);
+ whereConditions.push(sql`${evaluationResults.id}::text ILIKE ${"%" + value + "%"}`);
greptile
logic: SQL injection vulnerability: value is directly interpolated into ILIKE pattern without proper escaping
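For illustration only (shown in Python rather than the project's TypeScript, with hypothetical table and helper names): escape the LIKE wildcards in the user-supplied value, then hand the whole pattern to the driver as a bind parameter instead of splicing it into the SQL text.

```python
def escape_like(value: str) -> str:
    """Escape backslash, % and _ so user input is matched literally inside ILIKE."""
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

def search_by_id(cursor, value: str):
    # the pattern is bound, never concatenated into the statement
    pattern = f"%{escape_like(value)}%"
    cursor.execute(
        "SELECT * FROM evaluation_results WHERE id::text ILIKE %s",
        (pattern,),
    )
    return cursor.fetchall()
```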
diff block
+import os
+
+from passlib.exc import PasswordSizeError
+from passlib.pwd import genword
+from sqlalchemy import text
+
+from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.entity_type import populate_default_employee_account_information
+from onyx.db.entity_type import (
+ populate_default_primary_grounded_entity_type_information,
+)
+from onyx.db.kg_config import get_kg_enablement
+from onyx.db.kg_config import KGConfigSettings
+from onyx.utils.logger import setup_logger
+
+KG_READONLY_DB_USER = os.getenv("KG_READONLY_DB_USER")
+KG_READONLY_DB_PASSWORD = os.getenv("KG_READONLY_DB_PASSWORD")
+
+logger = setup_logger()
+
+
+def populate_default_grounded_entity_types() -> None:
+ with get_session_with_current_tenant() as db_session:
+ if not get_kg_enablement(db_session):
+ logger.error(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+ raise ValueError(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+
+ populate_default_primary_grounded_entity_type_information(db_session)
+
+ db_session.commit()
+
+ return None
+
+
+def populate_default_account_employee_definitions() -> None:
+ with get_session_with_current_tenant() as db_session:
+ if not get_kg_enablement(db_session):
+ logger.error(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+ raise ValueError(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+
+ populate_default_employee_account_information(db_session)
+
+ db_session.commit()
+
+ return None
+
+
+def create_kg_readonly_user() -> None:
+
+ with get_session_with_current_tenant() as db_session:
+ _USE_KG_APPROACH = get_kg_enablement(db_session)
+
+ if not _USE_KG_APPROACH:
+ logger.error(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+ raise Exception(
+ "KG approach is not enabled, the entity types cannot be populated."
+ )
+ if not (KG_READONLY_DB_USER and KG_READONLY_DB_PASSWORD):
+ logger.error("KG_READONLY_DB_USER or KG_READONLY_DB_PASSWORD is not set")
+ raise Exception("KG_READONLY_DB_USER or KG_READONLY_DB_PASSWORD is not set")
+
+ try:
+ # Validate password length and complexity
+ genword(
+ length=13, charset="ascii_72"
+ ) # This will raise PasswordSizeError if too short
+ # Additional checks can be added here if needed
+ except PasswordSizeError:
+ logger.error("KG_READONLY_DB_PASSWORD is too short or too weak")
+ raise Exception("KG_READONLY_DB_PASSWORD is too short or too weak")
+
+ with get_session_with_current_tenant() as db_session:
+ db_session.execute(
+ text(
+ f"CREATE USER {KG_READONLY_DB_USER} WITH PASSWORD '{KG_READONLY_DB_PASSWORD}';"
greptile
logic: SQL injection vulnerability - use parameterized query instead of string interpolation
suggested fix
+ text("CREATE USER :user WITH PASSWORD :password")
+ .bindparams(user=KG_READONLY_DB_USER, password=KG_READONLY_DB_PASSWORD)
diff block
+import logging
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from onyx.configs.app_configs import DB_USER_DATABASE
+from onyx.configs.app_configs import DB_USER_PASSWORD
+from onyx.configs.app_configs import DB_USER_USERNAME
+from onyx.db.engine import SqlEngine
+
+logger = logging.getLogger(__name__)
+
+
+async def create_database_user(db_session: Session) -> None:
+ """
+ Create a read-only database user with no privileges initially.
+ This should be called during application startup.
+ """
+ try:
+ # Create user if it doesn't exist
+ create_user_sql = text(
+ f"""
+ DO $$
+ BEGIN
+ IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_USER_USERNAME}') THEN
+ CREATE USER {DB_USER_USERNAME} WITH PASSWORD '{DB_USER_PASSWORD}';
+ GRANT CONNECT ON DATABASE {DB_USER_DATABASE} TO {DB_USER_USERNAME};
greptile
logic: SQL injection vulnerability - string interpolation used for SQL. Use SQLAlchemy bind parameters instead:
suggested fix
+ IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :username) THEN
+ CREATE USER :username WITH PASSWORD :password;
+ GRANT CONNECT ON DATABASE :database TO :username;
diff block
async getUsageEligibleOrganizations(): Promise<
Result<UsageEligibleOrgs[], string>
> {
+ const tiersToInclude = TIERS.filter((t) => t.trackUsage).map((t) => t.id);
const query = `SELECT
o.id as "orgId",
o.stripe_subscription_id as "stripeSubscriptionId",
o.stripe_subscription_item_id as "stripeSubscriptionItemId",
MAX(ou.end_date) as "latestEndTime"
from organization as o
left join organization_usage as ou on o.id = ou.organization_id
- where (o.tier = 'growth' or o.tier = 'pro-20240913' or o.tier = 'pro-20250202')
+ where o.tier in (${tiersToInclude.map((t) => `'${t}'`).join(",")})
greptile
logic: Potential SQL injection vulnerability. The tiersToInclude values should be passed as parameterized query parameters instead of string interpolation
suggested fix
+ where o.tier = ANY($1)
diff block
feature_flag_key = self.feature_flag.key
- # Get the metric event we should filter on
- metric_event = self.query.count_query.series[0].event
+ is_data_warehouse_query = isinstance(self.query.count_query.series[0], DataWarehouseNode)
# Pick the correct value for the aggregation chosen
match self._get_metric_type():
case ExperimentMetricType.CONTINUOUS:
# If the metric type is continuous, we need to extract the value from the event property
metric_property = self.query.count_query.series[0].math_property
- metric_value = f"toFloat(JSONExtractRaw(properties, '{metric_property}'))"
+ if is_data_warehouse_query:
+ metric_value = f"toFloat('{metric_property}')"
+ else:
greptile
logic: Potential SQL injection vulnerability in metric_property interpolation. Should use parameterized queries or proper escaping.
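As a rough sketch of the escaping the comment asks for (not PostHog's actual helper; the regex and function name are assumptions): validate the property name, and escape the quote and backslash characters if it fails the check, before embedding it in the generated expression.

```python
import re

# Assumed allow-list pattern for "safe" property names
_SAFE_PROPERTY = re.compile(r"^[A-Za-z0-9_.$-]+$")

def metric_value_expr(metric_property: str) -> str:
    if not _SAFE_PROPERTY.match(metric_property):
        # escape the characters that could terminate the string literal
        metric_property = metric_property.replace("\\", "\\\\").replace("'", "\\'")
    return f"toFloat(JSONExtractRaw(properties, '{metric_property}'))"
```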
diff block
+import { UNKNOWN_ERROR_MESSAGE } from "../helpers/errors";
+import { HardcoverClient } from "./hardcoverClient";
+
+export type List = {
+ id: number;
+ slug: string;
+ name: string;
+};
+
+export type MeResponse = {
+ data: {
+ me: {
+ id: number;
+ lists: List[];
+ }[];
+ };
+};
+
+export async function getLists() {
+ const client = new HardcoverClient();
+
+ const graphql_query = `
+ query {
+ me {
+ id
+ lists {
+ slug
+ id
+ name
+ }
+ }
+ }
+ `;
+
+ const { data } = await client.post<MeResponse>(graphql_query);
+
+ return data.me[0].lists;
+}
+
+export async function createList(
+ name: string,
+ privacySettingId: string,
+ description: string,
+ defaultView: string,
+ ranked: boolean,
+) {
+ const client = new HardcoverClient();
+
+ const graphql_mutation = `
+ mutation {
+ insert_list(
+ object: {name: "${name}", ranked: ${ranked}, privacy_setting_id: ${privacySettingId}, description: "${description}", default_view: "${defaultView}"}
greptile
logic: Direct string interpolation in GraphQL mutations creates an injection vulnerability (GraphQL injection rather than SQL). Pass user input as GraphQL variables instead of template literals
suggested fix
+ object: {name: $name, ranked: $ranked, privacy_setting_id: $privacySettingId, description: $description, default_view: $defaultView}
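For the suggested fix to work end to end, the mutation must also declare the variables and the client must send them alongside the query; a rough sketch of the request payload, written in Python for brevity (the variable types and selection set are assumptions, the real Hardcover schema may differ):

```python
# Hypothetical payload shape for a GraphQL client that accepts query + variables
graphql_mutation = """
mutation CreateList($name: String!, $ranked: Boolean!, $privacySettingId: Int!,
                    $description: String!, $defaultView: String!) {
  insert_list(object: {name: $name, ranked: $ranked,
                       privacy_setting_id: $privacySettingId,
                       description: $description, default_view: $defaultView}) {
    id
  }
}
"""

payload = {
    "query": graphql_mutation,
    "variables": {
        "name": "Summer reading",
        "ranked": False,
        "privacySettingId": 1,
        "description": "Books for the beach",
        "defaultView": "list",
    },
}
```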
diff block
def get_applied_migrations(self, database):
return database._get_applied_migrations(MIGRATIONS_PACKAGE_NAME, replicated=True)
+
+ def _create_database_if_not_exists(self, database: str, cluster: str):
+ with default_client() as client:
+ client.execute(f"CREATE DATABASE IF NOT EXISTS {database} ON CLUSTER '{cluster}'")
greptile
logic: Raw SQL string interpolation creates potential SQL injection risk. Use parameterized query instead
suggested fix
with default_client() as client:
+ client.execute("CREATE DATABASE IF NOT EXISTS %(database)s ON CLUSTER %(cluster)s", {'database': database, 'cluster': cluster})
diff block
Ok(())
}
+ /// Keeps the trigger alive
+ async fn keep_alive(&self, ids: &[i64], alert_timeout: i64, report_timeout: i64) -> Result<()> {
+ let now = now_micros();
+ let report_max_time = now
+ + Duration::try_seconds(report_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+ let alert_max_time = now
+ + Duration::try_seconds(alert_timeout)
+ .unwrap()
+ .num_microseconds()
+ .unwrap();
+
+ let sql = format!(
+ "UPDATE scheduled_jobs SET end_time = CASE WHEN module = ? THEN ? ELSE ? END WHERE id IN ({});",
+ ids.iter()
+ .map(|id| id.to_string())
+ .collect::<Vec<_>>()
+ .join(",")
+ );
greptile
logic: SQL injection vulnerability - ids array is not sanitized before being interpolated into the query string. Use parameterized queries instead.
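The placeholder-per-id pattern the comment recommends, sketched in Python for illustration (generic DB-API style; the module value and parameter order are assumptions):

```python
def keep_alive_sql(ids, alert_max_time, report_max_time):
    """Build a parameterized UPDATE with one '?' per id instead of formatting the ids in."""
    placeholders = ",".join("?" for _ in ids)
    sql = (
        "UPDATE scheduled_jobs "
        "SET end_time = CASE WHEN module = ? THEN ? ELSE ? END "
        f"WHERE id IN ({placeholders})"
    )
    params = ["report", report_max_time, alert_max_time, *ids]
    return sql, params
```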
diff block
+import { executeSQL, useSQL } from "@raycast/utils";
+import { environment, getPreferenceValues } from "@raycast/api";
+import path from "path";
+
+export interface SearchEngine {
+ id: number;
+ t: string;
+ s: string;
+ u: string;
+ c?: string;
+ d?: string;
+ r?: number;
+ sc?: string;
+}
+
+const DB_PATH = path.join(environment.assetsPath, "search-router.db");
+
+export const useListSearchEngines = (searchText: string) =>
+ useSQL<SearchEngine>(
+ DB_PATH,
+ searchText.trim()
+ ? `SELECT * FROM search_engines
+ WHERE t LIKE '%${searchText.trim()}%' OR s LIKE '%${searchText.trim()}%'
+ ORDER BY r DESC, t ASC
+ LIMIT 100`
+ : `SELECT * FROM search_engines
+ ORDER BY r DESC, t ASC
+ LIMIT 100`,
+ {
+ permissionPriming: "This is required to list search engines.",
+ },
+ );
+
+export const findSearchEngine = async (searchEngineKey: string | undefined) => {
+ const preferences = getPreferenceValues<Preferences>();
+ const value = await executeSQL<SearchEngine>(
+ DB_PATH,
+ `SELECT * FROM search_engines
+ WHERE t = '${searchEngineKey ?? ""}' OR t = '${preferences.defaultSearchEngine}'
+ ORDER BY t = '${searchEngineKey ?? ""}' DESC
+ LIMIT 1`,
greptile
logic: SQL injection vulnerability. User input and preferences are directly interpolated into the query. Use parameterized queries with the executeSQL function's parameters argument.
```suggestion
`SELECT * FROM search_engines
+ WHERE t = $1 OR t = $2
+ ORDER BY t = $1 DESC
LIMIT 1`,
```
diff block
+use crate::{
+ api::v1::{
+ constants::{
+ extract_aliases, EVENTS_HIDDEN_PROPERTY_DEFINITIONS,
+ POSTHOG_EVENT_PROPERTY_TABLE_NAME_ALIAS, SEARCH_SCREEN_WORD, SEARCH_TRIGGER_WORD,
+ },
+ routing::Params,
+ },
+ //metrics_consts::{},
+ config::Config,
+};
+
+use sqlx::{postgres::PgPoolOptions, PgPool, Postgres, QueryBuilder};
+
+use std::collections::{HashMap, HashSet};
+
+// Wraps Postgres client and builds queries
+pub struct Manager {
+ // TODO: capture more config::Config values here as needed
+ pub pool: PgPool,
+ enterprise_prop_defs_table: String,
+ prop_defs_table: String,
+ event_props_table: String,
+ search_term_aliases: HashMap<&'static str, &'static str>,
+}
+
+impl Manager {
+ pub async fn new(cfg: &Config) -> Result<Self, sqlx::Error> {
+ let options = PgPoolOptions::new().max_connections(cfg.max_pg_connections);
+ let api_pool = options.connect(&cfg.database_url).await?;
+
+ Ok(Self {
+ pool: api_pool,
+ enterprise_prop_defs_table: cfg.enterprise_prop_defs_table_name.clone(),
+ prop_defs_table: cfg.prop_defs_table_name.clone(),
+ event_props_table: cfg.event_props_table_name.clone(),
+ search_term_aliases: extract_aliases(),
+ })
+ }
+
+ pub fn count_query(&self, project_id: i32, params: &Params) -> String {
+ /* The original Django query formulation we're duplicating
+ * https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L279-L289
+
+ SELECT count(*) as full_count
+ FROM {self.table}
+ {self._join_on_event_property()}
+ WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s
+ AND type = %(type)s
+ AND coalesce(group_type_index, -1) = %(group_type_index)s
+ {self.excluded_properties_filter}
+ {self.name_filter}
+ {self.numerical_filter}
+ {self.search_query}
+ {self.event_property_filter}
+ {self.is_feature_flag_filter}
+ {self.event_name_filter}
+
+ * Also, the conditionally-applied join on event properties table applied above as
+ * self._join_on_event_property()
+ * https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L293-L305
+ */
+
+ // build & render the query
+ let mut qb = QueryBuilder::<Postgres>::new("SELECT count(*) AS full_count FROM ");
+
+ qb = self.gen_from_clause(qb, ¶ms.use_enterprise_taxonomy);
+ qb = self.gen_conditional_join_event_props(
+ qb,
+ project_id,
+ ¶ms.property_type,
+ ¶ms.filter_by_event_names,
+ ¶ms.event_names,
+ );
+
+ // begin the WHERE clause
+ qb = self.init_where_clause(qb, project_id);
+ qb = self.where_property_type(qb, ¶ms.property_type);
+ qb.push("AND COALESCE(group_type_index, -1) = ");
+ qb.push_bind(params.group_type_index);
+
+ qb = self.conditionally_filter_excluded_properties(
+ qb,
+ ¶ms.property_type,
+ ¶ms.excluded_properties,
+ );
+ qb = self.conditionally_filter_properties(qb, ¶ms.properties);
+ qb = self.conditionally_filter_numerical_properties(qb, ¶ms.is_numerical);
+
+ qb =
+ self.conditionally_apply_search_clause(qb, ¶ms.search_terms, ¶ms.search_fields);
+
+ qb = self.conditionally_filter_event_names(
+ qb,
+ ¶ms.filter_by_event_names,
+ ¶ms.event_names,
+ );
+ qb = self.conditionally_filter_feature_flags(qb, ¶ms.is_feature_flag);
+
+ // NOTE: event_name_filter from orig Django query doesn't appear to be applied anywhere atm
+
+ // NOTE: count query is global per project_id, so no LIMIT/OFFSET handling is applied
+
+ qb.sql().into()
+ }
+
+ pub fn property_definitions_query(&self, project_id: i32, params: &Params) -> String {
+ /* The original Django query we're duplicating
+ * https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L262-L275
+
+ SELECT {self.property_definition_fields}, {self.event_property_field} AS is_seen_on_filtered_events
+ FROM {self.table}
+ {self._join_on_event_property()}
+ WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s
+ AND type = %(type)s
+ AND coalesce(group_type_index, -1) = %(group_type_index)s
+ {self.excluded_properties_filter}
+ {self.name_filter}
+ {self.numerical_filter}
+ {self.search_query}
+ {self.event_property_filter}
+ {self.is_feature_flag_filter}
+ {self.event_name_filter}
+ ORDER BY is_seen_on_filtered_events DESC,
+ {verified_ordering}
+ {self.property_definition_table}.name ASC
+ LIMIT {self.limit}
+ OFFSET {self.offset}
+
+ * Also, the conditionally-applied join on event properties table applied above as
+ * self._join_on_event_property()
+ * https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L293-L305
+ */
+
+ let mut qb = QueryBuilder::<Postgres>::new("SELECT ");
+ if params.use_enterprise_taxonomy.is_some_and(|uet| uet) {
+ // borrowed from EnterprisePropertyDefinition from Django monolith
+ // via EnterprisePropertyDefinition._meta.get_fields()
+ qb.push("id, project, team, name, is_numerical, property_type, type, group_type_index, property_type_format, description, updated_at, updated_by, verified_at, verified_by ");
+ } else {
+ // borrowed from Django monolith via PropertyDefinition._meta.get_fields()
+ qb.push("id, project, team, name, is_numerical, property_type, type, group_type_index, property_type_format ");
+ }
+
+ // append event_property_field clause to SELECT clause
+ let is_seen_resolved = if params
+ .event_names
+ .as_ref()
+ .is_some_and(|evs| !evs.is_empty())
+ {
+ format!("{}.property", POSTHOG_EVENT_PROPERTY_TABLE_NAME_ALIAS)
+ } else {
+ "NULL".to_string()
+ };
+ qb.push(format!(
+ ", {} IS NOT NULL AS is_seen_on_filtered_events ",
+ is_seen_resolved
+ ));
+
+ qb = self.gen_from_clause(qb, ¶ms.use_enterprise_taxonomy);
+ qb = self.gen_conditional_join_event_props(
+ qb,
+ project_id,
+ ¶ms.property_type,
+ ¶ms.filter_by_event_names,
+ ¶ms.event_names,
+ );
+
+ // begin the WHERE clause
+ qb = self.init_where_clause(qb, project_id);
+ qb = self.where_property_type(qb, ¶ms.property_type);
+ qb.push("AND COALESCE(group_type_index, -1) = ");
+ qb.push_bind(params.group_type_index);
+
+ qb = self.conditionally_filter_excluded_properties(
+ qb,
+ ¶ms.property_type,
+ ¶ms.excluded_properties,
+ );
+ qb = self.conditionally_filter_properties(qb, ¶ms.properties);
+ qb = self.conditionally_filter_numerical_properties(qb, ¶ms.is_numerical);
+
+ qb =
+ self.conditionally_apply_search_clause(qb, ¶ms.search_terms, ¶ms.search_fields);
+
+ qb = self.conditionally_filter_event_names(
+ qb,
+ ¶ms.filter_by_event_names,
+ ¶ms.event_names,
+ );
+ qb = self.conditionally_filter_feature_flags(qb, ¶ms.is_feature_flag);
+
+ // ORDER BY clauses
+ qb.push("ORDER BY is_seen_on_filtered_events DESC, ");
+ if params.order_by_verified {
+ qb.push("verified DESC NULLS LAST, ");
+ }
+ qb.push(format!("{}.name ASC ", &self.prop_defs_table));
+
+ // LIMIT and OFFSET clauses
+ qb.push("LIMIT ");
+ qb.push_bind(params.limit);
+ qb.push("OFFSET ");
+ qb.push_bind(params.offset);
+
+ qb.sql().into()
+ }
+
+ fn gen_from_clause<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ use_enterprise_taxonomy: &Option<bool>,
+ ) -> QueryBuilder<'a, Postgres> {
+ let from_clause = if use_enterprise_taxonomy.is_some_and(|uet| uet) {
+ // TODO: ensure this all behaves as it does in Django (and that we need it!) later...
+ // https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L505-L506
+ format!(
+ "{0} FULL OUTER JOIN {1} ON {1}.id={0}.propertydefinition_ptr_id",
+ &self.enterprise_prop_defs_table, &self.prop_defs_table
+ )
+ } else {
+ // this is the default if enterprise taxonomy is not requested
+ self.prop_defs_table.clone()
+ };
+ qb.push_bind(from_clause);
+
+ qb
+ }
+
+ fn gen_conditional_join_event_props<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ project_id: i32,
+ property_type: &Option<String>,
+ filter_by_event_names: &Option<bool>,
+ event_names: &'a Option<Vec<String>>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // conditionally join on event properties table
+ // this join is only applied if the query is scoped to type "event"
+ if self.is_prop_type_event(property_type) {
+ qb.push(self.event_property_join_type(filter_by_event_names));
+ qb.push(" (SELECT DISTINCT property FROM ");
+ qb.push_bind(self.event_props_table.clone());
+ qb.push(" WHERE COALESCE(project_id, team_id) = ");
+ qb.push_bind(project_id);
+
+ // conditionally apply event_names filter
+ if filter_by_event_names.is_some_and(|fben| fben) {
+ if let Some(names) = event_names {
+ if !names.is_empty() {
+ qb.push(" AND event = ANY(");
+ qb.push_bind(names);
+ qb.push(") ");
+ }
+ }
+ }
+
+ // close the JOIN clause and add the JOIN condition
+ qb.push(format!(
+ ") {0} ON {0}.property = name ",
+ POSTHOG_EVENT_PROPERTY_TABLE_NAME_ALIAS
+ ));
+ }
+
+ qb
+ }
+
+ fn init_where_clause<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ project_id: i32,
+ ) -> QueryBuilder<'a, Postgres> {
+ qb.push(format!(
+ "WHERE COALESCE({0}.project_id, {0}.team_id) = ",
+ self.prop_defs_table
+ ));
+ qb.push_bind(project_id);
+
+ qb
+ }
+
+ fn where_property_type<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ property_type: &'a Option<String>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // add condition on "type" (here, ProperyParentType)
+ // TODO: throw error in input validation if this is missing!
+ if let Some(prop_type) = property_type {
+ qb.push("AND type = ");
+ qb.push_bind(prop_type);
+ }
+
+ qb
+ }
+
+ fn conditionally_filter_excluded_properties<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ property_type: &Option<String>,
+ excluded_properties: &'a Option<Vec<String>>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // conditionally filter on excluded_properties
+ // NOTE: excluded_properties is also passed to the Django API as JSON,
+ // but may not matter when passed to this service. TBD. See below:
+ // https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L241
+ if let Some(excludes) = excluded_properties {
+ if self.is_prop_type_event(property_type) && !excludes.is_empty() {
+ qb.push(format!("AND NOT {0}.name = ANY(", self.prop_defs_table));
+ let mut buf: Vec<&str> = vec![];
+ for entry in EVENTS_HIDDEN_PROPERTY_DEFINITIONS {
+ buf.push(entry);
+ }
+ for entry in excludes.iter() {
+ buf.push(entry);
+ }
+ qb.push_bind(buf);
+ qb.push(") ");
+ }
+ }
+
+ qb
+ }
+
+ fn conditionally_filter_properties<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ properties: &'a Option<Vec<String>>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // conditionally filter on property names ("name" col)
+ if let Some(props) = properties {
+ if !props.is_empty() {
+ qb.push(" AND name = ANY(");
+ qb.push_bind(props);
+ qb.push(") ");
+ }
+ }
+
+ qb
+ }
+
+ fn conditionally_filter_numerical_properties<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ is_numerical: &Option<bool>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // conditionally filter for numerical-valued properties:
+ // https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L493-L499
+ // https://github.com/PostHog/posthog/blob/master/posthog/filters.py#L61-L84
+ if is_numerical.is_some_and(|is_num| is_num) {
+ qb.push(
+ " AND is_numerical = true AND NOT name = ANY(ARRAY['distinct_id', 'timestamp']) ",
+ );
+ }
+
+ qb
+ }
+
+ fn conditionally_apply_search_clause<'a>(
+ &self,
+ mut qb: QueryBuilder<'a, Postgres>,
+ search_terms: &'a Option<Vec<String>>,
+ search_fields: &'a HashSet<String>,
+ ) -> QueryBuilder<'a, Postgres> {
+ // conditionally apply search term matching; skip this if possible, it's not cheap!
+ // logic: https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L493-L499
+ if search_terms.as_ref().is_some_and(|terms| !terms.is_empty()) {
+ // step 1: identify property def "aliases" to enrich our fuzzy matching; see also:
+ // https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L309-L324
+
+ // attempt to enrich basic search terms using a heuristic:
+ // if the long slug associated with any std PostHog event properties
+ // matches *every search term* in the incoming query, capture the
+ // associated property name and add it to the search terms we'll
+ // attempt to return from the prop defs query. This is expensive :(
+ let term_aliases: Vec<&str> = self
+ .search_term_aliases
+ .iter()
+ .filter(|(_key, prop_long_slug)| {
+ search_terms
+ .as_ref()
+ .unwrap()
+ .iter()
+ .all(|term| prop_long_slug.contains(term))
+ })
+ .map(|(key, _matched_slug)| *key)
+ .collect();
+
+ // build a query fragment if we found aliases. We can do this
+ // outside of the builder because these aren't user inputs
+ let search_extras = if !term_aliases.is_empty() {
+ format!(
+ " OR name = ANY(ARRAY[{}])",
+ term_aliases
+ .iter()
+ .map(|ta| format!("'{}'", ta))
+ .collect::<Vec<_>>()
+ .join(", ")
+ )
+ } else {
+ "".to_string()
+ };
+
+ // step 2: filter "initial" prop defs if the user wants "latest"
+ // https://github.com/PostHog/posthog/blob/master/posthog/taxonomy/property_definition_api.py#L326-L339
+ let screening_clause = if term_aliases.iter().any(|ta| *ta == SEARCH_TRIGGER_WORD) {
+ format!(" OR NOT name ILIKE '%{}%'", SEARCH_SCREEN_WORD)
+ } else {
+ "".to_string()
+ };
+
+ // step 2.5: join whatever we found in search_extras and trigger word result
+ let search_extras = format!("{}{}", search_extras, screening_clause);
+
+ // step 3: generate the search SQL which consistes of nested AND/OR clauses of arbitrary size,
+ // with each clasue testing search *fields* (like "name") in the table against fuzzy-matched
+ // search *terms* (event props.) Original Django monolith query construction step is here:
+ // https://github.com/PostHog/posthog/blob/master/posthog/filters.py#L61-L84
+ if !search_fields.is_empty() || !search_terms.as_ref().is_some_and(|s| s.is_empty()) {
+ /* TODO: I don't think we need this cleansing step in the Rust service as Django does
+ let cleansed_terms: Vec<String> = search
+ .as_ref()
+ .unwrap()
+ .iter()
+ .map(|s| s.replace("\0", ""))
+ .collect();
+ */
+
+ // TODO: this code is unhinged!! I'll circle back to refactor after
+ // I battle the borrow checker some more, apologies! :)
+ if let Some(terms) = search_terms {
+ for (tndx, term) in terms.iter().enumerate() {
+ if search_fields.is_empty() {
+ continue;
+ }
+ if tndx == 0 {
+ qb.push(" AND ((");
+ }
+ for (fndx, field) in search_fields.iter().enumerate() {
+ if fndx == 0 {
+ qb.push("(");
+ }
+ qb.push_bind(field.clone());
+ qb.push(" ILIKE '%");
+ qb.push_bind(term);
+ qb.push("%' ");
greptile
logic: SQL injection risk: field names are bound as parameters but concatenated with ILIKE operator. Should validate search_fields against a whitelist of allowed column names
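A minimal sketch of the whitelist approach the comment describes, in Python for illustration (the allowed column names are assumptions): only whitelisted field names reach the SQL text, and every search term stays a bind parameter.

```python
ALLOWED_SEARCH_FIELDS = {"name", "property_type", "description"}  # assumed columns

def search_clause(search_fields, search_terms):
    """Return (sql_fragment, params) for fuzzy matching, with field names whitelisted."""
    fields = [f for f in search_fields if f in ALLOWED_SEARCH_FIELDS]
    if not fields or not search_terms:
        return "", []
    clauses, params = [], []
    for term in search_terms:
        ors = []
        for field in fields:
            ors.append(f"{field} ILIKE %s")  # field comes from the whitelist above
            params.append(f"%{term}%")       # the term itself is always bound
        clauses.append("(" + " OR ".join(ors) + ")")
    return " AND (" + " AND ".join(clauses) + ")", params
```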
diff block
+from typing import Any, Optional
+from collections.abc import Iterator
+
+from posthog.temporal.common.logger import FilteringBoundLogger
+from posthog.temporal.data_imports.pipelines.helpers import incremental_type_to_initial_value
+from posthog.temporal.data_imports.pipelines.pipeline.typings import SourceResponse
+from posthog.warehouse.types import IncrementalFieldType
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives import serialization
+from dlt.common.normalizers.naming.snake_case import NamingConvention
+import snowflake.connector
+from snowflake.connector.cursor import SnowflakeCursor
+
+
+def _get_connection(
+ account_id: str,
+ user: Optional[str],
+ password: Optional[str],
+ passphrase: Optional[str],
+ private_key: Optional[str],
+ auth_type: str,
+ database: str,
+ warehouse: str,
+ schema: str,
+ role: Optional[str] = None,
+) -> snowflake.connector.SnowflakeConnection:
+ if auth_type == "password" and user is not None and password is not None:
+ return snowflake.connector.connect(
+ account=account_id,
+ user=user,
+ password=password,
+ warehouse=warehouse,
+ database=database,
+ schema=schema,
+ role=role if role else None,
+ )
+
+ if private_key is None:
+ raise ValueError("Private key is missing for snowflake")
+
+ p_key = serialization.load_pem_private_key(
+ private_key.encode("utf-8"),
+ password=passphrase.encode() if passphrase is not None else None,
+ backend=default_backend(),
+ )
+
+ pkb = p_key.private_bytes(
+ encoding=serialization.Encoding.DER,
+ format=serialization.PrivateFormat.PKCS8,
+ encryption_algorithm=serialization.NoEncryption(),
+ )
+
+ return snowflake.connector.connect(
+ account=account_id,
+ user=user,
+ warehouse=warehouse,
+ database=database,
+ schema=schema,
+ role=role if role else None,
+ private_key=pkb,
+ )
+
+
+def _build_query(
+ database: str,
+ schema: str,
+ table_name: str,
+ is_incremental: bool,
+ incremental_field: Optional[str],
+ incremental_field_type: Optional[IncrementalFieldType],
+ db_incremental_field_last_value: Optional[Any],
+) -> tuple[str, tuple[Any, ...]]:
+ if not is_incremental:
+ return f"SELECT * FROM IDENTIFIER(%s)", (f"{database}.{schema}.{table_name}",)
+
+ if incremental_field is None or incremental_field_type is None:
+ raise ValueError("incremental_field and incremental_field_type can't be None")
+
+ if db_incremental_field_last_value is None:
+ db_incremental_field_last_value = incremental_type_to_initial_value(incremental_field_type)
+
+ return f"SELECT * FROM IDENTIFIER(%s) WHERE IDENTIFIER(%s) >= %s ORDER BY IDENTIFIER(%s) ASC", (
+ f"{database}.{schema}.{table_name}",
+ incremental_field,
+ db_incremental_field_last_value,
+ incremental_field,
+ )
+
+
+def _get_primary_keys(cursor: SnowflakeCursor, database: str, schema: str, table_name: str) -> list[str] | None:
+ cursor.execute(f"SHOW PRIMARY KEYS IN IDENTIFIER(%s)", (f"{database}.{schema}.{table_name}",))
greptile
logic: The `f` prefix on these query strings is redundant (nothing is interpolated into them), and mixing f-strings with `%s` placeholders invites someone to later splice values directly into the SQL instead of binding them. Drop the `f` prefix and keep passing the qualified names as bind parameters to IDENTIFIER().
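For clarity, the intent without the stray `f` prefix, as a sketch: the only dynamic piece is the fully qualified name, and it is handed to `IDENTIFIER()` as a bind parameter rather than formatted into the statement.

```python
def primary_keys_query(database: str, schema: str, table_name: str):
    """Return (sql, params) with the qualified name passed as a bind parameter."""
    sql = "SHOW PRIMARY KEYS IN IDENTIFIER(%s)"  # plain string, no f prefix
    params = (f"{database}.{schema}.{table_name}",)
    return sql, params
```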
diff block
+import os
+import requests
+import datetime
+from typing import Any
+
+import dagster
+
+from clickhouse_driver import Client
+from posthog.clickhouse.cluster import ClickhouseCluster, NodeRole
+from posthog.models.exchange_rate.sql import EXCHANGE_RATE_TABLE_NAME, EXCHANGE_RATE_DICTIONARY_NAME
+from posthog.models.exchange_rate.currencies import SUPPORTED_CURRENCY_CODES
+
+from dags.common import JobOwners
+
+OPEN_EXCHANGE_RATES_API_BASE_URL = "https://openexchangerates.org/api"
+
+
+class ExchangeRateConfig(dagster.Config):
+ """Configuration for the exchange rate API."""
+
+ # NOTE: For local development, you can add this key to a `.env` file in the root of the project
+ app_id: str = os.environ.get("OPEN_EXCHANGE_RATES_APP_ID", "")
+ api_base_url: str = OPEN_EXCHANGE_RATES_API_BASE_URL
+
+
+# We'll have one partition for each day, starting from 2025-01-01 for the daily job
+# And one partition for hourly updates for the hourly job
+DAILY_PARTITION_DEFINITION = dagster.DailyPartitionsDefinition(start_date="2025-01-01")
+HOURLY_PARTITION_DEFINITION = dagster.HourlyPartitionsDefinition(start_date="2025-01-01-00:00Z")
+
+
+def get_date_partition_from_hourly_partition(hourly_partition: str) -> str:
+ """
+ Convert a hourly partition key to a daily partition key.
+ """
+ return "-".join(hourly_partition.split("-", 3)[0:3])
+
+
+@dagster.op(
+ retry_policy=dagster.RetryPolicy(
+ max_retries=5,
+ delay=0.2, # 200ms
+ backoff=dagster.Backoff.EXPONENTIAL,
+ jitter=dagster.Jitter.PLUS_MINUS,
+ )
+)
+def fetch_exchange_rates(
+ context: dagster.OpExecutionContext, date_str: str, app_id: str, api_base_url: str
+) -> dict[str, Any]:
+ """
+ Fetches exchange rates from the Open Exchange Rates API for a specific date.
+ """
+ # Construct the API URL
+ url = f"{api_base_url}/historical/{date_str}.json"
+
+ # Prepare query parameters
+ params = {"app_id": app_id}
+
+ # Make the API request
+ context.log.info(f"Fetching exchange rates for {date_str} with params {params}")
+ response = requests.get(url, params=params)
+
+ if response.status_code != 200:
+ error_msg = f"Failed to fetch exchange rates: {response.status_code} - {response.text}"
+ context.log.error(error_msg)
+ raise Exception(error_msg)
+
+ # Parse the response
+ data = response.json()
+
+ # Log some information about the fetched data
+ context.log.info(f"Successfully fetched exchange rates for {date_str}")
+ context.log.info(f"Base currency: {data.get('base')}")
+ context.log.info(f"Number of rates: {len(data.get('rates', {}))}")
+
+ if not data.get("rates"):
+ raise Exception(f"No rates found for {date_str}")
+
+ return data.get("rates")
+
+
+@dagster.asset(partitions_def=DAILY_PARTITION_DEFINITION)
+def daily_exchange_rates(
+ context: dagster.AssetExecutionContext, config: ExchangeRateConfig
+) -> dagster.Output[dict[str, Any]]:
+ """
+ Fetches exchange rates from the Open Exchange Rates API for a specific date.
+ The date is determined by the partition key, which is in the format %Y-%m-%d.
+ """
+ date_str = context.partition_key
+ app_id = config.app_id
+ api_base_url = config.api_base_url
+
+ if not app_id:
+ raise ValueError("Open Exchange Rates API key (app_id) is required")
+
+ rates = fetch_exchange_rates(
+ context=dagster.build_op_context(), date_str=date_str, app_id=app_id, api_base_url=api_base_url
+ )
+
+ return dagster.Output(
+ value=rates,
+ metadata={
+ "date_str": date_str,
+ "rates_count": len(rates),
+ "rates": rates,
+ },
+ )
+
+
+@dagster.asset(partitions_def=HOURLY_PARTITION_DEFINITION)
+def hourly_exchange_rates(
+ context: dagster.AssetExecutionContext, config: ExchangeRateConfig
+) -> dagster.Output[dict[str, Any]]:
+ """
+ Fetches exchange rates from the Open Exchange Rates API for a specific hour.
+ The date is determined by the partition key, which is in the format %Y-%m-%d-%H:%M.
+ """
+ # Convert hourly partition key to daily format because we always fetch information for the day
+ date_str = get_date_partition_from_hourly_partition(context.partition_key)
+ app_id = config.app_id
+ api_base_url = config.api_base_url
+
+ if not app_id:
+ raise ValueError("Open Exchange Rates API key (app_id) is required")
+
+ rates = fetch_exchange_rates(
+ context=dagster.build_op_context(), date_str=date_str, app_id=app_id, api_base_url=api_base_url
+ )
+
+ return dagster.Output(
+ value=rates,
+ metadata={
+ "date_str": date_str,
+ "rates_count": len(rates),
+ "rates": rates,
+ },
+ )
+
+
+@dagster.op
+def store_exchange_rates_in_clickhouse(
+ context: dagster.OpExecutionContext,
+ date_str: str,
+ exchange_rates: dict[str, Any],
+ cluster: dagster.ResourceParam[ClickhouseCluster],
+) -> tuple[list[dict[str, Any]], list[str]]:
+ """
+ Stores exchange rates data in ClickHouse.
+ """
+ # Transform data into rows for ClickHouse
+ rows = [
+ {"date": date_str, "currency": currency, "rate": rate}
+ for currency, rate in exchange_rates.items()
+ if currency in SUPPORTED_CURRENCY_CODES
+ ]
+
+ # Log information about the data being stored
+ context.log.info(f"Storing {len(rows)} exchange rates for {date_str} in ClickHouse")
+
+ # Prepare values for batch insert
+ # Use toDate() to cast the string date to a ClickHouse Date type
+ values = [f"('{row['currency']}', {row['rate']}, toDate('{row['date']}'))" for row in rows]
greptile
logic: SQL values are not properly escaped - use parameterized queries instead of string formatting to prevent SQL injection
suggested fix
+ values = [(row['currency'], row['rate'], row['date']) for row in rows]
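With the rows reduced to plain tuples, the insert can hand the data to the driver separately from the statement; a sketch assuming clickhouse_driver's batch-insert form and the column names implied by the row dicts above:

```python
from datetime import date

def insert_rates(client, rows, table=EXCHANGE_RATE_TABLE_NAME):
    # Values travel as data, not as formatted SQL text; only the (constant)
    # table name appears in the statement itself.
    client.execute(
        f"INSERT INTO {table} (currency, rate, date) VALUES",
        [(r["currency"], r["rate"], date.fromisoformat(r["date"])) for r in rows],
    )
```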
diff block
jwt-refresh-hint3=The endpoint for refreshing a token is 'JWT/refresh/newToken'
jwt-refresh-hint4=Use the found access token in the Authorization: Bearer header and use your own refresh token
jwt-refresh-not-tom=User is not Tom but {0}, please try again
+jwt-refresh-alg-none=Nicely found! You solved the assignment with 'alg: none' can you also solve it by using the refresh token?
-jwt-final-jerry-account=Yikes, you are removing Jerry's account, try to delete the account of Tom
+jwt-final-jerry-account=Yikes, you are removing Jerry's account, try to delete the account of Tom
jwt-final-not-tom=Username is not Tom try to pass a token for Tom
-jwt-final-hint1=Take a look at the token and specifically and the header
-jwt-final-hint2=The 'kid' (key ID) header parameter is a hint indicating which key was used to secure the JWS
-jwt-final-hint3=The key can be located on the filesystem in memory or even reside in the database
-jwt-final-hint4=The key is stored in the database and loaded while verifying a token
-jwt-final-hint5=Using a SQL injection you might be able to manipulate the key to something you know and create a new token.
-jwt-final-hint6=Use: hacked' UNION select 'deletingTom' from INFORMATION_SCHEMA.SYSTEM_USERS -- as the kid in the header and change the contents of the token to Tom and hit the endpoint with the new token
\ No newline at end of file
+jwt-jku-hint1=Take a look at the token and specifically at the headers
+jwt-jku-hint2=The 'jku' header parameter hints a URL pointing to a set of keys used by the server to sign the JWT.
+jwt-jku-hint3=Could you use WebWolf to host the public key as a JWKS?
+jwt-jku-hint4=Create a key pair and sign the token with the private key
+jwt-jku-hint5=Change the JKU header claim and point it to a URL that hosts the public key in JWKS format.
+
+jwt-kid-hint1=Take a look at the token and specifically at the headers
+jwt-kid-hint2=The 'kid' (key ID) header parameter hints at the key was used to secure the JWS
+jwt-kid-hint3=The key resides can for example, either in the filesystem in memory or the database.
+jwt-kid-hint4=The key is stored in the database and loaded while verifying a token
+jwt-kid-hint5=Using an SQL injection, you might be able to manipulate the key to a known object and create a new token.
+jwt-kid-hint6=Use: hacked' UNION select 'deletingTom' from INFORMATION_SCHEMA.SYSTEM_USERS -- as the kid in the header change the contents of the token to Tom and hit the endpoint with the new token
greptile
syntax: Missing comma after 'header' in the SQL injection example
diff block
Err(e) => return Err(e),
};
- let sql = format!("SELECT
- c.column_name as name,
- c.data_type as type_,
- CASE WHEN c.is_nullable = 'YES' THEN true ELSE false END as nullable,
- pgd.description AS comment
-FROM
- information_schema.columns c
-LEFT JOIN
- pg_catalog.pg_statio_all_tables as st on c.table_schema = st.schemaname and c.table_name = st.relname
-LEFT JOIN
- pg_catalog.pg_description pgd on pgd.objoid = st.relid and pgd.objsubid = c.ordinal_position
-WHERE
- c.table_name = '{dataset_name}'
- AND c.table_schema = '{schema_name}'
-ORDER BY
- c.table_schema,
- c.table_name,
- c.ordinal_position;"
+ // Build the IN clause for (schema, table) pairs
+ let table_pairs: Vec<String> = datasets
+ .iter()
+ .map(|(table, schema)| format!("('{schema}', '{table}')"))
+ .collect();
+ let table_pairs_str = table_pairs.join(", ");
+
+ // Query for tables and views
+ let regular_sql = format!(
+ "SELECT
+ c.table_name as dataset_name,
+ c.table_schema as schema_name,
+ c.column_name as name,
+ c.data_type as type_,
+ CASE WHEN c.is_nullable = 'YES' THEN true ELSE false END as nullable,
+ pgd.description AS comment,
+ t.table_type as source_type
+ FROM
+ information_schema.columns c
+ JOIN
+ information_schema.tables t ON c.table_name = t.table_name AND c.table_schema = t.table_schema
+ LEFT JOIN
+ pg_catalog.pg_statio_all_tables as st on c.table_schema = st.schemaname and c.table_name = st.relname
+ LEFT JOIN
+ pg_catalog.pg_description pgd on pgd.objoid = st.relid and pgd.objsubid = c.ordinal_position
+ WHERE
+ (c.table_schema, c.table_name) IN ({})
+ AND t.table_type IN ('BASE TABLE', 'VIEW')
+ ORDER BY
+ c.table_schema,
+ c.table_name,
+ c.ordinal_position;",
+ table_pairs_str
);
- let cols = match sqlx::query_as::<_, DatasetColumnRecord>(&sql)
+ // Query for materialized views
+ let mv_sql = format!(
+ "SELECT
+ c.relname as dataset_name,
+ n.nspname as schema_name,
+ a.attname as name,
+ format_type(a.atttypid, a.atttypmod) as type_,
+ NOT a.attnotnull as nullable,
+ d.description as comment,
+ 'MATERIALIZED_VIEW' as source_type
+ FROM pg_class c
+ JOIN pg_namespace n ON n.oid = c.relnamespace
+ JOIN pg_attribute a ON a.attrelid = c.oid
+ LEFT JOIN pg_description d ON d.objoid = c.oid AND d.objsubid = a.attnum
+ WHERE c.relkind = 'm'
+ AND (n.nspname, c.relname) IN ({})
+ AND a.attnum > 0
+ AND NOT a.attisdropped
+ ORDER BY
+ n.nspname,
+ c.relname,
+ a.attnum;",
+ table_pairs_str
+ );
+
+ let mut columns = Vec::new();
+
+ // Get regular tables and views
+ let regular_cols = match sqlx::query_as::<_, DatasetColumnRecord>(®ular_sql)
.fetch_all(&postgres_conn)
.await
{
- Ok(cols) => cols,
- Err(e) => return Err(anyhow!("Error fetching columns: {:?}", e)),
+ Ok(c) => c,
+ Err(e) => return Err(anyhow!("Error fetching regular columns: {:?}", e)),
};
+ // Get materialized view columns
+ let mv_cols = match sqlx::query_as::<_, DatasetColumnRecord>(&mv_sql)
+ .fetch_all(&postgres_conn)
+ .await
+ {
+ Ok(c) => c,
+ Err(e) => return Err(anyhow!("Error fetching materialized view columns: {:?}", e)),
+ };
+
+ // Combine results
+ columns.extend(regular_cols);
+ columns.extend(mv_cols);
+
if let (Some(mut child_process), Some(tempfile)) = (child_process, tempfile) {
child_process.kill()?;
for file in tempfile {
file.close()?;
}
}
- Ok(cols)
+ Ok(columns)
}
-async fn get_mysql_columns(
- dataset_name: &String,
+async fn get_mysql_columns_batch(
+ datasets: &[(String, String)],
credentials: &MySqlCredentials,
) -> Result<Vec<DatasetColumnRecord>> {
let (mysql_conn, child_process, tempfile) = match get_mysql_connection(credentials).await {
Ok(conn) => conn,
Err(e) => return Err(e),
};
+ // Build the IN clause for table names
+ let table_pairs: Vec<String> = datasets
+ .iter()
+ .map(|(table, schema)| format!("('{schema}', '{table}')"))
+ .collect();
greptile
logic: SQL injection vulnerability in format string - schema and table names should be properly escaped/sanitized before being used in SQL
suggested fix
// Build the IN clause for table names
let table_pairs: Vec<String> = datasets
.iter()
+ .map(|(table, schema)| {
+ let escaped_schema = schema.replace('\'', "''");
+ let escaped_table = table.replace('\'', "''");
+ format!("('{}', '{}')", escaped_schema, escaped_table)
+ })
.collect();
diff block
dataset_name: &String,
schema_name: &String,
credentials: &Credential,
+ database: Option<String>,
) -> Result<Vec<DatasetColumnRecord>> {
- let cols = match credentials {
+ let cols_result = match credentials {
Credential::Postgres(credentials) => {
- match get_postgres_columns(dataset_name, schema_name, credentials).await {
+ match get_postgres_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::MySQL(credentials) => {
- match get_mysql_columns(dataset_name, credentials).await {
+ match get_mysql_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Bigquery(credentials) => {
- match get_bigquery_columns(dataset_name, credentials).await {
+ match get_bigquery_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Snowflake(credentials) => {
- match get_snowflake_columns(dataset_name, credentials).await {
+ match get_snowflake_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ database,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
_ => return Err(anyhow!("Unsupported data source type")),
};
- Ok(cols)
+ Ok(cols_result)
}
-async fn get_postgres_columns(
- dataset_name: &String,
- schema_name: &String,
+pub async fn retrieve_dataset_columns_batch(
+ datasets: &[(String, String)], // Vec of (dataset_name, schema_name)
+ credentials: &Credential,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ match credentials {
+ Credential::Postgres(credentials) => {
+ get_postgres_columns_batch(datasets, credentials).await
+ }
+ Credential::MySQL(credentials) => get_mysql_columns_batch(datasets, credentials).await,
+ Credential::Bigquery(credentials) => {
+ get_bigquery_columns_batch(datasets, credentials).await
+ }
+ Credential::Snowflake(credentials) => {
+ get_snowflake_columns_batch(datasets, credentials, database).await
+ }
+ _ => Err(anyhow!("Unsupported data source type")),
+ }
+}
+
+async fn get_snowflake_columns_batch(
+ datasets: &[(String, String)],
+ credentials: &SnowflakeCredentials,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ let snowflake_client = get_snowflake_client(credentials, database).await?;
+
+ // Build the IN clause for (schema, table) pairs
+ let table_pairs: Vec<String> = datasets
+ .iter()
+ .map(|(table, schema)| format!("('{}', '{}')", schema.to_uppercase(), table.to_uppercase()))
+ .collect();
greptile
logic: SQL injection vulnerability in format string - schema and table names should be properly escaped/sanitized before being used in SQL
suggested fix
// Build the IN clause for (schema, table) pairs
let table_pairs: Vec<String> = datasets
.iter()
+ .map(|(table, schema)| {
+ let escaped_schema = schema.to_uppercase().replace('\'', "''");
+ let escaped_table = table.to_uppercase().replace('\'', "''");
+ format!("('{}', '{}')", escaped_schema, escaped_table)
+ })
.collect();
diff block
.job()
.query(&project_id, query_request)
.await
- .map_err(|e| anyhow!("Error fetching table and views records: {:?}", e))?;
+ .map_err(|e| anyhow!("Error fetching columns: {:?}", e))?;
let mut columns = Vec::new();
if let Some(rows) = result.rows {
for row in rows {
if let Some(cols) = row.columns {
- let name = cols[0]
+ let dataset_name = cols[0]
+ .value
+ .as_ref()
+ .and_then(|v| v.as_str())
+ .ok_or_else(|| anyhow!("Missing dataset name"))?
+ .to_string();
+
+ let schema_name = cols[1]
+ .value
+ .as_ref()
+ .and_then(|v| v.as_str())
+ .ok_or_else(|| anyhow!("Missing schema name"))?
+ .to_string();
+
+ let name = cols[2]
.value
.as_ref()
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing column name"))?
.to_string();
- let type_ = cols[1]
+ let type_ = cols[3]
.value
.as_ref()
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing column type"))?
.to_string();
- let nullable = cols[2]
+ let nullable = cols[4]
.value
.as_ref()
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("Missing nullable value"))?
.parse::<bool>()?;
+ let comment = cols[5]
+ .value
+ .as_ref()
+ .and_then(|v| v.as_str())
+ .map(|s| s.to_string());
+
+ let source_type = cols[6]
+ .value
+ .as_ref()
+ .and_then(|v| v.as_str())
+ .ok_or_else(|| anyhow!("Missing source type"))?
+ .to_string();
+
columns.push(DatasetColumnRecord {
+ dataset_name,
+ schema_name,
name,
type_,
nullable,
- comment: None,
+ comment,
+ source_type,
});
}
}
}
Ok(columns)
}
-
async fn get_snowflake_columns(
dataset_name: &String,
+ schema_name: &String,
credentials: &SnowflakeCredentials,
) -> Result<Vec<DatasetColumnRecord>> {
- let snowflake_client = get_snowflake_client(credentials).await?;
+ let snowflake_client = get_snowflake_client(credentials, None).await?;
let uppercase_dataset_name = dataset_name.to_uppercase();
+ let uppercase_schema_name = schema_name.to_uppercase();
let sql = format!(
"SELECT
- COLUMN_NAME AS name,
- DATA_TYPE AS type_,
- CASE WHEN IS_NULLABLE = 'YES' THEN true ELSE false END AS nullable,
- COMMENT AS comment
+ c.COLUMN_NAME AS name,
+ c.DATA_TYPE AS type_,
+ CASE WHEN c.IS_NULLABLE = 'YES' THEN true ELSE false END AS nullable,
+ c.COMMENT AS comment,
+ t.TABLE_TYPE as source_type
FROM
- INFORMATION_SCHEMA.COLUMNS
+ INFORMATION_SCHEMA.COLUMNS c
+ JOIN
+ INFORMATION_SCHEMA.TABLES t
+ ON c.TABLE_NAME = t.TABLE_NAME
+ AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
WHERE
- TABLE_NAME = '{}'
- ORDER BY
- ORDINAL_POSITION;",
- uppercase_dataset_name
+ c.TABLE_NAME = '{uppercase_dataset_name}'
+ AND c.TABLE_SCHEMA = '{uppercase_schema_name}'
greptile
logic: SQL injection vulnerability - dataset_name and schema_name should be properly escaped before being used in SQL
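The escaping the comment asks for, sketched in Python for illustration (the Rust code would do the equivalent with `str::replace`): double any single quotes before the names are placed inside the quoted literals.

```python
def sql_string_literal(value: str) -> str:
    """Escape a value for use inside a single-quoted SQL literal."""
    return "'" + value.replace("'", "''") + "'"

def snowflake_columns_filter(dataset_name: str, schema_name: str) -> str:
    # mirrors the WHERE clause above, with both names escaped and uppercased
    return (
        f"c.TABLE_NAME = {sql_string_literal(dataset_name.upper())} "
        f"AND c.TABLE_SCHEMA = {sql_string_literal(schema_name.upper())}"
    )
```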
diff block
dataset_name: &String,
schema_name: &String,
credentials: &Credential,
+ database: Option<String>,
) -> Result<Vec<DatasetColumnRecord>> {
- let cols = match credentials {
+ let cols_result = match credentials {
Credential::Postgres(credentials) => {
- match get_postgres_columns(dataset_name, schema_name, credentials).await {
+ match get_postgres_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::MySQL(credentials) => {
- match get_mysql_columns(dataset_name, credentials).await {
+ match get_mysql_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Bigquery(credentials) => {
- match get_bigquery_columns(dataset_name, credentials).await {
+ match get_bigquery_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
Credential::Snowflake(credentials) => {
- match get_snowflake_columns(dataset_name, credentials).await {
+ match get_snowflake_columns_batch(
+ &[(dataset_name.clone(), schema_name.clone())],
+ credentials,
+ database,
+ )
+ .await
+ {
Ok(cols) => cols,
Err(e) => return Err(e),
}
}
_ => return Err(anyhow!("Unsupported data source type")),
};
- Ok(cols)
+ Ok(cols_result)
}
-async fn get_postgres_columns(
- dataset_name: &String,
- schema_name: &String,
+pub async fn retrieve_dataset_columns_batch(
+ datasets: &[(String, String)], // Vec of (dataset_name, schema_name)
+ credentials: &Credential,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ match credentials {
+ Credential::Postgres(credentials) => {
+ get_postgres_columns_batch(datasets, credentials).await
+ }
+ Credential::MySQL(credentials) => get_mysql_columns_batch(datasets, credentials).await,
+ Credential::Bigquery(credentials) => {
+ get_bigquery_columns_batch(datasets, credentials).await
+ }
+ Credential::Snowflake(credentials) => {
+ get_snowflake_columns_batch(datasets, credentials, database).await
+ }
+ _ => Err(anyhow!("Unsupported data source type")),
+ }
+}
+
+async fn get_snowflake_columns_batch(
+ datasets: &[(String, String)],
+ credentials: &SnowflakeCredentials,
+ database: Option<String>,
+) -> Result<Vec<DatasetColumnRecord>> {
+ let snowflake_client = get_snowflake_client(credentials, database).await?;
+
+ // Build the IN clause for (schema, table) pairs
+ let table_pairs: Vec<String> = datasets
+ .iter()
+ .map(|(table, schema)| format!("('{}', '{}')", schema.to_uppercase(), table.to_uppercase()))
+ .collect();
greptile
logic: SQL injection vulnerability - table and schema names should be properly escaped before being used in format string
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.sqlinjection.introduction;
+
+import static java.sql.ResultSet.CONCUR_UPDATABLE;
+import static java.sql.ResultSet.TYPE_SCROLL_SENSITIVE;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.failed;
+import static org.owasp.webgoat.container.assignments.AttackResultBuilder.success;
+
+import java.sql.*;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import org.owasp.webgoat.container.LessonDataSource;
+import org.owasp.webgoat.container.assignments.AssignmentEndpoint;
+import org.owasp.webgoat.container.assignments.AssignmentHints;
+import org.owasp.webgoat.container.assignments.AttackResult;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.ResponseBody;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@AssignmentHints(
+ value = {
+ "SqlStringInjectionHint.8.1",
+ "SqlStringInjectionHint.8.2",
+ "SqlStringInjectionHint.8.3",
+ "SqlStringInjectionHint.8.4",
+ "SqlStringInjectionHint.8.5"
+ })
+public class SqlInjectionLesson8 implements AssignmentEndpoint {
+
+ private final LessonDataSource dataSource;
+
+ public SqlInjectionLesson8(LessonDataSource dataSource) {
+ this.dataSource = dataSource;
+ }
+
+ @PostMapping("/SqlInjection/attack8")
+ @ResponseBody
+ public AttackResult completed(@RequestParam String name, @RequestParam String auth_tan) {
+ return injectableQueryConfidentiality(name, auth_tan);
+ }
+
+ protected AttackResult injectableQueryConfidentiality(String name, String auth_tan) {
+ StringBuilder output = new StringBuilder();
+ String query =
+ "SELECT * FROM employees WHERE last_name = '"
+ + name
+ + "' AND auth_tan = '"
+ + auth_tan
+ + "'";
greptile
logic: Direct string concatenation in SQL query creates SQL injection vulnerability. Use PreparedStatement with parameterized queries instead.
suggested fix
+ String query = "SELECT * FROM employees WHERE last_name = ? AND auth_tan = ?";
+ PreparedStatement statement = connection.prepareStatement(query, ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_UPDATABLE);
+ statement.setString(1, name);
+ statement.setString(2, auth_tan);
diff block
+/*
+ * This file is part of WebGoat, an Open Web Application Security Project utility. For details, please see http://www.owasp.org/
+ *
+ * Copyright (c) 2002 - 2019 Bruce Mayhew
+ *
+ * This program is free software; you can redistribute it and/or modify it under the terms of the
+ * GNU General Public License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with this program; if
+ * not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Getting Source ==============
+ *
+ * Source for this application is maintained at https://github.com/WebGoat/WebGoat, a repository for free software projects.
+ */
+
+package org.owasp.webgoat.lessons.sqlinjection.introduction;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.is;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
+
+import org.junit.jupiter.api.Test;
+import org.owasp.webgoat.container.plugins.LessonTest;
+import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
+
+/**
+ * @author Benedikt Stuhrmann
+ * @since 11/07/18.
+ */
+public class SqlInjectionLesson8Test extends LessonTest {
+
+ @Test
+ public void oneAccount() throws Exception {
+ mockMvc
+ .perform(
+ MockMvcRequestBuilders.post("/SqlInjection/attack8")
+ .param("name", "Smith")
+ .param("auth_tan", "3SL99A"))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("lessonCompleted", is(false)))
+ .andExpect(jsonPath("$.feedback", is(messages.getMessage("sql-injection.8.one"))))
+ .andExpect(jsonPath("$.output", containsString("<table><tr><th>")));
+ }
+
+ @Test
+ public void multipleAccounts() throws Exception {
+ mockMvc
+ .perform(
+ MockMvcRequestBuilders.post("/SqlInjection/attack8")
+ .param("name", "Smith")
+ .param("auth_tan", "3SL99A' OR '1' = '1"))
greptile
logic: The test only exercises a simple OR-based payload. Consider also covering comment-based and UNION-based injection patterns so more complex payloads are tested as well.
diff block
# This ensures proper Hive partitioning by date
current_date = date_from
while current_date <= date_to:
- # Create the date-specific filename
- date_s3_filename = f"{config.s3_path}/event_date={current_date.strftime('%Y-%m-%d')}/{hostname}_{context.run.run_id}.parquet"
-
- if DEBUG:
- date_s3_path = f"{OBJECT_STORAGE_ENDPOINT}/{DAGSTER_DATA_EXPORT_S3_BUCKET}/{date_s3_filename}"
- date_s3_function_args = (
- f"'{date_s3_path}', "
- f"'{OBJECT_STORAGE_ACCESS_KEY_ID}', "
- f"'{OBJECT_STORAGE_SECRET_ACCESS_KEY}', "
- f"'Parquet'"
+ for is_initial_query in [0, 1]:
+ date_s3_filename = f"{config.s3_path}/event_date={current_date.strftime('%Y-%m-%d')}/is_initial_query={is_initial_query}/{hostname}_{context.run.run_id}.parquet"
+
+ if DEBUG:
+ date_s3_path = f"{OBJECT_STORAGE_ENDPOINT}/{DAGSTER_DATA_EXPORT_S3_BUCKET}/{date_s3_filename}"
+ date_s3_function_args = (
+ f"'{date_s3_path}', "
+ f"'{OBJECT_STORAGE_ACCESS_KEY_ID}', "
+ f"'{OBJECT_STORAGE_SECRET_ACCESS_KEY}', "
+ f"'Parquet'"
+ )
+ else:
+ date_s3_path = f"https://{DAGSTER_DATA_EXPORT_S3_BUCKET}.s3.amazonaws.com/{date_s3_filename}"
+ date_s3_function_args = f"'{date_s3_path}', 'Parquet'"
+
+ # Construct the export query for this specific date
+ # Explicitly select all columns except transaction_id which contains a UUID that Parquet doesn't support
+ query = f"""
+ INSERT INTO FUNCTION s3({date_s3_function_args})
+ SELECT
+ hostname,
+ type,
+ event_date,
+ event_time,
+ event_time_microseconds,
+ query_start_time,
+ query_start_time_microseconds,
+ query_duration_ms,
+ read_rows,
+ read_bytes,
+ written_rows,
+ written_bytes,
+ result_rows,
+ result_bytes,
+ memory_usage,
+ current_database,
+ query,
+ formatted_query,
+ normalized_query_hash,
+ query_kind,
+ databases,
+ tables,
+ columns,
+ partitions,
+ projections,
+ views,
+ exception_code,
+ exception,
+ stack_trace,
+ is_initial_query,
+ user,
+ query_id,
+ address,
+ port,
+ initial_user,
+ initial_query_id,
+ initial_address,
+ initial_port,
+ initial_query_start_time,
+ initial_query_start_time_microseconds,
+ interface,
+ is_secure,
+ os_user,
+ client_hostname,
+ client_name,
+ client_revision,
+ client_version_major,
+ client_version_minor,
+ client_version_patch,
+ http_method,
+ http_user_agent,
+ http_referer,
+ forwarded_for,
+ quota_key,
+ distributed_depth,
+ revision,
+ log_comment,
+ thread_ids,
+ peak_threads_usage,
+ ProfileEvents,
+ Settings,
+ used_aggregate_functions,
+ used_aggregate_function_combinators,
+ used_database_engines,
+ used_data_type_families,
+ used_dictionaries,
+ used_formats,
+ used_functions,
+ used_storages,
+ used_table_functions,
+ used_row_policies,
+ used_privileges,
+ missing_privileges,
+ -- transaction_id is excluded because it contains a UUID which Parquet doesn't support
+ query_cache_usage,
+ asynchronous_read_counters,
+ ProfileEvents.Names,
+ ProfileEvents.Values,
+ Settings.Names,
+ Settings.Values,
+ -- Extracted columns
+ JSONExtractInt(log_comment, 'team_id') as team_id,
+ JSONExtractString(log_comment, 'workload') as workload
+ FROM system.query_log
+ WHERE event_date = toDate(%(current_date)s) AND is_initial_query = {is_initial_query}
greptile
logic: SQL injection risk with unescaped is_initial_query parameter in query string
suggested fix
+ WHERE event_date = toDate(%(current_date)s) AND is_initial_query = %(is_initial_query)s
diff block
+import { InjectRepository } from '@nestjs/typeorm';
+
+import { ServerBlockNoteEditor } from '@blocknote/server-util';
+import chalk from 'chalk';
+import { Command } from 'nest-commander';
+import { FieldMetadataType } from 'twenty-shared';
+import { Repository } from 'typeorm';
+
+import {
+ ActiveWorkspacesCommandOptions,
+ ActiveWorkspacesCommandRunner,
+} from 'src/database/commands/active-workspaces.command';
+import { isCommandLogger } from 'src/database/commands/logger';
+import { FeatureFlagKey } from 'src/engine/core-modules/feature-flag/enums/feature-flag-key.enum';
+import { FeatureFlag } from 'src/engine/core-modules/feature-flag/feature-flag.entity';
+import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
+import { FieldMetadataEntity } from 'src/engine/metadata-modules/field-metadata/field-metadata.entity';
+import { ObjectMetadataEntity } from 'src/engine/metadata-modules/object-metadata/object-metadata.entity';
+import { WorkspaceMetadataVersionService } from 'src/engine/metadata-modules/workspace-metadata-version/services/workspace-metadata-version.service';
+import { generateMigrationName } from 'src/engine/metadata-modules/workspace-migration/utils/generate-migration-name.util';
+import {
+ WorkspaceMigrationColumnActionType,
+ WorkspaceMigrationColumnCreate,
+ WorkspaceMigrationTableAction,
+ WorkspaceMigrationTableActionType,
+} from 'src/engine/metadata-modules/workspace-migration/workspace-migration.entity';
+import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service';
+import { TwentyORMGlobalManager } from 'src/engine/twenty-orm/twenty-orm-global.manager';
+import { computeObjectTargetTable } from 'src/engine/utils/compute-object-target-table.util';
+import { computeTableName } from 'src/engine/utils/compute-table-name.util';
+import { WorkspaceDataSourceService } from 'src/engine/workspace-datasource/workspace-datasource.service';
+import { WorkspaceMigrationRunnerService } from 'src/engine/workspace-manager/workspace-migration-runner/workspace-migration-runner.service';
+
+@Command({
+ name: 'upgrade-0.42:migrate-rich-text-field',
+ description: 'Migrate RICH_TEXT fields to new composite structure',
+})
+export class MigrateRichTextFieldCommand extends ActiveWorkspacesCommandRunner {
+ constructor(
+ @InjectRepository(Workspace, 'core')
+ protected readonly workspaceRepository: Repository<Workspace>,
+ @InjectRepository(FieldMetadataEntity, 'metadata')
+ private readonly fieldMetadataRepository: Repository<FieldMetadataEntity>,
+ @InjectRepository(ObjectMetadataEntity, 'metadata')
+ private readonly objectMetadataRepository: Repository<ObjectMetadataEntity>,
+ @InjectRepository(FeatureFlag, 'core')
+ protected readonly featureFlagRepository: Repository<FeatureFlag>,
+ private readonly workspaceDataSourceService: WorkspaceDataSourceService,
+ private readonly twentyORMGlobalManager: TwentyORMGlobalManager,
+ private readonly workspaceMigrationService: WorkspaceMigrationService,
+ private readonly workspaceMigrationRunnerService: WorkspaceMigrationRunnerService,
+ private readonly workspaceMetadataVersionService: WorkspaceMetadataVersionService,
+ ) {
+ super(workspaceRepository);
+ }
+
+ async executeActiveWorkspacesCommand(
+ _passedParam: string[],
+ options: ActiveWorkspacesCommandOptions,
+ workspaceIds: string[],
+ ): Promise<void> {
+ this.logger.log(
+ 'Running command to migrate RICH_TEXT fields to new composite structure',
+ );
+
+ if (isCommandLogger(this.logger)) {
+ this.logger.setVerbose(options.verbose ?? false);
+ }
+
+ let workspaceIterator = 1;
+
+ for (const workspaceId of workspaceIds) {
+ this.logger.log(
+ `Running command for workspace ${workspaceId} ${workspaceIterator}/${workspaceIds.length}`,
+ );
+
+ const richTextFields = await this.fieldMetadataRepository.find({
+ where: {
+ workspaceId,
+ type: FieldMetadataType.RICH_TEXT,
+ },
+ });
+
+ if (!richTextFields.length) {
+ this.logger.log('No RICH_TEXT fields found in this workspace');
+ workspaceIterator++;
+ continue;
+ }
+
+ this.logger.log(`Found ${richTextFields.length} RICH_TEXT fields`);
+
+ for (const richTextField of richTextFields) {
+ const newRichTextField: Partial<FieldMetadataEntity> = {
+ ...richTextField,
+ name: `${richTextField.name}V2`,
+ id: undefined,
+ type: FieldMetadataType.RICH_TEXT_V2,
+ defaultValue: null,
+ };
+
+ await this.fieldMetadataRepository.insert(newRichTextField);
+
+ const objectMetadata = await this.objectMetadataRepository.findOne({
+ where: { id: richTextField.objectMetadataId },
+ });
+
+ if (objectMetadata === null) {
+ this.logger.log(
+ `Object metadata not found for rich text field ${richTextField.name} in workspace ${workspaceId}`,
+ );
+ continue;
+ }
+
+ await this.workspaceMigrationService.createCustomMigration(
+ generateMigrationName(
+ `migrate-rich-text-field-${objectMetadata.nameSingular}-${richTextField.name}`,
+ ),
+ workspaceId,
+ [
+ {
+ name: computeObjectTargetTable(objectMetadata),
+ action: WorkspaceMigrationTableActionType.ALTER,
+ columns: [
+ {
+ action: WorkspaceMigrationColumnActionType.CREATE,
+ columnName: `${richTextField.name}V2Blocknote`,
+ columnType: 'text',
+ isNullable: true,
+ defaultValue: null,
+ } satisfies WorkspaceMigrationColumnCreate,
+ {
+ action: WorkspaceMigrationColumnActionType.CREATE,
+ columnName: `${richTextField.name}V2Markdown`,
+ columnType: 'text',
+ isNullable: true,
+ defaultValue: null,
+ } satisfies WorkspaceMigrationColumnCreate,
+ ],
+ } satisfies WorkspaceMigrationTableAction,
+ ],
+ );
+ }
+
+ await this.workspaceMigrationRunnerService.executeMigrationFromPendingMigrations(
+ workspaceId,
+ );
+
+ await this.workspaceMetadataVersionService.incrementMetadataVersion(
+ workspaceId,
+ );
+
+ const serverBlockNoteEditor = ServerBlockNoteEditor.create();
+
+ for (const richTextField of richTextFields) {
+ const objectMetadata = await this.objectMetadataRepository.findOne({
+ where: { id: richTextField.objectMetadataId },
+ });
+
+ if (objectMetadata === null) {
+ this.logger.log(
+ `Object metadata not found for rich text field ${richTextField.name} in workspace ${workspaceId}`,
+ );
+ continue;
+ }
+
+ const schemaName =
+ this.workspaceDataSourceService.getSchemaName(workspaceId);
+
+ const workspaceDataSource =
+ await this.twentyORMGlobalManager.getDataSourceForWorkspace(
+ workspaceId,
+ );
+
+ const rows = await workspaceDataSource.query(
+ `SELECT id, "${richTextField.name}" FROM "${schemaName}"."${computeTableName(objectMetadata.nameSingular, objectMetadata.isCustom)}"`,
+ );
greptile
logic: Raw SQL query is vulnerable to SQL injection through richTextField.name. Should use parameterized query or validate field name.
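Column and table names cannot be bound as query parameters, so the practical fix is to validate the dynamic identifier before it is interpolated. A minimal sketch of the validate-then-quote idea, in Python for brevity; the same check could be applied to richTextField.name in the TypeScript command before the string is built.

import re

_IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

def quote_identifier(name: str) -> str:
    # Reject anything that is not a plain identifier, then double-quote it.
    if not _IDENTIFIER_RE.match(name):
        raise ValueError(f"unsafe identifier: {name!r}")
    return f'"{name}"'

def build_select(field_name: str, schema: str, table: str) -> str:
    return (
        f"SELECT id, {quote_identifier(field_name)} "
        f"FROM {quote_identifier(schema)}.{quote_identifier(table)}"
    )

# Made-up names, for illustration only:
print(build_select("bodyV2Blocknote", "workspace_abc", "note"))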
diff block
+package org.owasp.webgoat;
+
+
+import io.restassured.RestAssured;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+
+public class ChallengeIntegrationTest extends IntegrationTest {
+
+ @Test
+ public void testChallenge1() {
+ startLesson("Challenge1");
+
+ byte[] resultBytes =
+ RestAssured.given()
+ .when()
+ .relaxedHTTPSValidation()
+ .cookie("JSESSIONID", getWebGoatCookie())
+ .get(url("/WebGoat/challenge/logo"))
+ .then()
+ .statusCode(200)
+ .extract().asByteArray();
+
+ String pincode = new String(Arrays.copyOfRange(resultBytes, 81216, 81220));
+ Map<String, Object> params = new HashMap<>();
+ params.clear();
+ params.put("username", "admin");
+ params.put("password", "!!webgoat_admin_1234!!".replace("1234", pincode));
+
+
+ checkAssignment(url("/WebGoat/challenge/1"), params, true);
+ String result =
+ RestAssured.given()
+ .when()
+ .relaxedHTTPSValidation()
+ .cookie("JSESSIONID", getWebGoatCookie())
+ .formParams(params)
+ .post(url("/WebGoat/challenge/1"))
+ .then()
+ .statusCode(200)
+ .extract().asString();
+
+ String flag = result.substring(result.indexOf("flag") + 6, result.indexOf("flag") + 42);
+ params.clear();
+ params.put("flag", flag);
+ checkAssignment(url("/WebGoat/challenge/flag"), params, true);
+
+
+ checkResults("/challenge/1");
+
+ List<String> capturefFlags =
+ RestAssured.given()
+ .when()
+ .relaxedHTTPSValidation()
+ .cookie("JSESSIONID", getWebGoatCookie())
+ .get(url("/WebGoat/scoreboard-data"))
+ .then()
+ .statusCode(200)
+ .extract().jsonPath()
+ .get("find { it.username == \"" + this.getUser() + "\" }.flagsCaptured");
+ assertTrue(capturefFlags.contains("Admin lost password"));
+ }
+
+ @Test
+ public void testChallenge5() {
+ startLesson("Challenge5");
+
+ Map<String, Object> params = new HashMap<>();
+ params.clear();
+ params.put("username_login", "Larry");
+ params.put("password_login", "1' or '1'='1");
greptile
style: SQL injection payload hardcoded in test - consider moving to test constants file to avoid exposing exploits in test code
diff block
);
qb.orWhere(
new Brackets((bck: WhereExpressionBuilder) => {
- bck.andWhere(p(`"${qb.alias}"."organizationId" IS NULL`));
- bck.andWhere(p(`"${qb.alias}"."tenantId" IS NULL`));
+ const { languageCode } = params.where;
+ if (isNotEmpty(languageCode)) {
+ bck.andWhere(`${qb.alias}."languageCode" = :languageCode`, { languageCode });
greptile
logic: The second query binds languageCode as a parameter but does not use the p() helper applied in the first query; wrap this condition with p() as well so query construction and SQL injection protection stay consistent.
diff block
@Override
public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
var authentication = SecurityContextHolder.getContext().getAuthentication();
- if (authentication != null && authentication.getPrincipal() instanceof WebGoatUser) {
- var user = (WebGoatUser) authentication.getPrincipal();
- targetConnection.createStatement().execute("SET SCHEMA \"" + user.getUsername() + "\"");
+ if (authentication != null && authentication.getPrincipal() instanceof WebGoatUser user) {
+ try (var statement = targetConnection.createStatement()) {
+ statement.execute("SET SCHEMA \"" + user.getUsername() + "\"");
greptile
logic: SQL injection vulnerability - username is directly concatenated into SQL statement without sanitization
suggested fix
+ statement.execute("SET SCHEMA ?", new String[]{user.getUsername()});
diff block
}
return ((npsBreakdown.promoters - npsBreakdown.detractors) / npsBreakdown.total) * 100
}
+
+/**
+ * Creates a HogQL expression for survey answer filters that handles both index-based and ID-based property keys
+ * using OR logic between the alternative formats for each question.
+ *
+ * @param filters - The answer filters to convert to HogQL expressions
+ * @param survey - The survey object (needed to access question IDs)
+ * @returns A HogQL expression string that can be used in queries
+ *
+ * TODO: Consider leveraging the backend query builder instead of duplicating this logic in the frontend.
+ * ClickHouse has powerful functions like match(), multiIf(), etc. that could be used more effectively.
+ */
+export function createAnswerFilterHogQLExpression(filters: EventPropertyFilter[], survey: Survey): string {
+ if (!filters || !filters.length) {
+ return ''
+ }
+
+ // Build the filter expression as a string
+ let filterExpression = ''
+ let hasValidFilter = false
+
+ // Process each filter
+ for (const filter of filters) {
+ // Skip filters with empty or undefined values
+ if (filter.value === undefined || filter.value === null || filter.value === '') {
+ continue
+ }
+
+ // Skip empty arrays
+ if (Array.isArray(filter.value) && filter.value.length === 0) {
+ continue
+ }
+
+ // Skip ILIKE filters with empty search patterns
+ if (
+ filter.operator === 'icontains' &&
+ (filter.value === '%' ||
+ filter.value === '%%' ||
+ (typeof filter.value === 'string' && filter.value.trim() === ''))
+ ) {
+ continue
+ }
+
+ // Extract question index from the filter key (assuming format like "$survey_response_X" or "$survey_response")
+ let questionIndex = 0
+ if (filter.key === '$survey_response') {
+ // If the key is exactly "$survey_response", it's for question index 0
+ questionIndex = 0
+ } else {
+ const questionIndexMatch = filter.key.match(/\$survey_response_(\d+)/)
+ if (!questionIndexMatch) {
+ continue // Skip if we can't determine the question index
+ }
+ questionIndex = parseInt(questionIndexMatch[1])
+ }
+
+ const questionId = survey.questions[questionIndex]?.id
+
+ // Get both key formats
+ const { indexBasedKey, idBasedKey } = getResponseFieldWithId(questionIndex, questionId)
+
+ // Create the condition for this filter
+ let condition = ''
+
+ // Handle different operators
+ switch (filter.operator) {
+ case 'exact':
+ if (Array.isArray(filter.value)) {
+ // Handle array values with IN operator
+ const valueList = filter.value.map((v) => `'${v}'`).join(', ')
+ condition = `(properties['${indexBasedKey}'] IN (${valueList})`
greptile
logic: No escaping of special characters in filter values could lead to SQL injection. Consider using a proper escaping function for values that might contain quotes.
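As the TODO above notes, the cleanest fix is to let the backend HogQL builder handle quoting. If the expression must be assembled in the frontend, a small escaping helper is the fallback. A sketch of the idea in Python (the real code is TypeScript), escaping both quotes and backslashes since ClickHouse treats backslash as an escape character inside string literals:

def escape_hogql_string(value: str) -> str:
    # Escape for a single-quoted HogQL/ClickHouse string literal.
    return value.replace("\\", "\\\\").replace("'", "\\'")

def build_in_clause(property_key: str, values: list[str]) -> str:
    quoted = ", ".join(f"'{escape_hogql_string(v)}'" for v in values)
    return f"properties['{escape_hogql_string(property_key)}'] IN ({quoted})"

print(build_in_clause("$survey_response", ["Yes", "it's fine"]))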
diff block
+def get_survey_response_clickhouse_query(question_index: int, question_id: str | None = None) -> str:
+ """
+ Generate a ClickHouse query to extract survey response based on question index or ID
+
+ Args:
+ question_index: Index of the question
+ question_id: ID of the question (optional)
+
+ Returns:
+ The survey response or empty string if not found
+ """
+ id_based_key = _build_id_based_key(question_index, question_id)
+ index_based_key = _build_index_based_key(question_index)
+
+ return _build_coalesce_query(id_based_key, index_based_key)
+
+
+def _build_id_based_key(question_index: int, question_id: str | None = None) -> str:
+ if question_id:
+ return f"$survey_response_{question_id}"
greptile
style: question_id should be sanitized to prevent SQL injection
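A minimal sketch of one way to do that, assuming question IDs are UUIDs; if they follow some other constrained format, swap in the matching check:

import uuid

def _build_id_based_key(question_index: int, question_id: str | None = None) -> str:
    if question_id:
        # Raises ValueError for anything that is not a well-formed UUID,
        # so arbitrary strings never reach the property key.
        uuid.UUID(question_id)
        return f"$survey_response_{question_id}"
    return f"$survey_response_{question_index}"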
diff block
package org.owasp.webgoat.container.lessons;
-import lombok.extern.slf4j.Slf4j;
-import org.owasp.webgoat.container.users.WebGoatUser;
-import org.springframework.security.core.context.SecurityContextHolder;
-
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.Connection;
+import org.owasp.webgoat.container.users.WebGoatUser;
+import org.springframework.security.core.context.SecurityContextHolder;
/**
- * Handler which sets the correct schema for the currently bounded user. This way users are not seeing each other
- * data and we can reset data for just one particular user.
+ * Handler which sets the correct schema for the currently bounded user. This way users are not
+ * seeing each other data, and we can reset data for just one particular user.
*/
-@Slf4j
public class LessonConnectionInvocationHandler implements InvocationHandler {
- private final Connection targetConnection;
+ private final Connection targetConnection;
- public LessonConnectionInvocationHandler(Connection targetConnection) {
- this.targetConnection = targetConnection;
- }
+ public LessonConnectionInvocationHandler(Connection targetConnection) {
+ this.targetConnection = targetConnection;
+ }
- @Override
- public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
- var authentication = SecurityContextHolder.getContext().getAuthentication();
- if (authentication != null && authentication.getPrincipal() instanceof WebGoatUser user) {
- try (var statement = targetConnection.createStatement()) {
- statement.execute("SET SCHEMA \"" + user.getUsername() + "\"");
- }
- }
- try {
- return method.invoke(targetConnection, args);
- } catch (InvocationTargetException e) {
- throw e.getTargetException();
- }
+ @Override
+ public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+ var authentication = SecurityContextHolder.getContext().getAuthentication();
+ if (authentication != null && authentication.getPrincipal() instanceof WebGoatUser user) {
+ try (var statement = targetConnection.createStatement()) {
+ statement.execute("SET SCHEMA \"" + user.getUsername() + "\"");
greptile
logic: Potential SQL injection vulnerability - username is directly concatenated into SQL statement without sanitization
diff block
+
+import orjson
+from xml.sax.saxutils import escape as xml_escape
+from datetime import date
+
+from fastapi import APIRouter, Response, Depends
+from fastapi.responses import ORJSONResponse
+from fastapi.exceptions import RequestValidationError
+
+import anybadge
+
+from api.object_specifications import Software
+from api.api_helpers import (ORJSONResponseObjKeep, add_phase_stats_statistics,
+ determine_comparison_case,get_comparison_details,
+ html_escape_multi, get_phase_stats, get_phase_stats_object,
+ is_valid_uuid, convert_value, get_timeline_query,
+ get_run_info, get_machine_list, get_artifact, store_artifact,
+ authenticate, check_int_field_api)
+
+from lib.global_config import GlobalConfig
+from lib.db import DB
+from lib.diff import get_diffable_rows, diff_rows
+from lib.job.base import Job
+from lib.user import User
+from lib.timeline_project import TimelineProject
+from lib import utils
+
+from enum import Enum
+ArtifactType = Enum('ArtifactType', ['DIFF', 'COMPARE', 'STATS', 'BADGE'])
+
+
+router = APIRouter()
+
+
+# Return a list of all known machines in the cluster
+@router.get('/v1/machines')
+async def get_machines(
+ user: User = Depends(authenticate), # pylint: disable=unused-argument
+ ):
+
+ data = get_machine_list()
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ return ORJSONResponse({'success': True, 'data': data})
+
+@router.get('/v1/jobs')
+async def get_jobs(
+ machine_id: int | None = None,
+ state: str | None = None,
+ user: User = Depends(authenticate), # pylint: disable=unused-argument
+ ):
+
+ params = []
+ machine_id_condition = ''
+ state_condition = ''
+
+ if machine_id and check_int_field_api(machine_id, 'machine_id', 1024):
+ machine_id_condition = 'AND j.machine_id = %s'
+ params.append(machine_id)
+
+ if state is not None and state != '':
+ state_condition = 'AND j.state = %s'
+ params.append(state)
+
+ query = f"""
+ SELECT j.id, r.id as run_id, j.name, j.url, j.filename, j.branch, m.description, j.state, j.updated_at, j.created_at
+ FROM jobs as j
+ LEFT JOIN machines as m on m.id = j.machine_id
+ LEFT JOIN runs as r on r.job_id = j.id
+ WHERE
+ j.type = 'run'
+ {machine_id_condition}
+ {state_condition}
+ ORDER BY j.updated_at DESC, j.created_at ASC
+ """
+ data = DB().fetch_all(query, params)
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ return ORJSONResponse({'success': True, 'data': data})
+
+# A route to return all of the available entries in our catalog.
+@router.get('/v1/notes/{run_id}')
+async def get_notes(run_id, user: User = Depends(authenticate)):
+ if run_id is None or not is_valid_uuid(run_id):
+ raise RequestValidationError('Run ID is not a valid UUID or empty')
+
+ query = '''
+ SELECT n.run_id, n.detail_name, n.note, n.time
+ FROM notes as n
+ JOIN runs as r on n.run_id = r.id
+ WHERE
+ (TRUE = %s OR r.user_id = ANY(%s::int[]))
+ AND n.run_id = %s
+ ORDER BY n.created_at DESC -- important to order here, the charting library in JS cannot do that automatically!
+ '''
+
+ params = (user.is_super_user(), user.visible_users(), run_id)
+ data = DB().fetch_all(query, params=params)
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ escaped_data = [html_escape_multi(note) for note in data]
+ return ORJSONResponseObjKeep({'success': True, 'data': escaped_data})
+
+
+@router.get('/v1/network/{run_id}')
+async def get_network(run_id, user: User = Depends(authenticate)):
+ if run_id is None or not is_valid_uuid(run_id):
+ raise RequestValidationError('Run ID is not a valid UUID or empty')
+
+ query = '''
+ SELECT ni.*
+ FROM network_intercepts as ni
+ JOIN runs as r on r.id = ni.run_id
+ WHERE
+ (TRUE = %s OR r.user_id = ANY(%s::int[]))
+ AND ni.run_id = %s
+ ORDER BY ni.time
+ '''
+ params = (user.is_super_user(), user.visible_users(), run_id)
+ data = DB().fetch_all(query, params=params)
+
+ escaped_data = html_escape_multi(data)
+ return ORJSONResponseObjKeep({'success': True, 'data': escaped_data})
+
+
+@router.get('/v1/repositories')
+async def get_repositories(uri: str | None = None, branch: str | None = None, machine_id: int | None = None, machine: str | None = None, filename: str | None = None, sort_by: str = 'name', user: User = Depends(authenticate)):
+ query = '''
+ SELECT
+ r.uri,
+ MAX(r.created_at) as last_run
+ FROM runs as r
+ LEFT JOIN machines as m on r.machine_id = m.id
+ WHERE
+ (TRUE = %s OR r.user_id = ANY(%s::int[]))
+ '''
+
+ params = [user.is_super_user(), user.visible_users()]
+
+ if uri:
+ query = f"{query} AND r.uri LIKE %s \n"
+ params.append(f"%{uri}%")
+
+ if branch:
+ query = f"{query} AND r.branch LIKE %s \n"
+ params.append(f"%{branch}%")
+
+ if filename:
+ query = f"{query} AND r.filename LIKE %s \n"
+ params.append(f"%{filename}%")
+
+ if machine_id and check_int_field_api(machine_id, 'machine_id', 1024):
+ query = f"{query} AND m.id = %s \n"
+ params.append(machine_id)
+
+ if machine:
+ query = f"{query} AND m.description LIKE %s \n"
+ params.append(f"%{machine}%")
+
+ query = f"{query} GROUP BY r.uri\n"
+
+ if sort_by == 'name':
+ query = f"{query} ORDER BY r.uri ASC"
+ else:
+ query = f"{query} ORDER BY last_run DESC"
+
+ data = DB().fetch_all(query, params=params)
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ escaped_data = [html_escape_multi(run) for run in data]
+
+ return ORJSONResponse({'success': True, 'data': escaped_data})
+
+
+# A route to return all of the available entries in our catalog.
+@router.get('/v1/runs')
+async def get_runs(uri: str | None = None, branch: str | None = None, machine_id: int | None = None, machine: str | None = None, filename: str | None = None, limit: int = 5, uri_mode = 'none', user: User = Depends(authenticate)):
+
+ query = '''
+ SELECT r.id, r.name, r.uri, r.branch, r.created_at, r.invalid_run, r.filename, m.description, r.commit_hash, r.end_measurement, r.failed, r.machine_id
+ FROM runs as r
+ LEFT JOIN machines as m on r.machine_id = m.id
+ WHERE
+ (TRUE = %s OR r.user_id = ANY(%s::int[]))
+ '''
+ params = [user.is_super_user(), user.visible_users()]
+
+ if uri:
+ if uri_mode == 'exact':
+ query = f"{query} AND r.uri = %s \n"
+ params.append(uri)
+ else:
+ query = f"{query} AND r.uri LIKE %s \n"
+ params.append(f"%{uri}%")
+
+ if branch:
+ query = f"{query} AND r.branch LIKE %s \n"
+ params.append(f"%{branch}%")
+
+ if filename:
+ query = f"{query} AND r.filename LIKE %s \n"
+ params.append(f"%{filename}%")
+
+ if machine_id and check_int_field_api(machine_id, 'machine_id', 1024):
+ query = f"{query} AND m.id = %s \n"
+ params.append(machine_id)
+
+ if machine:
+ query = f"{query} AND m.description LIKE %s \n"
+ params.append(f"%{machine}%")
+
+ query = f"{query} ORDER BY r.created_at DESC"
+
+ check_int_field_api(limit, 'limit', 50)
+ query = f"{query} LIMIT %s"
+ params.append(limit)
+
+
+ data = DB().fetch_all(query, params=params)
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ escaped_data = [html_escape_multi(run) for run in data]
+
+ return ORJSONResponse({'success': True, 'data': escaped_data})
+
+
+# Just copy and paste if we want to deprecate URLs
+# @router.get('/v1/measurements/uri', deprecated=True) # Here you can see, that URL is nevertheless accessible as variable
+# later if supplied. Also deprecation shall be used once we move to v2 for all v1 routesthrough
+
+@router.get('/v1/compare')
+async def compare_in_repo(ids: str, user: User = Depends(authenticate)):
+ if ids is None or not ids.strip():
+ raise RequestValidationError('run_id is empty')
+ ids = ids.split(',')
+ if not all(is_valid_uuid(id) for id in ids):
+ raise RequestValidationError('One of Run IDs is not a valid UUID or empty')
+
+
+ if artifact := get_artifact(ArtifactType.COMPARE, f"{user._id}_{str(ids)}"):
+ return ORJSONResponse({'success': True, 'data': orjson.loads(artifact)}) # pylint: disable=no-member
+
+ try:
+ case, comparison_db_key = determine_comparison_case(user, ids)
+ except RuntimeError as exc:
+ raise RequestValidationError(str(exc)) from exc
+
+ comparison_details = get_comparison_details(user, ids, comparison_db_key)
+
+ if not (phase_stats := get_phase_stats(user, ids)):
+ return Response(status_code=204) # No-Content
+
+ try:
+ phase_stats_object = get_phase_stats_object(phase_stats, case, comparison_details)
+ phase_stats_object = add_phase_stats_statistics(phase_stats_object)
+ except ValueError as exc:
+ raise RequestValidationError(str(exc)) from exc
+
+ phase_stats_object['common_info'] = {}
+
+ try:
+ run_info = get_run_info(user, ids[0])
+
+ machine_list = get_machine_list()
+ machines = {machine[0]: machine[1] for machine in machine_list}
+
+ machine = machines[run_info['machine_id']]
+ uri = run_info['uri']
+ usage_scenario = run_info['usage_scenario']['name']
+ branch = run_info['branch']
+ commit = run_info['commit_hash']
+ filename = run_info['filename']
+
+ match case:
+ case 'Repeated Run':
+ # same repo, same usage scenarios, same machines, same branches, same commit hashes
+ phase_stats_object['common_info']['Repository'] = uri
+ phase_stats_object['common_info']['Filename'] = filename
+ phase_stats_object['common_info']['Usage Scenario'] = usage_scenario
+ phase_stats_object['common_info']['Machine'] = machine
+ phase_stats_object['common_info']['Branch'] = branch
+ phase_stats_object['common_info']['Commit'] = commit
+ case 'Usage Scenario':
+ # same repo, diff usage scenarios, same machines, same branches, same commit hashes
+ phase_stats_object['common_info']['Repository'] = uri
+ phase_stats_object['common_info']['Machine'] = machine
+ phase_stats_object['common_info']['Branch'] = branch
+ phase_stats_object['common_info']['Commit'] = commit
+ case 'Machine':
+ # same repo, same usage scenarios, diff machines, same branches, same commit hashes
+ phase_stats_object['common_info']['Repository'] = uri
+ phase_stats_object['common_info']['Filename'] = filename
+ phase_stats_object['common_info']['Usage Scenario'] = usage_scenario
+ phase_stats_object['common_info']['Branch'] = branch
+ phase_stats_object['common_info']['Commit'] = commit
+ case 'Commit':
+ # same repo, same usage scenarios, same machines, diff commit hashes
+ phase_stats_object['common_info']['Repository'] = uri
+ phase_stats_object['common_info']['Filename'] = filename
+ phase_stats_object['common_info']['Usage Scenario'] = usage_scenario
+ phase_stats_object['common_info']['Machine'] = machine
+ case 'Repository':
+ # diff repo, diff usage scenarios, same machine, same branches, diff/same commits_hashes
+ phase_stats_object['common_info']['Machine'] = machine
+ phase_stats_object['common_info']['Branch'] = branch
+ case 'Branch':
+ # same repo, same usage scenarios, same machines, diff branch
+ phase_stats_object['common_info']['Repository'] = uri
+ phase_stats_object['common_info']['Filename'] = filename
+ phase_stats_object['common_info']['Usage Scenario'] = usage_scenario
+ phase_stats_object['common_info']['Machine'] = machine
+
+ except RuntimeError as err:
+ raise RequestValidationError(str(err)) from err
+
+ store_artifact(ArtifactType.COMPARE, f"{user._id}_{str(ids)}", orjson.dumps(phase_stats_object)) # pylint: disable=no-member
+
+
+ return ORJSONResponse({'success': True, 'data': phase_stats_object})
+
+
+@router.get('/v1/phase_stats/single/{run_id}')
+async def get_phase_stats_single(run_id: str, user: User = Depends(authenticate)):
+ if run_id is None or not is_valid_uuid(run_id):
+ raise RequestValidationError('Run ID is not a valid UUID or empty')
+
+ if artifact := get_artifact(ArtifactType.STATS, f"{user._id}_{str(run_id)}"):
+ return ORJSONResponse({'success': True, 'data': orjson.loads(artifact)}) # pylint: disable=no-member
+
+ if not (phase_stats := get_phase_stats(user, [run_id])):
+ return Response(status_code=204) # No-Content
+
+ try:
+ phase_stats_object = get_phase_stats_object(phase_stats, None, None, [run_id])
+ phase_stats_object = add_phase_stats_statistics(phase_stats_object)
+ except ValueError as exc:
+ raise RequestValidationError(str(exc)) from exc
+
+ store_artifact(ArtifactType.STATS, f"{user._id}_{str(run_id)}", orjson.dumps(phase_stats_object)) # pylint: disable=no-member
+
+ return ORJSONResponseObjKeep({'success': True, 'data': phase_stats_object})
+
+
+# This route gets the measurements to be displayed in a timeline chart
+@router.get('/v1/measurements/single/{run_id}')
+async def get_measurements_single(run_id: str, user: User = Depends(authenticate)):
+ if run_id is None or not is_valid_uuid(run_id):
+ raise RequestValidationError('Run ID is not a valid UUID or empty')
+
+ query = '''
+ SELECT
+ mm.detail_name, mv.time, mm.metric,
+ mv.value, mm.unit
+ FROM measurement_metrics as mm
+ JOIN measurement_values as mv ON mv.measurement_metric_id = mm.id
+ JOIN runs as r ON mm.run_id = r.id
+ WHERE
+ (TRUE = %s OR r.user_id = ANY(%s::int[]))
+ AND mm.run_id = %s
+ '''
+
+ params = (user.is_super_user(), user.visible_users(), run_id)
+
+ # extremely important to order here, cause the charting library in JS cannot do that automatically!
+ # Furthermore we do time-lag caclulations and need the order of metric first and then time in stats.js:179... . Please do not change
+ query = f"{query} ORDER BY mm.metric ASC, mm.detail_name ASC, mv.time ASC"
+
+ data = DB().fetch_all(query, params=params)
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ return ORJSONResponseObjKeep({'success': True, 'data': data})
+
+@router.get('/v1/timeline')
+async def get_timeline_stats(uri: str, machine_id: int, branch: str | None = None, filename: str | None = None, start_date: date | None = None, end_date: date | None = None, metrics: str | None = None, phase: str | None = None, sorting: str | None = None, user: User = Depends(authenticate)):
+ if uri is None or uri.strip() == '':
+ raise RequestValidationError('URI is empty')
+
+ if phase is None or phase.strip() == '':
+ raise RequestValidationError('Phase is empty')
+
+ query, params = get_timeline_query(user, uri, filename, machine_id, branch, metrics, phase, start_date=start_date, end_date=end_date, sorting=sorting)
+
+ data = DB().fetch_all(query, params=params)
+
+ if data is None or data == []:
+ return Response(status_code=204) # No-Content
+
+ return ORJSONResponse({'success': True, 'data': data})
+
+# Show the timeline badges with regression trend
+## A complex case to allow public visibility of the badge but restricting everything else would be to have
+## User 1 restricted to only this route but a fully populated 'visible_users' array
+@router.get('/v1/badge/timeline')
+async def get_timeline_badge(detail_name: str, uri: str, machine_id: int, branch: str | None = None, filename: str | None = None, metrics: str | None = None, unit: str = 'watt-hours', user: User = Depends(authenticate)):
+ if uri is None or uri.strip() == '':
+ raise RequestValidationError('URI is empty')
+
+ if detail_name is None or detail_name.strip() == '':
+ raise RequestValidationError('Detail Name is mandatory')
+
+ if unit not in ('watt-hours', 'joules'):
+ raise RequestValidationError('Requested unit is not in allow list: watt-hours, joules')
+
+ # we believe that there is no injection possible to the artifact store and any string can be constructured here ...
+ if artifact := get_artifact(ArtifactType.BADGE, f"{user._id}_{uri}_{filename}_{machine_id}_{branch}_{metrics}_{detail_name}_{unit}"):
greptile
style: The comment suggests there's no SQL injection possible in the artifact store, but concatenating user input directly into strings can be dangerous. Consider using a more robust key generation method.
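One robust option is to hash the user-derived parts into a fixed-format key so no raw string can influence the key's structure. A hedged sketch; the function and the example arguments are illustrative, not the project's:

import hashlib

def artifact_cache_key(*parts) -> str:
    # Join with an unambiguous separator, then hash to a fixed-length key.
    joined = "\x1f".join("" if p is None else str(p) for p in parts)
    return hashlib.sha256(joined.encode("utf-8")).hexdigest()

key = artifact_cache_key("42", "https://example.com/repo", None, 7, "main", "NETWORK", "watt-hours")
print(key)  # 64-character hex digest; the same inputs always map to the same key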
diff block
</p>
<b>Stage 2</b>
<p>
-In this stage you have to modify the clientSideFiltering.jsp which you will find under
+In this stage you have to modify the clientSideFiltering.jsp which you will find under
the WebContent in the lessons/Ajax folder. The Problem is that
the server sends all information to the client. As you could see
even if it is hidden it is easy to find the sensitive date. In this
-stage you will add a filter to the XPath queries. In this file you will find
+stage you will add a filter to the XPath queries. In this file you will find
following construct:<br><br></p>
<code>
- StringBuffer sb = new StringBuffer();<br>
-
+ StringBuilder sb = new StringBuilder();<br>
+
sb.append("/Employees/Employee/UserID | ");<br>
sb.append("/Employees/Employee/FirstName | ");<br>
sb.append("/Employees/Employee/LastName | ");<br>
sb.append("/Employees/Employee/SSN | ");<br>
sb.append("/Employees/Employee/Salary ");<br>
-
+
String expression = sb.toString();<br>
</code>
<p>
-This string will be used for the XPath query. You have to guarantee that a manger only
+This string will be used for the XPath query. You have to guarantee that a manger only
can see employees which are working for him. To archive this you can use
filters in XPath. Following code will exactly do this:</p>
<code>
- StringBuffer sb = new StringBuffer();<br>
-
+ StringBuilder sb = new StringBuilder();<br>
+
sb.append("/Employees/Employee[Managers/Manager/text() = " + userId + "]/UserID | ");<br>
sb.append("/Employees/Employee[Managers/Manager/text() = " + userId + "]/FirstName | ");<br>
sb.append("/Employees/Employee[Managers/Manager/text() = " + userId + "]/LastName | ");<br>
sb.append("/Employees/Employee[Managers/Manager/text() = " + userId + "]/SSN | ");<br>
sb.append("/Employees/Employee[Managers/Manager/text() = " + userId + "]/Salary ");<br>
greptile
logic: Direct string concatenation in the XPath expression creates an XPath injection vulnerability. Bind userId through an XPath variable (e.g. XPathVariableResolver) or validate/escape it instead of concatenating it into the expression
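Since this is XPath rather than SQL, the equivalent of a bound parameter is an XPath variable (javax.xml.xpath.XPathVariableResolver on the Java side). A small Python sketch of the same idea using lxml, a third-party library: $uid is bound as a variable and therefore cannot rewrite the filter.

from lxml import etree

doc = etree.fromstring(
    "<Employees>"
    "<Employee><UserID>101</UserID><Managers><Manager>112</Manager></Managers></Employee>"
    "<Employee><UserID>102</UserID><Managers><Manager>105</Manager></Managers></Employee>"
    "</Employees>"
)

def user_ids_for_manager(manager_id: str) -> list[str]:
    # The value is passed as an XPath variable, not spliced into the expression.
    return doc.xpath(
        "/Employees/Employee[Managers/Manager/text() = $uid]/UserID/text()",
        uid=manager_id,
    )

print(user_ids_for_manager("112"))       # ['101']
print(user_ids_for_manager("x or 1=1"))  # [] - the injection attempt is inert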
diff block
+import { Prisma } from '@prisma/client';
+
+import { TResetPeriods } from '@libs/contracts/constants';
+import { USERS_STATUS } from '@libs/contracts/constants';
+
+export class BatchResetLimitedUsersUsageBuilder {
+ public query: Prisma.Sql;
+
+ constructor(strategy: TResetPeriods) {
+ this.query = this.getQuery(strategy);
+ return this;
+ }
+
+ public getQuery(strategy: TResetPeriods): Prisma.Sql {
+ const query = `
+ WITH users_to_reset AS (
+ SELECT uuid, used_traffic_bytes
+ FROM users
+ WHERE traffic_limit_strategy = '${strategy.toUpperCase()}'
+ AND status = '${USERS_STATUS.LIMITED.toUpperCase()}'
greptile
logic: SQL injection vulnerability - strategy parameter should be parameterized instead of string interpolation
suggested fix
+ WHERE traffic_limit_strategy = $1
+ AND status = $2
diff block
+import { executeSQL, useSQL } from "@raycast/utils";
+import { environment, getPreferenceValues } from "@raycast/api";
+import path from "path";
+
+export interface SearchEngine {
+ id: number;
+ t: string;
+ s: string;
+ u: string;
+ c?: string;
+ d?: string;
+ r?: number;
+ sc?: string;
+}
+
+const DB_PATH = path.join(environment.assetsPath, "search-router.db");
+
+export const useListSearchEngines = (searchText: string) =>
+ useSQL<SearchEngine>(
+ DB_PATH,
+ searchText.trim()
+ ? `SELECT * FROM search_engines
+ WHERE t LIKE '%${searchText.trim()}%' OR s LIKE '%${searchText.trim()}%'
+ ORDER BY r DESC, t ASC
greptile
logic: SQL injection vulnerability. User input is directly interpolated into the query without sanitization. Use parameterized queries instead.
```suggestion
? `SELECT * FROM search_engines
+ WHERE t LIKE $1 OR s LIKE $1
ORDER BY r DESC, t ASC
```
diff block
+from typing import Any
+
+import dagster
+
+from .exchange_rate import exchange_rates
+from clickhouse_driver import Client
+from posthog.clickhouse.cluster import ClickhouseCluster, NodeRole
+
+
+# TODO: Use the constants from `posthog/models/exchange_rate/sql.py` once that's merged
+# rather than hardcoding the table/dict name here
+@dagster.asset(
+ partitions_def=dagster.DailyPartitionsDefinition(start_date="2025-01-01"),
+ group_name="exchange_rates",
+ ins={"exchange_rates": dagster.AssetIn(key=exchange_rates.key)},
+)
+def store_exchange_rates_in_clickhouse(
+ context: dagster.AssetExecutionContext,
+ exchange_rates: dict[str, Any],
+ cluster: dagster.ResourceParam[ClickhouseCluster],
+) -> dagster.MaterializeResult:
+ """
+ Stores exchange rates data in ClickHouse.
+
+ This asset depends on the exchange_rates asset and will be executed
+ after it completes successfully.
+
+ The base currency is always USD as per the table design.
+ """
+ # Extract data from the input
+ date_str = context.partition_key
+
+ # Transform data into rows for ClickHouse
+ rows = [{"date": date_str, "currency": currency, "rate": rate} for currency, rate in exchange_rates.items()]
+
+ # Log information about the data being stored
+ context.log.info(f"Storing {len(rows)} exchange rates for {date_str} in ClickHouse")
+
+ # Prepare values for batch insert
+ # Use toDate() to cast the string date to a ClickHouse Date type
+ values = [f"(toDate('{row['date']}'), '{row['currency']}', {row['rate']})" for row in rows]
greptile
logic: SQL injection risk: currency values from external API are directly inserted into SQL without proper escaping. Consider using parameterized queries or proper escaping for currency values.
suggested fix
+ values = [(row['date'], row['currency'], row['rate']) for row in rows]
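For reference, a sketch of what the parameterized insert could look like with clickhouse_driver once the rows are tuples. The table and column names are assumptions (the actual table is not shown in the diff) and the rates are illustrative values:

from datetime import date
from clickhouse_driver import Client

client = Client(host="localhost")  # connection details assumed

rows = [
    (date.fromisoformat("2025-01-01"), "EUR", 0.9615),
    (date.fromisoformat("2025-01-01"), "GBP", 0.7984),
]

# The driver serialises the tuples itself, so currency strings are never
# spliced into the SQL text.
client.execute("INSERT INTO exchange_rate (date, currency, rate) VALUES", rows)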
diff block
+import { executeSQL, useSQL } from "@raycast/utils";
+import { DB_PATH, readPreferences } from "./config";
+
+export interface SearchEngine {
+ id: number;
+ t: string;
+ s: string;
+ u: string;
+ c?: string;
+ d?: string;
+ r?: number;
+ sc?: string;
+}
+
+export const useListSearchEngines = (searchText: string) =>
+ useSQL<SearchEngine>(
+ DB_PATH,
+ searchText.trim()
+ ? `SELECT * FROM search_engines
+ WHERE t LIKE '%${searchText.trim()}%' OR s LIKE '%${searchText.trim()}%'
+ ORDER BY r DESC, t ASC
greptile
logic: SQL injection vulnerability: user input is directly interpolated into the query without sanitization. Use parameterized queries instead with the executeSQL function's parameter support.
```suggestion
? `SELECT * FROM search_engines
+ WHERE t LIKE $1 OR s LIKE $1
ORDER BY r DESC, t ASC
```
diff block
+import { executeSQL, useSQL } from "@raycast/utils";
+import { DB_PATH, readPreferences } from "./config";
+
+export interface SearchEngine {
+ id: number;
+ t: string;
+ s: string;
+ u: string;
+ c?: string;
+ d?: string;
+ r?: number;
+ sc?: string;
+}
+
+export const useListSearchEngines = (searchText: string) =>
+ useSQL<SearchEngine>(
+ DB_PATH,
+ searchText.trim()
+ ? `SELECT * FROM search_engines
+ WHERE t LIKE '%${searchText.trim()}%' OR s LIKE '%${searchText.trim()}%'
+ ORDER BY r DESC, t ASC
+ LIMIT 100`
+ : `SELECT * FROM search_engines
+ ORDER BY r DESC, t ASC
+ LIMIT 100`,
+ {
+ permissionPriming: "This is required to list search engines.",
+ onWillExecute: () => {
+ console.log("Will execute:", searchText);
+ },
+ },
+ );
+
+export const findSearchEngine = async (searchEngineKey: string | undefined) => {
+ const value = await executeSQL<SearchEngine>(
+ DB_PATH,
+ `SELECT * FROM search_engines
+ WHERE t = '${searchEngineKey ?? ""}' OR t = '${readPreferences().defaultSearchEngine}'
+ ORDER BY t = '${searchEngineKey ?? ""}' DESC
+ LIMIT 1`,
greptile
logic: SQL injection vulnerability: directly interpolating searchEngineKey into the query is unsafe. Use parameterized queries with the executeSQL function's parameter support.
```suggestion
`SELECT * FROM search_engines
+ WHERE t = $1 OR t = $2
+ ORDER BY t = $1 DESC
LIMIT 1`,
+ [searchEngineKey ?? "", readPreferences().defaultSearchEngine]
```
diff block
+from zoneinfo import ZoneInfo
+from posthog.hogql import ast
+from posthog.hogql.modifiers import create_default_modifiers_for_team
+from posthog.hogql.parser import parse_expr
+from posthog.hogql.property import property_to_expr
+from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.experiments import CONTROL_VARIANT_KEY
+from posthog.hogql_queries.experiments.trends_statistics import (
+ are_results_significant,
+ calculate_credible_intervals,
+ calculate_probabilities,
+)
+from posthog.hogql_queries.experiments.trends_statistics_v2_count import (
+ are_results_significant_v2_count,
+ calculate_credible_intervals_v2_count,
+ calculate_probabilities_v2_count,
+)
+from posthog.hogql_queries.experiments.trends_statistics_v2_continuous import (
+ are_results_significant_v2_continuous,
+ calculate_credible_intervals_v2_continuous,
+ calculate_probabilities_v2_continuous,
+)
+from posthog.hogql_queries.experiments.funnels_statistics_v2 import (
+ calculate_probabilities_v2 as calculate_probabilities_v2_funnel,
+ are_results_significant_v2 as are_results_significant_v2_funnel,
+ calculate_credible_intervals_v2 as calculate_credible_intervals_v2_funnel,
+)
+from posthog.hogql_queries.query_runner import QueryRunner
+from posthog.hogql_queries.utils.query_date_range import QueryDateRange
+from posthog.models.experiment import Experiment
+from rest_framework.exceptions import ValidationError
+from posthog.schema import (
+ CachedExperimentQueryResponse,
+ ExperimentDataWarehouseMetricConfig,
+ ExperimentEventMetricConfig,
+ ExperimentMetricType,
+ ExperimentQueryResponse,
+ ExperimentSignificanceCode,
+ ExperimentQuery,
+ ExperimentVariantFunnelsBaseStats,
+ ExperimentVariantTrendsBaseStats,
+ DateRange,
+ IntervalType,
+)
+from typing import Optional, cast
+from datetime import datetime, timedelta, UTC
+
+
+class ExperimentQueryRunner(QueryRunner):
+ query: ExperimentQuery
+ response: ExperimentQueryResponse
+ cached_response: CachedExperimentQueryResponse
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ if not self.query.experiment_id:
+ raise ValidationError("experiment_id is required")
+
+ self.experiment = Experiment.objects.get(id=self.query.experiment_id)
+ self.feature_flag = self.experiment.feature_flag
+ self.variants = [variant["key"] for variant in self.feature_flag.variants]
+ if self.experiment.holdout:
+ self.variants.append(f"holdout-{self.experiment.holdout.id}")
+
+ self.stats_version = self.experiment.get_stats_config("version") or 1
+
+ self.date_range = self._get_date_range()
+
+ # Just to simplify access
+ self.metric = self.query.metric
+
+ def _get_date_range(self) -> DateRange:
+ """
+ Returns an DateRange object based on the experiment's start and end dates,
+ adjusted for the team's timezone if applicable.
+ """
+
+ if self.team.timezone:
+ tz = ZoneInfo(self.team.timezone)
+ start_date = self.experiment.start_date.astimezone(tz) if self.experiment.start_date else None
+ end_date = self.experiment.end_date.astimezone(tz) if self.experiment.end_date else None
+ else:
+ start_date = self.experiment.start_date
+ end_date = self.experiment.end_date
+
+ return DateRange(
+ date_from=start_date.isoformat() if start_date else None,
+ date_to=end_date.isoformat() if end_date else None,
+ explicitDate=True,
+ )
+
+ def _get_experiment_query(self) -> ast.SelectQuery:
+ # Lots of shortcuts taken here, but it's a proof of concept to illustrate the idea
+
+ feature_flag_key = self.feature_flag.key
+
+ is_data_warehouse_query = isinstance(self.metric.metric_config, ExperimentDataWarehouseMetricConfig)
+
+ # Pick the correct value for the aggregation chosen
+ match self.metric.metric_type:
+ case ExperimentMetricType.CONTINUOUS:
+ # If the metric type is continuous, we need to extract the value from the event property
+ metric_property = self.metric.metric_config.math_property
+ if is_data_warehouse_query:
+ metric_value = f"toFloat('{metric_property}')"
greptile
logic: potential SQL injection vulnerability - metric_property value should be escaped/sanitized before interpolation
Want to avoid this bug in your codebase? Try Greptile.