#query-language #ip-geolocation #search #tql

bin+lib tellaro-query-language

A flexible, human-friendly query language for searching and filtering structured data

17 releases

new 0.2.22 Feb 4, 2026
0.2.20 Feb 4, 2026
0.2.18 Jan 30, 2026
0.2.7 Dec 9, 2025
0.1.2 Oct 23, 2025

#974 in Database interfaces

Download history 398/week @ 2025-10-20 33/week @ 2025-10-27 51/week @ 2025-11-24 169/week @ 2025-12-08 16/week @ 2025-12-15 7/week @ 2025-12-22 6/week @ 2025-12-29 24/week @ 2026-01-05 22/week @ 2026-01-12 33/week @ 2026-01-19 55/week @ 2026-01-26 32/week @ 2026-02-02

143 downloads per month

Custom license

435KB
9K SLoC

Tellaro Query Language (TQL) - Rust

Crates.io Documentation License: Source Available

A blazing-fast, human-friendly query language for searching and filtering structured data in Rust.

TQL provides an intuitive SQL-like syntax for querying JSON, JSONL, CSV files, and OpenSearch indices with:

  • 300x faster than Python for large file processing
  • First-class file support with CLI and programmatic API
  • OpenSearch integration with automatic DSL translation
  • 25+ field mutators for data transformation (string, encoding, DNS, GeoIP, network)
  • Statistical aggregations for data analysis
use tellaro_query_language::Tql;
use serde_json::json;

let tql = Tql::new();
let records = vec![
    json!({"name": "Alice", "age": 30, "city": "NYC"}),
    json!({"name": "Bob", "age": 25, "city": "LA"}),
];

// Simple query
let results = tql.query(&records, "age > 27").unwrap();
assert_eq!(results.len(), 1);

// With field mutators
let results = tql.query(&records, "name | lowercase = 'alice'").unwrap();

🚀 Quick Start

Installation

Add this to your Cargo.toml:

[dependencies]
tellaro-query-language = "0.2"
serde_json = "1.0"

# Optional: Enable OpenSearch backend
# tellaro-query-language = { version = "0.2", features = ["opensearch"] }

CLI Installation

Install the high-performance command-line tool:

cargo install tellaro-query-language

# Query files directly
tql 'status = "active"' users.json
tql 'age > 25 AND city = "NYC"' data.jsonl

# Statistical aggregations
tql '| stats count() by status' events.jsonl
tql 'status = 200 | stats avg(response_time) by endpoint' logs.jsonl

📁 Query Files (First-Class Support)

CLI Usage

TQL treats files as first-class data sources:

# Query JSON/JSONL files
tql 'status = "active"' users.json
tql 'age > 25 AND city = "NYC"' data.jsonl

# Query CSV files (auto-detects headers)
tql 'price > 100 AND category = "electronics"' products.csv

# Statistical aggregations
tql '| stats count() by status' events.jsonl
tql 'status = 200 | stats average(response_time) by endpoint' logs.jsonl

# Process folders recursively
tql 'level = "ERROR"' logs/ --pattern "*.jsonl" --recursive

# Stream data from stdin
cat large-file.jsonl | tql 'score > 90'

# Output formats
tql 'age > 30' users.json --output results.json   # JSON
tql 'age > 30' users.json --output results.jsonl  # JSONL
tql 'age > 30' users.json                         # Table (console)

Performance: Process 50MB files in ~200ms with streaming (no memory overhead).

Programmatic File Queries

use tellaro_query_language::Tql;
use std::fs::File;
use std::io::{BufRead, BufReader};
use serde_json::Value;

let tql = Tql::new();

// Read and query JSON file
let file = File::open("data.json")?;
let reader = BufReader::new(file);
let records: Vec<Value> = serde_json::from_reader(reader)?;
let results = tql.query(&records, "status = 'active' AND age > 25")?;

// Stream JSONL for large files
let file = File::open("large.jsonl")?;
let reader = BufReader::new(file);
for line in reader.lines() {
    let record: Value = serde_json::from_str(&line?)?;
    if tql.matches(&record, "level = 'ERROR'")? {
        println!("Error found: {}", record);
    }
}

🗄️ OpenSearch Integration

TQL seamlessly integrates with OpenSearch/Elasticsearch:

Automatic DSL Translation

use tellaro_query_language::{Tql, opensearch::{OpenSearchClient, OpenSearchConfig, QueryBuilder}};

// Configure OpenSearch (reads from environment)
std::env::set_var("OPENSEARCH_HOSTS", "http://localhost:9200");
std::env::set_var("OPENSEARCH_USERNAME", "admin");
std::env::set_var("OPENSEARCH_PASSWORD", "admin");

// Create client
let config = OpenSearchConfig::from_env()?;
let client = OpenSearchClient::new(config)?;

// Parse TQL query
let tql = Tql::new();
let ast = tql.parse("age > 25 AND status = 'active'")?;

// Build OpenSearch DSL
let builder = QueryBuilder::new(None);
let opensearch_query = builder.build_query(&ast)?;

// Execute search
let response = client.client()
    .search(opensearch::SearchParts::Index(&["users"]))
    .body(opensearch_query)
    .send()
    .await?;

TQL → OpenSearch Query DSL

TQL automatically translates to optimized OpenSearch queries:

TQL Operator OpenSearch Query Example
eq, = term or match status = "active"
ne, != bool + must_not status != "deleted"
gt, gte, lt, lte range age > 25
contains wildcard or match_phrase email contains "@example.com"
startswith prefix name startswith "John"
endswith wildcard filename endswith ".pdf"
matches (regexp) regexp email matches "^\\w+@\\w+"
in terms status in ["active", "pending"]
between range with gte + lte age between [18, 65]
cidr IP range matching ip cidr "192.168.0.0/16"
AND bool + must age > 25 AND city = "NYC"
OR bool + should city = "NYC" OR city = "LA"
NOT bool + must_not NOT status = "deleted"

Field Mapping Intelligence

use tellaro_query_language::opensearch::FieldMappings;

// Get mappings from OpenSearch
let mappings_response = client.client()
    .indices()
    .get_mapping()
    .index(&["users"])
    .send()
    .await?;

let mappings = FieldMappings::from_opensearch_response(
    mappings_response.json().await?
)?;

// Use mappings for intelligent query generation
let builder = QueryBuilder::new(Some(mappings));
let query = builder.build_query(&ast)?;
// Automatically selects .keyword for exact matches on text fields

📖 Syntax Guide

Comparison Operators

// Equality
"status = 'active'"           // Exact match (alias: eq)
"status != 'inactive'"        // Not equal (alias: ne)

// Numeric comparisons
"age > 25"                    // Greater than
"age >= 18"                   // Greater or equal
"age < 65"                    // Less than
"age <= 100"                  // Less or equal

// String operations
"email contains '@example.com'" // Substring match
"name startswith 'John'"      // Prefix match
"filename endswith '.pdf'"    // Suffix match

// Pattern matching
"email matches '^\\w+@\\w+\\.\\w+$'"  // Regex match

// Range and membership
"age between [18, 65]"        // Inclusive range
"status in ['active', 'pending']"     // Value in list
"status not in ['deleted', 'archived']" // Value not in list

// IP operations
"ip cidr '192.168.0.0/16'"    // IP in CIDR range

// Null checks
"field is null"               // Field is null or missing
"field is not null"           // Field exists and is not null

Logical Operators

// AND (all conditions must be true)
"age > 25 AND city = 'NYC'"
"status = 'active' AND role in ['admin', 'moderator']"

// OR (either condition must be true)
"city = 'NYC' OR city = 'LA'"
"status = 'admin' OR role = 'superuser'"

// NOT (negates condition)
"NOT (age < 18)"
"NOT status = 'deleted'"

// Complex expressions with parentheses
"(age > 25 AND city = 'NYC') OR (status = 'vip' AND score > 90)"

Collection Operators

// ANY - at least one array element matches
"ANY tags = 'premium'"
"ANY user.roles = 'admin'"

// ALL - every array element matches
"ALL scores >= 80"
"ALL status = 'active'"

// NONE - no array elements match
"NONE flags = 'spam'"
"NONE violations.severity = 'critical'"

Nested Field Access

// Dot notation for nested objects
"user.profile.email contains '@example.com'"
"metadata.tags.priority = 'high'"

// Array indexing
"tags[0] = 'urgent'"
"history[5].status = 'completed'"

🔄 Field Mutators (25+)

Transform field values inline before comparison:

String Mutators

// Case conversion
"email | lowercase contains '@example.com'"
"name | uppercase = 'JOHN DOE'"

// Whitespace handling
"message | trim = 'hello'"

// String manipulation
"text | length > 100"
"path | split('/') | length = 3"
"text | replace('old', 'new') contains 'new'"

Encoding Mutators

// Base64
"data | b64encode = 'aGVsbG8='"
"encoded | b64decode contains 'secret'"

// URL encoding
"param | urldecode = 'hello world'"

// Hexadecimal encoding
"data | hexencode = '68656c6c6f'"
"encoded | hexdecode = 'hello'"

// Cryptographic hashing
"password | md5 = '5f4dcc3b5aa765d61d8327deb882cf99'"
"data | sha256 = '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824'"

Network/Security Mutators

// Defang/Refang URLs (security analysis)
"url | defang contains 'hxxp://example[.]com'"
"indicator | refang = 'http://malicious.com'"

// IP address classification
"source_ip | is_private = true"      // Check if IP is RFC 1918 private
"dest_ip | is_global = true"         // Check if IP is globally routable

// Use cases
"source_ip | is_private = true and port = 22"  // Internal SSH connections
"dest_ip | is_global = false"                    // Non-routable destinations

DNS Mutators

// DNS lookups with caching
"hostname | nslookup contains '8.8.8.8'"
"domain | nslookup = '1.1.1.1'"

Performance: DNS results are cached in memory to avoid repeated lookups.

GeoIP Mutators

// GeoIP enrichment (MaxMind and DB-IP support)
"ip | geoip.country_name = 'United States'"
"ip | geoip.city_name = 'New York'"
"ip | geoip.continent_code = 'NA'"

// Configure with environment variables
// TQL_GEOIP_DB_PATH=/path/to/GeoLite2-City.mmdb
// TQL_GEOIP_MMDB_PATH=/usr/share/GeoIP/

Supported fields (written under the `geo` key in enriched results):

  • geo.country_name, geo.country_iso_code
  • geo.city_name
  • geo.location (lat/lon)
  • geo.continent_code, geo.continent_name
  • geo.region_name, geo.region_iso_code
  • geo.postal_code, geo.timezone

Performance: Uses memory-mapped I/O for efficient database access (200,000+ lookups/sec).

List Mutators

// Boolean aggregations
"tags | any = true"              // Check if any element is truthy
"flags | all = true"             // Check if all elements are truthy

// Numeric aggregations
"scores | avg > 80"              // Calculate average
"values | sum > 1000"            // Calculate sum
"prices | min >= 10"             // Find minimum value
"ratings | max <= 5"             // Find maximum value

// Example with nested arrays
"users.scores | avg > 75"         // Average of nested array
"metrics.values | sum between [100, 500]"  // Sum within range

Chaining Mutators

// Multiple transformations in sequence
"email | trim | lowercase contains '@example.com'"
"data | b64decode | lowercase = 'secret'"
"ip | geoip.country_name | lowercase = 'united states'"

📊 Statistical Aggregations

TQL includes powerful stats functions for data analysis:

Available Functions

use tellaro_query_language::{StatsEvaluator, StatsQuery, AggregationSpec};
use std::collections::HashMap;

let evaluator = StatsEvaluator::new();
let records = vec![
    json!({"city": "NYC", "sales": 100, "product": "laptop"}),
    json!({"city": "LA", "sales": 150, "product": "phone"}),
    json!({"city": "NYC", "sales": 200, "product": "tablet"}),
];

// Count records
let query = StatsQuery {
    aggregations: vec![AggregationSpec {
        function: "count".to_string(),
        field: "*".to_string(),
        alias: Some("total".to_string()),
        params: HashMap::new(),
    }],
    group_by: vec![],
};
let result = evaluator.evaluate_stats(&records, &query)?;
// result["total"] = 3 (keyed by the "total" alias)

// Sum with grouping
let query = StatsQuery {
    aggregations: vec![AggregationSpec {
        function: "sum".to_string(),
        field: "sales".to_string(),
        alias: Some("total_sales".to_string()),
        params: HashMap::new(),
    }],
    group_by: vec!["city".to_string()],
};
let result = evaluator.evaluate_stats(&records, &query)?;
// Groups by city: {"NYC": {"total_sales": 300}, "LA": {"total_sales": 150}}

CLI Stats Queries

# Simple aggregations
tql '| stats count()' data.jsonl
tql '| stats sum(revenue), avg(price)' sales.json

# Grouped analysis
tql '| stats count() by status' events.jsonl
tql '| stats sum(sales) by region, category' data.json

# Top N analysis
tql '| stats sum(revenue, top 10) by product' sales.json

# Combined filtering and stats
tql 'region = "west" | stats avg(revenue) by category' data.json

Aggregation Functions

  • count: Count records (count(*) or count(field))
  • unique_count: Count distinct values
  • sum: Sum numeric values
  • avg/average/mean: Calculate mean
  • min/max: Find minimum/maximum values
  • median/med: Calculate median
  • std/stdev/standard_deviation: Calculate standard deviation
  • percentile/p/pct: Calculate percentiles
  • values/unique: Return unique values

🎯 API Reference

Basic Usage

use tellaro_query_language::Tql;
use serde_json::json;

// Create TQL instance
let tql = Tql::new();

// Or with custom depth limits
let tql = Tql::with_max_depth(200);

// Query records
let records = vec![
    json!({"name": "Alice", "age": 30, "city": "NYC"}),
    json!({"name": "Bob", "age": 25, "city": "LA"}),
];

// Execute query
let results = tql.query(&records, "age > 27").unwrap();
println!("Found {} matching records", results.len());

// Count matches
let count = tql.count(&records, "city = 'NYC'").unwrap();
println!("NYC residents: {}", count);

// Check single record
let user = json!({"age": 30, "status": "active"});
if tql.matches(&user, "age >= 18 AND status = 'active'").unwrap() {
    println!("Valid adult user");
}

Query Pre-compilation

For queries executed multiple times, parse once and reuse the AST:

use tellaro_query_language::{Tql, TqlEvaluator};

let tql = Tql::new();

// Parse query once
let ast = tql.parse("age > 25 AND status = 'active'").unwrap();

// Reuse AST for multiple datasets
let evaluator = TqlEvaluator::new();
let results1 = evaluator.filter(&ast, &dataset1).unwrap();
let results2 = evaluator.filter(&ast, &dataset2).unwrap();

Error Handling

use tellaro_query_language::{Tql, TqlError};

let tql = Tql::new();

match tql.query(&records, "invalid query syntax") {
    Ok(results) => println!("Found {} records", results.len()),
    Err(TqlError::SyntaxError(msg)) => eprintln!("Syntax error: {}", msg),
    Err(TqlError::EvaluationError(msg)) => eprintln!("Evaluation error: {}", msg),
    Err(e) => eprintln!("Error: {}", e),
}

⚡ Performance

Benchmarks

Rust Implementation:

  • In-memory queries: ~3,000,000 records/sec
  • File parsing (JSON): ~150MB/sec
  • GeoIP lookups: ~200,000 lookups/sec (memory-mapped)
  • DNS lookups: ~10,000 lookups/sec (with caching)
  • Large file streaming: Process 50MB in ~200ms

vs Python Implementation:

  • 300x faster for file processing
  • 500x faster for GeoIP lookups (memory-mapped vs Python)
  • 100x faster for DNS lookups (async + caching)

Performance Features

  • Zero-copy deserialization where possible
  • Memory-mapped I/O for GeoIP databases
  • In-memory caching for DNS and GeoIP results
  • Streaming file processing with no memory overhead
  • Parallel query evaluation (planned)

Optimization Tips

// Pre-compile queries for reuse
let ast = tql.parse("age > 25").unwrap();
let results1 = evaluator.filter(&ast, &dataset1).unwrap();
let results2 = evaluator.filter(&ast, &dataset2).unwrap();

// Use streaming for large files
let file = File::open("large.jsonl")?;
let reader = BufReader::new(file);
for line in reader.lines() {
    let record: Value = serde_json::from_str(&line?)?;
    if tql.matches(&record, "level = 'ERROR'")? {
        // Process match without loading entire file
    }
}

// Configure cache sizes for mutators
std::env::set_var("TQL_DNS_CACHE_SIZE", "10000");
std::env::set_var("TQL_GEOIP_CACHE_SIZE", "50000");

🗺️ Roadmap

✅ Implemented Features

  • ✅ Core query engine with all operators
  • ✅ 25+ field mutators (string, encoding, network, DNS, GeoIP, list)
  • ✅ Statistical aggregations with grouping
  • ✅ File support (JSON, JSONL, CSV) with CLI
  • ✅ OpenSearch backend with automatic DSL translation
  • ✅ Memory-mapped GeoIP lookups
  • ✅ DNS resolution with caching
  • ✅ High-performance streaming

🚧 In Progress

  • 🚧 OpenSearch stats aggregation translation
  • 🚧 Post-processing for complex mutator chains
  • 🚧 Additional hash functions (SHA1, SHA512)

📋 Planned Features

  • 📋 Parallel record evaluation
  • 📋 Query optimization engine
  • 📋 JSON parsing mutator
  • 📋 Timestamp conversion mutators
  • 📋 PostgreSQL/MySQL backends
  • 📋 Custom mutator plugins via traits

🔮 Future Considerations

  • 🔮 Distributed query execution
  • 🔮 Query result caching
  • 🔮 Real-time data streaming
  • 🔮 WASM compilation for browser usage

🔧 Development

Setup

# Clone repository
git clone https://github.com/tellaro/tellaro-query-language.git
cd tellaro-query-language/tql

# Build
cargo build

# Run tests
cargo test

# Build release
cargo build --release

# Build with OpenSearch feature
cargo build --features opensearch

Testing

# Run all tests
cargo test

# Run with output
cargo test -- --nocapture

# Run integration tests (requires OpenSearch)
export OPENSEARCH_HOSTS=http://localhost:9200
export OPENSEARCH_USERNAME=admin
export OPENSEARCH_PASSWORD=admin
export OPENSEARCH_INTEGRATION_TEST=true
cargo test --features opensearch -- --ignored --test-threads=1

# Run benchmarks
cargo bench

Code Quality

# Format code
cargo fmt

# Linting
cargo clippy -- -D warnings

# Check compilation
cargo check

🤝 Contributing

Contributions are welcome! Please see CONTRIBUTING.md for guidelines.


📄 License

Tellaro Query Language (TQL) is source-available software with specific usage terms:

Permitted Uses:

  • Personal use (individual, non-commercial)
  • Organizational use (within your company/organization)
  • Integration into your applications and services
  • Internal tools and automation

Restricted Uses:

  • Creating derivative query language products
  • Commercial redistribution or resale
  • Offering TQL-based commercial services to third parties
  • Using source code to build competing products

For commercial licensing inquiries, contact: support@tellaro.io

See LICENSE for complete terms and conditions.



💬 Support


🌟 Advanced Examples

Security Log Analysis

use tellaro_query_language::Tql;
use serde_json::json;

let tql = Tql::new();
let logs = vec![
    json!({
        "timestamp": "2024-01-15T10:30:00Z",
        "source_ip": "192.168.1.100",
        "url": "hxxp://malicious[.]com/payload",
        "severity": "high",
        "tags": ["suspicious", "malware"]
    }),
    json!({
        "timestamp": "2024-01-15T10:31:00Z",
        "source_ip": "10.0.0.50",
        "url": "https://safe-site.com",
        "severity": "low",
        "tags": ["normal"]
    }),
];

// Find high-severity events with malicious indicators
let query = r#"
    severity in ['high', 'critical'] AND
    source_ip | is_private = true AND
    (ANY tags = 'malware' OR url | refang contains 'malicious')
"#;

let results = tql.query(&logs, query).unwrap();
assert_eq!(results.len(), 1);
E-Commerce Product Filtering

let products = vec![
    json!({
        "name": "Laptop Pro 15",
        "price": 1299.99,
        "tags": ["electronics", "computers", "premium"],
        "rating": {"average": 4.5, "count": 128},
        "in_stock": true
    }),
    json!({
        "name": "Budget Mouse",
        "price": 9.99,
        "tags": ["electronics", "accessories"],
        "rating": {"average": 3.8, "count": 45},
        "in_stock": false
    }),
];

// Find in-stock electronics with good ratings under $1500
let query = r#"
    in_stock = true AND
    price < 1500 AND
    rating.average >= 4.0 AND
    ANY tags = 'electronics'
"#;

let results = tql.query(&products, query).unwrap();
assert_eq!(results.len(), 1);

GeoIP Enrichment Pipeline

// Set GeoIP database path
std::env::set_var("TQL_GEOIP_DB_PATH", "/usr/share/GeoIP/GeoLite2-City.mmdb");

let tql = Tql::new();
let events = vec![
    json!({"ip": "8.8.8.8", "event": "login"}),
    json!({"ip": "1.1.1.1", "event": "api_call"}),
];

// Query with GeoIP enrichment
let query = "ip | geoip.country_name = 'United States'";
let results = tql.query(&events, query).unwrap();

// Results include enriched geo data
println!("{}", serde_json::to_string_pretty(&results[0]).unwrap());
// {
//   "ip": "8.8.8.8",
//   "event": "login",
//   "geo": {
//     "country_name": "United States",
//     "country_iso_code": "US",
//     "city_name": "Mountain View",
//     "location": {"lat": 37.386, "lon": -122.0838}
//   }
// }

Made with ❤️ by the Tellaro Team

Dependencies

~30–50MB
~750K SLoC