Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions packages/malloy/src/lang/composite-source-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import {
isJoinable,
isJoined,
isQuerySegment,
isRepeatedRecord,
isSourceDef,
isTurtle,
} from '../model/malloy_types';
Expand Down Expand Up @@ -410,6 +411,7 @@ interface JoinDependency {
path: string[];
dependsOn: Set<string>;
checked?: boolean;
onReferencesNestedData?: boolean;
}

function getJoin(
Expand Down Expand Up @@ -449,7 +451,12 @@ function findActiveJoins(

// Add this join's path to the sorted list after its dependencies
if (dep) {
sorted.push({path: dep.path});
const usage: FieldUsage = {path: dep.path};
if (dep.onReferencesNestedData) {
usage.onReferencesNestedData = true;
}

sorted.push(usage);
}
};

Expand Down Expand Up @@ -536,10 +543,27 @@ function expandFieldUsage(
const joinKey = pathToKey('join', joinPath);
const thisDep = getJoin(activeJoinGraph, joinKey, joinPath);

if (isJoined(joinDef) && !thisDep.checked) {
const needsTracking = isJoined(joinDef) || isRepeatedRecord(joinDef);
if (needsTracking && !thisDep.checked) {
thisDep.checked = true;

const joinFieldUsage = getJoinFieldUsage(joinDef, joinPath);

if (
joinPath.length > 1 &&
(isJoined(joinDef) || isRepeatedRecord(joinDef))
) {
const parentPath = joinPath.slice(0, -1);
const parentKey = pathToKey('join', parentPath);
const parentDep = activeJoinGraph[parentKey];

// The parent join's ON expression led us to this nested structure
// Mark it as needing complex SQL generation
if (parentDep && !parentDep.onReferencesNestedData) {
parentDep.onReferencesNestedData = true;
}
}

// Add join's field dependencies to the queue
for (const usage of joinFieldUsage) {
const key = pathToKey('field', usage.path);
Expand Down
50 changes: 50 additions & 0 deletions packages/malloy/src/lang/test/source.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1051,4 +1051,54 @@ describe('source:', () => {
}
}
});
test('join ON expression sets onReferencesNestedData flag', () => {
const t = model`
source: usr is a extend {
rename: id is ai
dimension: email is concat(astr, ".com")
}
source: res is a extend {
rename: user_id is ai
join_one: usr on usr.id = user_id
dimension: usr_email is usr.email
}
source: msg is a extend {
rename: id is ai
dimension: msg_email is concat(astr, '.com')
join_many: res on msg_email = res.usr_email
}
run: msg -> {
select: *, res.usr_email
}
`;

expect(t).toTranslate();

const query = t.translator.getQuery(0);
expect(query).toBeDefined();
if (query) {
const firstSegment = query.pipeline[0];
expect(firstSegment.type).toBe('project');
if (firstSegment.type === 'project') {
expect(firstSegment.activeJoins).toBeDefined();

// Find the 'res' join in activeJoins
const resJoin = firstSegment.activeJoins?.find(
j => j.path.length === 1 && j.path[0] === 'res'
);

// The 'res' join should have onReferencesChildren set to true
// because its ON expression uses res.usr_email which expands to usr.email
expect(resJoin).toBeDefined();
expect(resJoin?.onReferencesNestedData).toBe(true);

// The 'usr' join (nested within res) should NOT have the flag
const usrJoin = firstSegment.activeJoins?.find(
j => j.path.length === 2 && j.path[0] === 'res' && j.path[1] === 'usr'
);
expect(usrJoin).toBeDefined();
expect(usrJoin?.onReferencesNestedData).toBeUndefined();
}
}
});
});
6 changes: 4 additions & 2 deletions packages/malloy/src/model/field_instance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@ export class FieldInstanceResult implements FieldInstance {
addStructToJoin(
qs: QueryStruct,
uniqueKeyRequirement: UniqueKeyRequirement
): void {
): JoinInstance {
// CHANGED: now returns JoinInstance
const name = qs.getIdentifier();

let join = this.root().joins.get(name);
Expand All @@ -460,7 +461,7 @@ export class FieldInstanceResult implements FieldInstance {
join.uniqueKeyRequirement,
uniqueKeyRequirement
);
return;
return join; // CHANGED: return existing join
}

// if we have a parent, join it first.
Expand All @@ -480,6 +481,7 @@ export class FieldInstanceResult implements FieldInstance {
join.uniqueKeyRequirement,
uniqueKeyRequirement
);
return join;
}

root(): FieldInstanceResultRoot {
Expand Down
1 change: 1 addition & 0 deletions packages/malloy/src/model/join_instance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export class JoinInstance {
leafiest = false;
joinFilterConditions?: QueryFieldBoolean[];
children: JoinInstance[] = [];
onReferencesNestedData?: boolean;
constructor(
public queryStruct: QueryStruct,
public alias: string,
Expand Down
1 change: 1 addition & 0 deletions packages/malloy/src/model/malloy_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,7 @@ export interface FieldUsage {
at?: DocumentLocation;
uniqueKeyRequirement?: UniqueKeyRequirement;
analyticFunctionUse?: boolean;
onReferencesNestedData?: boolean;
}

export function bareFieldUsage(fu: FieldUsage): boolean {
Expand Down
49 changes: 31 additions & 18 deletions packages/malloy/src/model/query_query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -246,16 +246,22 @@ export class QueryQuery extends QueryField {
return {as, field};
}

// Modified addDependantPath to return the JoinInstance
private addDependantPath(
path: string[],
uniqueKeyRequirement: UniqueKeyRequirement
) {
): JoinInstance {
const node = this.parent.getFieldByName(path);
const joinableParent =
node instanceof QueryFieldStruct
? node.queryStruct.getJoinableParent()
: node.parent.getJoinableParent();
this.rootResult.addStructToJoin(joinableParent, uniqueKeyRequirement);

// This returns the JoinInstance
return this.rootResult.addStructToJoin(
joinableParent,
uniqueKeyRequirement
);
}

private dependenciesFromFieldUsage() {
Expand All @@ -269,7 +275,10 @@ export class QueryQuery extends QueryField {
}

for (const joinUsage of this.firstSegment.activeJoins || []) {
this.addDependantPath(joinUsage.path, undefined);
const joinInstance = this.addDependantPath(joinUsage.path, undefined);
if (joinUsage.onReferencesNestedData) {
joinInstance.onReferencesNestedData = true;
}
}
for (const usage of this.firstSegment.expandedFieldUsage || []) {
if (usage.analyticFunctionUse) {
Expand Down Expand Up @@ -876,11 +885,20 @@ export class QueryQuery extends QueryField {
});
}

if (
ji.children.length === 0 ||
conditions === undefined ||
!this.parent.dialect.supportsComplexFilteredSources
) {
const needsComplexPattern =
ji.onReferencesNestedData || // NEW: our flag for ON conditions referencing nested data
(ji.children.length > 0 &&
conditions !== undefined &&
conditions.length > 0 &&
this.parent.dialect.supportsComplexFilteredSources);

if (!needsComplexPattern) {
// Simple case - direct JOIN
if (conditions !== undefined && conditions.length >= 1) {
filters = ` AND (${conditions.join(' AND ')})`;
}
s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ON ${onCondition}${filters}\n`;
} else {
// LTNOTE: need a check here to see the children's where: conditions are local
// to the source and not to any of it's joined children.
// In Presto, we're going to get a SQL error if in this case
Expand All @@ -894,12 +912,6 @@ export class QueryQuery extends QueryField {
// 'Cannot join a source with a complex filter on a joined source'
// );
// }

if (conditions !== undefined && conditions.length >= 1) {
filters = ` AND (${conditions.join(' AND ')})`;
}
s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ON ${onCondition}${filters}\n`;
} else {
let select = `SELECT ${ji.alias}.*`;
let joins = '';
for (const childJoin of ji.children) {
Expand All @@ -909,14 +921,15 @@ export class QueryQuery extends QueryField {
getDialectFieldList(childJoin.queryStruct.structDef)
)} AS ${childJoin.alias}`;
}
select += `\nFROM ${structSQL} AS ${
ji.alias
}\n${joins}\nWHERE ${conditions?.join(' AND ')}\n`;
select += `\nFROM ${structSQL} AS ${ji.alias}\n${joins}`;
if (conditions && conditions.length > 0) {
select += `\nWHERE ${conditions.join(' AND ')}\n`;
}
s += `${matrixOperation} JOIN (\n${indent(select)}) AS ${
ji.alias
}\n ON ${onCondition}\n`;
return s;
}
return s;
} else if (qsDef.type === 'array') {
if (qs.parent === undefined || ji.parent === undefined) {
throw new Error('Internal Error, nested structure with no parent.');
Expand Down
63 changes: 63 additions & 0 deletions test/src/databases/all/join.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
/* eslint-disable no-console */

import {RuntimeList, allDatabases} from '../../runtimes';
import {TestSelect} from '../../test-select';
import {databasesFromEnvironmentOr} from '../../util';
import '../../util/db-jest-matchers';

Expand Down Expand Up @@ -318,4 +319,66 @@ describe.each(runtimes.runtimeList)('%s', (databaseName, runtime) => {
`).matchesRows(runtime, {pick_a1: [1]});
}
);
test('join ON expression references nested join dimension', async () => {
const ts = new TestSelect(runtime.dialect);
const usrSQL = ts.generate({id: 1, email: 'email@co.com'});
const resSQL = ts.generate({id: 1, user_id: 1});
const msgSQL = ts.generate({id: 1, msg_email: 'email@co.com'});
await expect(`
source: usr is ${databaseName}.sql("""${usrSQL}""")
source: res is ${databaseName}.sql("""${resSQL}""") extend {
join_one: usr on usr.id = user_id
dimension: usr_email is usr.email
}
source: msg is ${databaseName}.sql("""${msgSQL}""") extend {
join_many: res on res.usr_email = msg_email
}
run: msg -> {
group_by: msg_email, res.usr_email
}
`).malloyResultMatches(runtime, {
msg_email: 'email@co.com',
usr_email: 'email@co.com',
});
});
test('array field in joined source used in join condition', async () => {
const ts = new TestSelect(runtime.dialect);

// Mock GA4 events table with event_params array
const eventsSQL = ts.generate(
{
event_name: 'page_view',
event_params: [
{key: 'page_location', value: '/home'},
{key: 'page_title', value: 'Home Page'},
],
},
{
event_name: 'user_engagement',
event_params: [
{key: 'engagement_time', value: '1000'},
{key: 'page_view', value: 'true'}, // This matches event_name from row 1
],
},
{
event_name: 'scroll',
event_params: [{key: 'percent', value: '50'}],
}
);

await expect(`
source: ga4_1 is duckdb.sql("""${eventsSQL}""") extend {
dimension: event_param is event_params.key
}

source: ga4_2 is duckdb.sql("""${eventsSQL}""") extend {
join_one: ga4 is ga4_1 on event_name = ga4.event_param
}

run: ga4_2 -> {
aggregate: cnt is count()
where: ga4.event_param is not null
}
`).malloyResultMatches(runtime, {cnt: 1}); // Should match 'page_view' = 'page_view'
});
});
Loading