diff --git a/packages/malloy/src/lang/composite-source-utils.ts b/packages/malloy/src/lang/composite-source-utils.ts index 92eda2ac3f..e614857085 100644 --- a/packages/malloy/src/lang/composite-source-utils.ts +++ b/packages/malloy/src/lang/composite-source-utils.ts @@ -39,6 +39,7 @@ import { isJoinable, isJoined, isQuerySegment, + isRepeatedRecord, isSourceDef, isTurtle, } from '../model/malloy_types'; @@ -410,6 +411,7 @@ interface JoinDependency { path: string[]; dependsOn: Set; checked?: boolean; + onReferencesNestedData?: boolean; } function getJoin( @@ -449,7 +451,12 @@ function findActiveJoins( // Add this join's path to the sorted list after its dependencies if (dep) { - sorted.push({path: dep.path}); + const usage: FieldUsage = {path: dep.path}; + if (dep.onReferencesNestedData) { + usage.onReferencesNestedData = true; + } + + sorted.push(usage); } }; @@ -536,10 +543,27 @@ function expandFieldUsage( const joinKey = pathToKey('join', joinPath); const thisDep = getJoin(activeJoinGraph, joinKey, joinPath); - if (isJoined(joinDef) && !thisDep.checked) { + const needsTracking = isJoined(joinDef) || isRepeatedRecord(joinDef); + if (needsTracking && !thisDep.checked) { thisDep.checked = true; + const joinFieldUsage = getJoinFieldUsage(joinDef, joinPath); + if ( + joinPath.length > 1 && + (isJoined(joinDef) || isRepeatedRecord(joinDef)) + ) { + const parentPath = joinPath.slice(0, -1); + const parentKey = pathToKey('join', parentPath); + const parentDep = activeJoinGraph[parentKey]; + + // The parent join's ON expression led us to this nested structure + // Mark it as needing complex SQL generation + if (parentDep && !parentDep.onReferencesNestedData) { + parentDep.onReferencesNestedData = true; + } + } + // Add join's field dependencies to the queue for (const usage of joinFieldUsage) { const key = pathToKey('field', usage.path); diff --git a/packages/malloy/src/lang/test/source.spec.ts b/packages/malloy/src/lang/test/source.spec.ts index 8a025a9f1f..77ea0ec481 100644 --- a/packages/malloy/src/lang/test/source.spec.ts +++ b/packages/malloy/src/lang/test/source.spec.ts @@ -1051,4 +1051,54 @@ describe('source:', () => { } } }); + test('join ON expression sets onReferencesNestedData flag', () => { + const t = model` + source: usr is a extend { + rename: id is ai + dimension: email is concat(astr, ".com") + } + source: res is a extend { + rename: user_id is ai + join_one: usr on usr.id = user_id + dimension: usr_email is usr.email + } + source: msg is a extend { + rename: id is ai + dimension: msg_email is concat(astr, '.com') + join_many: res on msg_email = res.usr_email + } + run: msg -> { + select: *, res.usr_email + } + `; + + expect(t).toTranslate(); + + const query = t.translator.getQuery(0); + expect(query).toBeDefined(); + if (query) { + const firstSegment = query.pipeline[0]; + expect(firstSegment.type).toBe('project'); + if (firstSegment.type === 'project') { + expect(firstSegment.activeJoins).toBeDefined(); + + // Find the 'res' join in activeJoins + const resJoin = firstSegment.activeJoins?.find( + j => j.path.length === 1 && j.path[0] === 'res' + ); + + // The 'res' join should have onReferencesChildren set to true + // because its ON expression uses res.usr_email which expands to usr.email + expect(resJoin).toBeDefined(); + expect(resJoin?.onReferencesNestedData).toBe(true); + + // The 'usr' join (nested within res) should NOT have the flag + const usrJoin = firstSegment.activeJoins?.find( + j => j.path.length === 2 && j.path[0] === 'res' && j.path[1] === 'usr' + ); + expect(usrJoin).toBeDefined(); + expect(usrJoin?.onReferencesNestedData).toBeUndefined(); + } + } + }); }); diff --git a/packages/malloy/src/model/field_instance.ts b/packages/malloy/src/model/field_instance.ts index 7f582be2bd..370abcd334 100644 --- a/packages/malloy/src/model/field_instance.ts +++ b/packages/malloy/src/model/field_instance.ts @@ -451,7 +451,8 @@ export class FieldInstanceResult implements FieldInstance { addStructToJoin( qs: QueryStruct, uniqueKeyRequirement: UniqueKeyRequirement - ): void { + ): JoinInstance { + // CHANGED: now returns JoinInstance const name = qs.getIdentifier(); let join = this.root().joins.get(name); @@ -460,7 +461,7 @@ export class FieldInstanceResult implements FieldInstance { join.uniqueKeyRequirement, uniqueKeyRequirement ); - return; + return join; // CHANGED: return existing join } // if we have a parent, join it first. @@ -480,6 +481,7 @@ export class FieldInstanceResult implements FieldInstance { join.uniqueKeyRequirement, uniqueKeyRequirement ); + return join; } root(): FieldInstanceResultRoot { diff --git a/packages/malloy/src/model/join_instance.ts b/packages/malloy/src/model/join_instance.ts index 5c4384238e..de72263a6a 100644 --- a/packages/malloy/src/model/join_instance.ts +++ b/packages/malloy/src/model/join_instance.ts @@ -17,6 +17,7 @@ export class JoinInstance { leafiest = false; joinFilterConditions?: QueryFieldBoolean[]; children: JoinInstance[] = []; + onReferencesNestedData?: boolean; constructor( public queryStruct: QueryStruct, public alias: string, diff --git a/packages/malloy/src/model/malloy_types.ts b/packages/malloy/src/model/malloy_types.ts index 9accd8d12b..631a38feac 100644 --- a/packages/malloy/src/model/malloy_types.ts +++ b/packages/malloy/src/model/malloy_types.ts @@ -1192,6 +1192,7 @@ export interface FieldUsage { at?: DocumentLocation; uniqueKeyRequirement?: UniqueKeyRequirement; analyticFunctionUse?: boolean; + onReferencesNestedData?: boolean; } export function bareFieldUsage(fu: FieldUsage): boolean { diff --git a/packages/malloy/src/model/query_query.ts b/packages/malloy/src/model/query_query.ts index aa31dac845..bf14f1ef6f 100644 --- a/packages/malloy/src/model/query_query.ts +++ b/packages/malloy/src/model/query_query.ts @@ -246,16 +246,22 @@ export class QueryQuery extends QueryField { return {as, field}; } + // Modified addDependantPath to return the JoinInstance private addDependantPath( path: string[], uniqueKeyRequirement: UniqueKeyRequirement - ) { + ): JoinInstance { const node = this.parent.getFieldByName(path); const joinableParent = node instanceof QueryFieldStruct ? node.queryStruct.getJoinableParent() : node.parent.getJoinableParent(); - this.rootResult.addStructToJoin(joinableParent, uniqueKeyRequirement); + + // This returns the JoinInstance + return this.rootResult.addStructToJoin( + joinableParent, + uniqueKeyRequirement + ); } private dependenciesFromFieldUsage() { @@ -269,7 +275,10 @@ export class QueryQuery extends QueryField { } for (const joinUsage of this.firstSegment.activeJoins || []) { - this.addDependantPath(joinUsage.path, undefined); + const joinInstance = this.addDependantPath(joinUsage.path, undefined); + if (joinUsage.onReferencesNestedData) { + joinInstance.onReferencesNestedData = true; + } } for (const usage of this.firstSegment.expandedFieldUsage || []) { if (usage.analyticFunctionUse) { @@ -876,11 +885,20 @@ export class QueryQuery extends QueryField { }); } - if ( - ji.children.length === 0 || - conditions === undefined || - !this.parent.dialect.supportsComplexFilteredSources - ) { + const needsComplexPattern = + ji.onReferencesNestedData || // NEW: our flag for ON conditions referencing nested data + (ji.children.length > 0 && + conditions !== undefined && + conditions.length > 0 && + this.parent.dialect.supportsComplexFilteredSources); + + if (!needsComplexPattern) { + // Simple case - direct JOIN + if (conditions !== undefined && conditions.length >= 1) { + filters = ` AND (${conditions.join(' AND ')})`; + } + s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ON ${onCondition}${filters}\n`; + } else { // LTNOTE: need a check here to see the children's where: conditions are local // to the source and not to any of it's joined children. // In Presto, we're going to get a SQL error if in this case @@ -894,12 +912,6 @@ export class QueryQuery extends QueryField { // 'Cannot join a source with a complex filter on a joined source' // ); // } - - if (conditions !== undefined && conditions.length >= 1) { - filters = ` AND (${conditions.join(' AND ')})`; - } - s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ON ${onCondition}${filters}\n`; - } else { let select = `SELECT ${ji.alias}.*`; let joins = ''; for (const childJoin of ji.children) { @@ -909,14 +921,15 @@ export class QueryQuery extends QueryField { getDialectFieldList(childJoin.queryStruct.structDef) )} AS ${childJoin.alias}`; } - select += `\nFROM ${structSQL} AS ${ - ji.alias - }\n${joins}\nWHERE ${conditions?.join(' AND ')}\n`; + select += `\nFROM ${structSQL} AS ${ji.alias}\n${joins}`; + if (conditions && conditions.length > 0) { + select += `\nWHERE ${conditions.join(' AND ')}\n`; + } s += `${matrixOperation} JOIN (\n${indent(select)}) AS ${ ji.alias }\n ON ${onCondition}\n`; - return s; } + return s; } else if (qsDef.type === 'array') { if (qs.parent === undefined || ji.parent === undefined) { throw new Error('Internal Error, nested structure with no parent.'); diff --git a/test/src/databases/all/join.spec.ts b/test/src/databases/all/join.spec.ts index 6ee64b89a8..791780f59d 100644 --- a/test/src/databases/all/join.spec.ts +++ b/test/src/databases/all/join.spec.ts @@ -24,6 +24,7 @@ /* eslint-disable no-console */ import {RuntimeList, allDatabases} from '../../runtimes'; +import {TestSelect} from '../../test-select'; import {databasesFromEnvironmentOr} from '../../util'; import '../../util/db-jest-matchers'; @@ -318,4 +319,66 @@ describe.each(runtimes.runtimeList)('%s', (databaseName, runtime) => { `).matchesRows(runtime, {pick_a1: [1]}); } ); + test('join ON expression references nested join dimension', async () => { + const ts = new TestSelect(runtime.dialect); + const usrSQL = ts.generate({id: 1, email: 'email@co.com'}); + const resSQL = ts.generate({id: 1, user_id: 1}); + const msgSQL = ts.generate({id: 1, msg_email: 'email@co.com'}); + await expect(` + source: usr is ${databaseName}.sql("""${usrSQL}""") + source: res is ${databaseName}.sql("""${resSQL}""") extend { + join_one: usr on usr.id = user_id + dimension: usr_email is usr.email + } + source: msg is ${databaseName}.sql("""${msgSQL}""") extend { + join_many: res on res.usr_email = msg_email + } + run: msg -> { + group_by: msg_email, res.usr_email + } + `).malloyResultMatches(runtime, { + msg_email: 'email@co.com', + usr_email: 'email@co.com', + }); + }); + test('array field in joined source used in join condition', async () => { + const ts = new TestSelect(runtime.dialect); + + // Mock GA4 events table with event_params array + const eventsSQL = ts.generate( + { + event_name: 'page_view', + event_params: [ + {key: 'page_location', value: '/home'}, + {key: 'page_title', value: 'Home Page'}, + ], + }, + { + event_name: 'user_engagement', + event_params: [ + {key: 'engagement_time', value: '1000'}, + {key: 'page_view', value: 'true'}, // This matches event_name from row 1 + ], + }, + { + event_name: 'scroll', + event_params: [{key: 'percent', value: '50'}], + } + ); + + await expect(` + source: ga4_1 is duckdb.sql("""${eventsSQL}""") extend { + dimension: event_param is event_params.key + } + + source: ga4_2 is duckdb.sql("""${eventsSQL}""") extend { + join_one: ga4 is ga4_1 on event_name = ga4.event_param + } + + run: ga4_2 -> { + aggregate: cnt is count() + where: ga4.event_param is not null + } + `).malloyResultMatches(runtime, {cnt: 1}); // Should match 'page_view' = 'page_view' + }); });