Skip to content

C++: Expose SSA definitions from dataflow #20149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/ql/lib/change-notes/2025-07-31-ssa.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: feature
---
* Exposed various SSA-related classes (`Definition`, `PhiNode`, `ExplicitDefinition`, `DirectExplicitDefinition`, and `IndirectExplicitDefinition`) which were previously only usable inside the internal dataflow directory.
7 changes: 7 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/controlflow/SSA.qll
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ class StandardSsa extends SsaHelper {
}

/**
* NOTE: If possible, prefer the SSA classes exposed by the new dataflow
* library:
* ```
* import semmle.code.cpp.dataflow.new.DataFlow
* // use `DataFlow::Ssa::Definition`
* ```
*
* A definition of one or more SSA variables, including phi node definitions.
* An _SSA variable_, as defined in the literature, is effectively the pair of
* an `SsaDefinition d` and a `StackVariable v`, written `(d, v)` in this
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import DataFlowPrivate
private import DataFlowUtil
private import DataFlowImplCommon as DataFlowImplCommon
Expand Down Expand Up @@ -60,7 +59,7 @@ private module VirtualDispatch {
* `resolve` predicate to stitch that information together and resolve the
* call.
*/
abstract DataFlow::Node getDispatchValue();
abstract Node getDispatchValue();

/** Gets a candidate target for this call. */
abstract Function resolve();
Expand All @@ -72,17 +71,13 @@ private module VirtualDispatch {
* parameter is true when the search is allowed to continue backwards into
* a parameter; non-recursive callers should pass `_` for `allowFromArg`.
*/
predicate flowsFrom(DataFlow::Node src, boolean allowFromArg) {
predicate flowsFrom(Node src, boolean allowFromArg) {
src = this.getDispatchValue() and allowFromArg = true
or
exists(DataFlow::Node other, boolean allowOtherFromArg |
this.flowsFrom(other, allowOtherFromArg)
|
exists(Node other, boolean allowOtherFromArg | this.flowsFrom(other, allowOtherFromArg) |
// Call argument
exists(DataFlowCall call, Position i |
other
.(DataFlow::ParameterNode)
.isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and
other.(ParameterNode).isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and
src.(ArgumentNode).argumentOf(call, pragma[only_bind_into](pragma[only_bind_out](i)))
) and
allowOtherFromArg = true and
Expand All @@ -96,7 +91,7 @@ private module VirtualDispatch {
allowFromArg = false
or
// Local flow
DataFlow::localFlowStep(src, other) and
localFlowStep(src, other) and
allowFromArg = allowOtherFromArg
or
// Flow from global variable to load.
Expand Down Expand Up @@ -159,11 +154,11 @@ private module VirtualDispatch {
private class DataSensitiveExprCall extends DataSensitiveCall {
DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) }

override DataFlow::Node getDispatchValue() { result.asOperand() = this.getCallTargetOperand() }
override Node getDispatchValue() { result.asOperand() = this.getCallTargetOperand() }

override Function resolve() {
exists(FunctionInstruction fi |
this.flowsFrom(DataFlow::instructionNode(fi), _) and
this.flowsFrom(instructionNode(fi), _) and
result = fi.getFunctionSymbol()
) and
(
Expand All @@ -186,7 +181,7 @@ private module VirtualDispatch {
)
}

override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getArgument(-1) }
override Node getDispatchValue() { result.asInstruction() = this.getArgument(-1) }

override MemberFunction resolve() {
exists(Class overridingClass |
Expand All @@ -213,7 +208,7 @@ private module VirtualDispatch {
pragma[noinline]
private predicate hasFlowFromCastFrom(Class derivedClass) {
exists(ConvertToBaseInstruction toBase |
this.flowsFrom(DataFlow::instructionNode(toBase), _) and
this.flowsFrom(instructionNode(toBase), _) and
derivedClass = toBase.getDerivedClass()
)
}
Expand Down Expand Up @@ -270,7 +265,7 @@ private predicate mayBenefitFromCallContext(
exists(InitializeParameterInstruction init |
not exists(call.getStaticCallTarget()) and
init.getEnclosingFunction() = f.getUnderlyingCallable() and
call.flowsFrom(DataFlow::instructionNode(init), _) and
call.flowsFrom(instructionNode(init), _) and
init.getParameter().getIndex() = arg
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ private import semmle.code.cpp.ir.IR
private import DataFlowDispatch
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import SsaInternals as Ssa
private import SsaImpl as Ssa
private import DataFlowImplCommon as DataFlowImplCommon
private import codeql.util.Unit
private import Node0ToString
Expand Down
88 changes: 52 additions & 36 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import DataFlowPrivate
private import ModelUtil
private import SsaInternals as Ssa
private import SsaImpl as SsaImpl
private import DataFlowImplCommon as DataFlowImplCommon
private import codeql.util.Unit
private import Node0ToString
Expand All @@ -39,38 +39,39 @@ private newtype TIRDataFlowNode =
TNode0(Node0Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or
TGlobalLikeVariableNode(GlobalLikeVariable var, int indirectionIndex) {
indirectionIndex =
[getMinIndirectionsForType(var.getUnspecifiedType()) .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())]
[getMinIndirectionsForType(var.getUnspecifiedType()) .. SsaImpl::getMaxIndirectionsForType(var.getUnspecifiedType())]
} or
TPostUpdateNodeImpl(Operand operand, int indirectionIndex) {
operand = any(FieldAddress fa).getObjectAddressOperand() and
indirectionIndex = [0 .. Ssa::countIndirectionsForCppType(Ssa::getLanguageType(operand))]
indirectionIndex =
[0 .. SsaImpl::countIndirectionsForCppType(SsaImpl::getLanguageType(operand))]
or
Ssa::isModifiableByCall(operand, indirectionIndex)
SsaImpl::isModifiableByCall(operand, indirectionIndex)
} or
TSsaSynthNode(Ssa::SynthNode n) or
TSsaSynthNode(SsaImpl::SynthNode n) or
TSsaIteratorNode(IteratorFlow::IteratorFlowNode n) or
TRawIndirectOperand0(Node0Impl node, int indirectionIndex) {
Ssa::hasRawIndirectOperand(node.asOperand(), indirectionIndex)
SsaImpl::hasRawIndirectOperand(node.asOperand(), indirectionIndex)
} or
TRawIndirectInstruction0(Node0Impl node, int indirectionIndex) {
not exists(node.asOperand()) and
Ssa::hasRawIndirectInstruction(node.asInstruction(), indirectionIndex)
SsaImpl::hasRawIndirectInstruction(node.asInstruction(), indirectionIndex)
} or
TFinalParameterNode(Parameter p, int indirectionIndex) {
exists(Ssa::FinalParameterUse use |
exists(SsaImpl::FinalParameterUse use |
use.getParameter() = p and
use.getIndirectionIndex() = indirectionIndex
)
} or
TFinalGlobalValue(Ssa::GlobalUse globalUse) or
TInitialGlobalValue(Ssa::GlobalDef globalUse) or
TFinalGlobalValue(SsaImpl::GlobalUse globalUse) or
TInitialGlobalValue(SsaImpl::GlobalDef globalUse) or
TBodyLessParameterNodeImpl(Parameter p, int indirectionIndex) {
// Rule out parameters of catch blocks.
not exists(p.getCatchBlock()) and
// We subtract one because `getMaxIndirectionsForType` returns the maximum
// indirection for a glvalue of a given type, and this doesn't apply to
// parameters.
indirectionIndex = [0 .. Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and
indirectionIndex = [0 .. SsaImpl::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and
not any(InitializeParameterInstruction init).getParameter() = p
} or
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn)
Expand All @@ -81,7 +82,7 @@ private newtype TIRDataFlowNode =
class FieldAddress extends Operand {
FieldAddressInstruction fai;

FieldAddress() { fai = this.getDef() and not Ssa::ignoreOperand(this) }
FieldAddress() { fai = this.getDef() and not SsaImpl::ignoreOperand(this) }

/** Gets the field associated with this instruction. */
Field getField() { result = fai.getField() }
Expand Down Expand Up @@ -126,7 +127,7 @@ predicate conversionFlow(
)
or
additional = true and
Ssa::isAdditionalConversionFlow(opFrom, instrTo)
SsaImpl::isAdditionalConversionFlow(opFrom, instrTo)
)
or
isPointerArith = true and
Expand Down Expand Up @@ -183,7 +184,7 @@ class Node extends TIRDataFlowNode {
or
this.asOperand().getUse() = block.getInstruction(i)
or
exists(Ssa::SynthNode ssaNode |
exists(SsaImpl::SynthNode ssaNode |
this.(SsaSynthNode).getSynthNode() = ssaNode and
ssaNode.getBasicBlock() = block and
ssaNode.getIndex() = i
Expand Down Expand Up @@ -364,10 +365,10 @@ class Node extends TIRDataFlowNode {
* pointed to by `p`.
*/
Expr asDefinition(boolean uncertain) {
exists(StoreInstruction store, Ssa::Definition def |
exists(StoreInstruction store, SsaImpl::Definition def |
store = this.asInstruction() and
result = asDefinitionImpl(store) and
Ssa::defToNode(this, def, _) and
SsaImpl::defToNode(this, def, _) and
if def.isCertain() then uncertain = false else uncertain = true
)
}
Expand Down Expand Up @@ -627,7 +628,7 @@ class OperandNode extends Node, Node0 {
* For example, `stripPointers(int*&)` is `int*` and `stripPointers(int*)` is `int`.
*/
Type stripPointer(Type t) {
result = any(Ssa::Indirection ind | ind.getType() = t).getBaseType()
result = any(SsaImpl::Indirection ind | ind.getType() = t).getBaseType()
or
result = t.(PointerToMemberType).getBaseType()
or
Expand Down Expand Up @@ -694,12 +695,12 @@ class PostFieldUpdateNode extends PostUpdateNodeImpl {
* in a data flow graph.
*/
class SsaSynthNode extends Node, TSsaSynthNode {
Ssa::SynthNode node;
SsaImpl::SynthNode node;

SsaSynthNode() { this = TSsaSynthNode(node) }

/** Gets the synthesized SSA node associated with this node. */
Ssa::SynthNode getSynthNode() { result = node }
SsaImpl::SynthNode getSynthNode() { result = node }

override DataFlowCallable getEnclosingCallable() {
result.asSourceCallable() = this.getFunction()
Expand Down Expand Up @@ -782,12 +783,12 @@ class SideEffectOperandNode extends Node instanceof IndirectOperand {
* from a function body.
*/
class FinalGlobalValue extends Node, TFinalGlobalValue {
Ssa::GlobalUse globalUse;
SsaImpl::GlobalUse globalUse;

FinalGlobalValue() { this = TFinalGlobalValue(globalUse) }

/** Gets the underlying SSA use. */
Ssa::GlobalUse getGlobalUse() { result = globalUse }
SsaImpl::GlobalUse getGlobalUse() { result = globalUse }

override DataFlowCallable getEnclosingCallable() {
result.asSourceCallable() = this.getFunction()
Expand All @@ -814,12 +815,12 @@ class FinalGlobalValue extends Node, TFinalGlobalValue {
* a function body.
*/
class InitialGlobalValue extends Node, TInitialGlobalValue {
Ssa::GlobalDef globalDef;
SsaImpl::GlobalDef globalDef;

InitialGlobalValue() { this = TInitialGlobalValue(globalDef) }

/** Gets the underlying SSA definition. */
Ssa::GlobalDef getGlobalDef() { result = globalDef }
SsaImpl::GlobalDef getGlobalDef() { result = globalDef }

override DataFlowCallable getEnclosingCallable() {
result.asSourceCallable() = this.getFunction()
Expand Down Expand Up @@ -1288,11 +1289,11 @@ class UninitializedNode extends Node {
LocalVariable v;

UninitializedNode() {
exists(Ssa::Definition def, Ssa::SourceVariable sv |
exists(SsaImpl::Definition def, SsaImpl::SourceVariable sv |
def.getIndirectionIndex() = 0 and
def.getValue().asInstruction() instanceof UninitializedInstruction and
Ssa::defToNode(this, def, sv) and
v = sv.getBaseVariable().(Ssa::BaseIRVariable).getIRVariable().getAst()
SsaImpl::defToNode(this, def, sv) and
v = sv.getBaseVariable().(SsaImpl::BaseIRVariable).getIRVariable().getAst()
)
}

Expand Down Expand Up @@ -1722,7 +1723,7 @@ private module Cached {
cached
predicate flowsToBackEdge(Node n) {
exists(Node succ, IRBlock bb1, IRBlock bb2 |
Ssa::ssaFlow(n, succ) and
SsaImpl::ssaFlow(n, succ) and
bb1 = n.getBasicBlock() and
bb2 = succ.getBasicBlock() and
bb1 != bb2 and
Expand Down Expand Up @@ -1820,7 +1821,7 @@ private module Cached {
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
(
// Def-use/Use-use flow
Ssa::ssaFlow(nodeFrom, nodeTo)
SsaImpl::ssaFlow(nodeFrom, nodeTo)
or
IteratorFlow::localFlowStep(nodeFrom, nodeTo)
or
Expand All @@ -1833,7 +1834,7 @@ private module Cached {
|
simpleOperandLocalFlowStep(iFrom, opTo) and
// Omit when the instruction node also represents the operand.
not iFrom = Ssa::getIRRepresentationOfOperand(opTo)
not iFrom = SsaImpl::getIRRepresentationOfOperand(opTo)
)
or
// Indirect operand -> (indirect) instruction flow
Expand Down Expand Up @@ -1906,7 +1907,7 @@ private module Cached {
// We also want a write coming out of an `OutNode` to flow `nodeTo`.
// This is different from `reverseFlowInstruction` since `nodeFrom` can never
// be an `OutNode` when it's defined by an instruction.
Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex)
SsaImpl::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex)
)
}

Expand Down Expand Up @@ -2099,7 +2100,7 @@ private newtype TContent =
TFieldContent(Field f, int indirectionIndex) {
// the indirection index for field content starts at 1 (because `TFieldContent` is thought of as
// the address of the field, `FieldAddress` in the IR).
indirectionIndex = [1 .. Ssa::getMaxIndirectionsForType(f.getUnspecifiedType())] and
indirectionIndex = [1 .. SsaImpl::getMaxIndirectionsForType(f.getUnspecifiedType())] and
// Reads and writes of union fields are tracked using `UnionContent`.
not f.getDeclaringType() instanceof Union
} or
Expand All @@ -2111,7 +2112,9 @@ private newtype TContent =
// field can be read by any read of the union's fields. Again, the indirection index
// is 1-based (because 0 is considered the address).
indirectionIndex =
[1 .. max(Ssa::getMaxIndirectionsForType(getAFieldWithSize(u, bytes).getUnspecifiedType()))]
[1 .. max(SsaImpl::getMaxIndirectionsForType(getAFieldWithSize(u, bytes)
.getUnspecifiedType())
)]
)
} or
TElementContent(int indirectionIndex) {
Expand Down Expand Up @@ -2354,7 +2357,7 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
controls(g, result, edge)
)
or
result = Ssa::BarrierGuard<guardChecksNode/3>::getABarrierNode()
result = SsaImpl::BarrierGuard<guardChecksNode/3>::getABarrierNode()
}

/**
Expand Down Expand Up @@ -2453,7 +2456,7 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
)
or
result =
Ssa::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
SsaImpl::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
}
}

Expand Down Expand Up @@ -2490,7 +2493,7 @@ module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardCheck
controls(g, result, edge)
)
or
result = Ssa::BarrierGuard<guardChecksNode/3>::getABarrierNode()
result = SsaImpl::BarrierGuard<guardChecksNode/3>::getABarrierNode()
}

bindingset[value, n]
Expand Down Expand Up @@ -2520,7 +2523,7 @@ module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardCheck
)
or
result =
Ssa::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
SsaImpl::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
}
}

Expand Down Expand Up @@ -2576,3 +2579,16 @@ Function getARuntimeTarget(Call call) {
result = DataFlowImplCommon::viableCallableLambda(dfCall, _).asSourceCallable()
)
}

/** A module that provides static single assignment (SSA) information. */
module Ssa {
class Definition = SsaImpl::Definition;

class ExplicitDefinition = SsaImpl::ExplicitDefinition;

class DirectExplicitDefinition = SsaImpl::DirectExplicitDefinition;

class IndirectExplicitDefinition = SsaImpl::IndirectExplicitDefinition;

class PhiNode = SsaImpl::PhiNode;
}
Loading