Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add shared basic block library #18497

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ private module CfgInput implements CfgShared::InputSig<Location> {
t instanceof ST::SuccessorTypes::ExitSuccessor
}

predicate idOfAstNode(AstNode node, int id) { node.getId() = id }
int idOfAstNode(AstNode node) { result = node.getId() }

predicate idOfCfgScope(CfgScope node, int id) { idOfAstNode(node, id) }
int idOfCfgScope(CfgScope node) { result = idOfAstNode(node) }
}

private module CfgSplittingInput implements CfgShared::SplittingInputSig<Location, CfgInput> {
Expand Down
2 changes: 0 additions & 2 deletions ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@ private import codeql.ruby.AST
private import codeql.ruby.ast.internal.AST
private import codeql.ruby.ast.internal.TreeSitter
private import codeql.ruby.controlflow.ControlFlowGraph
private import codeql.ruby.controlflow.ControlFlowGraph as Cfg
private import internal.ControlFlowGraphImpl as CfgImpl
private import CfgNodes
private import SuccessorTypes
private import codeql.controlflow.BasicBlock as BB
private import CfgImpl::BasicBlocks as BasicBlocksImpl

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ private module CfgInput implements CfgShared::InputSig<Location> {

private predicate idOf(Ruby::AstNode node, int id) = equivalenceRelation(id/2)(node, id)

predicate idOfAstNode(AstNode node, int id) { idOf(AstInternal::toGeneratedInclSynth(node), id) }
int idOfAstNode(AstNode node) { idOf(AstInternal::toGeneratedInclSynth(node), result) }

predicate idOfCfgScope(CfgScope node, int id) { idOfAstNode(node, id) }
int idOfCfgScope(CfgScope node) { result = idOfAstNode(node) }
}

private module CfgSplittingInput implements CfgShared::SplittingInputSig<Location, CfgInput> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ private module CfgInput implements InputSig<Location> {
private predicate idOfDbAstNode(Raw::AstNode x, int y) = equivalenceRelation(id/2)(x, y)

// TODO: does not work if fresh ipa entities (`ipa: on:`) turn out to be first of the block
predicate idOfAstNode(AstNode node, int id) {
idOfDbAstNode(Synth::convertAstNodeToRaw(node), id)
}
int idOfAstNode(AstNode node) { idOfDbAstNode(Synth::convertAstNodeToRaw(node), result) }

predicate idOfCfgScope(CfgScope node, int id) { idOfAstNode(node, id) }
int idOfCfgScope(CfgScope node) { result = idOfAstNode(node) }
}

private module CfgSplittingInput implements SplittingInputSig<Location, CfgInput> {
Expand Down
128 changes: 72 additions & 56 deletions shared/controlflow/codeql/controlflow/BasicBlock.qll
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
/**
* This modules provides an implementation of a basic block class based based
* on a control flow graph implementation.
* This module provides an implementation of a basic block class based on a
* control flow graph implementation.
*
* INTERNAL use only. This is an experimental API subject to change without
* notice.
*/

private import codeql.util.Location

/** Provides the language-specific input specification. */
signature module InputSig {
signature module InputSig<LocationSig Location> {
class SuccessorType;

/** Holds if `t` represents a conditional successor type. */
Expand All @@ -16,21 +18,31 @@ signature module InputSig {
/** The class of control flow nodes. */
class Node {
string toString();

/** Gets the location of this control flow node. */
Location getLocation();
}

/** Gets an immediate successor of this node. */
Node nodeGetASuccessor(Node node, SuccessorType t);
paldepind marked this conversation as resolved.
Show resolved Hide resolved

/** Holds if `node` is the beginning of an entry basic block. */
predicate nodeIsEntry(Node node);
/**
* Holds if `node` represents an entry node to be used when calculating
* dominance.
*/
predicate nodeIsDominanceEntry(Node node);

/** Holds if `node` is the beginning of an entry basic block. */
predicate nodeIsExit(Node node);
/**
* Holds if `node` represents an exit node to be used when calculating
* post dominance.
*/
predicate nodeIsPostDominanceExit(Node node);
}

/**
* Provides a basic block construction on top of a control flow graph.
*/
module Make<InputSig Input> {
module Make<LocationSig Location, InputSig<Location> Input> {
private import Input

final class BasicBlock = BasicBlockImpl;
Expand All @@ -42,13 +54,17 @@ module Make<InputSig Input> {
/** Holds if this node has more than one predecessor. */
private predicate nodeIsJoin(Node node) { strictcount(nodeGetAPredecessor(node, _)) > 1 }

/** Holds if this node has more than one successor. */
private predicate nodeIsBranch(Node node) { strictcount(nodeGetASuccessor(node, _)) > 1 }
paldepind marked this conversation as resolved.
Show resolved Hide resolved

/**
* A basic block, that is, a maximal straight-line sequence of control flow nodes
* without branches or joins.
*/
private class BasicBlockImpl extends TBasicBlockStart {
/** Gets the location of this basic block. */
Location getLocation() { result = this.getFirstNode().getLocation() }

/** Gets an immediate successor of this basic block, if any. */
BasicBlock getASuccessor() { result = this.getASuccessor(_) }

Expand All @@ -64,6 +80,7 @@ module Make<InputSig Input> {
BasicBlock getAPredecessor(SuccessorType t) { result.getASuccessor(t) = this }

/** Gets the control flow node at a specific (zero-indexed) position in this basic block. */
cached
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It needs to be defined inside the Cached module, and then just referenced here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to reference them with the BasicBlocks:: prefix?

Yes, that works. I didn't think of that 🙈

Node getNode(int pos) { bbIndex(this.getFirstNode(), result, pos) }

/** Gets a control flow node in this basic block. */
Expand Down Expand Up @@ -166,52 +183,6 @@ module Make<InputSig Input> {
cached
newtype TBasicBlock = TBasicBlockStart(Node cfn) { startsBB(cfn) }

/** Holds if `cfn` starts a new basic block. */
private predicate startsBB(Node cfn) {
not exists(nodeGetAPredecessor(cfn, _)) and exists(nodeGetASuccessor(cfn, _))
or
nodeIsJoin(cfn)
or
nodeIsBranch(nodeGetAPredecessor(cfn, _))
or
// In cases such as
//
// ```rb
// if x or y
// foo
// else
// bar
// ```
//
// we have a CFG that looks like
//
// x --false--> [false] x or y --false--> bar
// \ |
// --true--> y --false--
// \
// --true--> [true] x or y --true--> foo
//
// and we want to ensure that both `foo` and `bar` start a new basic block.
exists(nodeGetAPredecessor(cfn, any(SuccessorType s | successorTypeIsCondition(s))))
}

/**
* Holds if `succ` is a control flow successor of `pred` within
* the same basic block.
*/
private predicate intraBBSucc(Node pred, Node succ) {
succ = nodeGetASuccessor(pred, _) and
not startsBB(succ)
}

/**
* Holds if `bbStart` is the first node in a basic block and `cfn` is the
* `i`th node in the same basic block.
*/
cached
predicate bbIndex(Node bbStart, Node cfn, int i) =
shortestDistances(startsBB/1, intraBBSucc/2)(bbStart, cfn, i)

/**
* Holds if the first node of basic block `succ` is a control flow
* successor of the last node of basic block `pred`.
Expand All @@ -227,7 +198,7 @@ module Make<InputSig Input> {
private predicate predBB(BasicBlock succ, BasicBlock pred) { succBB(pred, succ) }

/** Holds if `bb` is an exit basic block that represents normal exit. */
private predicate exitBB(BasicBlock bb) { nodeIsExit(bb.getANode()) }
private predicate exitBB(BasicBlock bb) { nodeIsPostDominanceExit(bb.getANode()) }

/** Holds if `dom` is an immediate post-dominator of `bb`. */
cached
Expand All @@ -237,6 +208,51 @@ module Make<InputSig Input> {

private import Cached

/** Holds if `cfn` starts a new basic block. */
private predicate startsBB(Node cfn) {
not exists(nodeGetAPredecessor(cfn, _)) and exists(nodeGetASuccessor(cfn, _))
or
nodeIsJoin(cfn)
or
nodeIsBranch(nodeGetAPredecessor(cfn, _))
or
// In cases such as
//
// ```rb
// if x and y
//   foo
// else
//   bar
// ```
//
// we have a CFG that looks like
//
// x --false--> [false] x and y --false--> bar
// \                    |
//  --true--> y --false--
//             \
//              --true--> [true] x and y --true--> foo
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This CFG is for x and y - the true/false edges are mixed up. The easiest fix is probably just to replace x or y with x and y.

//
// and we want to ensure that both `foo` and `bar` start a new basic block.
exists(nodeGetAPredecessor(cfn, any(SuccessorType s | successorTypeIsCondition(s))))
}

/**
* Holds if `pred` and `succ` are consecutive control flow nodes of one and
* the same basic block, that is, `succ` is a successor of `pred` (via any
* successor type) and `succ` does not start a new basic block.
*/
// NOTE(review): unlike the sibling helpers `startsBB` and `bbIndex`, this
// predicate is not marked `private` - confirm that exposing it is intended.
predicate intraBBSucc(Node pred, Node succ) {
not startsBB(succ) and
nodeGetASuccessor(pred, _) = succ
}

/**
* Holds if `bbStart` is the first node in a basic block and `cfn` is the
* `i`th node in the same basic block.
*
* The relation is computed with the `shortestDistances` higher-order
* primitive, using the nodes satisfying `startsBB` as starting points and
* `intraBBSucc` as the edge relation, so `i` is the number of
* intra-block successor steps needed to reach `cfn` from `bbStart`.
*
* NOTE(review): `getNode(int pos)` documents its position as zero-indexed and
* forwards `pos` to `i` here, so `bbStart` itself is presumably at index 0 -
* confirm.
*/
private predicate bbIndex(Node bbStart, Node cfn, int i) =
shortestDistances(startsBB/1, intraBBSucc/2)(bbStart, cfn, i)
Comment on lines +258 to +259
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned the other day, I don't think shortestDistances is optimal. IIRC it can be beat with a QL recursion, but it's something that'll need fresh measurements on some large examples. I expect the C++ implementation to be in the lead in terms of performance on this particular predicate, and C++ uses a straightforward QL recursion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since that requires some benchmarking I'd prefer to leave that for a future PR.


/** Holds if `bb` is an entry basic block. */
private predicate entryBB(BasicBlock bb) { nodeIsEntry(bb.getFirstNode()) }
private predicate entryBB(BasicBlock bb) { nodeIsDominanceEntry(bb.getFirstNode()) }
}
33 changes: 16 additions & 17 deletions shared/controlflow/codeql/controlflow/Cfg.qll
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,14 @@ signature module InputSig<LocationSig Location> {
* basic block.
*/
bindingset[node]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the bindingset here appropriate? My thinking was that we don't need IDs of all nodes, only the few that end up as the first in a basic block prior to a join block.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the actual equivalenceRelation based hacks will be able to benefit, so we can remove it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, no. I don't think there's any way to implement a numbering scheme that can make use of a pre-bound node. So you're only risking inlining and recomputation.

Suggested change
bindingset[node]

default predicate idOfAstNode(AstNode node, int id) { none() }
int idOfAstNode(AstNode node);

/**
paldepind marked this conversation as resolved.
Show resolved Hide resolved
* Gets an `id` of `scope`. This is used to order the predecessors of a join
* basic block.
*/
bindingset[scope]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bindingset[scope]

default predicate idOfCfgScope(CfgScope scope, int id) { none() }
int idOfCfgScope(CfgScope scope);
}

/** Provides input needed for CFG splitting. */
Expand Down Expand Up @@ -1537,7 +1537,7 @@ module MakeWithSplitting<

private class NodeAlias = Node;

private module BasicBlockInputSig implements BB::InputSig {
private module BasicBlockInputSig implements BB::InputSig<Location> {
class SuccessorType = Input::SuccessorType;

predicate successorTypeIsCondition = Input::successorTypeIsCondition/1;
Fixed Show fixed Hide fixed
Expand All @@ -1546,30 +1546,26 @@ module MakeWithSplitting<

Node nodeGetASuccessor(Node node, SuccessorType t) { result = node.getASuccessor(t) }
Fixed Show fixed Hide fixed

predicate nodeIsEntry(Node node) { node instanceof EntryNode }
predicate nodeIsDominanceEntry(Node node) { node instanceof EntryNode }

predicate nodeIsExit(Node node) { node.(AnnotatedExitNode).isNormal() }
predicate nodeIsPostDominanceExit(Node node) { node.(AnnotatedExitNode).isNormal() }
}

private module BasicBlockImpl = BB::Make<BasicBlockInputSig>;
private module BasicBlockImpl = BB::Make<Location, BasicBlockInputSig>;

/**
* A basic block, that is, a maximal straight-line sequence of control flow nodes
* without branches or joins.
*/
final class BasicBlock extends BasicBlockImpl::BasicBlock {
// We extend `BasicBlockImpl::BasicBlock` to add the `getScope` and
// `getLocation`.
// We extend `BasicBlockImpl::BasicBlock` to add the `getScope`.
/** Gets the scope of this basic block. */
CfgScope getScope() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thoughts on putting Node.getScope through the BasicBlock input, such that getScope could be added to BasicBlock directly in its definition inside shared/controlflow/codeql/controlflow/BasicBlock.qll? That way this could just be an alias rather than having to copy all the member predicates.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would definitely save some boiler plate, but could make the BasicBlock module less flexible for instantiations that don't want a getScope.

if this instanceof EntryBasicBlock
then result = this.getFirstNode().getScope()
else result = this.getAPredecessor().getScope()
}

/** Gets the location of this basic block. */
Location getLocation() { result = this.getFirstNode().getLocation() }

/** Gets an immediate successor of this basic block, if any. */
BasicBlock getASuccessor() { result = super.getASuccessor() }

Expand Down Expand Up @@ -1678,10 +1674,13 @@ module MakeWithSplitting<
}

private module JoinBlockPredecessors {
int getId(JoinPredecessorBasicBlock jbp) {
idOfAstNode(jbp.getFirstNode().(AstCfgNode).getAstNode(), result)
predicate hasIdAndKind(JoinPredecessorBasicBlock jbp, int id, int kind) {
id = idOfCfgScope(jbp.(EntryBasicBlock).getScope()) and
kind = 0
or
idOfCfgScope(jbp.(EntryBasicBlock).getScope(), result)
not jbp instanceof EntryBasicBlock and
id = idOfAstNode(jbp.getFirstNode().(AstCfgNode).getAstNode()) and
kind = 1
}

string getSplitString(JoinPredecessorBasicBlock jbp) {
Expand All @@ -1699,10 +1698,10 @@ module MakeWithSplitting<
cached
JoinPredecessorBasicBlock getJoinBlockPredecessor(JoinBasicBlock jb, int i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be moved into the Cached module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't that require moving JoinPredecessorBasicBlock and JoinBasicBlock out of the BasicBlocks module as well? Isn't it nice to keep them encapsulated inside the BasicBlock module?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to reference them with the BasicBlocks:: prefix?

result =
rank[i + 1](JoinPredecessorBasicBlock jbp |
jbp = jb.getAPredecessor()
rank[i + 1](JoinPredecessorBasicBlock jbp, int id, int kind |
jbp = jb.getAPredecessor() and JoinBlockPredecessors::hasIdAndKind(jbp, id, kind)
|
jbp order by JoinBlockPredecessors::getId(jbp), JoinBlockPredecessors::getSplitString(jbp)
jbp order by id, kind, JoinBlockPredecessors::getSplitString(jbp)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ module CfgInput implements InputSig<Location> {
result = n.(FuncDeclElement).getAst()
}

predicate idOfAstNode(AstNode node, int id) { idOf(projectToAst(node), id) }
int idOfAstNode(AstNode node) { idOf(projectToAst(node), result) }

predicate idOfCfgScope(CfgScope node, int id) { idOf(node, id) }
int idOfCfgScope(CfgScope node) { idOf(node, result) }
}

private module CfgSplittingInput implements SplittingInputSig<Location, CfgInput> {
Expand Down
Loading