Explaining queries

If it is unclear how a given query will perform, clients can retrieve a query's execution plan from the AQL query optimizer without actually executing the query. Getting the query execution plan from the optimizer is called explaining.

An explain will throw an error if the given query is syntactically invalid. Otherwise, it will return the execution plan and some information about what optimizations could be applied to the query. The query will not be executed.

Explaining a query can be achieved by calling the HTTP REST API. A query can also be explained from the ArangoShell using ArangoStatement's explain method.

By default, the query optimizer will return what it considers to be the optimal plan. The optimal plan will be returned in the plan attribute of the result. If explain is called with option allPlans set to true, all plans will be returned in the plans attribute instead. The result object will also contain an attribute warnings, which is an array of warnings that occurred during optimization or execution plan creation.

Each plan in the result is an object with the following attributes:

nodes: the array of execution nodes of the plan. The list of available node types can be found in the list of execution nodes in the AQL query optimizer documentation.
estimatedCost: the total estimated cost for the plan. If there are multiple plans, the optimizer will choose the plan with the lowest total cost.
collections: an array of collections used in the query
rules: an array of rules the optimizer applied. The list of rules can be found in the list of optimizer rules in the AQL query optimizer documentation.
variables: an array of variables used in the query (note: this may contain internal variables created by the optimizer)

Here is an example for retrieving the execution plan of a simple query:

arangosh> var stmt = db._createStatement(
........> "FOR user IN _users RETURN user");
arangosh> stmt.explain();
{ 
  "plan" : { 
    "nodes" : [ 
      { 
        "type" : "SingletonNode", 
        "dependencies" : [ ], 
        "id" : 1, 
        "estimatedCost" : 1, 
        "estimatedNrItems" : 1 
      }, 
      { 
        "type" : "EnumerateCollectionNode", 
        "dependencies" : [ 
          1 
        ], 
        "id" : 2, 
        "estimatedCost" : 3, 
        "estimatedNrItems" : 1, 
        "database" : "_system", 
        "collection" : "_users", 
        "random" : false, 
        "satellite" : false, 
        "outVariable" : { 
          "id" : 0, 
          "name" : "user" 
        } 
      }, 
      { 
        "type" : "ReturnNode", 
        "dependencies" : [ 
          2 
        ], 
        "id" : 3, 
        "estimatedCost" : 4, 
        "estimatedNrItems" : 1, 
        "inVariable" : { 
          "id" : 0, 
          "name" : "user" 
        } 
      } 
    ], 
    "rules" : [ ], 
    "collections" : [ 
      { 
        "name" : "_users", 
        "type" : "read" 
      } 
    ], 
    "variables" : [ 
      { 
        "id" : 0, 
        "name" : "user" 
      } 
    ], 
    "estimatedCost" : 4, 
    "estimatedNrItems" : 1, 
    "initialize" : true 
  }, 
  "warnings" : [ ], 
  "stats" : { 
    "rulesExecuted" : 29, 
    "rulesSkipped" : 0, 
    "plansCreated" : 1 
  }, 
  "cacheable" : true 
}

arangosh> var stmt = db._createStatement(
........> "FOR user IN _users RETURN user");
arangosh> stmt.explain();

show execution results

As the output of explain is very detailed, it is recommended to use some scripting to make the output less verbose:

arangosh> var formatPlan = function (plan) {
........>    return { estimatedCost: plan.estimatedCost,
........>        nodes: plan.nodes.map(function(node) {
........> return node.type; }) }; };
arangosh> formatPlan(stmt.explain().plan);
{ 
  "estimatedCost" : 4, 
  "nodes" : [ 
    "SingletonNode", 
    "EnumerateCollectionNode", 
    "ReturnNode" 
  ] 
}

arangosh> var formatPlan = function (plan) {
........>    return { estimatedCost: plan.estimatedCost,
........>        nodes: plan.nodes.map(function(node) {
........> return node.type; }) }; };
arangosh> formatPlan(stmt.explain().plan);

show execution results

If a query contains bind parameters, they must be added to the statement before explain is called:

arangosh> var stmt = db._createStatement(
........> `FOR doc IN @@collection FILTER doc.user == @user RETURN doc`
........> );
arangosh> stmt.bind({ "@collection" : "_users", "user" : "root" });
arangosh> stmt.explain();
{ 
  "plan" : { 
    "nodes" : [ 
      { 
        "type" : "SingletonNode", 
        "dependencies" : [ ], 
        "id" : 1, 
        "estimatedCost" : 1, 
        "estimatedNrItems" : 1 
      }, 
      { 
        "type" : "IndexNode", 
        "dependencies" : [ 
          1 
        ], 
        "id" : 6, 
        "estimatedCost" : 1.95, 
        "estimatedNrItems" : 1, 
        "database" : "_system", 
        "collection" : "_users", 
        "satellite" : false, 
        "outVariable" : { 
          "id" : 0, 
          "name" : "doc" 
        }, 
        "indexes" : [ 
          { 
            "id" : "9", 
            "type" : "hash", 
            "fields" : [ 
              "user" 
            ], 
            "selectivityEstimate" : 1, 
            "unique" : true, 
            "sparse" : true, 
            "deduplicate" : true 
          } 
        ], 
        "condition" : { 
          "type" : "n-ary or", 
          "subNodes" : [ 
            { 
              "type" : "n-ary and", 
              "subNodes" : [ 
                { 
                  "type" : "compare ==", 
                  "subNodes" : [ 
                    { 
                      "type" : "attribute access", 
                      "name" : "user", 
                      "subNodes" : [ 
                        { 
                          "type" : "reference", 
                          "name" : "doc", 
                          "id" : 0 
                        } 
                      ] 
                    }, 
                    { 
                      "type" : "value", 
                      "value" : "root" 
                    } 
                  ] 
                } 
              ] 
            } 
          ] 
        }, 
        "reverse" : false 
      }, 
      { 
        "type" : "ReturnNode", 
        "dependencies" : [ 
          6 
        ], 
        "id" : 5, 
        "estimatedCost" : 2.95, 
        "estimatedNrItems" : 1, 
        "inVariable" : { 
          "id" : 0, 
          "name" : "doc" 
        } 
      } 
    ], 
    "rules" : [ 
      "use-indexes", 
      "remove-filter-covered-by-index", 
      "remove-unnecessary-calculations-2" 
    ], 
    "collections" : [ 
      { 
        "name" : "_users", 
        "type" : "read" 
      } 
    ], 
    "variables" : [ 
      { 
        "id" : 2, 
        "name" : "1" 
      }, 
      { 
        "id" : 0, 
        "name" : "doc" 
      } 
    ], 
    "estimatedCost" : 2.95, 
    "estimatedNrItems" : 1, 
    "initialize" : true 
  }, 
  "warnings" : [ ], 
  "stats" : { 
    "rulesExecuted" : 29, 
    "rulesSkipped" : 0, 
    "plansCreated" : 1 
  }, 
  "cacheable" : true 
}

arangosh> var stmt = db._createStatement(
........> `FOR doc IN @@collection FILTER doc.user == @user RETURN doc`
........> );
arangosh> stmt.bind({ "@collection" : "_users", "user" : "root" });
arangosh> stmt.explain();

show execution results

In some cases the AQL optimizer creates multiple plans for a single query. By default only the plan with the lowest total estimated cost is kept, and the other plans are discarded. To retrieve all plans the optimizer has generated, explain can be called with the option allPlans set to true.

The following example shows how to retrieve the number of plans the optimizer has created (a single plan in this case):

arangosh> var stmt = db._createStatement(
........> "FOR user IN _users FILTER user.user == 'root' RETURN user");
arangosh> stmt.explain({ allPlans: true }).plans.length;
1

To see a slightly more compact version of the plan, the following transformation can be applied:

arangosh> stmt.explain({ allPlans: true }).plans.map(
........> function(plan) { return formatPlan(plan); });
[ 
  { 
    "estimatedCost" : 2.95, 
    "nodes" : [ 
      "SingletonNode", 
      "IndexNode", 
      "ReturnNode" 
    ] 
  } 
]

arangosh> stmt.explain({ allPlans: true }).plans.map(
........> function(plan) { return formatPlan(plan); });

show execution results

explain will also accept the following additional options:

maxPlans: limits the maximum number of plans that are created by the AQL query optimizer
optimizer.rules: an array of to-be-included or to-be-excluded optimizer rules can be put into this attribute, telling the optimizer to include or exclude specific rules. To disable a rule, prefix its name with a -; to enable a rule, prefix it with a +. There is also a pseudo-rule all, which will match all optimizer rules.

The following example disables all optimizer rules but remove-redundant-calculations:

arangosh> stmt.explain({ optimizer: {
........> rules: [ "-all", "+remove-redundant-calculations" ] } });
{ 
  "plan" : { 
    "nodes" : [ 
      { 
        "type" : "SingletonNode", 
        "dependencies" : [ ], 
        "id" : 1, 
        "estimatedCost" : 1, 
        "estimatedNrItems" : 1 
      }, 
      { 
        "type" : "EnumerateCollectionNode", 
        "dependencies" : [ 
          1 
        ], 
        "id" : 2, 
        "estimatedCost" : 3, 
        "estimatedNrItems" : 1, 
        "database" : "_system", 
        "collection" : "_users", 
        "random" : false, 
        "satellite" : false, 
        "outVariable" : { 
          "id" : 0, 
          "name" : "user" 
        } 
      }, 
      { 
        "type" : "CalculationNode", 
        "dependencies" : [ 
          2 
        ], 
        "id" : 3, 
        "estimatedCost" : 4, 
        "estimatedNrItems" : 1, 
        "expression" : { 
          "type" : "compare ==", 
          "subNodes" : [ 
            { 
              "type" : "attribute access", 
              "name" : "user", 
              "subNodes" : [ 
                { 
                  "type" : "reference", 
                  "name" : "user", 
                  "id" : 0 
                } 
              ] 
            }, 
            { 
              "type" : "value", 
              "value" : "root" 
            } 
          ] 
        }, 
        "outVariable" : { 
          "id" : 2, 
          "name" : "1" 
        }, 
        "canThrow" : false, 
        "expressionType" : "simple" 
      }, 
      { 
        "type" : "FilterNode", 
        "dependencies" : [ 
          3 
        ], 
        "id" : 4, 
        "estimatedCost" : 5, 
        "estimatedNrItems" : 1, 
        "inVariable" : { 
          "id" : 2, 
          "name" : "1" 
        } 
      }, 
      { 
        "type" : "ReturnNode", 
        "dependencies" : [ 
          4 
        ], 
        "id" : 5, 
        "estimatedCost" : 6, 
        "estimatedNrItems" : 1, 
        "inVariable" : { 
          "id" : 0, 
          "name" : "user" 
        } 
      } 
    ], 
    "rules" : [ ], 
    "collections" : [ 
      { 
        "name" : "_users", 
        "type" : "read" 
      } 
    ], 
    "variables" : [ 
      { 
        "id" : 2, 
        "name" : "1" 
      }, 
      { 
        "id" : 0, 
        "name" : "user" 
      } 
    ], 
    "estimatedCost" : 6, 
    "estimatedNrItems" : 1, 
    "initialize" : true 
  }, 
  "warnings" : [ ], 
  "stats" : { 
    "rulesExecuted" : 1, 
    "rulesSkipped" : 28, 
    "plansCreated" : 1 
  }, 
  "cacheable" : true 
}

arangosh> stmt.explain({ optimizer: {
........> rules: [ "-all", "+remove-redundant-calculations" ] } });

show execution results

The contents of an execution plan are meant to be machine-readable. To get a human-readable version of a query's execution plan, the following commands can be used:

arangosh> var query = "FOR doc IN mycollection FILTER doc.value > 42 RETURN doc";
arangosh> require("@arangodb/aql/explainer").explain(query, {colors:false});
Query string:
 FOR doc IN mycollection FILTER doc.value > 42 RETURN doc

Execution plan:
 Id   NodeType                  Est.   Comment
  1   SingletonNode                1   * ROOT
  2   EnumerateCollectionNode    302     - FOR doc IN mycollection   /* full collection scan */
  3   CalculationNode            302       - LET #1 = (doc.`value` > 42)   /* simple expression */   /* collections used: doc : mycollection */
  4   FilterNode                 302       - FILTER #1
  5   ReturnNode                 302       - RETURN doc

Indexes used:
 none

Optimization rules applied:
 none

arangosh> var query = "FOR doc IN mycollection FILTER doc.value > 42 RETURN doc";
arangosh> require("@arangodb/aql/explainer").explain(query, {colors:false});

show execution results

The above command prints the query's execution plan in the ArangoShell directly, focusing on the most important information.