DAG - An Immutable Spreadsheet Data Structure
24 Aug 2018In finance, data grids can be defined as a set of input fields and function fields that take other field values as parameters. Spreadsheets are often used to do this, but they have several limitations.
Recently, I've been working on ways of describing calculations, so they can just as easily be viewed in a desktop application, web report or spreadsheet.
One of the components required to do this is a functional calculation graph much like how a spreadsheet works. This blog aims to construct a functional directed acyclic graph (DAG) calculation data structure.
DAG code
We don't have to work very hard to ensure the graph is not circular or keep the cells in topological order. The API can be designed such that it is only possible to add function cells when the parameter cells already exist in the DAG. All tasks can be performed with a single pass of the cells in the order they were added.
The DAG data structure is made immutable by cloning any internal arrays when they need to be changed. Grids can keep the old version of the calculations or compare and switch to the new version when needed.
The DAG is fully type safe by use of an applicative functor builder in constructing function cells.
Cells are evaluated as lazy
Task
so calculations run as parallel as possible.
Calculations are run only once, and results are reused even after further updates to the DAG.
The code can be downloaded here.
type Dag = private {
InputValues : obj array
FunctionInputs : (Set<int> * Set<int>) array
FunctionFunctions : (Dag -> obj) array //obj is Task<'a>
FunctionValues : Lazy<obj> array //obj is Task<'a>
}
module Dag =
let private append a v =
let mutable a = a
Array.Resize(&a, Array.length a + 1)
a.[Array.length a - 1] <- v
a
type Input = private | CellInput
type Function = private | CellFunction
type Cell<'a,'b> = private | Cell of int
let empty = {
InputValues = [||]
FunctionInputs = [||]
FunctionFunctions = [||]
FunctionValues = [||]
}
let addInput (v:'a) (d:Dag) : Dag * Cell<'a, Input> =
{ d with
InputValues = box v |> append d.InputValues
}, Cell d.InputValues.Length
let getValue (Cell i:Cell<'a,Input>) (d:Dag) : 'a =
downcast d.InputValues.[i]
let setInput (Cell i:Cell<'a,Input>) (a:'a) (d:Dag) : Dag =
if downcast d.InputValues.[i] = a then d
else
let dirtyCalcs =
Seq.fold (fun (j,s) (inputs,calcInputs) ->
if Set.contains i inputs ||
Set.intersect s calcInputs |> Set.isEmpty |> not then
j+1, Set.add j s
else
j+1, s
) (0,Set.empty) d.FunctionInputs
|> snd
let inputValues = Array.copy d.InputValues
inputValues.[i] <- box a
if Set.isEmpty dirtyCalcs then { d with InputValues = inputValues }
else
let functionValues = Array.copy d.FunctionValues
let dag = {
d with
InputValues = inputValues
FunctionValues = functionValues
}
Set.iter (fun i ->
functionValues.[i] <- lazy d.FunctionFunctions.[i] dag
) dirtyCalcs
dag
let getValueTask (Cell i:Cell<'a,Function>) (d:Dag) : Task<'a> =
downcast d.FunctionValues.[i].Value
let changed (Cell i:Cell<'a,'t>) (before:Dag) (after:Dag) : bool =
if typeof<'t> = typeof<Function> then
before.FunctionValues.[i] <> after.FunctionValues.[i]
else
downcast before.InputValues.[i] <> downcast after.InputValues.[i]
type 'a Builder = private {
Dag : Dag
Inputs : Set<int> * Set<int>
Function : Dag -> Task<'a>
}
let buildFunction (d:Dag) f = {
Dag = d
Inputs = Set.empty, Set.empty
Function = fun _ -> Task.FromResult f
}
let applyCell (Cell i:Cell<'a,'t>) {Dag=dag;Inputs=inI,inC;Function=bFn} =
let inline taskMap f (t:Task<_>) =
t.ContinueWith(fun (r:Task<_>) -> f r.Result)
let isFunctionCell = typeof<'t> = typeof<Function>
{
Dag = dag
Inputs =
if isFunctionCell then inI, Set.add i inC
else Set.add i inI, inC
Function =
if isFunctionCell then
fun d ->
let fTask = bFn d
( downcast d.FunctionValues.[i].Value
|> taskMap (fun a -> taskMap (fun f -> f a) fTask)
).Unwrap()
else
fun d ->
bFn d |> taskMap (fun f ->
downcast d.InputValues.[i] |> f )
}
let addFunction ({Dag=dag;Inputs=ips;Function=fn}:'a Builder) =
let calc = fn >> box
let d = {
dag with
FunctionInputs = append dag.FunctionInputs ips
FunctionFunctions = append dag.FunctionFunctions calc
FunctionValues = append dag.FunctionValues null
}
d.FunctionValues.[d.FunctionValues.Length-1] <- lazy calc d
let cell : Cell<'a,Function> = Cell dag.FunctionValues.Length
d, cell
Testing
The following tests demonstrate use of the DAG.
let tests =
testList "dag tests" [
testAsync "one cell" {
let dag, cell1 = Dag.addInput 7 Dag.empty
Expect.equal 7 (Dag.getValue cell1 dag) "one cell"
}
testAsync "two cell" {
let dag, cell1 = Dag.addInput 8 Dag.empty
let dag, cell2 = Dag.addInput 9 dag
Expect.equal 8 (Dag.getValue cell1 dag) "first 8"
Expect.equal 9 (Dag.getValue cell2 dag) "second 9"
}
testAsync "one function" {
let dag, cell1 = Dag.addInput 42 Dag.empty
let dag, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let! result = Dag.getValueTask cell2 dag |> Async.AwaitTask
Expect.equal 420 result "42 * 10 = 420"
}
testAsync "one function with set" {
let dag, cell1 = Dag.addInput 13 Dag.empty
let dag, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let dag = Dag.setInput cell1 43 dag
let! result = Dag.getValueTask cell2 dag |> Async.AwaitTask
Expect.equal 430 result "43 * 10 = 430"
}
testAsync "one function with set twice" {
let dag, cell1 = Dag.addInput 15 Dag.empty
let dag, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let dag = Dag.setInput cell1 43 dag
let dag = Dag.setInput cell1 44 dag
let! result = Dag.getValueTask cell2 dag |> Async.AwaitTask
Expect.equal 440 result "44 * 10 = 440"
}
testAsync "not changed input" {
let dagBefore, cell1 = Dag.addInput 42 Dag.empty
let dagAfter,_ = Dag.addInput 45 dagBefore
Expect.isFalse (Dag.changed cell1 dagBefore dagAfter) "no change"
}
testAsync "changed input" {
let dagBefore, cell1 = Dag.addInput 42 Dag.empty
let dagAfter = Dag.setInput cell1 45 dagBefore
Expect.isTrue (Dag.changed cell1 dagBefore dagAfter) "changed"
}
testAsync "not changed function" {
let dag, cell1 = Dag.addInput 42 Dag.empty
let dagBefore, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let dagAfter,_ = Dag.addInput 45 dagBefore
Expect.isFalse (Dag.changed cell2 dagBefore dagAfter) "no change"
}
testAsync "changed function" {
let dag, cell1 = Dag.addInput 17 Dag.empty
let dagBefore, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let dagAfter = Dag.setInput cell1 45 dagBefore
Expect.isTrue (Dag.changed cell2 dagBefore dagAfter) "changed"
let! result = Dag.getValueTask cell2 dagAfter |> Async.AwaitTask
Expect.equal 450 result "45 * 10 = 450"
let! result = Dag.getValueTask cell2 dagBefore |> Async.AwaitTask
Expect.equal 170 result "17 * 10 = 170"
}
testAsync "chained functions" {
let dag, cell1 = Dag.addInput 18 Dag.empty
let dag, cell2 =
Dag.buildFunction dag (fun x -> x * 10)
|> Dag.applyCell cell1
|> Dag.addFunction
let dagBefore, cell3 =
Dag.buildFunction dag (fun x -> x + 1)
|> Dag.applyCell cell2
|> Dag.addFunction
let dagAfter = Dag.setInput cell1 23 dagBefore
let! result = Dag.getValueTask cell3 dagAfter |> Async.AwaitTask
Expect.equal 231 result "231"
let! result = Dag.getValueTask cell3 dagBefore |> Async.AwaitTask
Expect.equal 181 result "181"
}
testAsync "two function" {
let dag, cell1 = Dag.addInput "z" Dag.empty
let dag, cell2 = Dag.addInput 7 dag
let dag, cell3 =
Dag.buildFunction dag (fun s (n:int) -> s + string n)
|> Dag.applyCell cell1
|> Dag.applyCell cell2
|> Dag.addFunction
let! result = Dag.getValueTask cell3 dag |> Async.AwaitTask
Expect.equal "z7" result "z7"
}
testAsync "three function with set" {
let dag, cell1 = Dag.addInput "f" Dag.empty
let dag, cell2 = Dag.addInput 8 dag
let dag, cell3 = Dag.addInput 1.5 dag
let dag, cell4 =
Dag.buildFunction dag
(fun s (n:int) (f:float) -> s + string n + "-" + string f)
|> Dag.applyCell cell1
|> Dag.applyCell cell2
|> Dag.applyCell cell3
|> Dag.addFunction
let! result = Dag.getValueTask cell4 dag |> Async.AwaitTask
Expect.equal "f8-1.5" result "f8-1.5"
let dag = Dag.setInput cell1 "w" dag
let! result = Dag.getValueTask cell4 dag |> Async.AwaitTask
Expect.equal "w8-1.5" result "w8-1.5"
}
testAsync "chained functions multi" {
let dag, cell1 = Dag.addInput "a" Dag.empty
let dag, cell2 = Dag.addInput 1 dag
let dag, cell3 =
Dag.buildFunction dag (fun s (n:int) -> "x:" + s + string n)
|> Dag.applyCell cell1
|> Dag.applyCell cell2
|> Dag.addFunction
let dag, cell4 = Dag.addInput "b" dag
let dag, cell5 = Dag.addInput 2 dag
let dag, cell6 =
Dag.buildFunction dag (fun s (n:int) -> "y:" + s + string n)
|> Dag.applyCell cell4
|> Dag.applyCell cell5
|> Dag.addFunction
let dag, cell7 = Dag.addInput "c" dag
let dag, cell8 = Dag.addInput 3 dag
let dag, cell9 =
Dag.buildFunction dag (fun s (n:int) -> "z:" + s + string n)
|> Dag.applyCell cell7
|> Dag.applyCell cell8
|> Dag.addFunction
let dagBefore, cell10 =
Dag.buildFunction dag
(fun s1 s2 s3 -> String.concat "|" [s1;s2;s3])
|> Dag.applyCell cell3
|> Dag.applyCell cell6
|> Dag.applyCell cell9
|> Dag.addFunction
let dagAfter = Dag.setInput cell5 4 dagBefore
let! result = Dag.getValueTask cell10 dagAfter |> Async.AwaitTask
Expect.equal "x:a1|y:b4|z:c3" result "x:a1|y:b4|z:c3"
let! result = Dag.getValueTask cell10 dagBefore |> Async.AwaitTask
Expect.equal "x:a1|y:b2|z:c3" result "x:a1|y:b2|z:c3"
}
]
Conclusion
This has been a very successful experiment. The DAG has some nice features while keeping type safety.
The way immutability has been implemented means it is probably not best suited to fast realtime updates or very fine-grained calculations. For more coarse-grained calculations like grids of dependent fields, where each cell represents a column of values and summaries, it could be ideal.
private {InputValues: obj array;
FunctionInputs: (Set<int> * Set<int>) array;
FunctionFunctions: (Dag -> obj) array;
FunctionValues: Lazy<obj> array;}
module Set
from Microsoft.FSharp.Collections
--------------------
type Set<'T (requires comparison)> =
interface IReadOnlyCollection<'T>
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
...
--------------------
new : elements:seq<'T> -> Set<'T>
val int : value:'T -> int (requires member op_Explicit)
--------------------
type int = int32
--------------------
type int<'Measure> = int
active recognizer Lazy: Lazy<'T> -> 'T
--------------------
type Lazy<'T> =
new : unit -> Lazy<'T> + 5 overloads
member IsValueCreated : bool
member ToString : unit -> string
member Value : 'T
--------------------
type Lazy<'T,'TMetadata> =
inherit Lazy<'T>
new : metadata:'TMetadata -> Lazy<'T, 'TMetadata> + 5 overloads
member Metadata : 'TMetadata
--------------------
Lazy() : Lazy<'T>
Lazy(valueFactory: Func<'T>) : Lazy<'T>
Lazy(isThreadSafe: bool) : Lazy<'T>
Lazy(mode: Threading.LazyThreadSafetyMode) : Lazy<'T>
Lazy(valueFactory: Func<'T>, isThreadSafe: bool) : Lazy<'T>
Lazy(valueFactory: Func<'T>, mode: Threading.LazyThreadSafetyMode) : Lazy<'T>
--------------------
Lazy(metadata: 'TMetadata) : Lazy<'T,'TMetadata>
Lazy(valueFactory: Func<'T>, metadata: 'TMetadata) : Lazy<'T,'TMetadata>
Lazy(metadata: 'TMetadata, isThreadSafe: bool) : Lazy<'T,'TMetadata>
Lazy(metadata: 'TMetadata, mode: Threading.LazyThreadSafetyMode) : Lazy<'T,'TMetadata>
Lazy(valueFactory: Func<'T>, metadata: 'TMetadata, isThreadSafe: bool) : Lazy<'T,'TMetadata>
Lazy(valueFactory: Func<'T>, metadata: 'TMetadata, mode: Threading.LazyThreadSafetyMode) : Lazy<'T,'TMetadata>
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...
union case Dag.Cell.Cell: int -> Dag.Cell<'a,'b>
--------------------
type Cell<'a,'b> = private | Cell of int
from Microsoft.FSharp.Collections
type Task =
new : action:Action -> Task + 7 overloads
member AsyncState : obj
member ConfigureAwait : continueOnCapturedContext:bool -> ConfiguredTaskAwaitable
member ContinueWith : continuationAction:Action<Task> -> Task + 19 overloads
member CreationOptions : TaskCreationOptions
member Dispose : unit -> unit
member Exception : AggregateException
member GetAwaiter : unit -> TaskAwaiter
member Id : int
member IsCanceled : bool
...
--------------------
type Task<'TResult> =
inherit Task
new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
member ConfigureAwait : continueOnCapturedContext:bool -> ConfiguredTaskAwaitable<'TResult>
member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 19 overloads
member GetAwaiter : unit -> TaskAwaiter<'TResult>
member Result : 'TResult
static member Factory : TaskFactory<'TResult>
--------------------
Task(action: Action) : Task
Task(action: Action, cancellationToken: Threading.CancellationToken) : Task
Task(action: Action, creationOptions: TaskCreationOptions) : Task
Task(action: Action<obj>, state: obj) : Task
Task(action: Action, cancellationToken: Threading.CancellationToken, creationOptions: TaskCreationOptions) : Task
Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken) : Task
Task(action: Action<obj>, state: obj, creationOptions: TaskCreationOptions) : Task
Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: TaskCreationOptions) : Task
--------------------
Task(function: Func<'TResult>) : Task<'TResult>
Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken) : Task<'TResult>
Task(function: Func<'TResult>, creationOptions: TaskCreationOptions) : Task<'TResult>
Task(function: Func<obj,'TResult>, state: obj) : Task<'TResult>
Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken, creationOptions: TaskCreationOptions) : Task<'TResult>
Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken) : Task<'TResult>
Task(function: Func<obj,'TResult>, state: obj, creationOptions: TaskCreationOptions) : Task<'TResult>
Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: TaskCreationOptions) : Task<'TResult>
private {Dag: Dag;
Inputs: Set<int> * Set<int>;
Function: Dag -> Task<'a>;}
Dag.Builder.Dag: Dag
--------------------
type Dag =
private {InputValues: obj array;
FunctionInputs: (Set<int> * Set<int>) array;
FunctionFunctions: (Dag -> obj) array;
FunctionValues: Lazy<obj> array;}
Dag.Builder.Function: Dag -> Task<'a>
--------------------
type Function = private | CellFunction
(+0 other overloads)
Task.ContinueWith(continuationAction: Action<Task>) : Task
(+0 other overloads)
Task.ContinueWith<'TNewResult>(continuationFunction: Func<Task<'c>,'TNewResult>) : Task<'TNewResult>
(+0 other overloads)
Task.ContinueWith(continuationAction: Action<Task<'c>>) : Task
(+0 other overloads)
Task.ContinueWith<'TResult>(continuationFunction: Func<Task,obj,'TResult>, state: obj) : Task<'TResult>
(+0 other overloads)
Task.ContinueWith<'TResult>(continuationFunction: Func<Task,'TResult>, continuationOptions: TaskContinuationOptions) : Task<'TResult>
(+0 other overloads)
Task.ContinueWith<'TResult>(continuationFunction: Func<Task,'TResult>, scheduler: TaskScheduler) : Task<'TResult>
(+0 other overloads)
Task.ContinueWith<'TResult>(continuationFunction: Func<Task,'TResult>, cancellationToken: Threading.CancellationToken) : Task<'TResult>
(+0 other overloads)
Task.ContinueWith(continuationAction: Action<Task,obj>, state: obj) : Task
(+0 other overloads)
Task.ContinueWith(continuationAction: Action<Task>, continuationOptions: TaskContinuationOptions) : Task
(+0 other overloads)
module Dag
from DagBlog
--------------------
type Dag =
private {InputValues: obj array;
FunctionInputs: (Set<int> * Set<int>) array;
FunctionFunctions: (Dag -> obj) array;
FunctionValues: Lazy<obj> array;}
type Async =
static member AsBeginEnd : computation:('Arg -> Async<'T>) -> ('Arg * AsyncCallback * obj -> IAsyncResult) * (IAsyncResult -> 'T) * (IAsyncResult -> unit)
static member AwaitEvent : event:IEvent<'Del,'T> * ?cancelAction:(unit -> unit) -> Async<'T> (requires delegate and 'Del :> Delegate)
static member AwaitIAsyncResult : iar:IAsyncResult * ?millisecondsTimeout:int -> Async<bool>
static member AwaitTask : task:Task -> Async<unit>
static member AwaitTask : task:Task<'T> -> Async<'T>
static member AwaitWaitHandle : waitHandle:WaitHandle * ?millisecondsTimeout:int -> Async<bool>
static member CancelDefaultToken : unit -> unit
static member Catch : computation:Async<'T> -> Async<Choice<'T,exn>>
static member Choice : computations:seq<Async<'T option>> -> Async<'T option>
static member FromBeginEnd : beginAction:(AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
...
--------------------
type Async<'T> =
static member Async.AwaitTask : task:Task<'T> -> Async<'T>
val string : value:'T -> string
--------------------
type string = String
val float : value:'T -> float (requires member op_Explicit)
--------------------
type float = Double
--------------------
type float<'Measure> = float
type String =
new : value:char -> string + 7 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 2 overloads
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
member GetHashCode : unit -> int
...
--------------------
String(value: nativeptr<char>) : String
String(value: nativeptr<sbyte>) : String
String(value: char []) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: char [], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String