## Monday, April 6, 2009

### F# - Converting sequences to lists on the fly

There was a question on StackOverflow about chunking sequences to arrays.  The poster wanted to have the input put into three-element arrays, so something like this:

```> seq { for i in 1..10000 -> i} ;;
val it : seq<int> = seq [1; 2; 3; 4; ...]```

would become:

```> seq { for i in 1..10000 -> i} |> Seq.in_groups_of_n 3;;
val it : seq<int list>
= seq [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10; 11; 12]; ...]
> ```

(I usually prefer to have my data in lists rather than arrays, so this isn't quite the answer they were looking for, but it's a trivial conversion from lists to arrays.)

My first solution was something more like iter:

```  let in_lists_of_length_n n f (ie : seq<'a>) =
use e = ie.GetEnumerator()
let rec next_with_acc acc =
match e.MoveNext(), acc with
| true, a when List.length a + 1 = n ->
f(List.rev (e.Current :: a))
next_with_acc []
| true, _ -> next_with_acc (e.Current :: acc)
| false, _ ->
f(List.rev acc)
()
next_with_acc [] ```
` `
That's OK, but it's pretty limited.  If what you're doing fits into something like iter, it's good enough.

But what about just transforming the sequence into a new sequence?  That seems like a more general solution.  So:

```module Seq =
let in_groups_of_n n (s: seq<'a>) =
let rec in_groups_of_n_with_acc n acc (ie: IEnumerator<'a>) =
seq {
match ie.MoveNext(), acc with
| true, a when List.length a + 1 = n ->
yield List.rev (ie.Current :: a)
yield! in_groups_of_n_with_acc n [] ie
| true, a -> yield! in_groups_of_n_with_acc n (ie.Current :: a) ie
| false, a when List.length a > 0 -> yield List.rev acc
| false, a -> ()
}
seq {
yield! in_groups_of_n_with_acc n [] (s.GetEnumerator())
}
```

That's pretty good - it gets us to the point where we're returning a real sequence, so it's much more useful than the iter-style solution.

But why limit ourselves to lists-of-length-n?  Let's turn the accumulator into a function, so you can get any groups you want:

```module Seq =
let grouped_by f (s: seq<'a>)=
let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) =
seq {
if ie.MoveNext()
then
let nextValue, leftovers = f ie.Current acc
if nextValue.IsSome then yield nextValue.Value
yield! grouped_by_with_acc f leftovers ie
else
if not acc.IsEmpty then yield acc
}
seq {
yield! grouped_by_with_acc f [] (s.GetEnumerator())
}

```
This takes a function that accepts:
• The next item in the sequence
• Leftovers from the last run
And returns a tuple:
• An optional value to yield
• Leftovers
Here's how its used:

```module TestGroupedBy =
let GroupsOfN n newValue acc =
let newList = newValue :: acc

// If we have the right length, return
// a Some as the first value.  That'll
// be yielded by the sequence.
if List.length acc = n - 1
then Some (List.rev newList), []
// If we don't have the right length,
// use None (so nothing will be yielded)
else None, newList

// Note that we're going to pass a curried function - (GroupsOfN 3) is a function
// taking two arguments, suitable for passing to grouped_by
let resultWithInts = seq { for i in 1..100 -> i} |> Seq.grouped_by (GroupsOfN 3)

printfn "int result %A" resultWithInts

let resultWithChars = seq { for i in ['a'; 'b'; 'c'; 'd'; 'e'; 'f'] -> i} |> Seq.grouped_by (GroupsOfN 2)

printfn "char result %A" resultWithChars
```

The results are:

```int result seq [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10; 11; 12]; ...]
char result seq [['a'; 'b']; ['c'; 'd']; ['e'; 'f']]
```

Let's use this with a more interesting grouping function. Say we want the colors of all the single cows in a list:

```module TestGroupedByWithMatching =
let Cows newValue acc =
let newList = newValue :: acc
match newList with
| "cow" :: color :: "one" :: t -> Some [color], []
| a :: b :: t -> None, [a; b]
| l -> None, newList

let someCows = seq { for i in ["one"; "red"; "cow"; "and"; "one"; "white"; "cow"; "and"; "three"; "green"; "cows"] -> i} |> Seq.grouped_by Cows
printfn "some cows are %A" someCows

// This prints:
// some cows are seq [["red"]; ["white"]; ["cows"; "green"]]
// >

```
So there's a problem - at the end, we're yielding the accumulator.  That's what we wanted in the previous examples; we were looking for groups of three, plus the last group of 0, 1, or 2 items.  Here, though, if we don't get a match we don't want to see the leftovers.  Let's create a new method that takes another function that specifies what to do with the accumulator:

```  let grouped_by_with_leftover_processing f (f2: 'a list -> list<'a> option) (s: seq<'a>)=
let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) =
seq {
if ie.MoveNext()
then
let nextValue, leftovers = f ie.Current acc
if nextValue.IsSome then yield nextValue.Value
yield! grouped_by_with_acc f leftovers ie
else
let rems = f2 acc
if rems.IsSome then yield rems.Value
}
seq {
yield! grouped_by_with_acc f [] (s.GetEnumerator())
}
```

Now we can specify a function to use for the leftovers.  This time we should get the right cows.

```module TestGroupedByWithMatching =
let Cows newValue acc =
let newList = newValue :: acc
match newList with
| "cow" :: color :: "one" :: t -> Some [color], []
| a :: b :: t -> None, [a; b]
| l -> None, newList

let IgnoreLeftovers f = None

let someCows = seq { for i in ["one"; "red"; "cow"; "and"; "one"; "white"; "cow"; "and"; "three"; "green"; "cows"] -> i}
let cowColors = someCows |> Seq.grouped_by_with_leftover_processing Cows IgnoreLeftovers
printfn "cowColors are %A" cowColors

// This prints:
// cowColors are seq [["red"]; ["white"]]

```
But we've got two separate functions (grouped_by_with_leftover_processing and grouped_by) that duplicate most of their code.  grouped_by is just grouped_by_with_leftover_processing so the final code is:

```module Seq =
let grouped_by_with_leftover_processing f (f2: 'a list -> list<'a> option) (s: seq<'a>)=
let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) =
seq {
if ie.MoveNext()
then
let nextValue, leftovers = f ie.Current acc
if nextValue.IsSome then yield nextValue.Value
yield! grouped_by_with_acc f leftovers ie
else
let rems = f2 acc
if rems.IsSome then yield rems.Value
}
seq {
yield! grouped_by_with_acc f [] (s.GetEnumerator())
}

let YieldReversedLeftovers (f: 'a list) =
if f.IsEmpty
then None
else Some (List.rev f)

let grouped_by f s =
grouped_by_with_leftover_processing f YieldReversedLeftovers s

let group_by_length_n n s =
let grouping_function newValue acc =
let newList = newValue :: acc
// If we have the right length, return
// a Some as the first value.  That'll
// be yielded by the sequence.
if List.length acc = n - 1
then Some (List.rev newList), []
// If we don't have the right length,
// use None (so nothing will be yielded)
else None, newList
grouped_by grouping_function s
```