> seq { for i in 1..10000 -> i} ;; val it : seq<int> = seq [1; 2; 3; 4; ...]

would become:

> seq { for i in 1..10000 -> i} |> Seq.in_groups_of_n 3;; val it : seq<int list> = seq [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10; 11; 12]; ...] >

(I usually prefer to have my data in lists rather than arrays, so this isn't quite the answer they were looking for, but it's a trivial conversion from lists to arrays.)

My first solution was something more like iter:

let in_lists_of_length_n n f (ie : seq<'a>) = use e = ie.GetEnumerator() let rec next_with_acc acc = match e.MoveNext(), acc with | true, a when List.length a + 1 = n -> f(List.rev (e.Current :: a)) next_with_acc [] | true, _ -> next_with_acc (e.Current :: acc) | false, _ -> f(List.rev acc) () next_with_acc []

That's OK, but it's pretty limited. If what you're doing fits into something like iter, it's good enough.

But what about just transforming the sequence into a new sequence? That seems like a more general solution. So:

module Seq = let in_groups_of_n n (s: seq<'a>) = let rec in_groups_of_n_with_acc n acc (ie: IEnumerator<'a>) = seq { match ie.MoveNext(), acc with | true, a when List.length a + 1 = n -> yield List.rev (ie.Current :: a) yield! in_groups_of_n_with_acc n [] ie | true, a -> yield! in_groups_of_n_with_acc n (ie.Current :: a) ie | false, a when List.length a > 0 -> yield List.rev acc | false, a -> () } seq { yield! in_groups_of_n_with_acc n [] (s.GetEnumerator()) }

That's pretty good - it gets us to the point where we're returning a real sequence, so it's much more useful than the iter-style solution.

But why limit ourselves to lists-of-length-n? Let's turn the accumulator into a function, so you can get any groups you want:

module Seq = let grouped_by f (s: seq<'a>)= let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) = seq { if ie.MoveNext() then let nextValue, leftovers = f ie.Current acc if nextValue.IsSome then yield nextValue.Value yield! grouped_by_with_acc f leftovers ie else if not acc.IsEmpty then yield acc } seq { yield! grouped_by_with_acc f [] (s.GetEnumerator()) }This takes a function that accepts:

- The next item in the sequence
- Leftovers from the last run

- An optional value to yield
- Leftovers

module TestGroupedBy = let GroupsOfN n newValue acc = let newList = newValue :: acc // If we have the right length, return // a Some as the first value. That'll // be yielded by the sequence. if List.length acc = n - 1 then Some (List.rev newList), [] // If we don't have the right length, // use None (so nothing will be yielded) else None, newList // Note that we're going to pass a curried function - (GroupsOfN 3) is a function // taking two arguments, suitable for passing to grouped_by let resultWithInts = seq { for i in 1..100 -> i} |> Seq.grouped_by (GroupsOfN 3) printfn "int result %A" resultWithInts let resultWithChars = seq { for i in ['a'; 'b'; 'c'; 'd'; 'e'; 'f'] -> i} |> Seq.grouped_by (GroupsOfN 2) printfn "char result %A" resultWithChars

The results are:

int result seq [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10; 11; 12]; ...] char result seq [['a'; 'b']; ['c'; 'd']; ['e'; 'f']]

Let's use this with a more interesting grouping function. Say we want the colors of all the single cows in a list:

module TestGroupedByWithMatching = let Cows newValue acc = let newList = newValue :: acc match newList with | "cow" :: color :: "one" :: t -> Some [color], [] | a :: b :: t -> None, [a; b] | l -> None, newList let someCows = seq { for i in ["one"; "red"; "cow"; "and"; "one"; "white"; "cow"; "and"; "three"; "green"; "cows"] -> i} |> Seq.grouped_by Cows printfn "some cows are %A" someCows // This prints: // some cows are seq [["red"]; ["white"]; ["cows"; "green"]] // >So there's a problem - at the end, we're yielding the accumulator. That's what we wanted in the previous examples; we were looking for groups of three, plus the last group of 0, 1, or 2 items. Here, though, if we don't get a match we don't want to see the leftovers. Let's create a new method that takes another function that specifies what to do with the accumulator:

let grouped_by_with_leftover_processing f (f2: 'a list -> list<'a> option) (s: seq<'a>)= let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) = seq { if ie.MoveNext() then let nextValue, leftovers = f ie.Current acc if nextValue.IsSome then yield nextValue.Value yield! grouped_by_with_acc f leftovers ie else let rems = f2 acc if rems.IsSome then yield rems.Value } seq { yield! grouped_by_with_acc f [] (s.GetEnumerator()) }

Now we can specify a function to use for the leftovers. This time we should get the right cows.

module TestGroupedByWithMatching = let Cows newValue acc = let newList = newValue :: acc match newList with | "cow" :: color :: "one" :: t -> Some [color], [] | a :: b :: t -> None, [a; b] | l -> None, newList let IgnoreLeftovers f = None let someCows = seq { for i in ["one"; "red"; "cow"; "and"; "one"; "white"; "cow"; "and"; "three"; "green"; "cows"] -> i} let cowColors = someCows |> Seq.grouped_by_with_leftover_processing Cows IgnoreLeftovers printfn "cowColors are %A" cowColors // This prints: // cowColors are seq [["red"]; ["white"]]But we've got two separate functions (grouped_by_with_leftover_processing and grouped_by) that duplicate most of their code. grouped_by is just grouped_by_with_leftover_processing so the final code is:

module Seq = let grouped_by_with_leftover_processing f (f2: 'a list -> list<'a> option) (s: seq<'a>)= let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) = seq { if ie.MoveNext() then let nextValue, leftovers = f ie.Current acc if nextValue.IsSome then yield nextValue.Value yield! grouped_by_with_acc f leftovers ie else let rems = f2 acc if rems.IsSome then yield rems.Value } seq { yield! grouped_by_with_acc f [] (s.GetEnumerator()) } let YieldReversedLeftovers (f: 'a list) = if f.IsEmpty then None else Some (List.rev f) let grouped_by f s = grouped_by_with_leftover_processing f YieldReversedLeftovers s let group_by_length_n n s = let grouping_function newValue acc = let newList = newValue :: acc // If we have the right length, return // a Some as the first value. That'll // be yielded by the sequence. if List.length acc = n - 1 then Some (List.rev newList), [] // If we don't have the right length, // use None (so nothing will be yielded) else None, newList grouped_by grouping_function s

## No comments:

Post a Comment