Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/smlnj-lib/Util/univariate-stats.sml
ViewVC logotype

Annotation of /sml/trunk/src/smlnj-lib/Util/univariate-stats.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1733 - (view) (download)

1 : mblume 1720 (* univariate-stats.sml
2 :     *
3 :     * Some statistical functions on unweighted univariate samples.
4 :     *
5 :     * Copyright (c) 2004 by The Fellowship of SML/NJ
6 :     *
7 :     * Author: Matthias Blume (blume@tti-c.org)
8 :     *)
9 :     structure UnivariateStats :> sig
10 :    
11 :     (* We distinguish between two kinds of samples. Only the "heavy"
12 :     * kind permits calculation of average deviation and median.
13 :     * It is also considerably more expensive because it keeps an
14 :     * array of all points while the "light" variety is constant-size. *)
15 : mblume 1733 type light
16 :     type heavy
17 : mblume 1720
18 :     type 'a sample (* light or heavy *)
19 :     type 'a evaluation (* light or heavy *)
20 :    
21 :     (* Samples are built incrementally by adding points to an initially
22 :     * empty sample: *)
23 :     val lempty : light sample
24 :     val hempty : unit -> heavy sample
25 :     val ladd : real * light sample -> light sample (* constant *)
26 :     val hadd : real * heavy sample -> heavy sample (* amortized constant *)
27 :    
28 :     (* Evaluate the sample; this completes all the expensive work except
29 :     * for things that depend on "heavy" samples: *)
30 :     val evaluate : 'a sample -> 'a evaluation (* constant *)
31 :    
32 :     (* extracting of "cheap" information (constant-time): *)
33 :     val N : 'a evaluation -> int
34 :     val n : 'a evaluation -> real (* N as real *)
35 :     val mean : 'a evaluation -> real
36 :     val variance : 'a evaluation -> real
37 :     val standardDeviation : 'a evaluation -> real
38 :     val skew : 'a evaluation -> real
39 :     val kurtosis : 'a evaluation -> real
40 :    
41 :     (* extracting of "expensive" information: *)
42 :     val median : heavy evaluation -> real (* randomized linear *)
43 :     val averageDeviation : heavy evaluation -> real (* linear *)
44 :    
45 :     end = struct
46 :    
47 :     infix 8 $ val op $ = Unsafe.Array.sub
48 :     infix 3 <- fun (a, i) <- x = Unsafe.Array.update (a, i, x)
49 :    
50 : mblume 1733 (* two kinds of "extra info" *)
51 :     type light = unit (* nothing *)
52 :     type heavy = real array * int (* rubber array of points, size *)
53 :     (* a sample : extra info * N * sum x^4 * sum x^3 * sum x^2 * sum x : *)
54 :     type 'a sample = 'a * int * real * real * real * real
55 :     (* an evaluation: extra info * N * N as real *
56 :     * mean * variance * standard deviation *
57 :     * skew * kurtosis : *)
58 :     datatype 'a evaluation =
59 :     E of { ext_info: 'a, (* extra info *)
60 :     ni: int, (* number of points *)
61 :     nr: real, (* number of points (as real) *)
62 :     mean: real,
63 :     sd2: real, (* sd*sd = variance *)
64 :     sd: real, (* standard deviation *)
65 :     skew: real,
66 :     kurtosis: real }
67 : mblume 1720
68 :     val SZ = 1024 (* minimum allocated size of heavy array *)
69 :     val lempty = ((), 0, 0.0, 0.0, 0.0, 0.0)
70 :     fun hempty () = ((Array.array (SZ, 0.0), SZ), 0, 0.0, 0.0, 0.0, 0.0)
71 :    
72 :     fun ladd (x:real, ((), n, sx4, sx3, sx2, sx1)) =
73 :     let val x2 = x*x val (x3, x4) = (x2*x, x2*x2)
74 : mblume 1733 in ((), n+1, sx4+x4, sx3+x3, sx2+x2, sx1+x)
75 :     end
76 : mblume 1720
77 :     fun hadd (x:real, ((a, sz), n, sx4, sx3, sx2, sx1)) =
78 :     let val x2 = x*x val (x3, x4) = (x2*x, x2*x2)
79 :     val (a, sz) =
80 :     if n < sz then (a, sz)
81 :     else let val sz = sz+sz
82 :     val b = Array.tabulate
83 :     (sz, fn i => if i<n then a$i else 0.0)
84 :     in (b, sz) end
85 :     in (a,n)<-x;
86 :     ((a, sz), n+1, sx4+x4, sx3+x3, sx2+x2, sx1+x)
87 :     end
88 :    
89 : mblume 1733 fun evaluate (ext_info, ni, sx4, sx3, sx2, sx1) =
90 :     let val n = real ni
91 :     val n' = n-1.0
92 :     val m = sx1/n
93 :     val m2 = m*m
94 :     val m3 = m2*m
95 : mblume 1720 val sd2 = (sx2 + m*(n*m-2.0*sx1))/n'
96 : mblume 1733 val sd = Math.sqrt sd2
97 :     val (sd3, sd4) = (sd*sd2, sd2*sd2)
98 : mblume 1720 val sk = (sx3-m*(3.0*(sx2-sx1*m)+n*m2))/(n*sd3)
99 :     val k = ((sx4+m*(6.0*sx2*m-4.0*(sx3+sx1*m2)+n*m3))/(n*sd4))-3.0
100 : mblume 1733 in E { ext_info = ext_info, ni = ni, nr = n, mean = m,
101 :     sd2 = sd2, sd = sd, skew = sk, kurtosis = k }
102 :     end
103 : mblume 1720
104 : mblume 1733 fun unE (E r) = r
105 : mblume 1720
106 : mblume 1733 fun N e = #ni (unE e)
107 :     fun n e = #nr (unE e)
108 :     fun mean e = #mean (unE e)
109 :     fun variance e = #sd2 (unE e)
110 :     fun standardDeviation e = #sd (unE e)
111 :     fun skew e = #skew (unE e)
112 :     fun kurtosis e = #kurtosis (unE e)
113 :    
114 :     fun median (E { ext_info = (a, _), ni, ... }) =
115 : mblume 1720 RealOrderStats.median' (ArraySlice.slice (a, 0, SOME ni))
116 :    
117 : mblume 1733 fun averageDeviation (E { ext_info = (a, _), ni, nr, mean = m, ... }) =
118 : mblume 1720 let fun ad (i, ds) = if i>ni then ds/nr else ad (i+1, ds + abs(a$i-m))
119 :     in ad (0, 0.0) end
120 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0