diff --git a/.classpath b/.classpath new file mode 100755 index 00000000..cf17507c --- /dev/null +++ b/.classpath @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/.project b/.project new file mode 100755 index 00000000..4df1a6f3 --- /dev/null +++ b/.project @@ -0,0 +1,18 @@ + + + BIDMach + + + + + + org.scala-ide.sdt.core.scalabuilder + + + + + + org.scala-ide.sdt.core.scalanature + org.eclipse.jdt.core.javanature + + diff --git a/BIDMach.jar b/BIDMach.jar new file mode 100755 index 00000000..0cf5e46f Binary files /dev/null and b/BIDMach.jar differ diff --git a/Copyright.txt b/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/bidmach b/bidmach new file mode 100755 index 00000000..d0948549 --- /dev/null +++ b/bidmach @@ -0,0 +1,37 @@ +#!/bin/bash +export JAVA_OPTS="-Xmx12G -Xms128M" # Set as much memory as possible +BIDMACH_ROOT="${BASH_SOURCE[0]}" +if [ ! `uname` = "Darwin" ]; then + BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"` +else + BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"` +fi +BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"` +BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )" +BIDMAT_ROOT="${BIDMACH_ROOT}/../BIDMat" # Change if needed +# export JAVA_HOME="" # Set here if not set in environment +# Fix these if needed +JCUDA_VERSION="0.5.0" +JCUDA_LIBDIR="${BIDMAT_ROOT}/lib" +BIDLIB="${BIDMAT_ROOT}/lib" +LIBDIR=${BIDMACH_ROOT}/lib +if [ `uname` = "Darwin" ]; then + export DYLD_LIBRARY_PATH="${BIDMAT_ROOT}/lib:/usr/local/cuda/lib:${LD_LIBRARY_PATH}" +else + export LD_LIBRARY_PATH="${BIDMAT_ROOT}/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +fi + +BIDMAT_LIBS="${BIDMAT_ROOT}/BIDMat.jar;${BIDLIB}/ptplot.jar;${BIDLIB}/ptplotapplication.jar;${BIDLIB}/jhdf5.jar;${BIDLIB}/commons-math3-3.1.1.jar;${BIDLIB}/lz4-1.1.2.jar" +JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar" + +export ALL_LIBS="${BIDMACH_ROOT}/BIDMach.jar;${BIDMAT_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar" + +if [ ! 
"$OS" = "Windows_NT" ]; then + export ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'` +else + NEWPATH=`${BIDMAT_ROOT}/shortpath.bat "${CUDA_BIN_PATH}"` + NEWPATH=`echo $NEWPATH | sed 's_\\\\_/_g'` + JAVA_OPTS="-Djava.library.path=${BIDMAT_ROOT}/lib;${NEWPATH} "$JAVA_OPTS +fi + +scala -nobootcp -cp "${ALL_LIBS}" -Yrepl-sync -i ${LIBDIR}/bidmach_init.scala \ No newline at end of file diff --git a/bidmach.cmd b/bidmach.cmd new file mode 100755 index 00000000..d904faf3 --- /dev/null +++ b/bidmach.cmd @@ -0,0 +1,20 @@ +@ECHO OFF +:: Set JAVA_HOME here if not set in environment +:: SET JAVA_HOME= +:: Set as much memory as possible +(SET JAVA_OPTS=-Xmx12G -Xms128M) +:: Fix these if needed +SET JCUDA_VERSION=0.5.0 +SET BIDLIB="%CD%\..\BIDMat\lib" +SET LIBDIR="%CD%\lib" +SET JCUDA_LIBDIR=%BIDLIB% +SET PATH=%BIDLIB%;%PATH% + +SET BIDMACH_LIBS=%BIDLIB%\..\BIDMat.jar;%CD%\BIDMach.jar;%BIDLIB%\ptplot.jar;%BIDLIB%\ptplotapplication.jar;%BIDLIB%\jhdf5.jar;%BIDLIB%\commons-math3-3.1.1.jar;%BIDLIB%\lz4-1.1.2.jar + +SET JCUDA_LIBS=%JCUDA_LIBDIR%\jcuda-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcublas-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcufft-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcurand-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcusparse-%JCUDA_VERSION%.jar + +SET ALL_LIBS=%BIDMACH_LIBS%;%JCUDA_LIBS%;%JAVA_HOME%\lib\tools.jar +:: echo %ALL_LIBS% + +scala -nobootcp -cp "%ALL_LIBS%" -Yrepl-sync -i %LIBDIR%\bidmach_init.scala \ No newline at end of file diff --git a/build.sbt b/build.sbt new file mode 100755 index 00000000..9aecc3b1 --- /dev/null +++ b/build.sbt @@ -0,0 +1,40 @@ + +name := "BIDMach" + +version := "0.1.0" + +organization := "edu.berkeley.bid" + +scalaVersion := "2.9.2" + +resolvers ++= Seq( + "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", + "Scala Mirror" at "https://oss.sonatype.org/content/repositories/releases/" +) + +libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => + deps :+ ("org.scala-lang" % "scala-compiler" % sv) +} + 
+libraryDependencies += "org.scala-lang" % "jline" % "2.9.2" + +libraryDependencies += "org.scalatest" %% "scalatest" % "1.8" % "test" + +libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.9" % "test" + +libraryDependencies += "junit" % "junit" % "4.5" % "test" + +credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") + +javacOptions ++= Seq("-source", "1.5", "-target", "1.5") + +scalacOptions ++= Seq("-deprecation","-target:jvm-1.5") + +initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") + +javaOptions += "-Xmx12g" + +//seq(ProguardPlugin.proguardSettings :_*) + + + diff --git a/lib/bidmach_init.scala b/lib/bidmach_init.scala new file mode 100755 index 00000000..d11e2758 --- /dev/null +++ b/lib/bidmach_init.scala @@ -0,0 +1,10 @@ +import BIDMat.{BMat,CMat,CSMat,DMat,Dict,IDict,FMat,GMat,GIMat,GSMat,HMat,IMat,Mat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import BIDMat.Solvers._ +import BIDMat.Plotting._ +import BIDMach.{MatDataSource,FilesDataSource,SFilesDataSource,Learner,LDAModel,NMFModel} + +Mat.checkMKL +Mat.checkCUDA + diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100755 index 00000000..bf5cb709 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,7 @@ + +libraryDependencies <+= sbtVersion(v => "com.github.siasia" %% "xsbt-proguard-plugin" % (v+"-0.1.1")) + +resolvers += "Proguard plugin repo" at "http://siasia.github.com/maven2" + + + diff --git a/src/main/scala/BIDMach/Clustering.scala b/src/main/scala/BIDMach/Clustering.scala new file mode 100755 index 00000000..4e7e6385 --- /dev/null +++ b/src/main/scala/BIDMach/Clustering.scala @@ -0,0 +1,372 @@ +package BIDMach +import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ + + +class PAMmodel(opts:PAMmodel.Options = new PAMmodel.Options) { + + + var a:FMat = null + var nfeats = 0 + var nsamps = 0 + var ncenters = 
0 + var ntrys = 0 + val options = opts + var maxdepth = 0 + var nspills = 0 + var bestc:IMat = null + var imin:IMat = null + var vdists:DMat = null + var sil:DMat = null + var mss = 0.0 + + var ncache = 0 + + + def dists(x:FMat) = options.metric.dists(x:FMat) + def dists(x:FMat, y:FMat) = options.metric.dists(x:FMat, y:FMat) + + def init(a0:FMat) = { + + a = a0 + nfeats = size(a0,2) + nsamps = size(a0,1) + ncenters = options.ncenters + ntrys = options.ntrys + vdists = dzeros(nsamps,1) + imin = izeros(nsamps,1) + sil = dzeros(nsamps,1) + + ncache = min(nsamps,options.cbal*nsamps/ncenters)(0,0) + + } + + + // Silhouette Score + def silhouetteScore(ds:FMat, iss:IMat, isamps:IMat, icenters:IMat, lab:IMat):DMat = { + + var aa = zeros(nsamps,1) + var bb = zeros(nsamps,1) + + var silhouette = zeros(nsamps,1) + + var dimclu = zeros(length(icenters),1) // Size of each cluster + for( i <- 0 to nsamps-1 ) { dimclu(lab(i))+=1 } + + //val ncache = size(iss,1) + val centmap = accum(icenters, icol(1 to length(icenters)), size(ds,2), 1) + var i = 0 + while (i < length(isamps)) { + + val ii = isamps(i) + val labi = lab(i) + + var maxsize = dimclu(labi) + 1 + + var first = true + var j = 0 + var count = 0 + + while (j < ncache && count < maxsize) { + + + if(lab(j)==labi){ + aa(i)+=ds(j, ii) + }else { // pick the second closest center + + if(first){ + + val labj = lab(j) + maxsize += dimclu(labj) - 1 + first = false + + + } + bb(i)+=ds(j, ii) + } + + j += 1 + } + silhouette(i) = (bb(i) - aa(i)) / max(aa(i),bb(i)) + i += 1 + } + + silhouette + + + } + + + def mindists(ds:FMat, iss:IMat, isamps:IMat, icenters:IMat, vmin:DMat, imin:IMat) = { + + val centmap = accum(icenters, icol(1 to length(icenters)), size(ds,2), 1) + var i = 0 + var ispills = izeros(1,nsamps) + + var spills = 0 + + while (i < length(isamps)) { + val ii = isamps(i) + var continue = true + var j = 0 + while (j < ncache && continue) { + + if (centmap(iss(j, ii)) > 0) { + imin(ii) = centmap(iss(j, ii)) - 1 + vmin(ii) = 
ds(j, ii) + continue = false + } + j += 1 + } + maxdepth = math.max(maxdepth, j) + + + if (j >= ncache & continue) + { + + ispills(spills) = i + spills += 1 + nspills += 1 + + imin(ii) = -1 + vmin(ii) = -1 + + } + Mat.nflops += 4*j + i += 1 + } + + if(spills>0) + { + + ispills = ispills(0,0 until spills) + + + val dspill = dists(a(icenters(?,0),?),a(ispills,?)) + + var (ddd,iii) = maxi2(dspill,1) + + imin(ispills) = centmap(iii) + for (i <- 0 until spills){ vmin(ispills(i),0) = ddd(0,i)} + + } + + } + + def mindists(ds:FMat, iss:IMat, isamps:IMat, icenters:IMat):(DMat, IMat) = { + val vmin = dzeros(nsamps,1) + val imin = izeros(nsamps,1) + mindists(ds, iss, isamps, icenters, vmin, imin) + (vmin, imin) + } + + + def pointdiffs(ds:FMat, iss:IMat, vd:DMat):DMat = { + val deltas = dzeros(nsamps,1) // Array to hold improvements in distance over vd + + var ispills = izeros(1,nsamps) + var spills = 0 + + var i = 0 + while (i < nsamps) { // Calculate improvements over vd for new candidate centers + var j = 0 + while (j < ncache && ds(j,i) < vd(i)) { // using sorted order of ds + deltas(iss(j,i)) += ds(j,i) - vd(i) + j += 1 + } + maxdepth = math.max(maxdepth, j) + if (j >= ncache){ + if(ds(j-1,i) < vd(i)) + { + ispills(spills) = i + nspills += 1 + spills += 1 + } + } + + Mat.nflops += 4 * j + i += 1 + } + + if(spills > 0) + { + + ispills = ispills(?,0 until spills) + val threshold = ds(ncache-1,ispills) + + val dspill = dists(a)(?,ispills) + + for (i <- 0 until spills){ // Calculate improvements over vd for new candidate centers + + var j = 0 + val ii = ispills(0,i) + + while (j < nsamps) { + if(dspill(j,i) < vd(ii) & dspill(j,i) > threshold(i)) { + deltas(j) += dspill(j,i) - vd(ii) + } + j += 1 + } + } + + } + + deltas + } + + + def sortgen(dd:FMat):(FMat,IMat) = { // Sorts the COLUMNS in ascending order... + + + if (Mat.hasCUDA <= 0) { // until GPUsort fixed + var (smat, imat) = sort2(dd,1) + + + if(ncache < nsamps){ + smat = smat(0 until ncache,?) 
+ imat = imat(0 until ncache,?) + } + + (smat, imat) + + } else { + + var smat = dd.copy + var imat = icol(0->nsamps)*iones(1,nsamps) + + GMat.sortGPU(smat, imat) + + if(ncache < nsamps){ + smat = smat(0 until ncache,?) + imat = imat(0 until ncache,?) + } + + (smat, imat) + + } + + } + + def run = { + println("PAM clustering %d points with %d features into %d centers" format (nsamps, nfeats, ncenters)) + flip + val dd = dists(a) + val ft1 = gflop + println("Distances in %f seconds, %f gflops" format (ft1._2,ft1._1)) + flip + val (ds, iss) = sortgen(dd) + + + + + Mat.nflops += math.round(math.log(size(ds,1))/math.log(2.0))*size(ds,1)*size(ds,2) + val ft2 = gflop + println("Sort in %f seconds, %f gcomps" format (ft2._2,ft2._1)) + var bestv:DMat = null + var besti:IMat = null + var bestvd = Double.MaxValue + flip + var itry = 0 + + while(itry < ntrys) { + + println("Try %d" format itry) + val rr = rand(nsamps,1) // Get a random permutation for the centers + val (rs,irs) = sort2(rr,1) + val icenters = irs(0->ncenters,0) // Pick centers from the permutation + val ics = icol(0->nsamps) + mindists(ds, iss, ics, icenters, vdists, imin) // Get min distances from points to centers, and best center ids + println(" pass=0, mean dist=%f" format mean(vdists,1).v) + val vtmp = vdists.copy + val itmp = imin.copy + var nchanged = 1 + var ipass = 0 + var totchanged = 0 + while (nchanged > 0 && ipass < options.maxpasses) { // Keep making passes until no improvements + ipass += 1 + nchanged = 0 + var ipc = 0 + while (ipc < ncenters) { // Try to improve this center (ipc) + vtmp <-- vdists // Copy distances + val ifix = find(imin == ipc) // Find points in cluster with this center + val tcents = icenters((0->ipc) \ ((ipc+1)->ncenters),0) // List of centers minus the current one + mindists(ds, iss, ifix, tcents, vtmp, itmp) // vtmp holds distances to centers minus the current center + val deltas = pointdiffs(ds, iss, vtmp) // deltas holds improvements for each potential center over vtmp + 
val (vs,is) = mini2(deltas) // Find best new center + if (vs.v + sum(vtmp).v < sum(vdists).v && is.v != icenters(ipc,0)) { // Is the new center better than the old (and not equal to it)? + icenters(ipc) = is.v // If yes, update the center list + mindists(ds, iss, ics, icenters, vdists, imin) // Compute new distances and centers + nchanged += 1 + if (options.verb) println(" pass=%d, ipc=%d, mean dist=%f, nchanged=%d" format (ipass, ipc, mean(vdists,1).v, nchanged)) + } + ipc += 1 + } + println(" pass=%d, mean dist=%f, nchanged=%d, nspills=%d" format (ipass, mean(vdists,1).v, nchanged, nspills)) + totchanged += nchanged + } + val mv = mean(vdists).v + if (mv < bestvd) { + bestc = icenters + bestv = vdists + besti = imin + bestvd = mv + } + itry += 1 + } + val t3=gflop + val vdists2 = mini(dd(?,bestc),2) + println("Optimum in %f secs, %f gflops, mean dist=%f, verify=%f\n maxdepth=%d, nspills=%d, ncache=%d\nTotal time %f seconds" format + (t3._2, t3._1, bestvd, mean(DMat(vdists2),1).v, maxdepth, nspills, ncache, t3._2+ft2._2+ft1._2)) + + val ics = icol(0->nsamps) + flip + + sil= silhouetteScore(ds, iss, ics, bestc,imin) + val t4=gflop + mss = mean(sil,2)(0,0) + + println("Mean Silhouette Score (MSS) %f \n Elapsed time %f secs" format(mss, t4._1 )) + + } + +} + +object PAMmodel { + + class Options { + var ncenters = 1000 + var maxpasses = 10 + var ntrys = 1 + var metric:Distance = null + var verb = false + var cbal = 10 + + } + + def runit(nsamps:Int, nfeats:Int, ncenters:Int,metric:String) = { + println("Generating dataset") + val c = rand(ncenters, nfeats) + val a = rand(nsamps, nfeats)*0.3f + for (i <- 0 until nsamps by ncenters) {val il = math.min(i+ncenters, nsamps); a(i->il,?) 
+= c(0->(il-i),?)} + val cc = new PAMmodel + cc.options.ncenters = ncenters + cc.options.metric = metric match { + case "euclid" => new euclidDistance + case "cosangle" => new cosangleDistance + case "corr" => new correlationDistance + } + cc.init(a) + cc.run + } + + def main(args:Array[String]) = { + Mat.checkCUDA + val nsamps= args(0).toInt + val nfeats = args(1).toInt + val ncenters = args(2).toInt + val metric = args(3) + runit(nsamps, nfeats, ncenters, metric) + } +} + diff --git a/src/main/scala/BIDMach/Copyright.txt b/src/main/scala/BIDMach/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/src/main/scala/BIDMach/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/src/main/scala/BIDMach/DataSource.scala b/src/main/scala/BIDMach/DataSource.scala new file mode 100755 index 00000000..7561b20f --- /dev/null +++ b/src/main/scala/BIDMach/DataSource.scala @@ -0,0 +1,738 @@ +package BIDMach +import BIDMat.{Mat,BMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import scala.actors._ +import java.io._ + +abstract class DataSource(val opts:DataSource.Options = new DataSource.Options) { + def next:Array[Mat] + def hasNext:Boolean + def reset:Unit + def putBack(mats:Array[Mat],i:Int):Unit = {throw new RuntimeException("putBack not implemented")} + def setupPutBack(n:Int,dim:Int):Unit = {throw new RuntimeException("putBack not implemented")} + def nmats:Int + def init:Unit + def progress:Float + var omats:Array[Mat] = null +} + +class MatDataSource(var mats:Array[Mat], override val opts:MatDataSource.Options = new MatDataSource.Options) extends DataSource(opts) { + var sizeMargin = 0f + var here = 0 + var there = 0 + var blockSize = 0 + var totalSize = 0 + var umat:Mat = null + + omats = null + + def init = { + sizeMargin = opts.sizeMargin + blockSize = opts.blockSize + if (opts.addConstFeat) { + mats(0) = mats(0) on sparse(ones(1, mats(0).ncols)) + } + if (opts.featType == 0) { + mats(0).contents.set(1) + } + here = -blockSize + totalSize = mats(0).ncols + omats = new Array[Mat](mats.length) + for (i <- 0 until mats.length) { + omats(i) = mats(i) match { + case mm:SMat => 
SMat(mats(i).nrows, blockSize, (mats(i).nnz * sizeMargin * blockSize / mats(i).ncols).toInt) + case mm:SDMat => SDMat(mats(i).nrows, blockSize, (mats(i).nnz * sizeMargin * blockSize / mats(i).ncols).toInt) + case _ => mats(i).zeros(mats(i).nrows, blockSize) + } + } + } + + def nmats = omats.length + + def reset = { + here = -blockSize + } + + def next:Array[Mat] = { + here = math.min(here+blockSize, mats(0).ncols) + there = math.min(here+blockSize, mats(0).ncols) + for (i <- 0 until mats.length) { + omats(i) = mats(i).colslice(here, there, omats(i)) + } + omats + } + + def hasNext:Boolean = { + here + blockSize < mats(0).ncols + } + + override def setupPutBack(n:Int, dim:Int) = { + if (mats.length < n) { + val newmats = new Array[Mat](n) + for (i <- 0 until n-1) { + newmats(i) = mats(i) + } + newmats(n-1) = ones(dim, mats(0).ncols) + mats = newmats + } + } + + override def putBack(tmats:Array[Mat],i:Int):Unit = { + tmats(i).colslice(0, tmats(i).ncols, mats(i), here) + } + + def progress = { + math.min((here+blockSize)*1f/totalSize, 1f) + } + +} + +class FilesDataSource(override val opts:FilesDataSource.Options = new FilesDataSource.Options) extends DataSource(opts) { + var sizeMargin = 0f + var blockSize = 0 + @volatile var fileno = 0 + var rowno = 0 + var nstart = 0 + var fnames:List[(Int)=>String] = null + omats = null + var matqueue:Array[Array[Mat]] = null + var ready:IMat = null + var stop:Boolean = false + var permfn:(Int)=>Int = null + var totalSize = 0 + + def softperm(nstart:Int, nend:Int) = { + val dd1 = nstart / 24 + val hh1 = nstart % 24 + val dd2 = nend / 24 + val hh2 = nend % 24 + val (dmy, ii) = sort2(rand(dd2-dd1+1+opts.lookahead)) + (n:Int) => { + val dd = n / 24 + val hh = n % 24 + val ddx = ii(dd-dd1)+dd1 + val ddx0 = ddx % 31 + val ddx1 = ddx / 31 + val hhdd = hh + 24 * (ddx0 - 1) + (ddx1 * 31 + (hhdd % 31 + 1)) * 24 + hhdd / 31 + } + } + + def initbase = { + nstart = opts.nstart + fnames = opts.fnames + blockSize = opts.blockSize + while 
(!fileExists(fnames(0)(nstart))) {nstart += 1} + if (opts.order == 1) { + val (dmy, rr) = sort2(rand(opts.nend+opts.lookahead+1-nstart,1)) // Randomize the file read order + permfn = (a:Int) => rr(a-nstart)+nstart + } else { + permfn = (n:Int) => { // Stripe reads across disks (different days) + val (yy, mm, dd, hh) = FilesDataSource.decodeDate(n) + val hhdd = hh + 24 * (dd - 1) + FilesDataSource.encodeDate(yy, mm, hhdd % 31 + 1, hhdd / 31) + } + } + fileno = nstart // Number of the current output file + rowno = 0 // row number in the current output file + totalSize = opts.nend - nstart + matqueue = new Array[Array[Mat]](opts.lookahead) // Queue of matrices for each output matrix + ready = -iones(opts.lookahead, 1) // Numbers of files currently loaded in queue + for (i <- 0 until opts.lookahead) { + matqueue(i) = new Array[Mat](fnames.size) + } + for (i <- 0 until opts.lookahead) { + Actor.actor { + prefetch(nstart + i) + } + } + } + + def reset = { + fileno = nstart + rowno = 0 + for (i <- 0 until opts.lookahead) { + val ifile = nstart + i + val ifilex = ifile % opts.lookahead + ready(ifilex) = ifile - opts.lookahead + } + } + + def init = { + initbase + omats = new Array[Mat](fnames.size) + for (i <- 0 until fnames.size) { + var mm = HMat.loadMat(fnames(i)(nstart)) + if (opts.dorows) { + omats(i) = mm.zeros(blockSize, mm.ncols) + } else { + omats(i) = mm.zeros(mm.nrows, blockSize) + } + } + } + + def progress = { + (fileno-nstart)*1f / totalSize + } + + def nmats = omats.length + + def next:Array[Mat] = { + var donextfile = false + var todo = blockSize + while (todo > 0 && fileno < opts.nend) { + var nrow = rowno + val filex = fileno % opts.lookahead + while (ready(filex) < fileno) Thread.sleep(1) + for (i <- 0 until fnames.size) { + val matq = matqueue(filex)(i) + if (matq != null) { + val matqnr = if (opts.dorows) matq.nrows else matq.ncols + nrow = math.min(rowno + todo, matqnr) + if (opts.dorows) { + omats(i) = matq.rowslice(rowno, nrow, omats(i), blockSize - 
todo) + } else { + omats(i) = matq.colslice(rowno, nrow, omats(i), blockSize - todo) + } + if (matqnr == nrow) donextfile = true + } else { + donextfile = true + } + } + todo -= nrow - rowno + if (donextfile) { + fileno += 1 + rowno = 0 + donextfile = false + } else { + rowno = nrow + } + } + omats + } + + def fileExists(fname:String) = { + val testme = new File(fname) + testme.exists + } + + def lazyTranspose(a:Mat) = { + a match { + case af:FMat => FMat(a.ncols, a.nrows, af.data) + case ad:DMat => DMat(a.ncols, a.nrows, ad.data) + case ai:IMat => IMat(a.ncols, a.nrows, ai.data) + case _ => throw new RuntimeException("laztTranspose cant deal with "+a.getClass.getName) + } + } + + def prefetch(ifile:Int) = { + val ifilex = ifile % opts.lookahead + ready(ifilex) = ifile - opts.lookahead + while (!stop) { + while (ready(ifilex) >= fileno) Thread.sleep(1) + val inew = ready(ifilex) + opts.lookahead + val pnew = permfn(inew) + val fexists = fileExists(fnames(0)(pnew)) && (rand(1,1).v < opts.sampleFiles) + for (i <- 0 until fnames.size) { + matqueue(ifilex)(i) = if (fexists) { + HMat.loadMat(fnames(i)(pnew), matqueue(ifilex)(i)) + } else null +// println("%d" format inew) + } + ready(ifilex) = inew + } + } + + def hasNext:Boolean = { + (fileno < opts.nend) + } + +} + +class SFilesDataSource(override val opts:SFilesDataSource.Options = new SFilesDataSource.Options) extends FilesDataSource(opts) { + + var inptrs:IMat = null + var offsets:IMat = null + + override def init = { + initbase + var totsize = sum(opts.fcounts).v + if (opts.addConstFeat) totsize += 1 + omats = new Array[Mat](1) + omats(0) = SMat(totsize, opts.blockSize, opts.blockSize * opts.eltsPerSample) + inptrs = izeros(opts.fcounts.length, 1) + offsets = 0 on cumsum(opts.fcounts) + } + + def binFind(i:Int, mat:Mat):Int = { + val imat = mat.asInstanceOf[IMat] + val nrows = mat.nrows + var ibeg = 0 + var iend = nrows + while (ibeg < iend) { + val imid = (iend + ibeg)/2 + if (i > imat(imid, 0)) { + ibeg = imid+1 
+ } else { + iend = imid + } + } + iend + } + + def sprowslice(inmat:Array[Mat], rowno:Int, nrow:Int, omat0:Mat, done:Int):Mat = { + val omat = omat0.asInstanceOf[SMat] + val ioff = Mat.ioneBased + var idone = done + var innz = omat.nnz + val lims = opts.fcounts + val nfiles = opts.fcounts.length + val addConstFeat = opts.addConstFeat + val featType = opts.featType + var j = 0 + while (j < nfiles) { + inptrs(j, 0) = binFind(rowno, inmat(j)) + j += 1 + } + var irow = rowno + while (irow < nrow) { + var j = 0 + while (j < nfiles) { + val mat = inmat(j).asInstanceOf[IMat] + val mrows = mat.nrows + var k = inptrs(j) + while (k < mrows && mat.data(k) < irow) k += 1 + inptrs(j) = k + val xoff = innz - k + val yoff = offsets(j) + ioff + while (k < mat.nrows && mat.data(k) == irow && mat.data(k+mrows) < lims(j)) { + omat.ir(xoff + k) = mat.data(k+mrows) + yoff + omat.data(xoff + k) = if (featType == 0) 1f else mat.data(k+2*mrows) + k += 1 + } + innz = xoff + k + inptrs(j) = k + j += 1 + } + irow += 1 + idone += 1 + if (addConstFeat) { + omat.ir(innz) = omat.nrows - 1 + ioff + omat.data(innz) = 1 + innz += 1 + } + omat.jc(idone) = innz + ioff + } + omat.nnz0 = innz + omat + } + + def spmax(matq:Array[Mat]):Int = { + var maxv = 0 + for (i <- 0 until matq.length) { + if (matq(i) != null) { + val mat = matq(i).asInstanceOf[IMat] + maxv = math.max(maxv, mat(mat.nrows-1,0)) + } + } + maxv + } + + def fillup(mat:Mat, todo:Int) = { + val smat = mat.asInstanceOf[SMat] + val ncols = mat.ncols + var i = ncols - todo + val theend = smat.jc(i) + while (i < ncols) { + i += 1 + smat.jc(i) = theend + } + } + + def flushMat(mat:Mat) = { + val smat = mat.asInstanceOf[SMat] + smat.nnz0 = 0 + smat.jc(0) = Mat.ioneBased + } + + override def next:Array[Mat] = { + var donextfile = false + var todo = blockSize + flushMat(omats(0)) + while (todo > 0 && fileno < opts.nend) { + var nrow = rowno + val filex = fileno % opts.lookahead + while (ready(filex) < fileno) Thread.sleep(1) + val spm = 
spmax(matqueue(filex)) + nrow = math.min(rowno + todo, spm) + val matq = matqueue(filex) + if (matq(0) != null) { + omats(0) = sprowslice(matq, rowno, nrow, omats(0), blockSize - todo) + if (spm == nrow) donextfile = true + } else { + donextfile = true + } + todo -= nrow - rowno + if (donextfile) { + fileno += 1 + rowno = 0 + donextfile = false + } else { + rowno = nrow + } + } + if (todo > 0) { + fillup(omats(0), todo) + } + omats + } + +} + +class BlendedDataSource(val s1:DataSource, val s2:DataSource, var alpha:Float, var samp1:Float, var samp2:Float, + override val opts:BlendedDataSource.Options = new BlendedDataSource.Options) extends DataSource(opts) { + var sizeMargin = 0f + var here = 0L + var there = 0 + var iptr1 = 0 + var iptr2 = 0 + var blockSize = 0 + var bBlock = 0 + var totalSize = 0 + var randv:FMat = null + var rands1:FMat = null + var rands2:FMat = null + var mats1:Array[Mat] = null + var mats2:Array[Mat] = null + omats = null + + def init = { + sizeMargin = opts.sizeMargin + blockSize = opts.blockSize + bBlock = opts.bBlock + randv = rand(1, blockSize/bBlock + 1) + rands1 = rand(1, blockSize/bBlock + 1) + rands2 = rand(1, blockSize/bBlock + 1) + here = -blockSize + s1.opts.addConstFeat = opts.addConstFeat + s2.opts.addConstFeat = opts.addConstFeat + s1.opts.featType = opts.featType + s2.opts.featType = opts.featType + s1.init + s2.init + mats1 = s1.next + mats2 = s2.next + totalSize = mats1(0).ncols + omats = new Array[Mat](mats1.length) + for (i <- 0 until mats1.length) { + omats(i) = mats1(i) match { + case mm:SMat => SMat(mats1(i).nrows, blockSize, (mats1(i).nnz * sizeMargin).toInt) + case mm:SDMat => SDMat(mats1(i).nrows, blockSize, (mats1(i).nnz * sizeMargin).toInt) + case _ => mats1(i).zeros(mats1(i).nrows, blockSize) + } + } + } + + def nmats = omats.length + + def reset = { + s1.reset + s2.reset + here = -blockSize + } + + @inline def copycol(inmats:Array[Mat], iptr:Int, jptr:Int, omats:Array[Mat], here:Int) = { + var imat = 0 + while 
(imat < inmats.length) { + omats(imat) = inmats(imat).colslice(iptr, jptr, omats(imat), here) + imat += 1 + } + } + + def next:Array[Mat] = { + rand(0, 1f, randv) + var i = 0 + var xptr = 0 + while (xptr < blockSize && hascol(mats1, iptr1, s1) && hascol(mats2, iptr2, s2)) { + if (randv.data(i) < alpha) { + while (iptr1 < mats1(0).ncols && rands1.data(iptr1/bBlock) > samp1) iptr1 += bBlock + if (iptr1 >= mats1(0).ncols) { + mats1 = s1.next + iptr1 = 0 + rand(0, 1f, samp1) + } + val jptr1 = math.min(mats1(0).ncols, iptr1 + math.min(bBlock, math.min(blockSize, omats(0).ncols) - xptr)) + copycol(mats1, iptr1, jptr1, omats, xptr) + xptr += jptr1 - iptr1 + iptr1 = jptr1 + } else { + while (iptr2 < mats2(0).ncols && rands2.data(iptr2/bBlock) > samp2) iptr2 += bBlock + if (iptr2 >= mats2(0).ncols) { + mats2 = s2.next + iptr2 = 0 + rand(0, 1f, samp2) + } + val jptr2 = math.min(mats1(0).ncols, iptr2 + math.min(bBlock, math.min(blockSize, omats(0).ncols) - xptr)) + copycol(mats1, iptr2, jptr2, omats, xptr) + xptr += jptr2 - iptr2 + iptr2 = jptr2 + } + i += 1 + } + here += xptr + if (xptr == blockSize) { + omats + } else { + shrinkmats(omats, i) + } + } + + def hascol(mats:Array[Mat], iptr:Int, ss:DataSource):Boolean = { + (iptr < mats(0).ncols) || ss.hasNext + } + + def hasNext:Boolean = { + hascol(mats1, iptr1, s1) && hascol(mats2, iptr2, s2) + } + + def shrinkmats(xmats:Array[Mat], n:Int) = { + val outarr = new Array[Mat](omats.length) + var imat = 0 + while (imat < omats.length) { + outarr(imat) = xmats(imat).colslice(0, n, null) + imat += 1 + } + outarr + } + + def progress = { + math.max(s1.progress, s2.progress) + } +} + + +object DataSource { + class Options { + var blockSize = 100000 + var sizeMargin = 3f + var sample = 1f + var addConstFeat:Boolean = false + var featType:Int = 1 // 0 = binary features, 1 = linear features + } +} + +object MatDataSource { + class Options extends DataSource.Options { + + } +} + + +object FilesDataSource { + + def encodeDate(yy:Int, 
mm:Int, dd:Int, hh:Int) = (((12*yy + mm) * 31) + dd)*24 + hh + + def decodeDate(n:Int):(Int, Int, Int, Int) = { + val days = n / 24 + val dd = (days - 1) % 31 + 1 + val months = (days - dd) / 31 + val mm = (months - 1) % 12 + 1 + val yy = (months - mm) / 12 + (yy, mm, dd, n % 24) + } + + def sampleFun(fname:String):(Int)=>String = { + (n:Int) => { + val (yy, mm, dd, hh) = decodeDate(n) + (fname format ((n / 24) % 16, yy, mm, dd, hh)) + } + } + + def sampleFun(fname:String, m:Int, i:Int):(Int)=>String = { + (n0:Int) => { + val n = n0 * m + i + val (yy, mm, dd, hh) = decodeDate(n) + (fname format ((n / 24) % 16, yy, mm, dd, hh)) + } + } + + + class Options extends DataSource.Options { + val localDir:String = "" + def fnames:List[(Int)=>String] = null + var lookahead = 8 + var sampleFiles = 1.0f + var nstart:Int = 0 + var nend:Int = 0 + var dorows:Boolean = true + var order:Int = 1 // 0 = sequential order, 1 = random + } +} + +object BlendedDataSource { + class Options extends DataSource.Options { + var bBlock = 1000 + } +} + +object SFilesDataSource { + class Options extends FilesDataSource.Options { + var fcounts:IMat = null + var eltsPerSample = 0 + } + + val twitterFeatureDir = "/disk%02d/twitter/featurized/%04d/%02d/%02d/" + val twitterSmileyFeatureDir = "/disk%02d/twitter/smiley/featurized/%04d/%02d/%02d/" + + def twitterWords( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2012,12,1,0), + n:Int = 1, + i:Int = 0, + nfeats:Int = 100000) = { + val opts = new SFilesDataSource.Options { + override def fnames:List[(Int)=>String] = List(FilesDataSource.sampleFun(twitterFeatureDir + "unifeats%02d.lz4", n, i)) + fcounts = icol(nfeats) + nstart = nstart0/n + nend = nend0/n + order = 1 + blockSize = 100000 + eltsPerSample = 40 + lookahead = 3 + } + new SFilesDataSource(opts) + } + + def twitterSmileyWords( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2013,7,1,0), + n:Int 
= 1, + i:Int = 0, + nfeats:Int = 100000) = { + val opts = new SFilesDataSource.Options { + override def fnames:List[(Int)=>String] = List(FilesDataSource.sampleFun(twitterSmileyFeatureDir + "unifeats%02d.lz4", n, i)) + fcounts = icol(nfeats) + nstart = nstart0/n + nend = nend0/n + order = 1 + blockSize = 100000 + eltsPerSample = 40 + lookahead = 3 + } + new SFilesDataSource(opts) + } + + def twitterNgrams( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2012,12,1,0), + n:Int = 1, + i:Int = 0, + nuni0:Int = 50, + nbi0:Int = 100, + ntri0:Int = 200) = { + val opts = new SFilesDataSource.Options { + override def fnames:List[(Int)=>String] = List( + FilesDataSource.sampleFun(twitterFeatureDir + "unifeats%02d.lz4", n, i), + FilesDataSource.sampleFun(twitterFeatureDir + "bifeats%02d.lz4", n, i), + FilesDataSource.sampleFun(twitterFeatureDir + "trifeats%02d.lz4", n, i) + ) + fcounts = icol(nuni0*1000,nbi0*1000,ntri0*1000) + nstart = nstart0/n + nend = nend0/n + order = 1 + blockSize = 100000 + eltsPerSample = 40 + lookahead = 3 + } + new SFilesDataSource(opts) + } + + def twitterSmileyNgrams( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2013,7,1,0), + n:Int = 1, + i:Int = 0, + nuni0:Int = 50, + nbi0:Int = 100, + ntri0:Int = 200) = { + val opts = new SFilesDataSource.Options { + override def fnames:List[(Int)=>String] = List( + FilesDataSource.sampleFun(twitterSmileyFeatureDir + "unifeats%02d.lz4", n, i), + FilesDataSource.sampleFun(twitterSmileyFeatureDir + "bifeats%02d.lz4", n, i), + FilesDataSource.sampleFun(twitterSmileyFeatureDir + "trifeats%02d.lz4", n, i) + ) + fcounts = icol(nuni0*1000,nbi0*1000,ntri0*1000) + nstart = nstart0/n + nend = nend0/n + order = 1 + blockSize = 100000 + eltsPerSample = 40 + lookahead = 3 + } + new SFilesDataSource(opts) + } + + def twitterWordBlend( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = 
FilesDataSource.encodeDate(2013,7,1,0), + n:Int = 1, + i:Int = 0, + nfeats:Int = 10000) = { + val ds1 = twitterWords(nstart0, nend0, n, i, nfeats) + val ds2 = twitterSmileyWords(nstart0, nend0, n, i, nfeats) + if (n > 1) { + ds1.opts.lookahead = 2 + ds2.opts.lookahead = 2 + } + val opts3 = new BlendedDataSource.Options + new BlendedDataSource(ds1, ds2, 0.5f, 1f, 1f, opts3) + } + + def twitterNgramBlend( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2013,7,1,0), + n:Int = 1, + i:Int = 0, + nuni0:Int = 50, + nbi0:Int = 100, + ntri0:Int = 200) = { + val ds1 = twitterNgrams(nstart0, nend0, n, i, nuni0, nbi0, ntri0) + val ds2 = twitterSmileyNgrams(nstart0, nend0, n, i, nuni0, nbi0, ntri0) + if (n > 1) { + ds1.opts.lookahead = 2 + ds2.opts.lookahead = 2 + } + val opts3 = new BlendedDataSource.Options + new BlendedDataSource(ds1, ds2, 0.7f, 1f, 1f, opts3) + } + + def testSources(nthreads:Int=4,ff:(Int,Int,Int,Int,Int)=>DataSource = twitterWords, nfeats:Int=100000):IMat = { + val nstart0 = FilesDataSource.encodeDate(2012,3,22,0) + val nend0 = FilesDataSource.encodeDate(2013,7,1,0) + var bytes = 0L + var done = 0L + var step = 10000000000L + var stop = izeros(1,1) + tic + for (i <- 0 until nthreads) { + scala.actors.Actor.actor { + val ss = ff(nstart0, nend0, nthreads, i, nfeats) + ss.init + while (ss.hasNext && stop.v != 1) { + val a = ss.next + bytes += 12L*a(0).nnz + if (bytes > done + step) { + done = (bytes/step)*step + val t=toc + println("GB=%4.2f, t=%4.2f, MB/s=%4.2f" format (bytes/1e9, t, bytes/t/1e6)) + } + } + val t = toc + println("Thread %d done, GB=%4.2f, t=%4.2f, MB/s=%4.2f" format (i, bytes/1e9, t, bytes/t/1e6)) + } + } + stop + } +} + diff --git a/src/main/scala/BIDMach/Distance.scala b/src/main/scala/BIDMach/Distance.scala new file mode 100644 index 00000000..ec94dda8 --- /dev/null +++ b/src/main/scala/BIDMach/Distance.scala @@ -0,0 +1,125 @@ +package BIDMach +import 
BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ + +abstract class Distance(val opts:Distance.Options = new Distance.Options) { + + def dists(a:FMat):FMat + def dists(a:FMat, b:FMat):FMat + +} + +object Distance { + class Options { + + } +} + + +//Euclidean Distance +class euclidDistance(override val opts:euclidDistance.Options = new euclidDistance.Options) extends Distance(opts) { + + override def dists(a:FMat):FMat = { + val dd = if (Mat.hasCUDA > 0) a xTG a else a xT a; + val d1 = getdiag(dd) + dd ~ dd * 2.0f + dd ~ d1 - dd + dd ~ dd + (d1.t) + max(dd, 0f, dd) + sqrt(dd, dd) + dd + } + + override def dists(a:FMat, b:FMat):FMat = { + val aa = getdiag(if (Mat.hasCUDA > 0) a xTG a else a xT a); + val bb = getdiag(if (Mat.hasCUDA > 0) b xTG b else b xT b); + val ab = if (Mat.hasCUDA > 0) a xTG b else a xT b; + ab ~ ab * 2.0f + ab ~ aa - ab + (bb.t) + max(ab, 0f, ab) + sqrt(ab, ab) + + ab + } +} + +object euclidDistance { + class Options extends Distance.Options { + + } +} + + +//Cosangle Distance +class cosangleDistance(override val opts:cosangleDistance.Options = new cosangleDistance.Options) extends Distance(opts) { + + override def dists(a:FMat):FMat = { + val dd = if (Mat.hasCUDA > 0) a xTG a else a xT a; + var d1 = getdiag(dd) + sqrt(d1, d1) + d1 = if (Mat.hasCUDA > 0) 0.0f+(d1 xTG d1) else 0.0f+(d1 xT d1); + dd ~ 1 - dd / d1 + dd + } + + override def dists(a:FMat, b:FMat):FMat = { + val aa = getdiag(if (Mat.hasCUDA > 0) a xTG a else a xT a); + val bb = getdiag(if (Mat.hasCUDA > 0) b xTG b else b xT b); + val ab = if (Mat.hasCUDA > 0) a xTG b else a xT b; + + sqrt(aa, aa) + sqrt(bb, bb) + + val dd = if (Mat.hasCUDA > 0) 0.0f+(aa xTG bb) else 0.0f+(aa xT bb); + ab ~ 1 - ab / dd + ab + } +} + +object cosangleDistance { + class Options extends Distance.Options { + + } +} + + +//Correlation Distance +class correlationDistance(override val opts:correlationDistance.Options = new 
correlationDistance.Options) extends Distance(opts) { + + override def dists(a:FMat):FMat = { + val mu = mean(a,1) + a ~ a - mu + val dd = if (Mat.hasCUDA > 0) a xTG a else a xT a; + var d1 = getdiag(dd) + sqrt(d1, d1) + d1 = if (Mat.hasCUDA > 0) 0+(d1 xTG d1) else 0+(d1 xT d1); + dd ~ 1 - dd / d1 + dd + } + + override def dists(a:FMat, b:FMat):FMat = { + val mua = mean(a,1) + a ~ a - mua + val mub = mean(b,1) + b ~ b - mub + + val aa = getdiag(if (Mat.hasCUDA > 0) a xTG a else a xT a); + val bb = getdiag(if (Mat.hasCUDA > 0) b xTG b else b xT b); + val ab = if (Mat.hasCUDA > 0) a xTG b else a xT b; + + sqrt(aa, aa) + sqrt(bb, bb) + + val dd = if (Mat.hasCUDA > 0) 0.0f+(aa xTG bb) else 0.0f+(aa xT bb); + ab ~ 1 - ab / dd + ab + } +} + +object correlationDistance { + class Options extends Distance.Options { + + } +} + diff --git a/src/main/scala/BIDMach/Experiments.scala b/src/main/scala/BIDMach/Experiments.scala new file mode 100755 index 00000000..82e30a34 --- /dev/null +++ b/src/main/scala/BIDMach/Experiments.scala @@ -0,0 +1,264 @@ +package BIDMach +import BIDMat.{Mat,BMat,CMat,CSMat,Dict,DMat,FMat,IDict,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import scala.actors._ +import java.io._ + + +object Twitter { + + def dodicts(threshold:Int=10, rebuild:Boolean=false):Unit = { + val stokdir = "/twitter/smiley/tokenized/" + val tokdir = "/twitter/tokenized/" + val dy1 = mergedicts(2011, 2013, "/disk%02d" + stokdir, "/big" + stokdir, threshold, rebuild) + val dy2 = mergedicts(2011, 2013, "/disk%02d" + tokdir, "/big" + tokdir, threshold, rebuild) + val dy = Dict.union(dy1, dy2) + val (sv, iv) = sortdown2(dy.counts) + HMat.saveBMat("/big"+tokdir+"alldict.gz", BMat(dy.cstr(iv))) + HMat.saveDMat("/big"+tokdir+"allwcount.gz", sv) + } + + def mergedicts(year1:Int, year2:Int, infname:String, outfname:String, threshold:Int=10, rebuild:Boolean=false):Dict = { + val dd = new Array[Dict](6) + val md = new Array[Dict](6) + 
val yd = new Array[Dict](5) + var dy:Dict = null + var nmerged = 0 + for (yy <- year1 to year2) { + for (mm <- 1 to 12) { + print("\n%d/%02d" format (yy, mm)) + val ff = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm)) + if (rebuild || ! ff.exists) { + var ndone = 0 + for (id <- 1 to 31) { + var ielem = 372*yy + 31*mm + id + var idisk = ielem % 16 + val fname = (infname + "%04d/%02d/%02d/" format (idisk, yy, mm, id)) + val ff = new File(fname + "wcount.gz") + if (ff.exists) { + val bb = HMat.loadBMat(fname + "dict.gz") + val cc = HMat.loadIMat(fname + "wcount.gz") + dd(ndone % 6) = Dict(bb, cc, threshold) + ndone = ndone + 1 + print("-") + if (ndone % 6 == 0) { + md(ndone / 6 - 1) = Dict.union(dd:_*) + print("+") + } + } + } + if (ndone % 6 != 0) { + md(ndone / 6) = Dict.union(dd.slice(0, ndone % 6):_*) + print("+") + } + if (ndone > 0) { + val dx = Dict.union(md.slice(0, (ndone-1)/6+1):_*) + val (sv, iv) = sortdown2(dx.counts) + val dxx = Dict(dx.cstr(iv), sv) + HMat.saveBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm), BMat(dxx.cstr)) + HMat.saveDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm), dxx.counts) + } +// println("") + } + val f2 = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm)) + if (f2.exists) { + val bb = HMat.loadBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm)) + val cc = HMat.loadDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm)) + yd(nmerged % 5) = Dict(bb, cc, 4*threshold) + nmerged += 1 + print("*") + if (nmerged % 5 == 0) { + val dm = Dict.union(yd:_*) + if (nmerged == 5) { + dy = dm + } else { + dy = Dict.union(dy, dm) + } + } + } + } + } + if (nmerged % 5 != 0) { + val dm = Dict.union(yd.slice(0, nmerged % 5):_*) + dy = Dict.union(dy, dm) + } + println + val (sv, iv) = sortdown2(dy.counts) + val dyy = Dict(dy.cstr(iv), sv) + HMat.saveBMat(outfname + "dict.gz", BMat(dyy.cstr)) + HMat.saveDMat(outfname + "wcount.gz", dyy.counts) + dyy + } + + def getDict = { + val bd = 
loadBMat("/big/twitter/tokenized/alldict.gz") + val bc = loadDMat("/big/twitter/tokenized/allwcount.gz") + Dict(bd, bc) + } + + def getBiDict = { + val bd = loadIMat("/big/twitter/tokenized/allbdict.lz4") + val bc = loadDMat("/big/twitter/tokenized/allbcnts.lz4") + IDict(bd, bc) + } + + def getTriDict = { + val bd = loadIMat("/big/twitter/tokenized/alltdict.lz4") + val bc = loadDMat("/big/twitter/tokenized/alltcnts.lz4") + IDict(bd, bc) + } + + def junk:CSMat = { + csrow("", "", "", "", "", "", "", + "", "", "", "", "", "", "", + "", "", "", "", "", "" + + "", "", "", "", "", "", "", "", + "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "", "", "", "", "", "", + "", "", "", "", "", "", + "http", "https", "apos", "kml", "amp", "www", "quot", "id", "latitude", "longitude", "latlonbox", "geo", "json") + } + + def findEmoticons(n:Int, dd:Dict) = { + val smiles = csrow(":-)", ":)", ":o)", ":]", ":3", ":c)", ":>", "=]", "8)", "=)", ":}", ":^)", ":っ)") + val laughs = csrow(":-d", ":d", "8-d", "8d", "x-d", "xd", "x-x", "=-d", "=d", "=-3", "=3", "b^d") + val frowns = csrow(">:[", ":-(", ":(", "", ":-c", ":c", ":-<", "", ":っc", ":<", ":-[", ":[", ":{") + val angry = csrow(":-||", ":@", ">:(") + val crying = csrow(":'-(", ":'(", "qq") + val horror = csrow("d:<", "d:", "d8", "d;", "d=", "dx", "v.v", "d-':") + val surprise = csrow(">:o", ":-o", ":o", "°o°", "°o°", ":o", "o_o", "o_0", "o.o", "8-0") + val wink = csrow(";-)", ";)", "*-)", "*)", ";-]", ";]", ";d", ";^)", ":-,") + val all = List(smiles, laughs, frowns, angry, crying, horror, surprise, wink, junk) + val out = zeros(all.length, n) + for (i <- 0 until all.length) { + val mm = all(i) + var j = 0 + while (j < mm.length) { + val k = dd(mm(j)) + if (k >= 0 && k < n) out(i, k) = 1 + j += 1 + } + } + out + } + + def getGramDict(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):Dict = { + val nuni = nuni0 * 1000 + val nbi = nbi0 * 1000 + val ntri = ntri0 * 1000 + val fname = 
"/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0) + if (!rebuild && (new File(fname + "_bmat.lz4").exists) && (new File(fname + "_dmat.lz4").exists)) { + val bm = loadBMat(fname + "_bmat.lz4") + val dm = loadDMat(fname + "_dmat.lz4") + Dict(bm, dm) + } else { + val ud = getDict + val bd = getBiDict + val td = getTriDict + val dd = IDict.gramDict(nuni, nbi, ntri, ud, bd, td) + saveBMat(fname + "_bmat.lz4", BMat(dd.cstr)) + saveDMat(fname + "_dmat.lz4", dd.counts) + dd + } + } + + def getEmoticonMap(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):FMat = { + val nuni = nuni0 * 1000 + val nbi = nbi0 * 1000 + val ntri = ntri0 * 1000 + val fname = "/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0) + if (!rebuild && (new File(fname + "_emos.lz4").exists)) { + loadFMat(fname + "_emos.lz4") + } else { + val ud = getDict + val bdt = getBiDict.grams(0->nbi,?) + val tdt = getTriDict.grams(0->ntri,?) + val em = findEmoticons(1 + maxi(irow(nuni) \ maxi(bdt) \ maxi(tdt)).v, ud) + val bv = zeros(em.nrows, nbi) + val tv = zeros(em.nrows, ntri) + for (i <- 0 until em.nrows) { + bv(i, ?) = max(em(i, bdt(?, 0)), em(i, bdt(?, 1))) + tv(i, ?) = max(em(i, tdt(?, 0)), max(em(i, tdt(?, 1)), em(i, tdt(?, 2)))) + } + val emos = em(?, 0->nuni) \ bv(?, 0->nbi) \ tv(?, 0->ntri) + saveFMat(fname + "_emos.lz4", emos) + emos + } + } + + def logisticModelPar( + nstart0:Int = FilesDataSource.encodeDate(2012,3,1,0), + nend0:Int = FilesDataSource.encodeDate(2013,7,1,0), + nuni0:Int = 50, + nbi0:Int = 100, + ntri0:Int = 200 + ) = { + val ds = SFilesDataSource.twitterNgramBlend(nstart0, nend0) +// val ds = SFilesDataSource.twitterWords(nstart0, nend0) + ds.opts.addConstFeat = true + ds.opts.featType = 0 + val gd = getGramDict(nuni0, nbi0, ntri0) + val em = getEmoticonMap(nuni0, nbi0, ntri0) + val nfeats = gd.length + 1 + val mask = (sum(em) == 0f) \ 1 +// val targets = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1) + val targets = em(0->1, ?) 
\ 0 + val ntargets = targets.nrows + val exptsv = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0) + val exptst = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0) +// val expts = col(0.5) + val avalues = col(0.1f, 1f, 10f) + val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1) + val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst + val alphas = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1) + val aopts = new ADAGradUpdater.Options + aopts.vecExponent = expts1 + aopts.timeExponent = expts2 + aopts.alpha = alphas + aopts.mask = mask + val gopts = new GLMmodel.Options + gopts.links = iones(expts1.length, 1) + gopts.mask = mask + gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1) + gopts.targets = targets + new LearnFParModelx(ds, gopts, GLMmodel.mkGLMmodel _, aopts, GLMmodel.mkUpdater _) + } + + def logisticModel( + mat:SMat, + ntargs:Int = 1, + exptsv:FMat = col(0.4, 0.5, 0.6), + exptst:FMat = col(0.4, 0.5, 0.6), + avalues:FMat = col(0.1, 0.3, 1), + nuni0:Int = 50, + nbi0:Int = 100, + ntri0:Int = 200 + ) = { + val ds = new MatDataSource(Array(mat:Mat)) + val gd = getGramDict(nuni0, nbi0, ntri0) + val em = getEmoticonMap(nuni0, nbi0, ntri0) + val nfeats = gd.length + 1 + val mask = (sum(em) == 0f) \ 1 + val targets0 = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1) + val targets = targets0(0->ntargs, ?) 
+ val ntargets = targets.nrows + val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1) + val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst + val alphas = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1) + val aopts = new ADAGradUpdater.Options + aopts.vecExponent = expts1 + aopts.timeExponent = expts2 + aopts.alpha = alphas + aopts.mask = mask + val gopts = new GLMmodel.Options + gopts.links = iones(expts1.length, 1) + gopts.mask = mask + gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1) + gopts.targets = targets + Learner(ds, new GLMmodel(gopts), null, new ADAGradUpdater(aopts)) + } + + +} \ No newline at end of file diff --git a/src/main/scala/BIDMach/FactorModel.scala b/src/main/scala/BIDMach/FactorModel.scala new file mode 100755 index 00000000..f9c7df63 --- /dev/null +++ b/src/main/scala/BIDMach/FactorModel.scala @@ -0,0 +1,76 @@ +package BIDMach + +import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ + +abstract class FactorModel(override val opts:FactorModel.Options) extends Model(opts) { + + override def init(datasource:DataSource) = { + super.init(datasource) + val data0 = mats(0) + val m = size(data0, 1) + val d = opts.dim + val sdat = (sum(data0,2).t + 1.0f).asInstanceOf[FMat] + val sp = sdat / sum(sdat) + println("initial perplexity=%f" format math.exp(- (sp ddot ln(sp))) ) + + val modelmat = rand(d,m) + modelmat ~ modelmat *@ sdat + val msum = sum(modelmat, 2) + modelmat ~ modelmat / msum + modelmats = new Array[Mat](1) + modelmats(0) = if (opts.useGPU && Mat.hasCUDA > 0) GMat(modelmat) else modelmat + datasource.reset + + if (mats.size > 1) { + while (datasource.hasNext) { + mats = datasource.next + val dmat = mats(1) + dmat.set(1.0f/d) + datasource.putBack(mats,1) + } + } + } + + def reuseuser(a:Mat):Mat = { + val out = a match { + case aa:SMat => FMat.newOrCheckFMat(opts.dim, 
a.ncols, null, a.GUID, "reuseuser".##) + case aa:GSMat => GMat.newOrCheckGMat(opts.dim, a.ncols, null, a.GUID, "reuseuser".##) + } + out.set(1f) + out + } + + def uupdate(data:Mat, user:Mat) + + def mupdate(data:Mat, user:Mat) + + def mupdate2(data:Mat, user:Mat) = {} + + def evalfun(data:Mat, user:Mat):FMat + + def doblock(gmats:Array[Mat], i:Long) = { + val sdata = gmats(0) + val user = if (gmats.length > 1) gmats(1) else reuseuser(gmats(0)) + uupdate(sdata, user) + mupdate(sdata, user) + } + + def evalblock(mats:Array[Mat]):FMat = { + val sdata = gmats(0) + val user = if (gmats.length > 1) gmats(1) else reuseuser(gmats(0)) + uupdate(sdata, user) + evalfun(sdata, user) + } +} + +object FactorModel { + class Options extends Model.Options { + var uiter = 8 + var weps = 1e-10f + var minuser = 1e-8f + } +} + + diff --git a/src/main/scala/BIDMach/Featurizer.scala b/src/main/scala/BIDMach/Featurizer.scala new file mode 100755 index 00000000..f5946d76 --- /dev/null +++ b/src/main/scala/BIDMach/Featurizer.scala @@ -0,0 +1,627 @@ +package BIDMach +import BIDMat.{Mat,BMat,CMat,CSMat,Dict,DMat,FMat,GMat,GIMat,GSMat,HMat,IDict,IMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import scala.actors._ +import scala.annotation.switch +import java.io._ + +class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) { + + var alldict:Dict = null + var allbdict:IDict = null + var alltdict:IDict = null + + def mergeDicts(rebuild:Int,dictname:String="dict.gz",wcountname:String="wcount.gz"):Dict = { + val dd = new Array[Dict](5) // Big enough to hold log2(days per month) + val nmonths = 2 + (opts.nend - opts.nstart)/31 + val md = new Array[Dict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months) + println("Building monthly dicts for "+opts.thisDir) + for (d <- opts.nstart to opts.nend) { // Conditional on rebuild, merge the dictionaries for each month + val (year, month, day) = Featurizer.decodeDate(d) + val fm = new 
File(opts.fromMonthDir(d) + wcountname) + if (rebuild > 1 || ! fm.exists) { + val fd = new File(opts.fromDayDir(d) + wcountname) + if (fd.exists) { + val bb = loadBMat(opts.fromDayDir(d) + dictname) + val cc = loadIMat(opts.fromDayDir(d) + wcountname) + Dict.treeAdd(Dict(bb, cc, opts.threshold), dd) + print(".") + } + if (day == 31) { + val dx = Dict.treeFlush(dd) + if (dx != null) { + val (sv, iv) = sortdown2(dx.counts) + val dxx = Dict(dx.cstr(iv), sv) + val fd = new File(opts.fromMonthDir(d)) + if (!fd.exists) fd.mkdirs + saveBMat(opts.fromMonthDir(d)+dictname, BMat(dxx.cstr)) + saveDMat(opts.fromMonthDir(d)+wcountname, dxx.counts) + println("%04d-%02d" format (year,month)) + } + } + } + } + if (rebuild > 0) { + println("Merging monthly dicts for "+opts.thisDir) + for (d <- opts.nstart to opts.nend) { // Conditionally merge all monthly dictionaries + val (year, month, day) = Featurizer.decodeDate(d) + if (day == 31) { + val fm = new File(opts.fromMonthDir(d) + wcountname) + if (fm.exists) { + val bb = loadBMat(opts.fromMonthDir(d) + dictname) + val cc = loadDMat(opts.fromMonthDir(d) + wcountname) + Dict.treeAdd(Dict(bb, cc, 4*opts.threshold), md) + println("%04d-%02d" format (year,month)) + } + } + } + println + val dy = Dict.treeFlush(md) // Get merged dictionary, sort by counts descending + val (sv, iv) = sortdown2(dy.counts) + val dyy = Dict(dy.cstr(iv), sv) + saveBMat(opts.thisDir + dictname, BMat(dyy.cstr)) + saveDMat(opts.thisDir + wcountname, dyy.counts) + dyy + } else { + Dict(loadBMat(opts.thisDir + dictname), loadDMat(opts.thisDir + wcountname)) + } + } + + def mergeIDicts(rebuild:Int = 0, dictname:String="bdict.lz4", wcountname:String="bcnts.lz4", mapit:Boolean=true):IDict = { + println("Building monthly IDicts for " + opts.thisDir + " " + dictname) + if (alldict == null) alldict = Dict(loadBMat(opts.mainDict)) + val dd = new Array[IDict](5) // Big enough to hold log2(days per month) + val nmonths = 2 + (opts.nend - opts.nstart)/31 + val md = new 
Array[IDict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months) + var dy:IDict = null + var mdict:Dict = null + var domonth:Boolean = false + var lastmonth = 0 + for (d <- opts.nstart to opts.nend) { + val (year, month, day) = Featurizer.decodeDate(d) + if (month != lastmonth) { + val dfname = opts.fromMonthDir(d) + opts.localDict + if (fileExists(dfname)) { + mdict = Dict(loadBMat(dfname)) // Load token dictionary for this month + val fm = new File(opts.fromMonthDir(d) + wcountname) // Did we process this month? + domonth = rebuild > 1 || !fm.exists + } else { + mdict = null + domonth = false + } + lastmonth = month + } + if (domonth) { + val fd = new File(opts.fromDayDir(d) + wcountname) + if (fd.exists) { + val bb = loadIMat(opts.fromDayDir(d) + dictname) // Load IDict info for this day + val cc = loadDMat(opts.fromDayDir(d) + wcountname) + +// Kludge to deal with (old) scanner problem + val ig = find(maxi(bb, 2) < 0x7fffffff) + val bb2 = bb(ig, ?) + val bm = if (mapit) { + val dict = Dict(loadBMat(opts.fromDayDir(d) + opts.localDict)) // Load token dictionary for this day + val map = dict --> mdict // Map from this days tokens to month dictionary + map(bb2) // Map the ngrams + } else { + bb2 + } + val cc2 = cc(ig,0) +// Done kludge + val igood = find(mini(bm, 2) >= 0) // Find the good ones + val bg = bm(igood,?) 
+ val cg = cc2(igood) + val ip = icol(0->igood.length) + sortlexInds(bg, ip) // lex sort them + IDict.treeAdd(IDict(bg, cg(ip), opts.threshold), dd) // accumulate them + print(".") + } + if (day == 31) { // On the last day, save the accumulated results + val dx = IDict.treeFlush(dd) + if (dx != null) { + saveIMat(opts.fromMonthDir(d)+dictname, dx.grams) + saveDMat(opts.fromMonthDir(d)+wcountname, dx.counts) + } + println("%04d-%02d" format (year,month)) + } + } + } + if (rebuild > 0) { + println("Merging monthly IDicts for " + opts.thisDir) + for (d <- opts.nstart to opts.nend) { + val (year, month, day) = Featurizer.decodeDate(d) + if (day == 31) { // Conditionally accumulate monthly dicts + val dfname = opts.fromMonthDir(d) + opts.localDict + if (fileExists(dfname) || ! mapit) { + mdict = if (mapit) Dict(loadBMat(dfname)) else null + val fm = new File(opts.fromMonthDir(d) + wcountname) + if (fm.exists) { + val bb = HMat.loadIMat(opts.fromMonthDir(d) + dictname) // Load the IDict data for this month + val cc = HMat.loadDMat(opts.fromMonthDir(d) + wcountname) + val bm = if (mapit) { + val map = mdict --> alldict + map(bb) // Map to global token dictionary + } else bb + val igood = find(mini(bm, 2) >= 0) // Save the good stuff + val bg = bm(igood,?) 
+ val cg = cc(igood) + val ip = icol(0->igood.length) + sortlexInds(bg, ip) + IDict.treeAdd(IDict(bg, cg(ip), 4*opts.threshold), md) + println("%04d-%02d" format (year,month)) + } + } + } + } + dy = IDict.treeFlush(md) // Final dictionary for the time period + println + val (sv, iv) = sortdown2(dy.counts) // Sort down by ngram frequency + val dyy = IDict(dy.grams(iv,?), sv) + saveIMat(opts.thisDir + dictname, dyy.grams) + saveDMat(opts.thisDir + wcountname, dyy.counts) + dy // Return the lex-sorted dictionary + } else { + val gyy = loadIMat(opts.thisDir + dictname) + val cyy = loadDMat(opts.thisDir + wcountname) + val iperm = icol(0->cyy.length) + sortlexInds(gyy, iperm) + IDict(gyy, cyy(iperm)) + } + } + + + def mkIDicts(rebuild:Int, scanner:Scanner=TwitterScanner) = { // Build ngram dictionaries for each day + val nthreads = math.min(opts.nthreads, math.max(1, Mat.hasCUDA)) + println("Building daily IDicts") + val done = izeros(nthreads,1) + for (ithread <- 0 until nthreads) { + Actor.actor { + if (Mat.hasCUDA > 0) setGPU(ithread+Mat.hasCUDA-nthreads) + val bigramsx = IMat(opts.guessSize, 3) // Temp storage for grams + val trigramsx = IMat(opts.guessSize, 4) + val useridsx = IMat(opts.guessSize/10, 2) + val bdicts = new Array[IDict](5) // Trees to hold partial merges + val tdicts = new Array[IDict](5) + val udicts = new Array[IDict](5) + + for (d <- (opts.nstart+ithread) to opts.nend by nthreads) { + val (year, month, day) = Featurizer.decodeDate(d) + val fname = opts.fromDayDir(d)+opts.localDict + val fnew = opts.fromDayDir(d)+opts.usrCnts // Check if the userid dictionary was built yet + if (fileExists(fname) && (rebuild > 1 || !fileExists(fnew))) { + val dict = Dict(loadBMat(fname)) // load token dictionary for this day + for (ifile <- 0 until 24) { + val fn = opts.fromDayDir(d)+opts.fromFile(ifile) + if (fileExists(fn)) { + val idata = loadIMat(fn) + val (nuni, nbi, ntri, nusers) = scanner.scan(opts, dict, idata, null, bigramsx, trigramsx, useridsx) + val 
bigrams = bigramsx(0->nbi, 0->2) + val bid = if (nbi > 0) IDict.dictFromData(bigrams) else null + val trigrams = trigramsx(0->ntri, 0->3) + val trid = if (ntri > 0) IDict.dictFromData(trigrams) else null + val userids = useridsx(0->nusers, 0) + val uid = if (nusers > 0) IDict.dictFromData(userids) else null + IDict.treeAdd(bid, bdicts) + IDict.treeAdd(trid, tdicts) + IDict.treeAdd(uid, udicts) + } + } + val bf = IDict.treeFlush(bdicts) + val tf = IDict.treeFlush(tdicts) + val uf = IDict.treeFlush(udicts) + saveIMat(opts.fromDayDir(d) + opts.biDict, bf.grams) + saveDMat(opts.fromDayDir(d) + opts.biCnts, bf.counts) + saveIMat(opts.fromDayDir(d) + opts.triDict, tf.grams) + saveDMat(opts.fromDayDir(d) + opts.triCnts, tf.counts) + saveIMat(opts.fromDayDir(d) + opts.usrDict, uf.grams) + saveDMat(opts.fromDayDir(d) + opts.usrCnts, uf.counts) + print(".") + } + if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month)) + } + done(ithread,0) = 1 + } + } + while (mini(done).v == 0) Thread.`yield` + } + + def mkUniFeats(map:IMat, gramsx:IMat, ng:Int):IMat = { + val unis = map(gramsx(0->ng, 0)) + val igood = find(unis >= 0) + val gg = unis(igood, 0) + val ggn = gramsx(igood, 1) + val feats = ggn \ gg + sortlex(feats) + val (outr, ix, iy) = uniquerows(feats) + val fcounts = (ix(1->ix.length, 0) on iy.length) - ix + outr \ fcounts + } + + def mkGramFeats(map:IMat, gramsx:IMat, ng:Int, alldict:IDict):IMat = { + val grams = map(gramsx(0->ng, 0->(gramsx.ncols-1))) + val igood = find(mini(grams, 2) >= 0) + val gg = grams(igood,?) 
+ val ggn = gramsx(igood, gramsx.ncols-1) + val gmap = IDict(gg) --> alldict + val igood2 = find(gmap >= 0) + val feats = ggn(igood2,0) \ gmap(igood2,0) + sortlex(feats) + val (outr, ix, iy) = uniquerows(feats) + val fcounts = (ix(1->ix.length, 0) on iy.length) - ix + outr \ fcounts + } + + def featurize(rebuild:Int, scanner:Scanner=TwitterScanner) = { + println("Featurizing in " + opts.thisDir) + if (alldict == null) alldict = Dict(HMat.loadBMat(opts.mainDict)) + if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict)) + if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict)) + alldict.makeHash + allbdict.makeSorted + alltdict.makeSorted + val nthreads = math.min(opts.nthreads, math.max(1, Mat.hasCUDA)) + val done = izeros(nthreads,1) + for (ithread <- 0 until nthreads) { + Actor.actor { + if (Mat.hasCUDA > 0) setGPU(ithread+Mat.hasCUDA-nthreads) + val unigramsx = IMat(opts.guessSize, 2) + val bigramsx = IMat(opts.guessSize, 3) + val trigramsx = IMat(opts.guessSize, 4) + val userids = IMat(opts.guessSize/10, 2) + for (d <- (opts.nstart+ithread) to opts.nend by nthreads) { + val (year, month, day) = Featurizer.decodeDate(d) + val fdict = opts.fromDayDir(d)+opts.localDict + if (fileExists(fdict)) { + var dict:Dict = null + var map:IMat = null + val fd = new File(opts.toDayDir(d)) + if (!fd.exists) fd.mkdirs + for (ifile <- 0 until 24) { + val fn = opts.fromDayDir(d)+opts.fromFile(ifile) + val fx = opts.toDayDir(d)+opts.toTriFeats(ifile) + if (fileExists(fn) && (rebuild > 0 || !fileExists(fx))) { + if (dict == null) { + dict = Dict(loadBMat(fdict)) + map = dict --> alldict + } + val idata = loadIMat(fn) + val (nuni, nbi, ntri, nstatuses) = scanner.scan(opts, dict, idata, unigramsx, bigramsx, trigramsx, userids) + val unifeats = mkUniFeats(map, unigramsx, nuni) + val bifeats = mkGramFeats(map, bigramsx, nbi, allbdict) + val trifeats = mkGramFeats(map, trigramsx, ntri, alltdict) + saveIMat(opts.toDayDir(d) + opts.toUniFeats(ifile), unifeats) 
+ saveIMat(opts.toDayDir(d) + opts.toBiFeats(ifile), bifeats) + saveIMat(opts.toDayDir(d) + opts.toTriFeats(ifile), trifeats) + saveIMat(opts.toDayDir(d) + opts.toUserids(ifile), userids(0->nstatuses, ?)) + if (ifile == 23) print(".") + } + } + } + if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month)) + } + done(ithread,0) = 1 + } + } + while (mini(done).v == 0) Thread.`yield` + } + + def fileExists(fname:String) = { + val testme = new File(fname) + testme.exists + } + + def loadDicts() = { + if (alldict == null) alldict = Dict(HMat.loadBMat(opts.mainDict)) + if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict)) + if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict)) + val alld = alldict.cstr + val bg = allbdict.grams + val tg = alltdict.grams + val bd = CSMat(bg.nrows,1) + val td = CSMat(tg.nrows,1) + var i = 0 + while (i < bg.nrows) { + bd(i) = alld(bg(i,0)) + " " + alld(bg(i,1)) + i += 1 + } + i = 0 + while (i < tg.nrows) { + td(i) = (alld(tg(i,0)) + " " + alld(tg(i,1))) + (" " + alld(tg(i,2))) + i += 1 + } + (alld, bd, td) + } +} + +object Featurizer { + + def alloptions = { + val ff = new Featurizer + val newopts = new Featurizer.Options{ + override val tokDirName = "twitter/smiley/tokenized/" + override val featDirName = "twitter/smiley/featurized/" + } + val fs = new Featurizer(newopts) + (ff,fs) + } + + /* + * Rebuild levels: + * 0: Incrementally build monthly Dicts and Idicts and featurize any new files. Dont rebuild dictionaries + * 1: Rebuild all dictionaries from monthlies, and rebuild all features. 
+ * 2: Rebuild everything + */ + + def updateDicts(rebuild:Int=0) = { + val (ff,fs) = alloptions + ff.mergeDicts(rebuild) + fs.mergeDicts(rebuild) + ff.mkIDicts(rebuild) + fs.mkIDicts(rebuild) + } + + def buildAll(rebuild:Int=0) = { + buildMainDict(rebuild) + buildMainGDicts(rebuild) + buildFeatures(rebuild) + } + + def buildMainDict(rebuild:Int) = { + val (ff,fs) = alloptions + val d1 = ff.mergeDicts(rebuild) + val d2 = fs.mergeDicts(rebuild) + if (rebuild>0) { + val dd = Dict.union(d1, d2) + val (sc, ic) = sortdown2(dd.counts) + saveBMat(ff.opts.mainDict, BMat(dd.cstr(ic,0))) + saveDMat(ff.opts.mainCounts, sc) + } + } + + def buildMainGDicts(rebuild:Int) = { + val (ff, fs) = alloptions + + val bd1 = ff.mergeIDicts(rebuild) + val bd2 = fs.mergeIDicts(rebuild) + if (rebuild>0) { + val bdd = IDict.merge2(bd1,bd2) + val (sbc, ibc) = sortdown2(bdd.counts) + saveIMat(ff.opts.mainBDict, IMat(bdd.grams(ibc,?))) + saveDMat(ff.opts.mainBCounts, sbc) + } + + val td1 = ff.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4") + val td2 = fs.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4") + if (rebuild>0) { + val tdd = IDict.merge2(td1,td2) + val (stc, itc) = sortdown2(tdd.counts) + saveIMat(ff.opts.mainTDict, IMat(tdd.grams(itc,?))) + saveDMat(ff.opts.mainTCounts, stc) + } + + ff.opts.threshold = 1 + fs.opts.threshold = 1 + val usr1 = ff.mergeIDicts(rebuild, "usrdict.lz4", "usrcnts.lz4", false) + val usr2 = fs.mergeIDicts(rebuild, "usrdict.lz4", "usrcnts.lz4", false) + if (rebuild>0) { + val usr = IDict.merge2(usr1,usr2) + val (usrs, usrc) = sortdown2(usr.counts) + saveIMat(ff.opts.mainUsrDict, IMat(usr.grams(usrc,?))) + saveDMat(ff.opts.mainUsrCounts, usrs) + } + } + + def buildFeatures(rebuild:Int) = { + val (ff, fs) = alloptions + fs.featurize(rebuild) + ff.featurize(rebuild) + } + + def encodeDate(yy:Int, mm:Int, dd:Int) = (372*yy + 31*mm + dd) + + def decodeDate(n:Int):(Int, Int, Int) = { + val yy = (n - 32) / 372 + val days = n - 32 - 372 * yy + val mm = days / 31 + 1 + val dd 
= days - 31 * (mm - 1) + 1 + (yy, mm, dd) + } + + def dirxMap(fname:String):(Int)=>String = { + (n:Int) => { + val (yy, mm, dd) = decodeDate(n) + (fname format (n % 16, yy, mm, dd)) + } + } + + def dirMap(fname:String):(Int)=>String = { + (n:Int) => { + val (yy, mm, dd) = decodeDate(n) + (fname format (yy, mm, dd)) + } + } + + + class Options { + val tokDirName = "twitter/tokenized/" + val featDirName = "twitter/featurized/" + val localDict:String = "dict.gz" + val localCount:String = "wcount.gz" + val biDict:String = "bdict.lz4" + val triDict:String = "tdict.lz4" + val usrDict:String = "usrdict.lz4" + val biCnts:String = "bcnts.lz4" + val triCnts:String = "tcnts.lz4" + val usrCnts:String = "usrcnts.lz4" + def thisDir = "/big/" + tokDirName + def mainDir = "/big/twitter/tokenized/" + def mainDict:String = mainDir + "all" + localDict + def mainCounts:String = mainDir + "all" + localCount + def mainBDict:String = mainDir + "all" + biDict + def mainBCounts:String = mainDir + "all" + biCnts + def mainTDict:String = mainDir + "all" + triDict + def mainTCounts:String = mainDir + "all" + triCnts + def mainUsrDict:String = mainDir + "all" + usrDict + def mainUsrCounts:String = mainDir + "all" + usrCnts + def fromYearDir:(Int)=>String = dirMap(thisDir + "%04d/") + def fromMonthDir:(Int)=>String = dirMap(thisDir + "%04d/%02d/") + def fromDayDir:(Int)=>String = dirxMap("/disk%02d/" + tokDirName + "%04d/%02d/%02d/") + def toDayDir:(Int)=>String = dirxMap("/disk%02d/" + featDirName + "%04d/%02d/%02d/") + var fromFile:(Int)=>String = (n:Int) => ("tweet%02d.gz" format n) + var toUniFeats:(Int)=>String = (n:Int) => ("unifeats%02d.lz4" format n) + var toBiFeats:(Int)=>String = (n:Int) => ("bifeats%02d.lz4" format n) + var toTriFeats:(Int)=>String = (n:Int) => ("trifeats%02d.lz4" format n) + var toUserids:(Int)=>String = (n:Int) => ("userids%02d.lz4" format n) + var nstart:Int = encodeDate(2011,11,22) + var nend:Int = encodeDate(2013,6,31) + var threshold = 10 + var guessSize = 
200000000 + var nthreads = 1 + } +} + +trait Scanner { + def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int) +} + +object TwitterScanner extends Scanner { + final val OutsideStatus = 0 + final val InsideStatus = 1 + final val InsideUser = 2 + final val InsideUserId = 3 + final val InsideText = 4 + final val InsideRetweet = 5 + final val InsideStatusL2 = 6 + final val InsideUserL2 = 7 + final val InsideUserIdL2 = 8 + final val InsideTextL2 = 9 + + def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int) = { + + val Isstart = dict("") + val Isend = dict("") + val Irstart = dict("") + val Irend = dict("") + val Itstart = dict("") + val Itend = dict("") + val Iuser = dict("") + val Iuend = dict("") + val Iistart = dict("") + val Iiend = dict("") + var state = 0 + + var istatus = -1 + var nuni = 0 + var nbi = 0 + var ntri = 0 + var len = idata.length + var i = 0 + while (i < len) { + val tok = idata.data(i)-1 +// if (tok+1 >0) println(dict(tok)+ " " + state) +// else println("num " +(-(tok+1))+ " " + state) + if (tok == Isend) { + state = OutsideStatus + } else { + (state: @switch) match { + case OutsideStatus => + if (tok == Isstart) { + state = InsideStatus + istatus += 1 + } + case InsideStatus => + tok match { + case Iuser => state = InsideUser + case Itstart => state = InsideText + case Irstart => state = InsideRetweet + case _ => {} + } + case InsideUser => + tok match { + case Iistart => state = InsideUserId + case Irstart => state = InsideRetweet + case Iuend => state = InsideStatus + case _ => {} + } + case InsideUserId => + if (tok == Iiend) { + state = InsideUser + } else if (tok+1 < 0) { + if (userids != null) { + userids(istatus,0) = -(tok+1) + userids(istatus,1) = 0 + } + } + case InsideText => + tok match { + case Iuser => state = InsideUser + case Itend => state = InsideStatus + case _ => 
if (tok+1 > 0) {
+              if (unigramsx != null) {
+                unigramsx(nuni, 0) = tok
+                unigramsx(nuni, 1) = istatus
+                nuni += 1
+              }
+              if (idata.data(i-1) > 0) {
+                val tok1 = idata.data(i-1)-1
+                if (tok1 != Itstart) {
+                  bigramsx(nbi, 0) = tok1
+                  bigramsx(nbi, 1) = tok
+                  bigramsx(nbi, 2) = istatus
+                  nbi += 1
+                  if (idata.data(i-2) > 0) {
+                    val tok2 = idata.data(i-2)-1
+                    if (tok2 != Itstart) {
+                      trigramsx(ntri, 0) = tok2
+                      trigramsx(ntri, 1) = tok1
+                      trigramsx(ntri, 2) = tok
+                      trigramsx(ntri, 3) = istatus
+                      ntri += 1
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        case InsideRetweet => // retweet payload is parsed by the *L2 states below
+          tok match {
+            case Isstart => state = InsideStatusL2
+            case Irend => state = InsideStatus
+            case _ => {}
+          }
+        case InsideStatusL2 =>
+          tok match {
+            case Iuser => state = InsideUserL2
+            case Itstart => state = InsideTextL2
+            case _ => {}
+          }
+        case InsideUserL2 =>
+          tok match {
+            case Iistart => state = InsideUserIdL2
+            case Iuend => state = InsideStatusL2
+            case _ => {}
+          }
+        case InsideUserIdL2 =>
+          tok match {
+            case Iiend => state = InsideUserL2
+            case _ => if (tok+1 < 0) { // fix: was (tok-1 < 0); numeric ids are stored negated, so raw < 0 <=> tok+1 < 0, matching InsideUserId above
+              if (userids != null) userids(istatus, 1) = -(tok+1)
+            }
+          }
+        case InsideTextL2 =>
+          tok match {
+            case Itend => state = InsideStatusL2
+            case Iuser => state = InsideUserL2
+            case _ => {}
+          }
+        case _ => {}
+        }
+
+      }
+      i += 1
+    }
+    (nuni, nbi, ntri, istatus)
+  }
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/GLMmodel.scala b/src/main/scala/BIDMach/GLMmodel.scala
new file mode 100755
index 00000000..65a3a63c
--- /dev/null
+++ b/src/main/scala/BIDMach/GLMmodel.scala
@@ -0,0 +1,195 @@
+package BIDMach
+
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import edu.berkeley.bid.CUMAT
+
+
+class GLMmodel(opts:GLMmodel.Options) extends RegressionModel(opts) { // generalized linear model; per-target link chosen via opts.links
+
+  var mylinks:Mat = null
+
+  val linkArray = Array[GLMlink](LinearLink, LogisticLink)
+
+  var totflops = 0L
+
+  override def init(datasource:DataSource) = {
+    super.init(datasource)
+    
mylinks = if (useGPU) GIMat(opts.links) else opts.links
+    modelmats(0) ~ modelmats(0) ∘ mask
+    totflops = 0L
+    for (i <- 0 until opts.links.length) {
+      totflops += linkArray(opts.links(i)).fnflops
+    }
+  }
+
+  def mupdate(in:Mat):FMat = {
+//  println("model %f" format (mean(mean(modelmats(0)))).dv)
+    val targs = targets * in
+    min(targs, 1f, targs)
+    val alltargs = targmap * targs
+    val eta = modelmats(0) * in
+    applymeans(eta, mylinks, eta)
+//  println("pred %f" format (mean(mean(pred))).dv)
+//  println("%s %s %s %s %s" format (modelmats(0).mytype, updatemats(0).mytype, alltargs.mytype, pred.mytype, in.mytype))
+    val lls = llfun(eta, alltargs, mylinks)
+    alltargs ~ alltargs - eta
+    updatemats(0) ~ alltargs *^ in
+    lls
+  }
+
+  def applymeans(eta:Mat, links:Mat, out:Mat):Mat = { // apply each row's inverse link to eta, writing into out; returns out
+    (eta, links, out) match {
+      case (feta:FMat, ilinks:IMat, fout:FMat) => {
+        Mat.nflops += totflops * feta.ncols
+        var i = 0
+        val out = fout // fix: was (feta + 3f) - an unused temp that shadowed the out parameter and was returned instead of the filled result
+        while (i < feta.ncols) {
+          var j = 0
+          while (j < feta.nrows) {
+            val fun = linkArray(ilinks(j)).invlinkfn
+            fout.data(j + i * out.nrows) = fun(feta.data(j + i * feta.nrows))
+            j += 1
+          }
+          i += 1
+        }
+        out
+      }
+      case (geta:GMat, gilinks:GIMat, gout:GMat) => {
+        Mat.nflops += totflops * geta.ncols
+        CUMAT.applymeans(geta.data, gilinks.data, gout.data, geta.nrows, geta.ncols)
+        out
+      }
+    }
+  }
+
+  def llfun(pred:Mat, targ:Mat, links:Mat):FMat = { // per-column mean log likelihood under each row's link
+    (pred, targ, links) match {
+      case (fpred:FMat, ftarg:FMat, ilinks:IMat) => {
+        Mat.nflops += 10L * ftarg.length
+        var i = 0
+        val out = (ftarg + 5f) // scratch with ftarg's shape; every entry is overwritten below
+        while (i < ftarg.ncols) {
+          var j = 0
+          while (j < ftarg.nrows) {
+            val fun = linkArray(ilinks(j)).likelihoodfn
+            out.data(j + i * out.nrows) = fun(fpred.data(j + i * ftarg.nrows), ftarg.data(j + i * ftarg.nrows))
+            j += 1
+          }
+          i += 1
+        }
+        mean(out,2)
+      }
+      case (gpred:GMat, gtarg:GMat, gilinks:GIMat) => {
+        Mat.nflops += totflops * gpred.ncols
+        val out = (gpred + 3f)
+        CUMAT.applylls(gpred.data, gtarg.data, gilinks.data, out.data, gpred.nrows, 
gpred.ncols) + FMat(mean(out,2)) + } + } + } + +} + + +object LinearLink extends GLMlink { + def link(in:Float) = { + in + } + + def invlink(in:Float) = { + in + } + + def dlink(in:Float) = { + 1.0f + } + + def likelihood(pred:Float, targ:Float) = { + val diff = targ - pred + - diff * diff + } + + override val linkfn = link _ + + override val dlinkfn = dlink _ + + override val invlinkfn = invlink _ + + override val likelihoodfn = likelihood _ + + val fnflops = 2 +} + +object LogisticLink extends GLMlink { + def link(in:Float) = { + math.log(in / (1.0f - in)).toFloat + } + + def invlink(in:Float) = { + if (in > 0) { + val tmp = math.exp(-in) + (1.0 / (1.0 + tmp)).toFloat + } else { + val tmp = math.exp(in) + (tmp / (1.0 + tmp)).toFloat + } + } + + def dlink(in:Float) = { + 1 / (in * (1 - in)) + } + + def likelihood(pred:Float, targ:Float) = { + math.log(targ * pred + (1.0f - targ) * (1.0f - pred) + 1e-20).toFloat + } + + override val linkfn = link _ + + override val dlinkfn = dlink _ + + override val invlinkfn = invlink _ + + override val likelihoodfn = likelihood _ + + val fnflops = 20 +} + +object LinkEnum extends Enumeration { + type LinkEnum = Value + val Linear, Logistic = Value +} + +abstract class GLMlink { + val linkfn:(Float => Float) + val dlinkfn:(Float => Float) + val invlinkfn:(Float => Float) + val likelihoodfn:((Float,Float) => Float) + val fnflops:Int +} + +object GLMmodel { + class Options extends RegressionModel.Options { + var links:IMat = null + } + + def mkGLMmodel(fopts:Model.Options) = { + new GLMmodel(fopts.asInstanceOf[GLMmodel.Options]) + } + + def mkUpdater(nopts:Updater.Options) = { + new ADAGradUpdater(nopts.asInstanceOf[ADAGradUpdater.Options]) + } + + def learnFParx( + nstart:Int=FilesDataSource.encodeDate(2012,3,1,0), + nend:Int=FilesDataSource.encodeDate(2012,12,1,0) + ) = { + new LearnFParModelx( + SFilesDataSource.twitterNgramBlend(nstart, nend, 1, 0), + new GLMmodel.Options, mkGLMmodel _, + new ADAGradUpdater.Options, mkUpdater _) 
+ } +} + diff --git a/src/main/scala/BIDMach/LDAModel.scala b/src/main/scala/BIDMach/LDAModel.scala new file mode 100755 index 00000000..8b445f67 --- /dev/null +++ b/src/main/scala/BIDMach/LDAModel.scala @@ -0,0 +1,118 @@ +package BIDMach + +import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ + + +class LDAModel(override val opts:LDAModel.Options = new LDAModel.Options) extends FactorModel(opts) { + var mm:Mat = null + var alpha:Mat = null + + var traceMem = false + + override def init(datasource:DataSource) = { + super.init(datasource) + mm = modelmats(0) + modelmats = new Array[Mat](2) + modelmats(0) = mm + modelmats(1) = mm.ones(mm.nrows, 1) + updatemats = new Array[Mat](2) + updatemats(0) = mm.zeros(mm.nrows, mm.ncols) + updatemats(1) = mm.zeros(mm.nrows, 1) + } + + def uupdate(sdata:Mat, user:Mat):Unit = { + if (opts.putBack < 0) user.set(1f) + for (i <- 0 until opts.uiter) { + val preds = DDS(mm, user, sdata) + if (traceMem) println("uupdate %d %d %d, %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, GPUmem._1, getGPU)) + val dc = sdata.contents + val pc = preds.contents + max(opts.weps, pc, pc) + pc ~ dc / pc + val unew = user ∘ (mm * preds) + opts.alpha + if (traceMem) println("uupdate %d %d %d, %d %d %d %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, dc.GUID, pc.GUID, unew.GUID, GPUmem._1, getGPU)) + if (opts.exppsi) exppsi(unew, unew) + user <-- unew + } +// println("user %g %g" format (mini(mini(user,1),2).dv, maxi(maxi(user,1),2).dv)) + } + + def mupdate(sdata:Mat, user:Mat):Unit = { + val preds = DDS(mm, user, sdata) + val dc = sdata.contents + val pc = preds.contents + max(opts.weps, pc, pc) + pc ~ dc / pc + val ud = user *^ preds + ud ~ ud ∘ mm + ud ~ ud + opts.beta + updatemats(0) <-- ud + sum(ud, 2, updatemats(1)) + if (traceMem) println("mupdate %d %d %d %d" format (sdata.GUID, user.GUID, ud.GUID, updatemats(0).GUID)) + } + + def 
evalfun(sdata:Mat, user:Mat):FMat = { + val preds = DDS(mm, user, sdata) + val dc = sdata.contents + val pc = preds.contents + max(opts.weps, pc, pc) + ln(pc, pc) + val sdat = sum(sdata,1) + val mms = sum(mm,2) + val suu = ln(mms ^* user) + if (traceMem) println("evalfun %d %d %d, %d %d %d, %d %f" format (sdata.GUID, user.GUID, preds.GUID, pc.GUID, sdat.GUID, mms.GUID, suu.GUID, GPUmem._1)) + val vv = ((pc ddot dc) - (sdat ddot suu))/sum(sdat,2).dv + row(vv, math.exp(-vv)) + } +} + +object LDAModel { + class Options extends FactorModel.Options { + var LDAeps = 1e-9 + var exppsi = true + var alpha = 0.001f + var beta = 0.0001f + } + + def mkLDAmodel(fopts:Model.Options) = { + new LDAModel(fopts.asInstanceOf[LDAModel.Options]) + } + + def mkUpdater(nopts:Updater.Options) = { + new IncNormUpdater(nopts.asInstanceOf[IncNormUpdater.Options]) + } + + def learn(mat0:Mat) = { + new Learner(new MatDataSource(Array(mat0:Mat)), new LDAModel(), null, new IncNormUpdater(), new Learner.Options) + } + + def learnBatch(mat0:Mat) = { + new Learner(new MatDataSource(Array(mat0:Mat)), new LDAModel(), null, new BatchNormUpdater(), new Learner.Options) + } + + def learnFPar( + nstart:Int=FilesDataSource.encodeDate(2012,3,1,0), + nend:Int=FilesDataSource.encodeDate(2012,12,1,0) + ) = { + new LearnFParModel( + new LDAModel.Options, mkLDAmodel _, + new IncNormUpdater.Options, mkUpdater _, + (n:Int, i:Int) => SFilesDataSource.twitterWords(nstart, nend, n, i) + ) + } + + def learnFParx( + nstart:Int=FilesDataSource.encodeDate(2012,3,1,0), + nend:Int=FilesDataSource.encodeDate(2012,12,1,0) + ) = { + new LearnFParModelx( + SFilesDataSource.twitterWords(nstart, nend), + new LDAModel.Options, mkLDAmodel _, + new IncNormUpdater.Options, mkUpdater _ + ) + } +} + + diff --git a/src/main/scala/BIDMach/Learner.scala b/src/main/scala/BIDMach/Learner.scala new file mode 100755 index 00000000..2eb39f17 --- /dev/null +++ b/src/main/scala/BIDMach/Learner.scala @@ -0,0 +1,509 @@ +package BIDMach +import 
BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import BIDMat.Plotting._ +import scala.collection.immutable.List +import scala.collection.mutable.ListBuffer +import scala.actors.Actor + +case class Learner( + val datasource:DataSource, + val model:Model, + val regularizer:Regularizer, + val updater:Updater, + val opts:Learner.Options = new Learner.Options) { + var results:FMat = null + val dopts:DataSource.Options = datasource.opts + val mopts:Model.Options = model.opts + val ropts:Regularizer.Options = if (regularizer != null) regularizer.opts else null + val uopts:Updater.Options = updater.opts + + def setup = { + datasource match { + case ddm:MatDataSource => { + if (mopts.putBack >= 0) { + ddm.setupPutBack(mopts.putBack+1, mopts.dim) + } + } + case _ => {} + } + datasource.init + model.init(datasource) + updater.init(model) + } + + def init = { + datasource.init + model.init(datasource) + updater.init(model) + } + + def run() = { + flip + var done = false + var ipass = 0 + var here = 0L + var lasti = 0 + var bytes = 0L + updater.clear + val reslist = new ListBuffer[FMat] + val samplist = new ListBuffer[Float] + while (ipass < opts.npasses && ! done) { + var lastp = 0f + datasource.reset + var istep = 0 + println("i=%2d" format ipass) + while (datasource.hasNext) { + val mats = datasource.next + here += datasource.opts.blockSize + bytes += 12L*mats(0).nnz + if ((istep - 1) % opts.evalStep == 0 || ! 
datasource.hasNext) { + val scores = model.evalblockg(mats) + reslist.append(scores.newcopy) + samplist.append(here) + } else { + model.doblockg(mats, here) + if (regularizer != null) regularizer.compute(here) + updater.update(here) + } + if (model.opts.putBack >= 0) datasource.putBack(mats, model.opts.putBack) + istep += 1 + val dsp = datasource.progress + if (dsp > lastp + opts.pstep && reslist.length > lasti) { + val gf = gflop + lastp = dsp - (dsp % opts.pstep) + print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format ( + 100f*lastp, + Learner.scoreSummary(reslist, lasti, reslist.length), + gf._1, + gf._2, + bytes*1e-9, + bytes/gf._2*1e-6)) + if (model.useGPU) { + print(", GPUmem=%3.2f" format GPUmem._1) + } + println + lasti = reslist.length + } + } + updater.updateM + ipass += 1 + } + val gf = gflop + println("Time=%5.4f secs, gflops=%4.2f" format (gf._2, gf._1)) + results = Learner.scores2FMat(reslist) on row(samplist.toList) + } +} + +case class ParLearner( + val datasources:Array[DataSource], + val models:Array[Model], + val regularizers:Array[Regularizer], + val updaters:Array[Updater], + val opts:Learner.Options = new Learner.Options) { + + var um:FMat = null + var mm:FMat = null + var results:FMat = null + + def run() = { + flip + val mm0 = models(0).modelmats(0) + mm = zeros(mm0.nrows, mm0.ncols) + um = zeros(mm0.nrows, mm0.ncols) + + @volatile var done = izeros(opts.nthreads, 1) + var ipass = 0 + var istep0 = 0L + var ilast0 = 0L + var bytes = 0L + val reslist = new ListBuffer[FMat] + val samplist = new ListBuffer[Float] + var lastp = 0f + done.clear + for (ithread <- 0 until opts.nthreads) { + Actor.actor { + if (ithread < Mat.hasCUDA) setGPU(ithread) + var here = 0L + updaters(ithread).clear + while (ipass < opts.npasses) { + if (ithread == 0) println("i=%2d" format ipass) + datasources(ithread).reset + var istep = 0 + var lasti = 0 + while (datasources(ithread).hasNext) { + val mats = datasources(ithread).next + here += 
datasources(ithread).opts.blockSize + for (j <- 0 until mats.length) bytes += 12L * mats(j).nnz + istep += 1 + istep0 += 1 + try { + if (istep % opts.evalStep == 0) { + val scores = models(ithread).synchronized {models(ithread).evalblockg(mats)} + reslist.append(scores) + samplist.append(here) + } else { + models(ithread).synchronized { + models(ithread).doblockg(mats, here) + if (regularizers != null && regularizers(ithread) != null) regularizers(ithread).compute(here) + updaters(ithread).update(here) + } + } + } catch { + case e:Exception => { + print("Caught exception in thread %d %s\nTrying restart..." format (ithread, e.toString)) + restart(ithread) + println("Keep on truckin...") + } + } + Thread.sleep(opts.coolit) + if (models(ithread).opts.putBack >= 0) datasources(ithread).putBack(mats, models(ithread).opts.putBack) +// if (istep % (opts.syncStep/opts.nthreads) == 0) syncmodel(models, ithread) + if (ithread == 0 && datasources(0).progress > lastp + opts.pstep) { + lastp += opts.pstep + val gf = gflop + if (reslist.length > lasti) { + print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format ( + 100f*lastp, + Learner.scoreSummary(reslist, lasti, reslist.length), + gf._1, + gf._2, + bytes*1e-9, + bytes/gf._2*1e-6)) + if (models(0).useGPU) { + for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) { + setGPU(i) + if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1) + } + } + println + } + lasti = reslist.length + } + } + models(ithread).synchronized {updaters(ithread).updateM} + done(ithread) = ipass + 1 + while (done(ithread) > ipass) Thread.sleep(1) + } + } + } + while (ipass < opts.npasses) { + while (mini(done).v == ipass) { + while (istep0 < ilast0 + opts.syncStep) Thread.sleep(1) + syncmodels(models) + ilast0 += opts.syncStep + } + ipass += 1 + } + val gf = gflop + println("Time=%5.4f secs, gflops=%4.2f, MB/s=%5.2f, GB=%5.2f" format (gf._2, gf._1, bytes/gf._2*1e-6, bytes*1e-9)) + results = 
Learner.scores2FMat(reslist) on row(samplist.toList) + } + + def syncmodels(models:Array[Model]) = { + for (j <- 0 until models(0).modelmats.length) { + mm.clear + for (i <- 0 until models.length) { + if (i < Mat.hasCUDA) setGPU(i) + models(i).synchronized { + um <-- models(i).modelmats(j) + } + mm ~ mm + um + } + mm ~ mm *@ (1f/models.length) + for (i <- 0 until models.length) { + if (i < Mat.hasCUDA) setGPU(i) + models(i).synchronized { + models(i).modelmats(j) <-- mm + } + } + } + if (0 < Mat.hasCUDA) setGPU(0) + } + + def syncmodel(models:Array[Model], ithread:Int) = { + mm.synchronized { + um <-- models(ithread).modelmats(0) + um ~ um *@ (1f/opts.nthreads) + mm ~ mm *@ (1 - 1f/opts.nthreads) + mm ~ mm + um + models(ithread).modelmats(0) <-- mm + } + } + + def restart(ithread:Int) = { + if (models(0).useGPU) { + resetGPU + Mat.trimCache2(ithread) + } + models(ithread).init(datasources(ithread)) + models(ithread).modelmats(0) <-- mm + updaters(ithread).init(models(ithread)) + } +} + +case class ParLearnerx( + val datasource:DataSource, + val models:Array[Model], + val regularizers:Array[Regularizer], + val updaters:Array[Updater], + val opts:Learner.Options = new Learner.Options) { + + var um:FMat = null + var mm:FMat = null + var results:FMat = null + var cmats:Array[Array[Mat]] = null + + def run() = { + flip + val mm0 = models(0).modelmats(0) + mm = zeros(mm0.nrows, mm0.ncols) + um = zeros(mm0.nrows, mm0.ncols) + cmats = new Array[Array[Mat]](opts.nthreads) + for (i <- 0 until opts.nthreads) cmats(i) = new Array[Mat](datasource.omats.length) + + val done = iones(opts.nthreads, 1) + var ipass = 0 + var here = 0L + var feats = 0L + var lasti = 0 + var bytes = 0L + val reslist = new ListBuffer[FMat] + val samplist = new ListBuffer[Float] + for (i <- 0 until opts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i) + updaters(i).clear + } + while (ipass < opts.npasses) { + datasource.reset + var istep = 0 + var lastp = 0f + println("i=%2d" format ipass) + while 
(datasource.hasNext) { + for (ithread <- 0 until opts.nthreads) { + if (datasource.hasNext) { + done(ithread) = 0 + val mats = datasource.next + here += datasource.opts.blockSize + feats += mats(0).nnz + bytes += 12L*mats(0).nnz + for (j <- 0 until mats.length) cmats(ithread)(j) = safeCopy(mats(j), ithread) + Actor.actor { + if (ithread < Mat.hasCUDA) setGPU(ithread) + try { + if ((istep + ithread + 1) % opts.evalStep == 0 || !datasource.hasNext ) { + val scores = models(ithread).evalblockg(cmats(ithread)) + reslist.append(scores(0)) + samplist.append(here) + } else { + models(ithread).doblockg(cmats(ithread), here) + if (regularizers != null && regularizers(ithread) != null) regularizers(ithread).compute(here) + updaters(ithread).update(here) + } + } catch { + case e:Exception => { + print("Caught exception in thread %d %s\nTrying restart..." format (ithread, e.toString)) + restart(ithread) + println("Keep on truckin...") + } + } + done(ithread) = 1 + } + } + } + while (mini(done).v == 0) Thread.sleep(1) + Thread.sleep(opts.coolit) + istep += opts.nthreads + if (istep % opts.syncStep == 0) syncmodels(models) + if (datasource.progress > lastp + opts.pstep) { + lastp += opts.pstep + val gf = gflop + if (reslist.length > lasti) { + print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format ( + 100f*lastp, + Learner.scoreSummary(reslist, lasti, reslist.length), + gf._1, + gf._2, + bytes*1e-9, + bytes/gf._2*1e-6)) + if (models(0).useGPU) { + for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) { + setGPU(i) + if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1) + } + } + println + } + lasti = reslist.length + } + } + println + for (i <- 0 until opts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i); + updaters(i).updateM + } + ipass += 1 + saveAs("/big/twitter/test/results.mat", Learner.scores2FMat(reslist) on row(samplist.toList), "results") + } + val gf = gflop + println("Time=%5.4f secs, gflops=%4.2f, samples=%4.2g, 
MB/sec=%4.2g" format (gf._2, gf._1, 1.0*here, bytes/gf._2/1e6)) + results = Learner.scores2FMat(reslist) on row(samplist.toList) + if (0 < Mat.hasCUDA) setGPU(0) + } + + def safeCopy(m:Mat, ithread:Int):Mat = { + m match { + case ss:SMat => { + val out = SMat.newOrCheckSMat(ss.nrows, ss.ncols, ss.nnz, null, m.GUID, ithread, "safeCopy".##) + ss.copyTo(out) + } + } + } + + def syncmodels(models:Array[Model]) = { + for (j <- 0 until models(0).modelmats.length) { + mm.clear + for (i <- 0 until models.length) { + if (i < Mat.hasCUDA) setGPU(i) + um <-- models(i).modelmats(j) + mm ~ mm + um + } + mm ~ mm *@ (1f/models.length) + for (i <- 0 until models.length) { + if (i < Mat.hasCUDA) setGPU(i) + models(i).modelmats(j) <-- mm + } + } + if (0 < Mat.hasCUDA) setGPU(0) + } + + def restart(ithread:Int) = { + if (models(0).useGPU) { + resetGPU + Mat.trimCaches(ithread) + } + models(ithread).init(datasource) + models(ithread).modelmats(0) <-- mm + updaters(ithread).init(models(ithread)) + } +} + + +class LearnFParModel( + val mopts:Model.Options, + mkmodel:(Model.Options)=>Model, + val uopts:Updater.Options, + mkupdater:(Updater.Options)=>Updater, + ddfun:(Int,Int)=>DataSource + ) { + var dds:Array[DataSource] = null + var models:Array[Model] = null + var updaters:Array[Updater] = null + var learner:ParLearner = null + var lopts = new Learner.Options + + def setup = { + dds = new Array[DataSource](lopts.nthreads) + models = new Array[Model](lopts.nthreads) + updaters = new Array[Updater](lopts.nthreads) + for (i <- 0 until lopts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i) + dds(i) = ddfun(lopts.nthreads, i) + dds(i).init + models(i) = mkmodel(mopts) + models(i).init(dds(i)) + updaters(i) = mkupdater(uopts) + updaters(i).init(models(i)) + } + if (0 < Mat.hasCUDA) setGPU(0) + learner = new ParLearner(dds, models, null, updaters, lopts) + } + + def init = { + for (i <- 0 until lopts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i) + if (dds(i).omats.length > 1) dds(i).omats(1) = 
ones(mopts.dim, dds(i).omats(0).ncols) + dds(i).init + models(i).init(dds(i)) + updaters(i).init(models(i)) + } + if (0 < Mat.hasCUDA) setGPU(0) + } + + def run = learner.run +} + + +class LearnFParModelx( + val ds:DataSource, + val mopts:Model.Options, + mkmodel:(Model.Options)=>Model, + val uopts:Updater.Options, + mkupdater:(Updater.Options)=>Updater) { + var models:Array[Model] = null + var updaters:Array[Updater] = null + var learner:ParLearnerx = null + var lopts = new Learner.Options + + def setup = { + models = new Array[Model](lopts.nthreads) + updaters = new Array[Updater](lopts.nthreads) + ds.init + for (i <- 0 until lopts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i) + models(i) = mkmodel(mopts) + models(i).init(ds) + updaters(i) = mkupdater(uopts) + updaters(i).init(models(i)) + } + if (0 < Mat.hasCUDA) setGPU(0) + learner = new ParLearnerx(ds, models, null, updaters, lopts) + } + + def init = { + ds.omats(1) = ones(mopts.dim, ds.omats(0).ncols) + for (i <- 0 until lopts.nthreads) { + if (i < Mat.hasCUDA) setGPU(i) + if (ds.omats.length > 1) + ds.init + models(i).init(ds) + updaters(i).init(models(i)) + } + if (0 < Mat.hasCUDA) setGPU(0) + } + def run = learner.run +} + +object Learner { + + class Options { + var npasses = 10 + var evalStep = 15 + var syncStep = 32 + var nthreads = 4 + var pstep = 0.01f + var coolit = 60 + } + + def scoreSummary(reslist:ListBuffer[FMat], lasti:Int, length:Int):String = { + var i = lasti + var sum = 0.0 + while (i < length) { + sum += reslist(i)(0) + i += 1 + } + ("ll=%5.3f" format sum/(length-lasti)) + } + + def scores2FMat(reslist:ListBuffer[FMat]):FMat = { + val out = FMat(reslist(0).length, reslist.length) + var i = 0 + while (i < reslist.length) { + out(?, i) = reslist(i) + i += 1 + } + out + } +} + diff --git a/src/main/scala/BIDMach/Model.scala b/src/main/scala/BIDMach/Model.scala new file mode 100755 index 00000000..8e166fa9 --- /dev/null +++ b/src/main/scala/BIDMach/Model.scala @@ -0,0 +1,69 @@ +package BIDMach 
+import BIDMat.{Mat,BMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,IMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+abstract class Model(val opts:Model.Options = new Model.Options) { // base class for learners: holds model/update matrices and CPU<->GPU staging
+
+  var modelmats:Array[Mat] = null // the model parameters, one Mat per component
+
+  var updatemats:Array[Mat] = null // updates produced by doblock, consumed by an Updater
+
+  var mats:Array[Mat] = null // sample minibatch drawn from the datasource in init (CPU side)
+
+  var gmats:Array[Mat] = null // GPU copies of mats when useGPU, otherwise aliases mats
+
+  var useGPU = false // set in init from opts.useGPU && Mat.hasCUDA > 0
+
+  def init(datasource:DataSource):Unit = { // peek one block to size GPU buffers, then reset the source
+    mats = datasource.next
+    datasource.reset
+    useGPU = opts.useGPU && Mat.hasCUDA > 0
+    if (useGPU) {
+      gmats = new Array[Mat](mats.length)
+      for (i <- 0 until mats.length) {
+        gmats(i) = mats(i) match { // NOTE(review): non-exhaustive - any Mat type other than FMat/SMat throws MatchError here
+          case aa:FMat => GMat(aa)
+          case aa:SMat => GSMat(aa)
+        }
+      }
+    } else {
+      gmats = mats
+    }
+  }
+
+  def doblock(mats:Array[Mat], i:Long) // Calculate an update for the updater
+
+  def evalblock(mats:Array[Mat]):FMat // Scores (log likelihoods)
+
+  def doblockg(amats:Array[Mat], i:Long) = { // stage amats to GPU (if used), run doblock, copy the putBack mat out again
+    if (useGPU) copyMats(amats, gmats)
+    doblock(gmats, i)
+    if (useGPU && opts.putBack >= 0) amats(opts.putBack) <-- gmats(opts.putBack)
+  }
+
+  def evalblockg(amats:Array[Mat]):FMat = { // same staging as doblockg, but returns the score row
+    if (useGPU) copyMats(amats, gmats)
+    val v = evalblock(gmats)
+    if (useGPU && opts.putBack >= 0) amats(opts.putBack) <-- gmats(opts.putBack)
+    v
+  }
+
+  def copyMats(from:Array[Mat], to:Array[Mat]) = { // <-- copies element-wise; reassignment keeps any matrix newly allocated by <--
+    for (i <- 0 until from.length) {
+      to(i) = to(i) <-- from(i)
+    }
+}
+}
+
+
+object Model {
+  class Options { // defaults shared by all models
+    var nzPerColumn:Int = 0
+    var startBlock = 8000
+    var useGPU = true
+    var putBack = -1 // index of the mat copied back to the datasource after each block; -1 = none
+    var doubleScore = false
+    var dim = 256 // model dimension (e.g. number of topics/factors)
+  }
+}
diff --git a/src/main/scala/BIDMach/NMFModel.scala b/src/main/scala/BIDMach/NMFModel.scala
new file mode 100755
index 00000000..115bfe7a
--- /dev/null
+++ b/src/main/scala/BIDMach/NMFModel.scala
@@ -0,0 +1,138 @@
+package BIDMach
+
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+
+class NMFModel(opts:NMFModel.Options = new NMFModel.Options) extends 
FactorModel(opts) {
+
+  var mm:Mat = null
+  var mdiag:Mat = null
+  var udiag:Mat = null
+
+  override def init(datasource:DataSource) = {
+    super.init(datasource)
+    mm = modelmats(0)
+    modelmats = new Array[Mat](2)
+    modelmats(0) = mm
+    modelmats(1) = mm.zeros(mm.nrows, mm.ncols)
+    updatemats = new Array[Mat](2)
+    updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
+    updatemats(1) = mm.zeros(mm.nrows, mm.ncols)
+    udiag = mkdiag(opts.uprior*ones(opts.dim,1))
+    mdiag = mkdiag(opts.mprior*ones(opts.dim,1))
+    if (useGPU) {
+      udiag = GMat(udiag)
+      mdiag = GMat(mdiag)
+    }
+  }
+
+  override def uupdate(sdata:Mat, user:Mat) = {
+    val modeldata = mm * sdata
+    val mmu = mm *^ mm + udiag
+    for (i <- 0 until opts.uiter) {
+      val quot = modeldata / (mmu * user)
+      min(10.0f, max(0.1f, quot, quot), quot) // clamp the multiplicative factor to [0.1, 10]
+      user ~ user *@ quot
+      max(opts.minuser, user, user)
+    }
+  }
+
+  override def mupdate(sdata:Mat, user:Mat):Unit = {
+    val uu = user *^ user + mdiag *@ (1.0f*size(user,2)/opts.nusers)
+    updatemats(0) ~ (user *^ sdata) *@ mm
+    updatemats(1) ~ uu * mm
+    max(updatemats(1), opts.NMFeps, updatemats(1)) // floor the denominator to avoid divide-by-zero
+  }
+
+  override def mupdate2(sdata:Mat, user:Mat):Unit = {
+    val uu = user *^ user + mdiag *@ (1.0f*size(user,2)/opts.nusers)
+    updatemats(0) ~ user *^ sdata
+    updatemats(1) ~ uu * mm
+  }
+
+  override def evalfun(sdata:Mat, user:Mat):FMat = {
+    if (opts.doubleScore) {
+      evalfunx(sdata, user)
+    } else {
+      val modeldata = mm * sdata
+      val uu = user *^ user + mdiag *@ (1.0f*size(user,2)/opts.nusers)
+      val mmm = mm *^ mm
+
+      val ll0 = sdata.contents ddot sdata.contents
+      val ll1 = modeldata ddot user
+      val ll2 = uu ddot mmm
+      val v1 = (-ll0 + 2*ll1 - ll2)/sdata.nnz
+      val v2 = -opts.uprior*(user ddot user)/sdata.nnz
+      row(v1,v2)
+    }
+  }
+
+  def evalfunx(sdata0:Mat, user0:Mat):FMat = { // double-precision variant of evalfun
+    val sdata = SDMat(sdata0)
+    val user = DMat(user0)
+    val mmf = DMat(mm)
+    val mdiagf = DMat(mdiag)
+
+    val modeldata = mmf * sdata
+    val uu = user *^ user + mdiagf *@ (1.0f*size(user,2)/opts.nusers)
+    val mmm = mmf *^ mmf
+
+    val ll0 = sdata.contents ddot sdata.contents
+    val ll1 = modeldata ddot user
+    val ll2 = uu ddot mmm
+    val v1 = (-ll0 + 2*ll1 - ll2)/sdata.nnz
+    val v2 = -opts.uprior*(user ddot user)/sdata.nnz
+    row(v1,v2)
+  }
+}
+
+object NMFModel {
+  class Options extends FactorModel.Options {
+    var NMFeps = 1e-12
+    var uprior = 0.01f
+    var mprior = 1e-4f
+    var nusers = 100000
+  }
+
+  def mkNMFmodel(fopts:Model.Options) = {
+    new NMFModel(fopts.asInstanceOf[NMFModel.Options])
+  }
+
+  def mkUpdater(nopts:Updater.Options) = {
+    new IncNormUpdater(nopts.asInstanceOf[IncNormUpdater.Options])
+  }
+
+  def learn(mat0:Mat) = {
+    new Learner(new MatDataSource(Array(mat0:Mat)), new NMFModel(), null, new IncNormUpdater(), new Learner.Options)
+  }
+
+  def learnBatch(mat0:Mat) = {
+    new Learner(new MatDataSource(Array(mat0:Mat)), new NMFModel(), null, new BatchNormUpdater(), new Learner.Options)
+  }
+
+  def learnFPar(
+    nstart:Int=FilesDataSource.encodeDate(2012,3,1,0),
+    nend:Int=FilesDataSource.encodeDate(2012,12,1,0)
+  ) = {
+    new LearnFParModel(
+      new NMFModel.Options, mkNMFmodel _,
+      new IncNormUpdater.Options, mkUpdater _,
+      (n:Int, i:Int) => SFilesDataSource.twitterWords(nstart, nend, n, i)
+    )
+  }
+
+  def learnFParx(
+    nstart:Int=FilesDataSource.encodeDate(2012,3,1,0),
+    nend:Int=FilesDataSource.encodeDate(2012,12,1,0)
+  ) = {
+    new LearnFParModelx(
+      SFilesDataSource.twitterWords(nstart, nend),
+      new NMFModel.Options, mkNMFmodel _,
+      new IncNormUpdater.Options, mkUpdater _)
+  }
+}
+
+
+
diff --git a/src/main/scala/BIDMach/Regression.scala b/src/main/scala/BIDMach/Regression.scala
new file mode 100755
index 00000000..b74dab19
--- /dev/null
+++ b/src/main/scala/BIDMach/Regression.scala
@@ -0,0 +1,55 @@
+package BIDMach
+
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+abstract class RegressionModel(override val opts:RegressionModel.Options) extends Model {
+  var targmap:Mat = null
+  var targets:Mat = null
+  var mask:Mat = null
+
+  override def init(datasource:DataSource) = {
+    super.init(datasource)
+    useGPU = opts.useGPU && Mat.hasCUDA > 0
+    val data0 = mats(0)
+    val m = size(data0, 1)
+    val d = opts.targmap.nrows
+    val sdat = (sum(data0,2).t + 0.5f).asInstanceOf[FMat]
+    val sp = sdat / sum(sdat)
+    println("initial perplexity=%f" format (sp ddot ln(sp)) )
+
+    val rmat = rand(d,m)
+    rmat ~ rmat *@ sdat
+    val msum = sum(rmat, 2)
+    rmat ~ rmat / msum
+    val mm = rmat
+    modelmats = new Array[Mat](1) // fix: was `Array[Mat](1)` (Array.apply on the Int 1 via implicit conversion); allocate a one-slot array as done for updatemats below
+    modelmats(0) = if (useGPU) GMat(mm) else mm
+    updatemats = new Array[Mat](1)
+    updatemats(0) = modelmats(0).zeros(mm.nrows, mm.ncols)
+    targets = if (useGPU) GMat(opts.targets) else opts.targets
+    targmap = if (useGPU) GMat(opts.targmap) else opts.targmap
+    mask = if (useGPU) GMat(opts.mask) else opts.mask
+  }
+
+  def mupdate(data:Mat):FMat
+
+  def doblock(gmats:Array[Mat], i:Long) = {
+    val sdata = gmats(0)
+    mupdate(sdata)
+  }
+
+  def evalblock(mats:Array[Mat]):FMat = {
+    val sdata = gmats(0) // NOTE(review): reads the gmats field, not the mats parameter (which is unused) — confirm intended
+    mupdate(sdata)
+  }
+}
+
+object RegressionModel {
+  class Options extends Model.Options {
+    var targets:FMat = null
+    var targmap:FMat = null
+    var mask:FMat = null
+  }
+}
diff --git a/src/main/scala/BIDMach/Regularizer.scala b/src/main/scala/BIDMach/Regularizer.scala
new file mode 100755
index 00000000..105b24d3
--- /dev/null
+++ b/src/main/scala/BIDMach/Regularizer.scala
@@ -0,0 +1,38 @@
+package BIDMach
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+abstract class Regularizer(val opts:Regularizer.Options = new Regularizer.Options) {
+  val options = opts
+  var modelmats:Array[Mat] = null
+  var updatemats:Array[Mat] = null
+
+  def compute(step:Float)
+
+  def init(model:Model) = {
+    modelmats = model.modelmats
+    updatemats = model.updatemats
+  }
+}
+
+class L1Regularizer(override val opts:Regularizer.Options = new Regularizer.Options) extends Regularizer(opts) {
+  def compute(step:Float) = { // adds -step*mprior*sign(w) to each gradient
+    for (i <- 0 until modelmats.length) {
+      updatemats(i) ~ updatemats(i) + (sign(modelmats(i)) * (-step*options.mprior))
+    }
+  }
+}
+
+class L2Regularizer(override val opts:Regularizer.Options = new Regularizer.Options) extends Regularizer(opts) {
+  def compute(step:Float) = { // adds -step*mprior*w to each gradient
+    for (i <- 0 until modelmats.length) {
+      updatemats(i) ~ updatemats(i) + (modelmats(i) * (-options.mprior * step))
+    }
+  }
+}
+
+object Regularizer {
+  class Options {
+    var mprior:FMat = 1e-7f }
+}
diff --git a/src/main/scala/BIDMach/Sampler.scala b/src/main/scala/BIDMach/Sampler.scala
new file mode 100755
index 00000000..7cb9d3b2
--- /dev/null
+++ b/src/main/scala/BIDMach/Sampler.scala
@@ -0,0 +1,22 @@
+package BIDMach
+import BIDMat.{Mat,BMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,IMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+abstract class Sampler {
+
+  val options:Sampler.Options
+
+  def insample(pos:Int, modelnum:Int):Int = 1
+
+  def outsample(mat:Mat):Unit = {}
+
+
+}
+
+
+object Sampler {
+  class Options {
+
+  }
+}
diff --git a/src/main/scala/BIDMach/TestLearner.scala b/src/main/scala/BIDMach/TestLearner.scala
new file mode 100755
index 00000000..e081906b
--- /dev/null
+++ b/src/main/scala/BIDMach/TestLearner.scala
@@ -0,0 +1,153 @@
+package BIDMach
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import BIDMat.Plotting._
+
+
+object TestLearner {
+/*
+  def runLDALearner(rt:SMat, rtest:SMat, ndims:Int, nthreads:Int, useGPU:Boolean):Learner = {
+
+// Mat.numThreads = 1
+    val model = new LDAmodel()
+    model.options.dim = ndims
+    model.options.uiter = 4
+    model.options.uprior = 1e-1f
+    model.options.mprior = 1e0f
+    model.options.minuser = 1e-7f
+    model.options.nzPerColumn = 400
+    model.options.useGPU = useGPU
+
+    val updater = new MultUpdater
+    updater.options.alpha = 0.3f
+//    val updater = new MultUpdater(model)
+//    updater.options.alpha = 0.1f
+    updater.options.initnsteps = 8000f
+
+    val learner = Learner(rt, null, rtest, null, model, null, updater)
+    learner.options.npasses = 20
+    learner.options.secprint = 100
+    learner.options.blocksize = 8000 //size(rt,2)
+    learner.options.numGPUthreads = nthreads
+    learner.run
+    learner
+  }
+
+  def runNMFLearner(rt:SMat, rtest:SMat, ndims:Int, nthreads:Int, useGPU:Boolean):Learner = {
+    val model = new NMFmodel()
+    model.options.dim = ndims
+    model.options.uiter = 4
+    model.options.uprior = 1e-4f
+    model.options.mprior = 1e2f
+    model.options.minuser = 1e-8f
+    model.options.nzPerColumn = 400
+    model.options.useGPU = useGPU
+
+    val updater = new MultUpdater
+    updater.options.alpha = 0.1f
+//    val updater = new MultUpdater(model)
+//    updater.options.alpha = 0.1f
+    updater.options.initnsteps = 8000f
+
+    val learner = Learner(rt, null, rtest, null, model, null, updater)
+    learner.options.npasses = 10
+    learner.options.secprint = 100
+    learner.options.blocksize = 16000/nthreads //size(rt,2)//40000 //
+    learner.options.numGPUthreads = nthreads
+    learner.run
+    learner
+  }
+
+  def runLogLearner(rt:SMat, st:FMat, rtest:SMat, stest:FMat):Learner = {
+    val model = new LogisticModel()
+    model.options.useGPU = false
+
+    val regularizer = new L1Regularizer(model)
+    regularizer.options.mprior = 1e-7f
+
+    val updater = new ADAGradUpdater
+    updater.options.alpha = 300f
+    updater.options.gradwindow = 1e6f
+
+    val learner = Learner(rt, st > 4, rtest, stest > 4, model, regularizer, updater)
+    learner.options.npasses = 20
+    learner.run
+    learner
+  }
+
+  def runLinLearner(rt:SMat, st:FMat, rtest:SMat, stest:FMat):Learner = {
+    val model = new LinearRegModel() {
+      override def regfn(targ:Mat, pred:Mat, lls:Mat, gradw:Mat):Unit = linearMap1(targ, pred, lls, gradw)
+    }
+    model.options.nzPerColumn = 400
+    model.options.transpose = false
+    model.options.useGPU = false
+
+    val regularizer = new L1Regularizer(model)
+    regularizer.options.mprior = 1e-6f
+
+    val updater = new ADAGradUpdater { override def update(step:Int):Unit = update1(step) }
+    // regularizer.options.beta = 1e-7f
+    updater.options.alpha = 200f
+    updater.options.gradwindow = 1e6f
+
+    val learner = Learner(rt, st, rtest, stest, model, regularizer, updater)
+    learner.options.npasses = 10
+    learner.options.secprint = 100
+    learner.run
+    learner
+  }
+
+  def runtest(dirname:String, ntest:Int, ndims:Int, nthreads:Int, useGPU:Boolean):Learner = {
+    tic
+    val revtrain:SMat = load(dirname+"xpart1.mat", "revtrain")
+    val revtest:SMat = load(dirname+"xpart1.mat", "revtest")
+    val t1 = toc; tic
+    val rt = revtrain(0->4000,0->(8000*(size(revtrain,2)/8000)))
+    val rtest = revtest(0->4000,0->(8000*(size(revtest,2)/8000)))
+    val scrtrain:IMat = load(dirname+"xpart1.mat", "scrtrain")
+    val scrtest:IMat = load(dirname+"xpart1.mat", "scrtest")
+    val st = FMat(scrtrain).t
+    val stest = (FMat(scrtest).t)(?,0->(8000*(size(revtest,2)/8000)))
+    val t2 = toc
+    println("Reading time=%3.2f+%3.2f seconds" format (t1,t2))
+    val ntargs = ndims
+    val stt = zeros(ntargs, size(st,2))
+    val sttest = zeros(ntargs, size(stest,2))
+    for (i<-0 until size(stt,1)) {stt(i,?) = st; sttest(i,?) = stest}
+    flip
+    val learner:Learner = ntest match {
+      case 1 => runLinLearner(rt, stt, rtest, sttest)
+      case 2 => runLogLearner(rt, stt, rtest, sttest)
+      case 3 => runNMFLearner(rt , rtest, ndims, nthreads, useGPU)
+      case 4 => runLDALearner(rt , rtest, ndims, nthreads, useGPU)
+    }
+    val (ff, tt) = gflop
+    println("Time=%5.3f, gflops=%3.2f" format (tt, ff))
+    val xvals = irow(1->(learner.tscores.size+1))
+    val tscores = learner.tscores
+    val tscorex = learner.tscorex
+    val tsteps = learner.tsteps
+    val timeplot = semilogy(xvals, drow(tscores), xvals, drow(tscorex))
+    val stepplot = semilogy(drow(tsteps), drow(learner.tscores), drow(tsteps), drow(tscorex))
+//    val userhist = hist(log10(FMat(targetmat)(?)),100)
+    timeplot.setTitle("Neg. log likelihood vs time in seconds")
+    stepplot.setTitle("Neg. log likelihood vs number of samples")
+    val modelhist = hist(log10(FMat(learner.model.modelmat)(?)),100)
+//    val userhist = hist(log10(FMat(learner.targetmat)(?)),100)
+    learner
+  }
+
+
+  def main(args: Array[String]): Unit = {
+    val dirname = args(0)
+    val ntest = args(1).toInt
+    val ndims = args(2).toInt
+    val nthreads = args(3).toInt
+    val useGPU = args(4).toBoolean
+
+    Mat.checkCUDA
+    runtest(dirname, ntest, ndims, nthreads, useGPU)
+  } */
+}
diff --git a/src/main/scala/BIDMach/Updater.scala b/src/main/scala/BIDMach/Updater.scala
new file mode 100755
index 00000000..1e4632ae
--- /dev/null
+++ b/src/main/scala/BIDMach/Updater.scala
@@ -0,0 +1,391 @@
+package BIDMach
+
+import BIDMat.{Mat,BMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+
+
+abstract class Updater(val opts:Updater.Options = new Updater.Options) {
+  var model:Model = null
+
+  def init(model0:Model) = {
+    model = model0
+  }
+
+  def update(step:Long):Unit
+  def updateM():Unit = {}
+  def clear():Unit = {}
+}
+
+
+class IncNormUpdater(override val opts:IncNormUpdater.Options = new IncNormUpdater.Options) extends Updater(opts) {
+
+  var firstStep = 0f
+  var rm:Mat = null
+  var restart:Mat = null
+  var started:Int = 0
+
+  override def init(model0:Model) = {
+    super.init(model0)
+    val modelmats = model0.modelmats
+    val updatemats = model0.updatemats
+    restart = modelmats(0) + 1f
+    rm = model0.modelmats(0).zeros(1,1)
+    firstStep = 0f
+  }
+
+  def update(step:Long) = {
+    val modelmats = model.modelmats
+    val updatemats = model.updatemats
+    val mm = modelmats(0)
+    val um = updatemats(0)
+    val rr = if (step == 0) 0.99f else {
+      if (firstStep == 0f) {
+        firstStep = step
+        0.99f
+      } else {
+        math.pow(firstStep / step, opts.power).toFloat
+      }
+    }
+    if (modelmats.length > 1) {
+      val ms = modelmats(1)
+      val ums = updatemats(1)
+//      println("ums0 %g %g %g" format (rr, mini(mini(ums,1),2).dv, maxi(maxi(ums,1),2).dv))
+      ums ~ ums *@ rm.set(rr)
+//      println("ums1 %g %g %g" format (rr, mini(mini(ums,1),2).dv, maxi(maxi(ums,1),2).dv))
+      ms ~ ms *@ rm.set(1-rr)
+//      println("ums2 %g %g %g" format (rr, mini(mini(ums,1),2).dv, maxi(maxi(ums,1),2).dv))
+      ms ~ ms + ums
+//      println("ums3 %g %g %g" format (rr, mini(mini(ums,1),2).dv, maxi(maxi(ums,1),2).dv))
+      um ~ um / ms
+//      println("um %g %g" format (mini(mini(um,1),2).dv, maxi(maxi(um,1),2).dv))
+    }
+    um ~ um *@ rm.set(rr)
+    mm ~ mm *@ rm.set(1-rr)
+    mm ~ mm + um
+    mm ~ mm / sum(mm,2)
+    if (opts.warmup > 0) {
+      if (started == 0 && step > opts.warmup) {
+        restart <-- mm
+        started = 1
+      }
+      if (started == 1 && step > 2*opts.warmup) {
+        mm ~ mm - restart
+        max(mm, 0f, mm)
+        mm ~ mm / sum(mm,2)
+        started = 2
+      }
+    }
+  }
+
+  override def clear() = {
+    firstStep = 0f
+  }
+}
+
+class BatchNormUpdater(override val opts:BatchNormUpdater.Options = new BatchNormUpdater.Options) extends Updater {
+  var accumulators:Array[Mat] = null
+
+  override def init(model0:Model) = {
+    super.init(model0)
+    val modelmats = model.modelmats
+    val updatemats = model.updatemats
+    accumulators = new Array[Mat](updatemats.length)
+    for (i <- 0 until accumulators.length) {
+      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
+    }
+  }
+
+  def update(step:Long) = {
+    val updatemats = model.updatemats
+    for (i <- 0 until accumulators.length) {
+      accumulators(i) ~ accumulators(i) + updatemats(i)
+    }
+  }
+
+  override def clear() = {
+    for (i <- 0 until accumulators.length) {
+      accumulators(i).clear
+    }
+  }
+
+  override def updateM():Unit = {
+    val mm = model.modelmats(0)
+    mm ~ accumulators(0) / accumulators(1)
+    mm ~ mm / sum(mm,2)
+    clear
+  }
+}
+
+
+class IncMultUpdater(override val opts:IncMultUpdater.Options = new IncMultUpdater.Options) extends Updater {
+
+  var firstStep = 0f
+  var rm:Mat = null
+
+  override def init(model0:Model) = {
+    super.init(model0)
+    rm = model0.modelmats(0).zeros(1,1)
+  }
+
+  def update(step:Long) = {
+    val modelmats = model.modelmats
+    val updatemats = model.updatemats
+    val mm = modelmats(0)
+    val ms = modelmats(1)
+    val um = updatemats(0)
+    val ums = updatemats(1)
+    val rr = if (step == 0) 1f else {
+      if (firstStep == 0f) {
+        firstStep = step
+        1f
+      } else {
+        math.pow(firstStep / step, opts.power).toFloat
+      }
+    }
+//    println("rr=%g, %g %g" format (rr, mini(mini(um,1),2).dv, maxi(maxi(um,1),2).dv))
+    um ~ um *@ rm.set(rr)
+//    println("rr=%g, %g %g" format (rr, mini(mini(um,1),2).dv, maxi(maxi(um,1),2).dv))
+    ln(mm, mm)
+//    println("mm=%g %g" format (mini(mini(mm,1),2).dv, maxi(maxi(mm,1),2).dv))
+    mm ~ mm *@ rm.set(1-rr)
+//    println("mm=%g %g" format (mini(mini(mm,1),2).dv, maxi(maxi(mm,1),2).dv))
+    mm ~ mm + um
+//    println("mm=%g %g" format (mini(mini(mm,1),2).dv, maxi(maxi(mm,1),2).dv))
+    exp(mm, mm)
+//    println("mm=%g %g" format (mini(mini(mm,1),2).dv, maxi(maxi(mm,1),2).dv))
+    mm ~ mm / sum(mm,2)
+  }
+
+  override def clear() = {
+    firstStep = 0f
+  }
+}
+
+class TelescopingUpdater(override val opts:TelescopingUpdater.Options = new TelescopingUpdater.Options) extends Updater {
+  var accumulators:Array[Mat] = null
+  var firstStep = 0L
+  var nextStep = 10L
+  var nextCount = 0L
+  var rm:Mat = null
+
+  override def init(model0:Model) = {
+    super.init(model0)
+    val modelmats = model0.modelmats
+    val updatemats = model0.updatemats
+    rm = model0.modelmats(0).zeros(1,1)
+    accumulators = new Array[Mat](updatemats.length)
+    for (i <- 0 until updatemats.length) {
+      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
+    }
+    firstStep = 0L
+    nextStep = 10L
+    nextCount = 0L
+  }
+
+  def update(step:Long) = {
+    if (firstStep == 0 && step > 0) {
+      firstStep = step
+    }
+    val updatemats = model.updatemats
+    for (i <- 0 until updatemats.length) {
+      accumulators(i) ~ accumulators(i) + updatemats(i)
+    }
+    if (step >= nextCount) {
+      model.modelmats(0) ~ accumulators(0) / accumulators(1)
+      nextStep = (nextStep * opts.factor).toLong
+      nextCount = step + nextStep
+    }
+  }
+
+  override def clear() = {
+    for (i <- 0 until accumulators.length) {
+      accumulators(i).clear
+    }
+  }
+}
+
+
+class GradUpdater(override val opts:GradUpdater.Options = new GradUpdater.Options) extends Updater {
+
+  var firstStep = 0f
+  var modelmat:Mat = null
+  var updatemat:Mat = null
+  var sumSq:Mat = null // NOTE(review): sumSq and ve are never used in GradUpdater — confirm whether intended for subclasses
+  var stepn:Mat = null
+  var mask:Mat = null
+  var ve:Mat = null
+  var te:Mat = null
+  var alpha:Mat = null
+
+  override def init(model0:Model) = {
+    model = model0
+    modelmat = model.modelmats(0)
+    updatemat = model.updatemats(0)
+    mask = opts.mask
+    stepn = modelmat.zeros(1,1)
+    te = modelmat.zeros(opts.timeExponent.nrows, opts.timeExponent.ncols)
+    alpha = modelmat.zeros(opts.alpha.nrows, opts.alpha.ncols)
+    te <-- opts.timeExponent
+    alpha <-- opts.alpha
+  }
+
+  def update(step:Long):Unit = {
+    val nsteps = if (step == 0) 1f else {
+      if (firstStep == 0f) {
+        firstStep = step
+        1f
+      } else {
+        step / firstStep
+      }
+    }
+    stepn.set(1f/nsteps)
+    if (opts.waitsteps < nsteps) {
+      val tmp = updatemat *@ (alpha *@ (stepn ^ te))
+      modelmat ~ modelmat + tmp
+      if (mask != null) modelmat ~ modelmat *@ mask
+    }
+  }
+}
+
+
+class ADAGradUpdater(override val opts:ADAGradUpdater.Options = new ADAGradUpdater.Options) extends Updater {
+
+  var firstStep = 0f
+  var modelmat:Mat = null
+  var updatemat:Mat = null
+  var sumSq:Mat = null
+  var stepn:Mat = null
+  var mask:Mat = null
+  var ve:Mat = null
+  var te:Mat = null
+  var alpha:Mat = null
+
+  override def init(model0:Model) = {
+    model = model0
+    modelmat = model.modelmats(0)
+    updatemat = model.updatemats(0)
+    mask = opts.mask
+    if (sumSq.asInstanceOf[AnyRef] == null) {
+      sumSq = modelmat.ones(size(modelmat,1), size(modelmat,2)) *@ opts.initsumsq
+    } else {
+      sumSq.set(opts.initsumsq)
+    }
+    stepn = modelmat.zeros(1,1)
+    ve = modelmat.zeros(opts.vecExponent.nrows, opts.vecExponent.ncols)
+    te = modelmat.zeros(opts.timeExponent.nrows, opts.timeExponent.ncols)
+    alpha = modelmat.zeros(opts.alpha.nrows, opts.alpha.ncols)
+    ve <-- opts.vecExponent
+    te <-- opts.timeExponent
+    alpha <-- opts.alpha
+  }
+
+  def update2(step:Long):Unit = {
+    val nsteps = if (step == 0) 1f else {
+      if (firstStep == 0f) {
+        firstStep = step
+        1f
+      } else {
+        step / firstStep
+      }
+    }
+    stepn.set(nsteps)
+    val nw = 1f / stepn
+    val newsquares = updatemat *@ updatemat
+    newsquares ~ newsquares *@ nw
+    sumSq ~ sumSq *@ (1f - nw)
+    sumSq ~ sumSq + newsquares
+    if (opts.waitsteps < nsteps) {
+      val tmp = sumSq ^ ve
+      tmp ~ tmp *@ (stepn ^ te)
+      tmp ~ tmp + opts.epsilon
+      modelmat ~ modelmat + ((updatemat / tmp) *@ alpha)
+      if (mask != null) modelmat ~ modelmat *@ mask
+    }
+  }
+
+  def update(step:Long):Unit = {
+    val nsteps = if (step == 0) 1f else {
+      if (firstStep == 0f) {
+        firstStep = step
+        1f
+      } else {
+        step / firstStep
+      }
+    }
+    stepn.set(nsteps)
+    val nw = 1f / stepn
+    val newsquares = updatemat *@ updatemat
+    newsquares ~ newsquares *@ nw
+    sumSq ~ sumSq *@ (1f - nw)
+    sumSq ~ sumSq + newsquares
+    if (opts.waitsteps < nsteps) {
+      val tmp = sumSq ^ ve
+      tmp ~ tmp *@ (stepn ^ te)
+      tmp ~ tmp + opts.epsilon
+      tmp ~ updatemat / tmp
+      tmp ~ tmp *@ alpha
+      modelmat ~ modelmat + tmp
+      if (mask != null) modelmat ~ modelmat *@ mask
+    }
+  }
+}
+
+
+
+object IncNormUpdater {
+  class Options extends Updater.Options {
+    var warmup = 0L
+    var power = 0.9f
+  }
+}
+
+object IncMultUpdater {
+  class Options extends Updater.Options {
+    var warmup = 0L
+    var power = 0.9f
+  }
+}
+
+object BatchNormUpdater {
+  class Options extends Updater.Options {
+
+  }
+}
+
+object BatchMultUpdater {
+  class Options extends Updater.Options {
+    var eps = 1e-12
+
+  }
+}
+
+object TelescopingUpdater {
+  class Options extends Updater.Options {
+    var factor = 1.5f // fix: was `val`, which made the telescoping factor unconfigurable; every other Options field is a var
+  }
+}
+
+object GradUpdater {
+  class Options extends Updater.Options {
+    var alpha:FMat = 1f
+    var timeExponent:FMat = 0.5f
+    var waitsteps = 2
+    var mask:FMat = null
+  }
+}
+
+
+object ADAGradUpdater {
+  class Options extends GradUpdater.Options {
+    var vecExponent:FMat = 0.5f
+    var epsilon = 1e-15f
+    var initsumsq = 1e-8f
+  }
+}
+
+object Updater {
+  class Options {

+  }
+}