Browse Source

New management report, copy code from MMASR project (EE4H) to git, PCA code

master
Sam Black 11 years ago
parent
commit
aa0286ee45
23 changed files with 3229 additions and 377 deletions
  1. BIN
      ee4m/ee4m-performance_management-200901292307.odt
  2. +8
    -3
      ee4p/c/htkwrite.c
  3. +2
    -2
      ee4p/python/ee4h.conf
  4. +41
    -0
      ee4p/python/htk/.svn/all-wcprops
  5. +239
    -0
      ee4p/python/htk/.svn/entries
  6. +1
    -0
      ee4p/python/htk/.svn/format
  7. +26
    -0
      ee4p/python/htk/.svn/text-base/__init__.py.svn-base
  8. +92
    -0
      ee4p/python/htk/.svn/text-base/htkconfig.py.svn-base
  9. +380
    -0
      ee4p/python/htk/.svn/text-base/htkwrapper.py.svn-base
  10. +138
    -0
      ee4p/python/htk/.svn/text-base/htkwrite.c.svn-base
  11. +170
    -0
      ee4p/python/htk/.svn/text-base/htkwrite.py.svn-base
  12. +27
    -0
      ee4p/python/htk/.svn/text-base/setup.py.svn-base
  13. +138
    -0
      ee4p/python/htk/.svn/tmp/tempfile.tmp
  14. +26
    -0
      ee4p/python/htk/__init__.py
  15. +92
    -0
      ee4p/python/htk/htkconfig.py
  16. +380
    -0
      ee4p/python/htk/htkwrapper.py
  17. +138
    -0
      ee4p/python/htk/htkwrite.c
  18. +170
    -0
      ee4p/python/htk/htkwrite.py
  19. BIN
      ee4p/python/htk/htkwritefile.so
  20. +27
    -0
      ee4p/python/htk/setup.py
  21. +1000
    -0
      ee4p/python/pca/randpca
  22. +9
    -372
      ee4p/python/pyhtk.py
  23. +125
    -0
      ee4p/python/transcribe.py

BIN
ee4m/ee4m-performance_management-200901292307.odt View File


+ 8
- 3
ee4p/c/htkwrite.c View File

@@ -1,10 +1,10 @@
/* ------< C code to write HTK [1,2].* data >-----
by Guillaume Gravier <ggravier@inf.enst.fr>
*/
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <Python.h>

/* -------------------------------------------------- */
/* ----- Private type definition for HTKWrite() ----- */
@@ -38,12 +38,17 @@ static PyObject * HTKWriteBin(PyObject *self, PyObject *args)
int nelem, i, samps, len, *s;
float *ptr, period;
char *filename, *flags, buf[255];
FILE *destfile;
PyListObject *in_data;
int hasV=FALSE,hasE=FALSE,hasD=FALSE,hasN=FALSE,hasA=FALSE,hasT=FALSE,hasF=FALSE,hasC=FALSE,hasK=FALSE,hasZ=FALSE,has0=FALSE;

if (!PyArg_ParseTuple(args, "iifsOs", &nelem, &samps, &period, &flags, &in_data, &filename))
/*if (!PyArg_ParseTuple(args, "iifsOs", &nelem, &samps, &period, &flags, &in_data, &filename))*/
if (!PyArg_ParseTuple(args, "iifss", &nelem, &samps, &period, &flags, &filename))
return Py_BuildValue("i",7);

printf("%d %d %f %s %s", nelem, samps, period, flags, filename)

destfile = fopen(filename, "w");
header.nSamples=(long)samps; /* set number of samples */
header.sampSize=(short)(nelem*sizeof(float)); /* set sample size */
header.sampPeriod=(long)(period*10000.0); /* set sample period */
@@ -93,7 +98,7 @@ static PyObject * HTKWriteBin(PyObject *self, PyObject *args)
ptr=(float *)PyList_GetItem(in_data,i);
printf("%f\n", ptr);
/* now, ptr points to the i'th feature vector! Write it! */
if(fwrite(ptr,sizeof(float),nelem,filename) != nelem) {
if(fwrite(ptr,sizeof(float),nelem,destfile) != nelem) {
printf("HTKWriteFS(): cannot write %d'th vector (%d bytes)", i+1,nelem*sizeof(float));
return Py_BuildValue("i",5);
}


+ 2
- 2
ee4p/python/ee4h.conf View File

@@ -1,4 +1,4 @@
listTest list/listTestFulPath_Hcopy.scp
listTest list/listTestFulPath.scp
resultdir results/
binpath /home/sam/apps/htk/bin/
listTestHCopy list/listTestFulPath_Hcopy.scp
@@ -18,5 +18,5 @@ wordList lib/wordList_noSP
listTrainHCopy list/listTrainFulPath_Hcopy.scp
confpath /home/sam/htk/
project ee4h
flags
configHCopy config/config_HCopy
inputtype visualaudio

+ 41
- 0
ee4p/python/htk/.svn/all-wcprops View File

@@ -0,0 +1,41 @@
K 25
svn:wc:ra_dav:version-url
V 26
/svn/!svn/ver/18/trunk/htk
END
htkwrite.c
K 25
svn:wc:ra_dav:version-url
V 37
/svn/!svn/ver/15/trunk/htk/htkwrite.c
END
htkconfig.py
K 25
svn:wc:ra_dav:version-url
V 39
/svn/!svn/ver/11/trunk/htk/htkconfig.py
END
__init__.py
K 25
svn:wc:ra_dav:version-url
V 38
/svn/!svn/ver/11/trunk/htk/__init__.py
END
setup.py
K 25
svn:wc:ra_dav:version-url
V 35
/svn/!svn/ver/11/trunk/htk/setup.py
END
htkwrite.py
K 25
svn:wc:ra_dav:version-url
V 38
/svn/!svn/ver/15/trunk/htk/htkwrite.py
END
htkwrapper.py
K 25
svn:wc:ra_dav:version-url
V 40
/svn/!svn/ver/18/trunk/htk/htkwrapper.py
END

+ 239
- 0
ee4p/python/htk/.svn/entries View File

@@ -0,0 +1,239 @@
9

dir
18
http://mmasr.hippygeek.co.uk/svn/trunk/htk
http://mmasr.hippygeek.co.uk/svn



2008-12-08T11:48:59.649660Z
18
samwwwblack


svn:special svn:externals svn:needs-lock











d74afcdc-e8ac-4833-b602-f29cff999e65






0
htkwrite.c
file




2008-12-08T01:45:54.000000Z
f80a4f048c8a1e0c8eba699b41a0091a
2008-12-08T02:58:26.271393Z
15
samwwwblack





















4427
htkconfig.py
file




2008-12-06T23:58:37.000000Z
b00011f03e0deb5af893bae581a25e5f
2008-12-07T03:36:59.875348Z
11
samwwwblack





















3880
__init__.py
file




2008-12-06T23:25:59.000000Z
da9ceac1ffb0727c43c1996eda5849e3
2008-12-07T03:36:59.875348Z
11
samwwwblack





















1055
setup.py
file




2008-12-07T18:09:27.000000Z
fc915c08930410513f8237570faf2271
2008-12-07T03:36:59.875348Z
11
samwwwblack





















1129
htkwrite.py
file




2008-12-08T02:47:38.000000Z
2371d8526ac5f615b4309e18bea4124b
2008-12-08T02:58:26.271393Z
15
samwwwblack





















6473
htkwrapper.py
file




2008-12-08T11:47:06.000000Z
a1518715370ebbbeaf89114c93a15e10
2008-12-08T11:48:59.649660Z
18
samwwwblack





















12548

+ 1
- 0
ee4p/python/htk/.svn/format View File

@@ -0,0 +1 @@
9

+ 26
- 0
ee4p/python/htk/.svn/text-base/__init__.py.svn-base View File

@@ -0,0 +1,26 @@
#!/usr/bin/python
# __init__.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

__version__ = "0.1"

import sys, os, pwd, subprocess, shutil
import htkconfig, htkwrapper, htkwrite
from subprocess import CalledProcessError
# NOTE(review): float_output_precision is not a documented attribute of the
# standard sys module, so this assignment only has an effect if some other
# library reads it (presumably an array-printing package) -- confirm it is
# still needed.
sys.float_output_precision = 25

+ 92
- 0
ee4p/python/htk/.svn/text-base/htkconfig.py.svn-base View File

@@ -0,0 +1,92 @@
#!/usr/bin/python
# htkconfig.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys, os, pwd, shutil

__version__ = "0.1"

class HtkConfig:
    """Resolve the settings (paths and options) of an HTK project.

    Every setting starts from a default derived from ``configpath`` and
    ``projname``.  Unless ``list`` is true, ``<configpath><projname>.conf``
    is then read: each line of the form ``<name> <value>`` whose name is a
    known setting overrides the default, and the resulting paths are checked
    on disk.

    Attributes:
        no_conf: names of settings stored verbatim (``configpath`` is not
            prepended and they are not checked on disk).
        configs: mapping of setting name to its resolved value.
        configfile: the (closed) config file object; only set when
            ``list`` is false.
    """

    def __init__(self, binpath="/usr/bin/", configpath="./", projname="testHTK", list=False):
        """Build the default settings and, unless ``list`` is true, load and
        validate the project's ``.conf`` file.

        Exits the process with status 1 when a configured path is missing or
        is of the wrong kind (file vs. directory).
        """
        # Don't add the config path to these options
        self.no_conf = ["binpath", "project", "confpath", "inputtype", "space_step", "flags"]
        self.configs = {
            "binpath" : binpath,
            "project" : projname,
            "confpath" : configpath,
            "inputtype" : "audio",
            "hmmdir" : configpath + "hmmsTrained/",
            "mfccdir" : configpath + "mfccGen/",
            "typeList" : configpath + "list/typeList_" + projname,
            "listTrain" : configpath + "list/listTrain_" + projname + ".scp",
            "listTrainHCopy" : configpath + "list/listTrain_" + projname + "_HCopy.scp",
            "listTest" : configpath + "list/listTest_" + projname + ".scp",
            "listTestHCopy" : configpath + "list/listTest_" + projname + "_HCopy.scp",
            "configHList" : configpath + "config/configHList_" + projname + "_mfcc",
            "configHCopy" : configpath + "config/configHCopy_" + projname,
            "configTrain" : configpath + "config/configTrain_" + projname,
            "configTest" : configpath + "config/configTest_" + projname,
            "wordList" : configpath + "lib/wordList_" + projname,
            "wordListSP" : configpath + "lib/wordList_" + projname + "_withsp",
            "wordLabel" : configpath + "label/wordLabel_" + projname + ".mlf",
            "wordLabelSP" : configpath + "label/wordLabel_" + projname + "_withsp.mlf",
            "wordnet" : configpath + "lib/wordnet_" + projname,
            "worddict" : configpath + "lib/worddict_" + projname,
            "hedsilsp" : configpath + "lib/hedsilsp_" + projname + ".hed",
            "proto" : configpath + "lib/proto_" + projname,
            "resultdir" : configpath + "results/",
            "space_step" : 8
        }

        # "list" mode only exposes the defaults: nothing is read or checked.
        if list:
            return

        self.configfile = open(configpath + projname + ".conf", 'r')
        try:
            for line in self.configfile:
                fields = line.split()
                # Lines with fewer than two fields or an unknown name are ignored.
                if len(fields) > 1 and fields[0] in self.configs:
                    if fields[0] in self.no_conf:
                        self.configs[fields[0]] = fields[1]
                    else:
                        self.configs[fields[0]] = configpath + fields[1]
        finally:
            # The handle used to be left open for the lifetime of the object.
            self.configfile.close()

        self._validate()

    def _validate(self):
        """Exit with status 1 if a path setting is missing or of the wrong kind."""
        for part in self.configs:
            value = self.configs[part]
            # str() is required: the default space_step is the int 8, and
            # calling .isdigit() on it raised AttributeError.
            if str(value).isdigit() or part in self.no_conf:
                continue
            if not os.path.exists(value):
                print("%s does not exist, dieing" % value)
                sys.exit(1)
            # A trailing slash marks a setting that must be a directory.
            if value.endswith("/"):
                if not os.path.isdir(value):
                    print("%s is not a directory and it is configured as such, dieing" % value)
                    sys.exit(1)
            elif not os.path.isfile(value):
                print("%s is not a file and it is configured as such, dieing" % value)
                sys.exit(1)

    def getSetting(self, setting):
        """Return the value stored for ``setting``, or ``False`` if it is unknown."""
        return self.configs.get(setting, False)

    def listSettings(self):
        """Print every setting name and its current value."""
        print("Default settings:")
        for item in self.configs:
            print(item + " %s" % self.configs[item])

+ 380
- 0
ee4p/python/htk/.svn/text-base/htkwrapper.py.svn-base View File

@@ -0,0 +1,380 @@
#!/usr/bin/python
# htk.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys, os, pwd, subprocess, shutil
import htkconfig, htkwrite
from subprocess import CalledProcessError

__version__ = "0.1"

# Driver that runs the HTK command-line tools (HCopy, HCompV, HERest, HHEd,
# HVite, HResults) for one project, using the paths held by an
# htkconfig.HtkConfig instance.
class HtkWrapper:
# Normalise both paths to end in "/", load the project configuration and keep
# the optional `visual` processor.  Exits with status 1 when no `visual`
# processor is supplied.
def __init__(self, binpath, confpath, projname, visual=None):
self.mode = "normal"
self.realtime = False
if not binpath.endswith("/"):
binpath = binpath + "/"
if not confpath.endswith("/"):
confpath = confpath + "/"
self.config = htkconfig.HtkConfig(binpath, confpath, projname)
if visual != None:
self.visual = visual
else:
print("Alternate extraction not loaded")
sys.exit(1)

# Print every configured setting.
def listConfig(self):
self.config.listSettings()

# Switch to training-only mode when `flag` is true.
def setTraining(self, flag):
if flag:
self.mode = "training"

# Switch to testing-only mode when `flag` is true.
def setTesting(self, flag):
if flag:
self.mode = "testing"

# Switch to recognition mode when `flag` is true.  Requesting real-time
# processing (`rt`) only prints a notice; self.realtime stays False.
def setRecog(self, flag, rt=False):
if flag:
self.mode = "recog"
if rt:
self.realtime = False
print "Real time proecessing not yet supported"

# Print a summary of the run and dispatch on self.mode; "normal" runs
# training followed by testing.
def run(self):
print "Starting HTK with following options:"
if self.config.getSetting("project") != False:
print "Project: " + self.config.getSetting("project")
print "Running in %s mode" % self.mode
print "HTK binary path: " + self.config.getSetting("binpath")
print "HTK config path: " + self.config.getSetting("confpath")

if self.mode == "normal":
self.training()
self.testing()
elif self.mode == "training":
self.training()
elif self.mode == "testing":
self.testing()
elif self.mode == "recog":
self.recognition()

# Full training pipeline: feature extraction, HCompV seeding, building the
# per-word models and macros files, three HERest passes, insertion of the "sp"
# model via HHEd, then further HERest passes up to hmm<space_step>.
# Any failing external command terminates the process with status 1.
def training(self):
# Clean directories first
if self.config.getSetting("inputtype").find("audio") >= 0:
for root, dirs, files in os.walk(self.config.getSetting("mfccdir"), topdown=False):
for name in files:
os.remove(os.path.join(root, name))

if self.config.getSetting("inputtype") != "audio":
print("Multi input processing")
# Each line of the typeList file is split into whitespace-separated fields
# and handed to htkwrite.HtkWrite below.
typeconf = open(self.config.getSetting("typeList"), 'r')
tmpjointconf = typeconf.readlines()
typeconf.close()
jointconf = []
for line in tmpjointconf:
jointconf.append(line.split())
del(tmpjointconf)

for line in jointconf:
if self.config.getSetting("inputtype").find("visual") >= 0:
if self.visual != None:
# Video processor detected
#try:
print("%s %s %s" % (self.config, self.visual, line))
audiovisual = htkwrite.HtkWrite(self.config, self.visual, line)
print(audiovisual)
audiovisual.run()
#except Exception, e:
# print e
# sys.exit(1)
else:
print("No other visual processor found, dieing")
sys.exit(1)
else:
print("No extra processing found, dieing")
sys.exit(1)
elif self.config.getSetting("inputtype") == "audio":
# We are processing audio only, no voodoo required
print("Audio processing only")
hcopy1 = [self.config.getSetting("binpath") + "HCopy",
'-C', self.config.getSetting("configHCopy"),
'-S', self.config.getSetting("listTrainHCopy")
]
hcopy2 = [self.config.getSetting("binpath") + "HCopy",
'-C', self.config.getSetting("configHCopy"),
'-S', self.config.getSetting("listTestHCopy")
]
try:
# NOTE(review): subprocess.check_call raises CalledProcessError on a non-zero
# exit status, so the `!= 0` checks after each call in this class look
# unreachable -- confirm before relying on them.
ret1 = subprocess.check_call(hcopy1)
ret2 = subprocess.check_call(hcopy2)
if ret1 != 0 or ret2 != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
else:
print("Something has gone badly, badly wrong.")
sys.exit(1)

else:
print("Error, pyhtk is not setup to handle none audio based work.")
print("Dieing now.")
sys.exit(1)

# Now training
print "Training"
# Empty the HMM directory, then (re)create it with hmm0 .. hmm<space_step>.
if os.path.isdir(self.config.getSetting("hmmdir")):
for root, dirs, files in os.walk(self.config.getSetting("hmmdir"), topdown=False):
for name in files:
os.remove(os.path.join(root, name))
for name in dirs:
os.rmdir(os.path.join(root, name))
if not os.path.isdir(self.config.getSetting("hmmdir")):
os.mkdir(self.config.getSetting("hmmdir"))
for direc in range(0, int(self.config.getSetting("space_step"))+1):
if not os.path.isdir(self.config.getSetting("hmmdir") + "hmm%d" % direc):
os.mkdir(self.config.getSetting("hmmdir") + "hmm%d" % direc)

# Seed hmm0/hmmdef from the prototype with HCompV.
try:
hcompv = [self.config.getSetting("binpath") + "HCompV",
'-C', self.config.getSetting("configTrain"),
'-o', 'hmmdef',
'-f', '0.01',
'-m',
'-S', self.config.getSetting("listTrain"),
'-M', self.config.getSetting("hmmdir") + "hmm0",
self.config.getSetting("proto")
]
ret = subprocess.check_call(hcompv)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Seeding complete"

print "Copying HMM word models"
# Collect the lines of hmm0/hmmdef from the one containing BEGINHMM through
# the one containing ENDHMM.
hmmdef = open(self.config.getSetting("hmmdir") + "hmm0/hmmdef", 'r')
record = False
tmpdata = []
for line in hmmdef:
if line.find("BEGINHMM") >= 0:
record = True
elif line.find("ENDHMM") >= 0:
record = False
tmpdata.append(line)
if record:
tmpdata.append(line)

# Append one '~h "<word>"' entry plus the collected body to hmm0/models for
# each worddict line that does not contain "sp".
model0 = open(self.config.getSetting("hmmdir") + "hmm0/models", 'a')
wordlist = open(self.config.getSetting("worddict"), 'r')
for line in wordlist:
if not line.find("sp") >= 0:
model0.write('~h "%s"\n' % line.split()[0])
for part in tmpdata:
model0.write(part)
model0.close()
wordlist.close()
hmmdef.close()

# Build hmm0/macros from the hmmdef lines mentioning any of `macroterms`
# (with the DIAGC item dropped from the last collected line), followed by the
# contents of hmm0/vFloors.
vfloor = open(self.config.getSetting("hmmdir") + "hmm0/vFloors", 'r')
hmmdef = open(self.config.getSetting("hmmdir") + "hmm0/hmmdef", 'r')
macros = open(self.config.getSetting("hmmdir") + "hmm0/macros", 'a')
tmpmacro = []
macroterms = ["~o", "STREAMINFO", "DIAGC"]
for line in hmmdef:
for items in macroterms:
if items in line:
tmpmacro.append(line.strip('\n'))
tmpline = tmpmacro.pop().split("<")
for item in tmpline:
if item.find("DIAGC") == -1 and len(item) > 0:
tmpmacro.append("<" + item)
for line in tmpmacro:
macros.write(line + "\n")
for line in vfloor:
macros.write(line)
print "Finished HMM word models"
vfloor.close()
macros.close()
hmmdef.close()
del tmpmacro

# Three HERest re-estimation passes: hmm0 -> hmm1 -> hmm2 -> hmm3.
for iteration in range(1,4):
print "Iteration %d" % iteration

try:
herest = [self.config.getSetting("binpath") + "HERest",
'-D',
'-C', self.config.getSetting("configTrain"),
'-I', self.config.getSetting("wordLabel"),
'-t', str(250.0), str(150.0), str(1000.0),
'-S', self.config.getSetting("listTrain"),
'-H', "%shmm%d/macros" % (self.config.getSetting("hmmdir"), iteration - 1),
'-H', "%shmm%d/models" % (self.config.getSetting("hmmdir"), iteration - 1),
'-M', "%shmm%d" % (self.config.getSetting("hmmdir"), iteration),
self.config.getSetting("wordList")
]
ret = subprocess.check_call(herest)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Copying for 4th iteration"
for root, dirs, files in os.walk(self.config.getSetting("hmmdir") + "hmm3", topdown=False):
for name in files:
shutil.copy(self.config.getSetting("hmmdir") + "hmm3/" + name, self.config.getSetting("hmmdir") + "hmm4")
print "Copied 4th iteration"

# create silence model
print "Correcting silence model"
model3 = open(self.config.getSetting("hmmdir") + "hmm3/models", 'r')
model4 = open(self.config.getSetting("hmmdir") + "hmm4/models", 'a')
tmpmodel = []
record = False
# Collect the lines from the first one containing "sil" up to (not including)
# the next one containing ENDHMM.
for line in model3:
if line.find("sil") >= 0:
record = True
if record and line.find("ENDHMM") >= 0:
record = False
if record:
tmpmodel.append(line)
states = []
midstate = []
# NOTE(review): tmpmodel.index(line) returns the first equal line, and the
# "STATE" test also matches a NUMSTATES line -- confirm the 6-line slices
# pick the intended state blocks.
for line in tmpmodel:
if line.find("STATE") >= 0:
states.append([tmpmodel[tmpmodel.index(line):tmpmodel.index(line) + 6]])
for stat in states[int(len(states)/2)][0]:
midstate.append(stat)
# Append a 3-state "sp" model to hmm4/models whose emitting state reuses the
# lines of the middle collected block.
model4.write('~h "sp"\n<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n')
for line in midstate:
if line.find("STATE") == -1:
model4.write(line)
model4.write("<TRANSP> 3\n")
model4.write("0.000000e+00 5.000000e-01 5.000000e-01\n")
model4.write("0.000000e+00 5.000000e-01 5.000000e-01\n")
model4.write("0.000000e+00 0.000000e+00 0.000000e+00\n")
model4.write("<ENDHMM>\n")

model3.close()
model4.close()

# Run HHEd with the hedsilsp script: hmm4 -> hmm5.
try:
hhed = [self.config.getSetting("binpath") + "HHEd",
'-T', "3",
'-H', self.config.getSetting("hmmdir") + "hmm4/macros",
'-H', self.config.getSetting("hmmdir") + "hmm4/models",
'-M', self.config.getSetting("hmmdir") + "hmm5",
self.config.getSetting("hedsilsp"),
self.config.getSetting("wordList")
]
ret = subprocess.check_call(hhed)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
print "Corrected silence model"

# NOTE(review): this loop starts at 5 and writes into hmm5, the directory HHEd
# has just produced, while reading hmm4 -- confirm whether it should start at 6.
for iteration in range(5, int(self.config.getSetting("space_step")) + 1):
print "Iteration %d" % iteration

try:
herest = [self.config.getSetting("binpath") + "HERest",
'-D',
'-C', self.config.getSetting("configTrain"),
'-I', self.config.getSetting("wordLabelSP"),
'-S', self.config.getSetting("listTrain"),
'-H', "%shmm%d/macros" % (self.config.getSetting("hmmdir"), iteration - 1),
'-H', "%shmm%d/models" % (self.config.getSetting("hmmdir"), iteration - 1),
'-M', "%shmm%d" % (self.config.getSetting("hmmdir"), iteration),
self.config.getSetting("wordListSP")
]
# NOTE(review): the result of this call is not assigned, so the `ret` tested
# below still holds the value from the HHEd call above.
subprocess.check_call(herest)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

# NOTE(review): the loop above runs space_step - 4 times, but the message
# reports space_step - 5 -- confirm which is intended.
print "%d Iterations completed" % (int(self.config.getSetting("space_step")) - 5)
print "Training complete"

# Recognise the test list with HVite using the models in hmm<space_step>,
# then score the output with HResults into <resultdir><project>.res.
def testing(self):
print "Testing"

# Remove result files left over from a previous run.
if os.path.isfile(self.config.getSetting("resultdir") + self.config.getSetting("project") + ".mlf"):
os.remove(self.config.getSetting("resultdir") + self.config.getSetting("project") + ".mlf")
if os.path.isfile(self.config.getSetting("resultdir") + self.config.getSetting("project") + ".res"):
os.remove(self.config.getSetting("resultdir") + self.config.getSetting("project") + ".res")

try:
hvite = [self.config.getSetting("binpath") + "HVite",
'-H', "%shmm%d/macros" % (self.config.getSetting("hmmdir"), int(self.config.getSetting("space_step"))),
'-H', "%shmm%d/models" % (self.config.getSetting("hmmdir"), int(self.config.getSetting("space_step"))),
'-S', self.config.getSetting("listTest"),
'-C', self.config.getSetting("configTest"),
'-w', self.config.getSetting("wordnet"),
'-i', self.config.getSetting("resultdir") + self.config.getSetting("project") + ".mlf",
'-p', '10',
'-s', '0.0',
self.config.getSetting("worddict"),
self.config.getSetting("wordListSP")
]
ret = subprocess.check_call(hvite)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

try:
hresult = [self.config.getSetting("binpath") + "HResults",
'-e', '\"???\"', 'sil',
'-e', '\"???\"', 'sp',
'-I', self.config.getSetting("wordLabelSP"),
self.config.getSetting("wordListSP"),
self.config.getSetting("resultdir") + self.config.getSetting("project") + ".mlf"
]
# HResults' standard output is captured in the .res file; the file is only
# closed explicitly on the success path (the failure paths exit the process).
result = open(self.config.getSetting("resultdir") + self.config.getSetting("project") + ".res", 'w')
ret = subprocess.check_call(hresult, stdout=result)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
else:
result.close()
except CalledProcessError, e:
print e
sys.exit(1)

print "Testing finished"
print "Results can be found at %s" % self.config.getSetting("resultdir") + self.config.getSetting("project") + ".res"

# Placeholder: recognition mode is not implemented.
def recognition(self):
pass

+ 138
- 0
ee4p/python/htk/.svn/text-base/htkwrite.c.svn-base View File

@@ -0,0 +1,138 @@
/* ------< C code to write HTK [1,2].* data >-----
by Guillaume Gravier <ggravier@inf.enst.fr>
*/
#include <Python.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* -------------------------------------------------- */
/* ----- Private type definition for HTKWrite() ----- */
/* -------------------------------------------------- */
/*
 * On-disk header of an HTK parameter file; HTKWriteBin() writes it verbatim
 * with fwrite().
 *
 * The HTK file format defines this header as 12 bytes: two 4-byte integers
 * followed by two 2-byte integers.  The fields used to be `long`, which is
 * 8 bytes on LP64 platforms and grew the struct (and the bytes written) to
 * 24, so fixed-width types are used instead.
 */
typedef struct {
    int32_t nSamples;   /* number of samples (feature vectors) in the file */
    int32_t sampPeriod; /* sample period (100 ns units in the HTK format) */
    int16_t sampSize;   /* size of one sample in bytes */
    int16_t parmKind;   /* base parameter kind OR'ed with the HAS* qualifier bits */
} htk_header_t;

/* Boolean values used for the has* flag variables in HTKWriteBin(). */
#define TRUE 1
#define FALSE 0
/* Base parameter kinds; HTKWriteBin() always starts parmKind from H_MFCC. */
#define H_MFCC 6
#define H_USER 9

/* Qualifier bits (octal) OR'ed into parmKind, one per flag character. */
#define HASENERGY 0100 /* _E log energy included */
#define HASNULLE 0200 /* _N absolute energy suppressed */
#define HASDELTA 0400 /* _D delta coef appended */
#define HASACCS 01000 /* _A acceleration coefs appended */
#define HASCOMPX 02000 /* _C is compressed */
#define HASZEROM 04000 /* _Z zero meaned */
#define HASCRCC 010000 /* _K has CRC check */
#define HASZEROC 020000 /* _0 0'th Cepstra included */
#define HASVQ 040000 /* _V has VQ index attached */
#define HASTHIRD 0100000 /* _T has Delta-Delta-Delta index attached */

/*
 * writebinfile(nelem, samps, period, flags, in_data, filename) -> int
 *
 * Write `samps` feature vectors of `nelem` components each to `filename`:
 * first the htk_header_t header, then every value as a 32-bit float.
 *
 *   nelem    (int)    components per vector
 *   samps    (int)    number of vectors to write
 *   period   (float)  sample period, truncated to an integer and stored as is
 *   flags    (str)    qualifier characters (E D N A C T K Z 0 V); every other
 *                     character, including F, is ignored
 *   in_data  (list)   at least `samps` lists of at least `nelem` numbers
 *   filename (str)    destination path, truncated if it already exists
 *
 * Returns a Python int: 0 on success, 5 when the file cannot be opened,
 * written or closed, 7 when the arguments are unusable.  A MemoryError is
 * raised if the conversion buffer cannot be allocated.
 *
 * Header and data are written in the machine's native byte order.
 * NOTE(review): HTK's default on-disk byte order is big-endian -- confirm
 * the consuming tools are configured to read native order.
 */
static PyObject * HTKWriteBin(PyObject *self, PyObject *args)
{
    htk_header_t header;
    unsigned short kind = H_MFCC;
    int nelem, samps, x, y;
    int status = 5;             /* every early exit after fopen() is an I/O failure unless overridden */
    float period;
    float *values = NULL;
    double value;
    char *filename, *flags;
    const char *flag;
    FILE *destfile;
    PyObject *in_data, *row;

    (void)self;

    if (!PyArg_ParseTuple(args, "iifsOs", &nelem, &samps, &period, &flags, &in_data, &filename)) {
        /* A value is returned, so the pending exception must not be left set. */
        PyErr_Clear();
        return Py_BuildValue("i", 7);
    }

    /* sampSize is a 16-bit field, which bounds the number of components. */
    if (nelem <= 0 || samps < 0
        || (size_t)nelem > (size_t)32767 / sizeof(float)
        || !PyList_Check(in_data) || PyList_Size(in_data) < samps) {
        return Py_BuildValue("i", 7);
    }

    /* Zero first so that any padding bytes written to disk are defined. */
    memset(&header, 0, sizeof header);
    header.nSamples = samps;
    header.sampPeriod = (long)period;
    header.sampSize = (short)((size_t)nelem * sizeof(float));

    printf("%d\nHeader setup\n", H_MFCC);

    /* Read the flag characters in place; no fixed-size copy is needed. */
    for (flag = flags; *flag != '\0'; flag++) {
        switch (*flag) {
        case 'E': kind |= HASENERGY; break;
        case 'D': kind |= HASDELTA; break;
        case 'N': kind |= HASNULLE; break;
        case 'A': kind |= HASACCS; break;
        case 'T': kind |= HASTHIRD; break;
        case 'K': kind |= HASCRCC; break;
        case 'C': kind |= HASCOMPX; break;
        case 'Z': kind |= HASZEROM; break;
        case '0': kind |= HASZEROC; break;
        case 'V': kind |= HASVQ; break;
        default: break;
        }
    }
    header.parmKind = (short)kind;

    /* Binary mode: the payload must not go through newline translation. */
    destfile = fopen(filename, "wb");
    if (destfile == NULL) {
        printf("HTKWriteFS(): cannot open %s for writing", filename);
        return Py_BuildValue("i", 5);
    }

    if (fwrite(&header, sizeof header, 1, destfile) != 1) {
        printf("HTKWriteFS(): cannot write HTK header (%zu bytes)", sizeof header);
        goto out;
    }
    printf("\nHeader written\n");

    /* Heap buffer instead of a stack VLA sized from caller input. */
    values = calloc((size_t)samps, (size_t)nelem * sizeof *values);
    if (values == NULL && samps > 0) {
        fclose(destfile);
        return PyErr_NoMemory();
    }

    /* Convert the Python lists to C floats. */
    for (x = 0; x < samps; x++) {
        row = PyList_GetItem(in_data, x);   /* borrowed reference */
        if (row == NULL || !PyList_Check(row) || PyList_Size(row) < nelem) {
            status = 7;
            goto out;
        }
        for (y = 0; y < nelem; y++) {
            value = PyFloat_AsDouble(PyList_GetItem(row, y));
            if (value == -1.0 && PyErr_Occurred()) {
                status = 7;
                goto out;
            }
            /* Convert the value; reinterpreting the double's bytes wrote garbage. */
            values[(size_t)x * (size_t)nelem + (size_t)y] = (float)value;
        }
    }
    printf("Iterations done\n");

    /* And now out to file, one vector at a time. */
    for (x = 0; x < samps; x++) {
        if (fwrite(values + (size_t)x * (size_t)nelem, sizeof *values, (size_t)nelem, destfile) != (size_t)nelem) {
            printf("HTKWriteFS(): cannot write %d'th vector (%zu bytes)", x + 1, (size_t)nelem * sizeof(float));
            goto out;
        }
    }
    status = 0;

out:
    free(values);
    /* fclose() flushes buffered data, so its failure is a write failure too. */
    if (fclose(destfile) != 0 && status == 0) {
        status = 5;
    }
    if (status == 7) {
        PyErr_Clear();
    }
    if (status == 0) {
        printf("File written\n");
    }
    return Py_BuildValue("i", status);
}

static PyMethodDef HTKWriteMethods[] = {
{"writebinfile", HTKWriteBin, METH_VARARGS, "Write HTK data out to a binary file"},
{NULL, NULL, 0, NULL} /* Sentinel */
};

/* Module initialisation hook: registers HTKWriteMethods as module "htkwritefile". */
PyMODINIT_FUNC
inithtkwritefile(void)
{
    Py_InitModule("htkwritefile", HTKWriteMethods);
}


+ 170
- 0
ee4p/python/htk/.svn/text-base/htkwrite.py.svn-base View File

@@ -0,0 +1,170 @@
#!/usr/bin/python
# htkwrite.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

__version__ = "0.1"

import sys, os, pwd, subprocess, shutil
from subprocess import CalledProcessError

try:
import htkwritefile
except Exception, e:
print e
sys.exit(1)

class HtkWrite:
    """Combine HCopy audio features with a second data stream for one
    recording and write the result through ``htkwritefile.writebinfile``.

    ``config`` must provide ``getSetting(name)``; ``processor`` must provide
    ``run(videofile, videotime)``; ``data`` is a list of strings describing
    one recording (see ``_validateData``).
    """

    # NOTE(review): hard-coded stand-in for the output of _runExtraProcess(),
    # which run() currently does not call -- replace once the real extractor
    # is wired in.
    _STUB_ALTDATA = [153, 195, 374, 234, 485, 178, 209, 278, 243, 310, 381, 326, 290, 237, 226, 168, 190, 188, 240, 243, 449, 277, 461, 328, 348, 584, 230, 385, -1, 170, 242, 180, 293, 238, 352, 324, 539, 226, 164, 188, 187, 240, 511, 246, 296, 232, 188, 499, 146, 186, 208, 205, 256, 253, 292, 229, 407, 246, 378, 176, 203, 217, 191, 443, 297, 294, 465, 237, 185, 224, 212, 187, 149, 218, -1, -1, 221, 364, 223, 694, 166, 247, 235, 411, 313, 465, 266, 369, 233, 185, 569, 174, 226, 162, 229, 274, 362, 335, -1, 349, 205, 205, 180, 170, 166, 272, 521, 241, 283, 530, 248, 318, 881, 513, 366, 390, 230, 589, 1028, 429, 264, 200, 636, 1174, 992, 698, 360, 628, 857, 998, 367, 243, 609, 1258, 937, 216, 371, 414, 934, 370, 581, 789, 1276, 1501, 843, 515, 204, 280, 919, 640, 927, 1181, 596, 1170, 1374, 1190, 1012, 936, 207, -1, 484, 310, 342, 799, 1217, 1371, 1405, 1195, 320, 676, 547, 739, 430, 561, 1130, 1330, 974, 218, 477, 327, 312, 649, 548, 375, 440, 240, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 564, 476, 411, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 427, 700, 301, 1019, 808, 235, 290, 307, 332, 461, 508, 443, 743, 709, 340, 366, 396, 457, 417, 615, -1, 475, 666, -1, 704, 271, 288, 312, 650, 435, 358, 348, 299, 397, 425, 591, 710, 412, 418, 340, 520, 758, 708, 666, 774, 936, 1173, 1068, 1001, 628, 466, 723, 762, 701, 371, 954, 1031, 678, 1439, 1227, 780, 989, 1405, 814, 1164, 752, 685, 728, 909, 1116, 1656, 1007, 838, 1177, 1117, 1290, 750, 610, 903, 535, 400, 327, 443, 511, 373]

    def __init__(self, config, processor, data):
        """Store the collaborators and classify ``data`` into ``self.options``.

        Raises ValueError if any option is still empty afterwards.
        """
        self.config = config
        self.processor = processor
        self.data = data
        self.options = {
            "mfcc" : "",
            "wavfile" : "",
            "videofile" : "",
            "wavtime" : "",
            "videotime" : ""
        }
        print(self.data)
        print(len(self.data))
        self._validateData()

    def _validateData(self):
        """Fill ``self.options`` from ``self.data``.

        Entries are recognised by suffix (``.mfcc``, ``.mpg``, ``.wav``) or
        by a leading ``v`` / ``a`` (video / audio time, stored without the
        prefix).  Raises ValueError when an option was not supplied.
        """
        if self.config.getSetting("inputtype").find("visual") >= 0:
            for part in self.data:
                print("part is :%s:" % part)
                if part.endswith(".mfcc"):
                    self.options["mfcc"] = part
                elif part.endswith(".mpg"):
                    self.options["videofile"] = part
                elif part.endswith(".wav"):
                    self.options["wavfile"] = part
                elif part.startswith("v"):
                    self.options["videotime"] = part.lstrip("v")
                elif part.startswith("a"):
                    self.options["wavtime"] = part.lstrip("a")
                else:
                    print("Nothing found")
            print(self.options)
        else:
            print("No inputtype specified")

        for key in self.options:
            # Options start out as "", so the former `is None` test could
            # never detect a missing entry.
            if not self.options[key]:
                print('Error, "%s" is not set' % key)
                raise ValueError('"%s" is not set' % key)

    def _runHcopy(self):
        """Run HCopy on the wav file, writing ``<mfcc stem>.wav.mfcc``.

        Raises subprocess.CalledProcessError on a non-zero exit status.
        """
        hcopy1 = [self.config.getSetting("binpath") + "HCopy",
                  '-C', self.config.getSetting("configHCopy"),
                  '-e', self.options["wavtime"],
                  self.options["wavfile"],
                  self.options["mfcc"].split(".")[0] + ".wav.mfcc"
                  ]
        # check_call raises CalledProcessError itself; the former bare
        # `except: raise CalledProcessError` replaced it with an argument-less
        # (invalid) one and also swallowed SystemExit.
        subprocess.check_call(hcopy1)

    def _runExtraProcess(self):
        """Return the processor's output for the video file and video time."""
        return self.processor.run(self.options["videofile"], self.options["videotime"])

    def _runHlist(self):
        """Run HList on ``<mfcc stem>.wav.mfcc`` and return its standard output.

        Raises subprocess.CalledProcessError on a non-zero exit status.
        """
        hlist1 = [self.config.getSetting("binpath") + "HList",
                  '-C', self.config.getSetting("configHList"),
                  '-r',
                  '-h',
                  self.options["mfcc"].split(".")[0] + ".wav.mfcc"
                  ]
        proc = subprocess.Popen(hlist1, stdout=subprocess.PIPE)
        output = proc.communicate()[0]
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, hlist1)
        return output

    def run(self):
        """Extract audio features, merge the second stream into them and
        write the combined vectors to ``self.options["mfcc"]``.

        Returns the integer status reported by ``htkwritefile.writebinfile``.
        """
        self._runHcopy()
        #altdata = self._runExtraProcess()
        altdata = self._STUB_ALTDATA
        tmpwav = self._runHlist()

        wavmfcc = tmpwav.split("\n")
        mfccheader = []
        mfccdata = []

        # The first three output lines are parsed as "name: value" header fields.
        for i in range(0, 3):
            tmp = [part for part in wavmfcc[i].strip().split(' ') if part != ""]
            for part in tmp:
                if part.find(':') > 0:
                    if part.split(":")[1] == "":
                        # "name:" followed by the value as the next token
                        mfccheader.append([part.strip(" :"), tmp[tmp.index(part) + 1].strip()])
                    else:
                        # "name:value" in a single token
                        mfccheader.append([part.split(":")[0].strip(), part.split(":")[1].strip()])

        # Every remaining line except the last is one vector of floats.
        for i in range(3, len(wavmfcc) - 1):
            mfccdata.append([float(string) for string in wavmfcc[i].strip().split()])

        if not mfccdata or not altdata:
            raise ValueError("no feature data to combine")

        # Calculate number of audio frames per alternate-stream value.
        # `//` keeps the integer division this Python 2 code relied on.
        if len(mfccdata) // len(altdata) > 1:
            ratio = len(mfccdata) // len(altdata)
            sdata = altdata
            combined = mfccdata
        elif len(altdata) // len(mfccdata) > 1:
            # NOTE(review): `combined` is then a list of plain numbers, so the
            # append() in the spreading loop below cannot work -- confirm the
            # intended handling of this case.
            ratio = len(altdata) // len(mfccdata)
            sdata = mfccdata
            combined = altdata
        else:
            # Streams of comparable length: pair the values by position.  The
            # previous code looked frames up with altdata.index() and left
            # ratio/sdata/combined undefined for the statements that follow.
            ratio = None
            combined = mfccdata
            for frame, value in zip(mfccdata, altdata):
                frame.append(float(value))

        if ratio is not None:
            # Spread each value of the shorter stream over a window of frames.
            # NOTE(review): the window covers 2 * (ratio // 2) frames, i.e. one
            # fewer than `ratio` when ratio is odd -- confirm.
            diff = ratio // 2
            for slist in range(len(sdata)):
                midpoint = ratio * slist + diff
                for i in range(midpoint - diff, midpoint + diff):
                    combined[i].append(float(sdata[slist]))

        # Now we can write out to binary
        htkwriteargs = dict(mfccheader)
        # NOTE(review): the header keys built above are single tokens without
        # spaces, whereas the lookups below use names containing a space --
        # verify against real HList output.
        ret = htkwritefile.writebinfile(int(htkwriteargs["Num Comps"]) + 1, int(htkwriteargs["Num Samples"]), float(htkwriteargs["Sample Period"].split()[0]), htkwriteargs["Sample Kind"].lstrip("MFCC"), combined, self.options["mfcc"])
        # Hand the status code back instead of dropping it.
        return ret

+ 27
- 0
ee4p/python/htk/.svn/text-base/setup.py.svn-base View File

@@ -0,0 +1,27 @@
#!/usr/bin/python
# setup.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
from distutils.core import setup, Extension

# C extension that provides htkwritefile.writebinfile(); built from htkwrite.c.
module1 = Extension(
    'htkwritefile',
    sources=['htkwrite.c'],
)

# Distribution metadata handed to distutils.
setup(
    name='HTKWriteFile',
    version='0.1',
    description='Write HTK compatible binary files',
    ext_modules=[module1],
)

+ 138
- 0
ee4p/python/htk/.svn/tmp/tempfile.tmp View File

@@ -0,0 +1,138 @@
/* ------< C code to write HTK [1,2].* data >-----
by Guillaume Gravier <ggravier@inf.enst.fr>
*/
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* -------------------------------------------------- */
/* ----- Private type definition for HTKWrite() ----- */
/* -------------------------------------------------- */
/*
 * Header written verbatim at the start of an HTK parameter file.
 *
 * The HTK format stores nSamples and sampPeriod as 4-byte integers and
 * sampSize and parmKind as 2-byte integers, 12 bytes in total.  The fields
 * used to be `long`, which is 8 bytes on LP64 platforms and produced a
 * 24-byte header there; `int` is 4 bytes on the platforms CPython targets.
 */
typedef struct {
    int nSamples;     /* number of sample vectors in the file */
    int sampPeriod;   /* sample period (HTK defines this in 100 ns units) */
    short sampSize;   /* size of one sample vector in bytes */
    short parmKind;   /* base kind code OR'ed with the HAS* qualifier bits */
} htk_header_t;

/* Boolean constants. */
#define TRUE 1
#define FALSE 0
/* Base parameter-kind codes; H_MFCC is the one HTKWriteBin() starts from. */
#define H_MFCC 6
#define H_USER 9

/* Qualifier bits OR'ed into the parameter kind.  The values are octal
   literals (leading 0), one bit each. */
#define HASENERGY 0100 /* _E log energy included */
#define HASNULLE 0200 /* _N absolute energy suppressed */
#define HASDELTA 0400 /* _D delta coef appended */
#define HASACCS 01000 /* _A acceleration coefs appended */
#define HASCOMPX 02000 /* _C is compressed */
#define HASZEROM 04000 /* _Z zero meaned */
#define HASCRCC 010000 /* _K has CRC check */
#define HASZEROC 020000 /* _0 0'th Cepstra included */
#define HASVQ 040000 /* _V has VQ index attached */
#define HASTHIRD 0100000 /* _T has Delta-Delta-Delta index attached */

/*
 * writebinfile(nelem, samps, period, flags, data, filename) -> int
 *
 * Write `samps` feature vectors of `nelem` values each to `filename`: a
 * binary htk_header_t followed by the values as 32-bit floats, everything in
 * host byte order.
 *
 *   nelem    - values per vector
 *   samps    - number of vectors taken from `data`
 *   period   - sample period, stored truncated to an integer
 *   flags    - qualifier letters (E, D, N, A, C, T, K, Z, 0, V); every other
 *              character, such as a '_' separator, is ignored
 *   data     - list of at least `samps` lists, each with at least `nelem`
 *              numbers
 *   filename - output path; an existing file is truncated
 *
 * The result is always a Python int status, never an exception:
 *   0 success, 3 out of memory, 4 malformed sizes or data, 5 write failure,
 *   6 cannot open the output file, 7 bad call arguments.
 *
 * NOTE(review): HTK expects a 12-byte header and, unless configured
 * otherwise, big-endian data - confirm sizeof(htk_header_t) and the byte
 * order settings for the target platform.
 */
static PyObject * HTKWriteBin(PyObject *self, PyObject *args)
{
    htk_header_t header;
    int htk_kind = H_MFCC;
    int nelem, samps, i, j;
    int status = 0;
    float period;
    float *values = NULL;
    char *filename, *flags;
    const char *p;
    FILE *destfile = NULL;
    PyObject *in_data, *row;
    size_t total;

    (void) self;

    if (!PyArg_ParseTuple(args, "iifsOs", &nelem, &samps, &period, &flags, &in_data, &filename)) {
        /* Callers are given status 7, so drop the pending exception instead
           of returning a value while an error is still set. */
        PyErr_Clear();
        return Py_BuildValue("i",7);
    }

    /* Reject shapes that cannot be written before touching the file.
       32767 is the largest byte count the 16-bit sampSize field can hold. */
    if (nelem <= 0 || samps < 0 || (size_t)nelem * sizeof(float) > 32767
            || !PyList_Check(in_data) || PyList_Size(in_data) < samps) {
        printf("HTKWriteFS(): data does not hold %d vectors of %d values\n", samps, nelem);
        return Py_BuildValue("i",4);
    }
    if ((size_t)samps > ((size_t)-1) / (sizeof(float) * (size_t)nelem)) {
        printf("HTKWriteFS(): %d vectors of %d values do not fit in memory\n", samps, nelem);
        return Py_BuildValue("i",3);
    }
    total = (size_t)samps * (size_t)nelem;

    /* Binary mode: the payload must not go through newline translation. */
    destfile = fopen(filename, "wb");
    if (destfile == NULL) {
        printf("HTKWriteFS(): cannot open %s for writing\n", filename);
        return Py_BuildValue("i",6);
    }

    /* Zero first so struct padding never reaches the file uninitialised. */
    memset(&header, 0, sizeof header);
    header.nSamples = samps;
    header.sampPeriod = (long)period;
    header.sampSize = (short)(nelem * sizeof(float));

    printf("%d\nHeader setup\n", htk_kind);

    /* Read the qualifier letters straight from the caller's string; no
       fixed-size copy that an over-long string could overflow. */
    for (p = flags; *p != '\0'; p++) {
        switch (*p) {
        case 'E': htk_kind |= HASENERGY; break;
        case 'D': htk_kind |= HASDELTA; break;
        case 'N': htk_kind |= HASNULLE; break;
        case 'A': htk_kind |= HASACCS; break;
        case 'T': htk_kind |= HASTHIRD; break;
        case 'K': htk_kind |= HASCRCC; break;
        case 'C': htk_kind |= HASCOMPX; break;
        case 'Z': htk_kind |= HASZEROM; break;
        case '0': htk_kind |= HASZEROC; break;
        case 'V': htk_kind |= HASVQ; break;
        default: break;
        }
    }
    header.parmKind = (short)htk_kind;

    if (fwrite(&header, sizeof header, 1, destfile) != 1) {
        printf("HTKWriteFS(): cannot write HTK header (%lu bytes)", (unsigned long)sizeof header);
        status = 5;
        goto done;
    }
    printf("\nHeader written\n");

    if (total > 0) {
        values = malloc(total * sizeof *values);
        if (values == NULL) {
            printf("HTKWriteFS(): out of memory for %lu values\n", (unsigned long)total);
            status = 3;
            goto done;
        }
    }

    /* Convert the Python numbers to C floats.  PyList_GetItem() returns a
       borrowed reference, so there is nothing to release here. */
    for (i = 0; i < samps; i++) {
        row = PyList_GetItem(in_data, i);
        if (row == NULL || !PyList_Check(row) || PyList_Size(row) < nelem) {
            PyErr_Clear();
            printf("HTKWriteFS(): vector %d is not a list of %d values\n", i+1, nelem);
            status = 4;
            goto done;
        }
        for (j = 0; j < nelem; j++) {
            double d = PyFloat_AsDouble(PyList_GetItem(row, j));
            /* -1.0 is also a legal value; only a pending error means failure. */
            if (d == -1.0 && PyErr_Occurred()) {
                PyErr_Clear();
                printf("HTKWriteFS(): value %d of vector %d is not a number\n", j+1, i+1);
                status = 4;
                goto done;
            }
            values[(size_t)i * (size_t)nelem + (size_t)j] = (float)d;
        }
    }
    printf("Iterations done\n");

    /* And now out to file */
    if (total > 0 && fwrite(values, sizeof *values, total, destfile) != total) {
        printf("HTKWriteFS(): cannot write vector data (%lu bytes)", (unsigned long)(total * sizeof *values));
        status = 5;
        goto done;
    }

done:
    free(values);
    /* fclose() flushes, so its failure is a write failure too. */
    if (fclose(destfile) != 0 && status == 0) {
        printf("HTKWriteFS(): cannot close %s\n", filename);
        status = 5;
    }
    if (status == 0)
        printf("File written\n");
    return Py_BuildValue("i",status);
}

static PyMethodDef HTKWriteMethods[] = {
{"writebinfile", HTKWriteBin, METH_VARARGS, "Write HTK data out to a binary file"},
{NULL, NULL, 0, NULL} /* Sentinel */
};

PyMODINIT_FUNC inithtkwritefile(void) {
(void) Py_InitModule("htkwritefile", HTKWriteMethods);
}


+ 26
- 0
ee4p/python/htk/__init__.py View File

@@ -0,0 +1,26 @@
#!/usr/bin/python
# __init__.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

# Package version string.
__version__ = "0.1"

# Standard-library modules first, then this package's own submodules.
import sys, os, pwd, subprocess, shutil
import htkconfig, htkwrapper, htkwrite
from subprocess import CalledProcessError
# NOTE(review): this sets an attribute called float_output_precision on the
# sys module; nothing in this file reads it -- confirm which library, if any,
# is expected to consume it.
sys.float_output_precision = 25

+ 92
- 0
ee4p/python/htk/htkconfig.py View File

@@ -0,0 +1,92 @@
#!/usr/bin/python
# htkconfig.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys, os, pwd, shutil

__version__ = "0.1"

class HtkConfig:
def __init__(self, binpath="/usr/bin/", configpath="./", projname="testHTK", list=False):
# Build the settings table for one HTK project.
#   binpath    - directory holding the HTK binaries
#   configpath - project directory; prefixed to every path-like setting
#   projname   - project name, used in the default file names
#   list       - when true, <configpath><projname>.conf is not opened or read
#                (the parameter name shadows the builtin ``list``)
# NOTE(review): the indentation of this file was lost in this view, so the
# nesting of the blocks below has to be inferred -- confirm it against the
# real file before restructuring anything.
if not list:
# NOTE(review): this handle is never closed in the code visible here.
self.configfile = open(configpath + projname + ".conf", 'r')
# Don't add the config path to these options
self.no_conf = ["binpath", "project", "confpath", "inputtype", "space_step", "flags"]
# Defaults; a config-file line "<key> <value>" overrides a key listed here.
self.configs = {
"binpath" : binpath,
"project" : projname,
"confpath" : configpath,
"inputtype" : "audio",
"hmmdir" : configpath + "hmmsTrained/",
"mfccdir" : configpath + "mfccGen/",
"typeList" : configpath + "list/typeList_" + projname,
"listTrain" : configpath + "list/listTrain_" + projname + ".scp",
"listTrainHCopy" : configpath + "list/listTrain_" + projname + "_HCopy.scp",
"listTest" : configpath + "list/listTest_" + projname + ".scp",
"listTestHCopy" : configpath + "list/listTest_" + projname + "_HCopy.scp",
"configHList" : configpath + "config/configHList_" + projname + "_mfcc",
"configHCopy" : configpath + "config/configHCopy_" + projname,
"configTrain" : configpath + "config/configTrain_" + projname,
"configTest" : configpath + "config/configTest_" + projname,
"wordList" : configpath + "lib/wordList_" + projname,
"wordListSP" : configpath + "lib/wordList_" + projname + "_withsp",
"wordLabel" : configpath + "label/wordLabel_" + projname + ".mlf",
"wordLabelSP" : configpath + "label/wordLabel_" + projname + "_withsp.mlf",
"wordnet" : configpath + "lib/wordnet_" + projname,
"worddict" : configpath + "lib/worddict_" + projname,
"hedsilsp" : configpath + "lib/hedsilsp_" + projname + ".hed",
"proto" : configpath + "lib/proto_" + projname,
"resultdir" : configpath + "results/",
"space_step" : 8
}

# Apply overrides from the config file: only keys already present in
# self.configs are accepted, and only the first two whitespace-separated
# fields of a line are used.
if not list:
for line in self.configfile:
if len(line.split()) > 1:
if line.split()[0] in self.configs:
if line.split()[0] in self.no_conf:
self.configs[line.split()[0]] = line.split()[1]
else:
self.configs[line.split()[0]] = configpath + line.split()[1]

# Check that every path-like setting exists and has the expected kind (a
# trailing "/" marks a directory); a failure prints a message and exits with
# status 1.
# NOTE(review): it cannot be told from this view whether this loop sits inside
# the ``if not list:`` block above.
# NOTE(review): the default for "space_step" is the int 8, and int has no
# isdigit() method, so this call looks like it raises AttributeError whenever
# the config file does not override "space_step" with a string -- confirm.
for part in self.configs:
if not self.configs[part].isdigit():
if not os.path.exists(self.configs[part]) and not part in self.no_conf:
print "%s does not exist, dieing" % self.configs[part]
sys.exit(1)
elif not part in self.no_conf:
if self.configs[part].endswith("/"):
if not os.path.isdir(self.configs[part]):
print "%s is not a directory and it is configured as such, dieing" % self.configs[part]
sys.exit(1)
else:
if not os.path.isfile(self.configs[part]) or not os.path.exists(self.configs[part]):
print "%s is not a file and it is configured as such, dieing" % self.configs[part]
sys.exit(1)

def getSetting(self, setting):
if setting in self.configs:
return self.configs[setting]
else:
return False

def listSettings(self):
print "Default settings:"
for item in self.configs:
print item + " %s" % self.configs[item]

+ 380
- 0
ee4p/python/htk/htkwrapper.py View File

@@ -0,0 +1,380 @@
#!/usr/bin/python
# htk.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys, os, pwd, subprocess, shutil
import htkconfig, htkwrite
from subprocess import CalledProcessError

__version__ = "0.1"

class HtkWrapper:
def __init__(self, binpath, confpath, projname, visual=None):
    """Load the project configuration and remember the extra processor.

    ``binpath`` and ``confpath`` get a trailing "/" when they lack one.
    ``visual`` is the alternate-extraction object; without one a message is
    printed and the process exits with status 1.
    """
    self.mode = "normal"
    self.realtime = False

    # Both directories are used as plain string prefixes later on.
    binpath = binpath if binpath.endswith("/") else binpath + "/"
    confpath = confpath if confpath.endswith("/") else confpath + "/"
    self.config = htkconfig.HtkConfig(binpath, confpath, projname)

    if visual != None:
        self.visual = visual
        return
    print("Alternate extraction not loaded")
    sys.exit(1)

def listConfig(self):
self.config.listSettings()

def setTraining(self, flag):
if flag:
self.mode = "training"

def setTesting(self, flag):
if flag:
self.mode = "testing"

def setRecog(self, flag, rt=False):
# Select recognition mode when ``flag`` is truthy.  A truthy ``rt`` asks for
# real-time processing, which is refused: realtime stays False and a notice
# is printed.
# NOTE(review): indentation was lost in this view, so it is unclear whether
# the ``if rt:`` test is nested inside ``if flag:`` -- confirm.
if flag:
self.mode = "recog"
if rt:
self.realtime = False
print "Real time proecessing not yet supported"

def run(self):
print "Starting HTK with following options:"
if self.config.getSetting("project") != False:
print "Project: " + self.config.getSetting("project")
print "Running in %s mode" % self.mode
print "HTK binary path: " + self.config.getSetting("binpath")
print "HTK config path: " + self.config.getSetting("confpath")

if self.mode == "normal":
self.training()
self.testing()
elif self.mode == "training":
self.training()
elif self.mode == "testing":
self.testing()
elif self.mode == "recog":
self.recognition()

def training(self):
# Train the word models for the project.  Steps, in the order they appear:
#   1. extract features (HCopy for audio-only input, htkwrite.HtkWrite for
#      input types containing "visual"),
#   2. recreate the hmm0..hmm<space_step> directories under hmmdir,
#   3. seed a prototype with HCompV and copy it once per dictionary word,
#   4. run HERest for iterations 1-3,
#   5. add an "sp" model derived from "sil" and run HHEd (hmm4 -> hmm5),
#   6. run HERest from iteration 5 up to space_step.
# A failing external tool prints the error and exits the process with status 1.
# NOTE(review): the indentation of this file was lost in this view; the block
# nesting described in these comments is inferred -- confirm against the real
# file before restructuring.
# Clean directories first
if self.config.getSetting("inputtype").find("audio") >= 0:
for root, dirs, files in os.walk(self.config.getSetting("mfccdir"), topdown=False):
for name in files:
os.remove(os.path.join(root, name))

if self.config.getSetting("inputtype") != "audio":
print("Multi input processing")
# Each line of the typeList file is split on whitespace and handed to
# HtkWrite as one list of fields.
typeconf = open(self.config.getSetting("typeList"), 'r')
tmpjointconf = typeconf.readlines()
typeconf.close()
jointconf = []
for line in tmpjointconf:
jointconf.append(line.split())
del(tmpjointconf)

for line in jointconf:
if self.config.getSetting("inputtype").find("visual") >= 0:
if self.visual != None:
# Video processor detected
#try:
print("%s %s %s" % (self.config, self.visual, line))
audiovisual = htkwrite.HtkWrite(self.config, self.visual, line)
print(audiovisual)
audiovisual.run()
#except Exception, e:
# print e
# sys.exit(1)
else:
print("No other visual processor found, dieing")
sys.exit(1)
else:
print("No extra processing found, dieing")
sys.exit(1)
elif self.config.getSetting("inputtype") == "audio":
# We are processing audio only, no voodoo required
print("Audio processing only")
hcopy1 = [self.config.getSetting("binpath") + "HCopy",
'-C', self.config.getSetting("configHCopy"),
'-S', self.config.getSetting("listTrainHCopy")
]
hcopy2 = [self.config.getSetting("binpath") + "HCopy",
'-C', self.config.getSetting("configHCopy"),
'-S', self.config.getSetting("listTestHCopy")
]
try:
# check_call() raises CalledProcessError on a non-zero exit status, so the
# explicit return-code tests in this method are a second line of defence.
ret1 = subprocess.check_call(hcopy1)
ret2 = subprocess.check_call(hcopy2)
if ret1 != 0 or ret2 != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
else:
print("Something has gone badly, badly wrong.")
sys.exit(1)

else:
print("Error, pyhtk is not setup to handle none audio based work.")
print("Dieing now.")
sys.exit(1)

# Now training
print "Training"
# Empty hmmdir (files and sub-directories), then recreate hmm0..hmm<space_step>.
if os.path.isdir(self.config.getSetting("hmmdir")):
for root, dirs, files in os.walk(self.config.getSetting("hmmdir"), topdown=False):
for name in files:
os.remove(os.path.join(root, name))
for name in dirs:
os.rmdir(os.path.join(root, name))
if not os.path.isdir(self.config.getSetting("hmmdir")):
os.mkdir(self.config.getSetting("hmmdir"))
for direc in range(0, int(self.config.getSetting("space_step"))+1):
if not os.path.isdir(self.config.getSetting("hmmdir") + "hmm%d" % direc):
os.mkdir(self.config.getSetting("hmmdir") + "hmm%d" % direc)

# Seed hmm0/hmmdef from the prototype with HCompV.
try:
hcompv = [self.config.getSetting("binpath") + "HCompV",
'-C', self.config.getSetting("configTrain"),
'-o', 'hmmdef',
'-f', '0.01',
'-m',
'-S', self.config.getSetting("listTrain"),
'-M', self.config.getSetting("hmmdir") + "hmm0",
self.config.getSetting("proto")
]
ret = subprocess.check_call(hcompv)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Seeding complete"

print "Copying HMM word models"
# Collect the BEGINHMM..ENDHMM body of the seeded definition.
hmmdef = open(self.config.getSetting("hmmdir") + "hmm0/hmmdef", 'r')
record = False
tmpdata = []
for line in hmmdef:
if line.find("BEGINHMM") >= 0:
record = True
elif line.find("ENDHMM") >= 0:
record = False
tmpdata.append(line)
if record:
tmpdata.append(line)

# Write one ~h entry with that body per dictionary line.  Any line containing
# the text "sp" anywhere is skipped, not only a line for the "sp" model.
model0 = open(self.config.getSetting("hmmdir") + "hmm0/models", 'a')
wordlist = open(self.config.getSetting("worddict"), 'r')
for line in wordlist:
if not line.find("sp") >= 0:
model0.write('~h "%s"\n' % line.split()[0])
for part in tmpdata:
model0.write(part)
model0.close()
wordlist.close()
hmmdef.close()

# Build hmm0/macros from the option lines of hmmdef (with the DIAGC item
# dropped) followed by the contents of vFloors.
vfloor = open(self.config.getSetting("hmmdir") + "hmm0/vFloors", 'r')
hmmdef = open(self.config.getSetting("hmmdir") + "hmm0/hmmdef", 'r')
macros = open(self.config.getSetting("hmmdir") + "hmm0/macros", 'a')
tmpmacro = []
macroterms = ["~o", "STREAMINFO", "DIAGC"]
for line in hmmdef:
for items in macroterms:
if items in line:
tmpmacro.append(line.strip('\n'))
tmpline = tmpmacro.pop().split("<")
for item in tmpline:
if item.find("DIAGC") == -1 and len(item) > 0:
tmpmacro.append("<" + item)
for line in tmpmacro:
macros.write(line + "\n")
for line in vfloor:
macros.write(line)
print "Finished HMM word models"
vfloor.close()
macros.close()
hmmdef.close()
del tmpmacro

# Re-estimation passes 1-3: each reads hmm<N-1> and writes hmm<N>.
for iteration in range(1,4):
print "Iteration %d" % iteration

try:
herest = [self.config.getSetting("binpath") + "HERest",
'-D',
'-C', self.config.getSetting("configTrain"),
'-I', self.config.getSetting("wordLabel"),
'-t', str(250.0), str(150.0), str(1000.0),
'-S', self.config.getSetting("listTrain"),
'-H', "%shmm%d/macros" % (self.config.getSetting("hmmdir"), iteration - 1),
'-H', "%shmm%d/models" % (self.config.getSetting("hmmdir"), iteration - 1),
'-M', "%shmm%d" % (self.config.getSetting("hmmdir"), iteration),
self.config.getSetting("wordList")
]
ret = subprocess.check_call(herest)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Copying for 4th iteration"
for root, dirs, files in os.walk(self.config.getSetting("hmmdir") + "hmm3", topdown=False):
for name in files:
shutil.copy(self.config.getSetting("hmmdir") + "hmm3/" + name, self.config.getSetting("hmmdir") + "hmm4")
print "Copied 4th iteration"

# create silence model
print "Correcting silence model"
model3 = open(self.config.getSetting("hmmdir") + "hmm3/models", 'r')
model4 = open(self.config.getSetting("hmmdir") + "hmm4/models", 'a')
tmpmodel = []
record = False
# Record the lines from the first one containing "sil" up to its ENDHMM.
for line in model3:
if line.find("sil") >= 0:
record = True
if record and line.find("ENDHMM") >= 0:
record = False
if record:
tmpmodel.append(line)
states = []
midstate = []
# Each STATE line plus the five lines after it is treated as one state.
# NOTE(review): index() finds the first equal line, so identical STATE lines
# would all map to the same slice -- confirm this cannot happen in the input.
for line in tmpmodel:
if line.find("STATE") >= 0:
states.append([tmpmodel[tmpmodel.index(line):tmpmodel.index(line) + 6]])
# The middle state of "sil" becomes the single emitting state of "sp".
for stat in states[int(len(states)/2)][0]:
midstate.append(stat)
model4.write('~h "sp"\n<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n')
for line in midstate:
if line.find("STATE") == -1:
model4.write(line)
model4.write("<TRANSP> 3\n")
model4.write("0.000000e+00 5.000000e-01 5.000000e-01\n")
model4.write("0.000000e+00 5.000000e-01 5.000000e-01\n")
model4.write("0.000000e+00 0.000000e+00 0.000000e+00\n")
model4.write("<ENDHMM>\n")

model3.close()
model4.close()

# Apply the hedsilsp edit script with HHEd: hmm4 -> hmm5.
try:
hhed = [self.config.getSetting("binpath") + "HHEd",
'-T', "3",
'-H', self.config.getSetting("hmmdir") + "hmm4/macros",
'-H', self.config.getSetting("hmmdir") + "hmm4/models",
'-M', self.config.getSetting("hmmdir") + "hmm5",
self.config.getSetting("hedsilsp"),
self.config.getSetting("wordList")
]
ret = subprocess.check_call(hhed)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
print "Corrected silence model"

# Remaining re-estimation passes, now with the "sp" labels and word list.
for iteration in range(5, int(self.config.getSetting("space_step")) + 1):
print "Iteration %d" % iteration

try:
herest = [self.config.getSetting("binpath") + "HERest",
'-D',
'-C', self.config.getSetting("configTrain"),
'-I', self.config.getSetting("wordLabelSP"),
'-S', self.config.getSetting("listTrain"),
'-H', "%shmm%d/macros" % (self.config.getSetting("hmmdir"), iteration - 1),
'-H', "%shmm%d/models" % (self.config.getSetting("hmmdir"), iteration - 1),
'-M', "%shmm%d" % (self.config.getSetting("hmmdir"), iteration),
self.config.getSetting("wordListSP")
]
# NOTE(review): the result of this call is not assigned, so the ``ret`` test
# below still sees the value left by the HHEd call above.
subprocess.check_call(herest)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

# NOTE(review): range(5, space_step + 1) performs space_step - 4 passes, one
# more than the count printed here -- confirm which number is intended.
print "%d Iterations completed" % (int(self.config.getSetting("space_step")) - 5)
print "Training complete"

def testing(self):
    """Decode the test list with HVite, then score the output with HResults.

    Existing ``<resultdir><project>.mlf`` and ``.res`` files are removed
    first.  HVite writes the ``.mlf`` transcription; the standard output of
    HResults is captured in the ``.res`` file.  A failing tool prints the
    error and exits the process with status 1.
    """
    setting = self.config.getSetting
    print("Testing")

    output_stem = setting("resultdir") + setting("project")
    transcription = output_stem + ".mlf"
    report = output_stem + ".res"
    for leftover in (transcription, report):
        if os.path.isfile(leftover):
            os.remove(leftover)

    # Models written by the last training iteration.
    final_hmm = "%shmm%d/" % (setting("hmmdir"), int(setting("space_step")))
    hvite = [
        setting("binpath") + "HVite",
        '-H', final_hmm + "macros",
        '-H', final_hmm + "models",
        '-S', setting("listTest"),
        '-C', setting("configTest"),
        '-w', setting("wordnet"),
        '-i', transcription,
        '-p', '10',
        '-s', '0.0',
        setting("worddict"),
        setting("wordListSP"),
    ]
    try:
        ret = subprocess.check_call(hvite)
        if ret != 0:
            print("Error, return codes wrong")
            sys.exit(1)
    except CalledProcessError, e:
        print(e)
        sys.exit(1)

    hresult = [
        setting("binpath") + "HResults",
        '-e', '\"???\"', 'sil',
        '-e', '\"???\"', 'sp',
        '-I', setting("wordLabelSP"),
        setting("wordListSP"),
        transcription,
    ]
    try:
        result = open(report, 'w')
        ret = subprocess.check_call(hresult, stdout=result)
        if ret != 0:
            print("Error, return codes wrong")
            sys.exit(1)
        else:
            result.close()
    except CalledProcessError, e:
        print(e)
        sys.exit(1)

    print("Testing finished")
    print("Results can be found at %s" % report)

def recognition(self):
pass

+ 138
- 0
ee4p/python/htk/htkwrite.c View File

@@ -0,0 +1,138 @@
/* ------< C code to write HTK [1,2].* data >-----
by Guillaume Gravier <ggravier@inf.enst.fr>
*/
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* -------------------------------------------------- */
/* ----- Private type definition for HTKWrite() ----- */
/* -------------------------------------------------- */
/*
 * Header written verbatim at the start of an HTK parameter file.
 *
 * The HTK format stores nSamples and sampPeriod as 4-byte integers and
 * sampSize and parmKind as 2-byte integers, 12 bytes in total.  The fields
 * used to be `long`, which is 8 bytes on LP64 platforms and produced a
 * 24-byte header there; `int` is 4 bytes on the platforms CPython targets.
 */
typedef struct {
    int nSamples;     /* number of sample vectors in the file */
    int sampPeriod;   /* sample period (HTK defines this in 100 ns units) */
    short sampSize;   /* size of one sample vector in bytes */
    short parmKind;   /* base kind code OR'ed with the HAS* qualifier bits */
} htk_header_t;

/* Boolean constants. */
#define TRUE 1
#define FALSE 0
/* Base parameter-kind codes; H_MFCC is the one HTKWriteBin() starts from. */
#define H_MFCC 6
#define H_USER 9

/* Qualifier bits OR'ed into the parameter kind.  The values are octal
   literals (leading 0), one bit each. */
#define HASENERGY 0100 /* _E log energy included */
#define HASNULLE 0200 /* _N absolute energy suppressed */
#define HASDELTA 0400 /* _D delta coef appended */
#define HASACCS 01000 /* _A acceleration coefs appended */
#define HASCOMPX 02000 /* _C is compressed */
#define HASZEROM 04000 /* _Z zero meaned */
#define HASCRCC 010000 /* _K has CRC check */
#define HASZEROC 020000 /* _0 0'th Cepstra included */
#define HASVQ 040000 /* _V has VQ index attached */
#define HASTHIRD 0100000 /* _T has Delta-Delta-Delta index attached */

/*
 * writebinfile(nelem, samps, period, flags, data, filename) -> int
 *
 * Write `samps` feature vectors of `nelem` values each to `filename`: a
 * binary htk_header_t followed by the values as 32-bit floats, everything in
 * host byte order.
 *
 *   nelem    - values per vector
 *   samps    - number of vectors taken from `data`
 *   period   - sample period, stored truncated to an integer
 *   flags    - qualifier letters (E, D, N, A, C, T, K, Z, 0, V); every other
 *              character, such as a '_' separator, is ignored
 *   data     - list of at least `samps` lists, each with at least `nelem`
 *              numbers
 *   filename - output path; an existing file is truncated
 *
 * The result is always a Python int status, never an exception:
 *   0 success, 3 out of memory, 4 malformed sizes or data, 5 write failure,
 *   6 cannot open the output file, 7 bad call arguments.
 *
 * NOTE(review): HTK expects a 12-byte header and, unless configured
 * otherwise, big-endian data - confirm sizeof(htk_header_t) and the byte
 * order settings for the target platform.
 */
static PyObject * HTKWriteBin(PyObject *self, PyObject *args)
{
    htk_header_t header;
    int htk_kind = H_MFCC;
    int nelem, samps, i, j;
    int status = 0;
    float period;
    float *values = NULL;
    char *filename, *flags;
    const char *p;
    FILE *destfile = NULL;
    PyObject *in_data, *row;
    size_t total;

    (void) self;

    if (!PyArg_ParseTuple(args, "iifsOs", &nelem, &samps, &period, &flags, &in_data, &filename)) {
        /* Callers are given status 7, so drop the pending exception instead
           of returning a value while an error is still set. */
        PyErr_Clear();
        return Py_BuildValue("i",7);
    }

    /* Reject shapes that cannot be written before touching the file.
       32767 is the largest byte count the 16-bit sampSize field can hold. */
    if (nelem <= 0 || samps < 0 || (size_t)nelem * sizeof(float) > 32767
            || !PyList_Check(in_data) || PyList_Size(in_data) < samps) {
        printf("HTKWriteFS(): data does not hold %d vectors of %d values\n", samps, nelem);
        return Py_BuildValue("i",4);
    }
    if ((size_t)samps > ((size_t)-1) / (sizeof(float) * (size_t)nelem)) {
        printf("HTKWriteFS(): %d vectors of %d values do not fit in memory\n", samps, nelem);
        return Py_BuildValue("i",3);
    }
    total = (size_t)samps * (size_t)nelem;

    /* Binary mode: the payload must not go through newline translation. */
    destfile = fopen(filename, "wb");
    if (destfile == NULL) {
        printf("HTKWriteFS(): cannot open %s for writing\n", filename);
        return Py_BuildValue("i",6);
    }

    /* Zero first so struct padding never reaches the file uninitialised. */
    memset(&header, 0, sizeof header);
    header.nSamples = samps;
    header.sampPeriod = (long)period;
    header.sampSize = (short)(nelem * sizeof(float));

    printf("%d\nHeader setup\n", htk_kind);

    /* Read the qualifier letters straight from the caller's string; no
       fixed-size copy that an over-long string could overflow. */
    for (p = flags; *p != '\0'; p++) {
        switch (*p) {
        case 'E': htk_kind |= HASENERGY; break;
        case 'D': htk_kind |= HASDELTA; break;
        case 'N': htk_kind |= HASNULLE; break;
        case 'A': htk_kind |= HASACCS; break;
        case 'T': htk_kind |= HASTHIRD; break;
        case 'K': htk_kind |= HASCRCC; break;
        case 'C': htk_kind |= HASCOMPX; break;
        case 'Z': htk_kind |= HASZEROM; break;
        case '0': htk_kind |= HASZEROC; break;
        case 'V': htk_kind |= HASVQ; break;
        default: break;
        }
    }
    header.parmKind = (short)htk_kind;

    if (fwrite(&header, sizeof header, 1, destfile) != 1) {
        printf("HTKWriteFS(): cannot write HTK header (%lu bytes)", (unsigned long)sizeof header);
        status = 5;
        goto done;
    }
    printf("\nHeader written\n");

    if (total > 0) {
        values = malloc(total * sizeof *values);
        if (values == NULL) {
            printf("HTKWriteFS(): out of memory for %lu values\n", (unsigned long)total);
            status = 3;
            goto done;
        }
    }

    /* Convert the Python numbers to C floats.  PyList_GetItem() returns a
       borrowed reference, so there is nothing to release here. */
    for (i = 0; i < samps; i++) {
        row = PyList_GetItem(in_data, i);
        if (row == NULL || !PyList_Check(row) || PyList_Size(row) < nelem) {
            PyErr_Clear();
            printf("HTKWriteFS(): vector %d is not a list of %d values\n", i+1, nelem);
            status = 4;
            goto done;
        }
        for (j = 0; j < nelem; j++) {
            double d = PyFloat_AsDouble(PyList_GetItem(row, j));
            /* -1.0 is also a legal value; only a pending error means failure. */
            if (d == -1.0 && PyErr_Occurred()) {
                PyErr_Clear();
                printf("HTKWriteFS(): value %d of vector %d is not a number\n", j+1, i+1);
                status = 4;
                goto done;
            }
            values[(size_t)i * (size_t)nelem + (size_t)j] = (float)d;
        }
    }
    printf("Iterations done\n");

    /* And now out to file */
    if (total > 0 && fwrite(values, sizeof *values, total, destfile) != total) {
        printf("HTKWriteFS(): cannot write vector data (%lu bytes)", (unsigned long)(total * sizeof *values));
        status = 5;
        goto done;
    }

done:
    free(values);
    /* fclose() flushes, so its failure is a write failure too. */
    if (fclose(destfile) != 0 && status == 0) {
        printf("HTKWriteFS(): cannot close %s\n", filename);
        status = 5;
    }
    if (status == 0)
        printf("File written\n");
    return Py_BuildValue("i",status);
}

static PyMethodDef HTKWriteMethods[] = {
{"writebinfile", HTKWriteBin, METH_VARARGS, "Write HTK data out to a binary file"},
{NULL, NULL, 0, NULL} /* Sentinel */
};

PyMODINIT_FUNC inithtkwritefile(void) {
(void) Py_InitModule("htkwritefile", HTKWriteMethods);
}


+ 170
- 0
ee4p/python/htk/htkwrite.py View File

@@ -0,0 +1,170 @@
#!/usr/bin/python
# htkwrite.py
#
# Copyright 2008 Sam Black <samwwwblack@lapwing.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

__version__ = "0.1"

import sys, os, pwd, subprocess, shutil
from subprocess import CalledProcessError

try:
import htkwritefile
except Exception, e:
print e
sys.exit(1)

class HtkWrite:
def __init__(self, config, processor, data):
self.config = config
self.processor = processor
self.data = data
self.options = {
"mfcc" : "",
"wavfile" : "",
"videofile" : "",
"wavtime" : "",
"videotime" : ""
}
print(self.data)
print(len(self.data))
self._validateData()

def _validateData(self):
# Sort the fields of self.data into self.options when the configured input
# type contains "visual":
#   *.mfcc -> "mfcc", *.mpg -> "videofile", *.wav -> "wavfile",
#   "v<...>" -> "videotime", "a<...>" -> "wavtime" (leading letters stripped).
# The suffix tests run first, so a field ending in .mfcc/.mpg/.wav is never
# treated as a time even when it starts with "v" or "a".
# NOTE(review): indentation was lost in this view; it is unclear whether
# print(self.options) runs per field or once after the loop -- confirm.
if self.config.getSetting("inputtype").find("visual") >= 0:
for part in self.data:
print("part is :%s:" % part)
if part.endswith(".mfcc"):
self.options["mfcc"] = part
elif part.endswith((".mpg")):
self.options["videofile"] = part
elif part.endswith(".wav"):
self.options["wavfile"] = part
elif part.startswith("v"):
# lstrip("v") removes every leading "v", not just the first one.
self.options["videotime"] = part.lstrip("v")
elif part.startswith("a"):
self.options["wavtime"] = part.lstrip("a")
else:
print("Nothing found")
print(self.options)
else:
print("No inputtype specified")

# NOTE(review): options are initialised to "" in __init__ and only ever
# assigned strings here, so this ``is None`` test looks like it can never
# fire -- confirm whether an emptiness check was intended.
for key in self.options:
if self.options[key] is None:
print('Error, "%s" is not set' % key)
raise ValueError

def _runHcopy(self):
hcopy1 = [self.config.getSetting("binpath") + "HCopy",
'-C', self.config.getSetting("configHCopy"),
'-e', self.options["wavtime"],
self.options["wavfile"],
self.options["mfcc"].split(".")[0] + ".wav.mfcc"
]
try:
ret1 = subprocess.check_call(hcopy1)
if ret1 != 0:
print "Error, return codes wrong"
sys.exit(1)
except:
raise CalledProcessError

def _runExtraProcess(self):
return self.processor.run(self.options["videofile"], self.options["videotime"])

def _runHlist(self):
hlist1 = [self.config.getSetting("binpath") + "HList",
'-C', self.config.getSetting("configHList"),
'-r',
'-h',
self.options["mfcc"].split(".")[0] + ".wav.mfcc"
]
try:
ret1 = subprocess.Popen(hlist1, stdout=subprocess.PIPE)
output = ret1.communicate()[0]
except:
raise CalledProcessError

return output

def run(self):
self._runHcopy()
#altdata = self._runExtraProcess()
altdata = [153, 195, 374, 234, 485, 178, 209, 278, 243, 310, 381, 326, 290, 237, 226, 168, 190, 188, 240, 243, 449, 277, 461, 328, 348, 584, 230, 385, -1, 170, 242, 180, 293, 238, 352, 324, 539, 226, 164, 188, 187, 240, 511, 246, 296, 232, 188, 499, 146, 186, 208, 205, 256, 253, 292, 229, 407, 246, 378, 176, 203, 217, 191, 443, 297, 294, 465, 237, 185, 224, 212, 187, 149, 218, -1, -1, 221, 364, 223, 694, 166, 247, 235, 411, 313, 465, 266, 369, 233, 185, 569, 174, 226, 162, 229, 274, 362, 335, -1, 349, 205, 205, 180, 170, 166, 272, 521, 241, 283, 530, 248, 318, 881, 513, 366, 390, 230, 589, 1028, 429, 264, 200, 636, 1174, 992, 698, 360, 628, 857, 998, 367, 243, 609, 1258, 937, 216, 371, 414, 934, 370, 581, 789, 1276, 1501, 843, 515, 204, 280, 919, 640, 927, 1181, 596, 1170, 1374, 1190, 1012, 936, 207, -1, 484, 310, 342, 799, 1217, 1371, 1405, 1195, 320, 676, 547, 739, 430, 561, 1130, 1330, 974, 218, 477, 327, 312, 649, 548, 375, 440, 240, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 564, 476, 411, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 427, 700, 301, 1019, 808, 235, 290, 307, 332, 461, 508, 443, 743, 709, 340, 366, 396, 457, 417, 615, -1, 475, 666, -1, 704, 271, 288, 312, 650, 435, 358, 348, 299, 397, 425, 591, 710, 412, 418, 340, 520, 758, 708, 666, 774, 936, 1173, 1068, 1001, 628, 466, 723, 762, 701, 371, 954, 1031, 678, 1439, 1227, 780, 989, 1405, 814, 1164, 752, 685, 728, 909, 1116, 1656, 1007, 838, 1177, 1117, 1290, 750, 610, 903, 535, 400, 327, 443, 511, 373]
tmpwav = self._runHlist()

wavmfcc = tmpwav.split("\n")
mfccheader = []
mfccdata = []

for i in range(0, 3):
tmp = wavmfcc[i].strip().split(' ')
while "" in tmp:
for part in tmp:
if part == "":
tmp.pop(tmp.index(part))
for part in tmp:
if part.find(':') > 0:
if part.split(":")[1] == "":
mfccheader.append([tmp[tmp.index(part)].strip(" :"), tmp[tmp.index(part) + 1].strip()])
else:
mfccheader.append([tmp[tmp.index(part)].split(":")[0].strip(), tmp[tmp.index(part)].split(":")[1].strip()])

for i in range(3, len(wavmfcc) - 1):
stringtmp = wavmfcc[i].strip().split()
floattmp = []
for string in stringtmp:
floattmp.append(float(string))
mfccdata.append(floattmp)

# Calculate number of audio frames per alt frame
if len(mfccdata) / len(altdata) > 1:
ratio = len(mfccdata) / len(altdata)
sdata = altdata
combined = mfccdata
elif len(altdata) / len(mfccdata) > 1:
ratio = len(altdata) / len(mfccdata)
sdata = mfccdata
combined = altdata
else:
for line in mfccdata:
mfccdata[mfccdata.index(line)].append(altdata[altdata.index(line)])

slist = 0
diff = int(ratio / 2)
while slist < len(sdata):
midpoint = ratio * slist + diff
for i in range(midpoint - diff, midpoint + diff):
combined[i].append(float(sdata[slist]))
slist += 1

del(sdata)
del(altdata)
del(mfccdata)

# Now we can write out to binary