Browse Source

Finished pyhtk

master
Sam Black 12 years ago
parent
commit
fa163418f7
1 changed file with 215 additions and 48 deletions
  1. +215
    -48
      ee4p/python/pyhtk.py

+ 215
- 48
ee4p/python/pyhtk.py View File

@@ -18,18 +18,20 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import sys, os, time, csv, pwd, subprocess, shutil
import sys, os, pwd, subprocess, shutil
from subprocess import CalledProcessError
from optparse import OptionParser

__version__ = "0.1"

class HtkConfig:

def __init__(self, binpath="/usr/bin/", configpath="./", projname="testHTK"):
def __init__(self, binpath="/usr/bin/", configpath="./", projname="testHTK", list=False):
self.configfile = open(configpath + projname + ".conf", 'r')
self.configs = {
"binpath" : binpath,
"project" : projname,
"confpath" : configpath,
"hmmdir" : configpath + "hmmsTrained/",
"listTrain" : configpath + "list/listTrain_" + projname + ".scp",
"listTrainHCopy" : configpath + "list/listTrain_" + projname + "_HCopy.scp",
@@ -44,15 +46,32 @@ class HtkConfig:
"wordLabelSP" : configpath + "label/wordLabel_" + projname + "_withsp.mlf",
"wordnet" : configpath + "lib/wordnet_" + projname,
"worddict" : configpath + "lib/worddict_" + projname,
"hedsilsp" : configpath + "lib/hedsilsp_" + projname + ".hed",
"proto" : configpath + "lib/proto_" + projname,
"results" : configpath + "results/results_" + projname,
"flags" : "",
"space_step" : 8
}

for line in self.configfile:
if line.split()[0] in self.configs:
self.configs[line.split()[0]] = line.split()[1]
if not list:
for line in self.configfile:
if line.split()[0] in self.configs:
self.configs[line.split()[0]] = line.split()[1]

for part in self.configs:
if not self.configs[part].isdigit():
if not os.path.exists(self.configs[part]):
print "%s does not exist, dieing" % self.configs[part]
sys.exit(1)
else:
if self.configs[part].endswith("/"):
if not os.path.isdir(self.configs[part]):
print "%s is not a directory and it is configured as such, dieing" % self.configs[part]
sys.exit(1)
else:
if not os.path.isfile(self.configs[part]) or not os.path.exists(self.configs[part]):
print "%s is not a file and it is configured as such, dieing" % self.configs[part]
sys.exit(1)

def getSetting(self, setting):
if setting in self.configs:
@@ -60,6 +79,10 @@ class HtkConfig:
else:
return False

def listSettings(self):
for item in self.configs:
print item

class Htk:
def __init__(self, binpath, confpath, projname):
self.mode = "normal"
@@ -70,6 +93,9 @@ class Htk:
confpath = confpath + "/"
self.config = HtkConfig(binpath, confpath, projname)

def listConfig(self):
self.config.listSettings()

def setTraining(self, flag):
if flag:
self.mode = "training"
@@ -91,37 +117,53 @@ class Htk:
print "Project: " + self.config.getSetting("project")
print "Running in %s mode" % self.mode
print "HTK binary path: " + self.config.getSetting("binpath")
print "HTK config path: " + self.config.getSetting("confpath")

def testing(self):
pass
if self.mode == "normal":
self.training()
self.testing()
elif self.mode == "training":
self.training()
elif self.mode == "testing":
self.testing()
elif self.mode == "recog":
self.recognition()

def training(self):
# Run HCopy to create initial HMMs
try:
hcopy1 = [self.config.getSetting("binpath") + "HCopy",
"-C %s -S %s" %
(self.config.getSetting("configHCopy"), self.config.getSetting("listTrainHCopy"))]
subprocess.check_call(hcopy1)
hcopy2 = [self.config.getSetting("binpath") + "HCopy",
"-C %s -S %s" %
(self.config.getSetting("configHCopy"), self.config.getSetting("listTestHCopy"))]
subprocess.check_call(hcopy2)
ret1 = subprocess.check_call("-C %s -S %s" %
(self.config.getSetting("configHCopy"),
self.config.getSetting("listTrainHCopy")),
executable=self.config.getSetting("binpath") + "HCopy"
)
ret2 = subprocess.check_call("-C %s -S %s" % (self.config.getSetting("configHCopy"),
self.config.getSetting("listTestHCopy")),
executable=self.config.getSetting("binpath") + "HCopy"
)
if ret1 != 0 or ret2 != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
# Now training
print "Training"
for direc in range(0, self.config.getSetting("space_step")):
if not os.path.isdir(self.config.getSetting("hmmdir") + "/hmm" + direc):
os.mkdir(self.config.getSetting("hmmdir") + "/hmm" + direc)
for direc in range(0, self.config.getSetting("space_step")+1):
if not os.path.isdir(self.config.getSetting("hmmdir") + "hmm%d" % direc):
os.mkdir(self.config.getSetting("hmmdir") + "hmm%d" % direc)

try:
hcompv1 = [self.config.getSetting("binpath") + "HCompV",
"-C %s -o hmmdef -f 0.01 -m -S %s -M %s/hmm0 %s" %
(self.config.getSetting("configTrain"), self.config.getSetting("listTrain"),
self.config.getSetting("hmmdir"), self.config.getSetting("proto"))
]
subprocess.check_call(hcompv1)
ret = subprocess.check_call("-C %s -o hmmdef -f 0.01 -m -S %s -M %shmm0 %s" %
(self.config.getSetting("configTrain"),
self.config.getSetting("listTrain"),
self.config.getSetting("hmmdir"),
self.config.getSetting("proto")),
executable=self.config.getSetting("binpath") + "HCompV"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
@@ -169,33 +211,153 @@ class Htk:
for line in vfloor:
macros.append(line)
print "Finished HMM word models"
vfloor.close()
macros.close()
del tmpmacro

for iteration in range(1, 3):
print "Iteration %d" % iteration

herest = [self.config.getSetting("binpath") + "HERest",
"-D -C $CONFIG_train -I $LABELS -t 250.0 150.0 1000.0 -S $LIST_TRAIN -H $HMM_DIR/hmm$j/macros -H $HMM_DIR/hmm$j/models -M $HMM_DIR/hmm$i $WORD_LIST" %
(self.config.getSetting("configTrain"),
self.config.getSetting("wordLabel"),
self.config.getSetting("listTrain"),
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration,
self.config.getSetting("wordList"))
]
subprocess.check_call(herest)
try:
ret = subprocess.check_call("-D -C %s -I %s -t 250.0 150.0 1000.0 -S %s -H %shmm%d/macros -H %shmm%d/models -M %shmm%d %s" %
(self.config.getSetting("configTrain"),
self.config.getSetting("wordLabel"),
self.config.getSetting("listTrain"),
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration,
self.config.getSetting("wordList")),
executable=self.config.getSetting("binpath") + "HERest"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Copying for 4th iteration"
shutil.copytree(self.config.getSetting("hmmdir") + "hmm3", self.config.getSetting("hmmdir") + "hmm4")
print "Copied 4th iteration"

# create silence model
print "Correcting silence model"
macro3 = open(self.config.getSetting("hmmdir") + "hmm3/macros", 'r')
macro4 = open(self.config.getSetting("hmmdir") + "hmm4/macros", 'a')
tmpmacro = []
record = False
for line in macro3:
if line.contains("sil"):
record = True
if record and line.contains("ENDHMM"):
record = False
if record:
tmpmacro.append(line)
states = []
for line in tmpmacro:
if line.contains("STATE"):
states.append([tmpmacro[tmpmacro.index(line):tmpmacro.index(line) + 6]])
midstate = states[int(len(states)/2)]
macro4.append('~h "sp"\n<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n')
for line in midstate:
macro4.append(line)
macro4.append("<TRANSP> 3")
for i in range(1,3):
macro4.append("0.000000e+00 0.000000e+00 0.000000e+00")
macro4.append("<ENDHMM>")

macro3.close()
macro4.close()

try:
ret = subprocess.check_call("-T 2 -H %shmm4/macros -H %shmm4/models -M %shmm5 $ED_CMDFILE1 $WORD_LISTSP" %
(self.config.getSetting("hmmdir"),
self.config.getSetting("hmmdir"),
self.config.getSetting("hmmdir"),
self.config.getSetting("hedsilsp"),
self.config.getSetting("wordList")),
executable=self.config.getSetting("binpath") + "HHEd"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)
print "Corrected silence model"

for iteration in range(6, 8):
print "Iteration %d" % iteration

try:
subprocess.check_call("-D -C %s -I %s -S %s -H %shmm%d/macros -H %shmm%d/models -M %shmm%d %s" %
(self.config.getSetting("configTrain"),
self.config.getSetting("wordLabelSP"),
self.config.getSetting("listTrain"),
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration - 1,
self.config.getSetting("hmmdir"),
iteration,
self.config.getSetting("wordListSP")),
executable=self.config.getSetting("binpath") + "HERest"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "6 Iterations completed"
print "Training complete"

def testing(self):
print "Testing"

try:
ret = subprocess.check_call(
"-H %s -H %s -S %s -C %s -w %s -i %s.mlf %s %s %s" %
("%shmm%d/macros" % (self.config.getSetting("hmmdir"), self.config.getSetting("space_step")),
"%shmm%d/models" % (self.config.getSetting("hmmdir"), self.config.getSetting("space_step")),
self.config.getSetting("listTest"),
self.config.getSetting("configTest"),
self.config.getSetting("wordnet"),
self.config.getSetting("results"),
self.config.getSetting("flags"),
self.config.getSetting("worddict"),
self.config.getSetting("wordListSP")),
executable=self.config.getSetting("binpath") + "HVite"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

try:
subprocess.check_call(
'-e "???" sil -e "???" sp -I %s %s %s.mlf >> %s' %
(self.config.getSetting("wordLabelSP"),
self.config.getSetting("wordListSP"),
self.config.getSetting("results"),
self.config.getSetting("results")),
executable=self.config.getSetting("binpath") + "HResults"
)
if ret != 0:
print "Error, return codes wrong"
sys.exit(1)
except CalledProcessError, e:
print e
sys.exit(1)

print "Testing finished"

def recognition(self):
pass

@@ -203,6 +365,7 @@ if __name__ == "__main__":
parser = OptionParser()

parser.add_option("-V", "--version", action="store_true", default=False, dest="version", help="version information")
parser.add_option("-l", "--list", action="store_true", default=False, dest="listconfig", help="List config options")
parser.add_option("-b", "--binary", action="store", type="string", default="/usr/bin/", dest="binpath", help="path to the HTK binaries")
parser.add_option("-c", "--config", action="store", type="string", default="./", dest="configpath", help="path to config file")
parser.add_option("-p", "--project", action="store", type="string", default="testHTK", dest="project", help="project name")
@@ -213,15 +376,19 @@ if __name__ == "__main__":

(options, args) = parser.parse_args()

app = Htk(options.binpath, options.configpath, options.project)

if options.version:
print "%s version %s" % (os.path.basename(sys.argv[0]),__version__)
if options.training:
app.setTraining(True)
elif options.testing:
app.setTesting(True)
elif options.recog:
app.setRecog(True, options.realtime)

app.run()
elif options.listconfig:
conf = HtkConfig(list=True)
conf.listSettings()
else:
app = Htk(options.binpath, options.configpath, options.project)

if options.training:
app.setTraining(True)
elif options.testing:
app.setTesting(True)
elif options.recog:
app.setRecog(True, options.realtime)

app.run()

Loading…
Cancel
Save